|
@@ -164,8 +164,12 @@ def indep_process():
|
|
# 获取全是 -99 的列的列名
|
|
# 获取全是 -99 的列的列名
|
|
cols_to_drop = all_minus_99[all_minus_99 == True].index.tolist()
|
|
cols_to_drop = all_minus_99[all_minus_99 == True].index.tolist()
|
|
|
|
|
|
- # 使用 drop() 方法删除列 MBD: 有一部分是-99的列没处理
|
|
|
|
|
|
+ # 使用 drop() 方法删除列
|
|
tower = tower.drop(cols_to_drop, axis=1)
|
|
tower = tower.drop(cols_to_drop, axis=1)
|
|
|
|
+ # MBD: 将一部分是-99的列删除,把-99替换为nan
|
|
|
|
+ tower_nan = tower.replace(-99, np.nan, inplace=False)
|
|
|
|
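+ # Keep only columns with at least 80% non-NaN rows (this drops columns with >20% NaN, not >80%). NOTE(review): dropna runs on `tower`, not `tower_nan`, so the -99 -> NaN replacement does not affect which columns are dropped — confirm intent.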
|
|
|
|
+ tower = tower.dropna(axis=1, thresh=len(tower_nan) * 0.8)
|
|
utils.savedata.saveData("/tower/tower-{}-process.csv".format(i), tower)
|
|
utils.savedata.saveData("/tower/tower-{}-process.csv".format(i), tower)
|
|
|
|
|
|
# 测风塔时间统一
|
|
# 测风塔时间统一
|
|
@@ -177,7 +181,7 @@ def indep_process():
|
|
utils.savedata.saveData("/tower/tower-{}-process.csv".format(1), tower1)
|
|
utils.savedata.saveData("/tower/tower-{}-process.csv".format(1), tower1)
|
|
# utils.savedata.saveData("/tower/tower-{}-process.csv".format(2), tower2)
|
|
# utils.savedata.saveData("/tower/tower-{}-process.csv".format(2), tower2)
|
|
|
|
|
|
- # 所有表时间统一 MBD: 没有power和tower的统一
|
|
|
|
|
|
+ # 所有表时间统一
|
|
filenames = ["/NWP.csv","/power.csv", '/tower/tower-1-process.csv']
|
|
filenames = ["/NWP.csv","/power.csv", '/tower/tower-1-process.csv']
|
|
dataframes = []
|
|
dataframes = []
|
|
for i in arg.turbineloc:
|
|
for i in arg.turbineloc:
|
|
@@ -221,8 +225,8 @@ def NWP_indep_process():
|
|
if i == 0:
|
|
if i == 0:
|
|
split_indices.append((0, missing_values.index[i]))
|
|
split_indices.append((0, missing_values.index[i]))
|
|
else:
|
|
else:
|
|
- split_indices.append((missing_values.index[i - 1], missing_values.index[i])) # MBD:分割这块有问题
|
|
|
|
- split_indices.append((missing_values.index[-1], len(df))) # MBD: 刘大为改
|
|
|
|
|
|
+ split_indices.append((missing_values.index[i - 1], missing_values.index[i]))
|
|
|
|
+ split_indices.append((missing_values.index[-1], len(df))) # MBD: 分割少了一个点
|
|
split_datasets = [NWP.iloc[start:end,:] for start, end in split_indices]
|
|
split_datasets = [NWP.iloc[start:end,:] for start, end in split_indices]
|
|
for i, split_df in enumerate(split_datasets):
|
|
for i, split_df in enumerate(split_datasets):
|
|
utils.savedata.saveData("Dataset_training/NWP/NWP_{}.csv".format(i),split_df)
|
|
utils.savedata.saveData("Dataset_training/NWP/NWP_{}.csv".format(i),split_df)
|