Kaynağa Gözat

刘大为修改,备注:MBD

liudawei 2 yıl önce
ebeveyn
işleme
1a285dace6
1 değiştirilmiş dosya ile 8 ekleme ve 4 silme
  1. 8 4
      db-wind/getdata/inputData.py

+ 8 - 4
db-wind/getdata/inputData.py

@@ -164,8 +164,12 @@ def indep_process():
         # 获取全是 -99 的列的列名
         cols_to_drop = all_minus_99[all_minus_99 == True].index.tolist()
 
-        # 使用 drop() 方法删除列  MBD: 有一部分是-99的列没处理
+        # 使用 drop() 方法删除列
         tower = tower.drop(cols_to_drop, axis=1)
+        # MBD: 将一部分是-99的列删除,把-99替换为nan
+        tower_nan = tower.replace(-99, np.nan, inplace=False)
+        # nan 超过80% 删除
+        tower = tower.dropna(axis=1, thresh=len(tower_nan) * 0.8)
         utils.savedata.saveData("/tower/tower-{}-process.csv".format(i), tower)
 
     # 测风塔时间统一
@@ -177,7 +181,7 @@ def indep_process():
     utils.savedata.saveData("/tower/tower-{}-process.csv".format(1), tower1)
     # utils.savedata.saveData("/tower/tower-{}-process.csv".format(2), tower2)
 
-    # 所有表时间统一 MBD: 没有power和tower的统一
+    # 所有表时间统一
     filenames = ["/NWP.csv","/power.csv", '/tower/tower-1-process.csv']
     dataframes = []
     for i in arg.turbineloc:
@@ -221,8 +225,8 @@ def NWP_indep_process():
         if i == 0:
             split_indices.append((0, missing_values.index[i]))
         else:
-            split_indices.append((missing_values.index[i - 1], missing_values.index[i]))  # MBD:分割这块有问题
-    split_indices.append((missing_values.index[-1], len(df)))  # MBD: 刘大为改
+            split_indices.append((missing_values.index[i - 1], missing_values.index[i]))
+    split_indices.append((missing_values.index[-1], len(df)))  # MBD: 分割少了一个点
     split_datasets = [NWP.iloc[start:end,:] for start, end in split_indices]
     for i, split_df in enumerate(split_datasets):
         utils.savedata.saveData("Dataset_training/NWP/NWP_{}.csv".format(i),split_df)