David 3달 전
부모
커밋
34e620cc60
1개의 변경된 파일에 6개의 추가 그리고 8개의 삭제
  1. 6 8
      data_processing/data_operation/data_handler.py

+ 6 - 8
data_processing/data_operation/data_handler.py

@@ -104,8 +104,7 @@ class DataHandler(object):
 
     def fill_pre_data(self, unite):
         unite = unite.interpolate(method='linear')  # nwp先进行线性填充
-        unite = unite.fillna(method='ffill')  # 再对超过采样边缘无法填充的点进行二次填充
-        unite = unite.fillna(method='bfill')
+        unite = unite.ffill().bfill() # 再对超过采样边缘无法填充的点进行二次填充
         return unite
 
     def missing_time_splite(self, df, dt_short, dt_long, col_time):
@@ -197,7 +196,7 @@ class DataHandler(object):
         # 标准化特征和目标
         scaled_train_data = train_scaler.fit_transform(train_data_cleaned[self.opt.features])
         scaled_target = target_scaler.fit_transform(train_data_cleaned[[target]])
-        scaled_cap = target_scaler.transform(np.array([[self.opt.cap]]))[0,0]
+        scaled_cap = target_scaler.transform(np.array([[float(self.opt.cap)]]))[0,0]
         train_data_cleaned[self.opt.features] = scaled_train_data
         train_data_cleaned[[target]] = scaled_target
         # 3.缺值补值
@@ -233,13 +232,12 @@ class DataHandler(object):
         # features, time_steps, col_time, model_name, col_reserve = str_to_list(args['features']), int(
         #     args['time_steps']), args['col_time'], args['model_name'], str_to_list(args['col_reserve'])
         col_time, features = self.opt.col_time, self.opt.features
-        data = data.applymap(lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x, numbers.Number) else x)
+        data = data.map(lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x, numbers.Number) else x)
         data = data.sort_values(by=col_time).reset_index(drop=True, inplace=False)
+        pre_data = data[features].copy()
         if self.opt.Model['predict_data_fill']:
-            data = self.fill_pre_data(data)
-        pre_data = data[features]
-        scaled_features = feature_scaler.transform(data[features])
-        pre_data.loc[:, features] = scaled_features
+            pre_data = self.fill_pre_data(pre_data)
+        pre_data.loc[:, features] = feature_scaler.transform(pre_data)
         if bp_data:
             pre_x = np.array(pre_data)
         else: