@@ -102,6 +102,13 @@ class DataHandler(object):
         data_train = self.data_fill(data_train, col_time)
         return data_train
 
+    def fill_pre_data(self, unite):
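+        """Fill gaps in prediction-time input: linear interpolation, then forward/backward padding at the edges."""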
+        unite = unite.interpolate(method='linear')  # linearly interpolate NWP gaps first
+        unite = unite.ffill()  # then pad points at the sampling edges that interpolation cannot reach
+        unite = unite.bfill()
+        return unite
+
     def missing_time_splite(self, df, dt_short, dt_long, col_time):
         df.reset_index(drop=True, inplace=True)
         n_long, n_short, n_points = 0, 0, 0
@@ -183,15 +189,17 @@ class DataHandler(object):
         # Feature preprocessing on the curtailment-cleaned data:
         # 1. Clean missing values and outliers
         train_data_cleaned = cleaning(train_data, '训练集', self.logger, features + [target], col_time)
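+        # cleaning() can drop feature columns entirely, so rebuild the feature list from the surviving columns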
+        self.opt.features = [x for x in train_data_cleaned.columns.tolist() if x not in [target, col_time] and x in features]
         # 2. Normalization
         # Create scalers for the features and the target
         train_scaler = MinMaxScaler(feature_range=(0, 1))
         target_scaler = MinMaxScaler(feature_range=(0, 1))
         # Scale the features and the target
-        scaled_train_data = train_scaler.fit_transform(train_data_cleaned[features])
+        scaled_train_data = train_scaler.fit_transform(train_data_cleaned[self.opt.features])
         scaled_target = target_scaler.fit_transform(train_data_cleaned[[target]])
         scaled_cap = target_scaler.transform(np.array([[self.opt.cap]]))[0,0]
-        train_data_cleaned[features] = scaled_train_data
+        train_data_cleaned[self.opt.features] = scaled_train_data
         train_data_cleaned[[target]] = scaled_target
         # 3. Fill missing values
         train_datas = self.fill_train_data(train_data_cleaned, col_time)
@@ -205,10 +212,11 @@ class DataHandler(object):
 
         if bp_data:
             train_data = pd.concat(train_datas, axis=0)
-            train_x, valid_x, train_y, valid_y = self.train_valid_split(train_data[features].values, train_data[target].values, valid_rate=self.opt.Model["valid_data_rate"], shuffle=self.opt.Model['shuffle_train_data'])
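+            # split using the feature list refreshed after cleaning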
+            train_x, valid_x, train_y, valid_y = self.train_valid_split(train_data[self.opt.features].values, train_data[target].values, valid_rate=self.opt.Model["valid_data_rate"], shuffle=self.opt.Model['shuffle_train_data'])
             train_x, valid_x, train_y, valid_y = np.array(train_x), np.array(valid_x), np.array(train_y), np.array(valid_y)
         else:
-            train_x, valid_x, train_y, valid_y = self.get_train_data(train_datas, col_time, features, target)
+            train_x, valid_x, train_y, valid_y = self.get_train_data(train_datas, col_time, self.opt.features, target)
         return train_x, train_y, valid_x, valid_y, scaled_train_bytes, scaled_target_bytes, scaled_cap
 
     def pre_data_handler(self, data, feature_scaler, bp_data=False):
@@ -228,6 +235,9 @@ class DataHandler(object):
         col_time, features = self.opt.col_time, self.opt.features
         data = data.applymap(lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x, numbers.Number) else x)
         data = data.sort_values(by=col_time).reset_index(drop=True, inplace=False)
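+        # optionally fill gaps in the prediction input before feature selection and scaling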
+        if self.opt.Model['predict_data_fill']:
+            data = self.fill_pre_data(data)
         pre_data = data[features]
         scaled_features = feature_scaler.transform(data[features])
         pre_data.loc[:, features] = scaled_features