|
@@ -24,7 +24,7 @@ class CustomDataHandler(object):
|
|
|
if len(df) < self.opt.Model["time_step"]:
|
|
|
self.logger.info("特征处理-训练数据-不满足time_step")
|
|
|
|
|
|
- datax, datay = self.get_timestep_features(df, col_time, target, is_train=True, time_series=time_series)
|
|
|
+ datax, datay = self.get_timestep_features_zone(df, col_time, target, is_train=True, time_series=time_series)
|
|
|
if len(datax) < 10:
|
|
|
self.logger.info("特征处理-训练数据-无法进行最小分割")
|
|
|
continue
|
|
@@ -34,8 +34,8 @@ class CustomDataHandler(object):
|
|
|
train_y.extend(ty)
|
|
|
valid_y.extend(vy)
|
|
|
|
|
|
- train_y = [np.array([y[0].values for y in train_y]), np.concatenate([[y[1].iloc[:, 1].values for y in train_y]])]
|
|
|
- valid_y = [np.array([y[0].values for y in valid_y]), np.concatenate([[y[1].iloc[:, 1].values for y in valid_y]])]
|
|
|
+ train_y = [np.array([y[0].values for y in train_y]), np.array([y[1].iloc[:, 1].values for y in train_y])]
|
|
|
+ valid_y = [np.array([y[0].values for y in valid_y]), np.array([y[1].iloc[:, 1].values for y in valid_y])]
|
|
|
|
|
|
train_x = np.array([x.values for x in train_x])
|
|
|
valid_x = np.array([x.values for x in valid_x])
|
|
@@ -58,7 +58,7 @@ class CustomDataHandler(object):
|
|
|
均分数据,获取预测数据集
|
|
|
"""
|
|
|
time_step = self.opt.Model["time_step"]
|
|
|
- feature_data = norm_data.reset_index(drop=True)
|
|
|
+ feature_data = norm_data.loc[:, self.opt.features].reset_index(drop=True)
|
|
|
time_step *= int(time_series)
|
|
|
time_step_loc = time_step - 1
|
|
|
iters = int(len(feature_data)) // time_step
|
|
@@ -97,8 +97,8 @@ class CustomDataHandler(object):
|
|
|
time_step_loc = time_step*time_series - 1
|
|
|
train_num = int(len(feature_data))
|
|
|
label_features_power = [col_time, target] if is_train is True else [col_time, target]
|
|
|
- label_features_zone = list(self.opt.zone.keys())
|
|
|
- nwp_cs = self.opt.features
|
|
|
+ label_features_zone = self.opt.zone
|
|
|
+ nwp_cs = [x for x in self.opt.features if x not in self.opt.zone]
|
|
|
nwp = [feature_data.loc[i:i + time_step_loc, nwp_cs].reset_index(drop=True) for i in range(train_num - time_step*time_series + 1)]
|
|
|
labels_power = [feature_data.loc[i:i + time_step_loc, label_features_power].reset_index(drop=True) for i in range(train_num - time_step*time_series + 1)]
|
|
|
labels_zone = [feature_data.loc[i:i + time_step_loc, label_features_zone].reset_index(drop=True) for i in range(train_num - time_step*time_series + 1)]
|
|
@@ -202,7 +202,7 @@ class CustomDataHandler(object):
|
|
|
if 'is_limit' in data.columns:
|
|
|
data = data[data['is_limit'] == False]
|
|
|
# 筛选特征,数值化,排序
|
|
|
- train_data = data[[col_time] + features + [target]]
|
|
|
+ train_data = data[[col_time] + features + [target] + self.opt.zone]
|
|
|
train_data = train_data.applymap(lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x, numbers.Number) else x)
|
|
|
train_data = train_data.sort_values(by=col_time)
|
|
|
# 清洗特征平均缺失率大于20%的天
|
|
@@ -216,10 +216,10 @@ class CustomDataHandler(object):
|
|
|
train_scaler = MinMaxScaler(feature_range=(0, 1))
|
|
|
target_scaler = MinMaxScaler(feature_range=(0, 1))
|
|
|
# 标准化特征和目标
|
|
|
- scaled_train_data = train_scaler.fit_transform(train_data_cleaned[self.opt.features])
|
|
|
+ scaled_train_data = train_scaler.fit_transform(train_data_cleaned[self.opt.features+self.opt.zone])
|
|
|
scaled_target = target_scaler.fit_transform(train_data_cleaned[[target]])
|
|
|
scaled_cap = target_scaler.transform(np.array([[float(self.opt.cap)]]))[0,0]
|
|
|
- train_data_cleaned[self.opt.features] = scaled_train_data
|
|
|
+ train_data_cleaned[self.opt.features+self.opt.zone] = scaled_train_data
|
|
|
train_data_cleaned[[target]] = scaled_target
|
|
|
# 3.缺值补值
|
|
|
train_datas = self.fill_train_data(train_data_cleaned, col_time)
|
|
@@ -254,9 +254,11 @@ class CustomDataHandler(object):
|
|
|
if not set(features).issubset(set(data.columns.tolist())):
|
|
|
raise ValueError("预测数据特征不满足模型特征!")
|
|
|
pre_data = data[features].copy()
|
|
|
+ pre_data[self.opt.zone] = 1
|
|
|
if self.opt.Model['predict_data_fill']:
|
|
|
pre_data = self.fill_pre_data(pre_data)
|
|
|
- pre_data.loc[:, features] = feature_scaler.transform(pre_data)
|
|
|
-
|
|
|
+ scaled_pre_data = feature_scaler.transform(pre_data)[:, :len(features)]
|
|
|
+ pre_data.drop(columns=self.opt.zone, inplace=True)
|
|
|
+ pre_data.loc[:, features] = scaled_pre_data
|
|
|
pre_x = self.get_predict_data([pre_data], time_series)
|
|
|
return pre_x, data
|