@@ -37,8 +37,6 @@ class DataHandler(object):
 
         train_x = np.array([x.values for x in train_x])
         valid_x = np.array([x.values for x in valid_x])
-        # train_x = [np.array([x[0].values for x in train_x]), np.array([x[1].values for x in train_x])]
-        # valid_x = [np.array([x[0].values for x in valid_x]), np.array([x[1].values for x in valid_x])]
 
         return train_x, valid_x, train_y, valid_y
 
@@ -50,8 +48,7 @@ class DataHandler(object):
                 continue
             datax = self.get_predict_features(df, features)
             test_x.extend(datax)
-
-        test_x = [np.array([x[0].values for x in test_x]), np.array([x[1].values for x in test_x])]
+        test_x = np.array(test_x)
         return test_x
 
     def get_predict_features(self, norm_data, features):
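
Review note: the dropped line built a two-branch input (a list of two arrays, presumably for an earlier dual-input model), while `np.array(test_x)` stacks every window into a single 3-D batch. A minimal sketch of the resulting shape, assuming each element of `test_x` is a `time_step`-row frame as produced by `get_predict_features` (the window values here are made up):

```python
import numpy as np
import pandas as pd

# Two hypothetical windows, each time_step=3 rows by 2 features
w1 = pd.DataFrame({"f1": [0.1, 0.2, 0.3], "f2": [1.0, 1.1, 1.2]})
w2 = pd.DataFrame({"f1": [0.4, 0.5, 0.6], "f2": [1.3, 1.4, 1.5]})

test_x = np.array([w1, w2])  # DataFrames coerce to arrays and stack
print(test_x.shape)          # (2, 3, 2): (n_windows, time_step, n_features)
```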
@@ -61,7 +58,7 @@ class DataHandler(object):
         time_step = self.opt.Model["time_step"]
         feature_data = norm_data.reset_index(drop=True)
         time_step_loc = time_step - 1
-        iters = int(len(feature_data)) / self.opt.Model['time_step']
+        iters = int(len(feature_data)) // self.opt.Model['time_step']
         features_x = np.array([feature_data.loc[i*time_step:i*time_step + time_step_loc, features].reset_index(drop=True) for i in range(iters)])
         return features_x
 
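
The one-character `/` → `//` change is a real bug fix: in Python 3, `/` always yields a float, so the `range(iters)` in the comprehension below it would raise `TypeError: 'float' object cannot be interpreted as an integer`. A standalone sketch of the windowing arithmetic, with a hypothetical 16-row frame and `time_step = 4`:

```python
import numpy as np
import pandas as pd

time_step = 4  # stand-in for self.opt.Model['time_step']
feature_data = pd.DataFrame({"a": range(16), "b": range(16)})

iters = len(feature_data) // time_step  # 4; plain '/' gives 4.0 and range(4.0) raises TypeError
windows = np.array([
    feature_data.loc[i * time_step:i * time_step + time_step - 1, ["a", "b"]].reset_index(drop=True)
    for i in range(iters)
])
print(windows.shape)  # (4, 4, 2)
```

Note that `.loc` slicing is inclusive on both ends, which is why the code subtracts one (`time_step_loc = time_step - 1`).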
@@ -186,7 +183,7 @@ class DataHandler(object):
         target_scaler = MinMaxScaler(feature_range=(0, 1))
         # Normalize the features and the target
         scaled_train_data = train_scaler.fit_transform(train_data_cleaned[features])
-        scaled_target = target_scaler.fit_transform(train_data[[target]])
+        scaled_target = target_scaler.fit_transform(train_data_cleaned[[target]])
         train_data_cleaned[features] = scaled_train_data
         train_data_cleaned[[target]] = scaled_target
         train_datas = self.fill_train_data(train_data_cleaned, col_time)
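
Switching the target's `fit_transform` from `train_data` to `train_data_cleaned` matters twice over: the transformed array must match the cleaned frame's row count for the `train_data_cleaned[[target]] = scaled_target` assignment to align, and fitting on the uncleaned frame lets rows that cleaning drops distort the min/max range. A small illustration, with a hypothetical `power` column and threshold:

```python
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

train_data = pd.DataFrame({"power": [0.0, 50.0, 100.0, 999.0]})  # 999.0 is a bad row
train_data_cleaned = train_data[train_data["power"] <= 100.0]

target_scaler = MinMaxScaler(feature_range=(0, 1))
target_scaler.fit(train_data_cleaned[["power"]])
print(target_scaler.data_max_)  # [100.]; fitting on train_data instead would report [999.]
```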
@@ -194,15 +191,15 @@ class DataHandler(object):
         scaled_train_bytes = BytesIO()
         scaled_target_bytes = BytesIO()
 
-        joblib.dump(scaled_train_data, scaled_train_bytes)
-        joblib.dump(scaled_target, scaled_target_bytes)
+        joblib.dump(train_scaler, scaled_train_bytes)
+        joblib.dump(target_scaler, scaled_target_bytes)
         scaled_train_bytes.seek(0)  # Reset pointer to the beginning of the byte stream
         scaled_target_bytes.seek(0)
 
         train_x, valid_x, train_y, valid_y = self.get_train_data(train_datas, col_time, features, target)
         return train_x, valid_x, train_y, valid_y, scaled_train_bytes, scaled_target_bytes
 
-    def pre_data_handler(self, data, feature_scaler, args):
+    def pre_data_handler(self, data, feature_scaler, opt):
         """
         Simple preprocessing of the prediction data
         Args:
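
Dumping `train_scaler`/`target_scaler` instead of the already-transformed arrays is the substantive fix here: only the fitted scaler object can reapply or invert the scaling at predict time. A self-contained round trip through `BytesIO`, mirroring the pattern above (fit data is made up):

```python
from io import BytesIO

import joblib
import numpy as np
from sklearn.preprocessing import MinMaxScaler

target_scaler = MinMaxScaler(feature_range=(0, 1)).fit(np.array([[0.0], [50.0], [100.0]]))

scaled_target_bytes = BytesIO()
joblib.dump(target_scaler, scaled_target_bytes)  # persist the fitted scaler, not the scaled data
scaled_target_bytes.seek(0)                      # rewind before anything reads the stream

restored = joblib.load(scaled_target_bytes)
print(restored.transform(np.array([[25.0]])))           # [[0.25]]
print(restored.inverse_transform(np.array([[0.25]])))   # [[25.]]
```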
@@ -213,9 +210,11 @@ class DataHandler(object):
         """
         if 'is_limit' in data.columns:
             data = data[data['is_limit'] == False]
-        features, time_steps, col_time, model_name, col_reserve = str_to_list(args['features']), int(
-            args['time_steps']), args['col_time'], args['model_name'], str_to_list(args['col_reserve'])
-        pre_data = data.sort_values(by=col_time)
+        # features, time_steps, col_time, model_name, col_reserve = str_to_list(args['features']), int(
+        #     args['time_steps']), args['col_time'], args['model_name'], str_to_list(args['col_reserve'])
+        col_time, features = opt.col_time, opt.features
+        pre_data = data.sort_values(by=col_time)[features]
         scaled_features = feature_scaler.transform(pre_data[features])
-        pre_x = self.get_predict_data([scaled_features], features)
+        pre_data[features] = scaled_features
+        pre_x = self.get_predict_data([pre_data], features)
         return pre_x
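
The signature change from an `args` dict to an `opt` object drops the string parsing (`str_to_list`) in favor of direct attribute access, and the new code writes the scaled values back into the frame so `get_predict_data` receives a DataFrame, matching `get_predict_features`' use of `.loc`. A hedged sketch of the new selection path, with a hypothetical `SimpleNamespace` standing in for `opt` and made-up columns:

```python
from types import SimpleNamespace

import pandas as pd

# Hypothetical opt carrying the two fields pre_data_handler now reads
opt = SimpleNamespace(col_time="dateTime", features=["wind_speed", "temperature"])

data = pd.DataFrame({
    "dateTime": pd.date_range("2024-01-01", periods=3, freq="15min"),
    "wind_speed": [5.0, 6.0, 7.0],
    "temperature": [10.0, 11.0, 12.0],
})

col_time, features = opt.col_time, opt.features
pre_data = data.sort_values(by=col_time)[features]  # sort by time, keep only feature columns
print(pre_data.columns.tolist())  # ['wind_speed', 'temperature']
```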