|
@@ -76,28 +76,47 @@ class DataHandler(object):
|
|
|
features_x = np.array([feature_data.loc[i*time_step:i*time_step + time_step_loc, self.opt.features].reset_index(drop=True) for i in range(iters)])
|
|
|
if end > 0:
|
|
|
df = feature_data.tail(end)
|
|
|
- df_repeated = pd.concat([df] + [pd.DataFrame([df.iloc[0]]* (time_step-end))]).reset_index(drop=True)
|
|
|
+ df_repeated = pd.concat([df] + [pd.DataFrame([df.iloc[-1]]* (time_step-end))]).reset_index(drop=True)
|
|
|
features_x = np.concatenate((features_x, np.expand_dims(df_repeated, 0)), axis=0)
|
|
|
return features_x
|
|
|
|
|
|
def get_predict_features2(self, norm_data, time_series=2):
    """
    Time-series mode 2: cut the data into ``time_step``-row atoms and build
    one prediction window per run of ``time_series`` consecutive atoms.

    Windows start every ``time_step`` rows and span
    ``time_step * time_series`` rows of the ``self.opt.features`` columns.
    When the row count is not a multiple of ``time_step``, one extra window
    is appended, made of:

    * the ``time_series - 1`` atoms immediately preceding the leftover rows,
    * the leftover rows themselves,
    * the last row repeated until the window is full.

    :param norm_data: DataFrame containing the ``self.opt.features`` columns.
    :param time_series: number of atoms per window.
    :return: ndarray shaped ``(n_windows, time_step * time_series, n_features)``;
        ``n_windows`` is 0 when the data is too short for any window.
    """
    time_step = self.opt.Model["time_step"]
    # Select the feature columns once so that regular windows and the padded
    # leftover window always share the same columns, in the same order.
    selected = norm_data.reset_index(drop=True).loc[:, self.opt.features]
    total = len(selected)
    window_len = time_step * time_series
    context_len = window_len - time_step

    # A window needs `time_series` whole atoms, so the last (time_series - 1)
    # atoms cannot start one. Clamp at zero for inputs shorter than a window.
    full_windows = max(total // time_step - (time_series - 1), 0)
    remainder = total % time_step

    windows = [
        selected.iloc[start:start + window_len].to_numpy()
        for start in range(0, full_windows * time_step, time_step)
    ]

    # Leftover rows that do not fill an atom: prepend the atoms that directly
    # precede them (not the last rows of the frame, which would overlap the
    # leftover and duplicate it) and pad with the final row.
    if remainder > 0 and total - remainder >= context_len:
        tail_rows = selected.iloc[total - remainder - context_len:].to_numpy()
        padding = np.repeat(tail_rows[-1:], time_step - remainder, axis=0)
        windows.append(np.concatenate((tail_rows, padding), axis=0))

    if not windows:
        # Keep the rank stable so callers can still index shape[1:].
        return np.empty((0, window_len) + selected.shape[1:])
    return np.stack(windows, axis=0)
|
|
|
|
|
|
def get_predict_features3(self, norm_data, time_series=3):
    """
    Time-series mode 3: cut the data into ``time_step``-row atoms and build
    prediction windows laid out as one leading atom, ``time_series - 2``
    middle atoms and one trailing atom.

    Regular windows are ``time_step * time_series`` contiguous rows of the
    ``self.opt.features`` columns and advance by the middle length
    ``(time_series - 2) * time_step``. Rows that no regular window carries in
    its middle section (everything after the first
    ``time_step + n_windows * middle_len`` rows) are emitted as boundary
    windows. In a boundary window both the leading and the trailing atom are
    the final ``time_step`` rows of the data, and a middle chunk that is too
    short is padded by repeating its last row.

    NOTE(review): using the final rows as the *leading* context of boundary
    windows is kept from the previous implementation; confirm it matches how
    the model was trained.

    :param norm_data: DataFrame containing the ``self.opt.features`` columns.
    :param time_series: number of atoms per window; must be at least 3.
    :return: ndarray shaped ``(n_windows, time_step * time_series, n_features)``;
        ``n_windows`` is 0 when fewer than ``time_step`` rows are available.
    :raises ValueError: if ``time_series`` is smaller than 3, because the
        middle section would then be empty.
    """
    if time_series < 3:
        raise ValueError("time_series must be at least 3")

    time_step = self.opt.Model["time_step"]
    selected = norm_data.reset_index(drop=True).loc[:, self.opt.features]
    total = len(selected)
    window_len = time_step * time_series
    middle_len = (time_series - 2) * time_step

    # Window starts are row offsets; the stride is already expressed in rows,
    # so it must not be multiplied by time_step a second time.
    windows = [
        selected.iloc[start:start + window_len].to_numpy()
        for start in range(0, total - window_len + 1, middle_len)
    ]

    # Boundary windows need a full atom of context from the end of the data.
    if total >= time_step:
        edge = selected.iloc[total - time_step:].to_numpy()
        # Rows not yet covered by the middle section of any regular window.
        leftover = selected.iloc[time_step + len(windows) * middle_len:].to_numpy()
        whole, partial = divmod(len(leftover), middle_len)

        for idx in range(whole):
            chunk = leftover[idx * middle_len:(idx + 1) * middle_len]
            windows.append(np.concatenate((edge, chunk, edge), axis=0))

        if partial:
            chunk = leftover[len(leftover) - partial:]
            padding = np.repeat(chunk[-1:], middle_len - partial, axis=0)
            windows.append(np.concatenate((edge, chunk, padding, edge), axis=0))

    if not windows:
        # Keep the rank stable so callers can still index shape[1:].
        return np.empty((0, window_len) + selected.shape[1:])
    return np.stack(windows, axis=0)
|
|
|
|
|
|
def get_timestep_features(self, norm_data, col_time, target, is_train, time_series=1):
|