
Merge branch 'dev_david' of anweiguo/algorithm_platform into dev_awg

liudawei committed 5 days ago
commit 4354261dbe

+ 73 - 29
data_processing/data_operation/data_handler.py

@@ -18,17 +18,17 @@ class DataHandler(object):
         self.logger = logger
         self.opt = argparse.Namespace(**args)
 
-    def get_train_data(self, dfs, col_time, target, time_series=1):
+    def get_train_data(self, dfs, col_time, target, time_series=1, lstm_type=1):
         train_x, valid_x, train_y, valid_y = [], [], [], []
         for i, df in enumerate(dfs, start=1):
             if len(df) < self.opt.Model["time_step"]:
                 self.logger.info("特征处理-训练数据-不满足time_step")
-            if time_series == 2:
-                datax, datay = self.get_timestep_features_lstm2(df, col_time, target, is_train=True)
-            elif time_series == 3:
-                datax, datay = self.get_timestep_features_bilstm(df, col_time, target, is_train=True)
+            if lstm_type == 2:
+                datax, datay = self.get_timestep_features_lstm2(df, col_time, target, is_train=True, time_series=time_series)
+            elif lstm_type == 3:
+                datax, datay = self.get_timestep_features_bilstm(df, col_time, target, is_train=True, time_series=time_series)
             else:
-                datax, datay = self.get_timestep_features(df, col_time, target, is_train=True)
+                datax, datay = self.get_timestep_features(df, col_time, target, is_train=True, time_series=time_series)
             if len(datax) < 10:
                 self.logger.info("特征处理-训练数据-无法进行最小分割")
                 continue
@@ -46,17 +46,23 @@ class DataHandler(object):
 
         return train_x, valid_x, train_y, valid_y
 
-    def get_predict_data(self, dfs, time_series=1):
+    def get_predict_data(self, dfs, time_series=1, lstm_type=1):
         test_x = []
         for i, df in enumerate(dfs, start=1):
-            if len(df) < self.opt.Model["time_step"]:
+            if len(df) < self.opt.Model["time_step"]*time_series:
                 self.logger.info("特征处理-预测数据-不满足time_step")
                 continue
-            datax = self.get_predict_features(df, time_series)
+            if lstm_type == 2:
+                datax = self.get_predict_features2(df, time_series)
+            elif lstm_type == 3:
+                datax = self.get_predict_features3(df, time_series)
+            else:
+                datax = self.get_predict_features(df, time_series)
             test_x.append(datax)
         test_x = np.concatenate(test_x, axis=0)
         return test_x
 
     def get_predict_features(self, norm_data, time_series=1):
         """
        Evenly split the data into windows to build the prediction dataset
@@ -70,59 +76,97 @@ class DataHandler(object):
         features_x = np.array([feature_data.loc[i*time_step:i*time_step + time_step_loc, self.opt.features].reset_index(drop=True) for i in range(iters)])
         if end > 0:
             df = feature_data.tail(end)
-            df_repeated = pd.concat([df] + [pd.DataFrame([df.iloc[0]]* (time_step-end))]).reset_index(drop=True)
+            df_repeated = pd.concat([df] + [pd.DataFrame([df.iloc[-1]]* (time_step-end))]).reset_index(drop=True)
             features_x = np.concatenate((features_x, np.expand_dims(df_repeated, 0)), axis=0)
         return features_x
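
A minimal sketch of the padding fix above (standalone, with assumed values; not the project's code): when the tail remainder is shorter than time_step, the window is now padded by repeating the remainder's last row instead of its first.

import pandas as pd

time_step = 4
df = pd.DataFrame({"ws": [0.1, 0.2, 0.3]})  # remainder of 3 rows, so end = 3
end = len(df)
# repeat the last observed row until the window reaches time_step rows
df_repeated = pd.concat([df] + [pd.DataFrame([df.iloc[-1]] * (time_step - end))]).reset_index(drop=True)
print(df_repeated["ws"].tolist())  # [0.1, 0.2, 0.3, 0.3]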
 
-    def get_timestep_features(self, norm_data, col_time, target, is_train):
+    def get_predict_features2(self, norm_data, time_series=2):
+        """
+        Mode 2: slice the data into time_step atomic segments and assemble each prediction window from the required consecutive segments
+        """
+        time_step = self.opt.Model["time_step"]
+        feature_data = norm_data.reset_index(drop=True)
+        time_step_loc = time_step*time_series - 1
+        iters = int(len(feature_data)) // time_step
+        iters = iters - (time_series - 1)
+        end = int(len(feature_data)) % time_step
+        features_x = np.array([feature_data.loc[i*time_step:i*time_step + time_step_loc, self.opt.features].reset_index(drop=True) for i in range(0, iters)])
+        if end > 0:
+            df = norm_data.tail(end)
+            df_repeated = pd.concat([norm_data.tail((time_series-1)*time_step)] + [df] + [df.tail(1)] * (time_step - end)).reset_index(drop=True)
+            features_x = np.concatenate((features_x, np.expand_dims(df_repeated, 0)), axis=0)
+        return features_x
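
Worked example of the mode-2 window arithmetic (time_step and length are assumed values): windows span time_step*time_series rows and advance one time_step at a time, so the count of full windows drops by time_series-1.

time_step, time_series, n = 16, 2, 100
iters = n // time_step - (time_series - 1)       # 6 - 1 = 5 full windows
starts = [i * time_step for i in range(iters)]   # [0, 16, 32, 48, 64]
end = n % time_step                              # 4 leftover rows -> one extra padded window
print(iters, starts, end)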
+
+    def get_predict_features3(self, norm_data, time_series=3):
+        """
+        Mode 3: slice the data into time_step atomic segments and assemble each prediction window from the required consecutive segments
+        """
+        time_step = self.opt.Model["time_step"]
+        feature_data = norm_data.reset_index(drop=True)
+        time_step_loc = time_step*time_series - 1
+        features_x = np.array([x for x in
+                               [feature_data.loc[i:i + time_step_loc, self.opt.features].reset_index(drop=True)
+                                for i in range(0, len(norm_data), (time_series-2)*time_step)]
+                               if len(x) == time_step*time_series])
+        end = norm_data.tail(len(feature_data) - (features_x.shape[0] * time_step) - time_step).reset_index(drop=True)
+        num = len(end) // ((time_series - 2) * time_step)
+        bu = len(end) % ((time_series - 2) * time_step)
+        front = norm_data.tail(time_step)   # leftover middle segments are padded on both sides
+        back = norm_data.tail(time_step)    # with the last time_step rows of the data
+        df_repeated = [pd.concat([front]+[end.iloc[i*time_step:i*time_step + time_step].reset_index(drop=True)]+[back]) for i in range(0, num)]
+        if bu > 0:
+            last = pd.concat([front] + [end.tail(bu)] + [end.tail(1)] * (time_step - bu) + [back])
+            df_repeated = np.array(df_repeated + [last])
+        else:
+            df_repeated = np.array(df_repeated)
+        features_x = np.concatenate((features_x, df_repeated), axis=0)
+        return features_x
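
Sketch of the mode-3 slicing (values assumed): full windows are time_step*time_series rows wide and start every (time_series-2)*time_step rows; anything that does not fill a window falls through to the padded remainder branch.

time_step, time_series, n = 16, 3, 100
width = time_step * time_series                # 48
stride = (time_series - 2) * time_step         # 16
starts = [i for i in range(0, n, stride) if i + width <= n]
print(starts)                                  # [0, 16, 32, 48] -> 4 full windows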
+
+    def get_timestep_features(self, norm_data, col_time, target, is_train, time_series=1):
         """
        Split the data by time_step to build the sequential training set
         """
         time_step = self.opt.Model["time_step"]
         feature_data = norm_data.reset_index(drop=True)
-        time_step_loc = time_step - 1
+        time_step_loc = time_step*time_series - 1
         train_num = int(len(feature_data))
        label_features = [col_time, target]  # identical for train and predict
         nwp_cs = self.opt.features
-        nwp = [feature_data.loc[i:i + time_step_loc, nwp_cs].reset_index(drop=True) for i in range(train_num - time_step + 1)]  # database fields, e.g. 'C_T': 'C_WS170'
-        labels = [feature_data.loc[i:i + time_step_loc, label_features].reset_index(drop=True) for i in range(train_num - time_step + 1)]
+        nwp = [feature_data.loc[i:i + time_step_loc, nwp_cs].reset_index(drop=True) for i in range(train_num - time_step*time_series + 1)]  # database fields, e.g. 'C_T': 'C_WS170'
+        labels = [feature_data.loc[i:i + time_step_loc, label_features].reset_index(drop=True) for i in range(train_num - time_step*time_series + 1)]
         features_x, features_y = [], []
         for i, row in enumerate(zip(nwp, labels)):
             features_x.append(row[0])
             features_y.append(row[1])
         return features_x, features_y
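
For mode 1 the windows slide one row at a time, which is why train_num - time_step*time_series + 1 samples come out. A quick check with assumed numbers:

time_step, time_series, train_num = 16, 1, 100
n_samples = train_num - time_step * time_series + 1        # 85 overlapping samples
first, last = (0, 15), (n_samples - 1, train_num - 1)      # spans of the first and last window
print(n_samples, first, last)                              # 85 (0, 15) (84, 99)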
 
-    def get_timestep_features_lstm2(self, norm_data, col_time, target, is_train):
+    def get_timestep_features_lstm2(self, norm_data, col_time, target, is_train, time_series=2):
         """
-        Split the data by time_step to build the sequential training set
+        Split the data by time_step; use the last time_step of each window as the labels
         """
         time_step = self.opt.Model["time_step"]
         feature_data = norm_data.reset_index(drop=True)
-        time_step_loc = time_step*2 - 1
+        time_step_loc = time_step*time_series - 1
         train_num = int(len(feature_data))
        label_features = [col_time, target]  # identical for train and predict
         nwp_cs = self.opt.features
-        nwp = [feature_data.loc[i:i + time_step_loc, nwp_cs].reset_index(drop=True) for i in range(train_num - time_step*2 + 1)]  # database fields, e.g. 'C_T': 'C_WS170'
-        labels = [feature_data.loc[i+time_step:i+time_step_loc, label_features].reset_index(drop=True) for i in range(train_num - time_step*2 + 1)]
+        nwp = [feature_data.loc[i:i + time_step_loc, nwp_cs].reset_index(drop=True) for i in range(train_num - time_step*time_series + 1)]  # database fields, e.g. 'C_T': 'C_WS170'
+        labels = [feature_data.loc[i+time_step_loc-time_step+1: i+time_step_loc, label_features].reset_index(drop=True) for i in range(train_num - time_step*time_series + 1)]
         features_x, features_y = [], []
         for i, row in enumerate(zip(nwp, labels)):
             features_x.append(row[0])
             features_y.append(row[1])
         return features_x, features_y
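
In mode 2 the features still cover the whole window but the labels shrink to its last time_step rows, matching the Dense(time_step) head added in tf_lstm.py below. Index check with assumed values:

time_step, time_series, i = 16, 2, 0
time_step_loc = time_step * time_series - 1                          # 31
feature_span = (i, i + time_step_loc)                                # rows 0..31
label_span = (i + time_step_loc - time_step + 1, i + time_step_loc)  # rows 16..31
print(feature_span, label_span)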
 
-    def get_timestep_features_bilstm(self, norm_data, col_time, target, is_train):
+    def get_timestep_features_bilstm(self, norm_data, col_time, target, is_train, time_series=3):
         """
-        Split the data by time_step to build the sequential training set
+        Split the data by time_step; use the middle time steps of each window as the labels
         """
         time_step = self.opt.Model["time_step"]
         feature_data = norm_data.reset_index(drop=True)
-        time_step_loc = time_step*3 - 1
-        time_step_m = time_step*2 - 1
+        time_step_loc = time_step*time_series - 1
         train_num = int(len(feature_data))
        label_features = [col_time, target]  # identical for train and predict
         nwp_cs = self.opt.features
-        nwp = [feature_data.loc[i:i + time_step_loc, nwp_cs].reset_index(drop=True) for i in range(train_num - time_step*3 + 1)]  # database fields, e.g. 'C_T': 'C_WS170'
-        labels = [feature_data.loc[i+time_step:i+time_step_m, label_features].reset_index(drop=True) for i in range(train_num - time_step*3 + 1)]
+        nwp = [feature_data.loc[i:i + time_step_loc, nwp_cs].reset_index(drop=True) for i in range(train_num - time_step*time_series + 1)]  # database fields, e.g. 'C_T': 'C_WS170'
+        labels = [feature_data.loc[i+time_step: i+time_step_loc-time_step, label_features].reset_index(drop=True) for i in range(train_num - time_step*time_series + 1)]
         features_x, features_y = [], []
         for i, row in enumerate(zip(nwp, labels)):
             features_x.append(row[0])
@@ -205,7 +249,7 @@ class DataHandler(object):
                 vy.append(data[1])
         return tx, vx, ty, vy
 
-    def train_data_handler(self, data, bp_data=False, time_series=1):
+    def train_data_handler(self, data, bp_data=False, time_series=1, lstm_type=1):
         """
        Training data preprocessing:
        cleaning + gap filling + normalization
@@ -257,10 +301,10 @@ class DataHandler(object):
             train_x, valid_x, train_y, valid_y = self.train_valid_split(train_data[self.opt.features].values, train_data[target].values, valid_rate=self.opt.Model["valid_data_rate"], shuffle=self.opt.Model['shuffle_train_data'])
             train_x, valid_x, train_y, valid_y =  np.array(train_x), np.array(valid_x), np.array(train_y), np.array(valid_y)
         else:
-            train_x, valid_x, train_y, valid_y = self.get_train_data(train_datas, col_time, target, time_series)
+            train_x, valid_x, train_y, valid_y = self.get_train_data(train_datas, col_time, target, time_series, lstm_type)
         return train_x, train_y, valid_x, valid_y, scaled_train_bytes, scaled_target_bytes, scaled_cap
 
-    def pre_data_handler(self, data, feature_scaler, bp_data=False, time_series=1):
+    def pre_data_handler(self, data, feature_scaler, bp_data=False, time_series=1, lstm_type=1):
         """
        Light preprocessing of the prediction data
         Args:
@@ -286,5 +330,5 @@ class DataHandler(object):
         if bp_data:
             pre_x = np.array(pre_data)
         else:
-            pre_x = self.get_predict_data([pre_data], time_series)
+            pre_x = self.get_predict_data([pre_data], time_series, lstm_type)
         return pre_x, data
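
A hypothetical end-to-end use of the extended DataHandler API (logger, args, train_data, pre_data and feature_scaler are assumed to come from the surrounding services, which this diff does not show):

import logging

logger = logging.getLogger("dh")
dh = DataHandler(logger, args)  # args must carry Model.time_step, features, ... (hypothetical wiring)

# training path: bilstm-style windows, labels on the middle segment
train_x, train_y, valid_x, valid_y, feat_scaler_bytes, tgt_scaler_bytes, cap = \
    dh.train_data_handler(train_data, time_series=3, lstm_type=3)

# prediction path with the same windowing
pre_x, pre_df = dh.pre_data_handler(pre_data, feature_scaler, time_series=3, lstm_type=3)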

+ 1 - 0
models_processing/model_tf/lstm.yaml

@@ -21,6 +21,7 @@ Model:
   train_data_fill: true
   use_cuda: false
   valid_data_rate: 0.15
+use_bidirectional: false
 region: south
 calculate: []
 cap: 153.0

+ 8 - 4
models_processing/model_tf/tf_bilstm.py

@@ -37,16 +37,20 @@ class TSHandler(object):
             self.logger.info("加载模型权重失败:{}".format(e.args))
 
     @staticmethod
-    def get_keras_model(opt):
+    def get_keras_model(opt, time_series=3, use_bidirectional=False):
+        assert time_series >= 3
         loss = region_loss(opt)
         l2_reg = regularizers.l2(opt.Model['lambda_value_2'])
-        nwp_input = Input(shape=(opt.Model['time_step'], opt.Model['input_size']), name='nwp')
+        nwp_input = Input(shape=(opt.Model['time_step']*time_series, opt.Model['input_size']), name='nwp')
 
         con1 = Conv1D(filters=64, kernel_size=5, strides=1, padding='valid', activation='relu', kernel_regularizer=l2_reg)(nwp_input)
         con1_p = MaxPooling1D(pool_size=5, strides=1, padding='valid', data_format='channels_last')(con1)
-        nwp_bi_lstm = Bidirectional(LSTM(units=opt.Model['hidden_size'], return_sequences=False, kernel_regularizer=l2_reg), merge_mode='concat')(con1_p) # concatenates the two directions (final dim = 2*hidden_size)
+        if use_bidirectional:
+            nwp_lstm = Bidirectional(LSTM(units=opt.Model['hidden_size'], return_sequences=False, kernel_regularizer=l2_reg), merge_mode='concat')(con1_p) # concatenates the two directions (final dim = 2*hidden_size)
+        else:
+            nwp_lstm = LSTM(units=opt.Model['hidden_size'], return_sequences=False, kernel_regularizer=l2_reg)(con1_p) # unidirectional variant (final dim = hidden_size)
 
-        output = Dense(opt.Model['output_size'], name='cdq_output')(nwp_bi_lstm)
+        output = Dense(opt.Model['time_step']*(time_series-2), name='cdq_output')(nwp_lstm)
 
         model = Model(nwp_input, output)
         adam = optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-7, amsgrad=True)
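
Shape recap for the reworked head (config values assumed): the input now spans time_step*time_series steps while the Dense output covers only the middle time_step*(time_series-2) steps.

time_step, time_series = 16, 3
input_steps = time_step * time_series           # 48 steps fed to the Conv1D/LSTM stack
output_steps = time_step * (time_series - 2)    # 16 predicted steps (the middle segment)
print(input_steps, output_steps)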

+ 1 - 1
models_processing/model_tf/tf_cnn_pre.py

@@ -44,7 +44,7 @@ def update_config():
     g.cnn = CNNHandler(logger, current_config)
 
 @app.route('/tf_cnn_predict', methods=['POST'])
-def model_prediction_bp():
+def model_prediction_cnn():
    # record the program start time
     start_time = time.time()
     result = {}

+ 1 - 1
models_processing/model_tf/tf_cnn_train.py

@@ -41,7 +41,7 @@ def update_config():
     g.cnn = CNNHandler(logger, current_config)
 
 @app.route('/tf_cnn_training', methods=['POST'])
-def model_training_bp():
+def model_training_cnn():
    # record the program start time
     start_time = time.time()
     result = {}

+ 5 - 3
models_processing/model_tf/tf_lstm.py

@@ -37,7 +37,7 @@ class TSHandler(object):
             self.logger.info("加载模型权重失败:{}".format(e.args))
 
     @staticmethod
-    def get_keras_model(opt, time_series=1):
+    def get_keras_model(opt, time_series=1, lstm_type=1):
         loss = region_loss(opt)
         l1_reg = regularizers.l1(opt.Model['lambda_value_1'])
         l2_reg = regularizers.l2(opt.Model['lambda_value_2'])
@@ -46,8 +46,10 @@ class TSHandler(object):
         con1 = Conv1D(filters=64, kernel_size=5, strides=1, padding='valid', activation='relu', kernel_regularizer=l2_reg)(nwp_input)
         con1_p = MaxPooling1D(pool_size=5, strides=1, padding='valid', data_format='channels_last')(con1)
         nwp_lstm = LSTM(units=opt.Model['hidden_size'], return_sequences=False, kernel_regularizer=l2_reg)(con1_p)
-
-        output = Dense(opt.Model['output_size'], name='cdq_output')(nwp_lstm)
+        if lstm_type == 2:
+            output = Dense(opt.Model['time_step'], name='cdq_output')(nwp_lstm)
+        else:
+            output = Dense(opt.Model['time_step']*time_series, name='cdq_output')(nwp_lstm)
 
         model = Model(nwp_input, output)
         adam = optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-7, amsgrad=True)
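
The new head sizing in one helper (a sketch, not project code): lstm_type 2 predicts only the final time_step, every other type predicts the whole window.

def dense_units(time_step, time_series, lstm_type):
    return time_step if lstm_type == 2 else time_step * time_series

print(dense_units(16, 2, 2))  # 16 - lstm2 predicts the last segment only
print(dense_units(16, 1, 1))  # 16 - plain lstm predicts the full window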

+ 5 - 2
models_processing/model_tf/tf_lstm2_pre.py

@@ -36,6 +36,7 @@ def update_config():
     request_args = request.values.to_dict()
    # features rule: 1. if passed in, parse and override 2. if absent, keep the original value
     request_args['features'] = request_args['features'].split(',') if 'features' in request_args else current_config['features']
+    request_args['time_series'] = int(request_args.get('time_series', 2))  # form values arrive as strings
     current_config.update(request_args)
 
    # store in the request context
@@ -44,7 +45,7 @@ def update_config():
     g.ts = TSHandler(logger, current_config)
 
 @app.route('/tf_lstm2_predict', methods=['POST'])
-def model_prediction_bp():
+def model_prediction_lstm2():
    # record the program start time
     start_time = time.time()
     result = {}
@@ -62,10 +63,11 @@ def model_prediction_bp():
         ts.opt.cap = round(target_scaler.transform(np.array([[float(args['cap'])]]))[0, 0], 2)
         ts.get_model(args)
         dh.opt.features = json.loads(ts.model_params)['Model']['features'].split(',')
-        scaled_pre_x, pre_data = dh.pre_data_handler(pre_data, feature_scaler, time_series=2)
+        scaled_pre_x, pre_data = dh.pre_data_handler(pre_data, feature_scaler, time_series=args['time_series'], lstm_type=2)
         res = list(chain.from_iterable(target_scaler.inverse_transform(ts.predict(scaled_pre_x))))
         pre_data['farm_id'] = args.get('farm_id', 'null')
         if int(args.get('algorithm_test', 0)):
+            pre_data = pre_data.iloc[(args['time_series']-1)*dh.opt.Model["time_step"]:]
             pre_data[args['model_name']] = res[:len(pre_data)]
             pre_data.rename(columns={args['col_time']: 'dateTime'}, inplace=True)
             pre_data = pre_data[['dateTime', 'farm_id', args['target'], args['model_name'], 'dq']]
@@ -75,6 +77,7 @@ def model_prediction_bp():
                 pre_data['howLongAgo'] = int(args['howLongAgo'])
                 res_cols += ['howLongAgo']
         else:
+            pre_data = pre_data.iloc[(args['time_series'] - 1) * dh.opt.Model["time_step"]:]
             pre_data['power_forecast'] = res[:len(pre_data)]
             pre_data.rename(columns={args['col_time']: 'date_time'}, inplace=True)
             res_cols = ['date_time', 'power_forecast', 'farm_id']
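
A hypothetical client call against the updated endpoint (host, port and any payload fields beyond time_series and features are assumptions; the diff only shows these two being read):

import requests

resp = requests.post(
    "http://127.0.0.1:5000/tf_lstm2_predict",
    data={"time_series": 2, "features": "C_WS170,C_T"},  # features is split on ',' server-side
)
print(resp.json())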

+ 7 - 6
models_processing/model_tf/tf_lstm2_train.py

@@ -31,6 +31,7 @@ def update_config():
     request_args = request.values.to_dict()
    # features rule: 1. if passed in, parse and override 2. if absent, keep the original value
     request_args['features'] = request_args['features'].split(',') if 'features' in request_args else current_config['features']
+    request_args['time_series'] = int(request_args.get('time_series', 2))  # form values arrive as strings
     current_config.update(request_args)
 
    # store in the request context
@@ -40,7 +41,7 @@ def update_config():
 
 
 @app.route('/tf_lstm2_training', methods=['POST'])
-def model_training_bp():
+def model_training_lstm2():
    # record the program start time
     start_time = time.time()
     result = {}
@@ -52,24 +53,24 @@ def model_training_bp():
     try:
        # ------------ fetch data and preprocess the training data ------------
         train_data = get_data_from_mongo(args)
-        train_x, train_y, valid_x, valid_y, scaled_train_bytes, scaled_target_bytes, scaled_cap = dh.train_data_handler(train_data, time_series=2)
+        train_x, train_y, valid_x, valid_y, scaled_train_bytes, scaled_target_bytes, scaled_cap = dh.train_data_handler(train_data, time_series=args['time_series'], lstm_type=2)
         ts.opt.cap = round(scaled_cap, 2)
         ts.opt.Model['input_size'] = len(dh.opt.features)
        # ------------ train the model and save it ------------
        # 1. In add-train mode, load the pretrained model's feature list first, then preprocess the training data
        # 2. In normal mode, preprocess the training data first, then build the model from the training data's features
-        model = ts.train_init() if ts.opt.Model['add_train'] else ts.get_keras_model(ts.opt, time_series=2)
+        model = ts.train_init() if ts.opt.Model['add_train'] else ts.get_keras_model(ts.opt, time_series=args['time_series'], lstm_type=2)
         if ts.opt.Model['add_train']:
             if model:
                 feas = json.loads(ts.model_params)['features']
                 if set(feas).issubset(set(dh.opt.features)):
                     dh.opt.features = list(feas)
-                    train_x, train_y, valid_x, valid_y, scaled_train_bytes, scaled_target_bytes, scaled_cap = dh.train_data_handler(train_data, time_series=2)
+                    train_x, train_y, valid_x, valid_y, scaled_train_bytes, scaled_target_bytes, scaled_cap = dh.train_data_handler(train_data, time_series=args['time_series'], lstm_type=2)
                 else:
-                    model = ts.get_keras_model(ts.opt, time_series=2)
+                    model = ts.get_keras_model(ts.opt, time_series=args['time_series'], lstm_type=2)
                     logger.info("训练数据特征,不满足,加强训练模型特征")
             else:
-                model = ts.get_keras_model(ts.opt, time_series=2)
+                model = ts.get_keras_model(ts.opt, time_series=args['time_series'], lstm_type=2)
         ts_model = ts.training(model, [train_x, train_y, valid_x, valid_y])
         args['Model']['features'] = ','.join(dh.opt.features)
         args['params'] = json.dumps(args)

+ 5 - 2
models_processing/model_tf/tf_lstm3_pre.py

@@ -35,6 +35,7 @@ def update_config():
     request_args = request.values.to_dict()
    # features rule: 1. if passed in, parse and override 2. if absent, keep the original value
     request_args['features'] = request_args['features'].split(',') if 'features' in request_args else current_config['features']
+    request_args['time_series'] = int(request_args.get('time_series', 3))  # form values arrive as strings
     current_config.update(request_args)
 
    # store in the request context
@@ -43,7 +44,7 @@ def update_config():
     g.ts = TSHandler(logger, current_config)
 
 @app.route('/tf_lstm3_predict', methods=['POST'])
-def model_prediction_bp():
+def model_prediction_lstm3():
     # 获取程序开始时间
     start_time = time.time()
     result = {}
@@ -61,10 +62,11 @@ def model_prediction_bp():
         ts.opt.cap = round(target_scaler.transform(np.array([[float(args['cap'])]]))[0, 0], 2)
         ts.get_model(args)
         dh.opt.features = json.loads(ts.model_params)['Model']['features'].split(',')
-        scaled_pre_x, pre_data = dh.pre_data_handler(pre_data, feature_scaler, time_series=3)
+        scaled_pre_x, pre_data = dh.pre_data_handler(pre_data, feature_scaler, time_series=args['time_series'], lstm_type=3)
         res = list(chain.from_iterable(target_scaler.inverse_transform(ts.predict(scaled_pre_x))))
         pre_data['farm_id'] = args.get('farm_id', 'null')
         if int(args.get('algorithm_test', 0)):
+            pre_data = pre_data.iloc[dh.opt.Model["time_step"]:]
             pre_data[args['model_name']] = res[:len(pre_data)]
             pre_data.rename(columns={args['col_time']: 'dateTime'}, inplace=True)
             pre_data = pre_data[['dateTime', 'farm_id', args['target'], args['model_name'], 'dq']]
@@ -74,6 +76,7 @@ def model_prediction_bp():
                 pre_data['howLongAgo'] = int(args['howLongAgo'])
                 res_cols += ['howLongAgo']
         else:
+            pre_data = pre_data.iloc[dh.opt.Model["time_step"]:]
             pre_data['power_forecast'] = res[:len(pre_data)]
             pre_data.rename(columns={args['col_time']: 'date_time'}, inplace=True)
             res_cols = ['date_time', 'power_forecast', 'farm_id']

+ 7 - 6
models_processing/model_tf/tf_lstm3_train.py

@@ -31,6 +31,7 @@ def update_config():
     request_args = request.values.to_dict()
    # features rule: 1. if passed in, parse and override 2. if absent, keep the original value
     request_args['features'] = request_args['features'].split(',') if 'features' in request_args else current_config['features']
+    request_args['time_series'] = int(request_args.get('time_series', 3))  # form values arrive as strings
     current_config.update(request_args)
 
    # store in the request context
@@ -40,7 +41,7 @@ def update_config():
 
 
 @app.route('/tf_lstm3_training', methods=['POST'])
-def model_training_bp():
+def model_training_lstm3():
    # record the program start time
     start_time = time.time()
     result = {}
@@ -52,24 +53,24 @@ def model_training_bp():
     try:
        # ------------ fetch data and preprocess the training data ------------
         train_data = get_data_from_mongo(args)
-        train_x, train_y, valid_x, valid_y, scaled_train_bytes, scaled_target_bytes, scaled_cap = dh.train_data_handler(train_data, time_series=3)
+        train_x, train_y, valid_x, valid_y, scaled_train_bytes, scaled_target_bytes, scaled_cap = dh.train_data_handler(train_data, time_series=args['time_series'], lstm_type=3)
         ts.opt.cap = round(scaled_cap, 2)
         ts.opt.Model['input_size'] = len(dh.opt.features)
        # ------------ train the model and save it ------------
        # 1. In add-train mode, load the pretrained model's feature list first, then preprocess the training data
        # 2. In normal mode, preprocess the training data first, then build the model from the training data's features
-        model = ts.train_init() if ts.opt.Model['add_train'] else ts.get_keras_model(ts.opt)
+        model = ts.train_init() if ts.opt.Model['add_train'] else ts.get_keras_model(ts.opt, time_series=args['time_series'], use_bidirectional=ts.opt.use_bidirectional)
         if ts.opt.Model['add_train']:
             if model:
                 feas = json.loads(ts.model_params)['features']
                 if set(feas).issubset(set(dh.opt.features)):
                     dh.opt.features = list(feas)
-                    train_x, train_y, valid_x, valid_y, scaled_train_bytes, scaled_target_bytes, scaled_cap = dh.train_data_handler(train_data, time_series=3)
+                    train_x, train_y, valid_x, valid_y, scaled_train_bytes, scaled_target_bytes, scaled_cap = dh.train_data_handler(train_data, time_series=args['time_series'], lstm_type=3)
                 else:
-                    model = ts.get_keras_model(ts.opt)
+                    model = ts.get_keras_model(ts.opt, time_series=args['time_series'], use_bidirectional=ts.opt.use_bidirectional)
                     logger.info("训练数据特征,不满足,加强训练模型特征")
             else:
-                model = ts.get_keras_model(ts.opt)
+                model = ts.get_keras_model(ts.opt, time_series=args['time_series'], use_bidirectional=ts.opt.use_bidirectional)
         ts_model = ts.training(model, [train_x, train_y, valid_x, valid_y])
         args['Model']['features'] = ','.join(dh.opt.features)
         args['params'] = json.dumps(args)

+ 3 - 2
models_processing/model_tf/tf_lstm_pre.py

@@ -36,6 +36,7 @@ def update_config():
     request_args = request.values.to_dict()
    # features rule: 1. if passed in, parse and override 2. if absent, keep the original value
     request_args['features'] = request_args['features'].split(',') if 'features' in request_args else current_config['features']
+    request_args['time_series'] = int(request_args.get('time_series', 1))  # form values arrive as strings
     current_config.update(request_args)
 
    # store in the request context
@@ -44,7 +45,7 @@ def update_config():
     g.ts = TSHandler(logger, current_config)
 
 @app.route('/tf_lstm_predict', methods=['POST'])
-def model_prediction_bp():
+def model_prediction_lstm():
    # record the program start time
     start_time = time.time()
     result = {}
@@ -62,7 +63,7 @@ def model_prediction_bp():
         ts.opt.cap = round(target_scaler.transform(np.array([[float(args['cap'])]]))[0, 0], 2)
         ts.get_model(args)
         dh.opt.features = json.loads(ts.model_params)['Model']['features'].split(',')
-        scaled_pre_x, pre_data = dh.pre_data_handler(pre_data, feature_scaler)
+        scaled_pre_x, pre_data = dh.pre_data_handler(pre_data, feature_scaler, time_series=args['time_series'], lstm_type=1)
         res = list(chain.from_iterable(target_scaler.inverse_transform(ts.predict(scaled_pre_x))))
         pre_data['farm_id'] = args.get('farm_id', 'null')
         if int(args.get('algorithm_test', 0)):

+ 7 - 6
models_processing/model_tf/tf_lstm_train.py

@@ -31,6 +31,7 @@ def update_config():
     request_args = request.values.to_dict()
    # features rule: 1. if passed in, parse and override 2. if absent, keep the original value
     request_args['features'] = request_args['features'].split(',') if 'features' in request_args else current_config['features']
+    request_args['time_series'] = int(request_args.get('time_series', 1))  # form values arrive as strings
     current_config.update(request_args)
 
    # store in the request context
@@ -40,7 +41,7 @@ def update_config():
 
 
 @app.route('/tf_lstm_training', methods=['POST'])
-def model_training_bp():
+def model_training_lstm():
    # record the program start time
     start_time = time.time()
     result = {}
@@ -52,24 +53,24 @@ def model_training_bp():
     try:
        # ------------ fetch data and preprocess the training data ------------
         train_data = get_data_from_mongo(args)
-        train_x, train_y, valid_x, valid_y, scaled_train_bytes, scaled_target_bytes, scaled_cap = dh.train_data_handler(train_data)
+        train_x, train_y, valid_x, valid_y, scaled_train_bytes, scaled_target_bytes, scaled_cap = dh.train_data_handler(train_data, time_series=args['time_series'])
         ts.opt.cap = round(scaled_cap, 2)
         ts.opt.Model['input_size'] = len(dh.opt.features)
        # ------------ train the model and save it ------------
        # 1. In add-train mode, load the pretrained model's feature list first, then preprocess the training data
        # 2. In normal mode, preprocess the training data first, then build the model from the training data's features
-        model = ts.train_init() if ts.opt.Model['add_train'] else ts.get_keras_model(ts.opt)
+        model = ts.train_init() if ts.opt.Model['add_train'] else ts.get_keras_model(ts.opt, time_series=args['time_series'], lstm_type=1)
         if ts.opt.Model['add_train']:
             if model:
                 feas = json.loads(ts.model_params)['features']
                 if set(feas).issubset(set(dh.opt.features)):
                     dh.opt.features = list(feas)
-                    train_x, train_y, valid_x, valid_y, scaled_train_bytes, scaled_target_bytes, scaled_cap = dh.train_data_handler(train_data)
+                    train_x, train_y, valid_x, valid_y, scaled_train_bytes, scaled_target_bytes, scaled_cap = dh.train_data_handler(train_data, time_series=args['time_series'])
                 else:
-                    model = ts.get_keras_model(ts.opt)
+                    model = ts.get_keras_model(ts.opt, time_series=args['time_series'], lstm_type=1)
                     logger.info("训练数据特征,不满足,加强训练模型特征")
             else:
-                model = ts.get_keras_model(ts.opt)
+                model = ts.get_keras_model(ts.opt, time_series=args['time_series'], lstm_type=1)
         ts_model = ts.training(model, [train_x, train_y, valid_x, valid_y])
         args['Model']['features'] = ','.join(dh.opt.features)
         args['params'] = json.dumps(args)
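
Recap of how the three lstm_type modes slice labels out of one feature window, as a sketch (inclusive row offsets inside a window of time_step*time_series rows):

def label_span(time_step, time_series, lstm_type):
    loc = time_step * time_series - 1
    if lstm_type == 2:                       # tf_lstm2_*: last segment
        return (loc - time_step + 1, loc)
    if lstm_type == 3:                       # tf_lstm3_* / bilstm: middle segment
        return (time_step, loc - time_step)
    return (0, loc)                          # tf_lstm_*: full window

print(label_span(16, 2, 2))  # (16, 31)
print(label_span(16, 3, 3))  # (16, 31)
print(label_span(16, 1, 1))  # (0, 15)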