
Merge branch 'dev_david' of anweiguo/algorithm_platform into dev_awg

liudawei 2 weeks ago
parent
commit
678948f94c

+ 13 - 11
data_processing/data_operation/custom_data_handler.py

@@ -24,7 +24,7 @@ class CustomDataHandler(object):
             if len(df) < self.opt.Model["time_step"]:
                 self.logger.info("特征处理-训练数据-不满足time_step")
 
-            datax, datay = self.get_timestep_features(df, col_time, target, is_train=True, time_series=time_series)
+            datax, datay = self.get_timestep_features_zone(df, col_time, target, is_train=True, time_series=time_series)
             if len(datax) < 10:
                 self.logger.info("特征处理-训练数据-无法进行最小分割")
                 continue
@@ -34,8 +34,8 @@ class CustomDataHandler(object):
             train_y.extend(ty)
             valid_y.extend(vy)
 
-        train_y = [np.array([y[0].values for y in train_y]), np.concatenate([[y[1].iloc[:, 1].values for y in train_y]])]
-        valid_y = [np.array([y[0].values for y in valid_y]), np.concatenate([[y[1].iloc[:, 1].values for y in valid_y]])]
+        train_y = [np.array([y[0].values for y in train_y]), np.array([y[1].iloc[:, 1].values for y in train_y])]
+        valid_y = [np.array([y[0].values for y in valid_y]), np.array([y[1].iloc[:, 1].values for y in valid_y])]
 
         train_x = np.array([x.values for x in train_x])
         valid_x = np.array([x.values for x in valid_x])
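The second label component was previously built with np.concatenate([[...]]), which wraps the per-sample list in an extra pair of brackets and relies on an implicit list-to-array conversion; np.array states the stacking intent directly. A minimal sketch of the equivalence, with made-up sample counts:

import numpy as np

# Three hypothetical samples, each a 4-step label series.
ys = [np.arange(4, dtype=float) + i for i in range(3)]

old = np.concatenate([[y for y in ys]])  # implicit list->array conversion, shape (3, 4)
new = np.array([y for y in ys])          # explicit stacking, shape (3, 4)
assert old.shape == new.shape == (3, 4)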
@@ -58,7 +58,7 @@ class CustomDataHandler(object):
         Split the data evenly and build the prediction dataset
         """
         time_step = self.opt.Model["time_step"]
-        feature_data = norm_data.reset_index(drop=True)
+        feature_data = norm_data.loc[:, self.opt.features].reset_index(drop=True)
         time_step *= int(time_series)
         time_step_loc = time_step - 1
         iters = int(len(feature_data)) // time_step
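The prediction split now keeps only the model's input columns before windowing, so extra columns (such as the zone labels appended during training) cannot leak into the prediction tensors. A minimal sketch of the even-split logic, with hypothetical names and sizes:

import pandas as pd

def split_into_windows(norm_data: pd.DataFrame, features, time_step=16, time_series=1):
    # Keep only model input columns, then cut into consecutive windows
    # of time_step * time_series rows; a trailing partial window is dropped.
    feature_data = norm_data.loc[:, features].reset_index(drop=True)
    window = time_step * int(time_series)
    iters = len(feature_data) // window
    return [feature_data.iloc[i * window:(i + 1) * window] for i in range(iters)]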
@@ -97,8 +97,8 @@ class CustomDataHandler(object):
         time_step_loc = time_step*time_series - 1
         train_num = int(len(feature_data))
         label_features_power = [col_time, target] if is_train is True else [col_time, target]
-        label_features_zone = list(self.opt.zone.keys())
-        nwp_cs = self.opt.features
+        label_features_zone = self.opt.zone
+        nwp_cs = [x for x in self.opt.features if x not in self.opt.zone]
         nwp = [feature_data.loc[i:i + time_step_loc, nwp_cs].reset_index(drop=True) for i in range(train_num - time_step*time_series + 1)]
         labels_power = [feature_data.loc[i:i + time_step_loc, label_features_power].reset_index(drop=True) for i in range(train_num - time_step*time_series + 1)]
         labels_zone = [feature_data.loc[i:i + time_step_loc, label_features_zone].reset_index(drop=True) for i in range(train_num - time_step*time_series + 1)]
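get_timestep_features_zone slices one training sample per sliding window: the NWP inputs now exclude the zone columns, which instead become a second label set next to the plant-level power. A sketch of the slicing under those assumptions (.loc slices are end-inclusive, hence the window - 1):

import pandas as pd

def sliding_windows(feature_data, features, zone_cols, col_time, target,
                    time_step=16, time_series=1):
    window = time_step * time_series
    nwp_cs = [c for c in features if c not in zone_cols]   # inputs exclude zone labels
    n = len(feature_data) - window + 1
    nwp = [feature_data.loc[i:i + window - 1, nwp_cs].reset_index(drop=True) for i in range(n)]
    labels_power = [feature_data.loc[i:i + window - 1, [col_time, target]].reset_index(drop=True) for i in range(n)]
    labels_zone = [feature_data.loc[i:i + window - 1, zone_cols].reset_index(drop=True) for i in range(n)]
    return nwp, labels_power, labels_zone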
@@ -202,7 +202,7 @@ class CustomDataHandler(object):
         if 'is_limit' in data.columns:
             data = data[data['is_limit'] == False]
         # Select features, convert values to numeric, sort by time
-        train_data = data[[col_time] + features + [target]]
+        train_data = data[[col_time] + features + [target] + self.opt.zone]
         train_data = train_data.applymap(lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x, numbers.Number) else x)
         train_data = train_data.sort_values(by=col_time)
         # Drop days whose average feature missing rate exceeds 20%
@@ -216,10 +216,10 @@ class CustomDataHandler(object):
         train_scaler = MinMaxScaler(feature_range=(0, 1))
         target_scaler = MinMaxScaler(feature_range=(0, 1))
         # Normalize features and target
-        scaled_train_data = train_scaler.fit_transform(train_data_cleaned[self.opt.features])
+        scaled_train_data = train_scaler.fit_transform(train_data_cleaned[self.opt.features+self.opt.zone])
         scaled_target = target_scaler.fit_transform(train_data_cleaned[[target]])
         scaled_cap = target_scaler.transform(np.array([[float(self.opt.cap)]]))[0,0]
-        train_data_cleaned[self.opt.features] = scaled_train_data
+        train_data_cleaned[self.opt.features+self.opt.zone] = scaled_train_data
         train_data_cleaned[[target]] = scaled_target
         # 3. Fill missing values
         train_datas = self.fill_train_data(train_data_cleaned, col_time)
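Fitting a single MinMaxScaler over features plus zone columns keeps every zone target on the same 0-1 scale as the plant features, at the cost of fixing the scaler's column order to features followed by zones (the predict path below depends on that order). A small sketch with made-up column names:

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

features, zone = ['nwp_ws', 'nwp_temp'], ['zone1', 'zone2']   # hypothetical
df = pd.DataFrame(np.random.rand(8, 4), columns=features + zone)

train_scaler = MinMaxScaler(feature_range=(0, 1))
df[features + zone] = train_scaler.fit_transform(df[features + zone])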
@@ -254,9 +254,11 @@ class CustomDataHandler(object):
         if not set(features).issubset(set(data.columns.tolist())):
             raise ValueError("预测数据特征不满足模型特征!")
         pre_data = data[features].copy()
+        pre_data[self.opt.zone] = 1
         if self.opt.Model['predict_data_fill']:
             pre_data = self.fill_pre_data(pre_data)
-        pre_data.loc[:, features] = feature_scaler.transform(pre_data)
-
+        scaled_pre_data = feature_scaler.transform(pre_data)[:, :len(features)]
+        pre_data.drop(columns=self.opt.zone, inplace=True)
+        pre_data.loc[:, features] = scaled_pre_data
         pre_x = self.get_predict_data([pre_data], time_series)
         return pre_x, data
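pre_data_handler has no real zone values at predict time, so it appends placeholder zone columns (constant 1) purely to match the width the scaler was fitted with, then keeps only the first len(features) scaled columns. This works because the scaler saw features first and zones last; a sketch of the workaround under that assumption:

import pandas as pd

def scale_pre_data(pre_data, features, zone_cols, feature_scaler):
    pre_data = pre_data[features].copy()
    pre_data[zone_cols] = 1                              # placeholders, discarded below
    scaled = feature_scaler.transform(pre_data)[:, :len(features)]
    pre_data = pre_data.drop(columns=zone_cols)
    pre_data.loc[:, features] = scaled
    return pre_data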

+ 23 - 1
models_processing/model_tf/tf_lstm.py

@@ -37,7 +37,7 @@ class TSHandler(object):
             self.logger.info("加载模型权重失败:{}".format(e.args))
 
     @staticmethod
-    def get_keras_model(opt, time_series=1, lstm_type=1):
+    def get_keras_model_20250514(opt, time_series=1, lstm_type=1):
         loss = region_loss(opt)
         l1_reg = regularizers.l1(opt.Model['lambda_value_1'])
         l2_reg = regularizers.l2(opt.Model['lambda_value_2'])
@@ -56,6 +56,28 @@ class TSHandler(object):
         model.compile(loss=loss, optimizer=adam)
         return model
 
+    @staticmethod
+    def get_keras_model(opt, time_series=1, lstm_type=1):
+        loss = region_loss(opt)
+        l1_reg = regularizers.l1(opt.Model['lambda_value_1'])
+        l2_reg = regularizers.l2(opt.Model['lambda_value_2'])
+        nwp_input = Input(shape=(opt.Model['time_step'] * time_series, opt.Model['input_size']), name='nwp')
+
+        con1 = Conv1D(filters=64, kernel_size=1, strides=1, padding='valid', activation='relu',
+                      kernel_regularizer=l2_reg)(nwp_input)
+        con1_p = MaxPooling1D(pool_size=1, strides=1, padding='valid', data_format='channels_last')(con1)
+        nwp_lstm = LSTM(units=opt.Model['hidden_size'], return_sequences=False, kernel_regularizer=l2_reg)(con1_p)
+        if lstm_type == 2:
+            output = Dense(opt.Model['time_step'], name='cdq_output')(nwp_lstm)
+        else:
+            output = Dense(opt.Model['time_step'] * time_series, name='cdq_output')(nwp_lstm)
+
+        model = Model(nwp_input, output)
+        adam = optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-7, amsgrad=True)
+        model.compile(loss=loss, optimizer=adam)
+
+        return model
+
     def train_init(self):
         try:
             # Perform reinforced training to support model revision
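The rebuilt get_keras_model uses kernel_size=1, so the Conv1D acts as a per-timestep channel mixer, and with pool_size=1/strides=1 the pooling layer is effectively an identity kept for structural symmetry; the sequence length reaching the LSTM is unchanged. A quick shape check with assumed sizes:

from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, LSTM, Dense
from tensorflow.keras.models import Model

x = Input(shape=(16, 24), name='nwp')                   # time_step=16, input_size=24 (assumed)
h = Conv1D(64, kernel_size=1, activation='relu')(x)     # -> (None, 16, 64)
h = MaxPooling1D(pool_size=1, strides=1)(h)             # -> (None, 16, 64), identity
h = LSTM(32, return_sequences=False)(h)                 # -> (None, 32)
y = Dense(16, name='cdq_output')(h)                     # -> (None, 16)
Model(x, y).summary()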

+ 8 - 7
models_processing/model_tf/tf_lstm_zone.py

@@ -43,18 +43,19 @@ class TSHandler(object):
         l2_reg = regularizers.l2(opt.Model['lambda_value_2'])
         nwp_input = Input(shape=(opt.Model['time_step']*time_series, opt.Model['input_size']), name='nwp')
 
-        con1 = Conv1D(filters=64, kernel_size=5, strides=1, padding='valid', activation='relu', kernel_regularizer=l2_reg)(nwp_input)
-        con1_p = MaxPooling1D(pool_size=5, strides=1, padding='valid', data_format='channels_last')(con1)
-        nwp_lstm = LSTM(units=opt.Model['hidden_size'], return_sequences=False, kernel_regularizer=l2_reg)(con1_p)
-        zone = Dense(len(opt.zone.keys()), name='zone')(nwp_lstm)
+        con1 = Conv1D(filters=64, kernel_size=1, strides=1, padding='valid', activation='relu', kernel_regularizer=l2_reg)(nwp_input)
+        con1_p = MaxPooling1D(pool_size=1, strides=1, padding='valid', data_format='channels_last')(con1)
+        nwp_lstm = LSTM(units=opt.Model['hidden_size'], return_sequences=True, kernel_regularizer=l2_reg)(con1_p)
+        zone = Dense(len(opt.zone), name='zone')(nwp_lstm)
+        zonef = Flatten()(zone)
         if lstm_type == 2:
-            output = Dense(opt.Model['time_step'], name='cdq_output')(zone)
+            output = Dense(opt.Model['time_step'], name='cdq_output')(zonef)
         else:
-            output = Dense(opt.Model['time_step']*time_series, name='cdq_output')(zone)
+            output = Dense(opt.Model['time_step']*time_series, name='cdq_output')(zonef)
 
         model = Model(nwp_input, [zone, output])
         adam = optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-7, amsgrad=True)
-        model.compile(loss={"zone": loss, "cdq_output": loss}, loss_weights={"zone": 0.7, "cdq_output": 0.3}, optimizer=adam)
+        model.compile(loss={"zone": loss, "cdq_output": loss}, loss_weights={"zone": 0.5, "cdq_output": 0.5}, optimizer=adam)
 
         return model
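With return_sequences=True the zone Dense now applies per timestep, so the zone head emits a (time_step, n_zones) sequence instead of a single vector, and Flatten is required before the plant-level cdq_output head; the two losses are also rebalanced from 0.7/0.3 to 0.5/0.5. A shape sketch with assumed sizes (mse stands in for the project's region_loss):

from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, LSTM, Dense, Flatten
from tensorflow.keras.models import Model

n_zones = 4                                             # hypothetical
x = Input(shape=(16, 24), name='nwp')
h = Conv1D(64, kernel_size=1, activation='relu')(x)
h = MaxPooling1D(pool_size=1, strides=1)(h)
h = LSTM(32, return_sequences=True)(h)                  # -> (None, 16, 32)
zone = Dense(n_zones, name='zone')(h)                   # -> (None, 16, n_zones)
out = Dense(16, name='cdq_output')(Flatten()(zone))     # -> (None, 16)
model = Model(x, [zone, out])
model.compile(loss={'zone': 'mse', 'cdq_output': 'mse'},
              loss_weights={'zone': 0.5, 'cdq_output': 0.5}, optimizer='adam')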
 

+ 2 - 1
models_processing/model_tf/tf_lstm_zone_pre.py

@@ -37,6 +37,7 @@ def update_config():
     # features parameter rule: 1. if provided, parse and override; 2. if absent, keep the original value
     request_args['features'] = request_args['features'].split(',') if 'features' in request_args else current_config['features']
     request_args['time_series'] = request_args.get('time_series', 1)
+    request_args['zone'] = request_args['zone'].split(',')
     current_config.update(request_args)
 
     # Store in the request context
@@ -64,7 +65,7 @@ def model_prediction_lstm():
         ts.get_model(args)
         dh.opt.features = json.loads(ts.model_params)['Model']['features'].split(',')
         scaled_pre_x, pre_data = dh.pre_data_handler(pre_data, feature_scaler, time_series=args['time_series'])
-        res = list(chain.from_iterable(target_scaler.inverse_transform(ts.predict(scaled_pre_x))))
+        res = list(chain.from_iterable(target_scaler.inverse_transform(ts.predict(scaled_pre_x)[1])))
         pre_data['farm_id'] = args.get('farm_id', 'null')
         if int(args.get('algorithm_test', 0)):
             pre_data[args['model_name']] = res[:len(pre_data)]
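Because the zone model returns [zone, cdq_output], predict() yields a two-element list, and index 1 selects the plant-level power track before inverse scaling. A mock of the unpacking with made-up shapes:

import numpy as np
from itertools import chain
from sklearn.preprocessing import MinMaxScaler

target_scaler = MinMaxScaler().fit(np.array([[0.0], [100.0]]))  # 1-column target scaler
preds = [np.random.rand(2, 16, 4),                              # zone head: (batch, time_step, n_zones)
         np.random.rand(2, 16)]                                 # power head: (batch, time_step)
res = list(chain.from_iterable(target_scaler.inverse_transform(preds[1])))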

+ 1 - 0
models_processing/model_tf/tf_lstm_zone_train.py

@@ -32,6 +32,7 @@ def update_config():
     # features parameter rule: 1. if provided, parse and override; 2. if absent, keep the original value
     request_args['features'] = request_args['features'].split(',') if 'features' in request_args else current_config['features']
     request_args['time_series'] = request_args.get('time_series', 1)
+    request_args['zone'] = request_args['zone'].split(',')
     current_config.update(request_args)
 
     # Store in the request context
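Note that update_config now requires a zone parameter in every request: request_args['zone'].split(',') raises KeyError when it is absent, unlike the guarded features rule above it. A hedged sketch of a symmetric rule (the fallback to the current config is an assumption, not part of this commit):

def parse_zone(request_args: dict, current_config: dict) -> list:
    # Mirror the features rule: parse and override when provided, else keep the config value.
    if 'zone' in request_args:
        return request_args['zone'].split(',')
    return current_config.get('zone', [])

assert parse_zone({'zone': 'z1,z2'}, {}) == ['z1', 'z2']
assert parse_zone({}, {'zone': ['z1']}) == ['z1']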