David 3 months ago
parent
commit
1625b7fdf8

+ 2 - 2
data_processing/data_operation/data_handler.py

@@ -4,14 +4,14 @@
 # @Time      :2025/1/8 14:56
 # @Author    :David
 # @Company: shenyang JY
-import numpy as np
+import argparse
 import pandas as pd
 from common.data_cleaning import *
 
 class DataHandler(object):
     def __init__(self, logger, args):
         self.logger = logger
-        self.opt = args.parse_args_and_yaml()
+        self.opt = argparse.Namespace(**args)
 
     def get_train_data(self, df):
         train_x, valid_x, train_y, valid_y = [], [], [], []

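Note: `argparse.Namespace(**args)` turns the plain config dict into an object with attribute access; nested mappings such as `Model` remain ordinary dicts. A minimal sketch of the resulting access pattern (values mirror bp.yaml below):

    import argparse
    args = {'cap': 50.0, 'Model': {'batch_size': 64, 'epoch': 100}}
    opt = argparse.Namespace(**args)
    print(opt.cap)                  # 50.0: top-level keys become attributes
    print(opt.Model['batch_size'])  # 64: nested dicts keep dict-style access
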
+ 97 - 0
models_processing/model_koi/bp.yaml

@@ -0,0 +1,97 @@
+Model:
+  add_train: false
+  batch_size: 64
+  dropout_rate: 0.2
+  epoch: 100
+  fusion: true
+  hidden_size: 64
+  his_points: 16
+  how_long_fill: 10
+  input_size_env: 5
+  input_size_nwp: 24
+  lambda_value_1: 0.02
+  lambda_value_2: 0.01
+  learning_rate: 0.001
+  lstm_layers: 1
+  output_size: 16
+  patience: 10
+  predict_data_fill: true
+  region: south129
+  shuffle_train_data: false
+  test_data_fill: false
+  time_step: 16
+  train_data_fill: false
+  use_cuda: false
+  valid_data_rate: 0.15
+authentication:
+  date: '2025-01-08'
+  full_cap: '2024-04-30'
+  repair: '2025-01-08'
+calculate: []
+cap: 50.0
+
+config_yaml: config.yml
+dataloc: ./data
+env_columns:
+- C_TIME
+- C_CELLT
+- C_DIFFUSER
+- C_GLOBALR
+- C_RH
+- C_REAL_VALUE
+first_point:
+  sun_up_time: '2024-11-10 07:30:00'
+  sun_up_value: 0.11
+  switch: true
+full_field: true
+history_hours: 1
+
+new_field: true
+nwp_columns:
+- C_TIME
+- C_T
+- C_RH
+- C_PRESSURE
+- C_SWR
+- C_DIFFUSE_RADIATION
+- C_DIRECT_RADIATION
+- C_WD10
+- C_WD30
+- C_WD50
+- C_WD70
+- C_WD80
+- C_WD90
+- C_WD100
+- C_WD170
+- C_WS10
+- C_WS30
+- C_WS50
+- C_WS70
+- C_WS80
+- C_WS90
+- C_WS100
+- C_WS170
+- DT_TAG
+- C_FP_VALUE
+port: 9008
+predict: C_REAL_VALUE
+repair_days: 81
+repair_model_cycle: 5
+reset_sys: false
+spot_trading: []
+
+update_add_train_days: 60
+update_coe_days: 3
+usable_power:
+  api_able_power: true
+  bias: 2.524
+  clean_power_which: 1
+  coe: 4
+  down_fractile: 30
+  env: C_GLOBALR
+  k: 0.04079
+  outliers_threshold: 1.5
+  up_fractile: 70
+version: solar-3.1.0.south
+weatherloc:
+- 1

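The endpoints in nn_bp.py below merge these file defaults with per-request form values, request values taking precedence. A minimal sketch of that merge, assuming the path above (form values arrive as strings via `request.values.to_dict()`):

    import copy, yaml, argparse
    with open('./models_processing/model_koi/bp.yaml', 'r', encoding='utf-8') as f:
        defaults = yaml.safe_load(f)
    args = copy.deepcopy(defaults)      # keep the file defaults intact
    args.update({'repair_days': '30'})  # hypothetical per-request override
    opt = argparse.Namespace(**args)
    print(opt.repair_days, opt.cap)     # '30' 50.0
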
+ 43 - 98
models_processing/model_koi/nn_bp.py

@@ -6,11 +6,10 @@
 # company: shenyang JY
 
 import numpy as np
-from sklearn.model_selection import train_test_split
 from flask import Flask, request
 import time
 import traceback
-import logging
+import logging, argparse
 from sklearn.preprocessing import MinMaxScaler
 from io import BytesIO
 import joblib
@@ -20,39 +19,27 @@ from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoa
 from tensorflow.keras import optimizers, regularizers
 import tensorflow.keras.backend as K
 import tensorflow as tf
-
 from common.data_cleaning import cleaning
 from common.database_dml import *
 from common.processing_data_common import missing_features, str_to_list
 from data_processing.data_operation.data_handler import DataHandler
 from threading import Lock
-import time
+import yaml, copy
 import random
 import matplotlib.pyplot as plt
 model_lock = Lock()
+from common.logs import Log
+logger = Log('models-processing').logger
+np.random.seed(42)  # NumPy random seed
+tf.random.set_seed(42)  # TensorFlow random seed
+app = Flask('nn_bp——service')
 
+with app.app_context():
+    with open('./models_processing/model_koi/bp.yaml', 'r', encoding='utf-8') as f:
+        arguments = yaml.safe_load(f)
 
-app = Flask('model_training_bp——service')
-
-def draw_loss(history):
-    # plot training and validation loss
-    plt.figure(figsize=(20, 8))
-    plt.plot(history.history['loss'], label='Training Loss')
-    plt.plot(history.history['val_loss'], label='Validation Loss')
-    plt.title('Loss Curve')
-    plt.xlabel('Epochs')
-    plt.ylabel('Loss')
-    plt.legend()
-    plt.show()
-
-dh = DataHandler()
+dh = DataHandler(logger, arguments)
 def train_data_handler(data, args):
-    sleep_time = random.uniform(1, 20)  # random float between 1 and 20 seconds
-    time.sleep(sleep_time)
-    tf.keras.backend.clear_session()  # clear the current graph and session
-    # set random seeds
-    np.random.seed(42)  # NumPy random seed
-    tf.random.set_seed(42)  # TensorFlow random seed
     col_time, features, target = args['col_time'], str_to_list(args['features']), args['target']
     if 'is_limit' in data.columns:
         data = data[data['is_limit'] == False]
@@ -78,20 +65,12 @@ def pre_data_handler(data, args):
     features, time_steps, col_time, model_name,col_reserve =  str_to_list(args['features']), int(args['time_steps']),args['col_time'],args['model_name'],str_to_list(args['col_reserve'])
     feature_scaler,target_scaler = get_scaler_model_from_mongo(args)
     pre_data = data.sort_values(by=col_time)
-    # preprocess prediction features: 1. clean nulls/outliers 2. fill missing values
-    pre_data_cleaned = cleaning(pre_data, '', logger, pre_data.columns.tolist())
-    pre_data = dh.fill_train_data(pre_data_cleaned)
     scaled_features = feature_scaler.transform(pre_data[features])
     return scaled_features
 
-class NPHandler(object):
-    train = False
-
-    def __init__(self, log, args, graph, sess):
-        self.logger = log
-        self.graph = graph
-        self.sess = sess
-        opt = args.parse_args_and_yaml()
+class BPHandler(object):
+    def __init__(self, logger):
+        self.logger = logger
         self.model = None
 
     def get_model(self, args):
@@ -103,7 +82,7 @@ class NPHandler(object):
                 # NPHandler.model = NPHandler.get_keras_model(opt)
                 self.model = get_h5_model_from_mongo(args)
         except Exception as e:
-            print("加载模型权重失败:{}".format(e.args))
+            self.logger.info("加载模型权重失败:{}".format(e.args))
 
     @staticmethod
     def get_keras_model(opt):
@@ -124,14 +103,14 @@ class NPHandler(object):
         adam = optimizers.Adam(learning_rate=opt.Model['learning_rate'], beta_1=0.9, beta_2=0.999, epsilon=1e-7,
                                amsgrad=True)
         reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.01, patience=5, verbose=1)
-        model.compile(loss=rmse, optimizer=adam)
+        model.compile(loss='mse', optimizer=adam)  # 'rmse' is not a built-in Keras loss string; a custom rmse is sketched below
         return model
 
-    def train_init(self, opt, args):
+    def train_init(self, opt):
         try:
             if opt.Model['add_train']:
                # incremental training, supports model repair
-                base_train_model = get_h5_model_from_mongo(args)
+                base_train_model = get_h5_model_from_mongo(vars(opt))
                 base_train_model.summary()
                 self.logger.info("已加载加强训练基础模型")
             else:
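The compile call above previously referenced a custom `rmse` loss; Keras has no built-in `'rmse'` loss string. A minimal sketch of such a loss, assuming the `K` backend alias already imported in this file:

    import tensorflow.keras.backend as K

    def rmse(y_true, y_pred):
        # root-mean-square error as a Keras-compatible loss
        return K.sqrt(K.mean(K.square(y_pred - y_true)))

    # model.compile(loss=rmse, optimizer=adam)
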
@@ -141,23 +120,17 @@ class NPHandler(object):
             self.logger.info("加强训练加载模型权重失败:{}".format(e.args))
 
     def training(self, opt, train_and_valid_data):
-        model = self.train_init(opt)
-        train_X, train_Y, valid_X, valid_Y = train_and_valid_data
-        print("----------", np.array(train_X[0]).shape)
-        print("++++++++++", np.array(train_X[1]).shape)
-        # weight_lstm_1, bias_lstm_1 = model.get_layer('d1').get_weights()
-        # print("weight_lstm_1 = ", weight_lstm_1)
-        # print("bias_lstm_1 = ", bias_lstm_1)
+        tf.keras.backend.clear_session()  # clear the previous graph/session before building a new model
+        model = self.train_init(opt)
+        train_x, train_y, valid_x, valid_y = train_and_valid_data
+        print("----------", np.array(train_x[0]).shape)
+        print("++++++++++", np.array(train_x[1]).shape)
 
         check_point = ModelCheckpoint(filepath='./var/' + 'fmi.h5', monitor='val_loss',
                                       save_best_only=True, mode='auto')
         early_stop = EarlyStopping(monitor='val_loss', patience=opt.Model['patience'], mode='auto')
-        # tbCallBack = TensorBoard(log_dir='../figure',
-        #                          histogram_freq=0,
-        #                          write_graph=True,
-        #                          write_images=True)
-        history = model.fit(train_X, train_Y, batch_size=opt.Model['batch_size'], epochs=opt.Model['epoch'], verbose=2,
-                            validation_data=(valid_X, valid_Y), callbacks=[check_point, early_stop], shuffle=False)
+        history = model.fit(train_x, train_y, batch_size=opt.Model['batch_size'], epochs=opt.Model['epoch'], verbose=2,
+                            validation_data=(valid_x, valid_y), callbacks=[check_point, early_stop], shuffle=False)
         loss = np.round(history.history['loss'], decimals=5)
         val_loss = np.round(history.history['val_loss'], decimals=5)
         self.logger.info("-----模型训练经过{}轮迭代-----".format(len(loss)))
@@ -170,56 +143,25 @@ class NPHandler(object):
         self.logger.info("执行预测方法")
         return result
 
-def build_model(data, args):
-    # split into train and test sets
-    X_train, X_test, y_train, y_test = train_test_split(scaled_features, scaled_target, test_size=0.2, random_state=43)
-
-    # build the model
-    model = Sequential([
-        Dense(64, input_dim=X_train.shape[1], activation='relu'),  # input + hidden layer, 64 units
-        Dropout(0.2),
-        Dense(32, activation='relu'),  # hidden layer, 32 units
-        Dropout(0.3),  # dropout layer: 30% of activations randomly dropped
-        Dense(1, activation='linear')  # output layer, 1 unit (regression)
-    ])
-
-    # compile the model
-    model.compile(optimizer='adam', loss='mean_squared_error')
-    # define EarlyStopping and ReduceLROnPlateau callbacks
-    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1)
-    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=1)
-    # train the model
-    # using the GPU for training
-    with tf.device('/GPU:1'):
-        history = model.fit(X_train, y_train,
-                            epochs=100,
-                            batch_size=32,
-                            validation_data=(X_test, y_test),
-                            verbose=2,
-                            shuffle=False,
-                            callbacks=[early_stopping, reduce_lr])
-    draw_loss(history)
-    return model, feature_scaler_bytes, target_scaler_bytes
-
-
 @app.route('/model_training_bp', methods=['POST'])
 def model_training_bp():
     # 获取程序开始时间
     start_time = time.time()
     result = {}
     success = 0
-    nh = NPHandler()
+    bp = BPHandler(logger)
     print("Program starts execution!")
     try:
-        args = request.values.to_dict()
-        print('args', args)
+        params_dict = request.values.to_dict()
+        args = copy.deepcopy(arguments)
+        args.update(params_dict)
+        opt = argparse.Namespace(**args)
         logger.info(args)
-        power_df = get_data_from_mongo(args)
-        train_x, valid_x, train_y, valid_y, train_data_handler = dh.get_train_data(power_df)
-        np_model = nh.training(opt, [train_x, valid_x, train_y, valid_y])
-        model, feature_scaler_bytes, target_scaler_bytes = build_model(power_df, args)
-
-        insert_h5_model_into_mongo(np_model, train_data_handler, args)
+        train_data = get_data_from_mongo(args)
+        train_x, valid_x, train_y, valid_y, scaled_train_bytes = train_data_handler(train_data, args)
+        bp_model = bp.training(opt, [train_x, train_y, valid_x, valid_y])  # order matches the unpack in training()
+        insert_single_model_into_mongo(bp_model, args)
+        insert_scaler_model_into_mongo(scaled_train_bytes, args)
         success = 1
     except Exception as e:
         my_exception = traceback.format_exc()
@@ -241,15 +183,18 @@ def model_prediction_bp():
     start_time = time.time()
     result = {}
     success = 0
-    nh = NPHandler()
+    bp = BPHandler(logger)
     print("Program starts execution!")
     try:
-        args = request.values.to_dict()
+        params_dict = request.values.to_dict()
+        args = copy.deepcopy(arguments)
+        args.update(params_dict)
+        opt = argparse.Namespace(**args)
         print('args', args)
         logger.info(args)
-        power_df = get_data_from_mongo(args)
-        scaled_features = pre_data_handler(power_df, args)
-        result = nh.predict(power_df, args)
+        predict_data = get_data_from_mongo(args)
+        scaled_features = pre_data_handler(predict_data, args)
+        result = bp.predict(scaled_features, args)
         insert_data_into_mongo(result, args)
         success = 1
     except Exception as e:
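
For reference, a minimal client sketch against the training route, assuming the service listens on the port from bp.yaml (9008); the feature list and model name are hypothetical, and field names follow train_data_handler:

    import requests

    resp = requests.post(
        'http://localhost:9008/model_training_bp',
        data={
            'features': 'C_SWR,C_T,C_RH',  # hypothetical subset of nwp_columns
            'target': 'C_REAL_VALUE',
            'col_time': 'C_TIME',
            'model_name': 'bp_south129',   # hypothetical model name
        },
    )
    print(resp.status_code, resp.text)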