David 4 maanden geleden
bovenliggende
commit
ef717c3915

+ 12 - 4
data_processing/data_operation/data_handler.py

@@ -156,7 +156,7 @@ class DataHandler(object):
                 vy.append(data[1])
         return tx, vx, ty, vy
 
-    def train_data_handler(self, data, opt):
+    def train_data_handler(self, data, opt, bp_data=False):
         """
         训练数据预处理:
         清洗+补值+归一化
@@ -191,6 +191,7 @@ class DataHandler(object):
         scaled_target = target_scaler.fit_transform(train_data_cleaned[[target]])
         train_data_cleaned[features] = scaled_train_data
         train_data_cleaned[[target]] = scaled_target
+
         train_datas = self.fill_train_data(train_data_cleaned, col_time)
         # 保存两个scaler
         scaled_train_bytes = BytesIO()
@@ -201,10 +202,14 @@ class DataHandler(object):
         scaled_train_bytes.seek(0)  # Reset pointer to the beginning of the byte stream
         scaled_target_bytes.seek(0)
 
-        train_x, valid_x, train_y, valid_y = self.get_train_data(train_datas, col_time, features, target)
+        if bp_data:
+            train_data = pd.concat(train_datas, axis=0)
+            train_x, valid_x, train_y, valid_y = self.train_valid_split(train_data, scaled_target, valid_rate=self.opt.Model["valid_data_rate"], shuffle=self.opt.Model['shuffle_train_data'])
+        else:
+            train_x, valid_x, train_y, valid_y = self.get_train_data(train_datas, col_time, features, target)
         return train_x, valid_x, train_y, valid_y, scaled_train_bytes, scaled_target_bytes
 
-    def pre_data_handler(self, data, feature_scaler, opt):
+    def pre_data_handler(self, data, feature_scaler, opt, bp_data=False):
         """
         预测数据简单处理
         Args:
@@ -221,5 +226,8 @@ class DataHandler(object):
         pre_data = data.sort_values(by=col_time)[features]
         scaled_features = feature_scaler.transform(pre_data[features])
         pre_data[features] = scaled_features
-        pre_x = self.get_predict_data([pre_data], features)
+        if bp_data:
+            pre_x = self.get_predict_data([pre_data], features)
+        else:
+            pre_x = pre_data.values
         return pre_x

+ 107 - 0
models_processing/model_koi/cnn.yaml

@@ -0,0 +1,107 @@
+Model:
+  add_train: false
+  batch_size: 64
+  dropout_rate: 0.2
+  epoch: 100
+  fusion: true
+  hidden_size: 64
+  his_points: 16
+  how_long_fill: 10
+  input_size: 24
+  lambda_value_1: 0.02
+  lambda_value_2: 0.01
+  learning_rate: 0.001
+  lstm_layers: 1
+  output_size: 16
+  patience: 10
+  predict_data_fill: true
+  region: south129
+  shuffle_train_data: false
+  test_data_fill: false
+  time_step: 16
+  train_data_fill: false
+  use_cuda: false
+  valid_data_rate: 0.15
+authentication:
+  date: '2025-01-08'
+  full_cap: '2024-04-30'
+  repair: '2025-01-08'
+calculate: []
+cap: 50.0
+dataloc: ./data
+env_columns:
+- C_TIME
+- C_CELLT
+- C_DIFFUSER
+- C_GLOBALR
+- C_RH
+- C_REAL_VALUE
+full_field: true
+history_hours: 1
+new_field: true
+features:
+- time
+- temperature10
+- temperature190
+- direction160
+- direction40
+- temperature110
+- direction80
+- speed60
+- mcc
+- temperature150
+- speed20
+- speed110
+- direction120
+- speed190
+- solarZenith
+- temperature90
+- direction200
+- speed150
+- temperature50
+- direction30
+- temperature160
+- direction170
+- temperature20
+- direction70
+- direction130
+- temperature200
+- speed70
+- temperature120
+- speed30
+- speed100
+- speed80
+- speed180
+- dniCalcd
+- speed140
+- temperature60
+- dateTime
+- temperature30
+- temperature170
+- direction20
+- humidity2
+- direction180
+- realPowerAvg
+- direction60
+- direction140
+- speed40
+- hcc
+target: realPower
+repair_days: 81
+repair_model_cycle: 5
+spot_trading: []
+update_add_train_days: 60
+update_coe_days: 3
+usable_power:
+  api_able_power: true
+  bias: 2.524
+  clean_power_which: 1
+  coe: 4
+  down_fractile: 30
+  env: C_GLOBALR
+  k: 0.04079
+  outliers_threshold: 1.5
+  up_fractile: 70
+version: solar-3.1.0.south
+weatherloc:
+- 1

+ 91 - 0
models_processing/model_koi/tf_bp.py

@@ -0,0 +1,91 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+# @FileName  :tf_bp.py
+# @Time      :2025/2/13 13:34
+# @Author    :David
+# @Company: shenyang JY
+
+from tensorflow.keras.layers import Input, Dense, LSTM, concatenate, Conv1D, Conv2D, MaxPooling1D, Reshape, Flatten
+from tensorflow.keras.models import Model, load_model
+from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard, ReduceLROnPlateau
+from tensorflow.keras import optimizers, regularizers
+from models_processing.losses.loss_cdq import rmse
+import numpy as np
+from common.database_dml import *
+from threading import Lock
+model_lock = Lock()
+
+class BPHandler(object):
+    """Build, train, load and run the Keras network used by the BP service.
+
+    The currently loaded model is kept in ``self.model``; it is filled by
+    :meth:`get_model` and consumed by :meth:`predict`.
+    """
+
+    def __init__(self, logger):
+        """Store the logger; no model is loaded until ``get_model`` runs."""
+        self.logger = logger
+        self.model = None
+
+    def get_model(self, args):
+        """Load the stored model described by ``args`` into ``self.model``.
+
+        The load runs while holding the module-level ``model_lock``.  A
+        failure is logged and swallowed, which leaves ``self.model`` at its
+        previous value (``None`` if nothing was loaded before).
+        """
+        try:
+            with model_lock:
+                self.model = get_h5_model_from_mongo(args, {'rmse': rmse})
+        except Exception as e:
+            self.logger.info("加载模型权重失败:{}".format(e.args))
+
+    @staticmethod
+    def get_keras_model(opt):
+        """Build and compile a fresh network from the ``opt.Model`` settings.
+
+        Reads ``lambda_value_1``, ``lambda_value_2``, ``time_step``,
+        ``input_size`` and ``learning_rate`` from ``opt.Model``.
+
+        Returns:
+            A compiled Keras ``Model`` using the ``rmse`` loss and Adam.
+        """
+        l1_reg = regularizers.l1(opt.Model['lambda_value_1'])
+        l2_reg = regularizers.l2(opt.Model['lambda_value_2'])
+        nwp_input = Input(shape=(opt.Model['time_step'], opt.Model['input_size']), name='nwp')
+
+        con1 = Conv1D(filters=64, kernel_size=1, strides=1, padding='valid', activation='relu', kernel_regularizer=l2_reg)(nwp_input)
+        d1 = Dense(32, activation='relu', name='d1', kernel_regularizer=l1_reg)(con1)
+        nwp = Dense(8, activation='relu', name='d2', kernel_regularizer=l1_reg)(d1)
+
+        output = Dense(1, name='d5')(nwp)
+        output_f = Flatten()(output)
+        model = Model(nwp_input, output_f)
+        adam = optimizers.Adam(learning_rate=opt.Model['learning_rate'], beta_1=0.9, beta_2=0.999, epsilon=1e-7, amsgrad=True)
+        model.compile(loss=rmse, optimizer=adam)
+        return model
+
+    def train_init(self, opt):
+        """Return the model that training should start from.
+
+        When ``opt.Model['add_train']`` is truthy the stored model is loaded
+        for incremental training; otherwise a new network is built.
+
+        Raises:
+            Exception: whatever the load/build step raised.  The error is
+                logged and re-raised so the caller does not continue with
+                ``None`` and fail later with an unrelated AttributeError.
+        """
+        try:
+            if opt.Model['add_train']:
+                # Incremental training on top of the stored model.
+                base_train_model = get_h5_model_from_mongo(vars(opt), {'rmse': rmse})
+                base_train_model.summary()
+                self.logger.info("已加载加强训练基础模型")
+            else:
+                base_train_model = self.get_keras_model(opt)
+            return base_train_model
+        except Exception as e:
+            self.logger.info("加强训练加载模型权重失败:{}".format(e.args))
+            raise
+
+    def training(self, opt, train_and_valid_data):
+        """Fit the model with early stopping and return it.
+
+        Args:
+            opt: namespace whose ``Model`` dict provides ``patience``,
+                ``batch_size`` and ``epoch`` (plus what ``train_init`` reads).
+            train_and_valid_data: sequence unpacked in the order
+                ``train_x, train_y, valid_x, valid_y``.  Callers must pass
+                the four parts in exactly this order.
+
+        Returns:
+            The fitted Keras model.
+        """
+        model = self.train_init(opt)
+        train_x, train_y, valid_x, valid_y = train_and_valid_data
+        # Log the overall shapes instead of indexing individual samples,
+        # which raised IndexError on a training set with a single sample.
+        self.logger.info("train_x shape: {}, valid_x shape: {}".format(np.shape(train_x), np.shape(valid_x)))
+        model.summary()
+        early_stop = EarlyStopping(monitor='val_loss', patience=opt.Model['patience'], mode='auto')
+        history = model.fit(train_x, train_y, batch_size=opt.Model['batch_size'], epochs=opt.Model['epoch'], verbose=2,  validation_data=(valid_x, valid_y), callbacks=[early_stop], shuffle=False)
+        loss = np.round(history.history['loss'], decimals=5)
+        val_loss = np.round(history.history['val_loss'], decimals=5)
+        self.logger.info("-----模型训练经过{}轮迭代-----".format(len(loss)))
+        self.logger.info("训练集损失函数为:{}".format(loss))
+        self.logger.info("验证集损失函数为:{}".format(val_loss))
+        return model
+
+    def predict(self, test_x, batch_size=1):
+        """Run the loaded model on ``test_x`` and return its raw output.
+
+        Raises:
+            RuntimeError: if no model has been loaded, e.g. because
+                ``get_model`` failed and swallowed the error.
+        """
+        if self.model is None:
+            raise RuntimeError("model is not loaded; call get_model() successfully before predict()")
+        result = self.model.predict(test_x, batch_size=batch_size)
+        self.logger.info("执行预测方法")
+        return result
+
+
+# Script entry point: only sets a placeholder value, nothing is executed.
+if __name__ == "__main__":
+    run_code = 0
+

+ 119 - 0
models_processing/model_koi/tf_bp_pre.py

@@ -0,0 +1,119 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+# @FileName  :tf_bp_pre.py
+# @Time      :2025/2/13 13:35
+# @Author    :David
+# @Company: shenyang JY
+import json, copy
+import numpy as np
+from flask import Flask, request
+import logging, argparse, traceback
+from common.database_dml import *
+from common.processing_data_common import missing_features, str_to_list
+from data_processing.data_operation.data_handler import DataHandler
+from threading import Lock
+import time, yaml
+model_lock = Lock()
+from itertools import chain
+from common.logs import Log
+from tf_bp import BPHandler
+# logger = Log('tf_bp').logger()
+# Module-level setup shared by the route handler below.
+logger = Log('tf_bp').logger
+np.random.seed(42)  # NumPy random seed
+# tf.set_random_seed(42)  # TensorFlow random seed
+app = Flask('tf_bp_pre——service')
+
+with app.app_context():
+    # The path is relative, so it is resolved against the process working
+    # directory at import time.
+    with open('../model_koi/bp.yaml', 'r', encoding='utf-8') as f:
+        arguments = yaml.safe_load(f)
+
+    # Shared handler instances used by the request handler.
+    dh = DataHandler(logger, arguments)
+    bp = BPHandler(logger)
+
+
+@app.route('/nn_bp_predict', methods=['POST'])
+def model_prediction_bp():
+    """Handle a prediction request and store the forecast in MongoDB.
+
+    Request parameters are merged over a copy of the YAML defaults in
+    ``arguments``.  The handler reads the input data and scalers, scales the
+    features, loads the model, predicts, inverse-transforms the output,
+    clips it to ``[0, cap]`` and inserts the result.
+
+    Returns:
+        dict with ``success`` (1 or 0), ``args``, ``start_time``,
+        ``end_time`` and, on failure, ``msg`` holding the traceback.
+    """
+    # Record the start time of the request.
+    start_time = time.time()
+    result = {}
+    success = 0
+    print("Program starts execution!")
+    params_dict = request.values.to_dict()
+    # dict has no .deepcopy() method; copy.deepcopy keeps the module-level
+    # defaults untouched between requests.
+    args = copy.deepcopy(arguments)
+    args.update(params_dict)
+    try:
+        # Namespace view of the merged arguments, matching what the
+        # __main__ test path passes to the data handler and reads `cap` from.
+        opt = argparse.Namespace(**args)
+        print('args', args)
+        logger.info(args)
+        pre_data = get_data_from_mongo(args)
+        feature_scaler, target_scaler = get_scaler_model_from_mongo(args)
+        scaled_pre_x = dh.pre_data_handler(pre_data, feature_scaler, opt, bp_data=True)
+        bp.get_model(args)
+        # Keep the forecast in its own name: rebinding `result` to a list
+        # made the result['...'] assignments below raise TypeError.
+        forecast = list(chain.from_iterable(target_scaler.inverse_transform([bp.predict(scaled_pre_x).flatten()])))
+        pre_data['power_forecast'] = forecast[:len(pre_data)]
+        pre_data['farm_id'] = 'J00083'
+        pre_data['cdq'] = 1
+        pre_data['dq'] = 1
+        pre_data['zq'] = 1
+        # Use the request-merged args so per-request overrides are honoured.
+        pre_data.rename(columns={args['col_time']: 'date_time'}, inplace=True)
+        pre_data = pre_data[['date_time', 'power_forecast', 'farm_id', 'cdq', 'dq', 'zq']]
+
+        # A cap supplied as a request parameter arrives as a string.
+        cap = float(opt.cap)
+        pre_data['power_forecast'] = pre_data['power_forecast'].round(2)
+        pre_data.loc[pre_data['power_forecast'] > cap, 'power_forecast'] = cap
+        pre_data.loc[pre_data['power_forecast'] < 0, 'power_forecast'] = 0
+
+        insert_data_into_mongo(pre_data, args)
+        success = 1
+    except Exception:
+        # str.replace returns a new string; the original discarded it.
+        result['msg'] = traceback.format_exc().replace("\n", "\t")
+    end_time = time.time()
+
+    result['success'] = success
+    result['args'] = args
+    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
+    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
+    print("Program execution ends!")
+    return result
+
+
+# Manual test path: runs one prediction end to end without starting the
+# HTTP server (the waitress serve() call is commented out).
+if __name__ == "__main__":
+    print("Program starts execution!")
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    # Rebinds the module-level logger for this run.
+    logger = logging.getLogger("model_training_bp log")
+    from waitress import serve
+
+    # serve(app, host="0.0.0.0", port=1010x, threads=4)
+    print("server start!")
+
+    # ------------------------ test code ------------------------
+    args_dict = {"mongodb_database": 'david_test', 'scaler_table': 'j00083_scaler', 'model_name': 'bp1.0.test',
+                 'model_table': 'j00083_model', 'mongodb_read_table': 'j00083_test', 'col_time': 'date_time', 'mongodb_write_table': 'j00083_rs',
+                 'features': 'speed10,direction10,speed30,direction30,speed50,direction50,speed70,direction70,speed90,direction90,speed110,direction110,speed150,direction150,speed170,direction170'}
+    # Turn the comma-separated feature string into a list.
+    args_dict['features'] = args_dict['features'].split(',')
+    # Overlay the test settings on the YAML defaults (mutates `arguments`).
+    arguments.update(args_dict)
+    dh = DataHandler(logger, arguments)
+    bp = BPHandler(logger)
+    opt = argparse.Namespace(**arguments)
+
+    # Network input width follows the number of selected features.
+    opt.Model['input_size'] = len(opt.features)
+    pre_data = get_data_from_mongo(args_dict)
+    feature_scaler, target_scaler = get_scaler_model_from_mongo(arguments)
+    pre_x = dh.pre_data_handler(pre_data, feature_scaler, opt, bp_data=True)
+    bp.get_model(arguments)
+    result = bp.predict(pre_x)
+    # Flatten the model output, undo the target scaling, flatten to a list.
+    result1 = list(chain.from_iterable(target_scaler.inverse_transform([result.flatten()])))
+    pre_data['power_forecast'] = result1[:len(pre_data)]
+    pre_data['farm_id'] = 'J00083'
+    pre_data['cdq'] = 1
+    pre_data['dq'] = 1
+    pre_data['zq'] = 1
+    pre_data.rename(columns={arguments['col_time']: 'date_time'}, inplace=True)
+    pre_data = pre_data[['date_time', 'power_forecast', 'farm_id', 'cdq', 'dq', 'zq']]
+
+    # Round, then clip the forecast into [0, opt.cap].
+    pre_data['power_forecast'] = pre_data['power_forecast'].round(2)
+    pre_data.loc[pre_data['power_forecast'] > opt.cap, 'power_forecast'] = opt.cap
+    pre_data.loc[pre_data['power_forecast'] < 0, 'power_forecast'] = 0
+
+    insert_data_into_mongo(pre_data, arguments)

+ 92 - 0
models_processing/model_koi/tf_bp_train.py

@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+# @FileName  :tf_bp_train.py
+# @Time      :2025/2/13 13:35
+# @Author    :David
+# @Company: shenyang JY
+
+import json, copy
+import numpy as np
+from flask import Flask, request
+import traceback
+import logging, argparse
+from data_processing.data_operation.data_handler import DataHandler
+import time, yaml
+from models_processing.model_koi.tf_bp import BPHandler
+from common.database_dml import *
+import matplotlib.pyplot as plt
+from common.logs import Log
+# Module-level setup shared by the route handler below.
+logger = logging.getLogger()
+# logger = Log('models-processing').logger
+np.random.seed(42)  # NumPy random seed
+# tf.set_random_seed(42)  # TensorFlow random seed
+app = Flask('tf_bp_train——service')
+
+with app.app_context():
+    # The path is relative, so it is resolved against the process working
+    # directory at import time.
+    with open('../model_koi/bp.yaml', 'r', encoding='utf-8') as f:
+        arguments = yaml.safe_load(f)
+
+    # Shared handler instances used by the request handler.
+    dh = DataHandler(logger, arguments)
+    bp = BPHandler(logger)
+
+@app.route('/nn_bp_training', methods=['POST'])
+def model_training_bp():
+    """Handle a training request and store the model and scalers in MongoDB.
+
+    Request parameters are merged over a copy of the YAML defaults in
+    ``arguments``.  The handler reads the training data, preprocesses it,
+    trains the network and inserts the trained model and both scalers.
+
+    Returns:
+        dict with ``success`` (1 or 0), ``args``, ``start_time``,
+        ``end_time`` and, on failure, ``msg`` holding the traceback.
+    """
+    # Record the start time of the request.
+    start_time = time.time()
+    result = {}
+    success = 0
+    print("Program starts execution!")
+    args_dict = request.values.to_dict()
+    # dict has no .deepcopy() method; copy.deepcopy keeps the module-level
+    # defaults untouched between requests.
+    args = copy.deepcopy(arguments)
+    args.update(args_dict)
+    try:
+        opt = argparse.Namespace(**args)
+        logger.info(args_dict)
+        train_data = get_data_from_mongo(args_dict)
+        train_x, valid_x, train_y, valid_y, scaled_train_bytes, scaled_target_bytes = dh.train_data_handler(train_data, opt, bp_data=True)
+        # BPHandler.training unpacks train_x, train_y, valid_x, valid_y, so
+        # the parts must be passed in that order (as the __main__ path does).
+        bp_model = bp.training(opt, [train_x, train_y, valid_x, valid_y])
+        args_dict['params'] = json.dumps(args)
+        args_dict['gen_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
+        insert_trained_model_into_mongo(bp_model, args_dict)
+        insert_scaler_model_into_mongo(scaled_train_bytes, scaled_target_bytes, args)
+        success = 1
+    except Exception:
+        # str.replace returns a new string; the original discarded it.
+        result['msg'] = traceback.format_exc().replace("\n", "\t")
+    end_time = time.time()
+
+    result['success'] = success
+    result['args'] = args
+    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
+    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
+    print("Program execution ends!")
+    return result
+
+
+# Manual test path: runs one training pass end to end without starting the
+# HTTP server (the waitress serve() call is commented out).
+if __name__ == "__main__":
+    print("Program starts execution!")
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    # Rebinds the module-level logger for this run.
+    logger = logging.getLogger("model_training_bp log")
+    from waitress import serve
+
+    # serve(app, host="0.0.0.0", port=10103, threads=4)
+    print("server start!")
+    args_dict = {"mongodb_database": 'david_test', 'scaler_table': 'j00083_scaler', 'model_name': 'bp1.0.test',
+    'model_table': 'j00083_model', 'mongodb_read_table': 'j00083', 'col_time': 'dateTime',
+    'features': 'speed10,direction10,speed30,direction30,speed50,direction50,speed70,direction70,speed90,direction90,speed110,direction110,speed150,direction150,speed170,direction170'}
+    # Turn the comma-separated feature string into a list.
+    args_dict['features'] = args_dict['features'].split(',')
+    # Overlay the test settings on the YAML defaults (mutates `arguments`).
+    arguments.update(args_dict)
+    dh = DataHandler(logger, arguments)
+    bp = BPHandler(logger)
+    opt = argparse.Namespace(**arguments)
+    # Network input width follows the number of selected features.
+    opt.Model['input_size'] = len(opt.features)
+    train_data = get_data_from_mongo(args_dict)
+    train_x, valid_x, train_y, valid_y, scaled_train_bytes, scaled_target_bytes = dh.train_data_handler(train_data, opt, bp_data=True)
+    # Order matches what BPHandler.training unpacks: x/y train, then x/y valid.
+    bp_model = bp.training(opt, [train_x, train_y, valid_x, valid_y])
+
+    # Metadata stored alongside the trained model.
+    args_dict['gen_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
+    args_dict['params'] = arguments
+    args_dict['descr'] = '测试'
+    insert_trained_model_into_mongo(bp_model, args_dict)
+    insert_scaler_model_into_mongo(scaled_train_bytes, scaled_target_bytes, args_dict)

+ 1 - 1
models_processing/model_koi/tf_cnn_train.py

@@ -22,7 +22,7 @@ np.random.seed(42)  # NumPy随机种子
 app = Flask('tf_cnn_train——service')
 
 with app.app_context():
-    with open('../model_koi/bp.yaml', 'r', encoding='utf-8') as f:
+    with open('../model_koi/cnn.yaml', 'r', encoding='utf-8') as f:
         arguments = yaml.safe_load(f)
 
     dh = DataHandler(logger, arguments)

+ 1 - 1
models_processing/model_koi/tf_lstm_train.py

@@ -22,7 +22,7 @@ np.random.seed(42)  # NumPy随机种子
 app = Flask('tf_lstm_train——service')
 
 with app.app_context():
-    with open('../model_koi/bp.yaml', 'r', encoding='utf-8') as f:
+    with open('../model_koi/lstm.yaml', 'r', encoding='utf-8') as f:
         arguments = yaml.safe_load(f)
 
     dh = DataHandler(logger, arguments)