Browse source code

Merge branch 'dev_david' of anweiguo/algorithm_platform into dev_awg

liudawei 1 month ago
parent
commit
524c41e706

+ 17 - 14
data_processing/data_operation/pre_prod_ftp.py

@@ -120,6 +120,7 @@ def download_files_via_sftp(mappings, datetime_str, local_temp_dir, model_type):
     :param datetime_str: datetime string, used in file names
     :param local_temp_dir: local temporary directory path
     """
+    download_success = False
     transport = None
     sftp = None
     try:
@@ -146,10 +147,10 @@ def download_files_via_sftp(mappings, datetime_str, local_temp_dir, model_type):
 
                 try:
                     sftp.get(remote_path, local_path)
+                    download_success = True
                     logger.info(f"下载成功: {remote_path} -> {local_path}")
                 except Exception as e:
-                    print(f"File download failed {remote_path}: {str(e)}")
-
+                    logger.info(f"File download failed {remote_path}: {str(e)}")
     except paramiko.AuthenticationException:
         logger.info("认证失败,请检查用户名和密码")
     except paramiko.SSHException as e:
@@ -163,6 +164,7 @@ def download_files_via_sftp(mappings, datetime_str, local_temp_dir, model_type):
                 sftp.close()
             if transport and transport.is_active():
                 transport.close()
+        return download_success
 
 
 def upload_to_sftp(local_path: str, target_dir: str) -> bool:
@@ -294,20 +296,20 @@ def prod_data_handler(mappings, model_type):
         logger.info(f"目标时间: {datetime_str}")
 
         # Download the files
-        download_files_via_sftp(mappings, datetime_str, local_temp_dir, model_type)
+        if download_files_via_sftp(mappings, datetime_str, local_temp_dir, model_type):
 
-        # Process the downloaded files
-        process_zips(mappings, local_temp_dir, datetime_str, final_collect_dir)
+            # Process the downloaded files
+            process_zips(mappings, local_temp_dir, datetime_str, final_collect_dir)
 
-        # Create the final ZIP
-        zip_path = create_final_zip(final_collect_dir, datetime_str, model_type)
+            # Create the final ZIP
+            zip_path = create_final_zip(final_collect_dir, datetime_str, model_type)
 
-        # Upload the packaged ZIP file
-        if upload_to_sftp(zip_path, f"/{model_type}"):
-            # Step 3: clean up after a successful upload
-            clean_up_file(zip_path)
-        else:
-            logger.info("[WARNING] Upload failed; keeping the local file")
+            # Upload the packaged ZIP file
+            if upload_to_sftp(zip_path, f"/{model_type}"):
+                # Step 3: clean up after a successful upload
+                clean_up_file(zip_path)
+            else:
+                logger.info("[WARNING] Upload failed; keeping the local file")
 
 
 from apscheduler.schedulers.blocking import BlockingScheduler
@@ -342,4 +344,5 @@ def configure_scheduler():
         logger.info("⏹️ 定时任务已停止")
 
 if __name__ == "__main__":
-    configure_scheduler()
+    # configure_scheduler()
+    target_job()
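Reviewer note: `download_success` flips to True after the first successful `sftp.get`, so `prod_data_handler` proceeds to packaging and upload even when some mappings fail. If all-or-nothing semantics were intended, here is a minimal sketch of the loop (the `planned_transfers` iterable is illustrative, not a name from this repo):

```python
download_success = True   # assume success until any transfer fails
failed = []
for remote_path, local_path in planned_transfers:  # hypothetical iterable of path pairs
    try:
        sftp.get(remote_path, local_path)
        logger.info(f"Download succeeded: {remote_path} -> {local_path}")
    except Exception as e:
        download_success = False
        failed.append(remote_path)
        logger.info(f"File download failed {remote_path}: {e}")
if failed:
    logger.info(f"{len(failed)} file(s) failed to download: {failed}")
```

Also note the new `return download_success` sits inside the `finally` block; that works here because every exception is caught above, but placing the `return` after the `try/finally` would be safer if that ever changes.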

+ 162 - 0
models_processing/model_tf/tf_test.py

@@ -0,0 +1,162 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+# @FileName  :tf_test.py
+# @Time      :2025/2/12 14:03
+# @Author    :David
+# @Company: shenyang JY
+
+from tensorflow.keras.layers import Input, Dense, LSTM, concatenate, Conv1D, Conv2D, MaxPooling1D, Reshape, Flatten
+from tensorflow.keras.models import Model, load_model
+from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard, ReduceLROnPlateau
+from tensorflow.keras import optimizers, regularizers
+from tensorflow.keras.layers import BatchNormalization, GlobalAveragePooling1D, Dropout, Add, Concatenate, Multiply
+from models_processing.model_tf.losses import region_loss
+import numpy as np
+from common.database_dml_koi import *
+from models_processing.model_tf.settings import set_deterministic
+from threading import Lock
+import argparse
+model_lock = Lock()
+set_deterministic(42)
+
+class TSHandler(object):
+    def __init__(self, logger, args):
+        self.logger = logger
+        self.opt = argparse.Namespace(**args)
+        self.model = None
+
+    def get_model(self, args):
+        """
+        Singleton pattern plus a thread lock, preventing thread-safety issues during asynchronous model loading
+        """
+        try:
+            with model_lock:
+                loss = region_loss(self.opt)
+                self.model = get_keras_model_from_mongo(args, {type(loss).__name__: loss})
+        except Exception as e:
+            self.logger.info("Failed to load model weights: {}".format(e.args))
+
+    @staticmethod
+    def get_keras_model(opt):
+        """优化后的新能源功率预测模型
+        主要改进点:
+        1. 多尺度特征提取
+        2. 注意力机制
+        3. 残差连接
+        4. 弹性正则化
+        5. 自适应学习率调整
+        """
+        # Regularization configuration
+        l1_l2_reg = regularizers.l1_l2(
+            l1=opt.Model['lambda_value_1'],
+            l2=opt.Model['lambda_value_2']
+        )
+
+        # Input layer
+        nwp_input = Input(shape=(opt.Model['time_step'], opt.Model['input_size']), name='nwp_input')
+
+        # %% Multi-scale feature extraction module
+        def multi_scale_block(input_layer):
+            # Parallel convolution paths
+            conv3 = Conv1D(64, 3, padding='causal', activation='relu')(input_layer)
+            conv5 = Conv1D(64, 5, padding='causal', activation='relu')(input_layer)
+            return Concatenate()([conv3, conv5])
+
+        # Feature trunk
+        x = multi_scale_block(nwp_input)
+
+        # %% Residual attention module
+        def residual_attention_block(input_layer, filters):
+            # Main path
+            y = Conv1D(filters, 3, padding='same', activation='relu')(input_layer)
+            y = BatchNormalization()(y)
+
+            # Attention gating
+            attention = Dense(filters, activation='sigmoid')(y)
+            y = Multiply()([y, attention])
+
+            # Residual connection
+            shortcut = Conv1D(filters, 1, padding='same')(input_layer)
+            return Add()([y, shortcut])
+
+        x = residual_attention_block(x, 128)
+        x = Dropout(0.3)(x)
+
+        # %% Feature aggregation
+        x = GlobalAveragePooling1D()(x)  # replaces Flatten while preserving temporal features
+
+        # %% Fully connected layers with tunable depth
+        x = Dense(256, activation='swish', kernel_regularizer=l1_l2_reg)(x)
+        x = BatchNormalization()(x)
+        x = Dropout(0.5)(x)
+
+        # %% Output layer (extensible to probabilistic forecasting)
+        output = Dense(1, activation='linear', name='main_output')(x)
+
+        # Probabilistic forecasting extension (optional)
+        # variance = Dense(1, activation='softplus')(x)  # outputs the variance
+        # output = Concatenate()([output, variance])
+
+        # %% Model compilation
+        model = Model(inputs=nwp_input, outputs=output)
+
+        # Adaptive optimizer configuration
+        adam = optimizers.Adam(
+            learning_rate=opt.Model['learning_rate'],
+            beta_1=0.92,  # tuned momentum parameter
+            beta_2=0.999,
+            epsilon=1e-07,
+            amsgrad=True
+        )
+
+        # Compile configuration (assumes region_loss is defined)
+        model.compile(
+            loss=region_loss(opt),  # custom loss function
+            optimizer=adam,
+            metrics=['mae', 'mse']  # monitored metrics
+        )
+
+        return model
+
+    def train_init(self):
+        try:
+            if self.opt.Model['add_train']:
+                # Continued (incremental) training, supporting model refinement
+                loss = region_loss(self.opt)
+                base_train_model = get_keras_model_from_mongo(vars(self.opt), {type(loss).__name__: loss})
+                base_train_model.summary()
+                self.logger.info("Loaded the base model for continued training")
+            else:
+                base_train_model = self.get_keras_model(self.opt)
+            return base_train_model
+        except Exception as e:
+            self.logger.info("Failed to load model weights: {}".format(e.args))
+
+    def training(self, train_and_valid_data):
+        model = self.train_init()
+        model.summary()
+        train_x, train_y, valid_x, valid_y = train_and_valid_data
+        # Callback configuration
+        callbacks = [
+            EarlyStopping(monitor='val_loss', patience=self.opt.Model['patience'], restore_best_weights=True),
+            ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=8, min_lr=1e-7)
+        ]
+
+        history = model.fit(train_x, train_y, batch_size=self.opt.Model['batch_size'], epochs=self.opt.Model['epoch'],
+                            verbose=2, validation_data=(valid_x, valid_y), callbacks=callbacks, shuffle=False)
+        loss = np.round(history.history['loss'], decimals=5)
+        val_loss = np.round(history.history['val_loss'], decimals=5)
+        self.logger.info("-----模型训练经过{}轮迭代-----".format(len(loss)))
+        self.logger.info("训练集损失函数为:{}".format(loss))
+        self.logger.info("验证集损失函数为:{}".format(val_loss))
+        return model
+
+    def predict(self, test_x, batch_size=1):
+        result = self.model.predict(test_x, batch_size=batch_size)
+        self.logger.info("执行预测方法")
+        return result
+
+
+
+if __name__ == "__main__":
+    run_code = 0
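Reviewer note: `get_keras_model_from_mongo(args, {type(loss).__name__: loss})` passes the custom loss keyed by its class name, which is what Keras needs to deserialize a model compiled with `region_loss`. For orientation, a minimal sketch of the same pattern against the plain Keras file API (the path and function name here are illustrative):

```python
from tensorflow.keras.models import load_model
from models_processing.model_tf.losses import region_loss

def load_with_region_loss(model_path, opt):
    # Keras resolves custom objects by name during deserialization, so the
    # dict key must match the loss class name recorded in the saved config.
    loss = region_loss(opt)
    return load_model(model_path, custom_objects={type(loss).__name__: loss})
```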

+ 134 - 0
models_processing/model_tf/tf_test_pre.py

@@ -0,0 +1,134 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+# @FileName  :tf_test_pre.py
+# @Time      :2025/2/13 10:52
+# @Author    :David
+# @Company: shenyang JY
+import os, json, copy
+import numpy as np
+from flask import Flask, request, g
+import logging, argparse, traceback
+from common.database_dml_koi import *
+from common.processing_data_common import missing_features, str_to_list
+from data_processing.data_operation.data_handler import DataHandler
+from threading import Lock
+import time, yaml
+model_lock = Lock()
+from itertools import chain
+from common.logs import Log
+from models_processing.model_tf.tf_test import TSHandler
+logger = Log('tf_test').logger
+np.random.seed(42)  # NumPy random seed
+# tf.set_random_seed(42)  # TensorFlow random seed
+app = Flask('tf_test_pre_service')
+
+with app.app_context():
+    current_dir = os.path.dirname(os.path.abspath(__file__))
+    with open(os.path.join(current_dir, 'lstm.yaml'), 'r', encoding='utf-8') as f:
+        args = yaml.safe_load(f)
+
+    dh = DataHandler(logger, args)
+    ts = TSHandler(logger, args)
+
+@app.before_request
+def update_config():
+    # ------------ Assemble and merge request parameters ------------
+    args_dict = request.values.to_dict()
+    args_dict['features'] = args_dict['features'].split(',')
+    args.update(args_dict)
+    opt = argparse.Namespace(**args)
+    dh.opt = opt
+    ts.opt = opt
+    g.opt = opt
+    logger.info(args)
+
+@app.route('/nn_test_predict', methods=['POST'])
+def model_prediction_test():
+    # Record the program start time
+    start_time = time.time()
+    result = {}
+    success = 0
+    print("Program starts execution!")
+    try:
+        pre_data = get_data_from_mongo(args)
+        feature_scaler, target_scaler = get_scaler_model_from_mongo(args)
+        scaled_pre_x, pre_data = dh.pre_data_handler(pre_data, feature_scaler)
+        ts.opt.cap = round(target_scaler.transform(np.array([[args['cap']]]))[0, 0], 2)
+        ts.get_model(args)
+        res = list(chain.from_iterable(target_scaler.inverse_transform(ts.predict(scaled_pre_x))))
+        pre_data['farm_id'] = args.get('farm_id', 'null')
+        if args.get('algorithm_test', 0):
+            pre_data[args['model_name']] = res[:len(pre_data)]
+            pre_data.rename(columns={args['col_time']: 'dateTime'}, inplace=True)
+            pre_data = pre_data[['dateTime', 'farm_id', args['target'], args['model_name'], 'dq']]
+            pre_data = pre_data.melt(id_vars=['dateTime', 'farm_id', args['target']], var_name='model', value_name='power_forecast')
+            res_cols = ['dateTime', 'power_forecast', 'farm_id', args['target'], 'model']
+            if 'howLongAgo' in args:
+                pre_data['howLongAgo'] = int(args['howLongAgo'])
+                res_cols += ['howLongAgo']
+        else:
+            pre_data['cdq'] = args.get('cdq', 1)
+            pre_data['dq'] = args.get('dq', 1)
+            pre_data['zq'] = args.get('zq', 1)
+            pre_data['power_forecast'] = res[:len(pre_data)]
+            pre_data.rename(columns={args['col_time']: 'date_time'}, inplace=True)
+            res_cols = ['date_time', 'power_forecast', 'farm_id', 'cdq', 'dq', 'zq']
+        pre_data = pre_data[res_cols]
+
+        pre_data['power_forecast'] = pre_data['power_forecast'].round(2)
+        pre_data.loc[pre_data['power_forecast'] > args['cap'], 'power_forecast'] = args['cap']
+        pre_data.loc[pre_data['power_forecast'] < 0, 'power_forecast'] = 0
+
+        insert_data_into_mongo(pre_data, args)
+        success = 1
+    except Exception as e:
+        my_exception = traceback.format_exc()
+        my_exception = my_exception.replace("\n", "\t")
+        result['msg'] = my_exception
+    end_time = time.time()
+
+    result['success'] = success
+    result['args'] = args
+    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
+    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
+    print("Program execution ends!")
+    return result
+
+
+if __name__ == "__main__":
+    print("Program starts execution!")
+    from waitress import serve
+    print("server start!")  # serve() blocks, so log before starting
+    serve(app, host="0.0.0.0", port=10116)  # matches the run_all.py registration
+
+    # ------------------------ Test code ------------------------
+    # args_dict = {"mongodb_database": 'david_test', 'scaler_table': 'j00083_scaler', 'model_name': 'bp1.0.test',
+    #              'model_table': 'j00083_model', 'mongodb_read_table': 'j00083_test', 'col_time': 'date_time', 'mongodb_write_table': 'j00083_rs',
+    #              'features': 'speed10,direction10,speed30,direction30,speed50,direction50,speed70,direction70,speed90,direction90,speed110,direction110,speed150,direction150,speed170,direction170'}
+    # args_dict['features'] = args_dict['features'].split(',')
+    # arguments.update(args_dict)
+    # dh = DataHandler(logger, arguments)
+    # ts = TSHandler(logger)
+    # opt = argparse.Namespace(**arguments)
+    #
+    # opt.Model['input_size'] = len(opt.features)
+    # pre_data = get_data_from_mongo(args_dict)
+    # feature_scaler, target_scaler = get_scaler_model_from_mongo(arguments)
+    # pre_x = dh.pre_data_handler(pre_data, feature_scaler, opt)
+    # ts.get_model(arguments)
+    # result = ts.predict(pre_x)
+    # result1 = list(chain.from_iterable(target_scaler.inverse_transform([result.flatten()])))
+    # pre_data['power_forecast'] = result1[:len(pre_data)]
+    # pre_data['farm_id'] = 'J00083'
+    # pre_data['cdq'] = 1
+    # pre_data['dq'] = 1
+    # pre_data['zq'] = 1
+    # pre_data.rename(columns={arguments['col_time']: 'date_time'}, inplace=True)
+    # pre_data = pre_data[['date_time', 'power_forecast', 'farm_id', 'cdq', 'dq', 'zq']]
+    #
+    # pre_data['power_forecast'] = pre_data['power_forecast'].round(2)
+    # pre_data.loc[pre_data['power_forecast'] > opt.cap, 'power_forecast'] = opt.cap
+    # pre_data.loc[pre_data['power_forecast'] < 0, 'power_forecast'] = 0
+    #
+    # insert_data_into_mongo(pre_data, arguments)
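Reviewer note: a quick smoke test for the new prediction endpoint. The sketch assumes the port registered for this service in run_all.py (10116); the field names mirror the commented-out test block above, and the values are placeholders rather than a known-good configuration:

```python
import requests

# Placeholder call to /nn_test_predict; request values arrive as strings,
# and 'cap' is expected to come from lstm.yaml on the server side.
payload = {
    'mongodb_database': 'david_test',
    'scaler_table': 'j00083_scaler',
    'model_name': 'bp1.0.test',
    'model_table': 'j00083_model',
    'mongodb_read_table': 'j00083_test',
    'mongodb_write_table': 'j00083_rs',
    'col_time': 'date_time',
    'features': 'speed10,direction10,speed30,direction30',
}
resp = requests.post('http://localhost:10116/nn_test_predict', data=payload)
print(resp.json())
```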

+ 98 - 0
models_processing/model_tf/tf_test_train.py

@@ -0,0 +1,98 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+# @FileName  :tf_test_train.py
+# @Time      :2025/2/13 10:52
+# @Author    :David
+# @Company: shenyang JY
+import os, json, copy
+import numpy as np
+from flask import Flask, request, jsonify
+import traceback, uuid
+import logging, argparse
+from data_processing.data_operation.data_handler import DataHandler
+import time, yaml, threading
+from models_processing.model_tf.tf_test import TSHandler
+from common.database_dml_koi import *
+from common.logs import Log
+logger = Log('tf_test').logger
+np.random.seed(42)  # NumPy random seed
+app = Flask('tf_test_train_service')
+
+with app.app_context():
+    current_dir = os.path.dirname(os.path.abspath(__file__))
+    with open(os.path.join(current_dir, 'lstm.yaml'), 'r', encoding='utf-8') as f:
+        args = yaml.safe_load(f)
+
+    dh = DataHandler(logger, args)
+    ts = TSHandler(logger, args)
+
+@app.before_request
+def update_config():
+    # ------------ Assemble and merge request parameters ------------
+    args_dict = request.values.to_dict()
+    args_dict['features'] = args_dict['features'].split(',')
+    args.update(args_dict)
+    opt = argparse.Namespace(**args)
+    dh.opt = opt
+    ts.opt = opt
+    logger.info(args)
+
+@app.route('/nn_test_training', methods=['POST'])
+def model_training_test():
+    # Record the program start time
+    start_time = time.time()
+    result = {}
+    success = 0
+    print("Program starts execution!")
+    try:
+        # ------------ Fetch data and preprocess the training data ------------
+        train_data = get_data_from_mongo(args)
+        train_x, train_y, valid_x, valid_y, scaled_train_bytes, scaled_target_bytes, scaled_cap = dh.train_data_handler(train_data)
+        # ------------ Train the model and save it ------------
+        ts.opt.Model['input_size'] = train_x.shape[2]
+        ts.opt.cap = round(scaled_cap, 2)
+        ts_model = ts.training([train_x, train_y, valid_x, valid_y])
+
+        args['params'] = json.dumps(args)
+        args['descr'] = 'test'
+        args['gen_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
+
+        insert_trained_model_into_mongo(ts_model, args)
+        insert_scaler_model_into_mongo(scaled_train_bytes, scaled_target_bytes, args)
+        success = 1
+    except Exception as e:
+        my_exception = traceback.format_exc()
+        my_exception = my_exception.replace("\n", "\t")
+        result['msg'] = my_exception
+    end_time = time.time()
+    result['success'] = success
+    result['args'] = args
+    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
+    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
+    print("Program execution ends!")
+    return result
+
+
+if __name__ == "__main__":
+    print("Program starts execution!")
+    from waitress import serve
+    print("server start!")  # serve() blocks, so log before starting
+    serve(app, host="0.0.0.0", port=10117)  # matches the run_all.py registration
+    # args_dict = {"mongodb_database": 'realtimeDq', 'scaler_table': 'j00600_scaler', 'model_name': 'lstm1',
+    # 'model_table': 'j00600_model', 'mongodb_read_table': 'j00600', 'col_time': 'dateTime',
+    # 'features': 'speed10,direction10,speed30,direction30,speed50,direction50,speed70,direction70,speed90,direction90,speed110,direction110,speed150,direction150,speed170,direction170'}
+    # args_dict['features'] = args_dict['features'].split(',')
+    # args.update(args_dict)
+    # dh = DataHandler(logger, args)
+    # ts = TSHandler(logger, args)
+    # opt = argparse.Namespace(**args)
+    # opt.Model['input_size'] = len(opt.features)
+    # train_data = get_data_from_mongo(args_dict)
+    # train_x, train_y, valid_x, valid_y, scaled_train_bytes, scaled_target_bytes = dh.train_data_handler(train_data)
+    # ts_model = ts.training([train_x, train_y, valid_x, valid_y])
+    #
+    # args_dict['gen_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
+    # args_dict['params'] = args
+    # args_dict['descr'] = 'test'
+    # insert_trained_model_into_mongo(ts_model, args_dict)
+    # insert_scaler_model_into_mongo(scaled_train_bytes, scaled_target_bytes, args_dict)
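Reviewer note: both new services mutate the module-level `args` dict and the shared `dh`/`ts` handlers inside `update_config`, so concurrent requests can interleave configurations. A minimal per-request alternative, sketched under the assumption that the view functions read the namespace from `flask.g` rather than the shared handlers:

```python
import argparse, copy
from flask import g, request

@app.before_request
def update_config():
    # Build a per-request options namespace instead of mutating shared state.
    local_args = copy.deepcopy(args)            # args loaded once from lstm.yaml
    overrides = request.values.to_dict()
    if 'features' in overrides:
        overrides['features'] = overrides['features'].split(',')
    local_args.update(overrides)
    g.opt = argparse.Namespace(**local_args)    # views read g.opt, not module state
```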

+ 2 - 0
run_all.py

@@ -24,6 +24,8 @@ services = [
     ("models_processing/model_tf/tf_cnn_train.py", 10113),
     ("models_processing/model_tf/tf_lstm_pre.py", 10114),
     ("models_processing/model_tf/tf_lstm_train.py", 10115),
+    ("models_processing/model_tf/tf_test_pre.py", 10116),
+    ("models_processing/model_tf/tf_test_train.py", 10117),
 
     ("post_processing/post_processing.py", 10098),
     ("evaluation_processing/analysis.py", 10099),