David 2 周之前
父節點
當前提交
f100b8fca7

+ 0 - 136
models_processing/model_tf/async_query_task.py

@@ -1,136 +0,0 @@
-#!/usr/bin/env python
-# -*- coding:utf-8 -*-
-# @FileName  :sync_query.py
-# @Time      :2025/3/5 12:55
-# @Author    :David
-# @Company: shenyang JY
-
-
-from flask import jsonify
-import threading
-import uuid
-import time
-import traceback
-from collections import defaultdict
-
-# 全局存储训练进度(生产环境建议使用Redis)
-training_progress = defaultdict(dict)
-progress_lock = threading.Lock()
-
-
-@app.route('/nn_bp_training', methods=['POST'])
-def start_training():
-    """启动训练任务接口"""
-    task_id = str(uuid.uuid4())
-
-    # 初始化任务进度
-    with progress_lock:
-        training_progress[task_id] = {
-            'status': 'pending',
-            'progress': 0,
-            'message': '任务已创建',
-            'result': None,
-            'start_time': time.time(),
-            'end_time': None
-        }
-
-    # 启动异步训练线程
-    thread = threading.Thread(
-        target=async_training_task,
-        args=(task_id,),
-        daemon=True
-    )
-    thread.start()
-
-    return jsonify({
-        'success': 1,
-        'task_id': task_id,
-        'message': '训练任务已启动'
-    })
-
-
-@app.route('/training_progress/')
-def get_progress(task_id):
-    """查询训练进度接口"""
-    with progress_lock:
-        progress = training_progress.get(task_id, {
-            'status': 'not_found',
-            'progress': 0,
-            'message': '任务不存在'
-        })
-
-    return jsonify(progress)
-
-
-def async_training_task(task_id):
-    """异步训练任务"""
-    args = {}  # 根据实际情况获取参数
-    result = {}
-    start_time = time.time()
-
-    try:
-        # 更新任务状态
-        update_progress(task_id, 10, '数据准备中...')
-
-        # ------------ 数据准备 ------------
-        train_data = get_data_from_mongo(args)
-        train_x, train_y, valid_x, valid_y, scaled_train_bytes, scaled_target_bytes = dh.train_data_handler(
-            train_data, bp_data=True)
-
-        # ------------ 模型训练 ------------
-        update_progress(task_id, 30, '模型训练中...')
-        bp.opt.Model['input_size'] = train_x.shape[1]
-
-        # 包装训练函数以跟踪进度
-        def training_callback(epoch, total_epoch):
-            progress = 30 + 60 * (epoch / total_epoch)
-            update_progress(task_id, progress, f'训练第 {epoch}/{total_epoch} 轮')
-
-        bp_model = bp.training([train_x, train_y, valid_x, valid_y],
-                               callback=training_callback)
-
-        # ------------ 保存结果 ------------
-        update_progress(task_id, 95, '保存模型中...')
-        args['params'] = json.dumps(args)
-        args['descr'] = '测试'
-        args['gen_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
-        insert_trained_model_into_mongo(bp_model, args)
-        insert_scaler_model_into_mongo(scaled_train_bytes, scaled_target_bytes, args)
-
-        # 最终结果
-        result.update({
-            'success': 1,
-            'args': args,
-            'start_time': time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time)),
-            'end_time': time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()),
-        })
-
-        update_progress(task_id, 100, '训练完成', result=result)
-
-    except Exception as e:
-        error_msg = traceback.format_exc().replace("\n", "\t")
-        result = {
-            'success': 0,
-            'msg': error_msg,
-            'start_time': time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time)),
-            'end_time': time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()),
-        }
-        update_progress(task_id, -1, '训练失败', result=result)
-
-
-def update_progress(task_id, progress, message, result=None):
-    """更新进度工具函数"""
-    with progress_lock:
-        training_progress[task_id]['progress'] = progress
-        training_progress[task_id]['message'] = message
-        training_progress[task_id]['status'] = 'running'
-
-        if progress >= 100:
-            training_progress[task_id]['status'] = 'completed'
-            training_progress[task_id]['end_time'] = time.time()
-        elif progress < 0:
-            training_progress[task_id]['status'] = 'failed'
-            training_progress[task_id]['end_time'] = time.time()
-
-        if result:
-            training_progress[task_id]['result'] = result

+ 80 - 1
models_processing/model_tf/tf_bilstm.py

@@ -5,6 +5,85 @@
 # @Author    :David
 # @Company: shenyang JY
 
 
+from tensorflow.keras.layers import Input, Dense, LSTM, concatenate, Conv1D, Conv2D, MaxPooling1D, Reshape, Flatten, Bidirectional
+from tensorflow.keras.models import Model, load_model
+from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard, ReduceLROnPlateau
+from tensorflow.keras import optimizers, regularizers
+from models_processing.model_tf.losses import region_loss
+import numpy as np
+from common.database_dml_koi import *
+from models_processing.model_tf.settings import set_deterministic
+from threading import Lock
+import argparse
+model_lock = Lock()
+set_deterministic(42)
+
class TSHandler(object):
    """Handler for the CNN + bidirectional-LSTM forecasting model (Keras).

    Wraps model loading from MongoDB, network construction, training with
    early stopping, and prediction for the short-term power forecast service.
    """

    def __init__(self, logger, args):
        self.logger = logger
        # Expose the plain config dict as attribute-style options.
        self.opt = argparse.Namespace(**args)
        self.model = None         # Keras model, set by get_model()
        self.model_params = None  # persisted model's params (JSON string), set alongside the model

    def get_model(self, args):
        """Load the persisted model from MongoDB under a module-level lock.

        The lock serializes concurrent loads across request threads.
        Failures are logged (not raised); ``self.model`` stays ``None``.
        """
        try:
            with model_lock:
                loss = region_loss(self.opt)
                self.model, self.model_params = get_keras_model_from_mongo(
                    args, {type(loss).__name__: loss})
        except Exception as e:
            # FIX: load failures were logged at info level; surface them as errors.
            self.logger.error("加载模型权重失败:{}".format(e.args))

    @staticmethod
    def get_keras_model(opt):
        """Build and compile the Conv1D -> MaxPool -> BiLSTM -> Dense network."""
        loss = region_loss(opt)
        l2_reg = regularizers.l2(opt.Model['lambda_value_2'])
        nwp_input = Input(shape=(opt.Model['time_step'], opt.Model['input_size']), name='nwp')

        con1 = Conv1D(filters=64, kernel_size=5, strides=1, padding='valid',
                      activation='relu', kernel_regularizer=l2_reg)(nwp_input)
        con1_p = MaxPooling1D(pool_size=5, strides=1, padding='valid',
                              data_format='channels_last')(con1)
        # merge_mode='concat': forward/backward outputs are concatenated,
        # so the final feature dimension is 2 * hidden_size.
        nwp_bi_lstm = Bidirectional(
            LSTM(units=opt.Model['hidden_size'], return_sequences=False,
                 kernel_regularizer=l2_reg),
            merge_mode='concat')(con1_p)

        output = Dense(opt.Model['output_size'], name='cdq_output')(nwp_bi_lstm)

        model = Model(nwp_input, output)
        adam = optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999,
                               epsilon=1e-7, amsgrad=True)
        model.compile(loss=loss, optimizer=adam)
        return model

    def train_init(self):
        """Load the persisted base model for incremental ("加强") training.

        Returns the loaded Keras model, or ``False`` when loading fails so the
        caller can fall back to building a fresh model.
        """
        try:
            loss = region_loss(self.opt)
            base_train_model, self.model_params = get_keras_model_from_mongo(
                vars(self.opt), {type(loss).__name__: loss})
            base_train_model.summary()
            self.logger.info("已加载加强训练基础模型")
            return base_train_model
        except Exception as e:
            # FIX: load failures were logged at info level; surface them as errors.
            self.logger.error("加载加强训练模型权重失败:{}".format(e.args))
            return False

    def training(self, model, train_and_valid_data):
        """Fit ``model`` on (train_x, train_y, valid_x, valid_y) with early stopping.

        shuffle=False keeps the temporal order of the series; returns the
        trained model.
        """
        model.summary()
        train_x, train_y, valid_x, valid_y = train_and_valid_data
        early_stop = EarlyStopping(monitor='val_loss',
                                   patience=self.opt.Model['patience'], mode='auto')
        history = model.fit(train_x, train_y,
                            batch_size=self.opt.Model['batch_size'],
                            epochs=self.opt.Model['epoch'],
                            verbose=2, validation_data=(valid_x, valid_y),
                            callbacks=[early_stop], shuffle=False)
        loss = np.round(history.history['loss'], decimals=5)
        val_loss = np.round(history.history['val_loss'], decimals=5)
        self.logger.info("-----模型训练经过{}轮迭代-----".format(len(loss)))
        self.logger.info("训练集损失函数为:{}".format(loss))
        self.logger.info("验证集损失函数为:{}".format(val_loss))
        return model

    def predict(self, test_x, batch_size=1):
        """Run inference on scaled features with the loaded model.

        FIX: log before calling predict(), so the attempt is recorded even if
        prediction raises (the original logged only after success).
        """
        self.logger.info("执行预测方法")
        result = self.model.predict(test_x, batch_size=batch_size)
        return result
+
+
 
 
 if __name__ == "__main__":
-    run_code = 0
+    run_code = 0

+ 141 - 0
models_processing/model_tf/tf_lstm3_pre.py

@@ -0,0 +1,141 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+# @FileName  :tf_lstm_pre.py
+# @Time      :2025/2/13 10:52
+# @Author    :David
+# @Company: shenyang JY
+import json, copy
+import numpy as np
+from flask import Flask, request, g
+import logging, argparse, traceback
+from common.database_dml_koi import *
+from common.processing_data_common import missing_features, str_to_list
+from data_processing.data_operation.data_handler import DataHandler
+from threading import Lock
+import time, yaml
+from copy import deepcopy
+model_lock = Lock()
+from itertools import chain
+from common.logs import Log
+from tf_lstm import TSHandler
+# logger = Log('tf_bp').logger()
+logger = Log('tf_ts').logger
+np.random.seed(42)  # NumPy随机种子
+# tf.set_random_seed(42)  # TensorFlow随机种子
+app = Flask('tf_lstm_pre——service')
+
# Resolve lstm.yaml next to this file so the service starts from any working directory.
current_dir = os.path.dirname(os.path.abspath(__file__))
with open(os.path.join(current_dir, 'lstm.yaml'), 'r', encoding='utf-8') as f:
    global_config = yaml.safe_load(f)  # read-only global config; requests work on deep copies
+
@app.before_request
def update_config():
    """Assemble the per-request configuration.

    Deep-copies the read-only global YAML config, overlays the request's
    parameters, and stores attribute-style options plus fresh handler
    instances on the request context ``g``.
    """
    cfg = deepcopy(global_config)
    params = request.values.to_dict()
    # 'features' arrives as a comma-separated string; when absent, keep the
    # list already present in the global config.
    if 'features' in params:
        params['features'] = params['features'].split(',')
    else:
        params['features'] = cfg['features']
    cfg.update(params)

    # Request-scoped state: options plus independent handler instances.
    g.opt = argparse.Namespace(**cfg)
    g.dh = DataHandler(logger, cfg)
    g.ts = TSHandler(logger, cfg)
+
@app.route('/tf_lstm_predict', methods=['POST'])
def model_prediction_bp():
    """Prediction endpoint.

    Loads the scalers and model from MongoDB, runs inference on the
    requested data window, clips the forecast into [0, cap] and writes it
    back to MongoDB.

    Returns a dict with a success flag, the effective args and start/end
    timestamps; on failure 'msg' carries the flattened traceback.
    """
    start_time = time.time()
    result = {}
    success = 0
    dh = g.dh
    ts = g.ts
    args = deepcopy(g.opt.__dict__)
    logger.info("Program starts execution!")
    try:
        pre_data = get_data_from_mongo(args)
        if args.get('algorithm_test', 0):
            # Algorithm-test datasets use camelCase column names; normalize.
            field_mapping = {'clearsky_ghi': 'clearskyGhi', 'dni_calcd': 'dniCalcd',
                             'surface_pressure': 'surfacePressure'}
            pre_data = pre_data.rename(columns=field_mapping)
        feature_scaler, target_scaler = get_scaler_model_from_mongo(args)
        # Scale the station capacity into model space for the handler's use.
        ts.opt.cap = round(target_scaler.transform(np.array([[float(args['cap'])]]))[0, 0], 2)
        ts.get_model(args)
        # The persisted model dictates which features to feed at predict time.
        dh.opt.features = json.loads(ts.model_params)['Model']['features'].split(',')
        scaled_pre_x, pre_data = dh.pre_data_handler(pre_data, feature_scaler, time_series=3)
        res = list(chain.from_iterable(target_scaler.inverse_transform(ts.predict(scaled_pre_x))))
        pre_data['farm_id'] = args.get('farm_id', 'null')
        if int(args.get('algorithm_test', 0)):
            pre_data[args['model_name']] = res[:len(pre_data)]
            pre_data.rename(columns={args['col_time']: 'dateTime'}, inplace=True)
            pre_data = pre_data[['dateTime', 'farm_id', args['target'], args['model_name'], 'dq']]
            pre_data = pre_data.melt(id_vars=['dateTime', 'farm_id', args['target']],
                                     var_name='model', value_name='power_forecast')
            res_cols = ['dateTime', 'power_forecast', 'farm_id', args['target'], 'model']
            if 'howLongAgo' in args:
                pre_data['howLongAgo'] = int(args['howLongAgo'])
                res_cols += ['howLongAgo']
        else:
            pre_data['power_forecast'] = res[:len(pre_data)]
            pre_data.rename(columns={args['col_time']: 'date_time'}, inplace=True)
            res_cols = ['date_time', 'power_forecast', 'farm_id']
        pre_data = pre_data[res_cols]

        # Round the forecast and clip it into the physical range [0, cap].
        pre_data.loc[:, 'power_forecast'] = pre_data['power_forecast'].round(2)
        pre_data.loc[pre_data['power_forecast'] > float(args['cap']), 'power_forecast'] = float(args['cap'])
        pre_data.loc[pre_data['power_forecast'] < 0, 'power_forecast'] = 0

        insert_data_into_mongo(pre_data, args)
        success = 1
    except Exception:
        # BUG FIX: str.replace returns a new string; the original discarded the
        # result, so 'msg' kept raw newlines. Store the flattened traceback.
        result['msg'] = traceback.format_exc().replace("\n", "\t")
    end_time = time.time()

    result['success'] = success
    result['args'] = args
    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
    # Consistency fix: use the module logger instead of a bare print.
    logger.info("Program execution ends!")
    return result
+
+
if __name__ == "__main__":
    print("Program starts execution!")
    from waitress import serve
    # BUG FIX: announce startup BEFORE serve() — serve() blocks until the
    # server shuts down, so the original "server start!" never printed at
    # startup.
    print("server start!")
    serve(app, host="0.0.0.0", port=10120,
          threads=8,            # worker threads (waitress default is 4; tune to hardware)
          channel_timeout=600   # connection timeout in seconds
          )
    # NOTE: the commented-out ad-hoc test driver that used to follow here was
    # removed as dead code; see tf_lstm3_train.py / version control for the
    # manual-test recipe.

+ 120 - 0
models_processing/model_tf/tf_lstm3_train.py

@@ -0,0 +1,120 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+# @FileName  :tf_lstm_train.py
+# @Time      :2025/2/13 10:52
+# @Author    :David
+# @Company: shenyang JY
+import json, copy
+import numpy as np
+from flask import Flask, request, jsonify, g
+import traceback, uuid
+import logging, argparse
+from data_processing.data_operation.data_handler import DataHandler
+import time, yaml, threading
+from copy import deepcopy
+from models_processing.model_tf.tf_lstm import TSHandler
+from common.database_dml_koi import *
+from common.logs import Log
+logger = Log('tf_ts').logger
+np.random.seed(42)  # NumPy随机种子
+app = Flask('tf_lstm_train——service')
+
# Resolve lstm.yaml next to this file so the service starts from any working directory.
current_dir = os.path.dirname(os.path.abspath(__file__))
with open(os.path.join(current_dir, 'lstm.yaml'), 'r', encoding='utf-8') as f:
    global_config = yaml.safe_load(f)  # read-only global config; requests work on deep copies
+
@app.before_request
def update_config():
    """Build the per-request configuration.

    Starts from a deep copy of the global YAML config, merges in the
    request's parameters, and attaches options plus fresh per-request
    handler instances to the Flask request context ``g``.
    """
    merged = deepcopy(global_config)
    incoming = request.values.to_dict()
    # Parse 'features' (comma-separated) when supplied; otherwise fall back
    # to the value carried by the global config.
    incoming['features'] = (incoming['features'].split(',')
                            if 'features' in incoming
                            else merged['features'])
    merged.update(incoming)

    g.opt = argparse.Namespace(**merged)   # attribute-style view of the config
    g.dh = DataHandler(logger, merged)     # independent instance per request
    g.ts = TSHandler(logger, merged)
+
+
@app.route('/tf_lstm_training', methods=['POST'])
def model_training_bp():
    """Training endpoint.

    Fetches training data from MongoDB, trains the time-series model
    (optionally continuing from a persisted base model when add_train is
    set), then stores the trained model and scalers back to MongoDB.

    Returns a dict with a success flag, the effective args and start/end
    timestamps; on failure 'msg' carries the flattened traceback.
    """
    start_time = time.time()
    result = {}
    success = 0
    dh = g.dh
    ts = g.ts
    args = deepcopy(g.opt.__dict__)
    logger.info("Program starts execution!")
    try:
        # ------------ fetch & preprocess training data ------------
        train_data = get_data_from_mongo(args)
        train_x, train_y, valid_x, valid_y, scaled_train_bytes, scaled_target_bytes, scaled_cap = \
            dh.train_data_handler(train_data, time_series=3)
        ts.opt.cap = round(scaled_cap, 2)
        ts.opt.Model['input_size'] = len(dh.opt.features)
        # ------------ train & persist ------------
        # add_train mode: load the persisted base model first; its feature
        # set drives preprocessing. Otherwise build a fresh model from the
        # current data's features.
        model = ts.train_init() if ts.opt.Model['add_train'] else ts.get_keras_model(ts.opt)
        if ts.opt.Model['add_train']:
            if model:
                feas = json.loads(ts.model_params)['features']
                if set(feas).issubset(set(dh.opt.features)):
                    # Re-run preprocessing with the base model's feature list.
                    dh.opt.features = list(feas)
                    train_x, train_y, valid_x, valid_y, scaled_train_bytes, scaled_target_bytes, scaled_cap = \
                        dh.train_data_handler(train_data, time_series=3)
                else:
                    # Data lacks features the base model needs: fall back to a fresh model.
                    model = ts.get_keras_model(ts.opt)
                    logger.info("训练数据特征,不满足,加强训练模型特征")
            else:
                # Base model failed to load: fall back to a fresh model.
                model = ts.get_keras_model(ts.opt)
        ts_model = ts.training(model, [train_x, train_y, valid_x, valid_y])
        args['Model']['features'] = ','.join(dh.opt.features)
        args['params'] = json.dumps(args)
        args['descr'] = 'lstm2'
        args['gen_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))

        insert_trained_model_into_mongo(ts_model, args)
        insert_scaler_model_into_mongo(scaled_train_bytes, scaled_target_bytes, args)
        success = 1
    except Exception:
        # BUG FIX: str.replace returns a new string; the original discarded the
        # result, so 'msg' kept raw newlines. Store the flattened traceback.
        result['msg'] = traceback.format_exc().replace("\n", "\t")
    end_time = time.time()
    result['success'] = success
    result['args'] = args
    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
    # Consistency fix: use the module logger instead of a bare print.
    logger.info("Program execution ends!")
    return result
+
+
if __name__ == "__main__":
    print("Program starts execution!")
    from waitress import serve
    # BUG FIX: announce startup BEFORE serve() — serve() blocks until the
    # server shuts down, so the original "server start!" never printed at
    # startup.
    print("server start!")
    serve(app, host="0.0.0.0", port=10119,
          threads=8,            # worker threads (waitress default is 4; tune to hardware)
          channel_timeout=600   # connection timeout in seconds
          )
    # NOTE: the commented-out ad-hoc training driver that used to follow here
    # was removed as dead code; see version control for the manual-test recipe.