27 commits ec26eb2294 ... 4ea561c18e

Author SHA1 Message Date
  David 4ea561c18e 06171256 1 week ago
  anweiguo 3b7b02db75 Merge branch 'dev_awg' of http://git.jiayuepowertech.com:9022/anweiguo/algorithm_platform into dev_awg 2 weeks ago
  liudawei 1f674cd1bd Merge branch 'dev_david' of anweiguo/algorithm_platform into dev_awg 2 weeks ago
  anweiguo 5ccfe317b1 1. Modify the photovoltaic physical model 2 weeks ago
  anweiguo 3dce5dbff9 1. Modify the installed-capacity type of the photovoltaic physical model 2 weeks ago
  huangzihan 3dd9fb4242 Merge branch 'dev_hzh' of anweiguo/algorithm_platform into dev_awg 2 weeks ago
  hzh e626ddc6b6 fix: update logging and the error raised when no weight parameter is given 2 weeks ago
  anweiguo 4888026061 1. Modify 曦谋 (xmo) hive to mongo 2 weeks ago
  huangzihan 0b616756dd Merge branch 'dev_hzh' of anweiguo/algorithm_platform into dev_awg 2 weeks ago
  hzh f242cd7535 fix: uncomment code 2 weeks ago
  anweiguo 776820a115 Merge branch 'dev_hzh' of anweiguo/algorithm_platform into dev_awg 2 weeks ago
  hzh 2a62d21022 fix: rename Flask app 2 weeks ago
  hzh fcaf1776bd fix: unify prediction parameters 2 weeks ago
  hzh 7936fd42b3 fix: set service port 2 weeks ago
  hzh 1ea02b834e fix: set service port 2 weeks ago
  hzh e9710dac23 Merge branch 'refs/heads/dev_awg' into dev_hzh 2 weeks ago
  anweiguo 50328097c4 1. Configure hive 2 weeks ago
  anweiguo 4af5b64c9b 1. Configure JDK 2 weeks ago
  anweiguo 154f718263 1. Configure JDK 2 weeks ago
  liudawei a0fa19a1d1 Merge branch 'dev_david' of anweiguo/algorithm_platform into dev_awg 2 weeks ago
  hzh bc717026e4 fix: set service port 2 weeks ago
  anweiguo a95b93b205 1. Configure JDK 2 weeks ago
  hzh d0b121d0df docs: remove comments and error prints 2 weeks ago
  liudawei 44e00acfb7 Merge branch 'dev_david' of anweiguo/algorithm_platform into dev_awg 2 weeks ago
  hzh f006a988e9 feat: support SVR and sample weights 2 weeks ago
  hzh 21f39260c9 fix adjust 2 weeks ago
  hzh 0cc21a30c7 feat: support lightgbm and SVR 1 month ago

+ 9 - 2
Dockerfile

@@ -3,13 +3,20 @@ FROM 192.168.1.36:5000/python:3.12
 
 ENV TZ=Asia/Shanghai
 RUN apt-get update && \
-    apt-get install -y tzdata && \
+    apt-get install -y tzdata openjdk-17-jdk && \
     ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && \
     echo $TZ > /etc/timezone && \
     DEBIAN_FRONTEND=noninteractive dpkg-reconfigure tzdata && \
-    apt-get clean
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+
 
 ENV LANG=en_US.UTF-8
+ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64
+ENV PATH="$JAVA_HOME/bin:$PATH"
+ENV CLASSPATH=.:$JAVA_HOME/lib:$JAVA_HOME/lib/tools.jar
+
 # Set the working directory
 WORKDIR /app
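
The JDK is installed so the Hive JDBC bridge in common/database_dml.py can start a JVM via jaydebeapi (see the hive-jdbc-standalone.jar diff below). A minimal in-container sanity-check sketch, assuming the ENV paths set above (the script name is hypothetical):

    # check_jvm.py -- sketch only; paths follow the ENV lines in this Dockerfile
    import os
    import subprocess

    assert os.environ["JAVA_HOME"] == "/usr/lib/jvm/java-17-openjdk-amd64"
    subprocess.run(["java", "-version"], check=True)  # resolves via $JAVA_HOME/bin on PATH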
 

+ 42 - 11
common/database_dml.py

@@ -74,6 +74,20 @@ def get_df_list_from_mongo(args):
     return df_list
 
 
+def delete_data_from_mongo(args):
+    mongodb_connection = config['mongodb']['mongodb_connection']
+    mongodb_database = args['mongodb_database']
+    mongodb_write_table = args['mongodb_write_table']
+    client = MongoClient(mongodb_connection)
+    db = client[mongodb_database]
+    collection = db[mongodb_write_table]
+    if mongodb_write_table in db.list_collection_names():
+        collection.drop()
+        print(f"Collection '{mongodb_write_table}' already exists; dropped successfully!")
+    else:
+        print(f"Collection '{mongodb_write_table}' does not exist; nothing to delete!")
+
+
 def insert_data_into_mongo(res_df, args):
     """
     Insert data into a MongoDB collection, with a choice of overwrite, append, or upsert by a specified key.
@@ -144,7 +158,7 @@ def get_data_fromMysql(params):
     return df
 
 
-def insert_pickle_model_into_mongo(model, args):
+def insert_pickle_model_into_mongo(model, args, features=None):
     # 获取 hive 配置部分
     mongodb_connection = config['mongodb']['mongodb_connection']
     mongodb_database, mongodb_write_table, model_name = args['mongodb_database'], args['mongodb_write_table'], args[
@@ -157,6 +171,8 @@ def insert_pickle_model_into_mongo(model, args):
         'model_name': model_name,
         'model': model_bytes,  # 将模型字节流存入数据库
     }
+    if features is not None:
+        model_data['features'] = features
     print('Training completed!')
 
     if mongodb_write_table in db.list_collection_names():
@@ -729,15 +745,28 @@ def get_xmo_data_from_hive(args):
     password = hive_config['password']
     features = config['xmo']['features']
     numeric_features = config['xmo']['numeric_features']
-    if 'moment' not in args or 'farm_id' not in args:
-        msg_error = 'One or more of the following parameters are missing: moment, farm_id!'
+    if 'farm_id' not in args:
+        msg_error = 'One or more of the following parameters are missing: farm_id!'
         return msg_error
     else:
-        moment = args['moment']
         farm_id = args['farm_id']
-
+        if 'moment' in args:
+            moment = args['moment']
+        else:
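+            # assumption: when 'moment' is omitted, args['current_date'] (format %Y%m%d%H) must be provided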
+            hour = datetime.strptime(args['current_date'], "%Y%m%d%H").hour
+            if hour < 3:
+                moment = '00'
+            elif hour < 6:
+                moment = '03'
+            elif hour < 15:
+                moment = '09'
+            elif hour < 19:
+                moment = '12'
+            else:
+                moment = '18'
+            print(moment)
         if 'current_date' in args:
-            current_date = datetime.strptime(args['current_date'], "%Y%m%d")
+            current_date = datetime.strptime(args['current_date'], "%Y%m%d%H")
         else:
             current_date = datetime.now()
         if 'days' in args:
@@ -746,6 +775,7 @@ def get_xmo_data_from_hive(args):
             days = 1
         json_feature = f"nwp_xmo_{moment}"
         # establish the connection
         conn = jaydebeapi.connect(driver_class, jdbc_url, [user, password], jar_file)
         # query the Hive table
         cursor = conn.cursor()
@@ -757,7 +787,7 @@ def get_xmo_data_from_hive(args):
             else:
                 query_sql += "union \n"
 
-            query_sql += """select rowkey,datatimestamp,{2} from hbase_forecast.forecast_xmo_d{3} 
+            query_sql += """select rowkey,datatimestamp,{2} from hbase_forecast.forecast_xmo_d{3}
                                                      where rowkey>='{0}-{1}0000' and rowkey<='{0}-{1}2345' \n""".format(
                 farm_id, sysdate_pre, json_feature, i)
         print("query_sql\n", query_sql)
@@ -781,14 +811,15 @@ def get_xmo_data_from_hive(args):
             return df_features[features]
 
 
+
 if __name__ == "__main__":
     print("Program starts execution!")
     args = {
-        'moment': '06',
-        'current_date': '20250609',
+        # 'moment': '06',
+        'current_date': '2025060901',
         'farm_id': 'J00883',
         'days': '13'
     }
-    df = get_xmo_data_from_hive(args)
-    print(df.head(2),df.shape)
+    get_xmo_data_from_hive(args)
+
     print("server start!")

BIN
common/jar/hive-jdbc-standalone.jar


+ 25 - 0
common/log_utils.py

@@ -0,0 +1,25 @@
+# log_utils.py
+import logging
+from io import StringIO
+from flask import g
+
+
+def init_request_logging(logger):
+    # create an in-memory buffer for this request's log records
+    g.log_stream = StringIO()
+
+    # create a custom log handler writing to that buffer
+    handler = logging.StreamHandler(g.log_stream)
+    handler.setFormatter(logging.Formatter(
+        '%(asctime)s [%(levelname)s] %(message)s'
+    ))
+
+    # temporarily attach it to the logger
+    g.log_handler = handler
+    logger.addHandler(handler)
+
+
+def teardown_request_logging(response, logger):
+    if hasattr(g, 'log_handler'):
+        logger.removeHandler(g.log_handler)
+    return response
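
These two helpers are meant to be registered as Flask request hooks, mirroring how model_training_ml.py below wires them up; a minimal sketch:

    from flask import Flask
    import logging
    from common.log_utils import init_request_logging, teardown_request_logging

    app = Flask('example_service')
    logger = logging.getLogger('example')

    @app.before_request
    def setup_logging():
        init_request_logging(logger)   # attach the per-request StringIO handler

    @app.after_request
    def teardown_logging(response):
        return teardown_request_logging(response, logger)  # detach it again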

+ 3 - 1
data_processing/data_operation/hive_to_mongo.py

@@ -2,7 +2,8 @@ from flask import Flask,request,jsonify
 import time
 import logging
 import traceback
-from common.database_dml import insert_data_into_mongo,get_xmo_data_from_hive
+from common.database_dml import insert_data_into_mongo, get_xmo_data_from_hive, delete_data_from_mongo
+
 app = Flask('hive_to_mongo——service')
 
 
@@ -22,6 +23,7 @@ def data_join():
         args = request.values.to_dict()
         print('args', args)
         logger.info(args)
+        delete_data_from_mongo(args)
         df_hive = get_xmo_data_from_hive(args)
         if isinstance(df_hive, str):
             success = 0

+ 46 - 0
data_processing/data_operation/weight.py

@@ -0,0 +1,46 @@
+import numpy as np
+
+
+def balance_weights(y: np.ndarray, bins=10, normalize: bool = True, **kwargs) -> np.ndarray:
+    """
+    Balance weights: the fewer samples a bin holds, the larger its weight.
+    """
+    bins = int(bins)
+    counts, bin_edges = np.histogram(y, bins=bins)
+
+    # digitize without right=True, so the minimum value still lands in bin 0
+    bin_indices = np.digitize(y, bin_edges[1:-1], right=False)
+
+    # bin_counts is indexed from 0 to bins-1
+    bin_counts = {i: count for i, count in enumerate(counts)}
+
+    # assign a weight to each sample (fallback: a large default weight if a bin count is missing)
+    weights = np.array([1.0 / bin_counts.get(b, 1e-6) for b in bin_indices])
+
+    if normalize:
+        weights /= np.mean(weights)
+
+    return weights
+
+def south_weight(target: np.ndarray, cap, **kwargs) -> np.ndarray:
+    """
+    Handle the southern grid's unusual assessment rules.
+    To avoid pressing the curve too low, the weights are tempered here (the square root keeps the weight distribution from becoming too dispersed).
+    """
+    cap = float(cap)
+    weight = 1 / np.sqrt(np.where(target < 0.2 * cap, 0.2 * cap, target))
+    return weight
+
+def standard_weight(target: np.ndarray, **kwargs) -> np.ndarray:
+    """
+    Standardized weights.
+    """
+    weight = np.sqrt(np.abs(target - np.mean(target))) / np.std(target)
+    return weight
+
+# ------------------------------ weight function registry ------------------------------
+WEIGHT_REGISTER = {
+    "balance": balance_weights,
+    "south": south_weight,
+    "std": standard_weight
+}
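
A hedged usage sketch of the registry; the synthetic target below is for illustration only:

    import numpy as np
    from data_processing.data_operation.weight import WEIGHT_REGISTER

    y = np.random.rand(1000) * 50               # synthetic power series
    w = WEIGHT_REGISTER['balance'](y, bins=10)  # sparsely populated power levels get larger weights
    print(w.mean())                             # ~1.0, since normalize=True by default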

+ 1 - 2
models_processing/model_predict/model_prediction_lightgbm.py

@@ -119,5 +119,4 @@ if __name__=="__main__":
     logger = logging.getLogger("model_prediction_lightgbm log")
     from waitress import serve
     serve(app, host="0.0.0.0", port=10090)
-    print("server start!")
-    
+    print("server start!")

+ 163 - 0
models_processing/model_predict/model_prediction_ml.py

@@ -0,0 +1,163 @@
+import pandas as pd
+from pymongo import MongoClient
+import pickle
+from flask import Flask, request, g
+import time
+import logging
+import traceback
+from common.database_dml import get_data_from_mongo, insert_data_into_mongo
+from common.alert import send_message
+from datetime import datetime, timedelta
+import pytz
+from pytz import timezone
+from common.log_utils import init_request_logging, teardown_request_logging
+from common.processing_data_common import get_xxl_dq
+
+app = Flask('model_prediction_ml——service')
+
+@app.before_request
+def setup_logging():
+    init_request_logging(logger)
+
+# detach the log handler after each request
+@app.after_request
+def teardown_logging(response):
+    return teardown_request_logging(response, logger)
+
+def str_to_list(arg):
+    if arg == '':
+        return []
+    else:
+        return arg.split(',')
+
+def forecast_data_distribution(pre_data, args):
+    col_time = args['col_time']
+    farm_id = args['farm_id']
+    dt = datetime.now(pytz.utc).astimezone(timezone("Asia/Shanghai")).strftime('%Y%m%d')
+    tomorrow = (datetime.now(pytz.utc).astimezone(timezone("Asia/Shanghai")) + timedelta(days=1)).strftime('%Y-%m-%d')
+    field_mapping = {'clearsky_ghi': 'clearskyGhi', 'dni_calcd': 'dniCalcd', 'surface_pressure': 'surfacePressure',
+                     'wd140m': 'tj_wd140', 'ws140m': 'tj_ws140', 'wd170m': 'tj_wd170', 'cldt': 'tj_tcc',
+                     'wd70m': 'tj_wd70',
+                     'ws100m': 'tj_ws100', 'DSWRFsfc': 'tj_radiation', 'wd10m': 'tj_wd10', 'TMP2m': 'tj_t2',
+                     'wd30m': 'tj_wd30',
+                     'ws30m': 'tj_ws30', 'rh2m': 'tj_rh', 'PRATEsfc': 'tj_pratesfc', 'ws170m': 'tj_ws170',
+                     'wd50m': 'tj_wd50',
+                     'ws50m': 'tj_ws50', 'wd100m': 'tj_wd100', 'ws70m': 'tj_ws70', 'ws10m': 'tj_ws10',
+                     'psz': 'tj_pressure',
+                     'cldl': 'tj_lcc', 'pres': 'tj_pres', 'dateTime': 'date_time'}
+    # rename columns according to the field mapping
+    pre_data = pre_data.rename(columns=field_mapping)
+
+    if len(pre_data) == 0:
+        logger.info("nwp dataframe is empty")
+        send_message('lightgbm prediction component', farm_id, 'Warning: the fetched NWP data is empty; the forecast file cannot be generated!')
+        result = get_xxl_dq(farm_id, dt)
+
+    elif len(pre_data[pre_data[col_time].str.contains(tomorrow)]) < 96:
+        send_message('lightgbm prediction component', farm_id, "Day-ahead records are missing (fewer than 96 rows); falling back to DQ with gap filling!")
+        result = get_xxl_dq(farm_id, dt)
+    else:
+        df = pre_data.sort_values(by=col_time).fillna(method='ffill').fillna(method='bfill')
+        mongodb_connection, mongodb_database, mongodb_model_table, model_name = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/", \
+            args['mongodb_database'], args['mongodb_model_table'], args['model_name']
+        client = MongoClient(mongodb_connection)
+        db = client[mongodb_database]
+        collection = db[mongodb_model_table]
+        model_data = collection.find_one({"model_name": model_name})
+        if model_data is not None:
+            logger.info(f"use {model_data['model_name']} features: {model_data.get('features')}")
+            model_binary = model_data['model']  # this field holds the pickled model bytes
+            # deserialize the model
+            model = pickle.loads(model_binary)
+            if 'features' in model_data.keys():
+                features = model_data['features']
+            else:
+                features = model.feature_name()
+            diff = set(features) - set(pre_data.columns)
+            if len(diff) > 0:
+                send_message('lightgbm prediction component', farm_id, f'NWP feature columns are missing; falling back to DQ! features: {diff}')
+                result = get_xxl_dq(farm_id, dt)
+            else:
+                df['power_forecast'] = model.predict(df[features])
+                df.loc[df['power_forecast'] < 0, 'power_forecast'] = 0
+                logger.info("model predict result successfully!")
+                if 'farm_id' not in df.columns:
+                    df['farm_id'] = farm_id
+                result = df[['farm_id', 'date_time', 'power_forecast']]
+        else:
+            send_message('lightgbm prediction component', farm_id, "Model file missing; falling back to DQ with gap filling!")
+            result = get_xxl_dq(farm_id, dt)
+    result['power_forecast'] = round(result['power_forecast'], 2)
+    return result
+
+def model_prediction(df, args):
+    mongodb_connection, mongodb_database, mongodb_model_table, model_name, howLongAgo, farm_id, target = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/", \
+    args['mongodb_database'], args['mongodb_model_table'], args['model_name'], int(args['howLongAgo']), args['farm_id'], \
+    args['target']
+    client = MongoClient(mongodb_connection)
+    db = client[mongodb_database]
+    collection = db[mongodb_model_table]
+    model_data = collection.find_one({"model_name": model_name})
+    if 'is_limit' in df.columns:
+        df = df[df['is_limit'] == False]
+
+    if model_data is not None:
+        logger.info(f"use {model_data['model_name']} features: {model_data.get('features')}")
+        model_binary = model_data['model']  # this field holds the pickled model bytes
+        # deserialize the model
+        model = pickle.loads(model_binary)
+        if 'features' in model_data.keys():
+            features = model_data['features']
+        else:
+            features = model.feature_name()
+        df.dropna(subset=features, inplace=True)
+        df['power_forecast'] = model.predict(df[features])
+        df.loc[df['power_forecast'] < 0, 'power_forecast'] = 0
+        df['model'] = model_name
+        df['howLongAgo'] = howLongAgo
+        df['farm_id'] = farm_id
+        logger.info("model predict result successfully!")
+
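+    # if no model document was found, df lacks the forecast columns selected below; callers assume the model exists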
+    return df[['dateTime', 'howLongAgo', 'model', 'farm_id', 'power_forecast', target]]
+
+@app.route('/model_prediction_ml', methods=['POST'])
+def model_prediction_ml():
+    # record the program start time
+    start_time = time.time()
+    result = {}
+    success = 0
+    print("Program starts execution!")
+    try:
+        args = request.values.to_dict()
+        print('args', args)
+        forecast_file = int(args['forecast_file'])
+        power_df = get_data_from_mongo(args)
+        if forecast_file == 1:
+            predict_data = forecast_data_distribution(power_df, args)
+        else:
+            predict_data = model_prediction(power_df, args)
+        insert_data_into_mongo(predict_data, args)
+        success = 1
+    except Exception as e:
+        my_exception = traceback.format_exc()
+        logger.error(my_exception)
+    end_time = time.time()
+
+    result['success'] = success
+    result['args'] = args
+    result['log'] = g.log_stream.getvalue().splitlines()
+    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
+    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
+    print("Program execution ends!")
+    return result
+
+
+
+if __name__ == "__main__":
+    print("Program starts execution!")
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    logger = logging.getLogger("model_prediction_ml log")
+    from waitress import serve
+
+    serve(app, host="0.0.0.0", port=10129)
+    print("server start!")

+ 12 - 3
models_processing/model_predict/model_prediction_photovoltaic_physical.py

@@ -22,23 +22,30 @@ def forecast_data_distribution(pre_data, args):
     col_time = args['col_time']
     farm_id = args['farm_id']
     col_radiance = args['col_radiance']
-    radiance_max = float(args['radiance_max'])
+    pre_radiance_max = pre_data[col_radiance].max()
     cap = float(args['cap'])
     pre_data['farm_id'] = farm_id
+    if 'control_type' in args and args['control_type'] == 'cut':
+        radiance_max = float(args['radiance_max'])
+    else:
+        radiance_max = max(float(args['radiance_max']), pre_radiance_max)
     pre_data['power_forecast'] = round(pre_data[col_radiance] * cap / radiance_max, 2)
+
     if 'sunrise_time' in args:
         sunrise_time = args['sunrise_time']
         pre_data.loc[pre_data[col_time].dt.time < sunrise_time, 'power_forecast'] = 0
     if 'sunset_time' in args:
         sunset_time = args['sunset_time']
-        pre_data[pre_data[col_time] > sunset_time, 'power_forecast'] = 0
+        pre_data.loc[pre_data[col_time] > sunset_time, 'power_forecast'] = 0
+    pre_data.loc[pre_data['power_forecast'] > cap, 'power_forecast'] = cap
+    pre_data.loc[pre_data['power_forecast'] < 0, 'power_forecast'] = 0
     return pre_data[['farm_id', 'date_time', 'power_forecast']]
 
 
 def model_prediction(df, args):
     # 新增日出、日落时间参数
     howLongAgo, farm_id, target, cap, col_radiance, radiance_max, model_name, col_time = int(args['howLongAgo']), args['farm_id'], \
-    args['target'], args['cap'], args['col_radiance'], args['radiance_max'], args['model_name'], args['col_time']
+    args['target'], float(args['cap']), args['col_radiance'], float(args['radiance_max']), args['model_name'], args['col_time']
     df['power_forecast'] = round(df[col_radiance]*cap/radiance_max, 2)
     df.loc[df['power_forecast'] < 0, 'power_forecast'] = 0
     if 'sunrise_time' in args:
@@ -50,6 +57,8 @@ def model_prediction(df, args):
     df['model'] = model_name
     df['howLongAgo'] = howLongAgo
     df['farm_id'] = farm_id
+    df.loc[df['power_forecast'] > cap, 'power_forecast'] = cap
+    df.loc[df['power_forecast'] < 0, 'power_forecast'] = 0
     print("model predict result  successfully!")
     return df[['dateTime', 'howLongAgo', 'model', 'farm_id', 'power_forecast', target]]
 

+ 3 - 1
models_processing/model_tf/losses_cash.py

@@ -73,7 +73,9 @@ class SouthLossCash(Loss):
         ############### added penalty term ###############
         # compute the negative-bias penalty (penalize forecasts that fall below the actual value)
         negative_bias = tf.maximum(diff, 0.0)  # magnitude of the under-forecast part
-        penalty = self.penalty_coeff * tf.reduce_mean(negative_bias, axis=-1)
+        # put the penalty term on the same scale as the accuracy loss
+        negative_bias_ = tf.square(negative_bias / safe_base)
+        penalty = self.penalty_coeff * tf.reduce_mean(negative_bias_, axis=-1)
 
         # combine into the final loss (different weightings can be tried)
         total_loss = base_loss + penalty
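
In formula form (a sketch: $c$ is penalty_coeff, $d_i$ the signed error diff, $b_i$ the existing safe_base denominator, $N$ the output length), the reworked penalty is now on the same squared-relative scale as the base loss:

    \text{penalty} = c \cdot \frac{1}{N} \sum_{i=1}^{N} \left( \frac{\max(d_i,\, 0)}{b_i} \right)^{2}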

+ 0 - 20
models_processing/model_tf/tf_transformer.py

@@ -37,26 +37,6 @@ class TransformerHandler(object):
             self.logger.info("加载模型权重失败:{}".format(e.args))
 
     @staticmethod
-    def get_keras_model(opt, time_series=1, lstm_type=1):
-        loss = region_loss(opt)
-        l1_reg = regularizers.l1(opt.Model['lambda_value_1'])
-        l2_reg = regularizers.l2(opt.Model['lambda_value_2'])
-        nwp_input = Input(shape=(opt.Model['time_step']*time_series, opt.Model['input_size']), name='nwp')
-
-        con1 = Conv1D(filters=64, kernel_size=5, strides=1, padding='valid', activation='relu', kernel_regularizer=l2_reg)(nwp_input)
-        con1_p = MaxPooling1D(pool_size=5, strides=1, padding='valid', data_format='channels_last')(con1)
-        nwp_lstm = LSTM(units=opt.Model['hidden_size'], return_sequences=False, kernel_regularizer=l2_reg)(con1_p)
-        if lstm_type == 2:
-            output = Dense(opt.Model['time_step'], name='cdq_output')(nwp_lstm)
-        else:
-            output = Dense(opt.Model['time_step']*time_series, name='cdq_output')(nwp_lstm)
-
-        model = Model(nwp_input, output)
-        adam = optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-7, amsgrad=True)
-        model.compile(loss=loss, optimizer=adam)
-        return model
-
-    @staticmethod
     def get_transformer_model(opt, time_series=1):
         time_steps = 48
         input_features = 21

+ 143 - 0
models_processing/model_tf/tf_transformer_pre.py

@@ -0,0 +1,143 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+# @FileName  :tf_transformer_pre.py
+# @Time      :2025/2/13 10:52
+# @Author    :David
+# @Company: shenyang JY
+import json, copy
+import numpy as np
+from flask import Flask, request, g
+import logging, argparse, traceback
+from common.database_dml import *
+from common.processing_data_common import missing_features, str_to_list
+from data_processing.data_operation.data_handler import DataHandler
+from threading import Lock
+import os, time, yaml
+from copy import deepcopy
+model_lock = Lock()
+from itertools import chain
+from common.logs import Log
+from common.data_utils import deep_update
+from tf_transformer import TransformerHandler
+# logger = Log('tf_bp').logger()
+logger = Log('tf_ts').logger
+np.random.seed(42)  # NumPy random seed
+# tf.set_random_seed(42)  # TensorFlow random seed
+app = Flask('tf_lstm_pre——service')
+
+current_dir = os.path.dirname(os.path.abspath(__file__))
+with open(os.path.join(current_dir, 'lstm.yaml'), 'r', encoding='utf-8') as f:
+    global_config = yaml.safe_load(f)  # read-only global config
+
+@app.before_request
+def update_config():
+    # ------------ assemble and merge request parameters ------------
+    # deep-copy the global config, then merge in request parameters
+    current_config = deepcopy(global_config)
+    request_args = request.values.to_dict()
+    # features rule: 1. if passed, parse and override 2. if absent, keep the original value
+    request_args['features'] = request_args['features'].split(',') if 'features' in request_args else current_config['features']
+    request_args['time_series'] = request_args.get('time_series', 1)
+    current_config = deep_update(current_config, request_args)
+
+    # store in the request context
+    g.opt = argparse.Namespace(**current_config)
+    g.dh = DataHandler(logger, current_config)  # a fresh instance per request
+    g.trans = TransformerHandler(logger, current_config)
+
+@app.route('/tf_lstm_predict', methods=['POST'])
+def model_prediction_lstm():
+    # record the program start time
+    start_time = time.time()
+    result = {}
+    success = 0
+    dh = g.dh
+    trans = g.trans
+    args = deepcopy(g.opt.__dict__)
+    logger.info("Program starts execution!")
+    try:
+        pre_data = get_data_from_mongo(args)
+        if args.get('algorithm_test', 0):
+            field_mapping = {'clearsky_ghi': 'clearskyGhi', 'dni_calcd': 'dniCalcd','surface_pressure': 'surfacePressure'}
+            pre_data = pre_data.rename(columns=field_mapping)
+        feature_scaler, target_scaler = get_scaler_model_from_mongo(args)
+        trans.opt.cap = round(target_scaler.transform(np.array([[float(args['cap'])]]))[0, 0], 2)
+        trans.get_model(args)
+        dh.opt.features = json.loads(trans.model_params)['Model']['features'].split(',')
+        scaled_pre_x, pre_data = dh.pre_data_handler(pre_data, feature_scaler, time_series=args['time_series'], lstm_type=1)
+        res = list(chain.from_iterable(target_scaler.inverse_transform(trans.predict(scaled_pre_x))))
+        pre_data['farm_id'] = args.get('farm_id', 'null')
+        if int(args.get('algorithm_test', 0)):
+            pre_data[args['model_name']] = res[:len(pre_data)]
+            pre_data.rename(columns={args['col_time']: 'dateTime'}, inplace=True)
+            pre_data = pre_data[['dateTime', 'farm_id', args['target'], args['model_name'], 'dq']]
+            pre_data = pre_data.melt(id_vars=['dateTime', 'farm_id', args['target']], var_name='model', value_name='power_forecast')
+            res_cols = ['dateTime', 'power_forecast', 'farm_id', args['target'], 'model']
+            if 'howLongAgo' in args:
+                pre_data['howLongAgo'] = int(args['howLongAgo'])
+                res_cols += ['howLongAgo']
+        else:
+            pre_data['power_forecast'] = res[:len(pre_data)]
+            pre_data.rename(columns={args['col_time']: 'date_time'}, inplace=True)
+            res_cols = ['date_time', 'power_forecast', 'farm_id']
+        pre_data = pre_data[res_cols]
+
+        pre_data.loc[:, 'power_forecast'] = pre_data.loc[:, 'power_forecast'].apply(lambda x: float(f"{x:.2f}"))
+        pre_data.loc[pre_data['power_forecast'] > float(args['cap']), 'power_forecast'] = float(args['cap'])
+        pre_data.loc[pre_data['power_forecast'] < 0, 'power_forecast'] = 0
+
+        insert_data_into_mongo(pre_data, args)
+        success = 1
+    except Exception as e:
+        my_exception = traceback.format_exc()
+        my_exception = my_exception.replace("\n", "\t")
+        result['msg'] = my_exception
+    end_time = time.time()
+
+    result['success'] = success
+    result['args'] = args
+    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
+    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
+    print("Program execution ends!")
+    return result
+
+
+if __name__ == "__main__":
+    print("Program starts execution!")
+    from waitress import serve
+    serve(app, host="0.0.0.0", port=10114,
+          threads=8,  # number of worker threads (default 4; tune to the hardware)
+          channel_timeout=600  # connection timeout in seconds
+          )
+    print("server start!")
+
+    # ------------------------ test code ------------------------
+    # args_dict = {"mongodb_database": 'david_test', 'scaler_table': 'j00083_scaler', 'model_name': 'bp1.0.test',
+    #              'model_table': 'j00083_model', 'mongodb_read_table': 'j00083_test', 'col_time': 'date_time', 'mongodb_write_table': 'j00083_rs',
+    #              'features': 'speed10,direction10,speed30,direction30,speed50,direction50,speed70,direction70,speed90,direction90,speed110,direction110,speed150,direction150,speed170,direction170'}
+    # args_dict['features'] = args_dict['features'].split(',')
+    # arguments.update(args_dict)
+    # dh = DataHandler(logger, arguments)
+    # ts = TSHandler(logger)
+    # opt = argparse.Namespace(**arguments)
+    #
+    # opt.Model['input_size'] = len(opt.features)
+    # pre_data = get_data_from_mongo(args_dict)
+    # feature_scaler, target_scaler = get_scaler_model_from_mongo(arguments)
+    # pre_x = dh.pre_data_handler(pre_data, feature_scaler, opt)
+    # ts.get_model(arguments)
+    # result = ts.predict(pre_x)
+    # result1 = list(chain.from_iterable(target_scaler.inverse_transform([result.flatten()])))
+    # pre_data['power_forecast'] = result1[:len(pre_data)]
+    # pre_data['farm_id'] = 'J00083'
+    # pre_data['cdq'] = 1
+    # pre_data['dq'] = 1
+    # pre_data['zq'] = 1
+    # pre_data.rename(columns={arguments['col_time']: 'date_time'}, inplace=True)
+    # pre_data = pre_data[['date_time', 'power_forecast', 'farm_id', 'cdq', 'dq', 'zq']]
+    #
+    # pre_data['power_forecast'] = pre_data['power_forecast'].round(2)
+    # pre_data.loc[pre_data['power_forecast'] > opt.cap, 'power_forecast'] = opt.cap
+    # pre_data.loc[pre_data['power_forecast'] < 0, 'power_forecast'] = 0
+    #
+    # insert_data_into_mongo(pre_data, arguments)

+ 122 - 0
models_processing/model_tf/tf_transformer_train.py

@@ -0,0 +1,122 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+# @FileName  :tf_transformer_train.py
+# @Time      :2025/2/13 10:52
+# @Author    :David
+# @Company: shenyang JY
+import json, copy
+import numpy as np
+from flask import Flask, request, jsonify, g
+import traceback, uuid
+import logging, argparse
+from data_processing.data_operation.data_handler import DataHandler
+import os, time, yaml, threading
+from copy import deepcopy
+from models_processing.model_tf.tf_transformer import TransformerHandler
+from common.database_dml import *
+from common.logs import Log
+from common.data_utils import deep_update
+logger = Log('tf_ts').logger
+np.random.seed(42)  # NumPy random seed
+app = Flask('tf_lstm_train——service')
+
+current_dir = os.path.dirname(os.path.abspath(__file__))
+with open(os.path.join(current_dir, 'lstm.yaml'), 'r', encoding='utf-8') as f:
+    global_config = yaml.safe_load(f)  # read-only global config
+
+@app.before_request
+def update_config():
+    # ------------ assemble and merge request parameters ------------
+    # deep-copy the global config, then merge in request parameters
+    current_config = deepcopy(global_config)
+    request_args = request.values.to_dict()
+    # features rule: 1. if passed, parse and override 2. if absent, keep the original value
+    request_args['features'] = request_args['features'].split(',') if 'features' in request_args else current_config['features']
+    request_args['time_series'] = request_args.get('time_series', 1)
+    current_config = deep_update(current_config, request_args)
+
+    # store in the request context
+    g.opt = argparse.Namespace(**current_config)
+    g.dh = DataHandler(logger, current_config)  # a fresh instance per request
+    g.trans = TransformerHandler(logger, current_config)
+
+
+@app.route('/tf_lstm_training', methods=['POST'])
+def model_training_lstm():
+    # record the program start time
+    start_time = time.time()
+    result = {}
+    success = 0
+    dh = g.dh
+    trans = g.trans
+    args = deepcopy(g.opt.__dict__)
+    logger.info("Program starts execution!")
+    try:
+        # ------------ fetch and preprocess training data ------------
+        train_data = get_data_from_mongo(args)
+        train_x, train_y, valid_x, valid_y, scaled_train_bytes, scaled_target_bytes, scaled_cap = dh.train_data_handler(train_data, time_series=args['time_series'])
+        trans.opt.cap = round(scaled_cap, 2)
+        trans.opt.Model['input_size'] = len(dh.opt.features)
+        # ------------ train the model and save it ------------
+        # 1. In incremental-training mode, load the pretrained model's feature parameters first, then preprocess the training data
+        # 2. In normal mode, preprocess the training data first, then build the model from the training-data features
+        model = trans.train_init() if trans.opt.Model['add_train'] else trans.get_transformer_model(trans.opt, time_series=args['time_series'])
+        if trans.opt.Model['add_train']:
+            if model:
+                feas = json.loads(trans.model_params)['features']
+                if set(feas).issubset(set(dh.opt.features)):
+                    dh.opt.features = list(feas)
+                    train_x, train_y, valid_x, valid_y, scaled_train_bytes, scaled_target_bytes, scaled_cap = dh.train_data_handler(train_data, time_series=args['time_series'])
+                else:
+                    model = trans.get_transformer_model(trans.opt, time_series=args['time_series'])
+                    logger.info("training-data features do not cover the pretrained model's features")
+            else:
+                model = trans.get_transformer_model(trans.opt, time_series=args['time_series'])
+        ts_model = trans.training(model, [train_x, train_y, valid_x, valid_y])
+        args['Model']['features'] = ','.join(dh.opt.features)
+        args['params'] = json.dumps(args)
+        args['descr'] = 'test'
+        args['gen_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
+
+        insert_trained_model_into_mongo(ts_model, args)
+        insert_scaler_model_into_mongo(scaled_train_bytes, scaled_target_bytes, args)
+        success = 1
+    except Exception as e:
+        my_exception = traceback.format_exc()
+        my_exception = my_exception.replace("\n", "\t")
+        result['msg'] = my_exception
+    end_time = time.time()
+    result['success'] = success
+    result['args'] = args
+    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
+    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
+    print("Program execution ends!")
+    return result
+
+
+if __name__ == "__main__":
+    print("Program starts execution!")
+    from waitress import serve
+    serve(app, host="0.0.0.0", port=10115,
+          threads=8,  # number of worker threads (default 4; tune to the hardware)
+          channel_timeout=600  # connection timeout in seconds
+          )
+    print("server start!")
+    # args_dict = {"mongodb_database": 'realtimeDq', 'scaler_table': 'j00600_scaler', 'model_name': 'lstm1',
+    # 'model_table': 'j00600_model', 'mongodb_read_table': 'j00600', 'col_time': 'dateTime',
+    # 'features': 'speed10,direction10,speed30,direction30,speed50,direction50,speed70,direction70,speed90,direction90,speed110,direction110,speed150,direction150,speed170,direction170'}
+    # args_dict['features'] = args_dict['features'].split(',')
+    # args.update(args_dict)
+    # dh = DataHandler(logger, args)
+    # ts = TSHandler(logger, args)
+    # opt = argparse.Namespace(**args)
+    # opt.Model['input_size'] = len(opt.features)
+    # train_data = get_data_from_mongo(args_dict)
+    # train_x, train_y, valid_x, valid_y, scaled_train_bytes, scaled_target_bytes = dh.train_data_handler(train_data)
+    # ts_model = ts.training([train_x, train_y, valid_x, valid_y])
+    #
+    # args_dict['gen_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
+    # args_dict['params'] = args
+    # args_dict['descr'] = 'test'
+    # insert_trained_model_into_mongo(ts_model, args_dict)
+    # insert_scaler_model_into_mongo(scaled_train_bytes, scaled_target_bytes, args_dict)

+ 164 - 0
models_processing/model_train/model_training_ml.py

@@ -0,0 +1,164 @@
+import lightgbm as lgb
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import mean_squared_error, mean_absolute_error
+from flask import Flask, request, g
+import time
+import traceback
+import logging
+from common.database_dml import get_data_from_mongo, insert_pickle_model_into_mongo
+from common.processing_data_common import missing_features, str_to_list
+from sklearn.pipeline import Pipeline
+from sklearn.svm import SVR
+from sklearn.preprocessing import MinMaxScaler
+from data_processing.data_operation.weight import WEIGHT_REGISTER
+from io import StringIO
+from common.log_utils import init_request_logging, teardown_request_logging
+
+app = Flask('model_training_ml——service')
+
+
+# capture logs before each request
+@app.before_request
+def setup_logging():
+    init_request_logging(logger)
+
+# detach the log handler after each request
+@app.after_request
+def teardown_logging(response):
+    return teardown_request_logging(response, logger)
+
+def get_sample_weight(df, label, args):
+    # sample weights
+    if 'sample_weight' in args.keys():
+        if args['sample_weight'] in WEIGHT_REGISTER.keys():
+            sample_weight = WEIGHT_REGISTER[args['sample_weight']](df[label].values.reshape(-1), **args)
+            logger.info(f"use predefined weights {args['sample_weight']}")
+        elif args['sample_weight'] in df.columns.tolist():
+            sample_weight = df[args['sample_weight']].values.reshape(-1)
+            logger.info(f'use dataframe col {args["sample_weight"]}')
+        else:
+            sample_weight = None
+            logger.info('sample_weight is neither in the predefined weights nor a column of the DataFrame, not applicable')
+    else:
+        sample_weight = None
+        logger.info('no sample_weight')
+    return sample_weight
+
+def train_lgb(data_split, categorical_features, model_params, num_boost_round=100, sample_weight=None):
+    X_train, X_test, y_train, y_test = data_split
+    # build the LightGBM datasets
+    lgb_train = lgb.Dataset(X_train, y_train, categorical_feature=categorical_features, weight=sample_weight)
+    lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
+    # set training parameters
+    params = {
+        'objective': 'regression',
+        'metric': 'rmse',
+        'boosting_type': 'gbdt',
+        'verbose': 1
+    }
+    params.update(model_params)
+    # train the model
+    print('Starting training...')
+    gbm = lgb.train(params,
+                    lgb_train,
+                    num_boost_round=num_boost_round,
+                    valid_sets=[lgb_train, lgb_eval],
+                    )
+    y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)
+    return gbm, y_pred
+
+
+def train_svr(data_split, model_params, sample_weight=None):
+    X_train, X_test, y_train, y_test = data_split
+
+    svr = Pipeline([('scaler', MinMaxScaler()),
+                    ('model', SVR(**model_params))])
+
+    # train the model
+    print('Starting training...')
+    svr.fit(X_train, y_train, model__sample_weight=sample_weight)
+    y_pred = svr.predict(X_test)
+    return svr, y_pred
+
+
+def build_model(df, args):
+    np.random.seed(42)
+    # parameters
+    numerical_features, categorical_features, label, model_name, model_params, col_time = str_to_list(
+        args['numerical_features']), str_to_list(args['categorical_features']), args['label'], args['model_name'], eval(
+        args['model_params']), args['col_time']
+
+    features = numerical_features + categorical_features
+    print("features:************", features)
+    if 'is_limit' in df.columns:
+        df = df[df['is_limit'] == False]
+    # drop days whose average feature missing rate exceeds 20%
+    df = missing_features(df, features, col_time)
+    df = df[~np.isnan(df[label])]
+    # split the data into training and test sets
+    df_train, df_test = train_test_split(df, test_size=0.2, random_state=42, shuffle=False)
+    X_train, y_train = df_train[features].values, df_train[label].values
+    X_test, y_test = df_test[features].values, df_test[label].values
+
+    # get sample weights
+    sample_weight = get_sample_weight(df_train, label=label, args=args)
+
+    model_type = args['model_type']
+    # distinguish conventional ML models from lgb; only SVR is instantiated here, extensible later
+    if model_type == "lightgbm":
+        logger.info("lightgbm training")
+        num_boost_round = int(args['num_boost_round'])
+        model, y_pred = train_lgb([X_train, X_test, y_train, y_test], categorical_features, model_params,
+                                  num_boost_round, sample_weight=sample_weight)
+    elif model_type == "svr":
+        logger.info("svr training")
+        model, y_pred = train_svr([X_train, X_test, y_train, y_test], model_params, sample_weight=sample_weight)
+    else:
+        raise ValueError(f"Invalid model_type, must be one of [lightgbm, svr]")
+
+    # evaluation
+    mse = mean_squared_error(y_test, y_pred)
+    rmse = np.sqrt(mse)
+    mae = mean_absolute_error(y_test, y_pred)
+    logger.info(f'The test rmse is: {round(rmse, 2)}, the test mae is: {round(mae, 2)}')
+    return model, features
+
+
+@app.route('/model_training_ml', methods=['POST'])
+def model_training_ml():
+    # record the program start time
+    start_time = time.time()
+    result = {}
+    success = 0
+    print("Program starts execution!")
+    try:
+        args = request.values.to_dict()
+        logger.info(args)
+        power_df = get_data_from_mongo(args)
+        model, features = build_model(power_df, args)
+        insert_pickle_model_into_mongo(model, args, features=features)
+        success = 1
+    except Exception as e:
+        my_exception = traceback.format_exc()
+        logger.error(my_exception)
+    end_time = time.time()
+
+    result['success'] = success
+    result['args'] = args
+    result['log'] = g.log_stream.getvalue().splitlines()
+    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
+    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
+    print("Program execution ends!")
+    return result
+
+
+if __name__ == "__main__":
+    print("Program starts execution!")
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    logger = logging.getLogger("model_training_ml log")
+    from waitress import serve
+
+    serve(app, host="0.0.0.0", port=10128)
+    print("server start!")

+ 109 - 0
post_processing/post_process.py

@@ -0,0 +1,109 @@
+import pandas as pd
+from flask import Flask, request, jsonify, g
+import time
+import logging
+import traceback
+from io import StringIO
+from common.database_dml import get_data_from_mongo, insert_data_into_mongo
+from common.log_utils import init_request_logging, teardown_request_logging
+app = Flask('post_process——service')
+
+# capture logs before each request
+@app.before_request
+def setup_logging():
+    init_request_logging(logger)
+
+
+# detach the log handler after each request
+@app.after_request
+def teardown_logging(response):
+    return teardown_request_logging(response, logger)
+
+def get_data(args):
+    df = get_data_from_mongo(args)
+    col_time = args['col_time']
+    if not df.empty:
+        logger.info(f"{args['mongodb_read_table']} load success")
+        df[col_time] = pd.to_datetime(df[col_time])
+        df.set_index(col_time, inplace=True)
+        df.sort_index(inplace=True)
+    else:
+        raise ValueError("No forecast data was retrieved.")
+    return df
+
+
+def predict_result_adjustment(df, args):
+    """
+    光伏/风电 数据后处理 主要操作
+    1. 光伏 (夜间 置零 + 平滑)
+    2. 风电 (平滑)
+    3. cap 封顶
+    """
+    mongodb_database, plant_type, cap, col_time = args['mongodb_database'], args['plant_type'], float(args['cap']), \
+        args['col_time']
+    if 'smooth_window' in args.keys():
+        smooth_window = int(args['smooth_window'])
+    else:
+        smooth_window = 3
+
+    # smoothing
+    df_cp = df.copy()
+    df_cp['power_forecast'] = df_cp['power_forecast'].rolling(window=smooth_window, min_periods=1,
+                                                              center=True).mean().clip(0, 0.985 * cap)
+    logger.info(f"smooth processed windows: {smooth_window}")
+
+    # zero out solar forecasts at night
+    if plant_type == 'solar' and 'mongodb_nwp_table' in args.keys():
+        nwp_param = {
+            'mongodb_database': mongodb_database,
+            'mongodb_read_table': args['mongodb_nwp_table'],
+            'col_time': col_time
+        }
+        nwp = get_data(nwp_param)
+
+        df_cp = df_cp.join(nwp['radiation'])
+        df_cp.loc[nwp['radiation'] == 0, 'power_forecast'] = 0
+        df_cp.drop(columns=['radiation'], inplace=True)
+        logger.info("solar processed")
+    df_cp['power_forecast'] = round(df_cp['power_forecast'], 2)
+    df_cp.reset_index(inplace=True)
+    df_cp[col_time] = df_cp[col_time].dt.strftime('%Y-%m-%d %H:%M:%S')
+    return df_cp
+
+
+@app.route('/post_process', methods=['POST'])
+def data_join():
+    # record the program start time
+    start_time = time.time()
+    result = {}
+    success = 0
+    print("Program starts execution!")
+    try:
+        args = request.values.to_dict()
+        print('args', args)
+        logger.info(args)
+        df_pre = get_data(args)
+        res_df = predict_result_adjustment(df_pre, args)
+        insert_data_into_mongo(res_df, args)
+        success = 1
+    except Exception as e:
+        my_exception = traceback.format_exc()
+        logger.error(my_exception)
+    end_time = time.time()
+    result['success'] = success
+    result['args'] = args
+    result['log'] = g.log_stream.getvalue().splitlines()
+    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
+    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
+    print("Program execution ends!")
+    return result
+
+
+if __name__ == "__main__":
+    print("Program starts execution!")
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    logger = logging.getLogger("post_process")
+    from waitress import serve
+
+    serve(app, host="0.0.0.0", port=10130)
+    print("server start!")

+ 46 - 42
run_all.py

@@ -2,49 +2,53 @@ import subprocess
 import os
 # define the services to launch (script path and port)
 services = [
-    ("data_processing/data_operation/data_join.py", 10094),
-    ("data_processing/data_operation/mysql_to_mongo.py", 10095),
-    ("data_processing/data_operation/pre_prod_ftp.py", 10118),
-    ("data_processing/processing_limit_power/processing_limit_power_by_agcavc.py", 10086),
-    ("data_processing/processing_limit_power/processing_limit_power_by_machines.py", 10087),
-    ("data_processing/processing_limit_power/processing_limit_power_by_records.py", 10088),
-    ("data_processing/processing_limit_power/processing_limit_power_by_statistics_light.py", 10085),
-    ("data_processing/processing_limit_power/processing_limit_power_by_statistics_wind.py", 10093),
-    ("data_processing/data_operation/pre_prod_ftp.py", '_'),
-    ("evaluation_processing/analysis_report.py", 10092),
-    ("evaluation_processing/evaluation_accuracy.py", 10091),
-    ("evaluation_processing/analysis_cdq.py", 10108),
-    ("models_processing/model_train/model_training_lightgbm.py", 10089),
-    ("models_processing/model_predict/model_prediction_lightgbm.py", 10090),
-    ("models_processing/model_train/model_training_lstm.py", 10096),
-    ("models_processing/model_predict/model_prediction_lstm.py", 10097),
-    ("models_processing/model_tf/tf_bp_pre.py", 10110),
-    ("models_processing/model_tf/tf_bp_train.py", 10111),
-    ("models_processing/model_tf/tf_cnn_pre.py", 10112),
-    ("models_processing/model_tf/tf_cnn_train.py", 10113),
-    ("models_processing/model_tf/tf_lstm_pre.py", 10114),
-    ("models_processing/model_tf/tf_lstm_train.py", 10115),
-    ("models_processing/model_tf/tf_test_pre.py", 10116),
-    ("models_processing/model_tf/tf_test_train.py", 10117),
-    ("models_processing/model_tf/tf_lstm2_pre.py", 10120),
-    ("models_processing/model_tf/tf_lstm2_train.py", 10119),
-    ("models_processing/model_tf/tf_lstm3_pre.py", 10122),
-    ("models_processing/model_tf/tf_lstm3_train.py", 10121),
-    ("models_processing/model_tf/tf_lstm_zone_pre.py", 10125),
-    ("models_processing/model_tf/tf_lstm_zone_train.py", 10124),
+    # ("data_processing/data_operation/data_join.py", 10094),
+    # ("data_processing/data_operation/mysql_to_mongo.py", 10095),
+    # ("data_processing/data_operation/pre_prod_ftp.py", 10118),
+    # ("data_processing/processing_limit_power/processing_limit_power_by_agcavc.py", 10086),
+    # ("data_processing/processing_limit_power/processing_limit_power_by_machines.py", 10087),
+    # ("data_processing/processing_limit_power/processing_limit_power_by_records.py", 10088),
+    # ("data_processing/processing_limit_power/processing_limit_power_by_statistics_light.py", 10085),
+    # ("data_processing/processing_limit_power/processing_limit_power_by_statistics_wind.py", 10093),
+    # ("data_processing/data_operation/pre_prod_ftp.py", '_'),
+    # ("evaluation_processing/analysis_report.py", 10092),
+    # ("evaluation_processing/evaluation_accuracy.py", 10091),
+    # ("evaluation_processing/analysis_cdq.py", 10108),
+    # ("models_processing/model_train/model_training_lightgbm.py", 10089),
+    # ("models_processing/model_predict/model_prediction_lightgbm.py", 10090),
+    # ("models_processing/model_train/model_training_lstm.py", 10096),
+    # ("models_processing/model_predict/model_prediction_lstm.py", 10097),
+    # ("models_processing/model_tf/tf_bp_pre.py", 10110),
+    # ("models_processing/model_tf/tf_bp_train.py", 10111),
+    # ("models_processing/model_tf/tf_cnn_pre.py", 10112),
+    # ("models_processing/model_tf/tf_cnn_train.py", 10113),
+    # ("models_processing/model_tf/tf_lstm_pre.py", 10114),
+    # ("models_processing/model_tf/tf_lstm_train.py", 10115),
+    # ("models_processing/model_tf/tf_test_pre.py", 10116),
+    # ("models_processing/model_tf/tf_test_train.py", 10117),
+    # ("models_processing/model_tf/tf_lstm2_pre.py", 10120),
+    # ("models_processing/model_tf/tf_lstm2_train.py", 10119),
+    # ("models_processing/model_tf/tf_lstm3_pre.py", 10122),
+    # ("models_processing/model_tf/tf_lstm3_train.py", 10121),
+    # ("models_processing/model_tf/tf_lstm_zone_pre.py", 10125),
+    # ("models_processing/model_tf/tf_lstm_zone_train.py", 10124),
+    #
+    # ("post_processing/post_processing.py", 10098),
+    # ("evaluation_processing/analysis.py", 10099),
+    # ("models_processing/model_predict/res_prediction.py", 10105),
+    # ("data_processing/data_operation/pre_data_ftp.py", 10101),
+    # ("data_processing/data_operation/data_nwp_ftp.py", 10102),
+    # ("models_processing/model_train/model_training_bp.py", 10103),
+    # ("models_processing/model_predict/model_prediction_bp.py", 10104),
+    # ("data_processing/data_operation/data_tj_nwp_ftp.py", 10106),
+    # ("post_processing/pre_post_processing.py", 10107),
+    # ("post_processing/cdq_coe_gen.py", 10123),
+    # ("models_processing/model_predict/model_prediction_photovoltaic_physical.py", 10126),
+    # ("data_processing/data_operation/hive_to_mongo.py", 10127),
 
-    ("post_processing/post_processing.py", 10098),
-    ("evaluation_processing/analysis.py", 10099),
-    ("models_processing/model_predict/res_prediction.py", 10105),
-    ("data_processing/data_operation/pre_data_ftp.py", 10101),
-    ("data_processing/data_operation/data_nwp_ftp.py", 10102),
-    ("models_processing/model_train/model_training_bp.py", 10103),
-    ("models_processing/model_predict/model_prediction_bp.py", 10104),
-    ("data_processing/data_operation/data_tj_nwp_ftp.py", 10106),
-    ("post_processing/pre_post_processing.py", 10107),
-    ("post_processing/cdq_coe_gen.py", 10123),
-    ("models_processing/model_predict/model_prediction_photovoltaic_physical.py", 10126),
-    ("data_processing/data_operation/hive_to_mongo.py", 10127),
+    ("models_processing/model_train/model_training_ml.py", 10128),
+    ("models_processing/model_predict/model_prediction_ml.py", 10129),
+    ("post_processing/post_process.py", 10130)
 ]
 
 # get the root directory containing this script