27 commits ec26eb2294 ... 4ea561c18e

Author SHA1 Message Date
  David 4ea561c18e 06171256 1 week ago
  anweiguo 3b7b02db75 Merge branch 'dev_awg' of http://git.jiayuepowertech.com:9022/anweiguo/algorithm_platform into dev_awg 2 weeks ago
  liudawei 1f674cd1bd Merge branch 'dev_david' of anweiguo/algorithm_platform into dev_awg 2 weeks ago
  anweiguo 5ccfe317b1 1. Modify the photovoltaic physical model 2 weeks ago
  anweiguo 3dce5dbff9 1. Modify the installed-capacity type of the photovoltaic physical model 2 weeks ago
  huangzihan 3dd9fb4242 Merge branch 'dev_hzh' of anweiguo/algorithm_platform into dev_awg 2 weeks ago
  hzh e626ddc6b6 fix: update logging and the error raised when no weight parameter is given 2 weeks ago
  anweiguo 4888026061 1. Modify 曦谋 (xmo) hive to mongo 2 weeks ago
  huangzihan 0b616756dd Merge branch 'dev_hzh' of anweiguo/algorithm_platform into dev_awg 2 weeks ago
  hzh f242cd7535 fix: uncomment code 2 weeks ago
  anweiguo 776820a115 Merge branch 'dev_hzh' of anweiguo/algorithm_platform into dev_awg 2 weeks ago
  hzh 2a62d21022 fix: rename Flask app 2 weeks ago
  hzh fcaf1776bd fix: unify prediction parameters 2 weeks ago
  hzh 7936fd42b3 fix: set service port 2 weeks ago
  hzh 1ea02b834e fix: set service port 2 weeks ago
  hzh e9710dac23 Merge branch 'refs/heads/dev_awg' into dev_hzh 2 weeks ago
  anweiguo 50328097c4 1. Configure hive 2 weeks ago
  anweiguo 4af5b64c9b 1. Configure JDK 2 weeks ago
  anweiguo 154f718263 1. Configure JDK 2 weeks ago
  liudawei a0fa19a1d1 Merge branch 'dev_david' of anweiguo/algorithm_platform into dev_awg 2 weeks ago
  hzh bc717026e4 fix: set service port 2 weeks ago
  anweiguo a95b93b205 1. Configure JDK 2 weeks ago
  hzh d0b121d0df docs: remove comments and error prints 2 weeks ago
  liudawei 44e00acfb7 Merge branch 'dev_david' of anweiguo/algorithm_platform into dev_awg 2 weeks ago
  hzh f006a988e9 feat: support SVR and sample weights 2 weeks ago
  hzh 21f39260c9 fix adjust 2 weeks ago
  hzh 0cc21a30c7 feat: support lightgbm and SVR 1 month ago

+ 9 - 2
Dockerfile

@@ -3,13 +3,20 @@ FROM 192.168.1.36:5000/python:3.12
 
 ENV TZ=Asia/Shanghai
 RUN apt-get update && \
-    apt-get install -y tzdata && \
+    apt-get install -y tzdata openjdk-17-jdk && \
     ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && \
     echo $TZ > /etc/timezone && \
     DEBIAN_FRONTEND=noninteractive dpkg-reconfigure tzdata && \
-    apt-get clean
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+
 
 ENV LANG=en_US.UTF-8
+ENV JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64
+ENV PATH="$JAVA_HOME/bin:$PATH"
+ENV CLASSPATH=.:$JAVA_HOME/lib:$JAVA_HOME/lib/tools.jar
+
 # Set the working directory
 WORKDIR /app
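
The JDK is installed so the Hive JDBC bridge in common/database_dml.py can start a JVM via jaydebeapi (see the hive-jdbc-standalone.jar diff below). A minimal in-container sanity-check sketch, assuming the ENV paths set above (the script name is hypothetical):

    # check_jvm.py -- sketch only; paths follow the ENV lines in this Dockerfile
    import os
    import subprocess

    assert os.environ["JAVA_HOME"] == "/usr/lib/jvm/java-17-openjdk-amd64"
    subprocess.run(["java", "-version"], check=True)  # resolves via $JAVA_HOME/bin on PATH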
 

+ 42 - 11
common/database_dml.py

@@ -74,6 +74,20 @@ def get_df_list_from_mongo(args):
     return df_list
 
 
+def delete_data_from_mongo(args):
+    mongodb_connection = config['mongodb']['mongodb_connection']
+    mongodb_database = args['mongodb_database']
+    mongodb_write_table = args['mongodb_write_table']
+    client = MongoClient(mongodb_connection)
+    db = client[mongodb_database]
+    collection = db[mongodb_write_table]
+    if mongodb_write_table in db.list_collection_names():
+        collection.drop()
+        print(f"Collection '{mongodb_write_table}' already exists; dropped successfully!")
+    else:
+        print(f"Collection '{mongodb_write_table}' does not exist; nothing to delete!")
+
+
 def insert_data_into_mongo(res_df, args):
     """
     Insert data into a MongoDB collection, with a choice of overwrite, append, or upsert by a specified key.
@@ -144,7 +158,7 @@ def get_data_fromMysql(params):
     return df
 
 
-def insert_pickle_model_into_mongo(model, args):
+def insert_pickle_model_into_mongo(model, args, features=None):
     # 获取 hive 配置部分
     mongodb_connection = config['mongodb']['mongodb_connection']
     mongodb_database, mongodb_write_table, model_name = args['mongodb_database'], args['mongodb_write_table'], args[
@@ -157,6 +171,8 @@ def insert_pickle_model_into_mongo(model, args):
         'model_name': model_name,
         'model': model_bytes,  # 将模型字节流存入数据库
     }
+    if features is not None:
+        model_data['features'] = features
     print('Training completed!')
 
     if mongodb_write_table in db.list_collection_names():
@@ -729,15 +745,28 @@ def get_xmo_data_from_hive(args):
     password = hive_config['password']
     features = config['xmo']['features']
     numeric_features = config['xmo']['numeric_features']
-    if 'moment' not in args or 'farm_id' not in args:
-        msg_error = 'One or more of the following parameters are missing: moment, farm_id!'
+    if 'farm_id' not in args:
+        msg_error = 'One or more of the following parameters are missing: farm_id!'
         return msg_error
     else:
-        moment = args['moment']
         farm_id = args['farm_id']
-
+        if 'moment' in args:
+            moment = args['moment']
+        else:
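+            # assumption: when 'moment' is omitted, args['current_date'] (format %Y%m%d%H) must be provided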
+            hour = datetime.strptime(args['current_date'], "%Y%m%d%H").hour
+            if hour < 3:
+                moment = '00'
+            elif hour < 6:
+                moment = '03'
+            elif hour < 15:
+                moment = '09'
+            elif hour < 19:
+                moment = '12'
+            else:
+                moment = '18'
+            print(moment)
         if 'current_date' in args:
-            current_date = datetime.strptime(args['current_date'], "%Y%m%d")
+            current_date = datetime.strptime(args['current_date'], "%Y%m%d%H")
         else:
             current_date = datetime.now()
         if 'days' in args:
@@ -746,6 +775,7 @@ def get_xmo_data_from_hive(args):
             days = 1
         json_feature = f"nwp_xmo_{moment}"
         # establish the connection
         conn = jaydebeapi.connect(driver_class, jdbc_url, [user, password], jar_file)
         # query the Hive table
         cursor = conn.cursor()
@@ -757,7 +787,7 @@ def get_xmo_data_from_hive(args):
             else:
                 query_sql += "union \n"
 
-            query_sql += """select rowkey,datatimestamp,{2} from hbase_forecast.forecast_xmo_d{3} 
+            query_sql += """select rowkey,datatimestamp,{2} from hbase_forecast.forecast_xmo_d{3}
                                                      where rowkey>='{0}-{1}0000' and rowkey<='{0}-{1}2345' \n""".format(
                 farm_id, sysdate_pre, json_feature, i)
         print("query_sql\n", query_sql)
@@ -781,14 +811,15 @@ def get_xmo_data_from_hive(args):
             return df_features[features]
 
 
+
 if __name__ == "__main__":
     print("Program starts execution!")
     args = {
-        'moment': '06',
-        'current_date': '20250609',
+        # 'moment': '06',
+        'current_date': '2025060901',
         'farm_id': 'J00883',
         'days': '13'
     }
-    df = get_xmo_data_from_hive(args)
-    print(df.head(2),df.shape)
+    get_xmo_data_from_hive(args)
+
     print("server start!")

BIN
common/jar/hive-jdbc-standalone.jar


+ 25 - 0
common/log_utils.py

@@ -0,0 +1,25 @@
+# log_utils.py
+import logging
+from io import StringIO
+from flask import g
+
+
+def init_request_logging(logger):
+    # create an in-memory buffer for this request's log records
+    g.log_stream = StringIO()
+
+    # create a custom log handler writing to that buffer
+    handler = logging.StreamHandler(g.log_stream)
+    handler.setFormatter(logging.Formatter(
+        '%(asctime)s [%(levelname)s] %(message)s'
+    ))
+
+    # temporarily attach it to the logger
+    g.log_handler = handler
+    logger.addHandler(handler)
+
+
+def teardown_request_logging(response, logger):
+    if hasattr(g, 'log_handler'):
+        logger.removeHandler(g.log_handler)
+    return response
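
These two helpers are meant to be registered as Flask request hooks, mirroring how model_training_ml.py below wires them up; a minimal sketch:

    from flask import Flask
    import logging
    from common.log_utils import init_request_logging, teardown_request_logging

    app = Flask('example_service')
    logger = logging.getLogger('example')

    @app.before_request
    def setup_logging():
        init_request_logging(logger)   # attach the per-request StringIO handler

    @app.after_request
    def teardown_logging(response):
        return teardown_request_logging(response, logger)  # detach it again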

+ 3 - 1
data_processing/data_operation/hive_to_mongo.py

@@ -2,7 +2,8 @@ from flask import Flask,request,jsonify
 import time
 import logging
 import traceback
-from common.database_dml import insert_data_into_mongo,get_xmo_data_from_hive
+from common.database_dml import insert_data_into_mongo, get_xmo_data_from_hive, delete_data_from_mongo
+
 app = Flask('hive_to_mongo——service')
 
 
@@ -22,6 +23,7 @@ def data_join():
         args = request.values.to_dict()
         print('args', args)
         logger.info(args)
+        delete_data_from_mongo(args)
         df_hive = get_xmo_data_from_hive(args)
         if isinstance(df_hive, str):
             success = 0

+ 46 - 0
data_processing/data_operation/weight.py

@@ -0,0 +1,46 @@
+import numpy as np
+
+
+def balance_weights(y: np.ndarray, bins=10, normalize: bool = True, **kwargs) -> np.ndarray:
+    """
+    Balance weights: the fewer samples a bin holds, the larger its weight.
+    """
+    bins = int(bins)
+    counts, bin_edges = np.histogram(y, bins=bins)
+
+    # digitize without right=True, so the minimum value still lands in bin 0
+    bin_indices = np.digitize(y, bin_edges[1:-1], right=False)
+
+    # bin_counts is indexed from 0 to bins-1
+    bin_counts = {i: count for i, count in enumerate(counts)}
+
+    # assign a weight to each sample (fallback: a large default weight if a bin count is missing)
+    weights = np.array([1.0 / bin_counts.get(b, 1e-6) for b in bin_indices])
+
+    if normalize:
+        weights /= np.mean(weights)
+
+    return weights
+
+def south_weight(target: np.ndarray, cap, **kwargs) -> np.ndarray:
+    """
+    Handle the southern grid's unusual assessment rules.
+    To avoid pressing the curve too low, the weights are tempered here (the square root keeps the weight distribution from becoming too dispersed).
+    """
+    cap = float(cap)
+    weight = 1 / np.sqrt(np.where(target < 0.2 * cap, 0.2 * cap, target))
+    return weight
+
+def standard_weight(target: np.ndarray, **kwargs) -> np.ndarray:
+    """
+    Standardized weights.
+    """
+    weight = np.sqrt(np.abs(target - np.mean(target))) / np.std(target)
+    return weight
+
+# ------------------------------ weight function registry ------------------------------
+WEIGHT_REGISTER = {
+    "balance": balance_weights,
+    "south": south_weight,
+    "std": standard_weight
+}
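
A hedged usage sketch of the registry; the synthetic target below is for illustration only:

    import numpy as np
    from data_processing.data_operation.weight import WEIGHT_REGISTER

    y = np.random.rand(1000) * 50               # synthetic power series
    w = WEIGHT_REGISTER['balance'](y, bins=10)  # sparsely populated power levels get larger weights
    print(w.mean())                             # ~1.0, since normalize=True by default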

+ 1 - 2
models_processing/model_predict/model_prediction_lightgbm.py

@@ -119,5 +119,4 @@ if __name__=="__main__":
     logger = logging.getLogger("model_prediction_lightgbm log")
     from waitress import serve
     serve(app, host="0.0.0.0", port=10090)
-    print("server start!")
-    
+    print("server start!")

+ 163 - 0
models_processing/model_predict/model_prediction_ml.py

@@ -0,0 +1,163 @@
+import pandas as pd
+from pymongo import MongoClient
+import pickle
+from flask import Flask, request, g
+import time
+import logging
+import traceback
+from common.database_dml import get_data_from_mongo, insert_data_into_mongo
+from common.alert import send_message
+from datetime import datetime, timedelta
+import pytz
+from pytz import timezone
+from common.log_utils import init_request_logging, teardown_request_logging
+from common.processing_data_common import get_xxl_dq
+
+app = Flask('model_prediction_ml——service')
+
+@app.before_request
+def setup_logging():
+    init_request_logging(logger)
+
+# detach the log handler after each request
+@app.after_request
+def teardown_logging(response):
+    return teardown_request_logging(response, logger)
+
+def str_to_list(arg):
+    if arg == '':
+        return []
+    else:
+        return arg.split(',')
+
+def forecast_data_distribution(pre_data, args):
+    col_time = args['col_time']
+    farm_id = args['farm_id']
+    dt = datetime.now(pytz.utc).astimezone(timezone("Asia/Shanghai")).strftime('%Y%m%d')
+    tomorrow = (datetime.now(pytz.utc).astimezone(timezone("Asia/Shanghai")) + timedelta(days=1)).strftime('%Y-%m-%d')
+    field_mapping = {'clearsky_ghi': 'clearskyGhi', 'dni_calcd': 'dniCalcd', 'surface_pressure': 'surfacePressure',
+                     'wd140m': 'tj_wd140', 'ws140m': 'tj_ws140', 'wd170m': 'tj_wd170', 'cldt': 'tj_tcc',
+                     'wd70m': 'tj_wd70',
+                     'ws100m': 'tj_ws100', 'DSWRFsfc': 'tj_radiation', 'wd10m': 'tj_wd10', 'TMP2m': 'tj_t2',
+                     'wd30m': 'tj_wd30',
+                     'ws30m': 'tj_ws30', 'rh2m': 'tj_rh', 'PRATEsfc': 'tj_pratesfc', 'ws170m': 'tj_ws170',
+                     'wd50m': 'tj_wd50',
+                     'ws50m': 'tj_ws50', 'wd100m': 'tj_wd100', 'ws70m': 'tj_ws70', 'ws10m': 'tj_ws10',
+                     'psz': 'tj_pressure',
+                     'cldl': 'tj_lcc', 'pres': 'tj_pres', 'dateTime': 'date_time'}
+    # rename columns according to the field mapping
+    pre_data = pre_data.rename(columns=field_mapping)
+
+    if len(pre_data) == 0:
+        logger.info("nwp dataframe is empty")
+        send_message('lightgbm prediction component', farm_id, 'Warning: the fetched NWP data is empty; the forecast file cannot be generated!')
+        result = get_xxl_dq(farm_id, dt)
+
+    elif len(pre_data[pre_data[col_time].str.contains(tomorrow)]) < 96:
+        send_message('lightgbm prediction component', farm_id, "Day-ahead records are missing (fewer than 96 rows); falling back to DQ with gap filling!")
+        result = get_xxl_dq(farm_id, dt)
+    else:
+        df = pre_data.sort_values(by=col_time).fillna(method='ffill').fillna(method='bfill')
+        mongodb_connection, mongodb_database, mongodb_model_table, model_name = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/", \
+            args['mongodb_database'], args['mongodb_model_table'], args['model_name']
+        client = MongoClient(mongodb_connection)
+        db = client[mongodb_database]
+        collection = db[mongodb_model_table]
+        model_data = collection.find_one({"model_name": model_name})
+        if model_data is not None:
+            logger.info(f"use {model_data['model_name']} features: {model_data.get('features')}")
+            model_binary = model_data['model']  # this field holds the pickled model bytes
+            # deserialize the model
+            model = pickle.loads(model_binary)
+            if 'features' in model_data.keys():
+                features = model_data['features']
+            else:
+                features = model.feature_name()
+            diff = set(features) - set(pre_data.columns)
+            if len(diff) > 0:
+                send_message('lightgbm prediction component', farm_id, f'NWP feature columns are missing; falling back to DQ! features: {diff}')
+                result = get_xxl_dq(farm_id, dt)
+            else:
+                df['power_forecast'] = model.predict(df[features])
+                df.loc[df['power_forecast'] < 0, 'power_forecast'] = 0
+                logger.info("model predict result successfully!")
+                if 'farm_id' not in df.columns:
+                    df['farm_id'] = farm_id
+                result = df[['farm_id', 'date_time', 'power_forecast']]
+        else:
+            send_message('lightgbm prediction component', farm_id, "Model file missing; falling back to DQ with gap filling!")
+            result = get_xxl_dq(farm_id, dt)
+    result['power_forecast'] = round(result['power_forecast'], 2)
+    return result
+
+def model_prediction(df, args):
+    mongodb_connection, mongodb_database, mongodb_model_table, model_name, howLongAgo, farm_id, target = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/", \
+    args['mongodb_database'], args['mongodb_model_table'], args['model_name'], int(args['howLongAgo']), args['farm_id'], \
+    args['target']
+    client = MongoClient(mongodb_connection)
+    db = client[mongodb_database]
+    collection = db[mongodb_model_table]
+    model_data = collection.find_one({"model_name": model_name})
+    if 'is_limit' in df.columns:
+        df = df[df['is_limit'] == False]
+
+    if model_data is not None:
+        logger.info(f"use {model_data['model_name']} features: {model_data.get('features')}")
+        model_binary = model_data['model']  # this field holds the pickled model bytes
+        # deserialize the model
+        model = pickle.loads(model_binary)
+        if 'features' in model_data.keys():
+            features = model_data['features']
+        else:
+            features = model.feature_name()
+        df.dropna(subset=features, inplace=True)
+        df['power_forecast'] = model.predict(df[features])
+        df.loc[df['power_forecast'] < 0, 'power_forecast'] = 0
+        df['model'] = model_name
+        df['howLongAgo'] = howLongAgo
+        df['farm_id'] = farm_id
+        logger.info("model predict result successfully!")
+
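+    # if no model document was found, df lacks the forecast columns selected below; callers assume the model exists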
+    return df[['dateTime', 'howLongAgo', 'model', 'farm_id', 'power_forecast', target]]
+
+@app.route('/model_prediction_ml', methods=['POST'])
+def model_prediction_ml():
+    # record the program start time
+    start_time = time.time()
+    result = {}
+    success = 0
+    print("Program starts execution!")
+    try:
+        args = request.values.to_dict()
+        print('args', args)
+        forecast_file = int(args['forecast_file'])
+        power_df = get_data_from_mongo(args)
+        if forecast_file == 1:
+            predict_data = forecast_data_distribution(power_df, args)
+        else:
+            predict_data = model_prediction(power_df, args)
+        insert_data_into_mongo(predict_data, args)
+        success = 1
+    except Exception as e:
+        my_exception = traceback.format_exc()
+        logger.error(my_exception)
+    end_time = time.time()
+
+    result['success'] = success
+    result['args'] = args
+    result['log'] = g.log_stream.getvalue().splitlines()
+    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
+    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
+    print("Program execution ends!")
+    return result
+
+
+
+if __name__ == "__main__":
+    print("Program starts execution!")
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    logger = logging.getLogger("model_prediction_ml log")
+    from waitress import serve
+
+    serve(app, host="0.0.0.0", port=10129)
+    print("server start!")

+ 12 - 3
models_processing/model_predict/model_prediction_photovoltaic_physical.py

@@ -22,23 +22,30 @@ def forecast_data_distribution(pre_data, args):
     col_time = args['col_time']
     farm_id = args['farm_id']
     col_radiance = args['col_radiance']
-    radiance_max = float(args['radiance_max'])
+    pre_radiance_max = pre_data[col_radiance].max()
     cap = float(args['cap'])
     pre_data['farm_id'] = farm_id
+    if 'control_type' in args and args['control_type'] == 'cut':
+        radiance_max = float(args['radiance_max'])
+    else:
+        radiance_max = max(float(args['radiance_max']), pre_radiance_max)
     pre_data['power_forecast'] = round(pre_data[col_radiance] * cap / radiance_max, 2)
+
     if 'sunrise_time' in args:
         sunrise_time = args['sunrise_time']
         pre_data.loc[pre_data[col_time].dt.time < sunrise_time, 'power_forecast'] = 0
     if 'sunset_time' in args:
         sunset_time = args['sunset_time']
-        pre_data[pre_data[col_time] > sunset_time, 'power_forecast'] = 0
+        pre_data.loc[pre_data[col_time] > sunset_time, 'power_forecast'] = 0
+    pre_data.loc[pre_data['power_forecast'] > cap, 'power_forecast'] = cap
+    pre_data.loc[pre_data['power_forecast'] < 0, 'power_forecast'] = 0
     return pre_data[['farm_id', 'date_time', 'power_forecast']]
 
 
 def model_prediction(df, args):
     # 新增日出、日落时间参数
     howLongAgo, farm_id, target, cap, col_radiance, radiance_max, model_name, col_time = int(args['howLongAgo']), args['farm_id'], \
-    args['target'], args['cap'], args['col_radiance'], args['radiance_max'], args['model_name'], args['col_time']
+    args['target'], float(args['cap']), args['col_radiance'], float(args['radiance_max']), args['model_name'], args['col_time']
     df['power_forecast'] = round(df[col_radiance]*cap/radiance_max, 2)
     df.loc[df['power_forecast'] < 0, 'power_forecast'] = 0
     if 'sunrise_time' in args:
@@ -50,6 +57,8 @@ def model_prediction(df, args):
     df['model'] = model_name
     df['howLongAgo'] = howLongAgo
     df['farm_id'] = farm_id
+    df.loc[df['power_forecast'] > cap, 'power_forecast'] = cap
+    df.loc[df['power_forecast'] < 0, 'power_forecast'] = 0
     print("model predict result  successfully!")
     return df[['dateTime', 'howLongAgo', 'model', 'farm_id', 'power_forecast', target]]
 

+ 3 - 1
models_processing/model_tf/losses_cash.py

@@ -73,7 +73,9 @@ class SouthLossCash(Loss):
         ############### added penalty term ###############
         # compute the negative-bias penalty (penalize forecasts that fall below the actual value)
         negative_bias = tf.maximum(diff, 0.0)  # magnitude of the under-forecast part
-        penalty = self.penalty_coeff * tf.reduce_mean(negative_bias, axis=-1)
+        # put the penalty term on the same scale as the accuracy loss
+        negative_bias_ = tf.square(negative_bias / safe_base)
+        penalty = self.penalty_coeff * tf.reduce_mean(negative_bias_, axis=-1)
 
         # combine into the final loss (different weightings can be tried)
         total_loss = base_loss + penalty
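
In formula form (a sketch: $c$ is penalty_coeff, $d_i$ the signed error diff, $b_i$ the existing safe_base denominator, $N$ the output length), the reworked penalty is now on the same squared-relative scale as the base loss:

    \text{penalty} = c \cdot \frac{1}{N} \sum_{i=1}^{N} \left( \frac{\max(d_i,\, 0)}{b_i} \right)^{2}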

+ 0 - 20
models_processing/model_tf/tf_transformer.py

@@ -37,26 +37,6 @@ class TransformerHandler(object):
             self.logger.info("加载模型权重失败:{}".format(e.args))
 
     @staticmethod
-    def get_keras_model(opt, time_series=1, lstm_type=1):
-        loss = region_loss(opt)
-        l1_reg = regularizers.l1(opt.Model['lambda_value_1'])
-        l2_reg = regularizers.l2(opt.Model['lambda_value_2'])
-        nwp_input = Input(shape=(opt.Model['time_step']*time_series, opt.Model['input_size']), name='nwp')
-
-        con1 = Conv1D(filters=64, kernel_size=5, strides=1, padding='valid', activation='relu', kernel_regularizer=l2_reg)(nwp_input)
-        con1_p = MaxPooling1D(pool_size=5, strides=1, padding='valid', data_format='channels_last')(con1)
-        nwp_lstm = LSTM(units=opt.Model['hidden_size'], return_sequences=False, kernel_regularizer=l2_reg)(con1_p)
-        if lstm_type == 2:
-            output = Dense(opt.Model['time_step'], name='cdq_output')(nwp_lstm)
-        else:
-            output = Dense(opt.Model['time_step']*time_series, name='cdq_output')(nwp_lstm)
-
-        model = Model(nwp_input, output)
-        adam = optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-7, amsgrad=True)
-        model.compile(loss=loss, optimizer=adam)
-        return model
-
-    @staticmethod
     def get_transformer_model(opt, time_series=1):
         time_steps = 48
         input_features = 21

+ 143 - 0
models_processing/model_tf/tf_transformer_pre.py

@@ -0,0 +1,143 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+# @FileName  :tf_transformer_pre.py
+# @Time      :2025/2/13 10:52
+# @Author    :David
+# @Company: shenyang JY
+import json, copy
+import numpy as np
+from flask import Flask, request, g
+import logging, argparse, traceback
+from common.database_dml import *
+from common.processing_data_common import missing_features, str_to_list
+from data_processing.data_operation.data_handler import DataHandler
+from threading import Lock
+import os, time, yaml
+from copy import deepcopy
+model_lock = Lock()
+from itertools import chain
+from common.logs import Log
+from common.data_utils import deep_update
+from tf_transformer import TransformerHandler
+# logger = Log('tf_bp').logger()
+logger = Log('tf_ts').logger
+np.random.seed(42)  # NumPy random seed
+# tf.set_random_seed(42)  # TensorFlow random seed
+app = Flask('tf_lstm_pre——service')
+
+current_dir = os.path.dirname(os.path.abspath(__file__))
+with open(os.path.join(current_dir, 'lstm.yaml'), 'r', encoding='utf-8') as f:
+    global_config = yaml.safe_load(f)  # read-only global config
+
+@app.before_request
+def update_config():
+    # ------------ assemble and merge request parameters ------------
+    # deep-copy the global config, then merge in request parameters
+    current_config = deepcopy(global_config)
+    request_args = request.values.to_dict()
+    # features rule: 1. if passed, parse and override 2. if absent, keep the original value
+    request_args['features'] = request_args['features'].split(',') if 'features' in request_args else current_config['features']
+    request_args['time_series'] = request_args.get('time_series', 1)
+    current_config = deep_update(current_config, request_args)
+
+    # store in the request context
+    g.opt = argparse.Namespace(**current_config)
+    g.dh = DataHandler(logger, current_config)  # a fresh instance per request
+    g.trans = TransformerHandler(logger, current_config)
+
+@app.route('/tf_lstm_predict', methods=['POST'])
+def model_prediction_lstm():
+    # record the program start time
+    start_time = time.time()
+    result = {}
+    success = 0
+    dh = g.dh
+    trans = g.trans
+    args = deepcopy(g.opt.__dict__)
+    logger.info("Program starts execution!")
+    try:
+        pre_data = get_data_from_mongo(args)
+        if args.get('algorithm_test', 0):
+            field_mapping = {'clearsky_ghi': 'clearskyGhi', 'dni_calcd': 'dniCalcd','surface_pressure': 'surfacePressure'}
+            pre_data = pre_data.rename(columns=field_mapping)
+        feature_scaler, target_scaler = get_scaler_model_from_mongo(args)
+        trans.opt.cap = round(target_scaler.transform(np.array([[float(args['cap'])]]))[0, 0], 2)
+        trans.get_model(args)
+        dh.opt.features = json.loads(trans.model_params)['Model']['features'].split(',')
+        scaled_pre_x, pre_data = dh.pre_data_handler(pre_data, feature_scaler, time_series=args['time_series'], lstm_type=1)
+        res = list(chain.from_iterable(target_scaler.inverse_transform(trans.predict(scaled_pre_x))))
+        pre_data['farm_id'] = args.get('farm_id', 'null')
+        if int(args.get('algorithm_test', 0)):
+            pre_data[args['model_name']] = res[:len(pre_data)]
+            pre_data.rename(columns={args['col_time']: 'dateTime'}, inplace=True)
+            pre_data = pre_data[['dateTime', 'farm_id', args['target'], args['model_name'], 'dq']]
+            pre_data = pre_data.melt(id_vars=['dateTime', 'farm_id', args['target']], var_name='model', value_name='power_forecast')
+            res_cols = ['dateTime', 'power_forecast', 'farm_id', args['target'], 'model']
+            if 'howLongAgo' in args:
+                pre_data['howLongAgo'] = int(args['howLongAgo'])
+                res_cols += ['howLongAgo']
+        else:
+            pre_data['power_forecast'] = res[:len(pre_data)]
+            pre_data.rename(columns={args['col_time']: 'date_time'}, inplace=True)
+            res_cols = ['date_time', 'power_forecast', 'farm_id']
+        pre_data = pre_data[res_cols]
+
+        pre_data.loc[:, 'power_forecast'] = pre_data.loc[:, 'power_forecast'].apply(lambda x: float(f"{x:.2f}"))
+        pre_data.loc[pre_data['power_forecast'] > float(args['cap']), 'power_forecast'] = float(args['cap'])
+        pre_data.loc[pre_data['power_forecast'] < 0, 'power_forecast'] = 0
+
+        insert_data_into_mongo(pre_data, args)
+        success = 1
+    except Exception as e:
+        my_exception = traceback.format_exc()
+        my_exception = my_exception.replace("\n", "\t")
+        result['msg'] = my_exception
+    end_time = time.time()
+
+    result['success'] = success
+    result['args'] = args
+    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
+    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
+    print("Program execution ends!")
+    return result
+
+
+if __name__ == "__main__":
+    print("Program starts execution!")
+    from waitress import serve
+    serve(app, host="0.0.0.0", port=10114,
+          threads=8,  # number of worker threads (default 4; tune to the hardware)
+          channel_timeout=600  # connection timeout in seconds
+          )
+    print("server start!")
+
+    # ------------------------ test code ------------------------
+    # args_dict = {"mongodb_database": 'david_test', 'scaler_table': 'j00083_scaler', 'model_name': 'bp1.0.test',
+    #              'model_table': 'j00083_model', 'mongodb_read_table': 'j00083_test', 'col_time': 'date_time', 'mongodb_write_table': 'j00083_rs',
+    #              'features': 'speed10,direction10,speed30,direction30,speed50,direction50,speed70,direction70,speed90,direction90,speed110,direction110,speed150,direction150,speed170,direction170'}
+    # args_dict['features'] = args_dict['features'].split(',')
+    # arguments.update(args_dict)
+    # dh = DataHandler(logger, arguments)
+    # ts = TSHandler(logger)
+    # opt = argparse.Namespace(**arguments)
+    #
+    # opt.Model['input_size'] = len(opt.features)
+    # pre_data = get_data_from_mongo(args_dict)
+    # feature_scaler, target_scaler = get_scaler_model_from_mongo(arguments)
+    # pre_x = dh.pre_data_handler(pre_data, feature_scaler, opt)
+    # ts.get_model(arguments)
+    # result = ts.predict(pre_x)
+    # result1 = list(chain.from_iterable(target_scaler.inverse_transform([result.flatten()])))
+    # pre_data['power_forecast'] = result1[:len(pre_data)]
+    # pre_data['farm_id'] = 'J00083'
+    # pre_data['cdq'] = 1
+    # pre_data['dq'] = 1
+    # pre_data['zq'] = 1
+    # pre_data.rename(columns={arguments['col_time']: 'date_time'}, inplace=True)
+    # pre_data = pre_data[['date_time', 'power_forecast', 'farm_id', 'cdq', 'dq', 'zq']]
+    #
+    # pre_data['power_forecast'] = pre_data['power_forecast'].round(2)
+    # pre_data.loc[pre_data['power_forecast'] > opt.cap, 'power_forecast'] = opt.cap
+    # pre_data.loc[pre_data['power_forecast'] < 0, 'power_forecast'] = 0
+    #
+    # insert_data_into_mongo(pre_data, arguments)

+ 122 - 0
models_processing/model_tf/tf_transformer_train.py

@@ -0,0 +1,122 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+# @FileName  :tf_transformer_train.py
+# @Time      :2025/2/13 10:52
+# @Author    :David
+# @Company: shenyang JY
+import json, copy
+import numpy as np
+from flask import Flask, request, jsonify, g
+import traceback, uuid
+import logging, argparse
+from data_processing.data_operation.data_handler import DataHandler
+import os, time, yaml, threading
+from copy import deepcopy
+from models_processing.model_tf.tf_transformer import TransformerHandler
+from common.database_dml import *
+from common.logs import Log
+from common.data_utils import deep_update
+logger = Log('tf_ts').logger
+np.random.seed(42)  # NumPy random seed
+app = Flask('tf_lstm_train——service')
+
+current_dir = os.path.dirname(os.path.abspath(__file__))
+with open(os.path.join(current_dir, 'lstm.yaml'), 'r', encoding='utf-8') as f:
+    global_config = yaml.safe_load(f)  # read-only global config
+
+@app.before_request
+def update_config():
+    # ------------ assemble and merge request parameters ------------
+    # deep-copy the global config, then merge in request parameters
+    current_config = deepcopy(global_config)
+    request_args = request.values.to_dict()
+    # features rule: 1. if passed, parse and override 2. if absent, keep the original value
+    request_args['features'] = request_args['features'].split(',') if 'features' in request_args else current_config['features']
+    request_args['time_series'] = request_args.get('time_series', 1)
+    current_config = deep_update(current_config, request_args)
+
+    # store in the request context
+    g.opt = argparse.Namespace(**current_config)
+    g.dh = DataHandler(logger, current_config)  # a fresh instance per request
+    g.trans = TransformerHandler(logger, current_config)
+
+
+@app.route('/tf_lstm_training', methods=['POST'])
+def model_training_lstm():
+    # record the program start time
+    start_time = time.time()
+    result = {}
+    success = 0
+    dh = g.dh
+    trans = g.trans
+    args = deepcopy(g.opt.__dict__)
+    logger.info("Program starts execution!")
+    try:
+        # ------------ fetch and preprocess training data ------------
+        train_data = get_data_from_mongo(args)
+        train_x, train_y, valid_x, valid_y, scaled_train_bytes, scaled_target_bytes, scaled_cap = dh.train_data_handler(train_data, time_series=args['time_series'])
+        trans.opt.cap = round(scaled_cap, 2)
+        trans.opt.Model['input_size'] = len(dh.opt.features)
+        # ------------ train the model and save it ------------
+        # 1. In incremental-training mode, load the pretrained model's feature parameters first, then preprocess the training data
+        # 2. In normal mode, preprocess the training data first, then build the model from the training-data features
+        model = trans.train_init() if trans.opt.Model['add_train'] else trans.get_transformer_model(trans.opt, time_series=args['time_series'])
+        if trans.opt.Model['add_train']:
+            if model:
+                feas = json.loads(trans.model_params)['features']
+                if set(feas).issubset(set(dh.opt.features)):
+                    dh.opt.features = list(feas)
+                    train_x, train_y, valid_x, valid_y, scaled_train_bytes, scaled_target_bytes, scaled_cap = dh.train_data_handler(train_data, time_series=args['time_series'])
+                else:
+                    model = trans.get_transformer_model(trans.opt, time_series=args['time_series'])
+                    logger.info("training-data features do not cover the pretrained model's features")
+            else:
+                model = trans.get_transformer_model(trans.opt, time_series=args['time_series'])
+        ts_model = trans.training(model, [train_x, train_y, valid_x, valid_y])
+        args['Model']['features'] = ','.join(dh.opt.features)
+        args['params'] = json.dumps(args)
+        args['descr'] = 'test'
+        args['gen_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
+
+        insert_trained_model_into_mongo(ts_model, args)
+        insert_scaler_model_into_mongo(scaled_train_bytes, scaled_target_bytes, args)
+        success = 1
+    except Exception as e:
+        my_exception = traceback.format_exc()
+        my_exception = my_exception.replace("\n", "\t")
+        result['msg'] = my_exception
+    end_time = time.time()
+    result['success'] = success
+    result['args'] = args
+    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
+    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
+    print("Program execution ends!")
+    return result
+
+
+if __name__ == "__main__":
+    print("Program starts execution!")
+    from waitress import serve
+    serve(app, host="0.0.0.0", port=10115,
+          threads=8,  # number of worker threads (default 4; tune to the hardware)
+          channel_timeout=600  # connection timeout in seconds
+          )
+    print("server start!")
+    # args_dict = {"mongodb_database": 'realtimeDq', 'scaler_table': 'j00600_scaler', 'model_name': 'lstm1',
+    # 'model_table': 'j00600_model', 'mongodb_read_table': 'j00600', 'col_time': 'dateTime',
+    # 'features': 'speed10,direction10,speed30,direction30,speed50,direction50,speed70,direction70,speed90,direction90,speed110,direction110,speed150,direction150,speed170,direction170'}
+    # args_dict['features'] = args_dict['features'].split(',')
+    # args.update(args_dict)
+    # dh = DataHandler(logger, args)
+    # ts = TSHandler(logger, args)
+    # opt = argparse.Namespace(**args)
+    # opt.Model['input_size'] = len(opt.features)
+    # train_data = get_data_from_mongo(args_dict)
+    # train_x, train_y, valid_x, valid_y, scaled_train_bytes, scaled_target_bytes = dh.train_data_handler(train_data)
+    # ts_model = ts.training([train_x, train_y, valid_x, valid_y])
+    #
+    # args_dict['gen_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
+    # args_dict['params'] = args
+    # args_dict['descr'] = 'test'
+    # insert_trained_model_into_mongo(ts_model, args_dict)
+    # insert_scaler_model_into_mongo(scaled_train_bytes, scaled_target_bytes, args_dict)

+ 164 - 0
models_processing/model_train/model_training_ml.py

@@ -0,0 +1,164 @@
+import lightgbm as lgb
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import mean_squared_error, mean_absolute_error
+from flask import Flask, request, g
+import time
+import traceback
+import logging
+from common.database_dml import get_data_from_mongo, insert_pickle_model_into_mongo
+from common.processing_data_common import missing_features, str_to_list
+from sklearn.pipeline import Pipeline
+from sklearn.svm import SVR
+from sklearn.preprocessing import MinMaxScaler
+from data_processing.data_operation.weight import WEIGHT_REGISTER
+from io import StringIO
+from common.log_utils import init_request_logging, teardown_request_logging
+
+app = Flask('model_training_ml——service')
+
+
+# capture logs before each request
+@app.before_request
+def setup_logging():
+    init_request_logging(logger)
+
+# detach the log handler after each request
+@app.after_request
+def teardown_logging(response):
+    return teardown_request_logging(response, logger)
+
+def get_sample_weight(df, label, args):
+    # sample weights
+    if 'sample_weight' in args.keys():
+        if args['sample_weight'] in WEIGHT_REGISTER.keys():
+            sample_weight = WEIGHT_REGISTER[args['sample_weight']](df[label].values.reshape(-1), **args)
+            logger.info(f"use predefined weights {args['sample_weight']}")
+        elif args['sample_weight'] in df.columns.tolist():
+            sample_weight = df[args['sample_weight']].values.reshape(-1)
+            logger.info(f'use dataframe col {args["sample_weight"]}')
+        else:
+            sample_weight = None
+            logger.info('sample_weight is neither in the predefined weights nor a column of the DataFrame, not applicable')
+    else:
+        sample_weight = None
+        logger.info('no sample_weight')
+    return sample_weight
+
+def train_lgb(data_split, categorical_features, model_params, num_boost_round=100, sample_weight=None):
+    X_train, X_test, y_train, y_test = data_split
+    # build the LightGBM datasets
+    lgb_train = lgb.Dataset(X_train, y_train, categorical_feature=categorical_features, weight=sample_weight)
+    lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
+    # set training parameters
+    params = {
+        'objective': 'regression',
+        'metric': 'rmse',
+        'boosting_type': 'gbdt',
+        'verbose': 1
+    }
+    params.update(model_params)
+    # train the model
+    print('Starting training...')
+    gbm = lgb.train(params,
+                    lgb_train,
+                    num_boost_round=num_boost_round,
+                    valid_sets=[lgb_train, lgb_eval],
+                    )
+    y_pred = gbm.predict(X_test, num_iteration=gbm.best_iteration)
+    return gbm, y_pred
+
+
+def train_svr(data_split, model_params, sample_weight=None):
+    X_train, X_test, y_train, y_test = data_split
+
+    svr = Pipeline([('scaler', MinMaxScaler()),
+                    ('model', SVR(**model_params))])
+
+    # train the model
+    print('Starting training...')
+    svr.fit(X_train, y_train, model__sample_weight=sample_weight)
+    y_pred = svr.predict(X_test)
+    return svr, y_pred
+
+
+def build_model(df, args):
+    np.random.seed(42)
+    # parameters
+    numerical_features, categorical_features, label, model_name, model_params, col_time = str_to_list(
+        args['numerical_features']), str_to_list(args['categorical_features']), args['label'], args['model_name'], eval(
+        args['model_params']), args['col_time']
+
+    features = numerical_features + categorical_features
+    print("features:************", features)
+    if 'is_limit' in df.columns:
+        df = df[df['is_limit'] == False]
+    # drop days whose average feature missing rate exceeds 20%
+    df = missing_features(df, features, col_time)
+    df = df[~np.isnan(df[label])]
+    # split the data into training and test sets
+    df_train, df_test = train_test_split(df, test_size=0.2, random_state=42, shuffle=False)
+    X_train, y_train = df_train[features].values, df_train[label].values
+    X_test, y_test = df_test[features].values, df_test[label].values
+
+    # get sample weights
+    sample_weight = get_sample_weight(df_train, label=label, args=args)
+
+    model_type = args['model_type']
+    # distinguish conventional ML models from lgb; only SVR is instantiated here, extensible later
+    if model_type == "lightgbm":
+        logger.info("lightgbm training")
+        num_boost_round = int(args['num_boost_round'])
+        model, y_pred = train_lgb([X_train, X_test, y_train, y_test], categorical_features, model_params,
+                                  num_boost_round, sample_weight=sample_weight)
+    elif model_type == "svr":
+        logger.info("svr training")
+        model, y_pred = train_svr([X_train, X_test, y_train, y_test], model_params, sample_weight=sample_weight)
+    else:
+        raise ValueError(f"Invalid model_type, must be one of [lightgbm, svr]")
+
+    # evaluation
+    mse = mean_squared_error(y_test, y_pred)
+    rmse = np.sqrt(mse)
+    mae = mean_absolute_error(y_test, y_pred)
+    logger.info(f'The test rmse is: {round(rmse, 2)}, the test mae is: {round(mae, 2)}')
+    return model, features
+
+
+@app.route('/model_training_ml', methods=['POST'])
+def model_training_ml():
+    # record the program start time
+    start_time = time.time()
+    result = {}
+    success = 0
+    print("Program starts execution!")
+    try:
+        args = request.values.to_dict()
+        logger.info(args)
+        power_df = get_data_from_mongo(args)
+        model, features = build_model(power_df, args)
+        insert_pickle_model_into_mongo(model, args, features=features)
+        success = 1
+    except Exception as e:
+        my_exception = traceback.format_exc()
+        logger.error(my_exception)
+    end_time = time.time()
+
+    result['success'] = success
+    result['args'] = args
+    result['log'] = g.log_stream.getvalue().splitlines()
+    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
+    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
+    print("Program execution ends!")
+    return result
+
+
+if __name__ == "__main__":
+    print("Program starts execution!")
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    logger = logging.getLogger("model_training_ml log")
+    from waitress import serve
+
+    serve(app, host="0.0.0.0", port=10128)
+    print("server start!")

+ 109 - 0
post_processing/post_process.py

@@ -0,0 +1,109 @@
+import pandas as pd
+from flask import Flask, request, jsonify, g
+import time
+import logging
+import traceback
+from io import StringIO
+from common.database_dml import get_data_from_mongo, insert_data_into_mongo
+from common.log_utils import init_request_logging, teardown_request_logging
+app = Flask('post_process——service')
+
+# capture logs before each request
+@app.before_request
+def setup_logging():
+    init_request_logging(logger)
+
+
+# detach the log handler after each request
+@app.after_request
+def teardown_logging(response):
+    return teardown_request_logging(response, logger)
+
+def get_data(args):
+    df = get_data_from_mongo(args)
+    col_time = args['col_time']
+    if not df.empty:
+        logger.info(f"{args['mongodb_read_table']} load success")
+        df[col_time] = pd.to_datetime(df[col_time])
+        df.set_index(col_time, inplace=True)
+        df.sort_index(inplace=True)
+    else:
+        raise ValueError("No forecast data was retrieved.")
+    return df
+
+
+def predict_result_adjustment(df, args):
+    """
+    光伏/风电 数据后处理 主要操作
+    1. 光伏 (夜间 置零 + 平滑)
+    2. 风电 (平滑)
+    3. cap 封顶
+    """
+    mongodb_database, plant_type, cap, col_time = args['mongodb_database'], args['plant_type'], float(args['cap']), \
+        args['col_time']
+    if 'smooth_window' in args.keys():
+        smooth_window = int(args['smooth_window'])
+    else:
+        smooth_window = 3
+
+    # smoothing
+    df_cp = df.copy()
+    df_cp['power_forecast'] = df_cp['power_forecast'].rolling(window=smooth_window, min_periods=1,
+                                                              center=True).mean().clip(0, 0.985 * cap)
+    logger.info(f"smooth processed windows: {smooth_window}")
+
+    # zero out solar forecasts at night
+    if plant_type == 'solar' and 'mongodb_nwp_table' in args.keys():
+        nwp_param = {
+            'mongodb_database': mongodb_database,
+            'mongodb_read_table': args['mongodb_nwp_table'],
+            'col_time': col_time
+        }
+        nwp = get_data(nwp_param)
+
+        df_cp = df_cp.join(nwp['radiation'])
+        df_cp.loc[nwp['radiation'] == 0, 'power_forecast'] = 0
+        df_cp.drop(columns=['radiation'], inplace=True)
+        logger.info("solar processed")
+    df_cp['power_forecast'] = round(df_cp['power_forecast'], 2)
+    df_cp.reset_index(inplace=True)
+    df_cp[col_time] = df_cp[col_time].dt.strftime('%Y-%m-%d %H:%M:%S')
+    return df_cp
+
+
+@app.route('/post_process', methods=['POST'])
+def data_join():
+    # record the program start time
+    start_time = time.time()
+    result = {}
+    success = 0
+    print("Program starts execution!")
+    try:
+        args = request.values.to_dict()
+        print('args', args)
+        logger.info(args)
+        df_pre = get_data(args)
+        res_df = predict_result_adjustment(df_pre, args)
+        insert_data_into_mongo(res_df, args)
+        success = 1
+    except Exception as e:
+        my_exception = traceback.format_exc()
+        logger.error(my_exception)
+    end_time = time.time()
+    result['success'] = success
+    result['args'] = args
+    result['log'] = g.log_stream.getvalue().splitlines()
+    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
+    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
+    print("Program execution ends!")
+    return result
+
+
+if __name__ == "__main__":
+    print("Program starts execution!")
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    logger = logging.getLogger("post_process")
+    from waitress import serve
+
+    serve(app, host="0.0.0.0", port=10130)
+    print("server start!")

+ 46 - 42
run_all.py

@@ -2,49 +2,53 @@ import subprocess
 import os
 # define the services to launch (script path and port)
 services = [
-    ("data_processing/data_operation/data_join.py", 10094),
-    ("data_processing/data_operation/mysql_to_mongo.py", 10095),
-    ("data_processing/data_operation/pre_prod_ftp.py", 10118),
-    ("data_processing/processing_limit_power/processing_limit_power_by_agcavc.py", 10086),
-    ("data_processing/processing_limit_power/processing_limit_power_by_machines.py", 10087),
-    ("data_processing/processing_limit_power/processing_limit_power_by_records.py", 10088),
-    ("data_processing/processing_limit_power/processing_limit_power_by_statistics_light.py", 10085),
-    ("data_processing/processing_limit_power/processing_limit_power_by_statistics_wind.py", 10093),
-    ("data_processing/data_operation/pre_prod_ftp.py", '_'),
-    ("evaluation_processing/analysis_report.py", 10092),
-    ("evaluation_processing/evaluation_accuracy.py", 10091),
-    ("evaluation_processing/analysis_cdq.py", 10108),
-    ("models_processing/model_train/model_training_lightgbm.py", 10089),
-    ("models_processing/model_predict/model_prediction_lightgbm.py", 10090),
-    ("models_processing/model_train/model_training_lstm.py", 10096),
-    ("models_processing/model_predict/model_prediction_lstm.py", 10097),
-    ("models_processing/model_tf/tf_bp_pre.py", 10110),
-    ("models_processing/model_tf/tf_bp_train.py", 10111),
-    ("models_processing/model_tf/tf_cnn_pre.py", 10112),
-    ("models_processing/model_tf/tf_cnn_train.py", 10113),
-    ("models_processing/model_tf/tf_lstm_pre.py", 10114),
-    ("models_processing/model_tf/tf_lstm_train.py", 10115),
-    ("models_processing/model_tf/tf_test_pre.py", 10116),
-    ("models_processing/model_tf/tf_test_train.py", 10117),
-    ("models_processing/model_tf/tf_lstm2_pre.py", 10120),
-    ("models_processing/model_tf/tf_lstm2_train.py", 10119),
-    ("models_processing/model_tf/tf_lstm3_pre.py", 10122),
-    ("models_processing/model_tf/tf_lstm3_train.py", 10121),
-    ("models_processing/model_tf/tf_lstm_zone_pre.py", 10125),
-    ("models_processing/model_tf/tf_lstm_zone_train.py", 10124),
+    # ("data_processing/data_operation/data_join.py", 10094),
+    # ("data_processing/data_operation/mysql_to_mongo.py", 10095),
+    # ("data_processing/data_operation/pre_prod_ftp.py", 10118),
+    # ("data_processing/processing_limit_power/processing_limit_power_by_agcavc.py", 10086),
+    # ("data_processing/processing_limit_power/processing_limit_power_by_machines.py", 10087),
+    # ("data_processing/processing_limit_power/processing_limit_power_by_records.py", 10088),
+    # ("data_processing/processing_limit_power/processing_limit_power_by_statistics_light.py", 10085),
+    # ("data_processing/processing_limit_power/processing_limit_power_by_statistics_wind.py", 10093),
+    # ("data_processing/data_operation/pre_prod_ftp.py", '_'),
+    # ("evaluation_processing/analysis_report.py", 10092),
+    # ("evaluation_processing/evaluation_accuracy.py", 10091),
+    # ("evaluation_processing/analysis_cdq.py", 10108),
+    # ("models_processing/model_train/model_training_lightgbm.py", 10089),
+    # ("models_processing/model_predict/model_prediction_lightgbm.py", 10090),
+    # ("models_processing/model_train/model_training_lstm.py", 10096),
+    # ("models_processing/model_predict/model_prediction_lstm.py", 10097),
+    # ("models_processing/model_tf/tf_bp_pre.py", 10110),
+    # ("models_processing/model_tf/tf_bp_train.py", 10111),
+    # ("models_processing/model_tf/tf_cnn_pre.py", 10112),
+    # ("models_processing/model_tf/tf_cnn_train.py", 10113),
+    # ("models_processing/model_tf/tf_lstm_pre.py", 10114),
+    # ("models_processing/model_tf/tf_lstm_train.py", 10115),
+    # ("models_processing/model_tf/tf_test_pre.py", 10116),
+    # ("models_processing/model_tf/tf_test_train.py", 10117),
+    # ("models_processing/model_tf/tf_lstm2_pre.py", 10120),
+    # ("models_processing/model_tf/tf_lstm2_train.py", 10119),
+    # ("models_processing/model_tf/tf_lstm3_pre.py", 10122),
+    # ("models_processing/model_tf/tf_lstm3_train.py", 10121),
+    # ("models_processing/model_tf/tf_lstm_zone_pre.py", 10125),
+    # ("models_processing/model_tf/tf_lstm_zone_train.py", 10124),
+    #
+    # ("post_processing/post_processing.py", 10098),
+    # ("evaluation_processing/analysis.py", 10099),
+    # ("models_processing/model_predict/res_prediction.py", 10105),
+    # ("data_processing/data_operation/pre_data_ftp.py", 10101),
+    # ("data_processing/data_operation/data_nwp_ftp.py", 10102),
+    # ("models_processing/model_train/model_training_bp.py", 10103),
+    # ("models_processing/model_predict/model_prediction_bp.py", 10104),
+    # ("data_processing/data_operation/data_tj_nwp_ftp.py", 10106),
+    # ("post_processing/pre_post_processing.py", 10107),
+    # ("post_processing/cdq_coe_gen.py", 10123),
+    # ("models_processing/model_predict/model_prediction_photovoltaic_physical.py", 10126),
+    # ("data_processing/data_operation/hive_to_mongo.py", 10127),
 
-    ("post_processing/post_processing.py", 10098),
-    ("evaluation_processing/analysis.py", 10099),
-    ("models_processing/model_predict/res_prediction.py", 10105),
-    ("data_processing/data_operation/pre_data_ftp.py", 10101),
-    ("data_processing/data_operation/data_nwp_ftp.py", 10102),
-    ("models_processing/model_train/model_training_bp.py", 10103),
-    ("models_processing/model_predict/model_prediction_bp.py", 10104),
-    ("data_processing/data_operation/data_tj_nwp_ftp.py", 10106),
-    ("post_processing/pre_post_processing.py", 10107),
-    ("post_processing/cdq_coe_gen.py", 10123),
-    ("models_processing/model_predict/model_prediction_photovoltaic_physical.py", 10126),
-    ("data_processing/data_operation/hive_to_mongo.py", 10127),
+    ("models_processing/model_train/model_training_ml.py", 10128),
+    ("models_processing/model_predict/model_prediction_ml.py", 10129),
+    ("post_processing/post_process.py", 10130)
 ]
 
 # get the root directory containing this script