David 1 mesiac pred
rodič
commit
8d50286ac4
1 zmenil súbory, kde vykonal 110 pridanie a 26 odobranie
  1. 110 26
      post_processing/cdq_coe_gen.py

+ 110 - 26
post_processing/cdq_coe_gen.py

@@ -7,6 +7,7 @@
 import os, requests, json, time, traceback
 import pandas as pd
 import numpy as np
+from bayes_opt import BayesianOptimization
 from common.database_dml_koi import get_data_from_mongo
 from flask import Flask, request, g
 from datetime import datetime
@@ -24,23 +25,25 @@ def update_config():
     g.coe = {'T'+str(x):{} for x in range(1, 17)}
 
 
-def iterate_coe(pre_data, point, col_power, col_pre, coe):
+def iterate_coe_simple(pre_data, point, config, coe):
     """
     更新16个点系数
     """
     T = 'T' + str(point + 1)
+    col_pre = config['col_pre']
     best_acc, best_score1, best_coe_m, best_coe_n = 0, 0, 0, 0
     best_score, best_acc1, best_score_m, best_score_n = 999, 0, 999, 0
 
-    req_his_fix = prepare_request_body(pre_data, col_power, 'his_fix')
-    req_dq = prepare_request_body(pre_data, col_power, col_pre)
+    pre_data = history_error(pre_data, config['col_power'], config['col_pre'], int(coe[T]['hour']//0.25))
+    req_his_fix = prepare_request_body(pre_data, config, 'his_fix')
+    req_dq = prepare_request_body(pre_data, config, col_pre)
 
     his_fix_acc, his_fix_score = calculate_acc(API_URL, req_his_fix)
     dq_acc, dq_score = calculate_acc(API_URL, req_dq)
     for i in range(5, 210):
         for j in range(5, 210):
             pre_data["new"] = round(i / 170 * pre_data[col_pre] + j / 170 * pre_data['his_fix'], 3)
-            req_new = prepare_request_body(pre_data, col_power, 'new')
+            req_new = prepare_request_body(pre_data, config, 'new')
             acc, acc_score = calculate_acc(API_URL, req_new)
 
             if acc > best_acc:
@@ -67,16 +70,95 @@ def iterate_coe(pre_data, point, col_power, col_pre, coe):
     coe[T]['acc_n'] = round(best_coe_n, 3)
     logger.info("系数轮询后,最终调整的系数为:{}".format(coe))
 
-def iterate_his_coe(pre_data, point, col_power, col_pre, coe):
+
def iterate_coe(pre_data, point, config, coe):
    """Tune blending coefficients for point T{point+1} via Bayesian optimization.

    Blends the short-term forecast column (``config['col_pre']``) with the
    history-corrected series (``'his_fix'``) as ``new = m*pre + n*his_fix`` and
    searches (m, n) for the best accuracy reported by the scoring API.

    Args:
        pre_data: DataFrame holding the forecast/actual power series.
        point: zero-based point index; mapped to key ``'T<point+1>'``.
        config: station config dict (``col_pre``, ``col_power`` are read here).
        coe: per-point coefficient dict, updated in place (callers rely on
            this side effect; nothing is returned).
    """
    T = 'T' + str(point + 1)
    col_pre = config['col_pre']

    # Assessment-score optimization is currently disabled, so its
    # coefficients stay at 0 and best_score keeps the 999 sentinel in logs.
    best_score = 999
    best_score_m, best_score_n = 0, 0

    # Apply the history-error correction with the per-point window tuned
    # earlier (coe[T]['hour']; one step = 0.25 h).
    pre_data = history_error(pre_data, config['col_power'], config['col_pre'],
                             int(coe[T]['hour'] // 0.25))
    req_his_fix = prepare_request_body(pre_data, config, 'his_fix')
    req_dq = prepare_request_body(pre_data, config, col_pre)

    # Baselines: accuracy/score of the corrected series and the raw forecast.
    his_fix_acc, his_fix_score = calculate_acc(API_URL, req_his_fix)
    dq_acc, dq_score = calculate_acc(API_URL, req_dq)

    def evaluate_coefficients(m, n):
        """Return (accuracy, score) for the blend m*pre + n*his_fix."""
        local_data = pre_data.copy()  # avoid mutating the shared frame
        local_data["new"] = round(m * local_data[col_pre] + n * local_data['his_fix'], 3)
        req_new = prepare_request_body(local_data, config, 'new')
        return calculate_acc(API_URL, req_new)

    def acc_optimizer(m, n):
        """Objective maximized by the optimizer: accuracy only."""
        acc, _ = evaluate_coefficients(m, n)
        return acc

    # Same coefficient range as the legacy 5..209 / 170 grid search,
    # i.e. roughly [0.0294, 1.235] for both m and n.
    pbounds = {
        'm': (5 / 170, 210 / 170),
        'n': (5 / 170, 210 / 170),
    }

    # Accuracy search; random_state pinned for reproducible runs.
    acc_bo = BayesianOptimization(f=acc_optimizer, pbounds=pbounds, random_state=42)
    acc_bo.maximize(init_points=10, n_iter=20)
    best_coe_m = acc_bo.max['params']['m']
    best_coe_n = acc_bo.max['params']['n']
    best_acc = acc_bo.max['target']

    # Materialize the blended columns with the chosen coefficients
    # (coe-ass is identically 0 while score optimization is disabled).
    pre_data["coe-acc"] = round(best_coe_m * pre_data[col_pre] + best_coe_n * pre_data['his_fix'], 3)
    pre_data["coe-ass"] = round(best_score_m * pre_data[col_pre] + best_score_n * pre_data['his_fix'], 3)

    # BUG FIX: history_error() returns only ['dateTime', col_power, col_pre,
    # 'his_fix'], so the previous pre_data['C_TIME'] lookup raised KeyError.
    # Read the time span from 'dateTime', positionally via .iloc.
    start = pre_data['dateTime'].iloc[0]
    end = pre_data['dateTime'].iloc[-1]
    logger.info("1.过去{} - {}的短期的准确率:{:.4f},自动确认系数后,{} 超短期的准确率:{:.4f},历史功率:{:.4f}".format(
        start, end, dq_acc, T, best_acc, his_fix_acc))
    logger.info("2.过去{} - {}的短期的考核分:{:.4f},自动确认系数后,{} 超短期的考核分:{:.4f},历史功率:{:.4f}".format(
        start, end, dq_score, T, best_score, his_fix_score))

    # Persist the tuned coefficients for this point.
    coe[T].update({
        'score_m': round(best_score_m, 3),
        'score_n': round(best_score_n, 3),
        'acc_m': round(best_coe_m, 3),
        'acc_n': round(best_coe_n, 3)
    })
    logger.info("贝叶斯优化后,最终调整的系数为:{}".format(coe))
+
+def iterate_his_coe(pre_data, point, config, coe):
     """
     更新临近时长Δ
     """
     T = 'T' + str(point + 1)
     best_acc, best_hour = 0, 1
     for hour in np.arange(0.25, 4.25, 0.25):
-        his_window = hour // 0.25
-        pre_data = history_error(pre_data, col_power, col_pre, his_window)
-        req_his_fix = prepare_request_body(pre_data, col_power, 'his_fix')
+        data = pre_data.copy()
+        his_window = int(hour // 0.25)
+        pre_data_f = history_error(data, config['col_power'], config['col_pre'], his_window)
+        req_his_fix = prepare_request_body(pre_data_f, config, 'his_fix')
         his_fix_acc, his_fix_score = calculate_acc(API_URL, req_his_fix)
 
         if his_fix_acc > best_acc:
@@ -84,9 +166,8 @@ def iterate_his_coe(pre_data, point, col_power, col_pre, coe):
             best_hour = float(round(hour, 2))
     coe[T]['hour'] = best_hour
     logger.info(f"{T} 点的最优临近时长:{best_hour}")
-    return pre_data
 
-def prepare_request_body(df, col_power, col_pre):
+def prepare_request_body(df, config, predict):
     """
     准备请求体,动态保留MongoDB中的所有字段
     """
@@ -94,7 +175,7 @@ def prepare_request_body(df, col_power, col_pre):
     # 转换时间格式为字符串
     if 'dateTime' in data.columns and isinstance(data['dateTime'].iloc[0], datetime):
         data['dateTime'] = data['dateTime'].dt.strftime('%Y-%m-%d %H:%M:%S')
-    data['model'] = col_pre
+    data['model'] = predict
     # 排除不需要的字段(如果有)
     exclude_fields = ['_id']  # 通常排除MongoDB的默认_id字段
 
@@ -106,18 +187,18 @@ def prepare_request_body(df, col_power, col_pre):
 
     # 构造请求体(固定部分+动态数据部分)
     request_body = {
-        "stationCode": "J00600",
-        "realPowerColumn": col_power,
-        "ablePowerColumn": col_power,
-        "predictPowerColumn": col_pre,
-        "inStalledCapacityName": 153,
+        "stationCode": config['stationCode'],
+        "realPowerColumn": config['col_power'],
+        "ablePowerColumn": config['col_power'],
+        "predictPowerColumn": predict,
+        "inStalledCapacityName": config['inStalledCapacityName'],
         "computTypeEnum": "E2",
-        "computMeasEnum": "E2",
-        "openCapacityName": 153,
-        "onGridEnergy": 0,
-        "price": 0,
-        "fault": -99,
-        "colTime": "dateTime",  #时间列名(可选,要与上面'dateTime一致')
+        "computMeasEnum": config.get('computMeasEnum', 'E2'),
+        "openCapacityName": config['openCapacityName'],
+        "onGridEnergy": config.get('onGridEnergy', 1),
+        "price": config.get('price', 1),
+        "fault": config.get('fault', -99),
+        "colTime": config['col_time'],  #时间列名(可选,要与上面'dateTime一致')
         # "computPowersEnum": "E4"  # 计算功率类型(可选)
         "data": data  # MongoDB数据
     }
@@ -142,7 +223,6 @@ def calculate_acc(api_url, request_body):
         if response.status_code == 200:
             acc = np.average([res['accuracy'] for res in result])
             # ass = np.average([res['accuracyAssessment'] for res in result])
-            print("111111111")
             return acc, 0
         else:
             logger.info(f"失败:{result['status']},{result['error']}")
@@ -162,11 +242,15 @@ def history_error(data, col_power, col_pre, his_window):
     datas = [data.iloc[x: x+his_window, :].reset_index(drop=True) for x in range(0, numbers)]
     data_error = [np.mean(d.iloc[0:his_window, -1]) for d in datas]
     pad_data_error = np.pad(data_error, (his_window, 0), mode='constant', constant_values=0)
+    print("!!!", his_window, numbers, len(pad_data_error))
+    print("???", len(data), len(pad_data_error))
     data['his_fix'] = data[col_pre] + pad_data_error
-    data = data.iloc[5:, :].reset_index(drop=True)
+    data = data.iloc[his_window:, :].reset_index(drop=True)
+
     data.loc[data[col_pre] <= 0, ['his_fix']] = 0
     data['dateTime'] = pd.to_datetime(data['dateTime'])
     data = data.loc[:, ['dateTime', col_power, col_pre, 'his_fix']]
+
     # data.to_csv('J01080原始数据.csv', index=False)
     return data
 
@@ -183,8 +267,8 @@ def get_station_cdq_coe():
         logger.info(args)
         data = get_data_from_mongo(args).sort_values(by=args['col_time'], ascending=True)
         for point in range(0, 16, 1):
-            pre_data = iterate_his_coe(data, point, args['col_power'], args['col_pre'], coe)
-            iterate_coe(pre_data, point, args['col_power'], args['col_pre'], coe)
+            iterate_his_coe(data, point, args, coe)
+            iterate_coe(data, point, args, coe)
         success = 1
     except Exception as e:
         my_exception = traceback.format_exc()