#!/usr/bin/env python
# -*- coding:utf-8 -*-
# @FileName  :cdq_coe_gen.py
# @Time      :2025/3/28 16:20
# @Author    :David
# @Company: shenyang JY
"""
Flask service that tunes, per station, the blending coefficients used to turn
a short-term forecast plus a history-error correction into an ultra-short-term
forecast:  new = m * dq + n * his_fix, for each of the 16 forecast points
T1..T16.  Accuracy is scored by an external REST endpoint (API_URL).
"""
import json
import os
import time
import traceback
from datetime import datetime

import numpy as np
import pandas as pd
import requests
from bayes_opt import BayesianOptimization
from flask import Flask, g, request

from common.database_dml_koi import get_data_from_mongo
from common.logs import Log

logger = Log('post-processing').logger
current_path = os.path.dirname(__file__)
API_URL = "http://ds2:18080/accuracyAndBiasByJSON"

app = Flask('cdq_coe_gen——service')


@app.before_request
def update_config():
    # Fresh per-request coefficient table, one dict per forecast point
    # T1..T16.  Stored on flask.g so concurrent requests never share state.
    g.coe = {'T' + str(x): {} for x in range(1, 17)}


def iterate_coe_simple(pre_data, point, config, coe):
    """
    Exhaustive grid search of the blending coefficients (m, n) for one
    forecast point:  new = m * dq + n * his_fix, m and n swept over
    5/170 .. 209/170.

    Records both the accuracy-optimal pair (acc_m/acc_n) and the
    assessment-score-optimal pair (score_m/score_n) into ``coe`` in place.

    NOTE: this issues one accuracy-API call per (i, j) pair (~42k calls per
    point), which is why the Bayesian variant ``iterate_coe`` is the one the
    service route actually uses.

    :param pre_data: DataFrame holding real power, forecast and time columns
    :param point: 0-based forecast point index (maps to key 'T{point+1}')
    :param config: request args (column names, station parameters, ...)
    :param coe: per-point coefficient dict, updated in place
    """
    T = 'T' + str(point + 1)
    col_pre = config['col_pre']
    # Use the configured time column, consistent with iterate_coe
    # (was hardcoded to 'C_TIME', which breaks for any other schema).
    col_time = config['col_time']
    best_acc, best_score1, best_coe_m, best_coe_n = 0, 0, 0, 0
    best_score, best_acc1, best_score_m, best_score_n = 999, 0, 999, 0
    # Build the history-corrected forecast over the tuned look-back window.
    pre_data = history_error(pre_data, config['col_power'], col_pre,
                             int(coe[T]['hour'] // 0.25))
    pre_data = curve_limited(pre_data, config, 'his_fix')
    req_his_fix = prepare_request_body(pre_data, config, 'his_fix')
    req_dq = prepare_request_body(pre_data, config, col_pre)
    # Baseline scores for the uncorrected forecast and the history fix.
    his_fix_acc, his_fix_score = calculate_acc(API_URL, req_his_fix)
    dq_acc, dq_score = calculate_acc(API_URL, req_dq)
    for i in range(5, 210):
        for j in range(5, 210):
            pre_data["new"] = round(i / 170 * pre_data[col_pre]
                                    + j / 170 * pre_data['his_fix'], 3)
            req_new = prepare_request_body(pre_data, config, 'new')
            acc, acc_score = calculate_acc(API_URL, req_new)
            if acc > best_acc:
                best_acc = acc
                best_score1 = acc_score
                best_coe_m = i / 170
                best_coe_n = j / 170
            if acc_score < best_score:
                best_score = acc_score
                best_acc1 = acc
                best_score_m = i / 170
                best_score_n = j / 170
    pre_data["coe-acc"] = round(best_coe_m * pre_data[col_pre]
                                + best_coe_n * pre_data['his_fix'], 3)
    pre_data["coe-ass"] = round(best_score_m * pre_data[col_pre]
                                + best_score_n * pre_data['his_fix'], 3)
    logger.info("1.过去{} - {}的短期的准确率:{:.4f},自动确认系数后,{} 超短期的准确率:{:.4f},历史功率:{:.4f}".format(pre_data[col_time][0], pre_data[col_time].iloc[-1], dq_acc, T, best_acc, his_fix_acc))
    logger.info("2.过去{} - {}的短期的考核分:{:.4f},自动确认系数后,{} 超短期的考核分:{:.4f},历史功率:{:.4f}".format(pre_data[col_time][0], pre_data[col_time].iloc[-1], dq_score, T, best_score1, his_fix_score))
    logger.info("3.过去{} - {}的短期的准确率:{:.4f},自动确认系数后,{} 超短期的准确率:{:.4f},历史功率:{:.4f}".format(pre_data[col_time][0], pre_data[col_time].iloc[-1], dq_acc, T, best_acc1, his_fix_acc))
    logger.info("4.过去{} - {}的短期的考核分:{:.4f},自动确认系数后,{} 超短期的考核分:{:.4f},历史功率:{:.4f}".format(pre_data[col_time][0], pre_data[col_time].iloc[-1], dq_score, T, best_score, his_fix_score))
    coe[T]['score_m'] = round(best_score_m, 3)
    coe[T]['score_n'] = round(best_score_n, 3)
    coe[T]['acc_m'] = round(best_coe_m, 3)
    coe[T]['acc_n'] = round(best_coe_n, 3)
    logger.info("系数轮询后,最终调整的系数为:{}".format(coe))


def iterate_coe(pre_data, point, config, coe):
    """
    Bayesian-optimization search of the blending coefficients (m, n) for one
    forecast point:  new = m * dq + n * his_fix, maximizing the accuracy
    returned by the external scoring API.

    Only the accuracy objective is optimized; the assessment-score objective
    is currently disabled because the API's assessment field is not consumed
    (see ``calculate_acc``).

    :param pre_data: DataFrame holding real power, forecast and time columns
    :param point: 0-based forecast point index (maps to key 'T{point+1}')
    :param config: request args (column names, station parameters, ...)
    :param coe: per-point coefficient dict, updated in place
    """
    T = 'T' + str(point + 1)
    col_pre = config['col_pre']
    col_time = config['col_time']

    # History correction over the look-back window tuned by iterate_his_coe.
    pre_data = history_error(pre_data, config['col_power'], col_pre,
                             int(coe[T]['hour'] // 0.25))
    pre_data = curve_limited(pre_data, config, 'his_fix')
    req_his_fix = prepare_request_body(pre_data, config, 'his_fix')
    req_dq = prepare_request_body(pre_data, config, col_pre)

    # Baseline accuracies for logging/comparison.
    his_fix_acc, his_fix_score = calculate_acc(API_URL, req_his_fix)
    dq_acc, dq_score = calculate_acc(API_URL, req_dq)

    def acc_optimizer(m, n):
        """Objective: accuracy of m*dq + n*his_fix after capacity clipping."""
        local_data = pre_data.copy()
        local_data["new"] = round(m * local_data[col_pre]
                                  + n * local_data['his_fix'], 3)
        local_data = curve_limited(local_data, config, 'new')
        req_new = prepare_request_body(local_data, config, 'new')
        acc, _ = calculate_acc(API_URL, req_new)
        return acc

    # Same coefficient range as the original grid search: 5/170 .. 210/170.
    pbounds = {
        'm': (5 / 170, 210 / 170),  # ~[0.0294, 1.235]
        'n': (5 / 170, 210 / 170)
    }

    acc_bo = BayesianOptimization(f=acc_optimizer, pbounds=pbounds,
                                  random_state=42)
    acc_bo.maximize(init_points=10, n_iter=20)
    best_acc_params = acc_bo.max['params']
    best_coe_m, best_coe_n = best_acc_params['m'], best_acc_params['n']
    best_acc = acc_bo.max['target']

    # Materialize the blended forecast with the best coefficients.
    pre_data["coe-acc"] = round(best_coe_m * pre_data[col_pre]
                                + best_coe_n * pre_data['his_fix'], 3)

    logger.info("过去{} - {}的短期的准确率:{:.4f},历史功率:{:.4f},自动确认系数后,{} 超短期的准确率:{:.4f}".format(pre_data[col_time][0], pre_data[col_time].iloc[-1], dq_acc, his_fix_acc, T, best_acc))

    coe[T].update({
        'acc_m': round(best_coe_m, 3),
        'acc_n': round(best_coe_n, 3)
    })
    logger.info("贝叶斯优化后,最终调整的系数为:{}".format(coe))


def iterate_his_coe(pre_data, point, config, coe):
    """
    Tune the look-back duration Δ (in hours, 0.25h steps up to 4h) used by
    the history-error correction for one forecast point, keeping whichever
    duration yields the highest accuracy.  Stores it as ``coe[T]['hour']``.

    :param pre_data: DataFrame holding real power, forecast and time columns
    :param point: 0-based forecast point index (maps to key 'T{point+1}')
    :param config: request args (column names, station parameters, ...)
    :param coe: per-point coefficient dict, updated in place
    """
    T = 'T' + str(point + 1)
    best_acc, best_hour = 0, 1
    for hour in np.arange(0.25, 4.25, 0.25):
        data = pre_data.copy()
        his_window = int(hour // 0.25)  # number of 15-min points in the window
        pre_data_f = history_error(data, config['col_power'],
                                   config['col_pre'], his_window)
        pre_data_f = curve_limited(pre_data_f, config, 'his_fix')
        req_his_fix = prepare_request_body(pre_data_f, config, 'his_fix')
        his_fix_acc, his_fix_score = calculate_acc(API_URL, req_his_fix)
        if his_fix_acc > best_acc:
            best_acc = his_fix_acc
            best_hour = float(round(hour, 2))
    coe[T]['hour'] = best_hour
    logger.info(f"{T} 点的最优临近时长:{best_hour}")


def prepare_request_body(df, config, predict):
    """
    Build the JSON body for the accuracy API, dynamically keeping every
    column that came back from MongoDB.

    :param df: DataFrame whose rows become the 'data' records
    :param config: request args (station code, capacities, column names, ...)
    :param predict: name of the column to be scored as the prediction
    :return: dict ready to be serialized as the POST body
    """
    data = df.copy()
    # Serialize datetimes: the API expects 'YYYY-mm-dd HH:MM:SS' strings.
    if 'dateTime' in data.columns and isinstance(data['dateTime'].iloc[0], datetime):
        data['dateTime'] = data['dateTime'].dt.strftime('%Y-%m-%d %H:%M:%S')
    data['model'] = predict
    # Fields never sent to the API (MongoDB's default _id in particular).
    exclude_fields = ['_id']
    available_fields = [col for col in data.columns if col not in exclude_fields]
    # Records list preserving all remaining fields.
    data = data[available_fields].to_dict('records')
    # Fixed station/config part + dynamic data part.
    request_body = {
        "stationCode": config['stationCode'],
        "realPowerColumn": config['col_power'],
        "ablePowerColumn": config['col_power'],
        "predictPowerColumn": predict,
        "inStalledCapacityName": config['inStalledCapacityName'],
        "computTypeEnum": "E2",
        "computMeasEnum": config.get('computMeasEnum', 'E2'),
        "openCapacityName": config['openCapacityName'],
        "onGridEnergy": config.get('onGridEnergy', 1),
        "price": config.get('price', 1),
        "fault": config.get('fault', -99),
        "colTime": config['col_time'],  # time column name (must match 'dateTime' above)
        # "computPowersEnum": "E4"      # power computation type (optional)
        "data": data                    # MongoDB records
    }
    return request_body


def calculate_acc(api_url, request_body):
    """
    POST ``request_body`` to the accuracy API and return
    ``(mean_accuracy, assessment_score)``.

    The assessment score is currently a constant 0 because the endpoint's
    'accuracyAssessment' field is not consumed.

    :raises RuntimeError: on transport failure or a non-200 response.
        (The previous implementation returned a bare ``None`` in those
        cases, which every caller then failed to unpack — the real error
        was masked by a TypeError.)
    """
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json'
    }
    try:
        # Encode explicitly: with ensure_ascii=False the payload contains
        # non-ASCII text, and passing a str would make requests fall back
        # to latin-1.  Timeout mirrors the server's channel_timeout.
        response = requests.post(
            api_url,
            data=json.dumps(request_body, ensure_ascii=False).encode('utf-8'),
            headers=headers,
            timeout=600
        )
        result = response.json()
        if response.status_code == 200:
            acc = np.average([res['accuracy'] for res in result])
            return acc, 0
        logger.info(f"{response.status_code}失败:{result['status']},{result['error']}")
        raise RuntimeError(f"准确率接口返回状态码:{response.status_code}")
    except requests.exceptions.RequestException as e:
        logger.info(f"准确率接口调用失败: {e}")
        raise RuntimeError("准确率接口调用失败") from e


def history_error(data, col_power, col_pre, his_window):
    """
    Correct the raw forecast with the mean error of the preceding
    ``his_window`` points:

        his_fix[i] = forecast[i] + mean(error[i - w .. i - 1])

    The first ``his_window`` rows have no complete error window and are
    dropped.  Operates on a copy, so the caller's frame is not mutated.

    Bug fixed: the previous version averaged the *last* column
    (``.iloc[:, -1]``) instead of the 'error' column; once the same frame
    had been processed before (it was mutated in place), the last column
    was a stale 'his_fix' and the correction was computed from garbage.

    :param data: DataFrame with real power and forecast columns
    :param col_power: real power column name
    :param col_pre: forecast column name
    :param his_window: number of preceding 15-min points to average
    :return: new DataFrame with 'error' and 'his_fix' columns added
    """
    data = data.copy()
    data['error'] = (data[col_power] - data[col_pre]).round(2)
    data.reset_index(drop=True, inplace=True)
    # Mean of the previous `his_window` errors, excluding the current row
    # (rolling window ending at i-1); rows without a full window get 0 and
    # are dropped below anyway.
    prev_mean_error = data['error'].rolling(his_window).mean().shift(1).fillna(0)
    data['his_fix'] = data[col_pre] + prev_mean_error
    return data.iloc[his_window:, :].reset_index(drop=True)


def curve_limited(pre_data, config, predict):
    """
    Clip the ``predict`` column into the physically valid range
    [0, open capacity] and parse the time column to datetime.

    :param pre_data: input DataFrame (returned as a modified copy)
    :param config: request args providing 'col_time' and 'openCapacityName'
    :param predict: name of the column to clip
    :return: new DataFrame with the clipped column
    """
    data = pre_data.copy()
    col_time, cap = config['col_time'], float(config['openCapacityName'])
    data[col_time] = pd.to_datetime(data[col_time])
    data[predict] = data[predict].clip(lower=0, upper=cap)
    return data


@app.route('/cdq_coe_gen', methods=['POST'])
def get_station_cdq_coe():
    """
    HTTP entry point: pull the station's history from MongoDB and, for each
    of the 16 forecast points, tune the look-back duration and the blending
    coefficients.  Always returns a result dict; 'success' flags whether the
    whole run completed.
    """
    start_time = time.time()
    result = {}
    success = 0
    args = {}
    coe = g.coe
    try:
        args = request.values.to_dict()
        logger.info(args)
        data = get_data_from_mongo(args).sort_values(by=args['col_time'], ascending=True)
        for point in range(16):
            iterate_his_coe(data, point, args, coe)
            iterate_coe(data, point, args, coe)
        success = 1
    except Exception:
        my_exception = traceback.format_exc()
        # str.replace returns a new string — the original call discarded it.
        my_exception = my_exception.replace("\n", "\t")
        result['msg'] = my_exception
        logger.info("调系数出错:{}".format(my_exception))
    end_time = time.time()
    result['success'] = success
    result['args'] = args
    result['coe'] = coe
    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
    return result


if __name__ == "__main__":
    print("Program starts execution!")
    from waitress import serve
    serve(app, host="0.0.0.0", port=10123,
          threads=8,            # worker threads (default 4; tune to hardware)
          channel_timeout=600   # connection timeout in seconds
          )
    print("server start!")