#!/usr/bin/env python
# -*- coding:utf-8 -*-
# @FileName :cdq_coe_gen.py
# @Time :2025/3/28 16:20
# @Author :David
# @Company: shenyang JY
import os, requests, json, time, traceback
import pandas as pd
import numpy as np
from bayes_opt import BayesianOptimization
from common.database_dml_koi import get_data_from_mongo
from flask import Flask, request, g
from datetime import datetime
from common.logs import Log

logger = Log('post-processing').logger
current_path = os.path.dirname(__file__)
API_URL = "http://ds2:18080/accuracyAndBiasByJSON"
app = Flask('cdq_coe_gen——service')


@app.before_request
def update_config():
    # ------------ Prepare and consolidate the request parameters ------------
    g.coe = {'T' + str(x): {} for x in range(1, 17)}
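
# Illustrative sketch (hypothetical values, not produced by this function itself): after a
# request has been processed, each of the 16 per-point entries in g.coe is expected to hold
# the tuned look-back window and blending coefficients, e.g.
#     g.coe['T1'] == {'hour': 1.25, 'acc_m': 0.47, 'acc_n': 0.62}
# Only the structure matters here; the numbers are made up.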


def iterate_coe_simple(pre_data, point, config, coe):
    """
    Grid-search the blending coefficients for one of the 16 forecast points.
    """
    T = 'T' + str(point + 1)
    col_pre = config['col_pre']
    best_acc, best_score1, best_coe_m, best_coe_n = 0, 0, 0, 0
    best_score, best_acc1, best_score_m, best_score_n = 999, 0, 999, 0
    pre_data = history_error(pre_data, config['col_power'], config['col_pre'], int(coe[T]['hour'] // 0.25), config)
    req_his_fix = prepare_request_body(pre_data, config, 'his_fix')
    req_dq = prepare_request_body(pre_data, config, col_pre)
    his_fix_acc, his_fix_score = calculate_acc(API_URL, req_his_fix)
    dq_acc, dq_score = calculate_acc(API_URL, req_dq)
    for i in range(5, 210):
        for j in range(5, 210):
            pre_data["new"] = round(i / 170 * pre_data[col_pre] + j / 170 * pre_data['his_fix'], 3)
            req_new = prepare_request_body(pre_data, config, 'new')
            acc, acc_score = calculate_acc(API_URL, req_new)
            if acc > best_acc:
                best_acc = acc
                best_score1 = acc_score
                best_coe_m = i / 170
                best_coe_n = j / 170
            if acc_score < best_score:
                best_score = acc_score
                best_acc1 = acc
                best_score_m = i / 170
                best_score_n = j / 170
    pre_data["coe-acc"] = round(best_coe_m * pre_data[col_pre] + best_coe_n * pre_data['his_fix'], 3)
    pre_data["coe-ass"] = round(best_score_m * pre_data[col_pre] + best_score_n * pre_data['his_fix'], 3)
    logger.info("1. Short-term accuracy from {} to {}: {:.4f}; after auto-tuning, {} ultra-short-term accuracy: {:.4f}; history-corrected forecast: {:.4f}".format(pre_data['C_TIME'][0], pre_data['C_TIME'].iloc[-1], dq_acc, T, best_acc, his_fix_acc))
    logger.info("2. Short-term assessment score from {} to {}: {:.4f}; after auto-tuning, {} ultra-short-term assessment score: {:.4f}; history-corrected forecast: {:.4f}".format(pre_data['C_TIME'][0], pre_data['C_TIME'].iloc[-1], dq_score, T, best_score1, his_fix_score))
    logger.info("3. Short-term accuracy from {} to {}: {:.4f}; after auto-tuning, {} ultra-short-term accuracy: {:.4f}; history-corrected forecast: {:.4f}".format(pre_data['C_TIME'][0], pre_data['C_TIME'].iloc[-1], dq_acc, T, best_acc1, his_fix_acc))
    logger.info("4. Short-term assessment score from {} to {}: {:.4f}; after auto-tuning, {} ultra-short-term assessment score: {:.4f}; history-corrected forecast: {:.4f}".format(pre_data['C_TIME'][0], pre_data['C_TIME'].iloc[-1], dq_score, T, best_score, his_fix_score))
    coe[T]['score_m'] = round(best_score_m, 3)
    coe[T]['score_n'] = round(best_score_n, 3)
    coe[T]['acc_m'] = round(best_coe_m, 3)
    coe[T]['acc_n'] = round(best_coe_n, 3)
    logger.info("After the coefficient sweep, the final adjusted coefficients are: {}".format(coe))
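
# Illustrative sketch of hypothetical downstream use: once coe[T] is filled, a blended
# ultra-short-term forecast could be formed from the two tuned coefficients, e.g.
#     blended = coe['T1']['acc_m'] * df[config['col_pre']] + coe['T1']['acc_n'] * df['his_fix']
# This module only fits the coefficients; applying them is left to the caller.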


def iterate_coe(pre_data, point, config, coe):
    """Tune the blending coefficients with Bayesian optimization."""
    T = 'T' + str(point + 1)
    col_pre = config['col_pre']
    col_time = config['col_time']
    # Historical-error correction (same logic as the grid-search version)
    pre_data = history_error(pre_data, config['col_power'], config['col_pre'], int(coe[T]['hour'] // 0.25), config)
    req_his_fix = prepare_request_body(pre_data, config, 'his_fix')
    req_dq = prepare_request_body(pre_data, config, col_pre)
    # Baseline metrics (unchanged logic)
    his_fix_acc, his_fix_score = calculate_acc(API_URL, req_his_fix)
    dq_acc, dq_score = calculate_acc(API_URL, req_dq)

    # Objective function for the Bayesian optimizer
    def evaluate_coefficients(m, n):
        """Return the (accuracy, assessment score) pair for a coefficient pair."""
        local_data = pre_data.copy()
        local_data["new"] = round(m * local_data[col_pre] + n * local_data['his_fix'], 3)
        local_data = curve_limited(local_data, config, 'new')
        req_new = prepare_request_body(local_data, config, 'new')
        acc, score = calculate_acc(API_URL, req_new)
        return acc, score

    # Maximize accuracy
    def acc_optimizer(m, n):
        acc, _ = evaluate_coefficients(m, n)
        return acc

    # Minimize the assessment score
    def score_optimizer(m, n):
        _, score = evaluate_coefficients(m, n)
        return -score  # negate so that maximizing the objective minimizes the original score

    # Search space (same bounds as the original grid search)
    pbounds = {
        'm': (5 / 170, 210 / 170),  # original range mapped to roughly [0.0294, 1.235]
        'n': (5 / 170, 210 / 170)
    }
    # Optimize for accuracy
    acc_bo = BayesianOptimization(f=acc_optimizer, pbounds=pbounds, random_state=42)
    acc_bo.maximize(init_points=10, n_iter=20)
    best_acc_params = acc_bo.max['params']
    best_coe_m, best_coe_n = best_acc_params['m'], best_acc_params['n']
    best_acc = acc_bo.max['target']
    # Optimize for the assessment score
    # score_bo = BayesianOptimization(f=score_optimizer, pbounds=pbounds, random_state=42)
    # score_bo.maximize(init_points=10, n_iter=20)
    # best_score_params = score_bo.max['params']
    # best_score_m, best_score_n = best_score_params['m'], best_score_params['n']
    # best_score = -score_bo.max['target']  # restore the original sign
    # Apply the best coefficients (same post-processing as before)
    pre_data["coe-acc"] = round(best_coe_m * pre_data[col_pre] + best_coe_n * pre_data['his_fix'], 3)
    # pre_data["coe-ass"] = round(best_score_m * pre_data[col_pre] + best_score_n * pre_data['his_fix'], 3)
    # Log the result (same format as before)
    logger.info("Short-term accuracy from {} to {}: {:.4f}; after auto-tuning, {} ultra-short-term accuracy: {:.4f}; history-corrected forecast: {:.4f}".format(pre_data[col_time][0], pre_data[col_time].iloc[-1], dq_acc, T, best_acc, his_fix_acc))
    # Update the coefficient table (same structure as before)
    coe[T].update({
        # 'score_m': round(best_score_m, 3),
        # 'score_n': round(best_score_n, 3),
        'acc_m': round(best_coe_m, 3),
        'acc_n': round(best_coe_n, 3)
    })
    logger.info("After Bayesian optimization, the final adjusted coefficients are: {}".format(coe))
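
# Illustrative sketch: bayes_opt exposes the best observation via acc_bo.max, e.g.
#     acc_bo.max == {'target': 0.93, 'params': {'m': 0.81, 'n': 0.42}}
# The numbers are hypothetical; only this structure is what the code above relies on.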


def iterate_his_coe(pre_data, point, config, coe):
    """
    Tune the look-back window Δ (in hours) used for the historical-error correction.
    """
    T = 'T' + str(point + 1)
    best_acc, best_hour = 0, 1
    for hour in np.arange(0.25, 4.25, 0.25):
        data = pre_data.copy()
        his_window = int(hour // 0.25)
        pre_data_f = history_error(data, config['col_power'], config['col_pre'], his_window, config)
        req_his_fix = prepare_request_body(pre_data_f, config, 'his_fix')
        his_fix_acc, his_fix_score = calculate_acc(API_URL, req_his_fix)
        if his_fix_acc > best_acc:
            best_acc = his_fix_acc
            best_hour = float(round(hour, 2))
    coe[T]['hour'] = best_hour
    logger.info(f"Best look-back window for point {T}: {best_hour} h")
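
# Illustrative note: the candidate windows step through 0.25 h .. 4.0 h, which at the
# 15-minute data resolution map to 1 .. 16 samples, e.g. int(1.25 // 0.25) == 5.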


def prepare_request_body(df, config, predict):
    """
    Build the request body, dynamically keeping every field coming from MongoDB.
    """
    data = df.copy()
    # Convert datetimes to strings
    if 'dateTime' in data.columns and isinstance(data['dateTime'].iloc[0], datetime):
        data['dateTime'] = data['dateTime'].dt.strftime('%Y-%m-%d %H:%M:%S')
    data['model'] = predict
    # Fields to drop (if present)
    exclude_fields = ['_id']  # usually drop MongoDB's default _id field
    # Keep every remaining column
    available_fields = [col for col in data.columns if col not in exclude_fields]
    # Convert to a list of records (all fields preserved)
    data = data[available_fields].to_dict('records')
    # Assemble the request body (fixed part + dynamic data part)
    request_body = {
        "stationCode": config['stationCode'],
        "realPowerColumn": config['col_power'],
        "ablePowerColumn": config['col_power'],
        "predictPowerColumn": predict,
        "inStalledCapacityName": config['inStalledCapacityName'],
        "computTypeEnum": "E2",
        "computMeasEnum": config.get('computMeasEnum', 'E2'),
        "openCapacityName": config['openCapacityName'],
        "onGridEnergy": config.get('onGridEnergy', 1),
        "price": config.get('price', 1),
        "fault": config.get('fault', -99),
        "colTime": config['col_time'],  # time column name (optional; must match 'dateTime' above)
        # "computPowersEnum": "E4"  # power-calculation type (optional)
        "data": data  # MongoDB records
    }
    return request_body
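
# Illustrative sketch of the resulting payload (all values hypothetical):
#     {
#         "stationCode": "J00600",
#         "realPowerColumn": "realPower",
#         "ablePowerColumn": "realPower",
#         "predictPowerColumn": "his_fix",
#         "colTime": "dateTime",
#         "data": [{"dateTime": "2025-01-01 00:00:00", "realPower": 12.3,
#                   "his_fix": 11.8, "model": "his_fix"}, ...]
#     }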


def calculate_acc(api_url, request_body):
    """
    Call the accuracy/assessment API.
    """
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json'
    }
    try:
        response = requests.post(
            api_url,
            data=json.dumps(request_body, ensure_ascii=False),
            headers=headers
        )
        result = response.json()
        if response.status_code == 200:
            acc = np.average([res['accuracy'] for res in result])
            # ass = np.average([res['accuracyAssessment'] for res in result])
            return acc, 0
        else:
            logger.info(f"Request failed with status {response.status_code}: {result['status']}, {result['error']}")
    except requests.exceptions.RequestException as e:
        logger.info(f"Accuracy API call failed: {e}")
    # Callers always unpack two values, so fall back to a pair that never beats the running best
    return 0, 999
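
# Illustrative sketch of the response shape the parsing above assumes (values hypothetical):
#     [{"accuracy": 0.91, "accuracyAssessment": 1.7, ...}, {"accuracy": 0.88, ...}]
# i.e. a JSON list with one record per evaluated segment, each carrying an 'accuracy' field.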


def history_error(data, col_power, col_pre, his_window, config):
    data['error'] = data[col_power] - data[col_pre]
    data['error'] = data['error'].round(2)
    data.reset_index(drop=True, inplace=True)
    # Add the mean error of the preceding his_window points to the short-term forecast
    numbers = len(data) - his_window
    datas = [data.iloc[x: x + his_window, :].reset_index(drop=True) for x in range(0, numbers)]
    data_error = [np.mean(d.iloc[0:his_window, -1]) for d in datas]
    pad_data_error = np.pad(data_error, (his_window, 0), mode='constant', constant_values=0)
    logger.debug("his_window={}, numbers={}, len(pad_data_error)={}, len(data)={}".format(his_window, numbers, len(pad_data_error), len(data)))
    data['his_fix'] = data[col_pre] + pad_data_error
    data = data.iloc[his_window:, :].reset_index(drop=True)
    data = curve_limited(data, config, 'his_fix')
    return data
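
# Illustrative worked example (hypothetical numbers), his_window = 2:
#     error   = [ 1.0, -0.5,  0.5,  1.0]
#     rolling = mean of the previous 2 errors -> [pad, pad, 0.25, 0.0]
#     his_fix = col_pre + rolling error; the first his_window rows are then dropped,
# so each kept row carries the forecast corrected by the recent average bias.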


def curve_limited(pre_data, config, predict):
    """
    Clip the forecast to [0, open capacity].
    plant_type: 0 = wind, 1 = solar
    """
    col_pre, col_time, cap = config['col_pre'], config['col_time'], config['openCapacityName']
    pre_data[col_time] = pd.to_datetime(pre_data[col_time])
    pre_data.loc[pre_data[predict] < 0, [predict]] = 0
    pre_data.loc[pre_data[predict] > cap, [predict]] = cap
    return pre_data


@app.route('/cdq_coe_gen', methods=['POST'])
def get_station_cdq_coe():
    # Record the start time
    start_time = time.time()
    result = {}
    success = 0
    args = {}
    coe = g.coe
    try:
        args = request.values.to_dict()
        logger.info(args)
        data = get_data_from_mongo(args).sort_values(by=args['col_time'], ascending=True)
        for point in range(0, 16, 1):
            iterate_his_coe(data, point, args, coe)
            iterate_coe(data, point, args, coe)
        success = 1
    except Exception as e:
        my_exception = traceback.format_exc()
        my_exception = my_exception.replace("\n", "\t")
        result['msg'] = my_exception
        logger.info("Coefficient tuning failed: {}".format(my_exception))
    end_time = time.time()
    result['success'] = success
    result['args'] = args
    result['coe'] = coe
    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
    return result
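
# Illustrative usage sketch (field names inferred from the handlers above; values are hypothetical):
#     import requests
#     resp = requests.post("http://localhost:10123/cdq_coe_gen", data={
#         "mongodb_database": "ldw_ftp", "mongodb_read_table": "j00600",
#         "col_time": "dateTime", "col_power": "realPower", "col_pre": "dq",
#         "stationCode": "J00600", "inStalledCapacityName": 100, "openCapacityName": 100,
#     })
#     # resp.json() echoes the args plus the tuned 'coe' table and the start/end timestamps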


if __name__ == "__main__":
    # args = {
    #     'mongodb_database': 'ldw_ftp',
    #     'mongodb_read_table': 'j00600',
    #     # 'timeBegin': '2025-01-01 00:00:00',
    #     # 'timeEnd': '2025-01-03 23:45:00'
    # }
    # data = get_data_from_mongo(args).sort_values(by='dateTime', ascending=True)
    # pre_data = history_error(data, col_power='realPower', col_pre='dq')
    # for point in range(0, 16, 1):
    #     iterate_coe(pre_data, point, 'realPower', 'dq')
    # run_code = 0
    print("Program starts execution!")
    from waitress import serve
    serve(app, host="0.0.0.0", port=10123,
          threads=8,            # number of worker threads (default 4; tune to the hardware)
          channel_timeout=600   # connection timeout in seconds
          )
    print("server start!")