analysis_cdq.py 12 KB


  1. # -*- coding: utf-8 -*-
  2. import numpy as np
  3. import pandas as pd
  4. from flask import Flask, request
  5. import time
  6. import random
  7. import logging
  8. import traceback
  9. import os
  10. from matplotlib.pyplot import title
  11. from common.database_dml import get_df_list_from_mongo, insert_data_into_mongo
  12. import plotly.graph_objects as go
  13. from plotly.subplots import make_subplots
  14. import plotly.io as pio
  15. from bson.decimal128 import Decimal128
  16. import numbers
# Flask application object; the request handler defined further down is
# registered on it via ``@app.route``.
app = Flask('analysis_report——service')
  18. def create_fig(df_predict, col_time, label, label_pre, point):
  19. # 创建一个图表对象
  20. fig = go.Figure()
  21. point_data = df_predict[df_predict['howLongAgo']==point]
  22. # 获取所有的模型
  23. models = df_predict['model'].unique()
  24. # 添加实际功率曲线
  25. fig.add_trace(go.Scatter(
  26. x=df_predict[col_time],
  27. y=df_predict[label],
  28. mode='lines+markers',
  29. name='实际功率', # 实际功率
  30. line=dict(width=1), # 虚线
  31. marker=dict(symbol='circle'),
  32. ))
  33. # 为每个模型添加预测值和实际功率的曲线
  34. for model in models:
  35. # 筛选该模型的数据
  36. model_data = point_data[point_data['model'] == model]
  37. # 添加预测值曲线
  38. fig.add_trace(go.Scatter(
  39. x=model_data[col_time],
  40. y=model_data[label_pre],
  41. mode='lines+markers',
  42. name=f'{model} 预测值', # 预测值
  43. marker=dict(symbol='circle'),
  44. line=dict(width=2)
  45. ))
  46. fig_name = '超短期-第{}点'.format(point) if point < 17 else '超短期-平均值'
  47. # 设置图表的标题和标签
  48. fig.update_layout(
  49. template='seaborn', # 使用 seaborn 模板
  50. title=dict(
  51. text=fig_name, # 标题
  52. x=0.5, font=dict(size=20, color='darkblue') # 标题居中并设置字体大小和颜色
  53. ),
  54. plot_bgcolor='rgba(255, 255, 255, 0.8)', # 背景色
  55. xaxis=dict(
  56. showgrid=True,
  57. gridcolor='rgba(200, 200, 200, 0.5)', # 网格线颜色
  58. title='时间', # 时间轴标题
  59. rangeslider=dict(visible=True), # 显示滚动条
  60. rangeselector=dict(visible=True) # 显示预设的时间范围选择器
  61. ),
  62. yaxis=dict(
  63. showgrid=True,
  64. gridcolor='rgba(200, 200, 200, 0.5)',
  65. title='功率' # y轴标题
  66. ),
  67. legend=dict(
  68. x=0.01,
  69. y=0.99,
  70. bgcolor='rgba(255, 255, 255, 0.7)', # 背景透明
  71. bordercolor='black',
  72. borderwidth=1,
  73. font=dict(size=12) # 字体大小
  74. ),
  75. hovermode='x unified', # 鼠标悬停时显示统一的提示框
  76. hoverlabel=dict(
  77. bgcolor='white',
  78. font_size=14,
  79. font_family="Rockwell", # 设置字体样式
  80. bordercolor='black'
  81. ),
  82. margin=dict(l=50, r=50, t=50, b=50) # 调整边距,避免标题或标签被遮挡
  83. )
  84. return fig
  85. def put_analysis_report_to_html(args, df_predict, df_accuracy):
  86. col_time = args['col_time']
  87. label = args['label']
  88. label_pre = args['label_pre']
  89. farmId = args['farmId']
  90. points = args['points'].split(',')
  91. acc_flag = df_accuracy.shape[0]
  92. # 获取所有的模型
  93. models = df_predict['model'].unique()
  94. aves = []
  95. # 添加超短期16个点平均值
  96. for model in models:
  97. # 筛选该模型的数据
  98. model_data = df_predict[df_predict['model'] == model]
  99. # 添加超短期16个点平均值
  100. ave = model_data.groupby(col_time).agg({
  101. label: 'first',
  102. 'model': 'first',
  103. label_pre: 'mean',
  104. 'farm_id': 'first'
  105. }).reset_index()
  106. ave['howLongAgo'] = 17
  107. ave = ave.reindex(columns=df_predict.columns.tolist())
  108. aves.append(ave)
  109. df_predict = pd.concat([df_predict]+aves)
  110. df_predict = df_predict.applymap(lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x, numbers.Number) else x).sort_values(by=col_time)
  111. if acc_flag > 0:
  112. df_accuracy = df_accuracy.applymap(lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x, numbers.Number) else x).sort_values(by=col_time)
  113. figs = [create_fig(df_predict, col_time, label, label_pre, int(p)) for p in points]
  114. # 将折线图保存为 HTML 片段
  115. power_htmls = [pio.to_html(f, full_html=False) for f in figs]
  116. power_htmls = ["<div class='plot-container'>{}</div>".format(html) for html in power_htmls]
  117. # -------------------- 准确率表展示--------------------
  118. acc_html = ''
  119. if acc_flag > 0:
  120. acc_html = df_accuracy.sort_values(by=col_time).to_html(classes='table table-bordered table-striped',
  121. index=False)
  122. # -------------------- 准确率汇总展示--------------------
  123. summary_html = ''
  124. if acc_flag > 0:
  125. # 指定需要转换的列
  126. cols_to_convert = ['MAE', 'accuracy', 'RMSE', 'deviationElectricity', 'deviationAssessment']
  127. for col in cols_to_convert:
  128. if col in df_accuracy.columns:
  129. df_accuracy[col] = df_accuracy[col].apply(
  130. lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x,
  131. numbers.Number) else np.nan)
  132. # 确定存在的列
  133. agg_dict = {}
  134. rename_cols = ['model']
  135. if 'MAE' in df_accuracy.columns:
  136. agg_dict['MAE'] = np.nanmean
  137. rename_cols.append('MAE平均值')
  138. if 'accuracy' in df_accuracy.columns:
  139. agg_dict['accuracy'] = np.nanmean
  140. rename_cols.append('准确率平均值')
  141. if 'RMSE' in df_accuracy.columns:
  142. agg_dict['RMSE'] = np.nanmean
  143. rename_cols.append('RMSE平均值')
  144. if 'deviationElectricity' in df_accuracy.columns:
  145. agg_dict['deviationElectricity'] = [np.nanmean, np.nansum]
  146. rename_cols.append('考核电量平均值')
  147. rename_cols.append('考核总电量')
  148. if 'deviationAssessment' in df_accuracy.columns:
  149. agg_dict['deviationAssessment'] = [np.nanmean, np.nansum]
  150. rename_cols.append('考核分数平均值')
  151. rename_cols.append('考核总分数')
  152. # 进行分组聚合,如果有需要聚合的列
  153. summary_df = df_accuracy.groupby('model').agg(agg_dict).reset_index()
  154. summary_df.columns = rename_cols
  155. summary_html = summary_df.to_html(classes='table table-bordered table-striped', index=False)
  156. # -------------------- 生成完整 HTML 页面 --------------------
  157. html_content = f"""
  158. <!DOCTYPE html>
  159. <html lang="en">
  160. <head>
  161. <meta charset="UTF-8">
  162. <meta name="viewport" content="width=device-width, initial-scale=1.0">
  163. <title>Data Analysis Report</title>
  164. <!-- 引入 Bootstrap CSS -->
  165. <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
  166. <style>
  167. justify-between;{{
  168. display: flex;
  169. justify-content: space-between;
  170. }}
  171. body {{
  172. background-color: #f4f4f9;
  173. font-family: Arial, sans-serif;
  174. padding: 20px;
  175. }}
  176. .container {{
  177. background-color: #fff;
  178. padding: 20px;
  179. border-radius: 10px;
  180. box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
  181. margin-bottom: 30px;
  182. }}
  183. h1 {{
  184. text-align: center;
  185. color: #333;
  186. margin-bottom: 20px;
  187. }}
  188. .plot-container {{
  189. margin: 20px 0;
  190. max-height: 500px; /* 限制高度 */
  191. overflow-y: auto; /* 显示垂直滚动条 */
  192. }}
  193. .table-container {{
  194. margin-top: 30px;
  195. overflow-x: auto; /* 水平滚动条 */
  196. max-width: 100%; /* 限制宽度 */
  197. white-space: nowrap; /* 防止内容换行 */
  198. max-height: 500px; /* 限制高度 */
  199. overflow-y: auto; /* 显示垂直滚动条 */
  200. }}
  201. .fixed-table thead tr > th:first-child,
  202. .fixed-table tbody tr > td:first-child {{
  203. position: sticky;
  204. left: 0;
  205. z-index: 1;
  206. }}
  207. .fixed-table-header thead tr > th {{
  208. position: sticky;
  209. top: 0;
  210. z-index: 2;
  211. }}
  212. table {{
  213. width: 100%;
  214. font-size: 12px; /* 设置字体大小为12px */
  215. }}
  216. th, td {{
  217. text-align: center; /* 表头和单元格文字居中 */
  218. }}
  219. }}
  220. </style>
  221. </head>
  222. <body>
  223. <div class="container">
  224. <h1>分析报告</h1>
  225. <!-- 曲线对比 -->
  226. <h2>1. 预测功率与实际功率曲线对比</h2>
  227. {''.join(power_htmls)}
  228. <!-- Pandas DataFrame 表格 -->
  229. <div style="display:flex; justify-content: space-between;">
  230. <h2>2. 准确率对比</h2>
  231. <span>
  232. <a href="/formula.xlsx">公式</a>
  233. </span>
  234. </div>
  235. <div class="table-container fixed-table-header">
  236. {acc_html}
  237. </div>
  238. <!-- Pandas DataFrame 表格 -->
  239. <div class="table-container">
  240. <h2>3. 准确率汇总对比</h2>
  241. {summary_html}
  242. </div>
  243. </div>
  244. </body>
  245. </html>
  246. """
  247. filename = f"{farmId}_{int(time.time() * 1000)}_{random.randint(1000, 9999)}.html"
  248. # 保存为 HTML
  249. directory = '/usr/share/nginx/html'
  250. if not os.path.exists(directory):
  251. os.makedirs(directory)
  252. file_path = os.path.join(directory, filename)
  253. path = f"http://ds3:10010/{filename}"
  254. # 将 HTML 内容写入文件
  255. with open(file_path, "w", encoding="utf-8") as f:
  256. f.write(html_content)
  257. print("HTML report generated successfully!")
  258. return path
  259. @app.route('/analysis_report_cdq', methods=['POST'])
  260. def analysis_report():
  261. start_time = time.time()
  262. result = {}
  263. success = 0
  264. path = ""
  265. print("Program starts execution!")
  266. try:
  267. args = request.values.to_dict()
  268. print('args', args)
  269. logger.info(args)
  270. # 获取数据
  271. df_predict, df_accuracy = get_df_list_from_mongo(args)[0], get_df_list_from_mongo(args)[1]
  272. path = put_analysis_report_to_html(args, df_predict, df_accuracy)
  273. success = 1
  274. except Exception as e:
  275. my_exception = traceback.format_exc()
  276. my_exception.replace("\n", "\t")
  277. result['msg'] = my_exception
  278. end_time = time.time()
  279. result['success'] = success
  280. result['args'] = args
  281. result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
  282. result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
  283. result['file_path'] = path
  284. print("Program execution ends!")
  285. return result
  286. if __name__ == "__main__":
  287. print("Program starts execution!")
  288. logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
  289. logger = logging.getLogger("analysis_report log")
  290. from waitress import serve
  291. serve(app, host="0.0.0.0", port=10108)
  292. print("server start!")
  293. # args_dict = {"mongodb_database": 'db_cdq', 'mongodb_read_table': 'j00234_neu_overwrite,j00234_neu_res', 'col_time': 'dateTime',
  294. # 'label': 'C_REAL_VALUE', 'label_pre': 'power_forecast', 'farmId': 'j00234'}
  295. # df_predict, df_accuracy = get_df_list_from_mongo(args_dict)[0], get_df_list_from_mongo(args_dict)[1]
  296. # path = put_analysis_report_to_html(args_dict, df_predict, df_accuracy)