analysis_cdq.py 12 KB


  1. # -*- coding: utf-8 -*-
  2. import numpy as np
  3. import pandas as pd
  4. from flask import Flask, request
  5. import time
  6. import random
  7. import logging
  8. import traceback
  9. import os
  10. from matplotlib.pyplot import title
  11. from common.database_dml import get_df_list_from_mongo, insert_data_into_mongo
  12. import plotly.graph_objects as go
  13. from plotly.subplots import make_subplots
  14. import plotly.io as pio
  15. from bson.decimal128 import Decimal128
  16. import numbers
  # Flask application object; the /analysis_report_cdq endpoint further down is
  # registered on it via @app.route, and it is the object handed to the server
  # in the __main__ section.
  17. app = Flask('analysis_report——service')
  18. def create_fig(df_predict, col_time, label, label_pre, point):
  19. # 创建一个图表对象
  20. fig = go.Figure()
  21. point_data = df_predict[df_predict['howLongAgo']==point]
  22. # 获取所有的模型
  23. models = df_predict['model'].unique()
  24. # 添加实际功率曲线
  25. fig.add_trace(go.Scatter(
  26. x=df_predict[col_time],
  27. y=df_predict[label],
  28. mode='lines+markers',
  29. name='实际功率', # 实际功率
  30. line=dict(width=1), # 虚线
  31. marker=dict(symbol='circle'),
  32. ))
  33. # 为每个模型添加预测值和实际功率的曲线
  34. for model in models:
  35. # 筛选该模型的数据
  36. model_data = point_data[point_data['model'] == model]
  37. # 添加预测值曲线
  38. fig.add_trace(go.Scatter(
  39. x=model_data[col_time],
  40. y=model_data[label_pre],
  41. mode='lines+markers',
  42. name=f'{model} 预测值', # 预测值
  43. marker=dict(symbol='circle'),
  44. line=dict(width=2)
  45. ))
  46. fig_name = '超短期-第{}点'.format(point) if point < 17 else '超短期-平均值'
  47. # 设置图表的标题和标签
  48. fig.update_layout(
  49. template='seaborn', # 使用 seaborn 模板
  50. title=dict(
  51. text=fig_name, # 标题
  52. x=0.5, font=dict(size=20, color='darkblue') # 标题居中并设置字体大小和颜色
  53. ),
  54. plot_bgcolor='rgba(255, 255, 255, 0.8)', # 背景色
  55. xaxis=dict(
  56. showgrid=True,
  57. gridcolor='rgba(200, 200, 200, 0.5)', # 网格线颜色
  58. title='时间', # 时间轴标题
  59. rangeslider=dict(visible=True), # 显示滚动条
  60. rangeselector=dict(visible=True) # 显示预设的时间范围选择器
  61. ),
  62. yaxis=dict(
  63. showgrid=True,
  64. gridcolor='rgba(200, 200, 200, 0.5)',
  65. title='功率' # y轴标题
  66. ),
  67. legend=dict(
  68. x=0.01,
  69. y=0.99,
  70. bgcolor='rgba(255, 255, 255, 0.7)', # 背景透明
  71. bordercolor='black',
  72. borderwidth=1,
  73. font=dict(size=12) # 字体大小
  74. ),
  75. hovermode='x unified', # 鼠标悬停时显示统一的提示框
  76. hoverlabel=dict(
  77. bgcolor='white',
  78. font_size=14,
  79. font_family="Rockwell", # 设置字体样式
  80. bordercolor='black'
  81. ),
  82. margin=dict(l=50, r=50, t=50, b=50) # 调整边距,避免标题或标签被遮挡
  83. )
  84. return fig
  85. def put_analysis_report_to_html(args, df_predict, df_accuracy):
  86. col_time = args['col_time']
  87. label = args['label']
  88. label_pre = args['label_pre']
  89. farmId = args['farmId']
  90. points = args['points'].split(',')
  91. df_predict = df_predict.drop_duplicates(subset=[col_time, 'model', 'howLongAgo'], keep='first').reset_index(drop=True)
  92. df_predict = df_predict.drop_duplicates(subset=[col_time, 'model', 'howLongAgo'], keep='first').reset_index(drop=True)
  93. cdq_title = '超短期分析报告 ' + args['mongodb_read_table'].split(',')[1]
  94. acc_flag = df_accuracy.shape[0]
  95. # 获取所有的模型
  96. models = df_predict['model'].unique()
  97. aves = []
  98. # 添加超短期16个点平均值
  99. for model in models:
  100. # 筛选该模型的数据
  101. model_data = df_predict[df_predict['model'] == model]
  102. # 添加超短期16个点平均值
  103. ave = model_data.groupby(col_time).agg({
  104. label: 'first',
  105. 'model': 'first',
  106. label_pre: 'mean',
  107. 'farm_id': 'first'
  108. }).reset_index()
  109. ave['howLongAgo'] = 17
  110. ave = ave.reindex(columns=df_predict.columns.tolist())
  111. aves.append(ave)
  112. df_predict = pd.concat([df_predict]+aves)
  113. df_predict = df_predict.applymap(lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x, numbers.Number) else x).sort_values(by=col_time)
  114. if acc_flag > 0:
  115. df_accuracy = df_accuracy.applymap(lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x, numbers.Number) else x).sort_values(by=col_time)
  116. figs = [create_fig(df_predict, col_time, label, label_pre, int(p)) for p in points]
  117. # 将折线图保存为 HTML 片段
  118. power_htmls = [pio.to_html(f, full_html=False) for f in figs]
  119. power_htmls = ["<div class='plot-container'>{}</div>".format(html) for html in power_htmls]
  120. # -------------------- 准确率表展示--------------------
  121. acc_html = ''
  122. if acc_flag > 0:
  123. acc_html = df_accuracy.sort_values(by=col_time).to_html(classes='table table-bordered table-striped',
  124. index=False)
  125. # -------------------- 准确率汇总展示--------------------
  126. summary_html = ''
  127. if acc_flag > 0:
  128. # 指定需要转换的列
  129. cols_to_convert = ['MAE', 'accuracy', 'RMSE', 'deviationElectricity', 'deviationAssessment']
  130. for col in cols_to_convert:
  131. if col in df_accuracy.columns:
  132. df_accuracy[col] = df_accuracy[col].apply(
  133. lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x,
  134. numbers.Number) else np.nan)
  135. # 确定存在的列
  136. agg_dict = {}
  137. rename_cols = ['model']
  138. if 'MAE' in df_accuracy.columns:
  139. agg_dict['MAE'] = np.nanmean
  140. rename_cols.append('MAE平均值')
  141. if 'accuracy' in df_accuracy.columns:
  142. agg_dict['accuracy'] = np.nanmean
  143. rename_cols.append('准确率平均值')
  144. if 'RMSE' in df_accuracy.columns:
  145. agg_dict['RMSE'] = np.nanmean
  146. rename_cols.append('RMSE平均值')
  147. if 'deviationElectricity' in df_accuracy.columns:
  148. agg_dict['deviationElectricity'] = [np.nanmean, np.nansum]
  149. rename_cols.append('考核电量平均值')
  150. rename_cols.append('考核总电量')
  151. if 'deviationAssessment' in df_accuracy.columns:
  152. agg_dict['deviationAssessment'] = [np.nanmean, np.nansum]
  153. rename_cols.append('考核分数平均值')
  154. rename_cols.append('考核总分数')
  155. if 'accuracyAssessment' in df_accuracy.columns:
  156. agg_dict['accuracyAssessment'] = [np.nanmean, np.nansum]
  157. rename_cols.append('考核分数平均值')
  158. rename_cols.append('考核总分数')
  159. # 进行分组聚合,如果有需要聚合的列
  160. summary_df = df_accuracy.groupby('model').agg(agg_dict).reset_index()
  161. summary_df.columns = rename_cols
  162. summary_html = summary_df.to_html(classes='table table-bordered table-striped', index=False)
  163. # -------------------- 生成完整 HTML 页面 --------------------
  164. html_content = f"""
  165. <!DOCTYPE html>
  166. <html lang="en">
  167. <head>
  168. <meta charset="UTF-8">
  169. <meta name="viewport" content="width=device-width, initial-scale=1.0">
  170. <title>Data Analysis Report</title>
  171. <!-- 引入 Bootstrap CSS -->
  172. <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
  173. <style>
  174. justify-between;{{
  175. display: flex;
  176. justify-content: space-between;
  177. }}
  178. body {{
  179. background-color: #f4f4f9;
  180. font-family: Arial, sans-serif;
  181. padding: 20px;
  182. }}
  183. .container {{
  184. background-color: #fff;
  185. padding: 20px;
  186. border-radius: 10px;
  187. box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
  188. margin-bottom: 30px;
  189. }}
  190. h1 {{
  191. text-align: center;
  192. color: #333;
  193. margin-bottom: 20px;
  194. }}
  195. .plot-container {{
  196. margin: 20px 0;
  197. max-height: 500px; /* 限制高度 */
  198. overflow-y: auto; /* 显示垂直滚动条 */
  199. }}
  200. .table-container {{
  201. margin-top: 30px;
  202. overflow-x: auto; /* 水平滚动条 */
  203. max-width: 100%; /* 限制宽度 */
  204. white-space: nowrap; /* 防止内容换行 */
  205. max-height: 500px; /* 限制高度 */
  206. overflow-y: auto; /* 显示垂直滚动条 */
  207. }}
  208. .fixed-table thead tr > th:first-child,
  209. .fixed-table tbody tr > td:first-child {{
  210. position: sticky;
  211. left: 0;
  212. z-index: 1;
  213. }}
  214. .fixed-table-header thead tr > th {{
  215. position: sticky;
  216. top: 0;
  217. z-index: 2;
  218. }}
  219. table {{
  220. width: 100%;
  221. font-size: 12px; /* 设置字体大小为12px */
  222. }}
  223. th, td {{
  224. text-align: center; /* 表头和单元格文字居中 */
  225. }}
  226. }}
  227. </style>
  228. </head>
  229. <body>
  230. <div class="container">
  231. <h1>{ cdq_title }</h1>
  232. <!-- 曲线对比 -->
  233. <h2>1. 预测功率与实际功率曲线对比</h2>
  234. {''.join(power_htmls)}
  235. <!-- Pandas DataFrame 表格 -->
  236. <div style="display:flex; justify-content: space-between;">
  237. <h2>2. 准确率对比</h2>
  238. <span>
  239. <a href="/formula.xlsx">公式</a>
  240. </span>
  241. </div>
  242. <div class="table-container fixed-table-header">
  243. {acc_html}
  244. </div>
  245. <!-- Pandas DataFrame 表格 -->
  246. <div class="table-container">
  247. <h2>3. 准确率汇总对比</h2>
  248. {summary_html}
  249. </div>
  250. </div>
  251. </body>
  252. </html>
  253. """
  254. filename = f"{farmId}_{int(time.time() * 1000)}_{random.randint(1000, 9999)}.html"
  255. # 保存为 HTML
  256. directory = '/usr/share/nginx/html'
  257. if not os.path.exists(directory):
  258. os.makedirs(directory)
  259. file_path = os.path.join(directory, filename)
  260. path = f"http://ds1:10010/{filename}"
  261. # 将 HTML 内容写入文件
  262. with open(file_path, "w", encoding="utf-8") as f:
  263. f.write(html_content)
  264. print("HTML report generated successfully!")
  265. return path
  266. @app.route('/analysis_report_cdq', methods=['POST'])
  267. def analysis_report():
  268. start_time = time.time()
  269. result = {}
  270. success = 0
  271. path = ""
  272. print("Program starts execution!")
  273. try:
  274. args = request.values.to_dict()
  275. print('args', args)
  276. logger.info(args)
  277. # 获取数据
  278. df_predict, df_accuracy = get_df_list_from_mongo(args)[0], get_df_list_from_mongo(args)[1]
  279. path = put_analysis_report_to_html(args, df_predict, df_accuracy)
  280. success = 1
  281. except Exception as e:
  282. my_exception = traceback.format_exc()
  283. my_exception.replace("\n", "\t")
  284. result['msg'] = my_exception
  285. end_time = time.time()
  286. result['success'] = success
  287. result['args'] = args
  288. result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
  289. result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
  290. result['file_path'] = path
  291. print("Program execution ends!")
  292. return result
  293. if __name__ == "__main__":
  294. print("Program starts execution!")
  295. logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
  296. logger = logging.getLogger("analysis_report log")
  297. from waitress import serve
  298. serve(app, host="0.0.0.0", port=10108)
  299. print("server start!")
  300. # args_dict = {"mongodb_database": 'db_cdq', 'mongodb_read_table': 'j00234_neu_overwrite,j00234_neu_res', 'col_time': 'dateTime',
  301. # 'label': 'C_REAL_VALUE', 'label_pre': 'power_forecast', 'farmId': 'j00234'}
  302. # df_predict, df_accuracy = get_df_list_from_mongo(args_dict)[0], get_df_list_from_mongo(args_dict)[1]
  303. # path = put_analysis_report_to_html(args_dict, df_predict, df_accuracy)