analysis_report.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391
  1. # -*- coding: utf-8 -*-
  2. import numpy as np
  3. from flask import Flask,request
  4. import time
  5. import random
  6. import logging
  7. import traceback
  8. import os
  9. from common.database_dml import get_df_list_from_mongo,insert_data_into_mongo
  10. import plotly.express as px
  11. import plotly.graph_objects as go
  12. import pandas as pd
  13. import plotly.io as pio
  14. from bson.decimal128 import Decimal128
  15. import numbers
  16. app = Flask('analysis_report——service')
  17. def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
  18. col_time = args['col_time']
  19. col_x_env = args['col_x_env']
  20. col_x_pre = args['col_x_pre']
  21. label = args['label']
  22. label_pre = args['label_pre']
  23. farmId = args['farmId']
  24. df_overview = pd.DataFrame(
  25. {'数据开始时间': [df_clean[col_time].min()], '数据结束时间': [df_clean[col_time].max()], '数据总记录数': [df_clean.shape[0]]})
  26. overview_html = df_overview.to_html(classes='table table-bordered table-striped', index=False)
  27. # -------------------- 数据描述 --------------------
  28. describe_html = df_clean.describe().reset_index().rename(columns={'index': '统计量'}).to_html(
  29. classes='table table-bordered table-striped', index=False)
  30. # -------------------- 实测气象与实际功率散点图--------------------
  31. # 生成实际功率与辐照度的散点图
  32. fig_scatter = px.scatter(df_clean, x=col_x_env, y=label)
  33. # 自定义散点图布局
  34. fig_scatter.update_layout(
  35. template='seaborn',
  36. plot_bgcolor='rgba(255, 255, 255, 0.8)', # 背景色
  37. xaxis=dict(
  38. showgrid=True,
  39. gridcolor='rgba(200, 200, 200, 0.5)',
  40. title=col_x_env
  41. ),
  42. yaxis=dict(
  43. showgrid=True,
  44. gridcolor='rgba(200, 200, 200, 0.5)',
  45. title=label
  46. ),
  47. legend=dict(x=0.01, y=0.99, bgcolor='rgba(255, 255, 255, 0.7)', bordercolor='black', borderwidth=1)
  48. )
  49. # 将散点图保存为 HTML 片段
  50. scatter_html = pio.to_html(fig_scatter, full_html=False)
  51. # -------------------- 生成相关性热力图 --------------------
  52. # 计算相关矩阵
  53. correlation_matrix = df_clean.corr()
  54. # 生成热力图,带数值标签和新配色
  55. fig_heatmap = go.Figure(data=go.Heatmap(
  56. z=correlation_matrix.values,
  57. x=correlation_matrix.columns,
  58. y=correlation_matrix.columns,
  59. colorscale='RdBu', # 使用红蓝配色:正相关为蓝色,负相关为红色
  60. text=correlation_matrix.round(2).astype(str), # 将相关性值保留两位小数并转换为字符串
  61. texttemplate="%{text}", # 显示数值标签
  62. colorbar=dict(title='Correlation'),
  63. zmin=-1, zmax=1 # 设置颜色映射的范围
  64. ))
  65. # 自定义热力图布局
  66. fig_heatmap.update_layout(
  67. # title='Correlation Matrix Heatmap',
  68. xaxis=dict(tickangle=45),
  69. yaxis=dict(autorange='reversed'),
  70. template='seaborn'
  71. )
  72. # 将热力图保存为 HTML 片段
  73. corr_html = pio.to_html(fig_heatmap, full_html=False)
  74. # -------------------- 实测气象与预测气象趋势曲线 --------------------
  75. # 生成折线图(以 C_GLOBALR 和 NWP预测总辐射 为例)
  76. fig_line = px.line(df_clean, x=col_time, y=[col_x_env, col_x_pre], markers=True)
  77. # 自定义趋势图布局
  78. fig_line.update_layout(
  79. template='seaborn',
  80. # title=dict(text=f"{col_x_env}与{col_x_pre}趋势曲线",
  81. # x=0.5, font=dict(size=24, color='darkblue')),
  82. plot_bgcolor='rgba(255, 255, 255, 0.8)', # 改为白色背景
  83. xaxis=dict(
  84. showgrid=True,
  85. gridcolor='rgba(200, 200, 200, 0.5)', # 网格线颜色
  86. rangeslider=dict(visible=True), # 显示滚动条
  87. rangeselector=dict(visible=True) # 显示预设的时间范围选择器
  88. ),
  89. yaxis=dict(showgrid=True, gridcolor='rgba(200, 200, 200, 0.5)'),
  90. legend=dict(x=0.01, y=0.99, bgcolor='rgba(255, 255, 255, 0.7)', bordercolor='black', borderwidth=1)
  91. )
  92. # 将折线图保存为 HTML 片段
  93. env_pre_html = pio.to_html(fig_line, full_html=False)
  94. # -------------------- 实测气象与预测气象偏差密度曲线 --------------------
  95. df_clean['deviation'] = df_clean[col_x_pre] - df_clean[col_x_env]
  96. # 生成预测与实测辐照度偏差的密度曲线图
  97. # 生成偏差的密度图
  98. fig_density = px.histogram(df_clean, x='deviation', nbins=30, marginal='rug', opacity=0.75,
  99. histnorm='density')
  100. # 自定义密度曲线图布局
  101. fig_density.update_layout(
  102. template='seaborn',
  103. # # title=dict(text=f"{col_x_pre}与{col_x_env}偏差密度曲线",
  104. # x=0.5, font=dict(size=24, color='darkred')),
  105. plot_bgcolor='rgba(255, 255, 255, 0.8)',
  106. xaxis=dict(
  107. showgrid=True,
  108. gridcolor='rgba(200, 200, 200, 0.5)',
  109. title='偏差'
  110. ),
  111. yaxis=dict(
  112. showgrid=True,
  113. gridcolor='rgba(200, 200, 200, 0.5)',
  114. title='Density'
  115. ),
  116. legend=dict(x=0.01, y=0.99, bgcolor='rgba(255, 255, 255, 0.7)', bordercolor='black', borderwidth=1)
  117. )
  118. # 将密度曲线图保存为 HTML 片段
  119. density_html = pio.to_html(fig_density, full_html=False)
  120. # -------------------- 预测功率与实际功率曲线 --------------------
  121. # 生成折线图(以 C_GLOBALR 和 NWP预测总辐射 为例)
  122. # 创建一个图表对象
  123. fig = go.Figure()
  124. # 获取所有的模型
  125. models = df_predict['model'].unique()
  126. # 添加实际功率曲线
  127. fig.add_trace(go.Scatter(
  128. x=df_predict[col_time],
  129. y=df_predict[label],
  130. mode='lines+markers',
  131. name='实际功率', # 实际功率
  132. line=dict(dash='dot', width=2), # 虚线
  133. marker=dict(symbol='cross'),
  134. ))
  135. # 为每个模型添加预测值和实际功率的曲线
  136. for model in models:
  137. # 筛选该模型的数据
  138. model_data = df_predict[df_predict['model'] == model]
  139. # 添加预测值曲线
  140. fig.add_trace(go.Scatter(
  141. x=model_data[col_time],
  142. y=model_data[label_pre],
  143. mode='lines+markers',
  144. name=f'{model} 预测值', # 预测值
  145. marker=dict(symbol='circle'),
  146. line=dict(width=2)
  147. ))
  148. # 设置图表的标题和标签
  149. fig.update_layout(
  150. template='seaborn', # 使用 seaborn 模板
  151. title=dict(
  152. text=f"{label_pre} 与 {label} 对比", # 标题
  153. x=0.5, font=dict(size=20, color='darkblue') # 标题居中并设置字体大小和颜色
  154. ),
  155. plot_bgcolor='rgba(255, 255, 255, 0.8)', # 背景色
  156. xaxis=dict(
  157. showgrid=True,
  158. gridcolor='rgba(200, 200, 200, 0.5)', # 网格线颜色
  159. title='时间', # 时间轴标题
  160. rangeslider=dict(visible=True), # 显示滚动条
  161. rangeselector=dict(visible=True) # 显示预设的时间范围选择器
  162. ),
  163. yaxis=dict(
  164. showgrid=True,
  165. gridcolor='rgba(200, 200, 200, 0.5)',
  166. title='功率' # y轴标题
  167. ),
  168. legend=dict(
  169. x=0.01,
  170. y=0.99,
  171. bgcolor='rgba(255, 255, 255, 0.7)', # 背景透明
  172. bordercolor='black',
  173. borderwidth=1,
  174. font=dict(size=12) # 字体大小
  175. ),
  176. hovermode='x unified', # 鼠标悬停时显示统一的提示框
  177. hoverlabel=dict(
  178. bgcolor='white',
  179. font_size=14,
  180. font_family="Rockwell", # 设置字体样式
  181. bordercolor='black'
  182. ),
  183. margin=dict(l=50, r=50, t=50, b=50) # 调整边距,避免标题或标签被遮挡
  184. )
  185. # 将折线图保存为 HTML 片段
  186. power_html = pio.to_html(fig, full_html=False)
  187. # -------------------- 准确率表展示--------------------
  188. acc_html = df_accuracy.to_html(classes='table table-bordered table-striped', index=False)
  189. # -------------------- 准确率汇总展示--------------------
  190. # 指定需要转换的列
  191. cols_to_convert = ['MAE', 'accuracy', 'RMSE', 'deviationElectricity','deviationAssessment']
  192. for col in cols_to_convert :
  193. if col in df_accuracy.columns:
  194. df_accuracy[col] = df_accuracy[col].apply(
  195. lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x, numbers.Number) else np.nan)
  196. # 确定存在的列
  197. agg_dict = {}
  198. rename_cols = ['model']
  199. if 'MAE' in df_accuracy.columns:
  200. agg_dict['MAE'] = np.nanmean
  201. rename_cols.append('MAE平均值')
  202. if 'accuracy' in df_accuracy.columns:
  203. agg_dict['accuracy'] = np.nanmean
  204. rename_cols.append('准确率平均值')
  205. if 'RMSE' in df_accuracy.columns:
  206. agg_dict['RMSE'] = np.nanmean
  207. rename_cols.append('RMSE平均值')
  208. if 'deviationElectricity' in df_accuracy.columns:
  209. agg_dict['deviationElectricity'] = [np.nanmean, np.nansum]
  210. rename_cols.append('考核电量平均值')
  211. rename_cols.append('考核总电量')
  212. if 'deviationAssessment' in df_accuracy.columns:
  213. agg_dict['deviationAssessment'] = [np.nanmean, np.nansum]
  214. rename_cols.append('考核分数平均值')
  215. rename_cols.append('考核总分数')
  216. # 进行分组聚合,如果有需要聚合的列
  217. summary_df = df_accuracy.groupby('model').agg(agg_dict).reset_index()
  218. summary_df.columns =rename_cols
  219. summary_html = summary_df.to_html(classes='table table-bordered table-striped', index=False)
  220. # -------------------- 生成完整 HTML 页面 --------------------
  221. html_content = f"""
  222. <!DOCTYPE html>
  223. <html lang="en">
  224. <head>
  225. <meta charset="UTF-8">
  226. <meta name="viewport" content="width=device-width, initial-scale=1.0">
  227. <title>Data Analysis Report</title>
  228. <!-- 引入 Bootstrap CSS -->
  229. <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
  230. <style>
  231. body {{
  232. background-color: #f4f4f9;
  233. font-family: Arial, sans-serif;
  234. padding: 20px;
  235. }}
  236. .container {{
  237. background-color: #fff;
  238. padding: 20px;
  239. border-radius: 10px;
  240. box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
  241. margin-bottom: 30px;
  242. }}
  243. h1 {{
  244. text-align: center;
  245. color: #333;
  246. margin-bottom: 20px;
  247. }}
  248. .plot-container {{
  249. margin: 20px 0;
  250. max-height: 500px; /* 限制高度 */
  251. overflow-y: auto; /* 显示垂直滚动条 */
  252. }}
  253. .table-container {{
  254. margin-top: 30px;
  255. overflow-x: auto; /* 水平滚动条 */
  256. max-width: 100%; /* 限制宽度 */
  257. white-space: nowrap; /* 防止内容换行 */
  258. max-height: 500px; /* 限制高度 */
  259. overflow-y: auto; /* 显示垂直滚动条 */
  260. }}
  261. table {{
  262. width: 100%;
  263. font-size: 12px; /* 设置字体大小为12px */
  264. }}
  265. th, td {{
  266. text-align: center; /* 表头和单元格文字居中 */
  267. }}
  268. </style>
  269. </head>
  270. <body>
  271. <div class="container">
  272. <h1>分析报告</h1>
  273. <!-- Pandas DataFrame 表格 -->
  274. <div class="table-container">
  275. <h2>1. 数据总览</h2>
  276. {overview_html}
  277. </div>
  278. <!-- Pandas DataFrame 表格 -->
  279. <div class="table-container">
  280. <h2>2. 数据描述</h2>
  281. {describe_html}
  282. </div>
  283. <div class="plot-container">
  284. <h2>3. 数据清洗后实测气象与实际功率散点图</h2>
  285. {scatter_html}
  286. </div>
  287. <div class="plot-container">
  288. <h2>4. 相关性分析</h2>
  289. {corr_html}
  290. </div>
  291. <div class="plot-container">
  292. <h2>5. 实测气象与预测气象曲线趋势</h2>
  293. {env_pre_html}
  294. </div>
  295. <div class="plot-container">
  296. <h2>6. 预测气象与实测气象偏差曲线</h2>
  297. {density_html}
  298. </div>
  299. <div class="plot-container">
  300. <h2>7. 预测功率与实际功率曲线对比</h2>
  301. {power_html}
  302. </div>
  303. <!-- Pandas DataFrame 表格 -->
  304. <div class="table-container">
  305. <h2>8. 准确率对比</h2>
  306. {acc_html}
  307. </div>
  308. <!-- Pandas DataFrame 表格 -->
  309. <div class="table-container">
  310. <h2>9. 准确率汇总对比</h2>
  311. {summary_html}
  312. </div>
  313. </div>
  314. </body>
  315. </html>
  316. """
  317. filename = f"{farmId}_{int(time.time() * 1000)}_{random.randint(1000, 9999)}.html"
  318. # 保存为 HTML
  319. directory = '/usr/share/nginx/html'
  320. if not os.path.exists(directory):
  321. os.makedirs(directory)
  322. file_path = os.path.join(directory, filename)
  323. path = f"http://ds3:10010/{filename}"
  324. # 将 HTML 内容写入文件
  325. with open(file_path, "w", encoding="utf-8") as f:
  326. f.write(html_content)
  327. print("HTML report generated successfully!")
  328. return path
  329. @app.route('/analysis_report', methods=['POST'])
  330. def analysis_report():
  331. start_time = time.time()
  332. result = {}
  333. success = 0
  334. path = ""
  335. print("Program starts execution!")
  336. try:
  337. args = request.values.to_dict()
  338. print('args',args)
  339. logger.info(args)
  340. #获取数据
  341. df_clean, df_predict, df_accuracy = get_df_list_from_mongo(args)[0], get_df_list_from_mongo(args)[1],get_df_list_from_mongo(args)[2]
  342. path = put_analysis_report_to_html(args, df_clean, df_predict, df_accuracy)
  343. success = 1
  344. except Exception as e:
  345. my_exception = traceback.format_exc()
  346. my_exception.replace("\n","\t")
  347. result['msg'] = my_exception
  348. end_time = time.time()
  349. result['success'] = success
  350. result['args'] = args
  351. result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
  352. result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
  353. result['file_path'] = path
  354. print("Program execution ends!")
  355. return result
  356. if __name__=="__main__":
  357. print("Program starts execution!")
  358. logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
  359. logger = logging.getLogger("analysis_report log")
  360. from waitress import serve
  361. serve(app, host="0.0.0.0", port=10092)
  362. print("server start!")