analysis_report.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500
  1. # -*- coding: utf-8 -*-
  2. import numpy as np
  3. from flask import Flask, request
  4. import time
  5. import random
  6. import logging
  7. import traceback
  8. import os
  9. from common.database_dml import get_df_list_from_mongo, insert_data_into_mongo
  10. import plotly.express as px
  11. import plotly.graph_objects as go
  12. import pandas as pd
  13. import plotly.io as pio
  14. from bson.decimal128 import Decimal128
  15. import numbers
  16. from common.processing_data_common import str_to_list,generate_unique_colors
  17. from scipy.stats import gaussian_kde
# Flask application object; the /analysis_report route in this file is registered on it.
app = Flask('analysis_report——service')
  19. def put_analysis_report_to_html(args, df_clean, df_predict, df_accuracy):
  20. col_time = args['col_time']
  21. col_x_env = args['col_x_env']
  22. col_x_pre = str_to_list(args['col_x_pre'])
  23. label = args['label']
  24. label_pre = args['label_pre']
  25. farmId = args['farmId']
  26. acc_flag = df_accuracy.shape[0]
  27. df_clean = df_clean.applymap(
  28. lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x,
  29. numbers.Number) else x).sort_values(
  30. by=col_time)
  31. df_predict = df_predict.applymap(
  32. lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x,
  33. numbers.Number) else x).sort_values(
  34. by=col_time)
  35. if acc_flag>0:
  36. df_accuracy = df_accuracy.applymap(
  37. lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x,
  38. numbers.Number) else x).sort_values(
  39. by=col_time)
  40. total_size = df_clean.shape[0]
  41. clean_size = total_size
  42. if 'is_limit' in df_clean.columns:
  43. df_clean['is_limit'] = df_clean['is_limit'].apply(lambda x: '正常点' if x==0 else '异常点')
  44. clean_size = df_clean[df_clean['is_limit']=='正常点'].shape[0]
  45. df_overview = pd.DataFrame(
  46. {'场站编码':[farmId],
  47. '数据开始时间': [df_clean[col_time].min()], '数据结束时间': [df_clean[col_time].max()],
  48. '总天数':[(pd.to_datetime(df_clean[col_time].max())-pd.to_datetime(df_clean[col_time].min())).days],
  49. '数据总记录数': [total_size],'清洗后记录数':[clean_size],'数据可用率':[clean_size/total_size]})
  50. overview_html = df_overview.to_html(classes='table table-bordered table-striped', index=False)
  51. df_clean_after = df_clean[df_clean['is_limit']=='正常点']
  52. # -------------------- 数据描述 --------------------
  53. describe_html = df_clean.describe().reset_index().rename(columns={'index': '统计量'}).to_html(
  54. classes='table table-bordered table-striped fixed', index=False)
  55. # -------------------- 实测气象与实际功率散点图--------------------
  56. fig_scatter = px.scatter(df_clean, x=col_x_env, y=label, color='is_limit')
  57. # 自定义散点图布局
  58. fig_scatter.update_layout(
  59. template='seaborn', # 使用 seaborn 风格
  60. plot_bgcolor='rgba(255, 255, 255, 0.8)', # 背景色(淡白色)
  61. xaxis=dict(
  62. showgrid=True, # 显示网格
  63. gridcolor='rgba(200, 200, 200, 0.5)', # 网格线颜色(淡灰色)
  64. title=col_x_env, # x 轴标题
  65. title_font=dict(size=14), # x 轴标题字体大小
  66. tickfont=dict(size=12) # x 轴刻度标签字体大小
  67. ),
  68. yaxis=dict(
  69. showgrid=True, # 显示网格
  70. gridcolor='rgba(200, 200, 200, 0.5)', # 网格线颜色(淡灰色)
  71. title=label, # y 轴标题
  72. title_font=dict(size=14), # y 轴标题字体大小
  73. tickfont=dict(size=12) # y 轴刻度标签字体大小
  74. ),
  75. legend=dict(
  76. x=0.01, y=0.99, # 图例位置
  77. bgcolor='rgba(255, 255, 255, 0.7)', # 图例背景色
  78. bordercolor='black', # 图例边框颜色
  79. borderwidth=1, # 图例边框宽度
  80. font=dict(size=12) # 图例文字大小
  81. ),
  82. title=dict(
  83. # text='实际功率与辐照度的散点图', # 图表标题
  84. x=0.5, # 标题居中
  85. font=dict(size=16) # 标题字体大小
  86. ),
  87. )
  88. # 将散点图保存为 HTML 片段
  89. scatter_html = pio.to_html(fig_scatter, full_html=False)
  90. # -------------------- 生成相关性热力图 --------------------
  91. # 计算相关矩阵
  92. correlation_matrix = df_clean_after.select_dtypes(include=['number']).corr()
  93. # 生成热力图,带数值标签和新配色
  94. fig_heatmap = go.Figure(data=go.Heatmap(
  95. z=correlation_matrix.values,
  96. x=correlation_matrix.columns,
  97. y=correlation_matrix.columns,
  98. colorscale='RdBu', # 使用红蓝配色:正相关为蓝色,负相关为红色
  99. text=correlation_matrix.round(2).astype(str), # 将相关性值保留两位小数并转换为字符串
  100. texttemplate="%{text}", # 显示数值标签
  101. colorbar=dict(title='Correlation'),
  102. zmin=-1, zmax=1 # 设置颜色映射的范围
  103. ))
  104. # 自定义热力图布局
  105. fig_heatmap.update_layout(
  106. # title='Correlation Matrix Heatmap',
  107. xaxis=dict(tickangle=45),
  108. yaxis=dict(autorange='reversed'),
  109. template='seaborn'
  110. )
  111. # 将热力图保存为 HTML 片段
  112. corr_html = pio.to_html(fig_heatmap, full_html=False)
  113. # -------------------- 6.实测气象与预测气象趋势曲线 --------------------
  114. # # 生成折线图(以 C_GLOBALR 和 NWP预测总辐射 为例)实际功率
  115. # y_env = [label,col_x_env]+ col_x_pre
  116. # fig_line = px.line(df_clean, x=col_time, y=y_env, markers=True)
  117. # # fig_line = px.line(df_clean[(df_clean[col_time] >= df_predict[col_time].min()) & (
  118. # # df_clean[col_time] <= df_predict[col_time].max())], x=col_time, y=y_env, markers=True)
  119. # # 自定义趋势图布局
  120. # fig_line.update_layout(
  121. # template='seaborn',
  122. # # title=dict(text=f"{col_x_env}与{col_x_pre}趋势曲线",
  123. # # x=0.5, font=dict(size=24, color='darkblue')),
  124. # plot_bgcolor='rgba(255, 255, 255, 0.8)', # 改为白色背景
  125. # xaxis=dict(
  126. # showgrid=True,
  127. # gridcolor='rgba(200, 200, 200, 0.5)', # 网格线颜色
  128. # rangeslider=dict(visible=True), # 显示滚动条
  129. # rangeselector=dict(visible=True) # 显示预设的时间范围选择器
  130. # ),
  131. # yaxis=dict(showgrid=True, gridcolor='rgba(200, 200, 200, 0.5)'),
  132. # legend=dict(x=0.01, y=0.99, bgcolor='rgba(255, 255, 255, 0.7)', bordercolor='black', borderwidth=1)
  133. # )
  134. #
  135. # # 将折线图保存为 HTML 片段
  136. # env_pre_html = pio.to_html(fig_line, full_html=False)
  137. # 创建折线图(label 单独一个纵轴, [col_x_env] + col_x_pre 一个纵轴)
  138. fig_line = px.line(df_clean, x=col_time, y=[label] + [col_x_env] + col_x_pre, markers=True)
  139. # 修改布局,添加双轴设置
  140. fig_line.update_layout(
  141. template='seaborn',
  142. plot_bgcolor='rgba(255, 255, 255, 0.8)', # 设置白色背景
  143. xaxis=dict(
  144. showgrid=True,
  145. gridcolor='rgba(200, 200, 200, 0.5)', # 网格线颜色
  146. rangeslider=dict(visible=True), # 显示滚动条
  147. rangeselector=dict(visible=True) # 显示预设的时间范围选择器
  148. ),
  149. yaxis=dict(
  150. title="实际功率", # 主纵轴用于 label
  151. showgrid=True,
  152. gridcolor='rgba(200, 200, 200, 0.5)'
  153. ),
  154. yaxis2=dict(
  155. title="环境数据", # 第二纵轴用于 [col_x_env] + col_x_pre
  156. overlaying='y', # 与主纵轴叠加
  157. side='right', # 放置在右侧
  158. showgrid=False # 不显示网格线
  159. ),
  160. legend=dict(
  161. x=0.01,
  162. y=0.99,
  163. bgcolor='rgba(255, 255, 255, 0.7)',
  164. bordercolor='black',
  165. borderwidth=1
  166. )
  167. )
  168. # 更新每个曲线的 y 轴对应性
  169. for i, col in enumerate([label] + [col_x_env] + col_x_pre):
  170. fig_line.data[i].update(yaxis='y' if col == label else 'y2')
  171. # 将折线图保存为 HTML 片段
  172. env_pre_html = pio.to_html(fig_line, full_html=False)
  173. # -------------------- 5.实测气象与预测气象偏差密度曲线 --------------------
  174. # 创建 Plotly 图形对象
  175. fig_density = go.Figure()
  176. colors = generate_unique_colors(len(col_x_pre))
  177. for col in zip(col_x_pre,colors):
  178. df_clean[f"{col[0]}_deviation"] = df_clean[col[0]] - df_clean[col_x_env]
  179. data = df_clean[f"{col[0]}_deviation"].dropna() # 确保没有 NaN 值
  180. kde = gaussian_kde(data)
  181. x_vals = np.linspace(data.min(), data.max(), 1000)
  182. y_vals = kde(x_vals)
  183. # 添加曲线
  184. fig_density.add_trace(go.Scatter(
  185. x=x_vals,
  186. y=y_vals,
  187. mode='lines',
  188. fill='tozeroy',
  189. line=dict(color=col[1]), # 循环使用颜色
  190. name=f'Density {col[0]}' # 图例名称
  191. ))
  192. # 生成预测与实测辐照度偏差的密度曲线图
  193. # 将密度曲线图保存为 HTML 片段
  194. density_html = pio.to_html(fig_density, full_html=False)
  195. # -------------------- 预测功率与实际功率曲线 --------------------
  196. # 生成折线图(以 C_GLOBALR 和 NWP预测总辐射 为例)
  197. # 创建一个图表对象
  198. fig = go.Figure()
  199. # 获取所有的模型
  200. models = df_predict['model'].unique()
  201. # 添加实际功率曲线
  202. fig.add_trace(go.Scatter(
  203. x=df_predict[col_time],
  204. y=df_predict[label],
  205. mode='lines+markers',
  206. name='实际功率', # 实际功率
  207. line=dict( width=1), # 虚线
  208. marker=dict(symbol='circle'),
  209. ))
  210. # 为每个模型添加预测值和实际功率的曲线
  211. for model in models:
  212. # 筛选该模型的数据
  213. model_data = df_predict[df_predict['model'] == model]
  214. # 添加预测值曲线
  215. fig.add_trace(go.Scatter(
  216. x=model_data[col_time],
  217. y=model_data[label_pre],
  218. mode='lines+markers',
  219. name=f'{model} 预测值', # 预测值
  220. marker=dict(symbol='circle'),
  221. line=dict(width=2)
  222. ))
  223. # 设置图表的标题和标签
  224. fig.update_layout(
  225. template='seaborn', # 使用 seaborn 模板
  226. title=dict(
  227. # text=f"{label_pre} 与 {label} 对比", # 标题
  228. x=0.5, font=dict(size=20, color='darkblue') # 标题居中并设置字体大小和颜色
  229. ),
  230. plot_bgcolor='rgba(255, 255, 255, 0.8)', # 背景色
  231. xaxis=dict(
  232. showgrid=True,
  233. gridcolor='rgba(200, 200, 200, 0.5)', # 网格线颜色
  234. title='时间', # 时间轴标题
  235. rangeslider=dict(visible=True), # 显示滚动条
  236. rangeselector=dict(visible=True) # 显示预设的时间范围选择器
  237. ),
  238. yaxis=dict(
  239. showgrid=True,
  240. gridcolor='rgba(200, 200, 200, 0.5)',
  241. title='功率' # y轴标题
  242. ),
  243. legend=dict(
  244. x=0.01,
  245. y=0.99,
  246. bgcolor='rgba(255, 255, 255, 0.7)', # 背景透明
  247. bordercolor='black',
  248. borderwidth=1,
  249. font=dict(size=12) # 字体大小
  250. ),
  251. hovermode='x unified', # 鼠标悬停时显示统一的提示框
  252. hoverlabel=dict(
  253. bgcolor='white',
  254. font_size=14,
  255. font_family="Rockwell", # 设置字体样式
  256. bordercolor='black'
  257. ),
  258. margin=dict(l=50, r=50, t=50, b=50) # 调整边距,避免标题或标签被遮挡
  259. )
  260. # 将折线图保存为 HTML 片段
  261. power_html = pio.to_html(fig, full_html=False)
  262. # -------------------- 准确率表展示--------------------
  263. acc_html=''
  264. if acc_flag>0:
  265. acc_html = df_accuracy.sort_values(by=col_time).to_html(classes='table table-bordered table-striped', index=False)
  266. # -------------------- 准确率汇总展示--------------------
  267. summary_html = ''
  268. if acc_flag>0:
  269. # 指定需要转换的列
  270. cols_to_convert = ['MAE', 'accuracy', 'RMSE', 'deviationElectricity', 'deviationAssessment']
  271. for col in cols_to_convert:
  272. if col in df_accuracy.columns:
  273. df_accuracy[col] = df_accuracy[col].apply(
  274. lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x,
  275. numbers.Number) else np.nan)
  276. # 确定存在的列
  277. agg_dict = {}
  278. rename_cols = ['model']
  279. if 'MAE' in df_accuracy.columns:
  280. agg_dict['MAE'] = np.nanmean
  281. rename_cols.append('MAE平均值')
  282. if 'accuracy' in df_accuracy.columns:
  283. agg_dict['accuracy'] = np.nanmean
  284. rename_cols.append('准确率平均值')
  285. if 'RMSE' in df_accuracy.columns:
  286. agg_dict['RMSE'] = np.nanmean
  287. rename_cols.append('RMSE平均值')
  288. if 'deviationElectricity' in df_accuracy.columns:
  289. agg_dict['deviationElectricity'] = [np.nanmean, np.nansum]
  290. rename_cols.append('考核电量平均值')
  291. rename_cols.append('考核总电量')
  292. if 'deviationAssessment' in df_accuracy.columns:
  293. agg_dict['deviationAssessment'] = [np.nanmean, np.nansum]
  294. rename_cols.append('考核分数平均值')
  295. rename_cols.append('考核总分数')
  296. if 'qualificationRate' in df_accuracy.columns:
  297. agg_dict['qualificationRate'] = [np.nanmean]
  298. rename_cols.append('合格率平均值')
  299. # 进行分组聚合,如果有需要聚合的列
  300. summary_df = df_accuracy.groupby('model').agg(agg_dict).reset_index()
  301. summary_df.columns = rename_cols
  302. summary_html = summary_df.to_html(classes='table table-bordered table-striped', index=False)
  303. # -------------------- 生成完整 HTML 页面 --------------------
  304. html_content = f"""
  305. <!DOCTYPE html>
  306. <html lang="en">
  307. <head>
  308. <meta charset="UTF-8">
  309. <meta name="viewport" content="width=device-width, initial-scale=1.0">
  310. <title>Data Analysis Report</title>
  311. <!-- 引入 Bootstrap CSS -->
  312. <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
  313. <style>
  314. justify-between;{{
  315. display: flex;
  316. justify-content: space-between;
  317. }}
  318. body {{
  319. background-color: #f4f4f9;
  320. font-family: Arial, sans-serif;
  321. padding: 20px;
  322. }}
  323. .container {{
  324. background-color: #fff;
  325. padding: 20px;
  326. border-radius: 10px;
  327. box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
  328. margin-bottom: 30px;
  329. }}
  330. h1 {{
  331. text-align: center;
  332. color: #333;
  333. margin-bottom: 20px;
  334. }}
  335. .plot-container {{
  336. margin: 20px 0;
  337. max-height: 500px; /* 限制高度 */
  338. overflow-y: auto; /* 显示垂直滚动条 */
  339. }}
  340. .table-container {{
  341. margin-top: 30px;
  342. overflow-x: auto; /* 水平滚动条 */
  343. max-width: 100%; /* 限制宽度 */
  344. white-space: nowrap; /* 防止内容换行 */
  345. max-height: 500px; /* 限制高度 */
  346. overflow-y: auto; /* 显示垂直滚动条 */
  347. }}
  348. .fixed-table thead tr > th:first-child,
  349. .fixed-table tbody tr > td:first-child {{
  350. position: sticky;
  351. left: 0;
  352. z-index: 1;
  353. }}
  354. .fixed-table-header thead tr > th {{
  355. position: sticky;
  356. top: 0;
  357. z-index: 2;
  358. }}
  359. table {{
  360. width: 100%;
  361. font-size: 12px; /* 设置字体大小为12px */
  362. }}
  363. th, td {{
  364. text-align: center; /* 表头和单元格文字居中 */
  365. }}
  366. }}
  367. </style>
  368. </head>
  369. <body>
  370. <div class="container">
  371. <h1>分析报告</h1>
  372. <!-- Pandas DataFrame 表格 -->
  373. <div class="table-container">
  374. <h2>1. 数据总览</h2>
  375. {overview_html}
  376. </div>
  377. <!-- Pandas DataFrame 表格 -->
  378. <h2>2. 数据描述</h2>
  379. <div class="table-container fixed-table">
  380. {describe_html}
  381. </div>
  382. <div class="plot-container">
  383. <h2>3. 实测气象与实际功率散点图</h2>
  384. {scatter_html}
  385. </div>
  386. <div class="plot-container">
  387. <h2>4. 相关性分析</h2>
  388. {corr_html}
  389. </div>
  390. <div class="plot-container">
  391. <h2>5. 预测气象与实测气象偏差曲线</h2>
  392. {density_html}
  393. </div>
  394. <div class="plot-container">
  395. <h2>6. 实测气象与预测气象曲线趋势</h2>
  396. {env_pre_html}
  397. </div>
  398. <div class="plot-container">
  399. <h2>7. 预测功率与实际功率曲线对比</h2>
  400. {power_html}
  401. </div>
  402. <!-- Pandas DataFrame 表格 -->
  403. <div style="display:flex; justify-content: space-between;">
  404. <h2>8. 准确率对比</h2>
  405. <span>
  406. <a href="/formula.xlsx">公式</a>
  407. </span>
  408. </div>
  409. <div class="table-container fixed-table-header">
  410. {acc_html}
  411. </div>
  412. <!-- Pandas DataFrame 表格 -->
  413. <div class="table-container">
  414. <h2>9. 准确率汇总对比</h2>
  415. {summary_html}
  416. </div>
  417. </div>
  418. </body>
  419. </html>
  420. """
  421. filename = f"{farmId}_{int(time.time() * 1000)}_{random.randint(1000, 9999)}.html"
  422. # 保存为 HTML
  423. directory = '/usr/share/nginx/html'
  424. if not os.path.exists(directory):
  425. os.makedirs(directory)
  426. file_path = os.path.join(directory, filename)
  427. path = f"http://ds1:10010/{filename}"
  428. # 将 HTML 内容写入文件
  429. with open(file_path, "w", encoding="utf-8") as f:
  430. f.write(html_content)
  431. print("HTML report generated successfully!")
  432. return path
  433. @app.route('/analysis_report', methods=['POST'])
  434. def analysis_report():
  435. start_time = time.time()
  436. result = {}
  437. success = 0
  438. path = ""
  439. print("Program starts execution!")
  440. try:
  441. args = request.values.to_dict()
  442. print('args', args)
  443. logger.info(args)
  444. # 获取数据
  445. df_clean, df_predict, df_accuracy = get_df_list_from_mongo(args)[0], get_df_list_from_mongo(args)[1], \
  446. get_df_list_from_mongo(args)[2]
  447. path = put_analysis_report_to_html(args, df_clean, df_predict, df_accuracy)
  448. success = 1
  449. except Exception as e:
  450. my_exception = traceback.format_exc()
  451. my_exception.replace("\n", "\t")
  452. result['msg'] = my_exception
  453. end_time = time.time()
  454. result['success'] = success
  455. result['args'] = args
  456. result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
  457. result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
  458. result['file_path'] = path
  459. print("Program execution ends!")
  460. return result
  461. if __name__ == "__main__":
  462. print("Program starts execution!")
  463. logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
  464. logger = logging.getLogger("analysis_report log")
  465. from waitress import serve
  466. serve(app, host="0.0.0.0", port=10092)
  467. print("server start!")