# analysis.py
  1. # -*- coding: utf-8 -*-
  2. import numpy as np
  3. from flask import Flask, request
  4. import time
  5. import random
  6. import logging
  7. import traceback
  8. import os
  9. from common.database_dml import get_df_list_from_mongo, insert_data_into_mongo
  10. import plotly.express as px
  11. import plotly.graph_objects as go
  12. import pandas as pd
  13. import plotly.io as pio
  14. from bson.decimal128 import Decimal128
  15. import numbers
  # Flask application object; the route handler in this file registers on it
  # and the __main__ guard hands it to the WSGI server.
  app = Flask(import_name='analysis_report——service')
  # Directory the generated report is written to and the base URL under which
  # the returned link is built.
  _REPORT_DIR = '/usr/share/nginx/html'
  _REPORT_BASE_URL = 'http://ds3:10010'
  _TABLE_CLASSES = 'table table-bordered table-striped'


  def _as_float(value, fallback):
      """Return *value* as a float, or *fallback* when it is not numeric."""
      if isinstance(value, Decimal128):
          return float(value.to_decimal())
      if isinstance(value, numbers.Number):
          return float(value)
      return fallback


  def _normalise_frame(df, col_time):
      """Convert every numeric cell of *df* to float and sort rows by *col_time*."""
      # DataFrame.applymap is deprecated since pandas 2.1 in favour of
      # DataFrame.map; probe the class so older pandas keeps working.
      elementwise = df.map if hasattr(pd.DataFrame, 'map') else df.applymap
      return elementwise(lambda cell: _as_float(cell, cell)).sort_values(by=col_time)


  def _build_power_figure(df_predict, col_time, label, label_pre):
      """Build the line chart: one actual-power trace plus one prediction trace per model."""
      fig = go.Figure()

      # Actual power, drawn dotted with cross markers.
      # NOTE(review): this trace uses every row of df_predict, so with several
      # models the same timestamp may appear more than once -- confirm intended.
      fig.add_trace(go.Scatter(
          x=df_predict[col_time],
          y=df_predict[label],
          mode='lines+markers',
          name='实际功率',
          line=dict(dash='dot', width=2),
          marker=dict(symbol='cross'),
      ))

      # One prediction trace per distinct value of the 'model' column.
      for model in df_predict['model'].unique():
          model_data = df_predict[df_predict['model'] == model]
          fig.add_trace(go.Scatter(
              x=model_data[col_time],
              y=model_data[label_pre],
              mode='lines+markers',
              name=f'{model} 预测值',
              marker=dict(symbol='circle'),
              line=dict(width=2),
          ))

      fig.update_layout(
          template='seaborn',
          # No title text is set; only its position and font are configured.
          title=dict(
              x=0.5, font=dict(size=20, color='darkblue')
          ),
          plot_bgcolor='rgba(255, 255, 255, 0.8)',
          xaxis=dict(
              showgrid=True,
              gridcolor='rgba(200, 200, 200, 0.5)',
              title='时间',
              rangeslider=dict(visible=True),
              rangeselector=dict(visible=True),
          ),
          yaxis=dict(
              showgrid=True,
              gridcolor='rgba(200, 200, 200, 0.5)',
              title='功率',
          ),
          legend=dict(
              x=0.01,
              y=0.99,
              bgcolor='rgba(255, 255, 255, 0.7)',
              bordercolor='black',
              borderwidth=1,
              font=dict(size=12),
          ),
          hovermode='x unified',
          hoverlabel=dict(
              bgcolor='white',
              font_size=14,
              font_family="Rockwell",
              bordercolor='black',
          ),
          margin=dict(l=50, r=50, t=50, b=50),
      )
      return fig


  def _summarise_accuracy(df_accuracy):
      """Aggregate the metric columns that are present, grouped by 'model'.

      Returns a frame whose first column is 'model' followed by one column per
      aggregate. When none of the known metric columns exist, the frame holds
      only the distinct models instead of failing on an empty aggregation.
      """
      # (source column, aggregation(s), output column title(s))
      spec = (
          ('MAE', np.nanmean, ['MAE平均值']),
          ('accuracy', np.nanmean, ['准确率平均值']),
          ('RMSE', np.nanmean, ['RMSE平均值']),
          ('deviationElectricity', [np.nanmean, np.nansum], ['考核电量平均值', '考核总电量']),
          ('deviationAssessment', [np.nanmean, np.nansum], ['考核分数平均值', '考核总分数']),
      )
      present = [entry for entry in spec if entry[0] in df_accuracy.columns]
      if not present:
          models = df_accuracy[['model']].dropna().drop_duplicates()
          return models.sort_values(by='model').reset_index(drop=True)

      # Work on a copy so the frame rendered as the detail table is untouched.
      metrics = df_accuracy.copy()
      agg_dict = {}
      column_names = ['model']
      for col, funcs, titles in present:
          # Anything non-numeric becomes NaN so the nan-aware aggregates skip it.
          metrics[col] = metrics[col].apply(lambda cell: _as_float(cell, np.nan))
          agg_dict[col] = funcs
          column_names.extend(titles)

      summary = metrics.groupby('model').agg(agg_dict).reset_index()
      # Flatten the aggregated column labels; order follows agg_dict insertion.
      summary.columns = column_names
      return summary


  def _render_report_page(power_html, acc_html, summary_html):
      """Wrap the chart and the two tables into the full HTML document."""
      return f"""
  <!DOCTYPE html>
  <html lang="en">
  <head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Data Analysis Report</title>
  <!-- 引入 Bootstrap CSS -->
  <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
  <style>
  .justify-between {{
  display: flex;
  justify-content: space-between;
  }}
  body {{
  background-color: #f4f4f9;
  font-family: Arial, sans-serif;
  padding: 20px;
  }}
  .container {{
  background-color: #fff;
  padding: 20px;
  border-radius: 10px;
  box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
  margin-bottom: 30px;
  }}
  h1 {{
  text-align: center;
  color: #333;
  margin-bottom: 20px;
  }}
  .plot-container {{
  margin: 20px 0;
  max-height: 500px; /* 限制高度 */
  overflow-y: auto; /* 显示垂直滚动条 */
  }}
  .table-container {{
  margin-top: 30px;
  overflow-x: auto; /* 水平滚动条 */
  max-width: 100%; /* 限制宽度 */
  white-space: nowrap; /* 防止内容换行 */
  max-height: 500px; /* 限制高度 */
  overflow-y: auto; /* 显示垂直滚动条 */
  }}
  .fixed-table thead tr > th:first-child,
  .fixed-table tbody tr > td:first-child {{
  position: sticky;
  left: 0;
  z-index: 1;
  }}
  .fixed-table-header thead tr > th {{
  position: sticky;
  top: 0;
  z-index: 2;
  }}
  table {{
  width: 100%;
  font-size: 12px; /* 设置字体大小为12px */
  }}
  th, td {{
  text-align: center; /* 表头和单元格文字居中 */
  }}
  </style>
  </head>
  <body>
  <div class="container">
  <h1>分析报告</h1>
  <!-- Pandas DataFrame 表格 -->
  <div class="plot-container">
  <h2>1. 预测功率与实际功率曲线对比</h2>
  {power_html}
  </div>
  <!-- Pandas DataFrame 表格 -->
  <div style="display:flex; justify-content: space-between;">
  <h2>2. 准确率对比</h2>
  <span>
  <a href="/formula.xlsx">公式</a>
  </span>
  </div>
  <div class="table-container fixed-table-header">
  {acc_html}
  </div>
  <!-- Pandas DataFrame 表格 -->
  <div class="table-container">
  <h2>3. 准确率汇总对比</h2>
  {summary_html}
  </div>
  </div>
  </body>
  </html>
  """


  def put_analysis_report_to_html(args, df_predict, df_accuracy):
      """Render the analysis report to an HTML file and return its URL.

      The page contains a plotly chart of actual vs. predicted values per
      model, the accuracy table, and a per-model summary of the metric columns.

      Args:
          args: mapping providing 'col_time', 'label', 'label_pre' and 'farmId'.
          df_predict: frame with a 'model' column plus the columns named by
              args['col_time'], args['label'] and args['label_pre'].
          df_accuracy: frame with a 'model' column, the args['col_time'] column
              and any of MAE / accuracy / RMSE / deviationElectricity /
              deviationAssessment.

      Returns:
          The URL string built from the report base URL and the new filename.

      Raises:
          KeyError: if a required key in *args* or a required column is missing.
          OSError: if the report directory or file cannot be written.
      """
      col_time = args['col_time']
      label = args['label']
      label_pre = args['label_pre']
      farm_id = args['farmId']

      df_predict = _normalise_frame(df_predict, col_time)
      df_accuracy = _normalise_frame(df_accuracy, col_time)

      # Chart fragment (no <html> wrapper) for embedding in the page.
      fig = _build_power_figure(df_predict, col_time, label, label_pre)
      power_html = pio.to_html(fig, full_html=False)

      # df_accuracy is already ordered by col_time at this point.
      acc_html = df_accuracy.to_html(classes=_TABLE_CLASSES, index=False)
      summary_html = _summarise_accuracy(df_accuracy).to_html(classes=_TABLE_CLASSES, index=False)

      html_content = _render_report_page(power_html, acc_html, summary_html)

      # farmId arrives with the request parameters: keep only characters that
      # cannot form a path separator so the file always lands in _REPORT_DIR.
      safe_farm_id = ''.join(
          ch if ch.isalnum() or ch in '-_' else '_' for ch in str(farm_id)
      )
      filename = f"{safe_farm_id}_{int(time.time() * 1000)}_{random.randint(1000, 9999)}.html"

      os.makedirs(_REPORT_DIR, exist_ok=True)
      file_path = os.path.join(_REPORT_DIR, filename)
      with open(file_path, "w", encoding="utf-8") as f:
          f.write(html_content)
      print("HTML report generated successfully!")
      return f"{_REPORT_BASE_URL}/{filename}"
  @app.route('/analysis_report_small', methods=['POST'])
  def analysis_report():
      """Handle POST /analysis_report_small: build the report and describe the outcome.

      Reads the request parameters, loads the prediction and accuracy frames
      through get_df_list_from_mongo, and renders them with
      put_analysis_report_to_html.

      Returns:
          dict with 'success' (1 or 0), 'args', 'start_time', 'end_time',
          'file_path' (empty string on failure) and, on failure, 'msg' holding
          the traceback with newlines replaced by tabs.
      """
      start_time = time.time()
      result = {}
      success = 0
      path = ""
      # Bound up front so the result can be assembled even when reading the
      # request parameters itself fails.
      args = {}
      print("Program starts execution!")
      try:
          args = request.values.to_dict()
          print('args', args)
          # Fetch the logger by name instead of relying on the module global
          # that only exists when this file is run as a script.
          logging.getLogger("analysis_report log").info(args)
          # Query once and take the first two frames from the returned list.
          df_list = get_df_list_from_mongo(args)
          df_predict, df_accuracy = df_list[0], df_list[1]
          path = put_analysis_report_to_html(args, df_predict, df_accuracy)
          success = 1
      except Exception:
          # Best-effort endpoint: report the failure in the payload.
          # str.replace returns a new string, so its result must be kept.
          result['msg'] = traceback.format_exc().replace("\n", "\t")
      end_time = time.time()
      result['success'] = success
      result['args'] = args
      result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
      result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
      result['file_path'] = path
      print("Program execution ends!")
      return result
  if __name__ == "__main__":
      # Script entry point: configure logging, then serve the app with waitress.
      print("Program starts execution!")
      logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
      logger = logging.getLogger("analysis_report log")
      from waitress import serve
      # serve() blocks until the server stops, so announce the start beforehand.
      print("server start!")
      serve(app, host="0.0.0.0", port=10099)