analysis.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293
  1. # -*- coding: utf-8 -*-
  2. import numpy as np
  3. from flask import Flask, request
  4. import time
  5. import random
  6. import logging
  7. import traceback
  8. import os
  9. from common.database_dml import get_df_list_from_mongo, insert_data_into_mongo
  10. import plotly.graph_objects as go
  11. import plotly.io as pio
  12. from bson.decimal128 import Decimal128
  13. import numbers
  14. app = Flask('analysis_report——service')
  15. def put_analysis_report_to_html(args, df_predict, df_accuracy):
  16. col_time = args['col_time']
  17. label = args['label']
  18. label_pre = args['label_pre']
  19. farmId = args['farmId']
  20. acc_flag = df_accuracy.shape[0]
  21. df_predict = df_predict.applymap(
  22. lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x,
  23. numbers.Number) else x).sort_values(
  24. by=col_time)
  25. if acc_flag > 0:
  26. df_accuracy = df_accuracy.applymap(
  27. lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x,
  28. numbers.Number) else x).sort_values(
  29. by=col_time)
  30. # 创建一个图表对象
  31. fig = go.Figure()
  32. # 获取所有的模型
  33. models = df_predict['model'].unique()
  34. # 添加实际功率曲线
  35. fig.add_trace(go.Scatter(
  36. x=df_predict[col_time],
  37. y=df_predict[label],
  38. mode='lines+markers',
  39. name='实际功率', # 实际功率
  40. line=dict(width=1), # 虚线
  41. marker=dict(symbol='circle'),
  42. ))
  43. # 为每个模型添加预测值和实际功率的曲线
  44. for model in models:
  45. # 筛选该模型的数据
  46. model_data = df_predict[df_predict['model'] == model]
  47. # 添加预测值曲线
  48. fig.add_trace(go.Scatter(
  49. x=model_data[col_time],
  50. y=model_data[label_pre],
  51. mode='lines+markers',
  52. name=f'{model} 预测值', # 预测值
  53. marker=dict(symbol='circle'),
  54. line=dict(width=2)
  55. ))
  56. # 设置图表的标题和标签
  57. fig.update_layout(
  58. template='seaborn', # 使用 seaborn 模板
  59. title=dict(
  60. # text=f"{label_pre} 与 {label} 对比", # 标题
  61. x=0.5, font=dict(size=20, color='darkblue') # 标题居中并设置字体大小和颜色
  62. ),
  63. plot_bgcolor='rgba(255, 255, 255, 0.8)', # 背景色
  64. xaxis=dict(
  65. showgrid=True,
  66. gridcolor='rgba(200, 200, 200, 0.5)', # 网格线颜色
  67. title='时间', # 时间轴标题
  68. rangeslider=dict(visible=True), # 显示滚动条
  69. rangeselector=dict(visible=True) # 显示预设的时间范围选择器
  70. ),
  71. yaxis=dict(
  72. showgrid=True,
  73. gridcolor='rgba(200, 200, 200, 0.5)',
  74. title='功率' # y轴标题
  75. ),
  76. legend=dict(
  77. x=0.01,
  78. y=0.99,
  79. bgcolor='rgba(255, 255, 255, 0.7)', # 背景透明
  80. bordercolor='black',
  81. borderwidth=1,
  82. font=dict(size=12) # 字体大小
  83. ),
  84. hovermode='x unified', # 鼠标悬停时显示统一的提示框
  85. hoverlabel=dict(
  86. bgcolor='white',
  87. font_size=14,
  88. font_family="Rockwell", # 设置字体样式
  89. bordercolor='black'
  90. ),
  91. margin=dict(l=50, r=50, t=50, b=50) # 调整边距,避免标题或标签被遮挡
  92. )
  93. # 将折线图保存为 HTML 片段
  94. power_html = pio.to_html(fig, full_html=False)
  95. # -------------------- 准确率表展示--------------------
  96. acc_html = ''
  97. if acc_flag > 0:
  98. acc_html = df_accuracy.sort_values(by=col_time).to_html(classes='table table-bordered table-striped',
  99. index=False)
  100. # -------------------- 准确率汇总展示--------------------
  101. summary_html = ''
  102. if acc_flag > 0:
  103. # 指定需要转换的列
  104. cols_to_convert = ['MAE', 'accuracy', 'RMSE', 'deviationElectricity', 'deviationAssessment']
  105. for col in cols_to_convert:
  106. if col in df_accuracy.columns:
  107. df_accuracy[col] = df_accuracy[col].apply(
  108. lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x,
  109. numbers.Number) else np.nan)
  110. # 确定存在的列
  111. agg_dict = {}
  112. rename_cols = ['model']
  113. if 'MAE' in df_accuracy.columns:
  114. agg_dict['MAE'] = np.nanmean
  115. rename_cols.append('MAE平均值')
  116. if 'accuracy' in df_accuracy.columns:
  117. agg_dict['accuracy'] = np.nanmean
  118. rename_cols.append('准确率平均值')
  119. if 'RMSE' in df_accuracy.columns:
  120. agg_dict['RMSE'] = np.nanmean
  121. rename_cols.append('RMSE平均值')
  122. if 'deviationElectricity' in df_accuracy.columns:
  123. agg_dict['deviationElectricity'] = [np.nanmean, np.nansum]
  124. rename_cols.append('考核电量平均值')
  125. rename_cols.append('考核总电量')
  126. if 'deviationAssessment' in df_accuracy.columns:
  127. agg_dict['deviationAssessment'] = [np.nanmean, np.nansum]
  128. rename_cols.append('考核分数平均值')
  129. rename_cols.append('考核总分数')
  130. if 'qualificationRate' in df_accuracy.columns:
  131. agg_dict['qualificationRate'] = [np.nanmean]
  132. rename_cols.append('合格率平均值')
  133. # 进行分组聚合,如果有需要聚合的列
  134. summary_df = df_accuracy.groupby('model').agg(agg_dict).reset_index()
  135. summary_df.columns = rename_cols
  136. summary_html = summary_df.to_html(classes='table table-bordered table-striped', index=False)
  137. # -------------------- 生成完整 HTML 页面 --------------------
  138. html_content = f"""
  139. <!DOCTYPE html>
  140. <html lang="en">
  141. <head>
  142. <meta charset="UTF-8">
  143. <meta name="viewport" content="width=device-width, initial-scale=1.0">
  144. <title>Data Analysis Report</title>
  145. <!-- 引入 Bootstrap CSS -->
  146. <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
  147. <style>
  148. justify-between;{{
  149. display: flex;
  150. justify-content: space-between;
  151. }}
  152. body {{
  153. background-color: #f4f4f9;
  154. font-family: Arial, sans-serif;
  155. padding: 20px;
  156. }}
  157. .container {{
  158. background-color: #fff;
  159. padding: 20px;
  160. border-radius: 10px;
  161. box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
  162. margin-bottom: 30px;
  163. }}
  164. h1 {{
  165. text-align: center;
  166. color: #333;
  167. margin-bottom: 20px;
  168. }}
  169. .plot-container {{
  170. margin: 20px 0;
  171. max-height: 500px; /* 限制高度 */
  172. overflow-y: auto; /* 显示垂直滚动条 */
  173. }}
  174. .table-container {{
  175. margin-top: 30px;
  176. overflow-x: auto; /* 水平滚动条 */
  177. max-width: 100%; /* 限制宽度 */
  178. white-space: nowrap; /* 防止内容换行 */
  179. max-height: 500px; /* 限制高度 */
  180. overflow-y: auto; /* 显示垂直滚动条 */
  181. }}
  182. .fixed-table thead tr > th:first-child,
  183. .fixed-table tbody tr > td:first-child {{
  184. position: sticky;
  185. left: 0;
  186. z-index: 1;
  187. }}
  188. .fixed-table-header thead tr > th {{
  189. position: sticky;
  190. top: 0;
  191. z-index: 2;
  192. }}
  193. table {{
  194. width: 100%;
  195. font-size: 12px; /* 设置字体大小为12px */
  196. }}
  197. th, td {{
  198. text-align: center; /* 表头和单元格文字居中 */
  199. }}
  200. }}
  201. </style>
  202. </head>
  203. <body>
  204. <div class="container">
  205. <h1>分析报告</h1>
  206. <!-- Pandas DataFrame 表格 -->
  207. <div class="plot-container">
  208. <h2>1. 预测功率与实际功率曲线对比</h2>
  209. {power_html}
  210. </div>
  211. <!-- Pandas DataFrame 表格 -->
  212. <div style="display:flex; justify-content: space-between;">
  213. <h2>2. 准确率对比</h2>
  214. <span>
  215. <a href="/formula.xlsx">公式</a>
  216. </span>
  217. </div>
  218. <div class="table-container fixed-table-header">
  219. {acc_html}
  220. </div>
  221. <!-- Pandas DataFrame 表格 -->
  222. <div class="table-container">
  223. <h2>3. 准确率汇总对比</h2>
  224. {summary_html}
  225. </div>
  226. </div>
  227. </body>
  228. </html>
  229. """
  230. filename = f"{farmId}_{int(time.time() * 1000)}_{random.randint(1000, 9999)}.html"
  231. # 保存为 HTML
  232. directory = '/usr/share/nginx/html'
  233. # directory = '../cache'
  234. if not os.path.exists(directory):
  235. os.makedirs(directory)
  236. file_path = os.path.join(directory, filename)
  237. path = f"http://ds1:10010/{filename}"
  238. # 将 HTML 内容写入文件
  239. with open(file_path, "w", encoding="utf-8") as f:
  240. f.write(html_content)
  241. print("HTML report generated successfully!")
  242. return path
  243. @app.route('/analysis_report_small', methods=['POST'])
  244. def analysis_report():
  245. start_time = time.time()
  246. result = {}
  247. success = 0
  248. path = ""
  249. print("Program starts execution!")
  250. try:
  251. args = request.values.to_dict()
  252. print('args', args)
  253. logger.info(args)
  254. # 获取数据
  255. df_predict, df_accuracy = get_df_list_from_mongo(args)[0], get_df_list_from_mongo(args)[1]
  256. path = put_analysis_report_to_html(args, df_predict, df_accuracy)
  257. success = 1
  258. except Exception as e:
  259. my_exception = traceback.format_exc()
  260. my_exception.replace("\n", "\t")
  261. result['msg'] = my_exception
  262. end_time = time.time()
  263. result['success'] = success
  264. result['args'] = args
  265. result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
  266. result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
  267. result['file_path'] = path
  268. print("Program execution ends!")
  269. return result
  270. if __name__ == "__main__":
  271. print("Program starts execution!")
  272. logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
  273. logger = logging.getLogger("analysis_report log")
  274. from waitress import serve
  275. serve(app, host="0.0.0.0", port=10099)
  276. print("server start!")