analysis.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293
  1. # -*- coding: utf-8 -*-
  2. import numpy as np
  3. from flask import Flask, request
  4. import time
  5. import random
  6. import logging
  7. import traceback
  8. import os
  9. from common.database_dml import get_df_list_from_mongo, insert_data_into_mongo
  10. import plotly.graph_objects as go
  11. import plotly.io as pio
  12. from bson.decimal128 import Decimal128
  13. import numbers
# Flask application object; the route handler in this file registers itself on it.
# NOTE(review): the import name contains two em-dash characters ("——") rather
# than ASCII hyphens/underscores — presumably a typo; confirm before changing it.
app = Flask('analysis_report——service')
  15. def put_analysis_report_to_html(args, df_predict, df_accuracy):
  16. col_time = args['col_time']
  17. label = args['label']
  18. label_pre = args['label_pre']
  19. farmId = args['farmId']
  20. acc_flag = df_accuracy.shape[0]
  21. df_predict = df_predict.applymap(
  22. lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x,
  23. numbers.Number) else x).sort_values(
  24. by=col_time)
  25. if acc_flag > 0:
  26. df_accuracy = df_accuracy.applymap(
  27. lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x,
  28. numbers.Number) else x).sort_values(
  29. by=col_time)
  30. # 创建一个图表对象
  31. fig = go.Figure()
  32. # 获取所有的模型
  33. models = df_predict['model'].unique()
  34. # 添加实际功率曲线
  35. fig.add_trace(go.Scatter(
  36. x=df_predict[col_time],
  37. y=df_predict[label],
  38. mode='lines+markers',
  39. name='实际功率', # 实际功率
  40. line=dict(width=1), # 虚线
  41. marker=dict(symbol='circle'),
  42. ))
  43. # 为每个模型添加预测值和实际功率的曲线
  44. for model in models:
  45. # 筛选该模型的数据
  46. model_data = df_predict[df_predict['model'] == model]
  47. # 添加预测值曲线
  48. fig.add_trace(go.Scatter(
  49. x=model_data[col_time],
  50. y=model_data[label_pre],
  51. mode='lines+markers',
  52. name=f'{model} 预测值', # 预测值
  53. marker=dict(symbol='circle'),
  54. line=dict(width=2)
  55. ))
  56. # 设置图表的标题和标签
  57. fig.update_layout(
  58. template='seaborn', # 使用 seaborn 模板
  59. title=dict(
  60. # text=f"{label_pre} 与 {label} 对比", # 标题
  61. x=0.5, font=dict(size=20, color='darkblue') # 标题居中并设置字体大小和颜色
  62. ),
  63. plot_bgcolor='rgba(255, 255, 255, 0.8)', # 背景色
  64. xaxis=dict(
  65. showgrid=True,
  66. gridcolor='rgba(200, 200, 200, 0.5)', # 网格线颜色
  67. title='时间', # 时间轴标题
  68. rangeslider=dict(visible=True), # 显示滚动条
  69. rangeselector=dict(visible=True) # 显示预设的时间范围选择器
  70. ),
  71. yaxis=dict(
  72. showgrid=True,
  73. gridcolor='rgba(200, 200, 200, 0.5)',
  74. title='功率' # y轴标题
  75. ),
  76. legend=dict(
  77. x=0.01,
  78. y=0.99,
  79. bgcolor='rgba(255, 255, 255, 0.7)', # 背景透明
  80. bordercolor='black',
  81. borderwidth=1,
  82. font=dict(size=12) # 字体大小
  83. ),
  84. hovermode='x unified', # 鼠标悬停时显示统一的提示框
  85. hoverlabel=dict(
  86. bgcolor='white',
  87. font_size=14,
  88. font_family="Rockwell", # 设置字体样式
  89. bordercolor='black'
  90. ),
  91. margin=dict(l=50, r=50, t=50, b=50) # 调整边距,避免标题或标签被遮挡
  92. )
  93. # 将折线图保存为 HTML 片段
  94. power_html = pio.to_html(fig, full_html=False)
  95. # -------------------- 准确率表展示--------------------
  96. acc_html = ''
  97. if acc_flag > 0:
  98. acc_html = df_accuracy.sort_values(by=col_time).to_html(classes='table table-bordered table-striped',
  99. index=False)
  100. # -------------------- 准确率汇总展示--------------------
  101. summary_html = ''
  102. if acc_flag > 0:
  103. # 指定需要转换的列
  104. cols_to_convert = ['MAE', 'accuracy', 'RMSE', 'deviationElectricity', 'deviationAssessment']
  105. for col in cols_to_convert:
  106. if col in df_accuracy.columns:
  107. df_accuracy[col] = df_accuracy[col].apply(
  108. lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x,
  109. numbers.Number) else np.nan)
  110. # 确定存在的列
  111. agg_dict = {}
  112. rename_cols = ['model']
  113. if 'MAE' in df_accuracy.columns:
  114. agg_dict['MAE'] = np.nanmean
  115. rename_cols.append('MAE平均值')
  116. if 'accuracy' in df_accuracy.columns:
  117. agg_dict['accuracy'] = np.nanmean
  118. rename_cols.append('准确率平均值')
  119. if 'RMSE' in df_accuracy.columns:
  120. agg_dict['RMSE'] = np.nanmean
  121. rename_cols.append('RMSE平均值')
  122. if 'deviationElectricity' in df_accuracy.columns:
  123. agg_dict['deviationElectricity'] = [np.nanmean, np.nansum]
  124. rename_cols.append('考核电量平均值')
  125. rename_cols.append('考核总电量')
  126. if 'deviationAssessment' in df_accuracy.columns:
  127. agg_dict['deviationAssessment'] = [np.nanmean, np.nansum]
  128. rename_cols.append('考核分数平均值')
  129. rename_cols.append('考核总分数')
  130. # 进行分组聚合,如果有需要聚合的列
  131. summary_df = df_accuracy.groupby('model').agg(agg_dict).reset_index()
  132. summary_df.columns = rename_cols
  133. summary_html = summary_df.to_html(classes='table table-bordered table-striped', index=False)
  134. # -------------------- 生成完整 HTML 页面 --------------------
  135. html_content = f"""
  136. <!DOCTYPE html>
  137. <html lang="en">
  138. <head>
  139. <meta charset="UTF-8">
  140. <meta name="viewport" content="width=device-width, initial-scale=1.0">
  141. <title>Data Analysis Report</title>
  142. <!-- 引入 Bootstrap CSS -->
  143. <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
  144. <style>
  145. justify-between;{{
  146. display: flex;
  147. justify-content: space-between;
  148. }}
  149. body {{
  150. background-color: #f4f4f9;
  151. font-family: Arial, sans-serif;
  152. padding: 20px;
  153. }}
  154. .container {{
  155. background-color: #fff;
  156. padding: 20px;
  157. border-radius: 10px;
  158. box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
  159. margin-bottom: 30px;
  160. }}
  161. h1 {{
  162. text-align: center;
  163. color: #333;
  164. margin-bottom: 20px;
  165. }}
  166. .plot-container {{
  167. margin: 20px 0;
  168. max-height: 500px; /* 限制高度 */
  169. overflow-y: auto; /* 显示垂直滚动条 */
  170. }}
  171. .table-container {{
  172. margin-top: 30px;
  173. overflow-x: auto; /* 水平滚动条 */
  174. max-width: 100%; /* 限制宽度 */
  175. white-space: nowrap; /* 防止内容换行 */
  176. max-height: 500px; /* 限制高度 */
  177. overflow-y: auto; /* 显示垂直滚动条 */
  178. }}
  179. .fixed-table thead tr > th:first-child,
  180. .fixed-table tbody tr > td:first-child {{
  181. position: sticky;
  182. left: 0;
  183. z-index: 1;
  184. }}
  185. .fixed-table-header thead tr > th {{
  186. position: sticky;
  187. top: 0;
  188. z-index: 2;
  189. }}
  190. table {{
  191. width: 100%;
  192. font-size: 12px; /* 设置字体大小为12px */
  193. }}
  194. th, td {{
  195. text-align: center; /* 表头和单元格文字居中 */
  196. }}
  197. }}
  198. </style>
  199. </head>
  200. <body>
  201. <div class="container">
  202. <h1>分析报告</h1>
  203. <!-- Pandas DataFrame 表格 -->
  204. <div class="plot-container">
  205. <h2>1. 预测功率与实际功率曲线对比</h2>
  206. {power_html}
  207. </div>
  208. <!-- Pandas DataFrame 表格 -->
  209. <div style="display:flex; justify-content: space-between;">
  210. <h2>2. 准确率对比</h2>
  211. <span>
  212. <a href="/formula.xlsx">公式</a>
  213. </span>
  214. </div>
  215. <div class="table-container fixed-table-header">
  216. {acc_html}
  217. </div>
  218. <!-- Pandas DataFrame 表格 -->
  219. <div class="table-container">
  220. <h2>3. 准确率汇总对比</h2>
  221. {summary_html}
  222. </div>
  223. </div>
  224. </body>
  225. </html>
  226. """
  227. filename = f"{farmId}_{int(time.time() * 1000)}_{random.randint(1000, 9999)}.html"
  228. # 保存为 HTML
  229. directory = '/usr/share/nginx/html'
  230. if not os.path.exists(directory):
  231. os.makedirs(directory)
  232. file_path = os.path.join(directory, filename)
  233. path = f"http://ds3:10010/{filename}"
  234. # 将 HTML 内容写入文件
  235. with open(file_path, "w", encoding="utf-8") as f:
  236. f.write(html_content)
  237. print("HTML report generated successfully!")
  238. return path
# NOTE(review): this LightGBM training call sits at module level, between the
# report builder and the Flask route, and so runs when the module is loaded.
# `params`, `lgb_train`, `lgb_eval` and `custom_loss` are not defined anywhere
# in the visible part of this file — presumably this snippet was pasted here by
# accident; confirm and remove or relocate it.
# NOTE(review): `eval_metric` does not look like a keyword of `lgb.train`
# (custom metrics are usually passed as `feval`) — TODO confirm against the
# installed LightGBM version.
import lightgbm as lgb
gbm_model = lgb.train(params,
                      lgb_train,
                      num_boost_round=500,
                      eval_metric=custom_loss,
                      valid_sets=[lgb_train, lgb_eval],
                      )
  246. @app.route('/analysis_report_small', methods=['POST'])
  247. def analysis_report():
  248. start_time = time.time()
  249. result = {}
  250. success = 0
  251. path = ""
  252. print("Program starts execution!")
  253. try:
  254. args = request.values.to_dict()
  255. print('args', args)
  256. logger.info(args)
  257. # 获取数据
  258. df_predict, df_accuracy = get_df_list_from_mongo(args)[0], get_df_list_from_mongo(args)[1]
  259. path = put_analysis_report_to_html(args, df_predict, df_accuracy)
  260. success = 1
  261. except Exception as e:
  262. my_exception = traceback.format_exc()
  263. my_exception.replace("\n", "\t")
  264. result['msg'] = my_exception
  265. end_time = time.time()
  266. result['success'] = success
  267. result['args'] = args
  268. result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
  269. result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
  270. result['file_path'] = path
  271. print("Program execution ends!")
  272. return result
  273. if __name__ == "__main__":
  274. print("Program starts execution!")
  275. logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
  276. logger = logging.getLogger("analysis_report log")
  277. from waitress import serve
  278. serve(app, host="0.0.0.0", port=10099)
  279. print("server start!")