# analysis.py
# -*- coding: utf-8 -*-
import argparse
import logging
import os
import pickle
import random
import time
import traceback

import lightgbm as lgb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from flask import Flask, request, jsonify
from plotly.subplots import make_subplots
from pymongo import MongoClient
from waitress import serve

app = Flask('analysis_report——service')


  21. # def get_data_from_mongo(args):
  22. # # 1.读数据
  23. # mongodb_connection,mongodb_database,all_table,accuracy_table,model_table,model_name = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/",args['mongodb_database'],args['train_table'],args['accuracy_table'],args['model_table'],args['model_name']
  24. # client = MongoClient(mongodb_connection)
  25. # # 选择数据库(如果数据库不存在,MongoDB 会自动创建)
  26. # db = client[mongodb_database]
  27. # # 将游标转换为列表,并创建 pandas DataFrame
  28. # df_all = pd.DataFrame(db[all_table].find({}, {'_id': 0}))
  29. #
  30. # df_accuracy = pd.DataFrame(db[accuracy_table].find({}, {'_id': 0}))
  31. #
  32. # model_data = db[model_table].find_one({"model_name": model_name})
  33. # if model_data is not None:
  34. # model_binary = model_data['model'] # 确保这个字段是存储模型的二进制数据
  35. # # 反序列化模型
  36. # model = pickle.loads(model_binary)
  37. # client.close()
  38. # return df_all,df_accuracy,model
  39. #
  40. #
  41. # def draw_info(df_all,df_accuracy,model,features,args):
  42. # #1.数据描述 数据描述:
  43. # col_time = args['col_time']
  44. # label = args['label']
  45. # df_accuracy_beginTime = df_accuracy[col_time].min()
  46. # df_accuracy_endTime = df_accuracy[col_time].max()
  47. # df_train = df_all[df_all[col_time]<df_accuracy_beginTime][features+[col_time,label]]
  48. # df_train_beginTime = df_train[col_time].min()
  49. # df_train_endTime = df_train[col_time].max()
  50. # text_content = f"训练数据时间范围:{df_train_beginTime} 至 {df_train_endTime},共{df_train.shape[0]}条记录,测试集数据时间范围:{df_accuracy_beginTime} 至 {df_accuracy_endTime}。<br>lightgbm模型参数:{model.params}"
  51. # return text_content
  52. #
  53. #
  54. #
  55. # def draw_global_scatter(df,args):
  56. # # --- 1. 实际功率和辐照度的散点图 ---
  57. # col_x = args['scatter_col_x']
  58. # col_y = args['label']
  59. # scatter_fig = px.scatter(
  60. # df,
  61. # x=col_x,
  62. # y=col_y,
  63. # title=f"{col_x}和{col_y}的散点图",
  64. # labels={"辐照度": "辐照度 (W/m²)", "实际功率": "实际功率 (kW)"}
  65. # )
  66. # return scatter_fig
  67. #
  68. #
  69. #
  70. # def draw_corr(df,features,args):
  71. #
  72. # # --- 2. 相关性热力图 ---
  73. # # 计算相关性矩阵
  74. # label = args['label']
  75. # features_coor = features+[label]
  76. # corr_matrix = df[features_coor].corr()
  77. # # 使用 Plotly Express 绘制热力图
  78. # heatmap_fig = px.imshow(corr_matrix,
  79. # text_auto=True, # 显示数值
  80. # color_continuous_scale='RdBu', # 配色方案
  81. # title="Correlation Heatmap")
  82. # heatmap_fig.update_coloraxes(showscale=False)
  83. #
  84. # return heatmap_fig
  85. #
  86. # def draw_feature_importance(model,features):
  87. # # --- 3. 特征重要性排名 ---
  88. # # 获取特征重要性
  89. # importance = model.feature_importance() # 'split' 或 'gain',根据需求选择
  90. #
  91. # # 转换为 DataFrame 方便绘图
  92. # feature_importance_df = pd.DataFrame({
  93. # 'Feature': features,
  94. # 'Importance': importance
  95. # })
  96. # feature_importance_df = feature_importance_df.sort_values(by='Importance', ascending=False)
  97. #
  98. # # 使用 Plotly Express 绘制条形图
  99. # importance_fig = px.bar(feature_importance_df, x='Feature', y='Importance',
  100. # title="特征重要性排名",
  101. # labels={'Feature': '特征', 'Importance': '重要性'},
  102. # color='Importance',
  103. # color_continuous_scale='Viridis')
  104. # # 更新每个 trace,确保没有图例
  105. #
  106. # importance_fig.update_layout(title="模型特征重要性排名",
  107. # showlegend=False # 移除图例
  108. # )
  109. # importance_fig.update_coloraxes(showscale=False)
  110. # return importance_fig
  111. #
  112. #
  113. # def draw_data_info_table(content):
  114. # # --- 4. 创建数据说明的表格 ---
  115. # # 转换为表格格式:1行1列,且填充文字说明
  116. # # 转换为表格格式
  117. # # 创建一个空的图
  118. # table_fig = go.Figure()
  119. #
  120. # # 第一部分: 显示文字说明
  121. # table_fig.add_trace(go.Table(
  122. # header=dict(
  123. # values=["说明"], # 表格只有一列:说明
  124. # fill_color="paleturquoise",
  125. # align="center"
  126. # ),
  127. # cells=dict(
  128. # values=[[content]] , # 第一行填入文本说明
  129. # fill_color="lavender",
  130. # align="center"
  131. # )
  132. # ))
  133. #
  134. #
  135. # return table_fig
  136. #
  137. #
  138. #
  139. # def draw_accuracy_table(df,content):
  140. #
  141. # # --- 4. 每日的准确率表格 ---
  142. # # 转换为表格格式
  143. # table_fig = go.Figure(
  144. # data=[
  145. # go.Table(
  146. # header=dict(
  147. # values=list(df.columns),
  148. # fill_color="paleturquoise",
  149. # align="center"
  150. # ),
  151. # cells=dict(
  152. # values=[df[col] for col in df.columns],
  153. # fill_color="lavender",
  154. # align="center"
  155. # )
  156. # )
  157. # ]
  158. # )
  159. # table_fig.update_layout(title="准确率表", showlegend=False)
  160. # return table_fig
  161. #
  162. #
  163. # @app.route('/analysis_report', methods=['POST'])
  164. # def analysis_report():
  165. # start_time = time.time()
  166. # result = {}
  167. # success = 0
  168. # path = ""
  169. # print("Program starts execution!")
  170. # try:
  171. # args = request.values.to_dict()
  172. # print('args',args)
  173. # logger.info(args)
  174. # #获取数据
  175. # df_all, df_accuracy, model = get_data_from_mongo(args)
  176. # features = model.feature_name()
  177. # text_content = draw_info(df_all,df_accuracy,model,features,args)
  178. # text_fig,scatter_fig,heatmap_fig,importance_fig,table_fig=draw_data_info_table(text_content),draw_global_scatter(df_all,args),draw_corr(df_all,features,args),draw_feature_importance(model,features),\
  179. # draw_accuracy_table(df_accuracy,text_content)
  180. # # --- 合并图表并保存到一个 HTML 文件 ---
  181. # # 创建子图布局
  182. # combined_fig = make_subplots(
  183. # rows=5, cols=1,
  184. # subplot_titles=["数据-模型概览","辐照度和实际功率的散点图", "相关性","特征重要性排名", "准确率表"],
  185. # row_heights=[0.3, 0.6, 0.6, 0.6, 0.4],
  186. # specs=[[{"type": "table"}], [{"type": "xy"}], [{"type": "heatmap"}], [{"type": "xy"}],[{"type": "table"}]] # 指定每个子图类型
  187. # )
  188. # # 添加文本信息到子图(第一行)
  189. # # 添加文字说明
  190. # for trace in text_fig.data:
  191. # combined_fig.add_trace(trace, row=1, col=1)
  192. #
  193. # # 添加散点图
  194. # for trace in scatter_fig.data:
  195. # combined_fig.add_trace(trace, row=2, col=1)
  196. #
  197. # # 添加相关性热力图
  198. # for trace in heatmap_fig.data:
  199. # combined_fig.add_trace(trace, row=3, col=1)
  200. #
  201. # # 添加特征重要性排名图
  202. # for trace in importance_fig.data:
  203. # combined_fig.add_trace(trace, row=4, col=1)
  204. #
  205. # # 添加表格
  206. # for trace in table_fig.data:
  207. # combined_fig.add_trace(trace, row=5, col=1)
  208. #
  209. # # 更新布局
  210. # combined_fig.update_layout(
  211. # height=1500,
  212. # title_text="分析结果汇总", # 添加换行符以适应文本内容
  213. # title_x=0.5, # 中心对齐标题
  214. # showlegend=False,
  215. # )
  216. # combined_fig.update_coloraxes(showscale=False)
  217. # filename = f"{int(time.time() * 1000)}_{random.randint(1000, 9999)}.html"
  218. # # 保存为 HTML
  219. # directory = '/usr/share/nginx/html'
  220. # if not os.path.exists(directory):
  221. # os.makedirs(directory)
  222. # file_path = os.path.join(directory, filename)
  223. # # combined_fig.write_html(f"D://usr//{filename}")
  224. # combined_fig.write_html(file_path)
  225. # path = f"http://ds2:10093/{filename}"
  226. # success = 1
  227. # except Exception as e:
  228. # my_exception = traceback.format_exc()
  229. # my_exception.replace("\n","\t")
  230. # result['msg'] = my_exception
  231. # end_time = time.time()
  232. # result['success'] = success
  233. # result['args'] = args
  234. # result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
  235. # result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
  236. # result['file_path'] = path
  237. # print("Program execution ends!")
  238. # return result
  239. #
  240. #
  241. # if __name__=="__main__":
  242. # # print("Program starts execution!")
  243. # # logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
  244. # # logger = logging.getLogger("analysis_report log")
  245. # # from waitress import serve
  246. # # serve(app, host="0.0.0.0", port=10092)
  247. # # print("server start!")