|
@@ -0,0 +1,245 @@
|
|
|
+import pandas as pd
|
|
|
+import matplotlib.pyplot as plt
|
|
|
+from pymongo import MongoClient
|
|
|
+import pickle
|
|
|
+import numpy as np
|
|
|
+import plotly.express as px
|
|
|
+from plotly.subplots import make_subplots
|
|
|
+import plotly.graph_objects as go
|
|
|
+from flask import Flask,request,jsonify
|
|
|
+from waitress import serve
|
|
|
+import time
|
|
|
+import random
|
|
|
+import argparse
|
|
|
+import logging
|
|
|
+import traceback
|
|
|
+import os
|
|
|
+
|
|
|
# Module-level Flask application; the /analysis_report route below is
# registered on this instance and it is handed to waitress in the __main__ block.
# NOTE(review): Flask's first argument is the import name it uses to locate
# resources; it is a fixed label here rather than __name__ — confirm intended.
app = Flask('analysis_report——service')
|
|
|
def get_data_from_mongo(args):
    """Load the training data, the accuracy data and the pickled model from MongoDB.

    Args:
        args: Mapping that must contain the keys ``mongodb_database``,
            ``train_table``, ``accuracy_table``, ``model_table`` and
            ``model_name``.

    Returns:
        A ``(df_all, df_accuracy, model)`` tuple. The two DataFrames hold every
        document of the train and accuracy collections with ``_id`` projected
        out; ``model`` is the object unpickled from the ``model`` field of the
        document whose ``model_name`` matches.

    Raises:
        KeyError: If a required key is missing from ``args``.
        ValueError: If no document with the requested ``model_name`` exists.
    """
    # SECURITY: the fallback URI embeds credentials in source control. The
    # MONGODB_CONNECTION environment variable takes precedence so deployments
    # can supply the secret externally; the literal is kept only so existing
    # setups keep working and should be removed once they are migrated.
    mongodb_connection = os.environ.get(
        "MONGODB_CONNECTION",
        "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/",
    )
    mongodb_database = args['mongodb_database']
    all_table = args['train_table']
    accuracy_table = args['accuracy_table']
    model_table = args['model_table']
    model_name = args['model_name']

    client = MongoClient(mongodb_connection)
    try:
        # Indexing the client selects the database (MongoDB creates it lazily).
        db = client[mongodb_database]
        # The cursors are consumed into DataFrames while the client is still open.
        df_all = pd.DataFrame(db[all_table].find({}, {'_id': 0}))
        df_accuracy = pd.DataFrame(db[accuracy_table].find({}, {'_id': 0}))
        model_data = db[model_table].find_one({"model_name": model_name})
    finally:
        # Release the connection even when one of the queries above fails.
        client.close()

    if model_data is None:
        # Previously this fell through to the return statement and surfaced as
        # an UnboundLocalError on ``model``; fail with an explicit message.
        raise ValueError(
            f"model {model_name!r} not found in collection {model_table!r}"
        )

    # SECURITY: pickle.loads can execute arbitrary code. This is only safe
    # while the model collection is writable by trusted parties exclusively.
    model = pickle.loads(model_data['model'])
    return df_all, df_accuracy, model
|
|
|
+
|
|
|
+
|
|
|
def draw_info(df_all,df_accuracy,model,features,args):
    """Compose the one-line HTML summary of the data ranges and model parameters.

    Args:
        df_all: DataFrame holding the full data set; must contain the time
            column, the label column and every name in ``features``.
        df_accuracy: DataFrame whose time column delimits the test period.
        model: Object exposing a ``params`` attribute.
        features: List of feature column names.
        args: Mapping providing ``col_time`` and ``label``.

    Returns:
        The summary string; it contains a ``<br>`` separator before the
        model parameters.
    """
    time_col, target_col = args['col_time'], args['label']

    accuracy_times = df_accuracy[time_col]
    test_start = accuracy_times.min()
    test_end = accuracy_times.max()

    # Training rows are the ones strictly earlier than the first test timestamp.
    is_train = df_all[time_col] < test_start
    train_part = df_all.loc[is_train, features + [time_col, target_col]]
    train_times = train_part[time_col]
    train_start, train_end = train_times.min(), train_times.max()

    return (
        f"训练数据时间范围:{train_start} 至 {train_end},"
        f"共{train_part.shape[0]}条记录,"
        f"测试集数据时间范围:{test_start} 至 {test_end}。"
        f"<br>lightgbm模型参数:{model.params}"
    )
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
def draw_global_scatter(df,args):
    """Return a Plotly Express scatter figure of the label against one column.

    Args:
        df: DataFrame containing both plotted columns.
        args: Mapping providing ``scatter_col_x`` (x-axis column) and
            ``label`` (y-axis column).

    Returns:
        The figure produced by ``px.scatter``.
    """
    x_name = args['scatter_col_x']
    y_name = args['label']

    # Display names are keyed by these two fixed column names, so they only
    # take effect when the plotted columns are called exactly that.
    axis_labels = {"辐照度": "辐照度 (W/m²)", "实际功率": "实际功率 (kW)"}
    chart_title = f"{x_name}和{y_name}的散点图"

    return px.scatter(df, x=x_name, y=y_name, title=chart_title, labels=axis_labels)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
def draw_corr(df,features,args):
    """Return a heatmap figure of the correlation between features and label.

    Args:
        df: DataFrame containing every feature column and the label column.
        features: List of feature column names.
        args: Mapping providing ``label``.

    Returns:
        The ``px.imshow`` figure of the correlation matrix, with cell values
        printed and the colour bar hidden.
    """
    # Correlate the feature columns together with the label column.
    corr = df[features + [args['label']]].corr()

    fig = px.imshow(
        corr,
        title="Correlation Heatmap",
        color_continuous_scale='RdBu',
        text_auto=True,  # print the coefficient inside each cell
    )
    fig.update_coloraxes(showscale=False)
    return fig
|
|
|
+
|
|
|
def draw_feature_importance(model,features):
    """Return a bar chart of the model's feature importances, highest first.

    Args:
        model: Object exposing ``feature_importance()``; it is called with no
            arguments, so the model's default importance type is used.
        features: Feature names, aligned with the importance values.

    Returns:
        The ``px.bar`` figure with legend and colour bar hidden.
    """
    scores = model.feature_importance()

    # Tabulate name/score pairs and rank them in descending order for plotting.
    ranked = pd.DataFrame({'Feature': features, 'Importance': scores}).sort_values(
        by='Importance', ascending=False
    )

    fig = px.bar(
        ranked,
        x='Feature',
        y='Importance',
        title="特征重要性排名",
        labels={'Feature': '特征', 'Importance': '重要性'},
        color='Importance',
        color_continuous_scale='Viridis',
    )
    # The layout title set here supersedes the one passed to px.bar above.
    fig.update_layout(title="模型特征重要性排名", showlegend=False)
    fig.update_coloraxes(showscale=False)
    return fig
|
|
|
+
|
|
|
+
|
|
|
def draw_data_info_table(content):
    """Return a figure holding a one-column, one-row table with the given text.

    Args:
        content: Text placed in the single body cell.

    Returns:
        A ``go.Figure`` containing one ``go.Table`` trace.
    """
    # Single header cell and a single body cell carrying the description text.
    header_style = dict(values=["说明"], fill_color="paleturquoise", align="center")
    cell_style = dict(values=[[content]], fill_color="lavender", align="center")

    fig = go.Figure()
    fig.add_trace(go.Table(header=header_style, cells=cell_style))
    return fig
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
def draw_accuracy_table(df,content):
    """Return a table figure showing every column of the accuracy DataFrame.

    Args:
        df: DataFrame to render; each column becomes one table column.
        content: Not used by this function.

    Returns:
        A ``go.Figure`` with one ``go.Table`` trace titled "准确率表".
    """
    column_names = list(df.columns)
    column_values = [df[name] for name in df.columns]

    table = go.Table(
        header={"values": column_names, "fill_color": "paleturquoise", "align": "center"},
        cells={"values": column_values, "fill_color": "lavender", "align": "center"},
    )

    fig = go.Figure(data=[table])
    fig.update_layout(title="准确率表", showlegend=False)
    return fig
|
|
|
+
|
|
|
+
|
|
|
@app.route('/analysis_report', methods=['POST'])
def analysis_report():
    """Build the combined analysis report and write it out as an HTML file.

    The request values are forwarded unchanged to the data-loading and drawing
    functions of this module, which read the keys ``mongodb_database``,
    ``train_table``, ``accuracy_table``, ``model_table``, ``model_name``,
    ``col_time``, ``label`` and ``scatter_col_x``.

    Returns:
        A dict with ``success`` (1 or 0), ``args`` (the request values),
        ``start_time`` / ``end_time`` (local time, ``%Y-%m-%d %H:%M:%S``),
        ``file_path`` (status message naming the generated file, empty on
        failure) and, on failure only, ``msg`` (the traceback with newlines
        replaced by tabs). Errors are reported in the dict, never raised.
    """
    start_time = time.time()
    result = {}
    success = 0
    path = ""
    # Bound before the try block so the response can always echo it, even when
    # reading the request values is the step that fails.
    args = {}
    # Looked up here instead of relying on a module global that only exists
    # when this file is executed as a script.
    log = logging.getLogger("analysis_report log")
    print("Program starts execution!")
    try:
        args = request.values.to_dict()
        print('args',args)
        log.info(args)

        # Fetch the data and the model, then render the individual figures.
        df_all, df_accuracy, model = get_data_from_mongo(args)
        features = model.feature_name()
        text_content = draw_info(df_all, df_accuracy, model, features, args)
        figures = (
            draw_data_info_table(text_content),
            draw_global_scatter(df_all, args),
            draw_corr(df_all, features, args),
            draw_feature_importance(model, features),
            draw_accuracy_table(df_accuracy, text_content),
        )

        # One subplot row per figure; the spec types follow the figure order above.
        combined_fig = make_subplots(
            rows=5, cols=1,
            subplot_titles=["数据-模型概览","辐照度和实际功率的散点图", "相关性","特征重要性排名", "准确率表"],
            row_heights=[0.3, 0.6, 0.6, 0.6, 0.4],
            specs=[[{"type": "table"}], [{"type": "xy"}], [{"type": "heatmap"}], [{"type": "xy"}],[{"type": "table"}]]
        )
        # Copy every trace of figure i into row i of the combined layout.
        for row, fig in enumerate(figures, start=1):
            for trace in fig.data:
                combined_fig.add_trace(trace, row=row, col=1)

        combined_fig.update_layout(
            height=1500,
            title_text="分析结果汇总",
            title_x=0.5,  # centre the title
            showlegend=False,
        )
        combined_fig.update_coloraxes(showscale=False)

        # Millisecond timestamp plus a random suffix keeps file names distinct.
        filename = f"{int(time.time() * 1000)}_{random.randint(1000, 9999)}.html"
        directory = '/usr/share/nginx/html'
        # exist_ok avoids the check-then-create race between concurrent requests.
        os.makedirs(directory, exist_ok=True)
        file_path = os.path.join(directory, filename)
        combined_fig.write_html(file_path)
        # NOTE(review): the reported location (/data/html) differs from the
        # directory written to; presumably a volume/host mapping — confirm.
        path = f"交互式 HTML 文件已生成!路径/data/html/{filename}"
        success = 1
    except Exception:
        log.exception("analysis_report failed")
        # str.replace returns a new string; the result must be kept for the
        # single-line form to actually reach the response.
        result['msg'] = traceback.format_exc().replace("\n", "\t")
    end_time = time.time()
    result['success'] = success
    result['args'] = args
    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
    result['file_path'] = path
    print("Program execution ends!")
    return result
|
|
|
+
|
|
|
+
|
|
|
if __name__=="__main__":
    # Script entry point: configure logging, then serve the Flask app with waitress.
    print("Program starts execution!")
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    # Module-level logger used by the request handler.
    logger = logging.getLogger("analysis_report log")
    # serve() blocks until the server shuts down, so the start-up message has
    # to be emitted before the call rather than after it.
    print("server start!")
    # `serve` comes from the top-of-file `from waitress import serve`.
    serve(app, host="0.0.0.0", port=10092)
|