123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245 |
- import pandas as pd
- import matplotlib.pyplot as plt
- from pymongo import MongoClient
- import pickle
- import numpy as np
- import plotly.express as px
- from plotly.subplots import make_subplots
- import plotly.graph_objects as go
- from flask import Flask,request,jsonify
- from waitress import serve
- import time
- import random
- import argparse
- import logging
- import traceback
- import os
# WSGI application object; route handlers in this module register on it.
app = Flask(import_name='analysis_report——service')
def get_data_from_mongo(args):
    """Load the training data, accuracy data and pickled model from MongoDB.

    Args:
        args: Mapping with the keys ``mongodb_database``, ``train_table``,
            ``accuracy_table``, ``model_table`` and ``model_name``.

    Returns:
        A ``(df_all, df_accuracy, model)`` tuple: two DataFrames built from
        every document of the train and accuracy collections (``_id``
        excluded) and the unpickled model object.

    Raises:
        KeyError: If a required key is missing from ``args``.
        ValueError: If no document matches ``model_name`` in the model
            collection.
    """
    # SECURITY(review): credentials are hard-coded in source. The
    # MONGODB_CONNECTION environment variable takes precedence; the literal
    # is kept only as a backward-compatible fallback and should be removed.
    mongodb_connection = os.environ.get(
        "MONGODB_CONNECTION",
        "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/",
    )
    mongodb_database = args['mongodb_database']
    all_table = args['train_table']
    accuracy_table = args['accuracy_table']
    model_table = args['model_table']
    model_name = args['model_name']

    client = MongoClient(mongodb_connection)
    try:
        # Selecting a database that does not exist yet is not an error;
        # MongoDB creates it lazily.
        db = client[mongodb_database]
        # Materialise the cursors into DataFrames, dropping the _id field.
        df_all = pd.DataFrame(db[all_table].find({}, {'_id': 0}))
        df_accuracy = pd.DataFrame(db[accuracy_table].find({}, {'_id': 0}))
        model_data = db[model_table].find_one({"model_name": model_name})
    finally:
        # Always release the connection, even when a query fails.
        client.close()

    if model_data is None:
        # Previously this path fell through to an UnboundLocalError.
        raise ValueError(
            f"model {model_name!r} not found in collection {model_table!r}"
        )

    # The 'model' field is expected to hold the pickled model bytes.
    # SECURITY(review): pickle.loads executes arbitrary code on load; only
    # safe while the model collection is fully trusted.
    model = pickle.loads(model_data['model'])
    return df_all, df_accuracy, model
def draw_info(df_all, df_accuracy, model, features, args):
    """Build the HTML text summarising the data ranges and model parameters.

    Args:
        df_all: DataFrame holding the feature, time and label columns.
        df_accuracy: DataFrame whose time column defines the test window.
        model: Object exposing a ``params`` attribute.
        features: List of feature column names.
        args: Mapping providing ``col_time`` and ``label`` column names.

    Returns:
        A string describing the training time range and row count, the test
        time range, and the model parameters (separated by ``<br>``).
    """
    time_col = args['col_time']
    target_col = args['label']

    # Test window: the full span of the accuracy data.
    test_times = df_accuracy[time_col]
    test_start = test_times.min()
    test_end = test_times.max()

    # Training rows: everything strictly before the test window begins.
    is_before_test = df_all[time_col] < test_start
    train_part = df_all[is_before_test][features + [time_col, target_col]]
    train_times = train_part[time_col]
    train_start = train_times.min()
    train_end = train_times.max()

    train_summary = f"训练数据时间范围:{train_start} 至 {train_end},共{len(train_part)}条记录,"
    test_summary = f"测试集数据时间范围:{test_start} 至 {test_end}。"
    model_summary = f"<br>lightgbm模型参数:{model.params}"
    return train_summary + test_summary + model_summary
-
def draw_global_scatter(df, args):
    """Return a Plotly Express scatter of the label against a chosen column.

    Args:
        df: DataFrame containing both plotted columns.
        args: Mapping providing ``scatter_col_x`` (x-axis column) and
            ``label`` (y-axis column).

    Returns:
        The scatter figure.
    """
    x_name = args['scatter_col_x']
    y_name = args['label']
    # Display names used when the plotted columns carry these exact names.
    axis_labels = {"辐照度": "辐照度 (W/m²)", "实际功率": "实际功率 (kW)"}
    return px.scatter(
        df,
        x=x_name,
        y=y_name,
        title=f"{x_name}和{y_name}的散点图",
        labels=axis_labels,
    )
-
-
def draw_corr(df, features, args):
    """Return a heatmap of the correlations between features and label.

    Args:
        df: DataFrame containing the feature and label columns.
        features: List of feature column names.
        args: Mapping providing the ``label`` column name.

    Returns:
        A Plotly Express heatmap figure with cell values shown and the
        colour bar hidden.
    """
    selected_columns = features + [args['label']]
    correlations = df[selected_columns].corr()
    fig = px.imshow(
        correlations,
        text_auto=True,  # print the value inside each cell
        color_continuous_scale='RdBu',
        title="Correlation Heatmap",
    )
    fig.update_coloraxes(showscale=False)
    return fig
def draw_feature_importance(model, features):
    """Return a bar chart of feature importances, highest first.

    Args:
        model: Object exposing ``feature_importance()``, called here with
            its default arguments.
        features: Feature names, in the same order as the importances.

    Returns:
        A Plotly Express bar figure with legend and colour bar hidden.
    """
    ranking = pd.DataFrame({
        'Feature': features,
        'Importance': model.feature_importance(),
    }).sort_values(by='Importance', ascending=False)

    fig = px.bar(
        ranking,
        x='Feature',
        y='Importance',
        title="特征重要性排名",
        labels={'Feature': '特征', 'Importance': '重要性'},
        color='Importance',
        color_continuous_scale='Viridis',
    )
    # The layout update replaces the title passed to px.bar above.
    fig.update_layout(title="模型特征重要性排名", showlegend=False)
    fig.update_coloraxes(showscale=False)
    return fig
def draw_data_info_table(content):
    """Return a one-column, one-row table figure showing ``content``.

    Args:
        content: Text placed in the single table cell.

    Returns:
        A ``go.Figure`` holding one ``go.Table`` trace.
    """
    header_spec = {
        "values": ["说明"],  # single column header
        "fill_color": "paleturquoise",
        "align": "center",
    }
    cell_spec = {
        "values": [[content]],  # one column containing one row
        "fill_color": "lavender",
        "align": "center",
    }
    fig = go.Figure()
    fig.add_trace(go.Table(header=header_spec, cells=cell_spec))
    return fig
-
def draw_accuracy_table(df, content):
    """Return a table figure that renders every column of ``df``.

    Args:
        df: DataFrame to display; its column names become the header.
        content: Unused; kept so existing callers keep working.

    Returns:
        A ``go.Figure`` holding one ``go.Table`` trace titled "准确率表".
    """
    column_names = list(df.columns)
    column_values = [df[name] for name in df.columns]
    table = go.Table(
        header={
            "values": column_names,
            "fill_color": "paleturquoise",
            "align": "center",
        },
        cells={
            "values": column_values,
            "fill_color": "lavender",
            "align": "center",
        },
    )
    fig = go.Figure(data=[table])
    fig.update_layout(title="准确率表", showlegend=False)
    return fig
def _build_combined_figure(section_figs):
    """Copy the traces of the five section figures into one 5-row figure."""
    combined_fig = make_subplots(
        rows=5, cols=1,
        subplot_titles=["数据-模型概览", "辐照度和实际功率的散点图", "相关性", "特征重要性排名", "准确率表"],
        row_heights=[0.3, 0.6, 0.6, 0.6, 0.4],
        # Subplot type for each row, top to bottom.
        specs=[[{"type": "table"}], [{"type": "xy"}], [{"type": "heatmap"}], [{"type": "xy"}], [{"type": "table"}]],
    )
    # Section i goes into row i (rows are 1-based).
    for row, fig in enumerate(section_figs, start=1):
        for trace in fig.data:
            combined_fig.add_trace(trace, row=row, col=1)
    combined_fig.update_layout(
        height=1500,
        title_text="分析结果汇总",
        title_x=0.5,  # centre the title
        showlegend=False,
    )
    combined_fig.update_coloraxes(showscale=False)
    return combined_fig


@app.route('/analysis_report', methods=['POST'])
def analysis_report():
    """Generate the combined analysis report and save it as an HTML file.

    Reads the request values, loads the data and model from MongoDB, builds
    the five report sections and writes them into a single HTML file under
    ``/usr/share/nginx/html``.

    Returns:
        A dict with ``success`` (1 on success, 0 on failure), ``args``,
        ``start_time``, ``end_time`` and ``file_path``; on failure it also
        carries ``msg`` with the traceback (newlines replaced by tabs).
    """
    # Resolve the logger here: the module-level ``logger`` only exists when
    # the file is run as a script, not when ``app`` is imported by a server.
    log = logging.getLogger("analysis_report log")
    start_time = time.time()
    result = {}
    success = 0
    path = ""
    # Pre-bind so the response can be built even if reading the request fails.
    args = {}
    print("Program starts execution!")
    try:
        args = request.values.to_dict()
        print('args', args)
        log.info(args)
        # Fetch data and model.
        df_all, df_accuracy, model = get_data_from_mongo(args)
        features = model.feature_name()
        text_content = draw_info(df_all, df_accuracy, model, features, args)
        section_figs = [
            draw_data_info_table(text_content),
            draw_global_scatter(df_all, args),
            draw_corr(df_all, features, args),
            draw_feature_importance(model, features),
            draw_accuracy_table(df_accuracy, text_content),
        ]
        combined_fig = _build_combined_figure(section_figs)

        # Timestamp plus random suffix keeps concurrent requests from colliding.
        filename = f"{int(time.time() * 1000)}_{random.randint(1000, 9999)}.html"
        directory = '/usr/share/nginx/html'
        os.makedirs(directory, exist_ok=True)
        combined_fig.write_html(os.path.join(directory, filename))
        # NOTE(review): the reported path (/data/html) differs from the
        # directory written above; presumably a volume mapping - confirm.
        path = f"交互式 HTML 文件已生成!路径/data/html/{filename}"
        success = 1
    except Exception:
        # Top-level request boundary: log the failure and report it to the
        # caller instead of letting the request crash.
        log.exception("analysis_report failed")
        # str.replace returns a new string; the original discarded it.
        result['msg'] = traceback.format_exc().replace("\n", "\t")
    end_time = time.time()
    result['success'] = success
    result['args'] = args
    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
    result['file_path'] = path
    print("Program execution ends!")
    return result
if __name__ == "__main__":
    print("Program starts execution!")
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    # Module-level logger, kept under this name for code that logs through it.
    logger = logging.getLogger("analysis_report log")
    # serve() blocks until the server stops, so announce start-up first.
    print("server start!")
    serve(app, host="0.0.0.0", port=10092)
|