anweiguo
/
algorithm_platform


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245
							import pandas as pd
import matplotlib.pyplot as plt
from pymongo import MongoClient
import pickle
import numpy as np
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from flask import Flask,request,jsonify
from waitress import serve
import time
import random
import argparse
import logging
import traceback
import os

# Flask application object. The /analysis_report route is registered on it via
# @app.route, and it is handed to waitress' serve() in the __main__ block.
app = Flask('analysis_report——service')
def get_data_from_mongo(args):
    """Load the full dataset, the accuracy table and the pickled model from MongoDB.

    Args:
        args: Mapping providing the keys ``mongodb_database``, ``train_table``,
            ``accuracy_table``, ``model_table`` and ``model_name``.

    Returns:
        A ``(df_all, df_accuracy, model)`` tuple: two DataFrames built from
        every document of the train and accuracy collections (without the
        ``_id`` field) and the object unpickled from the ``model`` field of
        the matching model document.

    Raises:
        KeyError: If one of the required keys is missing from ``args``.
        ValueError: If the model collection holds no document whose
            ``model_name`` equals the requested name.
    """
    mongodb_database = args['mongodb_database']
    all_table = args['train_table']
    accuracy_table = args['accuracy_table']
    model_table = args['model_table']
    model_name = args['model_name']

    # SECURITY NOTE(review): the fallback connection string embeds credentials
    # in source code. It is kept so existing deployments keep working, but the
    # MONGODB_CONNECTION environment variable, when set, takes precedence.
    mongodb_connection = os.environ.get(
        "MONGODB_CONNECTION",
        "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/",
    )

    client = MongoClient(mongodb_connection)
    try:
        # Selecting a database does not require it to exist beforehand.
        db = client[mongodb_database]
        # DataFrame() consumes each cursor here, while the client is still open.
        df_all = pd.DataFrame(db[all_table].find({}, {'_id': 0}))
        df_accuracy = pd.DataFrame(db[accuracy_table].find({}, {'_id': 0}))
        model_data = db[model_table].find_one({"model_name": model_name})
    finally:
        # Release the connection even when one of the queries fails.
        client.close()

    if model_data is None:
        # Previously this path fell through to the return statement and failed
        # with an UnboundLocalError on ``model``.
        raise ValueError(
            f"model '{model_name}' not found in collection '{model_table}'"
        )

    # SECURITY NOTE: pickle.loads can execute arbitrary code while loading;
    # the model collection must only ever contain trusted data.
    model = pickle.loads(model_data['model'])
    return df_all, df_accuracy, model


def draw_info(df_all,df_accuracy,model,features,args):
    """Compose the HTML text describing the train/test time ranges and the model.

    The test period is the min/max of the time column in ``df_accuracy``.
    Training rows are the rows of ``df_all`` whose time value is strictly
    earlier than the start of the test period.

    Args:
        df_all: DataFrame holding the feature, time and label columns.
        df_accuracy: DataFrame holding at least the time column.
        model: Object exposing a ``params`` attribute, embedded in the text.
        features: List of feature column names.
        args: Mapping with the keys ``col_time`` and ``label``.

    Returns:
        The summary string (contains a ``<br>`` line break).
    """
    time_column = args['col_time']
    target_column = args['label']

    # Test period boundaries.
    accuracy_times = df_accuracy[time_column]
    test_start = accuracy_times.min()
    test_end = accuracy_times.max()

    # Everything strictly before the test period counts as training data.
    is_before_test = df_all[time_column] < test_start
    wanted_columns = features + [time_column, target_column]
    train_rows = df_all[is_before_test][wanted_columns]

    train_times = train_rows[time_column]
    train_start = train_times.min()
    train_end = train_times.max()
    train_count = train_rows.shape[0]

    train_part = f"训练数据时间范围：{train_start} 至 {train_end}，共{train_count}条记录，"
    test_part = f"测试集数据时间范围：{test_start} 至 {test_end}。"
    model_part = f"<br>lightgbm模型参数：{model.params}"
    return train_part + test_part + model_part
    

def draw_global_scatter(df,args):
    """Build a scatter plot of the label column against a configurable x column.

    Args:
        df: DataFrame containing both plotted columns.
        args: Mapping with the keys ``scatter_col_x`` (x-axis column) and
            ``label`` (y-axis column).

    Returns:
        The Plotly Express scatter figure.
    """
    x_column = args['scatter_col_x']
    y_column = args['label']

    figure_title = f"{x_column}和{y_column}的散点图"
    # Display names with units, keyed by two literal column names.
    axis_labels = {"辐照度": "辐照度 (W/m²)", "实际功率": "实际功率 (kW)"}

    return px.scatter(
        df,
        x=x_column,
        y=y_column,
        title=figure_title,
        labels=axis_labels,
    )
    
    
def draw_corr(df,features,args):
    """Build a correlation heatmap over the feature columns plus the label.

    Args:
        df: DataFrame containing the feature and label columns.
        features: List of feature column names.
        args: Mapping with the key ``label``.

    Returns:
        The Plotly Express heatmap figure, with its colour scale bar hidden.
    """
    target_column = args['label']
    selected_columns = features + [target_column]

    # Pairwise correlation matrix of the selected columns.
    correlations = df[selected_columns].corr()

    figure = px.imshow(
        correlations,
        title="Correlation Heatmap",
        color_continuous_scale='RdBu',
        text_auto=True,  # print the value inside every cell
    )
    figure.update_coloraxes(showscale=False)
    return figure

def draw_feature_importance(model,features):
    """Build a bar chart of the model's feature importances, highest first.

    Args:
        model: Object whose ``feature_importance()`` method (called with its
            default arguments) returns one value per feature.
        features: Feature names, in the same order as the importance values.

    Returns:
        The Plotly Express bar figure, with legend and colour scale bar hidden.
    """
    scores = model.feature_importance()

    # Tabulate name/score pairs and rank them in descending order.
    ranking = pd.DataFrame({
        'Feature': features,
        'Importance': scores
    }).sort_values(by='Importance', ascending=False)

    axis_names = {'Feature': '特征', 'Importance': '重要性'}
    figure = px.bar(
        ranking,
        x='Feature',
        y='Importance',
        title="特征重要性排名",
        labels=axis_names,
        color='Importance',
        color_continuous_scale='Viridis',
    )

    # The layout title set here replaces the one passed to px.bar above.
    figure.update_layout(title="模型特征重要性排名", showlegend=False)
    figure.update_coloraxes(showscale=False)
    return figure


def draw_data_info_table(content):
    """Wrap a text description in a one-column, one-row Plotly table figure.

    Args:
        content: The text placed in the single body cell.

    Returns:
        A ``go.Figure`` holding one ``go.Table`` trace.
    """
    # Single column headed "说明".
    header_spec = {
        "values": ["说明"],
        "fill_color": "paleturquoise",
        "align": "center",
    }
    # One column containing one row: the description text.
    cells_spec = {
        "values": [[content]],
        "fill_color": "lavender",
        "align": "center",
    }

    figure = go.Figure()
    figure.add_trace(go.Table(header=header_spec, cells=cells_spec))
    return figure
    

def draw_accuracy_table(df,content):
    """Render every column of ``df`` as a Plotly table figure.

    Args:
        df: DataFrame to display; its column names become the header row.
        content: Not used by this function.

    Returns:
        A ``go.Figure`` holding one ``go.Table`` trace, titled "准确率表".
    """
    header_spec = {
        "values": list(df.columns),
        "fill_color": "paleturquoise",
        "align": "center",
    }
    # go.Table takes its body column by column.
    cells_spec = {
        "values": [df[name] for name in df.columns],
        "fill_color": "lavender",
        "align": "center",
    }

    table = go.Table(header=header_spec, cells=cells_spec)
    figure = go.Figure(data=[table])
    figure.update_layout(title="准确率表", showlegend=False)
    return figure


@app.route('/analysis_report', methods=['POST'])
def analysis_report():
    """Handle POST /analysis_report: build the HTML analysis report.

    Reads the request parameters, loads data and model from MongoDB, draws
    the five report sections, stacks them in one figure and writes it as an
    HTML file.

    Returns:
        A dict with the keys ``success`` (1 on success, 0 on failure),
        ``args`` (the request parameters), ``start_time``, ``end_time``,
        ``file_path`` (status message with the report path, empty on failure)
        and, on failure only, ``msg`` (the traceback on a single line).
    """
    # Look the logger up here rather than relying on the module-level
    # ``logger``, which only exists when this file is executed as a script.
    log = logging.getLogger("analysis_report log")

    start_time = time.time()
    result = {}
    success = 0
    path = ""
    # Pre-bind so the response can be built even if reading the request fails.
    args = {}
    print("Program starts execution!")
    try:
        args = request.values.to_dict()
        print('args',args)
        log.info(args)

        # Load the data and the model.
        df_all, df_accuracy, model = get_data_from_mongo(args)
        features = model.feature_name()
        text_content = draw_info(df_all,df_accuracy,model,features,args)

        # One figure per report section, in display order (top to bottom).
        text_fig = draw_data_info_table(text_content)
        scatter_fig = draw_global_scatter(df_all,args)
        heatmap_fig = draw_corr(df_all,features,args)
        importance_fig = draw_feature_importance(model,features)
        table_fig = draw_accuracy_table(df_accuracy,text_content)
        section_figs = [text_fig, scatter_fig, heatmap_fig, importance_fig, table_fig]

        # Subplot grid: one row per section, each with a matching subplot type.
        combined_fig = make_subplots(
            rows=5, cols=1,
            subplot_titles=["数据-模型概览","辐照度和实际功率的散点图", "相关性","特征重要性排名", "准确率表"],
            row_heights=[0.3, 0.6, 0.6, 0.6, 0.4],
            specs=[[{"type": "table"}], [{"type": "xy"}], [{"type": "heatmap"}], [{"type": "xy"}],[{"type": "table"}]]
        )
        # Copy the traces of section i into row i of the combined figure.
        for row, section_fig in enumerate(section_figs, start=1):
            for trace in section_fig.data:
                combined_fig.add_trace(trace, row=row, col=1)

        combined_fig.update_layout(
            height=1500,
            title_text="分析结果汇总",
            title_x=0.5,  # centre the title
            showlegend=False,
        )
        combined_fig.update_coloraxes(showscale=False)

        # Timestamp plus random suffix keeps concurrent reports from colliding.
        filename = f"{int(time.time() * 1000)}_{random.randint(1000, 9999)}.html"
        directory = '/usr/share/nginx/html'
        # exist_ok avoids the check-then-create race between parallel requests.
        os.makedirs(directory, exist_ok=True)
        file_path = os.path.join(directory, filename)
        combined_fig.write_html(file_path)
        # NOTE(review): the message reports /data/html while the file is written
        # under /usr/share/nginx/html - presumably a volume mapping; confirm.
        path = f"交互式 HTML 文件已生成！路径/data/html/{filename}"
        success = 1
    except Exception:
        # Top-level boundary: log the failure and report it to the caller
        # instead of letting the request crash.
        log.exception("analysis_report failed")
        # str.replace returns a new string; the result must be kept so the
        # traceback really ends up on one line.
        result['msg'] = traceback.format_exc().replace("\n","\t")
    end_time = time.time()
    result['success'] = success
    result['args'] = args
    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
    result['file_path'] = path
    print("Program execution ends!")
    return result


if __name__=="__main__":
    # Script entry point: configure logging, then serve the Flask app with
    # waitress on all interfaces, port 10092.
    print("Program starts execution!")
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    # Module-level logger used by the request handler.
    logger = logging.getLogger("analysis_report log")
    # serve() blocks until the server shuts down, so the start-up message has
    # to be printed before the call; after it, it would only show on exit.
    print("server start!")
    # ``serve`` is already imported at the top of the file.
    serve(app, host="0.0.0.0", port=10092)