Browse Source

awg commit algorithm components

anweiguo 4 tháng trước
mục cha
commit
14c5996203
3 tập tin đã thay đổi với 289 bổ sung và 249 xóa
  1. 285 247
      evaluation_processing/analysis.py
  2. 2 2
      evaluation_processing/analysis_report.py
  3. 2 0
      run_all.py

+ 285 - 247
evaluation_processing/analysis.py

@@ -1,247 +1,285 @@
-# # -*- coding: utf-8 -*-
-# import pandas as pd
-# import matplotlib.pyplot as plt
-# from pymongo import MongoClient
-# import pickle
-# import numpy as np
-# import plotly.express as px
-# from plotly.subplots import make_subplots
-# import plotly.graph_objects as go
-# from flask import Flask,request,jsonify
-# from waitress import serve
-# import time
-# import random
-# import argparse
-# import logging
-# import traceback
-# import os
-# import lightgbm as lgb
-#
-# app = Flask('analysis_report——service')
-# def get_data_from_mongo(args):
-#     # 1.读数据
-#     mongodb_connection,mongodb_database,all_table,accuracy_table,model_table,model_name = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/",args['mongodb_database'],args['train_table'],args['accuracy_table'],args['model_table'],args['model_name']
-#     client = MongoClient(mongodb_connection)
-#     # 选择数据库(如果数据库不存在,MongoDB 会自动创建)
-#     db = client[mongodb_database]
-#     # 将游标转换为列表,并创建 pandas DataFrame
-#     df_all = pd.DataFrame(db[all_table].find({}, {'_id': 0}))
-#
-#     df_accuracy = pd.DataFrame(db[accuracy_table].find({}, {'_id': 0}))
-#
-#     model_data = db[model_table].find_one({"model_name": model_name})
-#     if model_data is not None:
-#         model_binary = model_data['model']  # 确保这个字段是存储模型的二进制数据
-#         # 反序列化模型
-#         model = pickle.loads(model_binary)
-#     client.close()
-#     return df_all,df_accuracy,model
-#
-#
-# def draw_info(df_all,df_accuracy,model,features,args):
-#     #1.数据描述 数据描述:
-#     col_time = args['col_time']
-#     label = args['label']
-#     df_accuracy_beginTime = df_accuracy[col_time].min()
-#     df_accuracy_endTime = df_accuracy[col_time].max()
-#     df_train = df_all[df_all[col_time]<df_accuracy_beginTime][features+[col_time,label]]
-#     df_train_beginTime = df_train[col_time].min()
-#     df_train_endTime = df_train[col_time].max()
-#     text_content = f"训练数据时间范围:{df_train_beginTime} 至 {df_train_endTime},共{df_train.shape[0]}条记录,测试集数据时间范围:{df_accuracy_beginTime} 至 {df_accuracy_endTime}。<br>lightgbm模型参数:{model.params}"
-#     return text_content
-#
-#
-#
-# def draw_global_scatter(df,args):
-#     # --- 1. 实际功率和辐照度的散点图 ---
-#     col_x = args['scatter_col_x']
-#     col_y = args['label']
-#     scatter_fig = px.scatter(
-#         df,
-#         x=col_x,
-#         y=col_y,
-#         title=f"{col_x}和{col_y}的散点图",
-#         labels={"辐照度": "辐照度 (W/m²)", "实际功率": "实际功率 (kW)"}
-#     )
-#     return scatter_fig
-#
-#
-#
-# def draw_corr(df,features,args):
-#
-#     # --- 2. 相关性热力图 ---
-#     # 计算相关性矩阵
-#     label = args['label']
-#     features_coor = features+[label]
-#     corr_matrix = df[features_coor].corr()
-#     # 使用 Plotly Express 绘制热力图
-#     heatmap_fig = px.imshow(corr_matrix,
-#                     text_auto=True,  # 显示数值
-#                     color_continuous_scale='RdBu',  # 配色方案
-#                     title="Correlation Heatmap")
-#     heatmap_fig.update_coloraxes(showscale=False)
-#
-#     return heatmap_fig
-#
-# def draw_feature_importance(model,features):
-#     # --- 3. 特征重要性排名 ---
-#     # 获取特征重要性
-#     importance = model.feature_importance()  # 'split' 或 'gain',根据需求选择
-#
-#     # 转换为 DataFrame 方便绘图
-#     feature_importance_df = pd.DataFrame({
-#         'Feature': features,
-#         'Importance': importance
-#     })
-#     feature_importance_df = feature_importance_df.sort_values(by='Importance', ascending=False)
-#
-#     # 使用 Plotly Express 绘制条形图
-#     importance_fig = px.bar(feature_importance_df, x='Feature', y='Importance',
-#                  title="特征重要性排名",
-#                  labels={'Feature': '特征', 'Importance': '重要性'},
-#                  color='Importance',
-#                  color_continuous_scale='Viridis')
-#     # 更新每个 trace,确保没有图例
-#
-#     importance_fig.update_layout(title="模型特征重要性排名",
-#                                  showlegend=False  # 移除图例
-#                                 )
-#     importance_fig.update_coloraxes(showscale=False)
-#     return importance_fig
-#
-#
-# def draw_data_info_table(content):
-#     # --- 4. 创建数据说明的表格 ---
-#     # 转换为表格格式:1行1列,且填充文字说明
-#     # 转换为表格格式
-#     # 创建一个空的图
-#     table_fig = go.Figure()
-#
-#     # 第一部分: 显示文字说明
-#     table_fig.add_trace(go.Table(
-#         header=dict(
-#             values=["说明"],  # 表格只有一列:说明
-#             fill_color="paleturquoise",
-#             align="center"
-#         ),
-#         cells=dict(
-#             values=[[content]] ,  # 第一行填入文本说明
-#             fill_color="lavender",
-#             align="center"
-#         )
-#     ))
-#
-#
-#     return table_fig
-#
-#
-#
-# def draw_accuracy_table(df,content):
-#
-#     # --- 4. 每日的准确率表格 ---
-#     # 转换为表格格式
-#     table_fig = go.Figure(
-#         data=[
-#             go.Table(
-#                 header=dict(
-#                     values=list(df.columns),
-#                     fill_color="paleturquoise",
-#                     align="center"
-#                 ),
-#                 cells=dict(
-#                     values=[df[col] for col in df.columns],
-#                     fill_color="lavender",
-#                     align="center"
-#                 )
-#             )
-#         ]
-#     )
-#     table_fig.update_layout(title="准确率表", showlegend=False)
-#     return table_fig
-#
-#
-# @app.route('/analysis_report', methods=['POST'])
-# def analysis_report():
-#     start_time = time.time()
-#     result = {}
-#     success = 0
-#     path = ""
-#     print("Program starts execution!")
-#     try:
-#         args = request.values.to_dict()
-#         print('args',args)
-#         logger.info(args)
-#         #获取数据
-#         df_all, df_accuracy, model = get_data_from_mongo(args)
-#         features = model.feature_name()
-#         text_content = draw_info(df_all,df_accuracy,model,features,args)
-#         text_fig,scatter_fig,heatmap_fig,importance_fig,table_fig=draw_data_info_table(text_content),draw_global_scatter(df_all,args),draw_corr(df_all,features,args),draw_feature_importance(model,features),\
-#         draw_accuracy_table(df_accuracy,text_content)
-#         # --- 合并图表并保存到一个 HTML 文件 ---
-#         # 创建子图布局
-#         combined_fig = make_subplots(
-#             rows=5, cols=1,
-#             subplot_titles=["数据-模型概览","辐照度和实际功率的散点图", "相关性","特征重要性排名", "准确率表"],
-#             row_heights=[0.3, 0.6, 0.6, 0.6, 0.4],
-#             specs=[[{"type": "table"}], [{"type": "xy"}], [{"type": "heatmap"}], [{"type": "xy"}],[{"type": "table"}]]  # 指定每个子图类型
-#         )
-#         # 添加文本信息到子图(第一行)
-#         # 添加文字说明
-#         for trace in text_fig.data:
-#             combined_fig.add_trace(trace, row=1, col=1)
-#
-#         # 添加散点图
-#         for trace in scatter_fig.data:
-#             combined_fig.add_trace(trace, row=2, col=1)
-#
-#         # 添加相关性热力图
-#         for trace in heatmap_fig.data:
-#             combined_fig.add_trace(trace, row=3, col=1)
-#
-#         # 添加特征重要性排名图
-#         for trace in importance_fig.data:
-#             combined_fig.add_trace(trace, row=4, col=1)
-#
-#         # 添加表格
-#         for trace in table_fig.data:
-#             combined_fig.add_trace(trace, row=5, col=1)
-#
-#         # 更新布局
-#         combined_fig.update_layout(
-#         height=1500,
-#         title_text="分析结果汇总",  # 添加换行符以适应文本内容
-#         title_x=0.5,  # 中心对齐标题
-#         showlegend=False,
-#         )
-#         combined_fig.update_coloraxes(showscale=False)
-#         filename = f"{int(time.time() * 1000)}_{random.randint(1000, 9999)}.html"
-#         # 保存为 HTML
-#         directory = '/usr/share/nginx/html'
-#         if not os.path.exists(directory):
-#             os.makedirs(directory)
-#         file_path = os.path.join(directory, filename)
-#         # combined_fig.write_html(f"D://usr//{filename}")
-#         combined_fig.write_html(file_path)
-#         path = f"http://ds2:10093/{filename}"
-#         success = 1
-#     except Exception as e:
-#         my_exception = traceback.format_exc()
-#         my_exception.replace("\n","\t")
-#         result['msg'] = my_exception
-#     end_time = time.time()
-#     result['success'] = success
-#     result['args'] = args
-#     result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
-#     result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
-#     result['file_path'] = path
-#     print("Program execution ends!")
-#     return result
-#
-#
-# if __name__=="__main__":
-#     # print("Program starts execution!")
-#     # logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-#     # logger = logging.getLogger("analysis_report log")
-#     # from waitress import serve
-#     # serve(app, host="0.0.0.0", port=10092)
-#     # print("server start!")
+# -*- coding: utf-8 -*-
+import numpy as np
+from flask import Flask, request
+import time
+import random
+import logging
+import traceback
+import os
+from common.database_dml import get_df_list_from_mongo, insert_data_into_mongo
+import plotly.express as px
+import plotly.graph_objects as go
+import pandas as pd
+import plotly.io as pio
+from bson.decimal128 import Decimal128
+import numbers
+
+# Flask application object: the route handler defined further down registers
+# itself on it, and the __main__ section hands it to waitress for serving.
+app = Flask('analysis_report——service')
+
+
+def _normalize_numeric(value):
+    """Return ``value`` as a plain ``float`` when it is numeric, else unchanged.
+
+    ``Decimal128`` values are converted through ``to_decimal()``; any other
+    ``numbers.Number`` instance goes through ``float()``; everything else is
+    returned untouched.
+    """
+    if isinstance(value, Decimal128):
+        return float(value.to_decimal())
+    if isinstance(value, numbers.Number):
+        return float(value)
+    return value
+
+
+def _coerce_metric(value):
+    """Return ``value`` as a float, or ``np.nan`` when it is not numeric."""
+    normalized = _normalize_numeric(value)
+    return normalized if isinstance(normalized, float) else np.nan
+
+
+def _normalize_frame(df, col_time):
+    """Convert every numeric cell of ``df`` to float and sort rows by ``col_time``."""
+    # DataFrame.applymap was renamed to DataFrame.map in pandas 2.1. The method
+    # is looked up on the class so that a data column named "map" cannot
+    # shadow it, and applymap is only touched when map does not exist.
+    elementwise = getattr(pd.DataFrame, 'map', None) or pd.DataFrame.applymap
+    return elementwise(df, _normalize_numeric).sort_values(by=col_time)
+
+
+def put_analysis_report_to_html(args, df_predict, df_accuracy):
+    """Render the prediction/accuracy report as an HTML file and return its URL.
+
+    The page contains three sections: a line chart of the actual value and
+    each model's predicted value over time, the accuracy table, and a
+    per-model summary of the accuracy metrics.
+
+    Args:
+        args: mapping that must provide ``col_time`` (time column name),
+            ``label`` (actual-value column), ``label_pre`` (predicted-value
+            column) and ``farmId`` (used as the filename prefix).
+        df_predict: DataFrame with the ``col_time``, ``label``, ``label_pre``
+            and ``model`` columns.
+        df_accuracy: DataFrame with the ``col_time`` and ``model`` columns and
+            any of ``MAE``, ``accuracy``, ``RMSE``, ``deviationElectricity``,
+            ``deviationAssessment``.
+
+    Returns:
+        The ``http://ds3:10010/<filename>`` URL of the generated file, which is
+        written under ``/usr/share/nginx/html``.
+
+    Raises:
+        KeyError: if a required key or column is missing.
+    """
+    col_time = args['col_time']
+    label = args['label']
+    label_pre = args['label_pre']
+    farmId = args['farmId']
+
+    # Work on normalized copies; the caller's frames are left untouched.
+    df_predict = _normalize_frame(df_predict, col_time)
+    df_accuracy = _normalize_frame(df_accuracy, col_time)
+
+    # -------------------- predicted vs. actual power curves --------------------
+    fig = go.Figure()
+
+    # Actual power curve (dotted line, cross markers).
+    fig.add_trace(go.Scatter(
+        x=df_predict[col_time],
+        y=df_predict[label],
+        mode='lines+markers',
+        name='实际功率',
+        line=dict(dash='dot', width=2),
+        marker=dict(symbol='cross'),
+    ))
+    # One predicted-value curve per model.
+    for model in df_predict['model'].unique():
+        model_data = df_predict[df_predict['model'] == model]
+        fig.add_trace(go.Scatter(
+            x=model_data[col_time],
+            y=model_data[label_pre],
+            mode='lines+markers',
+            name=f'{model} 预测值',
+            marker=dict(symbol='circle'),
+            line=dict(width=2)
+        ))
+
+    fig.update_layout(
+        template='seaborn',
+        # No title text is set; only its position and font are configured.
+        title=dict(
+            x=0.5, font=dict(size=20, color='darkblue')
+        ),
+        plot_bgcolor='rgba(255, 255, 255, 0.8)',
+        xaxis=dict(
+            showgrid=True,
+            gridcolor='rgba(200, 200, 200, 0.5)',
+            title='时间',
+            rangeslider=dict(visible=True),
+            rangeselector=dict(visible=True)
+        ),
+        yaxis=dict(
+            showgrid=True,
+            gridcolor='rgba(200, 200, 200, 0.5)',
+            title='功率'
+        ),
+        legend=dict(
+            x=0.01,
+            y=0.99,
+            bgcolor='rgba(255, 255, 255, 0.7)',
+            bordercolor='black',
+            borderwidth=1,
+            font=dict(size=12)
+        ),
+        # A single hover box lists every trace at the hovered x position.
+        hovermode='x unified',
+        hoverlabel=dict(
+            bgcolor='white',
+            font_size=14,
+            font_family="Rockwell",
+            bordercolor='black'
+        ),
+        # Margins keep the title and axis labels from being clipped.
+        margin=dict(l=50, r=50, t=50, b=50)
+    )
+    # Embed the chart as an HTML fragment rather than a full document.
+    power_html = pio.to_html(fig, full_html=False)
+
+    # -------------------- accuracy table --------------------
+    # Rendered before the metric columns are coerced below, so the table shows
+    # the values as they came out of normalization.
+    acc_html = df_accuracy.sort_values(by=col_time).to_html(classes='table table-bordered table-striped', index=False)
+
+    # -------------------- accuracy summary --------------------
+    # (source column, aggregation spec, display names of the resulting columns)
+    metric_specs = [
+        ('MAE', np.nanmean, ['MAE平均值']),
+        ('accuracy', np.nanmean, ['准确率平均值']),
+        ('RMSE', np.nanmean, ['RMSE平均值']),
+        ('deviationElectricity', [np.nanmean, np.nansum], ['考核电量平均值', '考核总电量']),
+        ('deviationAssessment', [np.nanmean, np.nansum], ['考核分数平均值', '考核总分数']),
+    ]
+    agg_dict = {}
+    rename_cols = ['model']
+    for col, aggregation, display_names in metric_specs:
+        if col not in df_accuracy.columns:
+            continue
+        # Non-numeric cells become NaN so the nan-aware aggregations skip them.
+        df_accuracy[col] = df_accuracy[col].apply(_coerce_metric)
+        agg_dict[col] = aggregation
+        rename_cols.extend(display_names)
+
+    if agg_dict:
+        summary_df = df_accuracy.groupby('model').agg(agg_dict).reset_index()
+    else:
+        # None of the known metric columns is present, so there is nothing to
+        # aggregate; list the models instead of calling agg() with an empty spec.
+        summary_df = df_accuracy[['model']].drop_duplicates()
+    summary_df.columns = rename_cols
+    summary_html = summary_df.to_html(classes='table table-bordered table-striped', index=False)
+
+    # -------------------- full HTML page --------------------
+    html_content = f"""
+    <!DOCTYPE html>
+    <html lang="en">
+    <head>
+        <meta charset="UTF-8">
+        <meta name="viewport" content="width=device-width, initial-scale=1.0">
+        <title>Data Analysis Report</title>
+        <!-- 引入 Bootstrap CSS -->
+        <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
+        <style>
+         .justify-between {{
+                display: flex;
+                justify-content: space-between;
+           }}
+            body {{
+                background-color: #f4f4f9;
+                font-family: Arial, sans-serif;
+                padding: 20px;
+            }}
+            .container {{
+                background-color: #fff;
+                padding: 20px;
+                border-radius: 10px;
+                box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1);
+                margin-bottom: 30px;
+            }}
+           h1 {{
+                text-align: center;
+                color: #333;
+                margin-bottom: 20px;
+            }}
+            .plot-container {{
+                margin: 20px 0;
+                max-height: 500px;  /* 限制高度 */
+                overflow-y: auto;   /* 显示垂直滚动条 */
+            }}
+            .table-container {{
+                margin-top: 30px;
+                overflow-x: auto;   /* 水平滚动条 */
+                max-width: 100%;     /* 限制宽度 */
+                white-space: nowrap; /* 防止内容换行 */
+                max-height: 500px;  /* 限制高度 */
+                overflow-y: auto;   /* 显示垂直滚动条 */
+            }}
+             .fixed-table thead tr > th:first-child,
+             .fixed-table tbody tr > td:first-child {{
+             position: sticky;
+             left: 0;
+             z-index: 1;
+
+             }}
+            .fixed-table-header thead tr > th {{
+                position: sticky;
+                top: 0;
+                z-index: 2;
+            }}
+            table {{
+                width: 100%;
+                font-size: 12px;  /* 设置字体大小为12px */
+            }}
+            th, td {{
+                text-align: center;  /* 表头和单元格文字居中 */
+            }}
+
+        </style>
+    </head>
+    <body>
+        <div class="container">
+            <h1>分析报告</h1>
+            <!-- Pandas DataFrame 表格 -->
+          
+            <div class="plot-container">
+                <h2>1. 预测功率与实际功率曲线对比</h2>
+                {power_html}
+            </div>
+            <!-- Pandas DataFrame 表格 -->
+            <div style="display:flex; justify-content: space-between;">
+                <h2>2. 准确率对比</h2>
+                <span>
+                    <a href="/formula.xlsx">公式</a>
+                </span>
+            </div>
+            <div class="table-container fixed-table-header"> 
+                {acc_html}
+            </div>
+            <!-- Pandas DataFrame 表格 -->
+            <div class="table-container">
+                <h2>3. 准确率汇总对比</h2>
+                {summary_html}
+            </div>
+        </div>
+    </body>
+    </html>
+    """
+    # farmId is caller-supplied; neutralise path separators so the report can
+    # only ever be created directly inside the target directory.
+    safe_farm_id = str(farmId).replace('/', '_').replace('\\', '_')
+    filename = f"{safe_farm_id}_{int(time.time() * 1000)}_{random.randint(1000, 9999)}.html"
+    directory = '/usr/share/nginx/html'
+    # exist_ok avoids the check-then-create race between concurrent requests.
+    os.makedirs(directory, exist_ok=True)
+    file_path = os.path.join(directory, filename)
+    path = f"http://ds3:10010/{filename}"
+    with open(file_path, "w", encoding="utf-8") as f:
+        f.write(html_content)
+    print("HTML report generated successfully!")
+    return path
+
+
+@app.route('/analysis_report_small', methods=['POST'])
+def analysis_report():
+    """Handle ``POST /analysis_report_small``: build the report and describe the run.
+
+    The request values are passed to ``get_df_list_from_mongo``; the first two
+    items of its result are used as the prediction and accuracy frames for
+    ``put_analysis_report_to_html``.
+
+    Returns:
+        A dict with ``success`` (1 on success, 0 on failure), ``args`` (the
+        request values), ``start_time`` / ``end_time`` (local time,
+        ``%Y-%m-%d %H:%M:%S``), ``file_path`` (report URL, empty on failure)
+        and, on failure only, ``msg`` (the traceback with newlines replaced
+        by tabs).
+    """
+    start_time = time.time()
+    result = {}
+    success = 0
+    path = ""
+    # Bound before the try block so the response can always echo it, even when
+    # reading the request itself fails.
+    args = {}
+    # Fetched by name instead of relying on a module-level ``logger`` global,
+    # which only exists when this file is executed as a script.
+    log = logging.getLogger("analysis_report log")
+    print("Program starts execution!")
+    try:
+        args = request.values.to_dict()
+        print('args', args)
+        log.info(args)
+        # Load once and index the result instead of invoking the loader once
+        # per element.
+        df_list = get_df_list_from_mongo(args)
+        df_predict, df_accuracy = df_list[0], df_list[1]
+        path = put_analysis_report_to_html(args, df_predict, df_accuracy)
+        success = 1
+    except Exception:
+        # Service boundary: report the failure to the caller instead of raising.
+        # str.replace returns a new string, so its result has to be kept.
+        result['msg'] = traceback.format_exc().replace("\n", "\t")
+    end_time = time.time()
+    result['success'] = success
+    result['args'] = args
+    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
+    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
+    result['file_path'] = path
+    print("Program execution ends!")
+    return result
+
+
+if __name__ == "__main__":
+    # Script entry point: configure logging, then serve the Flask app with
+    # waitress on all interfaces, port 10099.
+    print("Program starts execution!")
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    logger = logging.getLogger("analysis_report log")
+    from waitress import serve
+
+    # serve() blocks until the server is shut down, so the start-up message
+    # has to be printed before the call to be of any use.
+    print("server start!")
+    serve(app, host="0.0.0.0", port=10099)

+ 2 - 2
evaluation_processing/analysis_report.py

@@ -42,8 +42,8 @@ def put_analysis_report_to_html(args, df_clean, df_predict, df_accuracy):
     # -------------------- 实测气象与实际功率散点图--------------------
 
     # 生成实际功率与辐照度的散点图
-    fig_scatter = px.scatter(df_clean, x=col_x_env, y=label, color='is_limit')
-
+    # fig_scatter = px.scatter(df_clean, x=col_x_env, y=label, color='is_limit')
+    fig_scatter = px.scatter(df_clean, x=col_x_env, y=label)
     # 自定义散点图布局
     fig_scatter.update_layout(
         template='seaborn',

+ 2 - 0
run_all.py

@@ -16,6 +16,8 @@ services = [
     ("models_processing/model_train/model_training_lstm.py", 10096),
     ("models_processing/model_predict/model_prediction_lstm.py", 10097),
     ("post_processing/post_processing.py", 10098),
+    ("evaluation_processing/analysis.py", 10099),
+
 ]
 
 # 获取当前脚本所在的根目录