6 månader sedan · 67a79f2d48
--- a/data_processing/processing_limit_power/processing_limit_power_by_statistics_light.py
+++ b/data_processing/processing_limit_power/processing_limit_power_by_statistics_light.py
@@ -60,7 +60,8 @@ def light_statistics_judgement(df_power,args):
 
															     new_df_power = df_power[df_power['is_limit'] == False]    
														
 
															     print(f"未清洗限电前，总共有：{origin_records}条数据")
														
 
															     print(f"清除异常点后保留的点有：{len(new_df_power)}, 占比：{round(len(new_df_power) / origin_records, 2)}")
														
 
															-    return df_power[df_power['is_limit'] == False].drop(['is_limit','c'],axis=1)
														
 
															+    # return df_power[df_power['is_limit'] == False].drop(['is_limit','c'],axis=1)
														
 
															+    return df_power.drop(['c'], axis=1)
														
 
															 @app.route('/processing_limit_power_by_statistics_light', methods=['POST','GET'])
														
--- a/data_processing/processing_limit_power/processing_limit_power_by_statistics_wind.py
+++ b/data_processing/processing_limit_power/processing_limit_power_by_statistics_wind.py
@@ -61,7 +61,8 @@ def wind_statistics_judgement(df_power,args):
 
															     df_tmp.plot.scatter(x=col_ws, y=col_power, c='c')
														
 
															     print(f"原始样本数：{df_power.shape[0]},异常点样本数：{df_tmp[df_tmp['is_limit'] == True].shape[0]},剩余样本数占比："
														
 
															           f"{df_tmp[df_tmp['is_limit'] == False].shape[0] / df_power.shape[0]}")
														
 
															-    return df_tmp[df_tmp['is_limit'] == False].drop(['is_limit','c'],axis=1)
														
 
															+    # return df_tmp[df_tmp['is_limit'] == False].drop(['is_limit','c'],axis=1)
														
 
															+    return df_tmp.drop(['c'],axis=1)
														
 
															 @app.route('/processing_limit_power_by_statistics_wind', methods=['POST'])
														
--- a/evaluation_processing/analysis_report.py
+++ b/evaluation_processing/analysis_report.py
@@ -1,39 +1,48 @@
 
															 # -*- coding: utf-8 -*-
														
 
															 import numpy as np
														
 
															-from flask import Flask,request
														
 
															+from flask import Flask, request
														
 
															 import time
														
 
															 import random
														
 
															 import logging
														
 
															 import traceback
														
 
															 import os
														
 
															-from common.database_dml import get_df_list_from_mongo,insert_data_into_mongo
														
 
															+from common.database_dml import get_df_list_from_mongo, insert_data_into_mongo
														
 
															 import plotly.express as px
														
 
															 import plotly.graph_objects as go
														
 
															 import pandas as pd
														
 
															 import plotly.io as pio
														
 
															 from bson.decimal128 import Decimal128
														
 
															 import numbers
														
 
															+
														
 
															 app = Flask('analysis_report——service')
														
 
															-def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
														
 
															+
														
 
															+def put_analysis_report_to_html(args, df_clean, df_predict, df_accuracy):
														
 
															     col_time = args['col_time']
														
 
															     col_x_env = args['col_x_env']
														
 
															     col_x_pre = args['col_x_pre']
														
 
															     label = args['label']
														
 
															     label_pre = args['label_pre']
														
 
															-    farmId =  args['farmId']
														
 
															+    farmId = args['farmId']
														
 
															+    total_size = df_clean.shape[0]
														
 
															+    clean_size = total_size
														
 
															+    if 'is_limit' in df_clean.columns:
														
 
															+        clean_size = df_clean[df_clean['is_limit']==False].shape[0]
														
 
															     df_overview = pd.DataFrame(
														
 
															-        {'数据开始时间': [df_clean[col_time].min()], '数据结束时间': [df_clean[col_time].max()], '数据总记录数': [df_clean.shape[0]]})
														
 
															+        {'场站编码':[farmId],
														
 
															+         '数据开始时间': [df_clean[col_time].min()], '数据结束时间': [df_clean[col_time].max()],
														
 
															+         '总天数':[(pd.to_datetime(df_clean[col_time].max())-pd.to_datetime(df_clean[col_time].min())).days],
														
 
															+         '数据总记录数': [total_size],'清洗后记录数':[clean_size],'数据可用率':[clean_size/total_size if total_size else 0.0]})  # avoid ZeroDivisionError on an empty frame
														
 
															     overview_html = df_overview.to_html(classes='table table-bordered table-striped', index=False)
														
 
															     # -------------------- 数据描述 --------------------
														
 
															     describe_html = df_clean.describe().reset_index().rename(columns={'index': '统计量'}).to_html(
														
 
															-        classes='table table-bordered table-striped', index=False)
														
 
															+        classes='table table-bordered table-striped fixed', index=False)
														
 
															     # -------------------- 实测气象与实际功率散点图--------------------
														
 
															     # 生成实际功率与辐照度的散点图
														
 
															-    fig_scatter = px.scatter(df_clean, x=col_x_env, y=label)
														
 
															+    fig_scatter = px.scatter(df_clean, x=col_x_env, y=label, color='is_limit' if 'is_limit' in df_clean.columns else None)  # 'is_limit' is optional (see the size computation above)
														
 
															     # 自定义散点图布局
														
 
															     fig_scatter.update_layout(
														
@@ -173,7 +182,7 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
 
															     fig.update_layout(
														
 
															         template='seaborn',  # 使用 seaborn 模板
														
 
															         title=dict(
														
 
															-            text=f"{label_pre} 与 {label} 对比",  # 标题
														
 
															+            # text=f"{label_pre} 与 {label} 对比",  # 标题
														
 
															             x=0.5, font=dict(size=20, color='darkblue')  # 标题居中并设置字体大小和颜色
														
 
															         ),
														
 
															         plot_bgcolor='rgba(255, 255, 255, 0.8)',  # 背景色
														
@@ -210,14 +219,15 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
 
															     power_html = pio.to_html(fig, full_html=False)
														
 
															     # -------------------- 准确率表展示--------------------
														
 
															-    acc_html = df_accuracy.to_html(classes='table table-bordered table-striped', index=False)
														
 
															+    acc_html = df_accuracy.sort_values(by=col_time).to_html(classes='table table-bordered table-striped', index=False)
														
 
															     # -------------------- 准确率汇总展示--------------------
														
 
															     # 指定需要转换的列
														
 
															-    cols_to_convert = ['MAE', 'accuracy', 'RMSE', 'deviationElectricity','deviationAssessment']
														
 
															-    for col in cols_to_convert :
														
 
															+    cols_to_convert = ['MAE', 'accuracy', 'RMSE', 'deviationElectricity', 'deviationAssessment']
														
 
															+    for col in cols_to_convert:
														
 
															         if col in df_accuracy.columns:
														
 
															             df_accuracy[col] = df_accuracy[col].apply(
														
 
															-        lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x, numbers.Number) else np.nan)
														
 
															+                lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x,
														
 
															+                                                                                                         numbers.Number) else np.nan)
														
 
															     # 确定存在的列
														
 
															     agg_dict = {}
														
@@ -241,7 +251,7 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
 
															         rename_cols.append('考核总分数')
														
 
															     # 进行分组聚合，如果有需要聚合的列
														
 
															     summary_df = df_accuracy.groupby('model').agg(agg_dict).reset_index()
														
 
															-    summary_df.columns =rename_cols
														
 
															+    summary_df.columns = rename_cols
														
 
															     summary_html = summary_df.to_html(classes='table table-bordered table-striped', index=False)
														
 
															     # -------------------- 生成完整 HTML 页面 --------------------
														
@@ -255,6 +265,10 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
 
															         <!-- 引入 Bootstrap CSS -->
														
 
															         <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
														
 
															         <style>
														
 
															+         .justify-between {{
														
 
															+                display: flex;
														
 
															+                justify-content: space-between;
														
 
															+           }}
														
 
															             body {{
														
 
															                 background-color: #f4f4f9;
														
 
															                 font-family: Arial, sans-serif;
														
@@ -285,6 +299,18 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
 
															                 max-height: 500px;  /* 限制高度 */
														
 
															                 overflow-y: auto;   /* 显示垂直滚动条 */
														
 
															             }}
														
 
															+             .fixed-table thead tr > th:first-child,
														
 
															+             .fixed-table tbody tr > td:first-child {{
														
 
															+             position: sticky;
														
 
															+             left: 0;
														
 
															+             z-index: 1;
														
 
															+        
														
 
															+             }}
														
 
															+            .fixed-table-header thead tr > th {{
														
 
															+                position: sticky;
														
 
															+                top: 0;
														
 
															+                z-index: 2;
														
 
															+            }}
														
 
															             table {{
														
 
															                 width: 100%;
														
 
															                 font-size: 12px;  /* 设置字体大小为12px */
														
@@ -292,6 +318,8 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
 
															             th, td {{
														
 
															                 text-align: center;  /* 表头和单元格文字居中 */
														
 
															             }}
														
 
															+        }}
														
 
															+        
														
 
															         </style>
														
 
															     </head>
														
 
															     <body>
														
@@ -303,12 +331,12 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
 
															                 {overview_html}
														
 
															             </div>
														
 
															             <!-- Pandas DataFrame 表格 -->
														
 
															-            <div class="table-container">
														
 
															-                <h2>2. 数据描述</h2>
														
 
															+             <h2>2. 数据描述</h2>
														
 
															+            <div class="table-container fixed-table">
														
 
															                 {describe_html}
														
 
															             </div>
														
 
															             <div class="plot-container">
														
 
															-                <h2>3. 数据清洗后实测气象与实际功率散点图</h2>
														
 
															+                <h2>3. 实测气象与实际功率散点图</h2>
														
 
															                 {scatter_html}
														
 
															             </div>
														
 
															             <div class="plot-container">
														
@@ -316,20 +344,25 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
 
															                 {corr_html}
														
 
															             </div>
														
 
															             <div class="plot-container">
														
 
															-                <h2>5. 实测气象与预测气象曲线趋势</h2>
														
 
															-                {env_pre_html}
														
 
															-            </div>
														
 
															-            <div class="plot-container">
														
 
															-                <h2>6. 预测气象与实测气象偏差曲线</h2>
														
 
															+                <h2>5. 预测气象与实测气象偏差曲线</h2>
														
 
															                 {density_html}
														
 
															             </div>
														
 
															+             <div class="plot-container">
														
 
															+                <h2>6. 实测气象与预测气象曲线趋势</h2>
														
 
															+                {env_pre_html}
														
 
															+            </div>
														
 
															             <div class="plot-container">
														
 
															                 <h2>7. 预测功率与实际功率曲线对比</h2>
														
 
															                 {power_html}
														
 
															             </div>
														
 
															             <!-- Pandas DataFrame 表格 -->
														
 
															-            <div class="table-container">
														
 
															+            <div style="display:flex; justify-content: space-between;">
														
 
															                 <h2>8. 准确率对比</h2>
														
 
															+                <span>
														
 
															+                    <a href="/formula.xlsx">公式</a>
														
 
															+                </span>
														
 
															+            </div>
														
 
															+            <div class="table-container fixed-table-header"> 
														
 
															                 {acc_html}
														
 
															             </div>
														
 
															             <!-- Pandas DataFrame 表格 -->
														
@@ -353,26 +386,29 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
 
															         f.write(html_content)
														
 
															     print("HTML report generated successfully!")
														
 
															     return path
														
 
															+
														
 
															+
														
 
															 @app.route('/analysis_report', methods=['POST'])
														
 
															 def analysis_report():
														
 
															-    start_time = time.time()  
														
 
															+    start_time = time.time()
														
 
															     result = {}
														
 
															     success = 0
														
 
															     path = ""
														
 
															     print("Program starts execution!")
														
 
															     try:
														
 
															         args = request.values.to_dict()
														
 
															-        print('args',args)
														
 
															+        print('args', args)
														
 
															         logger.info(args)
														
 
															-        #获取数据
														
 
															-        df_clean, df_predict, df_accuracy = get_df_list_from_mongo(args)[0], get_df_list_from_mongo(args)[1],get_df_list_from_mongo(args)[2]
														
 
															+        # 获取数据
														
 
															+        df_list = get_df_list_from_mongo(args)  # call once instead of three times
														
 
															+        df_clean, df_predict, df_accuracy = df_list[0], df_list[1], df_list[2]
														
 
															         path = put_analysis_report_to_html(args, df_clean, df_predict, df_accuracy)
														
 
															         success = 1
														
 
															     except Exception as e:
														
 
															         my_exception = traceback.format_exc()
														
 
															-        my_exception.replace("\n","\t")
														
 
															+        my_exception = my_exception.replace("\n", "\t")  # str.replace returns a new string; keep the result
														
 
															         result['msg'] = my_exception
														
 
															-    end_time = time.time() 
														
 
															+    end_time = time.time()
														
 
															     result['success'] = success
														
 
															     result['args'] = args
														
 
															     result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
														
@@ -382,10 +418,11 @@ def analysis_report():
 
															     return result
														
 
															-if __name__=="__main__":  
														
 
															+if __name__ == "__main__":
														
 
															     print("Program starts execution!")
														
 
															     logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
														
 
															     logger = logging.getLogger("analysis_report log")
														
 
															     from waitress import serve
														
 
															+
														
 
															     serve(app, host="0.0.0.0", port=10092)
														
 
															-    print("server start!")
														
 
															+    print("server start!")
														
--- a/models_processing/model_train/model_training_lightgbm.py
+++ b/models_processing/model_train/model_training_lightgbm.py
@@ -16,6 +16,8 @@ def build_model(df,args):
 
															     numerical_features,categorical_features,label,model_name,num_boost_round,model_params = str_to_list(args['numerical_features']),str_to_list(args['categorical_features']),args['label'],args['model_name'],int(args['num_boost_round']),eval(args['model_params'])
														
 
															     features = numerical_features+categorical_features
														
 
															     print("features:************",features)
														
 
															+    if 'is_limit' in df.columns:
														
 
															+        df = df[df['is_limit']==False]
														
 
															     # 拆分数据为训练集和测试集
														
 
															     X_train, X_test, y_train, y_test = train_test_split(df[features], df[label], test_size=0.2, random_state=42)
														
 
															     # 创建LightGBM数据集
														
--- a/models_processing/model_train/model_training_lstm.py
+++ b/models_processing/model_train/model_training_lstm.py
@@ -49,6 +49,8 @@ def create_sequences(data_features,data_target,time_steps):
 
															 def build_model(data, args):
														
 
															     col_time, time_steps,features,target = args['col_time'], int(args['time_steps']), str_to_list(args['features']),args['target']
														
 
															+    if 'is_limit' in data.columns:
														
 
															+        data = data[data['is_limit']==False]
														
 
															     train_data = data.fillna(method='ffill').fillna(method='bfill').sort_values(by=col_time)
														
 
															     # X_train, X_test, y_train, y_test = process_data(df_clean, params)
														
 
															     # 创建特征和目标的标准化器