6 mesi fa · 67a79f2d48
--- a/data_processing/processing_limit_power/processing_limit_power_by_statistics_light.py
+++ b/data_processing/processing_limit_power/processing_limit_power_by_statistics_light.py
@@ -60,7 +60,8 @@ def light_statistics_judgement(df_power,args):
 
				     new_df_power = df_power[df_power['is_limit'] == False]    
			
 
				     print(f"未清洗限电前，总共有：{origin_records}条数据")
			
 
				     print(f"清除异常点后保留的点有：{len(new_df_power)}, 占比：{round(len(new_df_power) / origin_records, 2)}")
			
 
				-    return df_power[df_power['is_limit'] == False].drop(['is_limit','c'],axis=1)
			
 
				+    # return df_power[df_power['is_limit'] == False].drop(['is_limit','c'],axis=1)
			
 
				+    return df_power.drop(['c'], axis=1)
			
 
				 
			
 
				 
			
 
				 @app.route('/processing_limit_power_by_statistics_light', methods=['POST','GET'])
			
--- a/data_processing/processing_limit_power/processing_limit_power_by_statistics_wind.py
+++ b/data_processing/processing_limit_power/processing_limit_power_by_statistics_wind.py
@@ -61,7 +61,8 @@ def wind_statistics_judgement(df_power,args):
 
				     df_tmp.plot.scatter(x=col_ws, y=col_power, c='c')
			
 
				     print(f"原始样本数：{df_power.shape[0]},异常点样本数：{df_tmp[df_tmp['is_limit'] == True].shape[0]},剩余样本数占比："
			
 
				           f"{df_tmp[df_tmp['is_limit'] == False].shape[0] / df_power.shape[0]}")
			
 
				-    return df_tmp[df_tmp['is_limit'] == False].drop(['is_limit','c'],axis=1)
			
 
				+    # return df_tmp[df_tmp['is_limit'] == False].drop(['is_limit','c'],axis=1)
			
 
				+    return df_tmp.drop(['c'],axis=1)
			
 
				 
			
 
				 
			
 
				 @app.route('/processing_limit_power_by_statistics_wind', methods=['POST'])
			
--- a/evaluation_processing/analysis_report.py
+++ b/evaluation_processing/analysis_report.py
@@ -1,39 +1,48 @@
 
				 # -*- coding: utf-8 -*-
			
 
				 import numpy as np
			
 
				-from flask import Flask,request
			
 
				+from flask import Flask, request
			
 
				 import time
			
 
				 import random
			
 
				 import logging
			
 
				 import traceback
			
 
				 import os
			
 
				-from common.database_dml import get_df_list_from_mongo,insert_data_into_mongo
			
 
				+from common.database_dml import get_df_list_from_mongo, insert_data_into_mongo
			
 
				 import plotly.express as px
			
 
				 import plotly.graph_objects as go
			
 
				 import pandas as pd
			
 
				 import plotly.io as pio
			
 
				 from bson.decimal128 import Decimal128
			
 
				 import numbers
			
 
				+
			
 
				 app = Flask('analysis_report——service')
			
 
				 
			
 
				-def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
			
 
				+
			
 
				+def put_analysis_report_to_html(args, df_clean, df_predict, df_accuracy):
			
 
				     col_time = args['col_time']
			
 
				     col_x_env = args['col_x_env']
			
 
				     col_x_pre = args['col_x_pre']
			
 
				     label = args['label']
			
 
				     label_pre = args['label_pre']
			
 
				-    farmId =  args['farmId']
			
 
				+    farmId = args['farmId']
			
 
				+    total_size = df_clean.shape[0]
			
 
				+    clean_size = total_size
			
 
				+    if 'is_limit' in df_clean.columns:
			
 
				+        clean_size = df_clean[df_clean['is_limit']==False].shape[0]
			
 
				     df_overview = pd.DataFrame(
			
 
				-        {'数据开始时间': [df_clean[col_time].min()], '数据结束时间': [df_clean[col_time].max()], '数据总记录数': [df_clean.shape[0]]})
			
 
				+        {'场站编码':[farmId],
			
 
				+         '数据开始时间': [df_clean[col_time].min()], '数据结束时间': [df_clean[col_time].max()],
			
 
				+         '总天数':[(pd.to_datetime(df_clean[col_time].max())-pd.to_datetime(df_clean[col_time].min())).days],
			
 
				+         '数据总记录数': [total_size],'清洗后记录数':[clean_size],'数据可用率':[clean_size / total_size if total_size else 0.0]})
			
 
				     overview_html = df_overview.to_html(classes='table table-bordered table-striped', index=False)
			
 
				 
			
 
				     # -------------------- 数据描述 --------------------
			
 
				     describe_html = df_clean.describe().reset_index().rename(columns={'index': '统计量'}).to_html(
			
 
				-        classes='table table-bordered table-striped', index=False)
			
 
				+        classes='table table-bordered table-striped fixed', index=False)
			
 
				 
			
 
				     # -------------------- 实测气象与实际功率散点图--------------------
			
 
				 
			
 
				     # 生成实际功率与辐照度的散点图
			
 
				-    fig_scatter = px.scatter(df_clean, x=col_x_env, y=label)
			
 
				+    fig_scatter = px.scatter(df_clean, x=col_x_env, y=label, color='is_limit' if 'is_limit' in df_clean.columns else None)  # colour by limit flag only when the column exists
			
 
				 
			
 
				     # 自定义散点图布局
			
 
				     fig_scatter.update_layout(
			
@@ -173,7 +182,7 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
 
				     fig.update_layout(
			
 
				         template='seaborn',  # 使用 seaborn 模板
			
 
				         title=dict(
			
 
				-            text=f"{label_pre} 与 {label} 对比",  # 标题
			
 
				+            # text=f"{label_pre} 与 {label} 对比",  # 标题
			
 
				             x=0.5, font=dict(size=20, color='darkblue')  # 标题居中并设置字体大小和颜色
			
 
				         ),
			
 
				         plot_bgcolor='rgba(255, 255, 255, 0.8)',  # 背景色
			
@@ -210,14 +219,15 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
 
				     power_html = pio.to_html(fig, full_html=False)
			
 
				 
			
 
				     # -------------------- 准确率表展示--------------------
			
 
				-    acc_html = df_accuracy.to_html(classes='table table-bordered table-striped', index=False)
			
 
				+    acc_html = df_accuracy.sort_values(by=col_time).to_html(classes='table table-bordered table-striped', index=False)
			
 
				     # -------------------- 准确率汇总展示--------------------
			
 
				     # 指定需要转换的列
			
 
				-    cols_to_convert = ['MAE', 'accuracy', 'RMSE', 'deviationElectricity','deviationAssessment']
			
 
				-    for col in cols_to_convert :
			
 
				+    cols_to_convert = ['MAE', 'accuracy', 'RMSE', 'deviationElectricity', 'deviationAssessment']
			
 
				+    for col in cols_to_convert:
			
 
				         if col in df_accuracy.columns:
			
 
				             df_accuracy[col] = df_accuracy[col].apply(
			
 
				-        lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x, numbers.Number) else np.nan)
			
 
				+                lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x,
			
 
				+                                                                                                         numbers.Number) else np.nan)
			
 
				 
			
 
				     # 确定存在的列
			
 
				     agg_dict = {}
			
@@ -241,7 +251,7 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
 
				         rename_cols.append('考核总分数')
			
 
				     # 进行分组聚合，如果有需要聚合的列
			
 
				     summary_df = df_accuracy.groupby('model').agg(agg_dict).reset_index()
			
 
				-    summary_df.columns =rename_cols
			
 
				+    summary_df.columns = rename_cols
			
 
				     summary_html = summary_df.to_html(classes='table table-bordered table-striped', index=False)
			
 
				     # -------------------- 生成完整 HTML 页面 --------------------
			
 
				 
			
@@ -255,6 +265,10 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
 
				         <!-- 引入 Bootstrap CSS -->
			
 
				         <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
			
 
				         <style>
			
 
				+         .justify-between {{
			
 
				+                display: flex;
			
 
				+                justify-content: space-between;
			
 
				+           }}
			
 
				             body {{
			
 
				                 background-color: #f4f4f9;
			
 
				                 font-family: Arial, sans-serif;
			
@@ -285,6 +299,18 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
 
				                 max-height: 500px;  /* 限制高度 */
			
 
				                 overflow-y: auto;   /* 显示垂直滚动条 */
			
 
				             }}
			
 
				+             .fixed-table thead tr > th:first-child,
			
 
				+             .fixed-table tbody tr > td:first-child {{
			
 
				+             position: sticky;
			
 
				+             left: 0;
			
 
				+             z-index: 1;
			
 
				+        
			
 
				+             }}
			
 
				+            .fixed-table-header thead tr > th {{
			
 
				+                position: sticky;
			
 
				+                top: 0;
			
 
				+                z-index: 2;
			
 
				+            }}
			
 
				             table {{
			
 
				                 width: 100%;
			
 
				                 font-size: 12px;  /* 设置字体大小为12px */
			
@@ -292,6 +318,8 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
 
				             th, td {{
			
 
				                 text-align: center;  /* 表头和单元格文字居中 */
			
 
				             }}
			
 
				+
			
 
				+        
			
 
				         </style>
			
 
				     </head>
			
 
				     <body>
			
@@ -303,12 +331,12 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
 
				                 {overview_html}
			
 
				             </div>
			
 
				             <!-- Pandas DataFrame 表格 -->
			
 
				-            <div class="table-container">
			
 
				-                <h2>2. 数据描述</h2>
			
 
				+             <h2>2. 数据描述</h2>
			
 
				+            <div class="table-container fixed-table">
			
 
				                 {describe_html}
			
 
				             </div>
			
 
				             <div class="plot-container">
			
 
				-                <h2>3. 数据清洗后实测气象与实际功率散点图</h2>
			
 
				+                <h2>3. 实测气象与实际功率散点图</h2>
			
 
				                 {scatter_html}
			
 
				             </div>
			
 
				             <div class="plot-container">
			
@@ -316,20 +344,25 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
 
				                 {corr_html}
			
 
				             </div>
			
 
				             <div class="plot-container">
			
 
				-                <h2>5. 实测气象与预测气象曲线趋势</h2>
			
 
				-                {env_pre_html}
			
 
				-            </div>
			
 
				-            <div class="plot-container">
			
 
				-                <h2>6. 预测气象与实测气象偏差曲线</h2>
			
 
				+                <h2>5. 预测气象与实测气象偏差曲线</h2>
			
 
				                 {density_html}
			
 
				             </div>
			
 
				+             <div class="plot-container">
			
 
				+                <h2>6. 实测气象与预测气象曲线趋势</h2>
			
 
				+                {env_pre_html}
			
 
				+            </div>
			
 
				             <div class="plot-container">
			
 
				                 <h2>7. 预测功率与实际功率曲线对比</h2>
			
 
				                 {power_html}
			
 
				             </div>
			
 
				             <!-- Pandas DataFrame 表格 -->
			
 
				-            <div class="table-container">
			
 
				+            <div style="display:flex; justify-content: space-between;">
			
 
				                 <h2>8. 准确率对比</h2>
			
 
				+                <span>
			
 
				+                    <a href="/formula.xlsx">公式</a>
			
 
				+                </span>
			
 
				+            </div>
			
 
				+            <div class="table-container fixed-table-header"> 
			
 
				                 {acc_html}
			
 
				             </div>
			
 
				             <!-- Pandas DataFrame 表格 -->
			
@@ -353,26 +386,29 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
 
				         f.write(html_content)
			
 
				     print("HTML report generated successfully!")
			
 
				     return path
			
 
				+
			
 
				+
			
 
				 @app.route('/analysis_report', methods=['POST'])
			
 
				 def analysis_report():
			
 
				-    start_time = time.time()  
			
 
				+    start_time = time.time()
			
 
				     result = {}
			
 
				     success = 0
			
 
				     path = ""
			
 
				     print("Program starts execution!")
			
 
				     try:
			
 
				         args = request.values.to_dict()
			
 
				-        print('args',args)
			
 
				+        print('args', args)
			
 
				         logger.info(args)
			
 
				-        #获取数据
			
 
				-        df_clean, df_predict, df_accuracy = get_df_list_from_mongo(args)[0], get_df_list_from_mongo(args)[1],get_df_list_from_mongo(args)[2]
			
 
				+        # 获取数据
			
 
				+        # Query MongoDB once and unpack, instead of repeating the same query three times.
			
 
				+        df_clean, df_predict, df_accuracy = get_df_list_from_mongo(args)[:3]
			
 
				         path = put_analysis_report_to_html(args, df_clean, df_predict, df_accuracy)
			
 
				         success = 1
			
 
				     except Exception as e:
			
 
				         my_exception = traceback.format_exc()
			
 
				-        my_exception.replace("\n","\t")
			
 
				+        my_exception = my_exception.replace("\n", "\t")  # str.replace returns a new string; keep the result
			
 
				         result['msg'] = my_exception
			
 
				-    end_time = time.time() 
			
 
				+    end_time = time.time()
			
 
				     result['success'] = success
			
 
				     result['args'] = args
			
 
				     result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
			
@@ -382,10 +418,11 @@ def analysis_report():
 
				     return result
			
 
				 
			
 
				 
			
 
				-if __name__=="__main__":  
			
 
				+if __name__ == "__main__":
			
 
				     print("Program starts execution!")
			
 
				     logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
			
 
				     logger = logging.getLogger("analysis_report log")
			
 
				     from waitress import serve
			
 
				+
			
 
				     serve(app, host="0.0.0.0", port=10092)
			
 
				-    print("server start!")
			
 
				+    print("server start!")
			
--- a/models_processing/model_train/model_training_lightgbm.py
+++ b/models_processing/model_train/model_training_lightgbm.py
@@ -16,6 +16,8 @@ def build_model(df,args):
 
				     numerical_features,categorical_features,label,model_name,num_boost_round,model_params = str_to_list(args['numerical_features']),str_to_list(args['categorical_features']),args['label'],args['model_name'],int(args['num_boost_round']),eval(args['model_params'])
			
 
				     features = numerical_features+categorical_features
			
 
				     print("features:************",features)
			
 
				+    if 'is_limit' in df.columns:
			
 
				+        df = df[df['is_limit']==False]
			
 
				     # 拆分数据为训练集和测试集
			
 
				     X_train, X_test, y_train, y_test = train_test_split(df[features], df[label], test_size=0.2, random_state=42)
			
 
				     # 创建LightGBM数据集
			
--- a/models_processing/model_train/model_training_lstm.py
+++ b/models_processing/model_train/model_training_lstm.py
@@ -49,6 +49,8 @@ def create_sequences(data_features,data_target,time_steps):
 
				 
			
 
				 def build_model(data, args):
			
 
				     col_time, time_steps,features,target = args['col_time'], int(args['time_steps']), str_to_list(args['features']),args['target']
			
 
				+    if 'is_limit' in data.columns:
			
 
				+        data = data[data['is_limit']==False]
			
 
				     train_data = data.fillna(method='ffill').fillna(method='bfill').sort_values(by=col_time)
			
 
				     # X_train, X_test, y_train, y_test = process_data(df_clean, params)
			
 
				     # 创建特征和目标的标准化器