Sfoglia il codice sorgente

awg commit algorithm components

anweiguo 4 mesi fa
parent
commit
67a79f2d48

+ 2 - 1
data_processing/processing_limit_power/processing_limit_power_by_statistics_light.py

@@ -60,7 +60,8 @@ def light_statistics_judgement(df_power,args):
     new_df_power = df_power[df_power['is_limit'] == False]    
     print(f"未清洗限电前,总共有:{origin_records}条数据")
     print(f"清除异常点后保留的点有:{len(new_df_power)}, 占比:{round(len(new_df_power) / origin_records, 2)}")
-    return df_power[df_power['is_limit'] == False].drop(['is_limit','c'],axis=1)
+    # return df_power[df_power['is_limit'] == False].drop(['is_limit','c'],axis=1)
+    return df_power.drop(['c'], axis=1)
 
 
 @app.route('/processing_limit_power_by_statistics_light', methods=['POST','GET'])

+ 2 - 1
data_processing/processing_limit_power/processing_limit_power_by_statistics_wind.py

@@ -61,7 +61,8 @@ def wind_statistics_judgement(df_power,args):
     df_tmp.plot.scatter(x=col_ws, y=col_power, c='c')
     print(f"原始样本数:{df_power.shape[0]},异常点样本数:{df_tmp[df_tmp['is_limit'] == True].shape[0]},剩余样本数占比:"
           f"{df_tmp[df_tmp['is_limit'] == False].shape[0] / df_power.shape[0]}")
-    return df_tmp[df_tmp['is_limit'] == False].drop(['is_limit','c'],axis=1)
+    # return df_tmp[df_tmp['is_limit'] == False].drop(['is_limit','c'],axis=1)
+    return df_tmp.drop(['c'],axis=1)
 
 
 @app.route('/processing_limit_power_by_statistics_wind', methods=['POST'])

+ 67 - 30
evaluation_processing/analysis_report.py

@@ -1,39 +1,48 @@
 # -*- coding: utf-8 -*-
 import numpy as np
-from flask import Flask,request
+from flask import Flask, request
 import time
 import random
 import logging
 import traceback
 import os
-from common.database_dml import get_df_list_from_mongo,insert_data_into_mongo
+from common.database_dml import get_df_list_from_mongo, insert_data_into_mongo
 import plotly.express as px
 import plotly.graph_objects as go
 import pandas as pd
 import plotly.io as pio
 from bson.decimal128 import Decimal128
 import numbers
+
 app = Flask('analysis_report——service')
 
-def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
+
+def put_analysis_report_to_html(args, df_clean, df_predict, df_accuracy):
     col_time = args['col_time']
     col_x_env = args['col_x_env']
     col_x_pre = args['col_x_pre']
     label = args['label']
     label_pre = args['label_pre']
-    farmId =  args['farmId']
+    farmId = args['farmId']
+    total_size = df_clean.shape[0]
+    clean_size = total_size
+    if 'is_limit' in df_clean.columns:
+        clean_size = df_clean[df_clean['is_limit']==False].shape[0]
     df_overview = pd.DataFrame(
-        {'数据开始时间': [df_clean[col_time].min()], '数据结束时间': [df_clean[col_time].max()], '数据总记录数': [df_clean.shape[0]]})
+        {'场站编码':[farmId],
+         '数据开始时间': [df_clean[col_time].min()], '数据结束时间': [df_clean[col_time].max()],
+         '总天数':[(pd.to_datetime(df_clean[col_time].max())-pd.to_datetime(df_clean[col_time].min())).days],
+         '数据总记录数': [total_size],'清洗后记录数':[clean_size],'数据可用率':[clean_size/total_size]})
     overview_html = df_overview.to_html(classes='table table-bordered table-striped', index=False)
 
     # -------------------- 数据描述 --------------------
     describe_html = df_clean.describe().reset_index().rename(columns={'index': '统计量'}).to_html(
-        classes='table table-bordered table-striped', index=False)
+        classes='table table-bordered table-striped fixed', index=False)
 
     # -------------------- 实测气象与实际功率散点图--------------------
 
     # 生成实际功率与辐照度的散点图
-    fig_scatter = px.scatter(df_clean, x=col_x_env, y=label)
+    fig_scatter = px.scatter(df_clean, x=col_x_env, y=label, color='is_limit')
 
     # 自定义散点图布局
     fig_scatter.update_layout(
@@ -173,7 +182,7 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
     fig.update_layout(
         template='seaborn',  # 使用 seaborn 模板
         title=dict(
-            text=f"{label_pre} 与 {label} 对比",  # 标题
+            # text=f"{label_pre} 与 {label} 对比",  # 标题
             x=0.5, font=dict(size=20, color='darkblue')  # 标题居中并设置字体大小和颜色
         ),
         plot_bgcolor='rgba(255, 255, 255, 0.8)',  # 背景色
@@ -210,14 +219,15 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
     power_html = pio.to_html(fig, full_html=False)
 
     # -------------------- 准确率表展示--------------------
-    acc_html = df_accuracy.to_html(classes='table table-bordered table-striped', index=False)
+    acc_html = df_accuracy.sort_values(by=col_time).to_html(classes='table table-bordered table-striped', index=False)
     # -------------------- 准确率汇总展示--------------------
     # 指定需要转换的列
-    cols_to_convert = ['MAE', 'accuracy', 'RMSE', 'deviationElectricity','deviationAssessment']
-    for col in cols_to_convert :
+    cols_to_convert = ['MAE', 'accuracy', 'RMSE', 'deviationElectricity', 'deviationAssessment']
+    for col in cols_to_convert:
         if col in df_accuracy.columns:
             df_accuracy[col] = df_accuracy[col].apply(
-        lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x, numbers.Number) else np.nan)
+                lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x,
+                                                                                                         numbers.Number) else np.nan)
 
     # 确定存在的列
     agg_dict = {}
@@ -241,7 +251,7 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
         rename_cols.append('考核总分数')
     # 进行分组聚合,如果有需要聚合的列
     summary_df = df_accuracy.groupby('model').agg(agg_dict).reset_index()
-    summary_df.columns =rename_cols
+    summary_df.columns = rename_cols
     summary_html = summary_df.to_html(classes='table table-bordered table-striped', index=False)
     # -------------------- 生成完整 HTML 页面 --------------------
 
@@ -255,6 +265,10 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
         <!-- 引入 Bootstrap CSS -->
         <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
         <style>
+         justify-between;{{
+                display: flex;
+                justify-content: space-between;
+           }}
             body {{
                 background-color: #f4f4f9;
                 font-family: Arial, sans-serif;
@@ -285,6 +299,18 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
                 max-height: 500px;  /* 限制高度 */
                 overflow-y: auto;   /* 显示垂直滚动条 */
             }}
+             .fixed-table thead tr > th:first-child,
+             .fixed-table tbody tr > td:first-child {{
+             position: sticky;
+             left: 0;
+             z-index: 1;
+        
+             }}
+            .fixed-table-header thead tr > th {{
+                position: sticky;
+                top: 0;
+                z-index: 2;
+            }}
             table {{
                 width: 100%;
                 font-size: 12px;  /* 设置字体大小为12px */
@@ -292,6 +318,8 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
             th, td {{
                 text-align: center;  /* 表头和单元格文字居中 */
             }}
+        }}
+        
         </style>
     </head>
     <body>
@@ -303,12 +331,12 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
                 {overview_html}
             </div>
             <!-- Pandas DataFrame 表格 -->
-            <div class="table-container">
-                <h2>2. 数据描述</h2>
+             <h2>2. 数据描述</h2>
+            <div class="table-container fixed-table">
                 {describe_html}
             </div>
             <div class="plot-container">
-                <h2>3. 数据清洗后实测气象与实际功率散点图</h2>
+                <h2>3. 实测气象与实际功率散点图</h2>
                 {scatter_html}
             </div>
             <div class="plot-container">
@@ -316,20 +344,25 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
                 {corr_html}
             </div>
             <div class="plot-container">
-                <h2>5. 实测气象与预测气象曲线趋势</h2>
-                {env_pre_html}
-            </div>
-            <div class="plot-container">
-                <h2>6. 预测气象与实测气象偏差曲线</h2>
+                <h2>5. 预测气象与实测气象偏差曲线</h2>
                 {density_html}
             </div>
+             <div class="plot-container">
+                <h2>6. 实测气象与预测气象曲线趋势</h2>
+                {env_pre_html}
+            </div>
             <div class="plot-container">
                 <h2>7. 预测功率与实际功率曲线对比</h2>
                 {power_html}
             </div>
             <!-- Pandas DataFrame 表格 -->
-            <div class="table-container">
+            <div style="display:flex; justify-content: space-between;">
                 <h2>8. 准确率对比</h2>
+                <span>
+                    <a href="/formula.xlsx">公式</a>
+                </span>
+            </div>
+            <div class="table-container fixed-table-header"> 
                 {acc_html}
             </div>
             <!-- Pandas DataFrame 表格 -->
@@ -353,26 +386,29 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
         f.write(html_content)
     print("HTML report generated successfully!")
     return path
+
+
 @app.route('/analysis_report', methods=['POST'])
 def analysis_report():
-    start_time = time.time()  
+    start_time = time.time()
     result = {}
     success = 0
     path = ""
     print("Program starts execution!")
     try:
         args = request.values.to_dict()
-        print('args',args)
+        print('args', args)
         logger.info(args)
-        #获取数据
-        df_clean, df_predict, df_accuracy = get_df_list_from_mongo(args)[0], get_df_list_from_mongo(args)[1],get_df_list_from_mongo(args)[2]
+        # 获取数据
+        df_clean, df_predict, df_accuracy = get_df_list_from_mongo(args)[0], get_df_list_from_mongo(args)[1], \
+        get_df_list_from_mongo(args)[2]
         path = put_analysis_report_to_html(args, df_clean, df_predict, df_accuracy)
         success = 1
     except Exception as e:
         my_exception = traceback.format_exc()
-        my_exception.replace("\n","\t")
+        my_exception.replace("\n", "\t")
         result['msg'] = my_exception
-    end_time = time.time() 
+    end_time = time.time()
     result['success'] = success
     result['args'] = args
     result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
@@ -382,10 +418,11 @@ def analysis_report():
     return result
 
 
-if __name__=="__main__":  
+if __name__ == "__main__":
     print("Program starts execution!")
     logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
     logger = logging.getLogger("analysis_report log")
     from waitress import serve
+
     serve(app, host="0.0.0.0", port=10092)
-    print("server start!")
+    print("server start!")

+ 2 - 0
models_processing/model_train/model_training_lightgbm.py

@@ -16,6 +16,8 @@ def build_model(df,args):
     numerical_features,categorical_features,label,model_name,num_boost_round,model_params = str_to_list(args['numerical_features']),str_to_list(args['categorical_features']),args['label'],args['model_name'],int(args['num_boost_round']),eval(args['model_params'])
     features = numerical_features+categorical_features
     print("features:************",features)
+    if 'is_limit' in df.columns:
+        df = df[df['is_limit']==False]
     # 拆分数据为训练集和测试集
     X_train, X_test, y_train, y_test = train_test_split(df[features], df[label], test_size=0.2, random_state=42)
     # 创建LightGBM数据集

+ 2 - 0
models_processing/model_train/model_training_lstm.py

@@ -49,6 +49,8 @@ def create_sequences(data_features,data_target,time_steps):
 
 def build_model(data, args):
     col_time, time_steps,features,target = args['col_time'], int(args['time_steps']), str_to_list(args['features']),args['target']
+    if 'is_limit' in data.columns:
+        data = data[data['is_limit']==False]
     train_data = data.fillna(method='ffill').fillna(method='bfill').sort_values(by=col_time)
     # X_train, X_test, y_train, y_test = process_data(df_clean, params)
     # 创建特征和目标的标准化器