Explorar o código

awg commit algorithm components

anweiguo hai 4 meses
pai
achega
67a79f2d48

+ 2 - 1
data_processing/processing_limit_power/processing_limit_power_by_statistics_light.py

@@ -60,7 +60,8 @@ def light_statistics_judgement(df_power,args):
     new_df_power = df_power[df_power['is_limit'] == False]    
     new_df_power = df_power[df_power['is_limit'] == False]    
     print(f"未清洗限电前,总共有:{origin_records}条数据")
     print(f"未清洗限电前,总共有:{origin_records}条数据")
     print(f"清除异常点后保留的点有:{len(new_df_power)}, 占比:{round(len(new_df_power) / origin_records, 2)}")
     print(f"清除异常点后保留的点有:{len(new_df_power)}, 占比:{round(len(new_df_power) / origin_records, 2)}")
-    return df_power[df_power['is_limit'] == False].drop(['is_limit','c'],axis=1)
+    # return df_power[df_power['is_limit'] == False].drop(['is_limit','c'],axis=1)
+    return df_power.drop(['c'], axis=1)
 
 
 
 
 @app.route('/processing_limit_power_by_statistics_light', methods=['POST','GET'])
 @app.route('/processing_limit_power_by_statistics_light', methods=['POST','GET'])

+ 2 - 1
data_processing/processing_limit_power/processing_limit_power_by_statistics_wind.py

@@ -61,7 +61,8 @@ def wind_statistics_judgement(df_power,args):
     df_tmp.plot.scatter(x=col_ws, y=col_power, c='c')
     df_tmp.plot.scatter(x=col_ws, y=col_power, c='c')
     print(f"原始样本数:{df_power.shape[0]},异常点样本数:{df_tmp[df_tmp['is_limit'] == True].shape[0]},剩余样本数占比:"
     print(f"原始样本数:{df_power.shape[0]},异常点样本数:{df_tmp[df_tmp['is_limit'] == True].shape[0]},剩余样本数占比:"
           f"{df_tmp[df_tmp['is_limit'] == False].shape[0] / df_power.shape[0]}")
           f"{df_tmp[df_tmp['is_limit'] == False].shape[0] / df_power.shape[0]}")
-    return df_tmp[df_tmp['is_limit'] == False].drop(['is_limit','c'],axis=1)
+    # return df_tmp[df_tmp['is_limit'] == False].drop(['is_limit','c'],axis=1)
+    return df_tmp.drop(['c'],axis=1)
 
 
 
 
 @app.route('/processing_limit_power_by_statistics_wind', methods=['POST'])
 @app.route('/processing_limit_power_by_statistics_wind', methods=['POST'])

+ 67 - 30
evaluation_processing/analysis_report.py

@@ -1,39 +1,48 @@
 # -*- coding: utf-8 -*-
 # -*- coding: utf-8 -*-
 import numpy as np
 import numpy as np
-from flask import Flask,request
+from flask import Flask, request
 import time
 import time
 import random
 import random
 import logging
 import logging
 import traceback
 import traceback
 import os
 import os
-from common.database_dml import get_df_list_from_mongo,insert_data_into_mongo
+from common.database_dml import get_df_list_from_mongo, insert_data_into_mongo
 import plotly.express as px
 import plotly.express as px
 import plotly.graph_objects as go
 import plotly.graph_objects as go
 import pandas as pd
 import pandas as pd
 import plotly.io as pio
 import plotly.io as pio
 from bson.decimal128 import Decimal128
 from bson.decimal128 import Decimal128
 import numbers
 import numbers
+
 app = Flask('analysis_report——service')
 app = Flask('analysis_report——service')
 
 
-def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
+
+def put_analysis_report_to_html(args, df_clean, df_predict, df_accuracy):
     col_time = args['col_time']
     col_time = args['col_time']
     col_x_env = args['col_x_env']
     col_x_env = args['col_x_env']
     col_x_pre = args['col_x_pre']
     col_x_pre = args['col_x_pre']
     label = args['label']
     label = args['label']
     label_pre = args['label_pre']
     label_pre = args['label_pre']
-    farmId =  args['farmId']
+    farmId = args['farmId']
+    total_size = df_clean.shape[0]
+    clean_size = total_size
+    if 'is_limit' in df_clean.columns:
+        clean_size = df_clean[df_clean['is_limit']==False].shape[0]
     df_overview = pd.DataFrame(
     df_overview = pd.DataFrame(
-        {'数据开始时间': [df_clean[col_time].min()], '数据结束时间': [df_clean[col_time].max()], '数据总记录数': [df_clean.shape[0]]})
+        {'场站编码':[farmId],
+         '数据开始时间': [df_clean[col_time].min()], '数据结束时间': [df_clean[col_time].max()],
+         '总天数':[(pd.to_datetime(df_clean[col_time].max())-pd.to_datetime(df_clean[col_time].min())).days],
+         '数据总记录数': [total_size],'清洗后记录数':[clean_size],'数据可用率':[clean_size/total_size]})
     overview_html = df_overview.to_html(classes='table table-bordered table-striped', index=False)
     overview_html = df_overview.to_html(classes='table table-bordered table-striped', index=False)
 
 
     # -------------------- 数据描述 --------------------
     # -------------------- 数据描述 --------------------
     describe_html = df_clean.describe().reset_index().rename(columns={'index': '统计量'}).to_html(
     describe_html = df_clean.describe().reset_index().rename(columns={'index': '统计量'}).to_html(
-        classes='table table-bordered table-striped', index=False)
+        classes='table table-bordered table-striped fixed', index=False)
 
 
     # -------------------- 实测气象与实际功率散点图--------------------
     # -------------------- 实测气象与实际功率散点图--------------------
 
 
     # 生成实际功率与辐照度的散点图
     # 生成实际功率与辐照度的散点图
-    fig_scatter = px.scatter(df_clean, x=col_x_env, y=label)
+    fig_scatter = px.scatter(df_clean, x=col_x_env, y=label, color='is_limit')
 
 
     # 自定义散点图布局
     # 自定义散点图布局
     fig_scatter.update_layout(
     fig_scatter.update_layout(
@@ -173,7 +182,7 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
     fig.update_layout(
     fig.update_layout(
         template='seaborn',  # 使用 seaborn 模板
         template='seaborn',  # 使用 seaborn 模板
         title=dict(
         title=dict(
-            text=f"{label_pre} 与 {label} 对比",  # 标题
+            # text=f"{label_pre} 与 {label} 对比",  # 标题
             x=0.5, font=dict(size=20, color='darkblue')  # 标题居中并设置字体大小和颜色
             x=0.5, font=dict(size=20, color='darkblue')  # 标题居中并设置字体大小和颜色
         ),
         ),
         plot_bgcolor='rgba(255, 255, 255, 0.8)',  # 背景色
         plot_bgcolor='rgba(255, 255, 255, 0.8)',  # 背景色
@@ -210,14 +219,15 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
     power_html = pio.to_html(fig, full_html=False)
     power_html = pio.to_html(fig, full_html=False)
 
 
     # -------------------- 准确率表展示--------------------
     # -------------------- 准确率表展示--------------------
-    acc_html = df_accuracy.to_html(classes='table table-bordered table-striped', index=False)
+    acc_html = df_accuracy.sort_values(by=col_time).to_html(classes='table table-bordered table-striped', index=False)
     # -------------------- 准确率汇总展示--------------------
     # -------------------- 准确率汇总展示--------------------
     # 指定需要转换的列
     # 指定需要转换的列
-    cols_to_convert = ['MAE', 'accuracy', 'RMSE', 'deviationElectricity','deviationAssessment']
-    for col in cols_to_convert :
+    cols_to_convert = ['MAE', 'accuracy', 'RMSE', 'deviationElectricity', 'deviationAssessment']
+    for col in cols_to_convert:
         if col in df_accuracy.columns:
         if col in df_accuracy.columns:
             df_accuracy[col] = df_accuracy[col].apply(
             df_accuracy[col] = df_accuracy[col].apply(
-        lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x, numbers.Number) else np.nan)
+                lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x,
+                                                                                                         numbers.Number) else np.nan)
 
 
     # 确定存在的列
     # 确定存在的列
     agg_dict = {}
     agg_dict = {}
@@ -241,7 +251,7 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
         rename_cols.append('考核总分数')
         rename_cols.append('考核总分数')
     # 进行分组聚合,如果有需要聚合的列
     # 进行分组聚合,如果有需要聚合的列
     summary_df = df_accuracy.groupby('model').agg(agg_dict).reset_index()
     summary_df = df_accuracy.groupby('model').agg(agg_dict).reset_index()
-    summary_df.columns =rename_cols
+    summary_df.columns = rename_cols
     summary_html = summary_df.to_html(classes='table table-bordered table-striped', index=False)
     summary_html = summary_df.to_html(classes='table table-bordered table-striped', index=False)
     # -------------------- 生成完整 HTML 页面 --------------------
     # -------------------- 生成完整 HTML 页面 --------------------
 
 
@@ -255,6 +265,10 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
         <!-- 引入 Bootstrap CSS -->
         <!-- 引入 Bootstrap CSS -->
         <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
         <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
         <style>
         <style>
+         justify-between;{{
+                display: flex;
+                justify-content: space-between;
+           }}
             body {{
             body {{
                 background-color: #f4f4f9;
                 background-color: #f4f4f9;
                 font-family: Arial, sans-serif;
                 font-family: Arial, sans-serif;
@@ -285,6 +299,18 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
                 max-height: 500px;  /* 限制高度 */
                 max-height: 500px;  /* 限制高度 */
                 overflow-y: auto;   /* 显示垂直滚动条 */
                 overflow-y: auto;   /* 显示垂直滚动条 */
             }}
             }}
+             .fixed-table thead tr > th:first-child,
+             .fixed-table tbody tr > td:first-child {{
+             position: sticky;
+             left: 0;
+             z-index: 1;
+        
+             }}
+            .fixed-table-header thead tr > th {{
+                position: sticky;
+                top: 0;
+                z-index: 2;
+            }}
             table {{
             table {{
                 width: 100%;
                 width: 100%;
                 font-size: 12px;  /* 设置字体大小为12px */
                 font-size: 12px;  /* 设置字体大小为12px */
@@ -292,6 +318,8 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
             th, td {{
             th, td {{
                 text-align: center;  /* 表头和单元格文字居中 */
                 text-align: center;  /* 表头和单元格文字居中 */
             }}
             }}
+        }}
+        
         </style>
         </style>
     </head>
     </head>
     <body>
     <body>
@@ -303,12 +331,12 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
                 {overview_html}
                 {overview_html}
             </div>
             </div>
             <!-- Pandas DataFrame 表格 -->
             <!-- Pandas DataFrame 表格 -->
-            <div class="table-container">
-                <h2>2. 数据描述</h2>
+             <h2>2. 数据描述</h2>
+            <div class="table-container fixed-table">
                 {describe_html}
                 {describe_html}
             </div>
             </div>
             <div class="plot-container">
             <div class="plot-container">
-                <h2>3. 数据清洗后实测气象与实际功率散点图</h2>
+                <h2>3. 实测气象与实际功率散点图</h2>
                 {scatter_html}
                 {scatter_html}
             </div>
             </div>
             <div class="plot-container">
             <div class="plot-container">
@@ -316,20 +344,25 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
                 {corr_html}
                 {corr_html}
             </div>
             </div>
             <div class="plot-container">
             <div class="plot-container">
-                <h2>5. 实测气象与预测气象曲线趋势</h2>
-                {env_pre_html}
-            </div>
-            <div class="plot-container">
-                <h2>6. 预测气象与实测气象偏差曲线</h2>
+                <h2>5. 预测气象与实测气象偏差曲线</h2>
                 {density_html}
                 {density_html}
             </div>
             </div>
+             <div class="plot-container">
+                <h2>6. 实测气象与预测气象曲线趋势</h2>
+                {env_pre_html}
+            </div>
             <div class="plot-container">
             <div class="plot-container">
                 <h2>7. 预测功率与实际功率曲线对比</h2>
                 <h2>7. 预测功率与实际功率曲线对比</h2>
                 {power_html}
                 {power_html}
             </div>
             </div>
             <!-- Pandas DataFrame 表格 -->
             <!-- Pandas DataFrame 表格 -->
-            <div class="table-container">
+            <div style="display:flex; justify-content: space-between;">
                 <h2>8. 准确率对比</h2>
                 <h2>8. 准确率对比</h2>
+                <span>
+                    <a href="/formula.xlsx">公式</a>
+                </span>
+            </div>
+            <div class="table-container fixed-table-header"> 
                 {acc_html}
                 {acc_html}
             </div>
             </div>
             <!-- Pandas DataFrame 表格 -->
             <!-- Pandas DataFrame 表格 -->
@@ -353,26 +386,29 @@ def put_analysis_report_to_html(args,df_clean, df_predict, df_accuracy):
         f.write(html_content)
         f.write(html_content)
     print("HTML report generated successfully!")
     print("HTML report generated successfully!")
     return path
     return path
+
+
 @app.route('/analysis_report', methods=['POST'])
 @app.route('/analysis_report', methods=['POST'])
 def analysis_report():
 def analysis_report():
-    start_time = time.time()  
+    start_time = time.time()
     result = {}
     result = {}
     success = 0
     success = 0
     path = ""
     path = ""
     print("Program starts execution!")
     print("Program starts execution!")
     try:
     try:
         args = request.values.to_dict()
         args = request.values.to_dict()
-        print('args',args)
+        print('args', args)
         logger.info(args)
         logger.info(args)
-        #获取数据
-        df_clean, df_predict, df_accuracy = get_df_list_from_mongo(args)[0], get_df_list_from_mongo(args)[1],get_df_list_from_mongo(args)[2]
+        # 获取数据
+        df_clean, df_predict, df_accuracy = get_df_list_from_mongo(args)[0], get_df_list_from_mongo(args)[1], \
+        get_df_list_from_mongo(args)[2]
         path = put_analysis_report_to_html(args, df_clean, df_predict, df_accuracy)
         path = put_analysis_report_to_html(args, df_clean, df_predict, df_accuracy)
         success = 1
         success = 1
     except Exception as e:
     except Exception as e:
         my_exception = traceback.format_exc()
         my_exception = traceback.format_exc()
-        my_exception.replace("\n","\t")
+        my_exception.replace("\n", "\t")
         result['msg'] = my_exception
         result['msg'] = my_exception
-    end_time = time.time() 
+    end_time = time.time()
     result['success'] = success
     result['success'] = success
     result['args'] = args
     result['args'] = args
     result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
     result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
@@ -382,10 +418,11 @@ def analysis_report():
     return result
     return result
 
 
 
 
-if __name__=="__main__":  
+if __name__ == "__main__":
     print("Program starts execution!")
     print("Program starts execution!")
     logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
     logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
     logger = logging.getLogger("analysis_report log")
     logger = logging.getLogger("analysis_report log")
     from waitress import serve
     from waitress import serve
+
     serve(app, host="0.0.0.0", port=10092)
     serve(app, host="0.0.0.0", port=10092)
-    print("server start!")
+    print("server start!")

+ 2 - 0
models_processing/model_train/model_training_lightgbm.py

@@ -16,6 +16,8 @@ def build_model(df,args):
     numerical_features,categorical_features,label,model_name,num_boost_round,model_params = str_to_list(args['numerical_features']),str_to_list(args['categorical_features']),args['label'],args['model_name'],int(args['num_boost_round']),eval(args['model_params'])
     numerical_features,categorical_features,label,model_name,num_boost_round,model_params = str_to_list(args['numerical_features']),str_to_list(args['categorical_features']),args['label'],args['model_name'],int(args['num_boost_round']),eval(args['model_params'])
     features = numerical_features+categorical_features
     features = numerical_features+categorical_features
     print("features:************",features)
     print("features:************",features)
+    if 'is_limit' in df.columns:
+        df = df[df['is_limit']==False]
     # 拆分数据为训练集和测试集
     # 拆分数据为训练集和测试集
     X_train, X_test, y_train, y_test = train_test_split(df[features], df[label], test_size=0.2, random_state=42)
     X_train, X_test, y_train, y_test = train_test_split(df[features], df[label], test_size=0.2, random_state=42)
     # 创建LightGBM数据集
     # 创建LightGBM数据集

+ 2 - 0
models_processing/model_train/model_training_lstm.py

@@ -49,6 +49,8 @@ def create_sequences(data_features,data_target,time_steps):
 
 
 def build_model(data, args):
 def build_model(data, args):
     col_time, time_steps,features,target = args['col_time'], int(args['time_steps']), str_to_list(args['features']),args['target']
     col_time, time_steps,features,target = args['col_time'], int(args['time_steps']), str_to_list(args['features']),args['target']
+    if 'is_limit' in data.columns:
+        data = data[data['is_limit']==False]
     train_data = data.fillna(method='ffill').fillna(method='bfill').sort_values(by=col_time)
     train_data = data.fillna(method='ffill').fillna(method='bfill').sort_values(by=col_time)
     # X_train, X_test, y_train, y_test = process_data(df_clean, params)
     # X_train, X_test, y_train, y_test = process_data(df_clean, params)
     # 创建特征和目标的标准化器
     # 创建特征和目标的标准化器