
awg commit algorithm components

anweiguo committed 6 months ago
commit 863c420ab2

+ 16 - 0
common/processing_data_common.py

@@ -0,0 +1,16 @@
+import random
+def str_to_list(arg):
+    if arg == '':
+        return []
+    else:
+        return arg.split(',')
+
+
+
+# Randomly generate a set of unique colors
+def generate_unique_colors(num_colors):
+    generated_colors = set()
+    while len(generated_colors) < num_colors:
+        color = f"rgb({random.randint(0, 255)}, {random.randint(0, 255)}, {random.randint(0, 255)})"
+        generated_colors.add(color)
+    return list(generated_colors)
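
The two helpers added here are shared by the services below: str_to_list splits a comma-separated request argument into a list (an empty string becomes an empty list), and generate_unique_colors keeps drawing random "rgb(r, g, b)" strings into a set until it has the requested number of distinct values, so the returned list order is arbitrary. A minimal usage sketch (the sample values are illustrative only):

# Illustrative usage of the new helpers; the sample values are made up.
from common.processing_data_common import str_to_list, generate_unique_colors

cols = str_to_list('C_TIME,C_REAL_VALUE')   # ['C_TIME', 'C_REAL_VALUE']
empty = str_to_list('')                     # []
colors = generate_unique_colors(3)          # e.g. ['rgb(12, 200, 7)', ...]
assert len(set(colors)) == 3                # all colors are distinct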

+ 7 - 2
data_processing/data_operation/data_join.py

@@ -16,9 +16,9 @@ def hello():
 
 #1. Use AGC/AVC signals to detect power curtailment (accurate at some stations, not at others); one approach. The database data currently has issues, so it cannot be used yet.
 def  data_merge(df_list, args):
-    join_key,join_type = args['join_key'], args['join_type']
+    join_key,join_type,features = args['join_key'], args['join_type'], str_to_list(args['col_reserve'])
     result = reduce(lambda left, right: pd.merge(left, right, how=join_type, on=join_key), df_list)
-    return result
+    return result[features]
 
 
 @app.route('/data_join', methods=['POST'])
@@ -48,6 +48,11 @@ def data_join():
     print("Program execution ends!")
     return result
 
+def str_to_list(arg):
+    if arg == '':
+        return []
+    else:
+        return arg.split(',')
 
 if __name__=="__main__":
     print("Program starts execution!")

+ 29 - 34
evaluation_processing/analysis_report.py

@@ -13,14 +13,15 @@ import pandas as pd
 import plotly.io as pio
 from bson.decimal128 import Decimal128
 import numbers
-
+from common.processing_data_common import str_to_list,generate_unique_colors
+from scipy.stats import gaussian_kde
 app = Flask('analysis_report——service')
 
 
 def put_analysis_report_to_html(args, df_clean, df_predict, df_accuracy):
     col_time = args['col_time']
     col_x_env = args['col_x_env']
-    col_x_pre = args['col_x_pre']
+    col_x_pre = str_to_list(args['col_x_pre'])
     label = args['label']
     label_pre = args['label_pre']
     farmId = args['farmId']
@@ -40,14 +41,14 @@ def put_analysis_report_to_html(args, df_clean, df_predict, df_accuracy):
     clean_size = total_size
     if 'is_limit' in df_clean.columns:
         df_clean['is_limit'] = df_clean['is_limit'].apply(lambda x: '正常点' if x==0 else '异常点')
-        clean_size = df_clean[df_clean['is_limit']==False].shape[0]
+        clean_size = df_clean[df_clean['is_limit']=='正常点'].shape[0]
     df_overview = pd.DataFrame(
         {'场站编码':[farmId],
          '数据开始时间': [df_clean[col_time].min()], '数据结束时间': [df_clean[col_time].max()],
          '总天数':[(pd.to_datetime(df_clean[col_time].max())-pd.to_datetime(df_clean[col_time].min())).days],
          '数据总记录数': [total_size],'清洗后记录数':[clean_size],'数据可用率':[clean_size/total_size]})
     overview_html = df_overview.to_html(classes='table table-bordered table-striped', index=False)
-
+    df_clean_after = df_clean[df_clean['is_limit']=='正常点']
     # -------------------- Data description --------------------
     describe_html = df_clean.describe().reset_index().rename(columns={'index': '统计量'}).to_html(
         classes='table table-bordered table-striped fixed', index=False)
@@ -94,7 +95,7 @@ def put_analysis_report_to_html(args, df_clean, df_predict, df_accuracy):
     # -------------------- Generate correlation heatmap --------------------
 
     # Compute the correlation matrix
-    correlation_matrix = df_clean.corr()
+    correlation_matrix = df_clean_after.corr()
 
     # Generate the heatmap with value labels and a new color scheme
     fig_heatmap = go.Figure(data=go.Heatmap(
@@ -119,11 +120,11 @@ def put_analysis_report_to_html(args, df_clean, df_predict, df_accuracy):
     # Save the heatmap as an HTML fragment
     corr_html = pio.to_html(fig_heatmap, full_html=False)
 
-    # -------------------- Measured vs. forecast meteorology trend curves --------------------
+    # -------------------- 6. Measured vs. forecast meteorology trend curves --------------------
 
     # Generate a line chart (e.g. C_GLOBALR and NWP forecast global irradiance)
-    fig_line = px.line(df_clean, x=col_time, y=[col_x_env, col_x_pre], markers=True)
-
+    y_env = [col_x_env]+ col_x_pre
+    fig_line = px.line(df_clean[(df_clean[col_time]>=df_predict[col_time].min())&(df_clean[col_time]<=df_predict[col_time].max())], x=col_time, y=y_env, markers=True)
     # Customize the trend chart layout
     fig_line.update_layout(
         template='seaborn',
@@ -143,33 +144,27 @@ def put_analysis_report_to_html(args, df_clean, df_predict, df_accuracy):
     # Save the line chart as an HTML fragment
     env_pre_html = pio.to_html(fig_line, full_html=False)
 
-    # -------------------- Measured vs. forecast meteorology deviation density curves --------------------
-
-    df_clean['deviation'] = df_clean[col_x_pre] - df_clean[col_x_env]
+    # -------------------- 5. Measured vs. forecast meteorology deviation density curves --------------------
+    # Create the Plotly figure object
+    fig_density = go.Figure()
+    colors = generate_unique_colors(len(col_x_pre))
+    for col in zip(col_x_pre,colors):
+        df_clean[f"{col[0]}_deviation"] = df_clean[col[0]] - df_clean[col_x_env]
+        data = df_clean[f"{col[0]}_deviation"].dropna()  # make sure there are no NaN values
+        kde = gaussian_kde(data)
+        x_vals = np.linspace(data.min(), data.max(), 1000)
+        y_vals = kde(x_vals)
+
+        # Add the density curve
+        fig_density.add_trace(go.Scatter(
+            x=x_vals,
+            y=y_vals,
+            mode='lines',
+            fill='tozeroy',
+            line=dict(color=col[1]),  # one generated color per curve
+            name=f'Density {col[0]}'  # legend name
+        ))
     # Generate density curves of the deviation between forecast and measured irradiance
-    # Generate a density plot of the deviation
-    fig_density = px.histogram(df_clean, x='deviation', nbins=30, marginal='rug', opacity=0.75,
-                               histnorm='density')
-
-    # Customize the density plot layout
-    fig_density.update_layout(
-        template='seaborn',
-        # # title=dict(text=f"{col_x_pre}与{col_x_env}偏差密度曲线",
-        # x=0.5, font=dict(size=24, color='darkred')),
-        plot_bgcolor='rgba(255, 255, 255, 0.8)',
-        xaxis=dict(
-            showgrid=True,
-            gridcolor='rgba(200, 200, 200, 0.5)',
-            title='偏差'
-        ),
-        yaxis=dict(
-            showgrid=True,
-            gridcolor='rgba(200, 200, 200, 0.5)',
-            title='Density'
-        ),
-        legend=dict(x=0.01, y=0.99, bgcolor='rgba(255, 255, 255, 0.7)', bordercolor='black', borderwidth=1)
-    )
-
     # Save the density plot as an HTML fragment
     density_html = pio.to_html(fig_density, full_html=False)
 
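
The histogram-based deviation plot is replaced above by per-column kernel density estimates: for each forecast column in col_x_pre, the deviation from the measured column col_x_env is computed, a gaussian_kde is fitted to it, and the density is evaluated on a 1000-point grid and drawn as a filled Plotly trace in its own color. A standalone sketch of that pattern (the column names and data are illustrative; gaussian_kde needs more than one point and nonzero variance):

# Standalone sketch of the gaussian_kde density-curve pattern; data are illustrative.
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio
from scipy.stats import gaussian_kde

df = pd.DataFrame({'measured': np.random.rand(500), 'forecast': np.random.rand(500)})
deviation = (df['forecast'] - df['measured']).dropna()

kde = gaussian_kde(deviation)                       # fit the kernel density estimate
x_vals = np.linspace(deviation.min(), deviation.max(), 1000)
fig = go.Figure(go.Scatter(x=x_vals, y=kde(x_vals), mode='lines',
                           fill='tozeroy', name='Density forecast'))
density_html = pio.to_html(fig, full_html=False)    # HTML fragment, as in the report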

+ 1 - 0
models_processing/model_predict/model_prediction_lightgbm.py

@@ -24,6 +24,7 @@ def model_prediction(df,args):
         # Deserialize the model
         model = pickle.loads(model_binary)
         df['predict'] = model.predict(df[model.feature_name()])
+        df.loc[df['predict']<0,'predict']=0
         df['model'] = model_name
         print("model predict result  successfully!")
     features_reserve = col_reserve + ['model','predict']

+ 1 - 0
models_processing/model_predict/model_prediction_lstm.py

@@ -34,6 +34,7 @@ def model_prediction(df,args):
     y_predict = list(chain.from_iterable(target_scaler.inverse_transform([model.predict(X_predict).flatten()])))
     result = df[-len(y_predict):]
     result['predict'] = y_predict
+    result.loc[result['predict'] < 0, 'predict'] = 0
     result['model'] = model_name
     features_reserve = col_reserve + ['model', 'predict']
     return result[set(features_reserve)]

+ 59 - 0
models_processing/model_predict/res_prediction.py

@@ -0,0 +1,59 @@
+import pandas as pd
+from pymongo import MongoClient
+import pickle
+from flask import Flask, request
+import time
+import logging
+import traceback
+from common.database_dml import get_data_from_mongo, insert_data_into_mongo
+
+app = Flask('res_prediction——service')
+
+
+def str_to_list(arg):
+    if arg == '':
+        return []
+    else:
+        return arg.split(',')
+
+
+@app.route('/res_prediction', methods=['POST'])
+def model_prediction_lightgbm():
+    # Record the program start time
+    start_time = time.time()
+    result = {}
+    success = 0
+    print("Program starts execution!")
+    try:
+        args = request.values.to_dict()
+        print('args', args)
+        logger.info(args)
+        col_reserve = str_to_list(args['col_reserve'])
+        power_df = get_data_from_mongo(args)
+        power_df['model'] = args['model']
+        power_df['predict'] = power_df[args['col_pre']]
+        features_reserve = col_reserve + ['model', 'predict']
+        insert_data_into_mongo(power_df[set(features_reserve)], args)
+        success = 1
+    except Exception as e:
+        my_exception = traceback.format_exc()
+        my_exception.replace("\n", "\t")
+        result['msg'] = my_exception
+    end_time = time.time()
+
+    result['success'] = success
+    result['args'] = args
+    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
+    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
+    print("Program execution ends!")
+    return result
+
+
+if __name__ == "__main__":
+    print("Program starts execution!")
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    logger = logging.getLogger("res_prediction log")
+    from waitress import serve
+
+    serve(app, host="0.0.0.0", port=10100)
+    print("server start!")

+ 1 - 0
run_all.py

@@ -17,6 +17,7 @@ services = [
     ("models_processing/model_predict/model_prediction_lstm.py", 10097),
     ("post_processing/post_processing.py", 10098),
     ("evaluation_processing/analysis.py", 10099),
+    ("models_processing/model_predict/res_prediction.py", 10100)
 
 ]