
awg commit algorithm components

anweiguo committed 6 months ago
commit 863c420ab2

+ 16 - 0
common/processing_data_common.py

@@ -0,0 +1,16 @@
+import random
+def str_to_list(arg):
+    if arg == '':
+        return []
+    else:
+        return arg.split(',')
+
+
+
+# Randomly generate a set of unique colors
+def generate_unique_colors(num_colors):
+    generated_colors = set()
+    while len(generated_colors) < num_colors:
+        color = f"rgb({random.randint(0, 255)}, {random.randint(0, 255)}, {random.randint(0, 255)})"
+        generated_colors.add(color)
+    return list(generated_colors)
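
The two helpers added here are shared by the services below: str_to_list splits a comma-separated request argument into a list (an empty string becomes an empty list), and generate_unique_colors keeps drawing random "rgb(r, g, b)" strings into a set until it has the requested number of distinct values, so the returned list order is arbitrary. A minimal usage sketch (the sample values are illustrative only):

# Illustrative usage of the new helpers; the sample values are made up.
from common.processing_data_common import str_to_list, generate_unique_colors

cols = str_to_list('C_TIME,C_REAL_VALUE')   # ['C_TIME', 'C_REAL_VALUE']
empty = str_to_list('')                     # []
colors = generate_unique_colors(3)          # e.g. ['rgb(12, 200, 7)', ...]
assert len(set(colors)) == 3                # all colors are distinct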

+ 7 - 2
data_processing/data_operation/data_join.py

@@ -16,9 +16,9 @@ def hello():
 
 #1. Use AGC/AVC signals to detect power curtailment (accurate at some stations, not at others); one approach. The database data currently has issues, so it cannot be used yet.
 def  data_merge(df_list, args):
-    join_key,join_type = args['join_key'], args['join_type']
+    join_key,join_type,features = args['join_key'], args['join_type'], str_to_list(args['col_reserve'])
     result = reduce(lambda left, right: pd.merge(left, right, how=join_type, on=join_key), df_list)
-    return result
+    return result[features]
 
 
 @app.route('/data_join', methods=['POST'])
@@ -48,6 +48,11 @@ def data_join():
     print("Program execution ends!")
     return result
 
+def str_to_list(arg):
+    if arg == '':
+        return []
+    else:
+        return arg.split(',')
 
 if __name__=="__main__":
     print("Program starts execution!")

+ 29 - 34
evaluation_processing/analysis_report.py

@@ -13,14 +13,15 @@ import pandas as pd
 import plotly.io as pio
 from bson.decimal128 import Decimal128
 import numbers
-
+from common.processing_data_common import str_to_list,generate_unique_colors
+from scipy.stats import gaussian_kde
 app = Flask('analysis_report——service')
 
 
 def put_analysis_report_to_html(args, df_clean, df_predict, df_accuracy):
     col_time = args['col_time']
     col_x_env = args['col_x_env']
-    col_x_pre = args['col_x_pre']
+    col_x_pre = str_to_list(args['col_x_pre'])
     label = args['label']
     label_pre = args['label_pre']
     farmId = args['farmId']
@@ -40,14 +41,14 @@ def put_analysis_report_to_html(args, df_clean, df_predict, df_accuracy):
     clean_size = total_size
     if 'is_limit' in df_clean.columns:
         df_clean['is_limit'] = df_clean['is_limit'].apply(lambda x: '正常点' if x==0 else '异常点')
-        clean_size = df_clean[df_clean['is_limit']==False].shape[0]
+        clean_size = df_clean[df_clean['is_limit']=='正常点'].shape[0]
     df_overview = pd.DataFrame(
         {'场站编码':[farmId],
          '数据开始时间': [df_clean[col_time].min()], '数据结束时间': [df_clean[col_time].max()],
          '总天数':[(pd.to_datetime(df_clean[col_time].max())-pd.to_datetime(df_clean[col_time].min())).days],
          '数据总记录数': [total_size],'清洗后记录数':[clean_size],'数据可用率':[clean_size/total_size]})
     overview_html = df_overview.to_html(classes='table table-bordered table-striped', index=False)
-
+    df_clean_after = df_clean[df_clean['is_limit']=='正常点']
     # -------------------- Data description --------------------
     describe_html = df_clean.describe().reset_index().rename(columns={'index': '统计量'}).to_html(
         classes='table table-bordered table-striped fixed', index=False)
@@ -94,7 +95,7 @@ def put_analysis_report_to_html(args, df_clean, df_predict, df_accuracy):
     # -------------------- Generate correlation heatmap --------------------
 
     # Compute the correlation matrix
-    correlation_matrix = df_clean.corr()
+    correlation_matrix = df_clean_after.corr()
 
     # Generate the heatmap with value labels and a new color scheme
     fig_heatmap = go.Figure(data=go.Heatmap(
@@ -119,11 +120,11 @@ def put_analysis_report_to_html(args, df_clean, df_predict, df_accuracy):
     # Save the heatmap as an HTML fragment
     corr_html = pio.to_html(fig_heatmap, full_html=False)
 
-    # -------------------- Measured vs. forecast meteorology trend curves --------------------
+    # -------------------- 6. Measured vs. forecast meteorology trend curves --------------------
 
     # Generate a line chart (e.g. C_GLOBALR and NWP forecast global irradiance)
-    fig_line = px.line(df_clean, x=col_time, y=[col_x_env, col_x_pre], markers=True)
-
+    y_env = [col_x_env]+ col_x_pre
+    fig_line = px.line(df_clean[(df_clean[col_time]>=df_predict[col_time].min())&(df_clean[col_time]<=df_predict[col_time].max())], x=col_time, y=y_env, markers=True)
     # Customize the trend chart layout
     fig_line.update_layout(
         template='seaborn',
@@ -143,33 +144,27 @@ def put_analysis_report_to_html(args, df_clean, df_predict, df_accuracy):
     # Save the line chart as an HTML fragment
     env_pre_html = pio.to_html(fig_line, full_html=False)
 
-    # -------------------- Measured vs. forecast meteorology deviation density curves --------------------
-
-    df_clean['deviation'] = df_clean[col_x_pre] - df_clean[col_x_env]
+    # -------------------- 5. Measured vs. forecast meteorology deviation density curves --------------------
+    # Create the Plotly figure object
+    fig_density = go.Figure()
+    colors = generate_unique_colors(len(col_x_pre))
+    for col in zip(col_x_pre,colors):
+        df_clean[f"{col[0]}_deviation"] = df_clean[col[0]] - df_clean[col_x_env]
+        data = df_clean[f"{col[0]}_deviation"].dropna()  # make sure there are no NaN values
+        kde = gaussian_kde(data)
+        x_vals = np.linspace(data.min(), data.max(), 1000)
+        y_vals = kde(x_vals)
+
+        # Add the density curve
+        fig_density.add_trace(go.Scatter(
+            x=x_vals,
+            y=y_vals,
+            mode='lines',
+            fill='tozeroy',
+            line=dict(color=col[1]),  # one generated color per curve
+            name=f'Density {col[0]}'  # legend name
+        ))
     # Generate density curves of the deviation between forecast and measured irradiance
-    # Generate a density plot of the deviation
-    fig_density = px.histogram(df_clean, x='deviation', nbins=30, marginal='rug', opacity=0.75,
-                               histnorm='density')
-
-    # Customize the density plot layout
-    fig_density.update_layout(
-        template='seaborn',
-        # # title=dict(text=f"{col_x_pre}与{col_x_env}偏差密度曲线",
-        # x=0.5, font=dict(size=24, color='darkred')),
-        plot_bgcolor='rgba(255, 255, 255, 0.8)',
-        xaxis=dict(
-            showgrid=True,
-            gridcolor='rgba(200, 200, 200, 0.5)',
-            title='偏差'
-        ),
-        yaxis=dict(
-            showgrid=True,
-            gridcolor='rgba(200, 200, 200, 0.5)',
-            title='Density'
-        ),
-        legend=dict(x=0.01, y=0.99, bgcolor='rgba(255, 255, 255, 0.7)', bordercolor='black', borderwidth=1)
-    )
-
     # Save the density plot as an HTML fragment
     density_html = pio.to_html(fig_density, full_html=False)
 
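
The histogram-based deviation plot is replaced above by per-column kernel density estimates: for each forecast column in col_x_pre, the deviation from the measured column col_x_env is computed, a gaussian_kde is fitted to it, and the density is evaluated on a 1000-point grid and drawn as a filled Plotly trace in its own color. A standalone sketch of that pattern (the column names and data are illustrative; gaussian_kde needs more than one point and nonzero variance):

# Standalone sketch of the gaussian_kde density-curve pattern; data are illustrative.
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio
from scipy.stats import gaussian_kde

df = pd.DataFrame({'measured': np.random.rand(500), 'forecast': np.random.rand(500)})
deviation = (df['forecast'] - df['measured']).dropna()

kde = gaussian_kde(deviation)                       # fit the kernel density estimate
x_vals = np.linspace(deviation.min(), deviation.max(), 1000)
fig = go.Figure(go.Scatter(x=x_vals, y=kde(x_vals), mode='lines',
                           fill='tozeroy', name='Density forecast'))
density_html = pio.to_html(fig, full_html=False)    # HTML fragment, as in the report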

+ 1 - 0
models_processing/model_predict/model_prediction_lightgbm.py

@@ -24,6 +24,7 @@ def model_prediction(df,args):
         # Deserialize the model
         model = pickle.loads(model_binary)
         df['predict'] = model.predict(df[model.feature_name()])
+        df.loc[df['predict']<0,'predict']=0
         df['model'] = model_name
         print("model predict result  successfully!")
     features_reserve = col_reserve + ['model','predict']

+ 1 - 0
models_processing/model_predict/model_prediction_lstm.py

@@ -34,6 +34,7 @@ def model_prediction(df,args):
     y_predict = list(chain.from_iterable(target_scaler.inverse_transform([model.predict(X_predict).flatten()])))
     result = df[-len(y_predict):]
     result['predict'] = y_predict
+    result.loc[result['predict'] < 0, 'predict'] = 0
     result['model'] = model_name
     features_reserve = col_reserve + ['model', 'predict']
     return result[set(features_reserve)]

+ 59 - 0
models_processing/model_predict/res_prediction.py

@@ -0,0 +1,59 @@
+import pandas as pd
+from pymongo import MongoClient
+import pickle
+from flask import Flask, request
+import time
+import logging
+import traceback
+from common.database_dml import get_data_from_mongo, insert_data_into_mongo
+
+app = Flask('res_prediction——service')
+
+
+def str_to_list(arg):
+    if arg == '':
+        return []
+    else:
+        return arg.split(',')
+
+
+@app.route('/res_prediction', methods=['POST'])
+def model_prediction_lightgbm():
+    # Record the program start time
+    start_time = time.time()
+    result = {}
+    success = 0
+    print("Program starts execution!")
+    try:
+        args = request.values.to_dict()
+        print('args', args)
+        logger.info(args)
+        col_reserve = str_to_list(args['col_reserve'])
+        power_df = get_data_from_mongo(args)
+        power_df['model'] = args['model']
+        power_df['predict'] = power_df[args['col_pre']]
+        features_reserve = col_reserve + ['model', 'predict']
+        insert_data_into_mongo(power_df[set(features_reserve)], args)
+        success = 1
+    except Exception as e:
+        my_exception = traceback.format_exc()
+        my_exception.replace("\n", "\t")
+        result['msg'] = my_exception
+    end_time = time.time()
+
+    result['success'] = success
+    result['args'] = args
+    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
+    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
+    print("Program execution ends!")
+    return result
+
+
+if __name__ == "__main__":
+    print("Program starts execution!")
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    logger = logging.getLogger("res_prediction log")
+    from waitress import serve
+
+    serve(app, host="0.0.0.0", port=10100)
+    print("server start!")

+ 1 - 0
run_all.py

@@ -17,6 +17,7 @@ services = [
     ("models_processing/model_predict/model_prediction_lstm.py", 10097),
     ("post_processing/post_processing.py", 10098),
     ("evaluation_processing/analysis.py", 10099),
+    ("models_processing/model_predict/res_prediction.py", 10100)
 
 ]