2 semanas atrás · f006a988e9
--- a/data_processing/data_operation/weight.py
+++ b/data_processing/data_operation/weight.py
@@ -1,10 +1,11 @@
 
				 import numpy as np
			
 
				 
			
 
				 
			
 
				-def balance_weights(y: np.ndarray, bins: int = 10, normalize: bool = True, **kwargs) -> np.ndarray:
			
 
				+def balance_weights(y: np.ndarray, bins=10, normalize: bool = True, **kwargs) -> np.ndarray:
			
 
				     """
			
 
				     平衡权重，分布数量越少权重越大
			
 
				     """
			
 
				+    bins = int(bins)
			
 
				     counts, bin_edges = np.histogram(y, bins=bins)
			
 
				 
			
 
				     # digitize 不使用 right=True，这样最小值也能落在 bin 0 开始
			
@@ -26,6 +27,7 @@ def south_weight(target: np.ndarray, cap, **kwargs) -> np.ndarray:
 
				     应付南方点网的奇怪考核
			
 
				     为了不把曲线压太低，这里有所收敛(添加开方处理，不让权重分布过于离散)
			
 
				     """
			
 
				+    cap = float(cap)
			
 
				     weight = 1 / np.sqrt(np.where(target < 0.2 * cap, 0.2 * cap, target))
			
 
				     return weight
			
 
				 
			
--- a/models_processing/model_predict/model_prediction_ml.py
+++ b/models_processing/model_predict/model_prediction_ml.py
@@ -26,7 +26,6 @@ def str_to_list(arg):
 
				     else:
			
 
				         return arg.split(',')
			
 
				 
			
 
				-
			
 
				 def forecast_data_distribution(pre_data, args):
			
 
				     col_time = args['col_time']
			
 
				     farm_id = args['farmId']
			
@@ -86,7 +85,6 @@ def forecast_data_distribution(pre_data, args):
 
				     result['power_forecast'] = round(result['power_forecast'], 2)
			
 
				     return result
			
 
				 
			
 
				-
			
 
				 def model_prediction(df, args):
			
 
				     mongodb_connection, mongodb_database, mongodb_model_table, model_name, howLongAgo, farm_id, target = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/", \
			
 
				     args['mongodb_database'], args['mongodb_model_table'], args['model_name'], int(args['howLongAgo']), args['farm_id'], \
			
@@ -106,6 +104,7 @@ def model_prediction(df, args):
 
				             features = model_data['features']
			
 
				         else:
			
 
				             features = model.feature_name()
			
 
				+        df.dropna(subset=features, inplace=True)
			
 
				         df['power_forecast'] = model.predict(df[features])
			
 
				         df.loc[df['power_forecast'] < 0, 'power_forecast'] = 0
			
 
				         df['model'] = model_name
			
@@ -135,6 +134,7 @@ def model_prediction_ml():
 
				         success = 1
			
 
				     except Exception as e:
			
 
				         my_exception = traceback.format_exc()
			
 
				+        print(my_exception)
			
 
				         my_exception.replace("\n", "\t")
			
 
				         result['msg'] = my_exception
			
 
				     end_time = time.time()
			
--- a/models_processing/model_train/model_training_ml.py
+++ b/models_processing/model_train/model_training_ml.py
@@ -15,6 +15,18 @@ from data_processing.data_operation.weight import WEIGHT_REGISTER
 
				 
			
 
				 app = Flask('model_training_ml——service')
			
 
				 
			
 
				+def get_sample_weight(df, label, args):
			
 
				+    # 样本权重
			
 
				+    if 'sample_weight' in args.keys():
			
 
				+        if args['sample_weight'] in WEIGHT_REGISTER.keys():
			
 
				+            sample_weight = WEIGHT_REGISTER[args['sample_weight']](df[label].values.reshape(-1), **args)
			
 
				+        elif args['sample_weight'] in df.columns.tolist():
			
 
				+            sample_weight = df[args['sample_weight']].values.reshape(-1)
			
 
				+        else:
			
 
				+            sample_weight = None
			
 
				+            print('sample_weight is neither in the predefined weights nor a column of the DataFrame, not applicable')
			
 
				+    return sample_weight
			
 
				+
			
 
				 def train_lgb(data_split, categorical_features, model_params, num_boost_round=100, sample_weight=None):
			
 
				     X_train, X_test, y_train, y_test = data_split
			
 
				     # 创建LightGBM数据集
			
@@ -67,19 +79,15 @@ def build_model(df, args):
 
				     df = missing_features(df, features, col_time)
			
 
				     df = df[~np.isnan(df[label])]
			
 
				     # 拆分数据为训练集和测试集
			
 
				-    X_train, X_test, y_train, y_test = train_test_split(df[features], df[label], test_size=0.2, random_state=42,
			
 
				+    df_train, df_test = train_test_split(df, test_size=0.2, random_state=42,
			
 
				                                                         shuffle=False)
			
 
				+    X_train, y_train = df_train[features].values, df_train[label].values
			
 
				+    X_test, y_test = df_test[features].values, df_test[label].values
			
 
				+
			
 
				+    # 获取样本权重
			
 
				+    sample_weight = get_sample_weight(df_train, label=label, args=args)
			
 
				+
			
 
				     model_type = args['model_type']
			
 
				-    sample_weight = None
			
 
				-    # 样本权重
			
 
				-    if 'sample_weight' in args.keys():
			
 
				-        if args['sample_weight'] in WEIGHT_REGISTER.keys():
			
 
				-            sample_weight = WEIGHT_REGISTER[args['sample_weight']](df[label].values.reshape(-1), **args)
			
 
				-        elif args['sample_weight'] in df.columns.tolist():
			
 
				-            sample_weight = df[args['sample_weight']].values.reshape(-1)
			
 
				-        else:
			
 
				-            sample_weight = None
			
 
				-            print('sample_weight is neither in the predefined weights nor a column of the DataFrame, not applicable')
			
 
				     # 区分常规机器学习模型和lgb，这里只实例化svr，后续可扩展
			
 
				     if model_type == "lightgbm":
			
 
				         num_boost_round = int(args['num_boost_round'])
			
@@ -114,6 +122,7 @@ def model_training_ml():
 
				         insert_pickle_model_into_mongo(model, args, features=features)
			
 
				         success = 1
			
 
				     except Exception as e:
			
 
				+        print(e)
			
 
				         my_exception = traceback.format_exc()
			
 
				         my_exception.replace("\n", "\t")
			
 
				         result['msg'] = my_exception
			
--- a/post_processing/post_process.py
+++ b/post_processing/post_process.py
@@ -0,0 +1,118 @@
 
				+import pandas as pd
			
 
				+from flask import Flask, request, jsonify
			
 
				+import time
			
 
				+import logging
			
 
				+import traceback
			
 
				+
			
 
				+from common.database_dml import get_data_from_mongo, insert_data_into_mongo
			
 
				+
			
 
				+app = Flask('post_processing——service')
			
 
				+
			
 
				+"""
			
 
				+id = "${id}"
			
 
				+
			
 
				+cap = ${cap}
			
 
				+
			
 
				+参数
			
 
				+{
			
 
				+    'mongodb_database': 'hzh_ftp',
			
 
				+    'mongodb_read_table': f'{id}_PRED',
			
 
				+    'mongodb_write_table': f'{id}_PRED',
			
 
				+    'col_time':  "dateTime",
			
 
				+    'smooth_window': 3,
			
 
				+    'plant_type': 'solar',
			
 
				+    'mongodb_nwp_table': f'{id}_NWP_D1'
			
 
				+}
			
 
				+
			
 
				+"""
			
 
				+
			
 
				+
			
 
				+def get_data(args):
			
 
				+    df = get_data_from_mongo(args)
			
 
				+    col_time = args['col_time']
			
 
				+    if not df.empty:
			
 
				+        print("预测数据加载成功！")
			
 
				+        df[col_time] = pd.to_datetime(df[col_time])
			
 
				+        df.set_index(col_time, inplace=True)
			
 
				+        df.sort_index(inplace=True)
			
 
				+    else:
			
 
				+        raise ValueError("未获取到预测数据。")
			
 
				+    return df
			
 
				+
			
 
				+
			
 
				+def predict_result_adjustment(df, args):
			
 
				+    """
			
 
				+    光伏/风电 数据后处理 主要操作
			
 
				+    1. 光伏 (夜间 置零 + 平滑)
			
 
				+    2. 风电 (平滑)
			
 
				+    3. cap 封顶
			
 
				+    """
			
 
				+    mongodb_database, plant_type, cap, col_time = args['mongodb_database'], args['plant_type'], float(args['cap']), \
			
 
				+        args['col_time']
			
 
				+    if 'smooth_window' in args.keys():
			
 
				+        smooth_window = int(args['smooth_window'])
			
 
				+    else:
			
 
				+        smooth_window = 3
			
 
				+
			
 
				+    # 平滑
			
 
				+    df_cp = df.copy()
			
 
				+    df_cp['power_forecast'] = df_cp['power_forecast'].rolling(window=smooth_window, min_periods=1,
			
 
				+                                                              center=True).mean().clip(0, 0.985 * cap)
			
 
				+    print("smooth processed")
			
 
				+
			
 
				+    # 光伏晚上置零
			
 
				+    if plant_type == 'solar' and 'mongodb_nwp_table' in args.keys():
			
 
				+        nwp_param = {
			
 
				+            'mongodb_database': mongodb_database,
			
 
				+            'mongodb_read_table': args['mongodb_nwp_table'],
			
 
				+            'col_time': col_time
			
 
				+        }
			
 
				+        nwp = get_data(nwp_param)
			
 
				+
			
 
				+        df_cp = df_cp.join(nwp['radiation'])
			
 
				+        df_cp.loc[nwp['radiation'] == 0, 'power_forecast'] = 0
			
 
				+        df_cp['power_forecast'] = round(df_cp['power_forecast'], 2)
			
 
				+        df_cp.drop(columns=['radiation'], inplace=True)
			
 
				+        print("solar processed")
			
 
				+    df_cp.reset_index(inplace=True)
			
 
				+    df_cp[col_time] = df_cp[col_time].dt.strftime('%Y-%m-%d %H:%M:%S')
			
 
				+    return df_cp
			
 
				+
			
 
				+
			
 
				+@app.route('/post_process', methods=['POST'])
			
 
				+def data_join():
			
 
				+    # 获取程序开始时间
			
 
				+    start_time = time.time()
			
 
				+    result = {}
			
 
				+    success = 0
			
 
				+    print("Program starts execution!")
			
 
				+    try:
			
 
				+        args = request.values.to_dict()
			
 
				+        print('args', args)
			
 
				+        logger.info(args)
			
 
				+        df_pre = get_data(args)
			
 
				+        res_df = predict_result_adjustment(df_pre, args)
			
 
				+        insert_data_into_mongo(res_df, args)
			
 
				+        success = 1
			
 
				+    except Exception as e:
			
 
				+        my_exception = traceback.format_exc()
			
 
				+        print(my_exception)
			
 
				+        my_exception.replace("\n", "\t")
			
 
				+        result['msg'] = my_exception
			
 
				+    end_time = time.time()
			
 
				+    result['success'] = success
			
 
				+    result['args'] = args
			
 
				+    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
			
 
				+    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
			
 
				+    print("Program execution ends!")
			
 
				+    return result
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    print("Program starts execution!")
			
 
				+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
			
 
				+    logger = logging.getLogger("post_processing")
			
 
				+    from waitress import serve
			
 
				+
			
 
				+    serve(app, host="0.0.0.0", port=10098)
			
 
				+    print("server start!")