awg commit algorithm components

anweiguo 4 months ago
parent commit 8e61bb6fdf

+ 2 - 2
evaluation_processing/analysis_report.py

@@ -225,8 +225,8 @@ def put_analysis_report_to_html(args, df_clean, df_predict, df_accuracy):
         y=df_predict[label],
         mode='lines+markers',
        name='实际功率',  # Actual power
-        line=dict(dash='dot', width=2),  # dotted line
-        marker=dict(symbol='cross'),
+        line=dict(width=1),  # thin solid line
+        marker=dict(symbol='circle'),
     ))
    # Add predicted and actual-power curves for each model
     for model in models:
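For reference, a minimal standalone sketch of the two Plotly trace styles this hunk swaps; the sample data and figure setup are illustrative only, not part of the commit:

```python
import plotly.graph_objects as go

x = [1, 2, 3, 4]                  # hypothetical time axis
y = [10.0, 12.5, 11.0, 13.2]      # hypothetical actual-power values

fig = go.Figure()
# Old style: dotted line with cross markers
fig.add_trace(go.Scatter(x=x, y=y, mode='lines+markers', name='old',
                         line=dict(dash='dot', width=2), marker=dict(symbol='cross')))
# New style: thin solid line with circle markers
fig.add_trace(go.Scatter(x=x, y=y, mode='lines+markers', name='new',
                         line=dict(width=1), marker=dict(symbol='circle')))
fig.write_html('style_comparison.html')  # write to a standalone HTML file
```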

+ 68 - 0
models_processing/model_predict/model_prediction_bp.py

@@ -0,0 +1,68 @@
+from flask import Flask,request
+import time
+import logging
+import traceback
+from itertools import chain
+from common.database_dml import get_data_from_mongo,insert_data_into_mongo,get_h5_model_from_mongo,get_scaler_model_from_mongo
+from common.processing_data_common import str_to_list
+app = Flask('model_prediction_bp——service')
+
+
+# Run prediction using the stored scalers and h5 model
+def model_prediction(df,args):
+    if 'is_limit' in df.columns:
+        df = df[df['is_limit'] == False]
+    features, time_steps, col_time, model_name, col_reserve = str_to_list(args['features']), int(args['time_steps']), args['col_time'], args['model_name'], str_to_list(args['col_reserve'])
+    feature_scaler,target_scaler = get_scaler_model_from_mongo(args)
+    df = df.sort_values(by=col_time).fillna(method='ffill').fillna(method='bfill')
+    scaled_features = feature_scaler.transform(df[features])
+    # X_predict, _ = create_sequences(scaled_features, [], time_steps)
+    # Pass the custom loss function when loading the model
+    # model = load_model(f'{farmId}_model.h5', custom_objects={'rmse': rmse})
+    model = get_h5_model_from_mongo(args)
+    y_predict = list(chain.from_iterable(target_scaler.inverse_transform([model.predict(scaled_features).flatten()])))
+    result = df[-len(y_predict):].copy()  # copy to avoid pandas SettingWithCopyWarning
+    result['predict'] = y_predict
+    result.loc[result['predict'] < 0, 'predict'] = 0
+    result['model'] = model_name
+    features_reserve = col_reserve + ['model', 'predict']
+    return result[list(set(features_reserve))]  # de-duplicate; pandas does not accept a set indexer
+
+
+@app.route('/model_prediction_bp', methods=['POST'])
+def model_prediction_bp():
+    # Record the program start time
+    start_time = time.time()  
+    result = {}
+    success = 0
+    args = {}  # ensure 'args' exists in the response even if parsing fails
+    print("Program starts execution!")
+    try:
+        args = request.values.to_dict()
+        print('args',args)
+        logger.info(args)
+        power_df = get_data_from_mongo(args)
+        model = model_prediction(power_df,args)
+        insert_data_into_mongo(model,args)
+        success = 1
+    except Exception as e:
+        my_exception = traceback.format_exc().replace("\n", "\t")  # flatten the traceback to one line
+        result['msg'] = my_exception
+    end_time = time.time() 
+   
+    result['success'] = success
+    result['args'] = args
+    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
+    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
+    print("Program execution ends!")
+    return result
+
+
+if __name__=="__main__":  
+    print("Program starts execution!")
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    logger = logging.getLogger("model_prediction_bp log")
+    from waitress import serve
+    serve(app, host="0.0.0.0", port=10104)
+    print("server start!")
+    
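A hedged usage sketch for the new endpoint, assuming the service is running locally on port 10104. The MongoDB connection fields that get_data_from_mongo, get_h5_model_from_mongo and insert_data_into_mongo expect are defined elsewhere in the repository, so every value below is a placeholder:

```python
import requests

# Placeholder request parameters; only the keys read in model_prediction()
# (features, time_steps, col_time, model_name, col_reserve) are shown here.
args = {
    'features': 'radiation,temperature,humidity',  # parsed by str_to_list
    'time_steps': '1',
    'col_time': 'dateTime',
    'model_name': 'bp_model',
    'col_reserve': 'dateTime,realPower',
}
resp = requests.post('http://127.0.0.1:10104/model_prediction_bp', data=args)
print(resp.json())  # {'success': 1, ...} on success; the traceback lands in 'msg' on failure
```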

+ 3 - 0
models_processing/model_predict/model_prediction_lightgbm.py

@@ -19,6 +19,9 @@ def model_prediction(df,args):
     db = client[mongodb_database]
     collection = db[mongodb_model_table]
     model_data = collection.find_one({"model_name": model_name})
+    if 'is_limit' in df.columns:
+        df = df[df['is_limit'] == False]
+
     if model_data is not None:
         model_binary = model_data['model']  # this field must hold the model's binary data
         # Deserialize the model

+ 2 - 0
models_processing/model_predict/model_prediction_lstm.py

@@ -24,6 +24,8 @@ def create_sequences(data_features,data_target,time_steps):
 
 
 def model_prediction(df,args):
+    if 'is_limit' in df.columns:
+        df = df[df['is_limit'] == False]
     features, time_steps, col_time, model_name,col_reserve =  str_to_list(args['features']), int(args['time_steps']),args['col_time'],args['model_name'],str_to_list(args['col_reserve'])
     feature_scaler,target_scaler = get_scaler_model_from_mongo(args)
     df = df.sort_values(by=col_time).fillna(method='ffill').fillna(method='bfill')
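This hunk's context references create_sequences (also commented out in model_prediction_bp.py); its body is not part of this diff, but a typical sliding-window implementation looks like the sketch below (an assumption, not necessarily the repository's code):

```python
import numpy as np

# Hypothetical sliding-window builder; the actual create_sequences in
# model_prediction_lstm.py is not shown in this diff.
def create_sequences(data_features, data_target, time_steps):
    X, y = [], []
    for i in range(len(data_features) - time_steps + 1):
        X.append(data_features[i:i + time_steps])      # window of features
        if len(data_target) > 0:
            y.append(data_target[i + time_steps - 1])  # target aligned to window end
    return np.array(X), np.array(y)
```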

+ 2 - 0
models_processing/model_predict/res_prediction.py

@@ -30,6 +30,8 @@ def model_prediction_lightgbm():
         logger.info(args)
         col_reserve = str_to_list(args['col_reserve'])
         power_df = get_data_from_mongo(args)
+        if 'is_limit' in power_df.columns:
+            power_df = power_df[power_df['is_limit'] == False]
         power_df['model'] = args['model']
         power_df['predict'] = power_df[args['col_pre']]
         features_reserve = col_reserve + ['model', 'predict']
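The same is_limit guard now appears in every prediction path. A toy illustration of what it filters (the column values are made up):

```python
import pandas as pd

df = pd.DataFrame({
    'realPower': [10.0, 0.0, 12.5],    # hypothetical power readings
    'is_limit': [False, True, False],  # True marks curtailed (power-limited) samples
})
# Mirror the guard added in these hunks: keep only non-curtailed rows
if 'is_limit' in df.columns:
    df = df[df['is_limit'] == False]
print(df)  # rows 0 and 2 remain
```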

+ 129 - 0
models_processing/model_train/model_training_bp.py

@@ -0,0 +1,129 @@
+import numpy as np
+from sklearn.model_selection import train_test_split
+from flask import Flask,request
+import time
+import traceback
+import logging
+from sklearn.preprocessing import MinMaxScaler
+from io import BytesIO
+import joblib
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import LSTM, Dense, Dropout
+from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
+import tensorflow as tf
+from common.database_dml import get_data_from_mongo,insert_h5_model_into_mongo
+from common.processing_data_common import missing_features,str_to_list
+import time
+import random
+import matplotlib.pyplot as plt
+app = Flask('model_training_bp——service')
+
+def rmse(y_true, y_pred):
+    return tf.math.sqrt(tf.reduce_mean(tf.square(y_true - y_pred)))
+
+def draw_loss(history):
+    # Plot training and validation loss
+    plt.figure(figsize=(20, 8))
+    plt.plot(history.history['loss'], label='Training Loss')
+    plt.plot(history.history['val_loss'], label='Validation Loss')
+    plt.title('Loss Curve')
+    plt.xlabel('Epochs')
+    plt.ylabel('Loss')
+    plt.legend()
+    plt.show()
+# Build and train the BP model
+
+def build_model(data, args):
+    sleep_time = random.uniform(1, 20)  # random 1-20 s delay to stagger concurrent training jobs
+    time.sleep(sleep_time)
+    tf.keras.backend.clear_session()  # clear the current graph and session
+    # Set random seeds for reproducibility
+    np.random.seed(42)  # NumPy seed
+    tf.random.set_seed(42)  # TensorFlow seed
+    col_time, features, target = args['col_time'], str_to_list(args['features']), args['target']
+    if 'is_limit' in data.columns:
+        data = data[data['is_limit']==False]
+    # Drop days whose average feature missing rate exceeds 20%
+    data = missing_features(data, features, col_time)
+    train_data = data.sort_values(by=col_time).fillna(method='ffill').fillna(method='bfill')
+    # Create scalers for the features and the target
+    feature_scaler = MinMaxScaler(feature_range=(0, 1))
+    target_scaler = MinMaxScaler(feature_range=(0, 1))
+    # Scale the features and the target
+    scaled_features = feature_scaler.fit_transform(train_data[features])
+    scaled_target = target_scaler.fit_transform(train_data[[target]])
+    # Serialize both scalers
+    feature_scaler_bytes = BytesIO()
+    joblib.dump(feature_scaler, feature_scaler_bytes)
+    feature_scaler_bytes.seek(0)  # Reset pointer to the beginning of the byte stream
+    target_scaler_bytes = BytesIO()
+    joblib.dump(target_scaler, target_scaler_bytes)
+    target_scaler_bytes.seek(0)
+
+    # Split into training and test sets
+    X_train, X_test, y_train, y_test = train_test_split(scaled_features, scaled_target, test_size=0.2, random_state=43)
+
+    # Build the BP (fully connected) model
+    model = Sequential([
+        Dense(64, input_dim=X_train.shape[1], activation='relu'),  # input + hidden layer, 64 neurons
+        Dropout(0.2),
+        Dense(32, activation='relu'),  # hidden layer, 32 neurons
+        Dropout(0.3),  # Dropout layer: 30% of activations randomly dropped
+        Dense(1, activation='linear')  # output layer, 1 neuron (regression)
+    ])
+
+    # Compile the model
+    model.compile(optimizer='adam', loss='mean_squared_error')
+    # Define EarlyStopping and ReduceLROnPlateau callbacks
+    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1)
+    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=1)
+    # Train the model on the GPU
+    with tf.device('/GPU:1'):  # assumes a second GPU ('/GPU:1') is available
+        history = model.fit(X_train, y_train,
+                        epochs=100,
+                        batch_size=32,
+                        validation_data=(X_test, y_test),
+                        verbose=2,
+                        shuffle=False,
+                        callbacks=[early_stopping, reduce_lr])
+    draw_loss(history)
+    return model,feature_scaler_bytes,target_scaler_bytes
+
+
+@app.route('/model_training_bp', methods=['POST'])
+def model_training_bp():
+    # Record the program start time
+    start_time = time.time()  
+    result = {}
+    success = 0
+    args = {}  # ensure 'args' exists in the response even if parsing fails
+    print("Program starts execution!")
+    try:
+        args = request.values.to_dict()
+        print('args',args)
+        logger.info(args)
+        power_df = get_data_from_mongo(args)
+        model,feature_scaler_bytes,target_scaler_bytes = build_model(power_df,args)
+        insert_h5_model_into_mongo(model,feature_scaler_bytes,target_scaler_bytes ,args)
+        success = 1
+    except Exception as e:
+        my_exception = traceback.format_exc().replace("\n", "\t")  # flatten the traceback to one line
+        result['msg'] = my_exception
+    end_time = time.time() 
+   
+    result['success'] = success
+    result['args'] = args
+    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
+    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
+    print("Program execution ends!")
+    return result
+
+
+if __name__=="__main__":  
+    print("Program starts execution!")
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    logger = logging.getLogger("model_training_bp log")
+    from waitress import serve
+    serve(app, host="0.0.0.0", port=10103,threads=4)
+    print("server start!")

+ 2 - 1
requirements.txt

@@ -15,4 +15,5 @@ joblib==1.3.2
 tensorflow==2.2.0
 matplotlib==3.5.3
 Keras==2.3.1
-protobuf==3.20.3
+protobuf==3.20.3
+APScheduler==3.10.4

+ 4 - 2
run_all.py

@@ -17,8 +17,10 @@ services = [
     ("models_processing/model_predict/model_prediction_lstm.py", 10097),
     ("post_processing/post_processing.py", 10098),
     ("evaluation_processing/analysis.py", 10099),
-    ("models_processing/model_predict/res_prediction.py", 10100)
-
+    ("data_processing/data_operation/pre_data_ftp.py", 10101),
+    ("data_processing/data_operation/data_nwp_ftp.py", 10102),
+    ("models_processing/model_train/model_training_bp.py", 10103),
+    ("models_processing/model_predict/model_prediction_bp.py", 10104),
 ]
 
 # Get the root directory of the current script
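run_all.py's launch logic is outside this hunk; a plausible sketch of how the registered (script, port) pairs could be started, assuming each script is simply run as its own process (the real file may differ):

```python
import os
import subprocess

root = os.path.dirname(os.path.abspath(__file__))  # root directory of run_all.py
services = [
    ("models_processing/model_train/model_training_bp.py", 10103),
    ("models_processing/model_predict/model_prediction_bp.py", 10104),
]
# Each service binds its own hard-coded port, so the port here is informational only
procs = [subprocess.Popen(["python", os.path.join(root, script)])
         for script, port in services]
for p in procs:
    p.wait()
```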