@@ -6,11 +6,10 @@
# company: shenyang JY

import numpy as np
-from sklearn.model_selection import train_test_split
from flask import Flask, request
import time
import traceback
-import logging
+import logging, argparse, copy
from sklearn.preprocessing import MinMaxScaler
from io import BytesIO
import joblib
@@ -20,39 +19,27 @@ from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoa
from tensorflow.keras import optimizers, regularizers
import tensorflow.keras.backend as K
import tensorflow as tf
-
from common.data_cleaning import cleaning
from common.database_dml import *
from common.processing_data_common import missing_features, str_to_list
from data_processing.data_operation.data_handler import DataHandler
from threading import Lock
-import time
+import time, yaml
import random
import matplotlib.pyplot as plt
model_lock = Lock()
+from common.logs import Log
+logger = Log('models-processing').logger
+np.random.seed(42)  # NumPy random seed
+tf.random.set_random_seed(42)  # TensorFlow random seed (TF1-style API)
+app = Flask('nn_bp——service')
+
+with app.app_context():
+    with open('./models_processing/model_koi/bp.yaml', 'r', encoding='utf-8') as f:
+        arguments = yaml.safe_load(f)
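(For orientation, not part of the diff: a hypothetical sketch of the structure yaml.safe_load is assumed to produce here, inferred only from the keys this module reads; all values are placeholders.)

arguments = {
    'Model': {'learning_rate': 0.001, 'batch_size': 32, 'epoch': 100,
              'patience': 10, 'add_train': False},
    'features': 'windSpeed,temperature',  # comma-separated feature columns (placeholder names)
    'col_time': 'dateTime',               # timestamp column
    'target': 'realPower',                # regression target column
    'time_steps': 16,
    'model_name': 'bp_model',
    'col_reserve': 'dateTime',            # columns carried through to the output
}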
-app = Flask('model_training_bp——service')
-
-def draw_loss(history):
-    # plot training-set and validation-set loss
-    plt.figure(figsize=(20, 8))
-    plt.plot(history.history['loss'], label='Training Loss')
-    plt.plot(history.history['val_loss'], label='Validation Loss')
-    plt.title('Loss Curve')
-    plt.xlabel('Epochs')
-    plt.ylabel('Loss')
-    plt.legend()
-    plt.show()
-
-dh = DataHandler()
+dh = DataHandler(logger, arguments)
def train_data_handler(data, args):
-    sleep_time = random.uniform(1, 20)  # random delay of 1 to 20 seconds, to stagger concurrent training jobs
-    time.sleep(sleep_time)
-    tf.keras.backend.clear_session()  # clear the current graph and session
-    # set random seeds
-    np.random.seed(42)  # NumPy random seed
-    tf.random.set_seed(42)  # TensorFlow random seed
    col_time, features, target = args['col_time'], str_to_list(args['features']), args['target']
    if 'is_limit' in data.columns:
        data = data[data['is_limit'] == False]
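(Aside, not part of the diff: str_to_list from common.processing_data_common is assumed to split a comma-separated string into a list of column names; a hypothetical equivalent, for reference only:)

def str_to_list(s):
    # assumed behavior: 'a,b,c' -> ['a', 'b', 'c']
    return [item.strip() for item in str(s).split(',')]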
@@ -78,20 +65,12 @@ def pre_data_handler(data, args):
    features, time_steps, col_time, model_name, col_reserve = str_to_list(args['features']), int(args['time_steps']), args['col_time'], args['model_name'], str_to_list(args['col_reserve'])
    feature_scaler, target_scaler = get_scaler_model_from_mongo(args)
    pre_data = data.sort_values(by=col_time)
-    # feature preprocessing for the prediction data: 1. clean nulls and outliers 2. fill missing values
-    pre_data_cleaned = cleaning(pre_data, '', logger, pre_data.columns.tolist())
-    pre_data = dh.fill_train_data(pre_data_cleaned)
    scaled_features = feature_scaler.transform(pre_data[features])
    return scaled_features
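(Aside, not part of the diff: get_scaler_model_from_mongo also returns target_scaler, which a caller would typically use to map scaled predictions back to the original units; a minimal hypothetical sketch:)

pred_scaled = bp.predict(scaled_features, args)
pred = target_scaler.inverse_transform(np.array(pred_scaled).reshape(-1, 1))  # undo MinMax scaling on the target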

-class NPHandler(object):
-    train = False
-
-    def __init__(self, log, args, graph, sess):
-        self.logger = log
-        self.graph = graph
-        self.sess = sess
-        opt = args.parse_args_and_yaml()
+class BPHandler(object):
+    def __init__(self, logger):
+        self.logger = logger
        self.model = None

    def get_model(self, args):
@@ -103,7 +82,7 @@ class NPHandler(object):
            # NPHandler.model = NPHandler.get_keras_model(opt)
            self.model = get_h5_model_from_mongo(args)
        except Exception as e:
-            print("Failed to load model weights: {}".format(e.args))
+            self.logger.info("Failed to load model weights: {}".format(e.args))

    @staticmethod
    def get_keras_model(opt):
@@ -124,14 +103,14 @@ class NPHandler(object):
        adam = optimizers.Adam(learning_rate=opt.Model['learning_rate'], beta_1=0.9, beta_2=0.999, epsilon=1e-7,
                               amsgrad=True)
        reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.01, patience=5, verbose=1)
-        model.compile(loss=rmse, optimizer=adam)
+        model.compile(loss=rmse, optimizer=adam)  # keep the custom rmse function: the string 'rmse' is not a built-in Keras loss
        return model
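(Aside, not part of the diff: the custom rmse loss referenced above is assumed to be defined elsewhere in this module; a minimal sketch using the backend alias K imported at the top of the file:)

def rmse(y_true, y_pred):
    # root-mean-squared error as a Keras-backend loss
    return K.sqrt(K.mean(K.square(y_pred - y_true)))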

-    def train_init(self, opt, args):
+    def train_init(self, opt):
        try:
            if opt.Model['add_train']:
                # incremental training on top of an existing model (supports model repair)
-                base_train_model = get_h5_model_from_mongo(args)
+                base_train_model = get_h5_model_from_mongo(vars(opt))
                base_train_model.summary()
                self.logger.info("Loaded the base model for incremental training")
            else:
@@ -141,23 +120,17 @@ class NPHandler(object):
            self.logger.info("Failed to load model weights for incremental training: {}".format(e.args))

    def training(self, opt, train_and_valid_data):
-        model = self.train_init(opt)
-        train_X, train_Y, valid_X, valid_Y = train_and_valid_data
-        print("----------", np.array(train_X[0]).shape)
-        print("++++++++++", np.array(train_X[1]).shape)
-        # weight_lstm_1, bias_lstm_1 = model.get_layer('d1').get_weights()
-        # print("weight_lstm_1 = ", weight_lstm_1)
-        # print("bias_lstm_1 = ", bias_lstm_1)
+        tf.reset_default_graph()  # clear the default graph (TF1-style API) before building the model, not after
+        model = self.train_init(opt)
+        train_x, train_y, valid_x, valid_y = train_and_valid_data
+        print("----------", np.array(train_x[0]).shape)
+        print("++++++++++", np.array(train_x[1]).shape)

        check_point = ModelCheckpoint(filepath='./var/' + 'fmi.h5', monitor='val_loss',
                                      save_best_only=True, mode='auto')
        early_stop = EarlyStopping(monitor='val_loss', patience=opt.Model['patience'], mode='auto')
-        # tbCallBack = TensorBoard(log_dir='../figure',
-        #                          histogram_freq=0,
-        #                          write_graph=True,
-        #                          write_images=True)
-        history = model.fit(train_X, train_Y, batch_size=opt.Model['batch_size'], epochs=opt.Model['epoch'], verbose=2,
-                            validation_data=(valid_X, valid_Y), callbacks=[check_point, early_stop], shuffle=False)
+        history = model.fit(train_x, train_y, batch_size=opt.Model['batch_size'], epochs=opt.Model['epoch'], verbose=2,
+                            validation_data=(valid_x, valid_y), callbacks=[check_point, early_stop], shuffle=False)
        loss = np.round(history.history['loss'], decimals=5)
        val_loss = np.round(history.history['val_loss'], decimals=5)
        self.logger.info("-----model training ran for {} epochs-----".format(len(loss)))
@@ -170,56 +143,25 @@ class NPHandler(object):
        self.logger.info("Running the prediction method")
        return result
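(Aside, not part of the diff: the checkpoint path above assumes './var/' already exists; Keras's ModelCheckpoint typically does not create missing directories. A defensive one-liner, if needed:)

import os
os.makedirs('./var', exist_ok=True)  # ensure the checkpoint directory exists before training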

-def build_model(data, args):
-    # split into training and test sets
-    X_train, X_test, y_train, y_test = train_test_split(scaled_features, scaled_target, test_size=0.2, random_state=43)
-
-    # build the feed-forward model
-    model = Sequential([
-        Dense(64, input_dim=X_train.shape[1], activation='relu'),  # input plus first hidden layer, 64 neurons
-        Dropout(0.2),
-        Dense(32, activation='relu'),  # second hidden layer, 32 neurons
-        Dropout(0.3),  # dropout layer: 30% of the neuron outputs are randomly dropped
-        Dense(1, activation='linear')  # output layer, 1 neuron (for the regression task)
-    ])
-
-    # compile the model
-    model.compile(optimizer='adam', loss='mean_squared_error')
-    # define the EarlyStopping and ReduceLROnPlateau callbacks
-    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1)
-    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=1)
-    # train the model
-    # use the GPU for training
-    with tf.device('/GPU:1'):
-        history = model.fit(X_train, y_train,
-                            epochs=100,
-                            batch_size=32,
-                            validation_data=(X_test, y_test),
-                            verbose=2,
-                            shuffle=False,
-                            callbacks=[early_stopping, reduce_lr])
-    draw_loss(history)
-    return model, feature_scaler_bytes, target_scaler_bytes
-
-
@app.route('/model_training_bp', methods=['POST'])
def model_training_bp():
    # record the program start time
    start_time = time.time()
    result = {}
    success = 0
-    nh = NPHandler()
+    bp = BPHandler(logger)
    print("Program starts execution!")
    try:
-        args = request.values.to_dict()
-        print('args', args)
+        params_dict = request.values.to_dict()
+        args = copy.deepcopy(arguments)  # copy.deepcopy: plain dicts have no .deepcopy() method
+        args.update(params_dict)
+        opt = argparse.Namespace(**args)
        logger.info(args)
-        power_df = get_data_from_mongo(args)
-        train_x, valid_x, train_y, valid_y, train_data_handler = dh.get_train_data(power_df)
-        np_model = nh.training(opt, [train_x, valid_x, train_y, valid_y])
-        model, feature_scaler_bytes, target_scaler_bytes = build_model(power_df, args)
-
-        insert_h5_model_into_mongo(np_model, train_data_handler, args)
+        train_data = get_data_from_mongo(args)
+        train_x, valid_x, train_y, valid_y, scaled_train_bytes = train_data_handler(train_data, args)
+        bp_model = bp.training(opt, [train_x, train_y, valid_x, valid_y])  # order must match the unpacking in BPHandler.training
+        insert_single_model_into_mongo(bp_model, args)
+        insert_scaler_model_into_mongo(scaled_train_bytes, args)
        success = 1
    except Exception as e:
        my_exception = traceback.format_exc()
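(Aside, not part of the diff: a hypothetical client call for smoke-testing the training endpoint, assuming the service listens on localhost:5000; the form fields below are only the keys this module is seen to read, and all values are placeholders.)

import requests

resp = requests.post('http://localhost:5000/model_training_bp',
                     data={'features': 'windSpeed,temperature',  # placeholder feature columns
                           'col_time': 'dateTime',
                           'target': 'realPower',
                           'model_name': 'bp_model'})
print(resp.status_code, resp.text)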
@@ -241,15 +183,18 @@ def model_prediction_bp():
    start_time = time.time()
    result = {}
    success = 0
-    nh = NPHandler()
+    bp = BPHandler(logger)
    print("Program starts execution!")
    try:
-        args = request.values.to_dict()
+        params_dict = request.values.to_dict()
+        args = copy.deepcopy(arguments)  # same fix as above: copy.deepcopy instead of a nonexistent dict.deepcopy()
+        args.update(params_dict)
+        opt = argparse.Namespace(**args)
        print('args', args)
        logger.info(args)
-        power_df = get_data_from_mongo(args)
-        scaled_features = pre_data_handler(power_df, args)
-        result = nh.predict(power_df, args)
+        predict_data = get_data_from_mongo(args)
+        scaled_features = pre_data_handler(predict_data, args)
+        result = bp.predict(scaled_features, args)
        insert_data_into_mongo(result, args)
        success = 1
    except Exception as e:
|