@@ -19,20 +19,21 @@ from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoa
 from tensorflow.keras import optimizers, regularizers
 import tensorflow.keras.backend as K
 import tensorflow as tf
-from common.data_cleaning import cleaning
+from bson.decimal128 import Decimal128
+from common.data_cleaning import cleaning, key_field_row_cleaning
 from common.database_dml import *
 from common.processing_data_common import missing_features, str_to_list
 from data_processing.data_operation.data_handler import DataHandler
 from threading import Lock
 import time, yaml
-import random
+import random, numbers
 import matplotlib.pyplot as plt
 model_lock = Lock()
 from common.logs import Log
 logger = logging.getLogger()
 # logger = Log('models-processing').logger
 np.random.seed(42) # NumPy random seed
-tf.random.set_random_seed(42) # TensorFlow random seed
+# tf.set_random_seed(42) # TensorFlow random seed
 app = Flask('nn_bp——service')
 
 with app.app_context():
@@ -41,26 +42,42 @@ with app.app_context():
 
     dh = DataHandler(logger, arguments)
     def train_data_handler(data, opt):
+        """
+        Training data preprocessing:
+        cleaning + imputation + normalization
+        Args:
+            data: data loaded from mongo
+            opt: argument namespace
+        return:
+            x_train
+            x_valid
+            y_train
+            y_valid
+        """
         col_time, features, target = opt.col_time, opt.features, opt.target
+        # filter out the flagged power-curtailment records
         if 'is_limit' in data.columns:
             data = data[data['is_limit'] == False]
+        # select the feature columns and convert them to numeric
+        train_data = data[[col_time]+features+[target]]
         # drop days whose average feature missing rate exceeds 20%
-        data = missing_features(data, features, col_time)
-        train_data = data.sort_values(by=col_time).fillna(method='ffill').fillna(method='bfill')
-
+        train_data = missing_features(train_data, features, col_time)
         train_data = train_data.sort_values(by=col_time)
+        # train_data = train_data.sort_values(by=col_time).fillna(method='ffill').fillna(method='bfill')
         # feature preprocessing after curtailment cleaning: 1. remove nulls/outliers 2. impute missing values
-        train_data_cleaned = cleaning(train_data, 'nn_bp:features', logger, features)
-        train_data = dh.fill_train_data(train_data_cleaned)
+        train_data_cleaned = key_field_row_cleaning(train_data, features+[target], logger)
+        train_data_cleaned = train_data_cleaned.applymap(lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x, numbers.Number) else x)
         # create the scaler for features and target
         train_scaler = MinMaxScaler(feature_range=(0, 1))
         # scale the features and target
-        scaled_train_data = train_scaler.fit_transform(train_data[features+[target]])
+        scaled_train_data = train_scaler.fit_transform(train_data_cleaned[features+[target]])
+        train_data_cleaned[features+[target]] = scaled_train_data
+        train_datas = dh.fill_train_data(train_data_cleaned, col_time)
         # save the two scalers
         scaled_train_bytes = BytesIO()
         joblib.dump(scaled_train_data, scaled_train_bytes)
         scaled_train_bytes.seek(0)  # Reset pointer to the beginning of the byte stream
-        x_train, x_valid, y_train, y_valid = dh.get_train_data(scaled_train_data)
+        x_train, x_valid, y_train, y_valid = dh.get_train_data(train_datas, col_time, features, target)
         return x_train, x_valid, y_train, y_valid, scaled_train_bytes
 
     def pre_data_handler(data, args):
@@ -92,22 +109,20 @@ class BPHandler(object):
     def get_keras_model(opt):
         # db_loss = NorthEastLoss(opt)
         # south_loss = SouthLoss(opt)
+        from models_processing.losses.loss_cdq import rmse
         l1_reg = regularizers.l1(opt.Model['lambda_value_1'])
         l2_reg = regularizers.l2(opt.Model['lambda_value_2'])
-        nwp_input = Input(shape=(opt.Model['time_step'], opt.Model['input_size_nwp']), name='nwp')
-        env_input = Input(shape=(opt.Model['his_points'], opt.Model['input_size_env']), name='env')
+        nwp_input = Input(shape=(opt.Model['time_step'], opt.Model['input_size']), name='nwp')
 
-        con1 = Conv1D(filters=64, kernel_size=1, strides=1, padding='valid', activation='relu',
-                      kernel_regularizer=l2_reg)(nwp_input)
+        con1 = Conv1D(filters=64, kernel_size=1, strides=1, padding='valid', activation='relu', kernel_regularizer=l2_reg)(nwp_input)
         d1 = Dense(32, activation='relu', name='d1', kernel_regularizer=l1_reg)(con1)
         nwp = Dense(8, activation='relu', name='d2', kernel_regularizer=l1_reg)(d1)
 
         output = Dense(opt.Model['output_size'], name='d5')(nwp)
-        model = Model([env_input, nwp_input], output)
-        adam = optimizers.Adam(learning_rate=opt.Model['learning_rate'], beta_1=0.9, beta_2=0.999, epsilon=1e-7,
-                               amsgrad=True)
+        model = Model(nwp_input, output)
+        adam = optimizers.Adam(learning_rate=opt.Model['learning_rate'], beta_1=0.9, beta_2=0.999, epsilon=1e-7, amsgrad=True)
         reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.01, patience=5, verbose=1)
-        model.compile(loss='rmse', optimizer=adam)
+        model.compile(loss=rmse, optimizer=adam)
         return model
 
     def train_init(self, opt):
@@ -125,16 +140,14 @@ class BPHandler(object):
 
     def training(self, opt, train_and_valid_data):
         model = self.train_init(opt)
-        tf.reset_default_graph() # clear the default graph
+        # tf.reset_default_graph() # clear the default graph
         train_x, train_y, valid_x, valid_y = train_and_valid_data
         print("----------", np.array(train_x[0]).shape)
         print("++++++++++", np.array(train_x[1]).shape)
 
-        check_point = ModelCheckpoint(filepath='./var/' + 'fmi.h5', monitor='val_loss',
-                                      save_best_only=True, mode='auto')
+        check_point = ModelCheckpoint(filepath='./var/' + 'fmi.h5', monitor='val_loss', save_best_only=True, mode='auto')
         early_stop = EarlyStopping(monitor='val_loss', patience=opt.Model['patience'], mode='auto')
-        history = model.fit(train_x, train_y, batch_size=opt.Model['batch_size'], epochs=opt.Model['epoch'], verbose=2,
-                            validation_data=(valid_x, valid_y), callbacks=[check_point, early_stop], shuffle=False)
+        history = model.fit(train_x, train_y, batch_size=opt.Model['batch_size'], epochs=opt.Model['epoch'], verbose=2, validation_data=(valid_x, valid_y), callbacks=[check_point, early_stop], shuffle=False)
         loss = np.round(history.history['loss'], decimals=5)
         val_loss = np.round(history.history['val_loss'], decimals=5)
         self.logger.info("-----模型训练经过{}轮迭代-----".format(len(loss)))
@@ -225,13 +238,16 @@ if __name__ == "__main__":
     print("server start!")
 
     bp = BPHandler(logger)
-    args = copy.deepcopy(bp)
-    opt = argparse.Namespace(**arguments)
-    logger.info(args)
     args_dict = {"mongodb_database": 'david_test', 'scaler_table': 'j00083_scaler', 'model_name': 'bp1.0.test',
-                 'model_table': 'j00083_model', 'mongodb_read_table': 'j00083'}
+                 'model_table': 'j00083_model', 'mongodb_read_table': 'j00083', 'col_time': 'dateTime',
+                 'features': 'speed10,direction10,speed30,direction30,speed50,direction50,speed70,direction70,speed90,direction90,speed110,direction110,speed150,direction150,speed170,direction170'}
+    args_dict['features'] = args_dict['features'].split(',')
+    arguments.update(args_dict)
+    opt = argparse.Namespace(**arguments)
+    opt.Model['input_size'] = len(opt.features)
     train_data = get_data_from_mongo(args_dict)
    train_x, valid_x, train_y, valid_y, scaled_train_bytes = train_data_handler(train_data, opt)
-    bp_model = bp.training(opt, [train_x, valid_x, train_y, valid_y])
+
+    bp_model = bp.training(opt, [train_x, train_y, valid_x, valid_y])
     insert_trained_model_into_mongo(bp_model, args_dict)
-    insert_scaler_model_into_mongo(scaled_train_bytes, args)
+    insert_scaler_model_into_mongo(scaled_train_bytes, args_dict)