|
@@ -4,8 +4,8 @@
|
|
# @Time :2025/5/08 14:03
|
|
# @Time :2025/5/08 14:03
|
|
# @Author :David
|
|
# @Author :David
|
|
# @Company: shenyang JY
|
|
# @Company: shenyang JY
|
|
-
|
|
|
|
-from tensorflow.keras.layers import Input, Dense, LSTM, concatenate, Conv1D, Conv2D, MaxPooling1D, Reshape, Flatten, LayerNormalization, Dropout
|
|
|
|
|
|
+from tensorflow.keras.initializers import glorot_uniform, orthogonal
|
|
|
|
+from tensorflow.keras.layers import Input, Dense, LSTM, concatenate, Conv1D, Conv2D, MaxPooling1D, Reshape, Flatten, LayerNormalization, Dropout, Layer, Add, MultiHeadAttention, Dropout
|
|
from tensorflow.keras.models import Model, load_model
|
|
from tensorflow.keras.models import Model, load_model
|
|
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard, ReduceLROnPlateau
|
|
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard, ReduceLROnPlateau
|
|
from tensorflow.keras import optimizers, regularizers
|
|
from tensorflow.keras import optimizers, regularizers
|
|
@@ -18,6 +18,49 @@ import argparse
|
|
model_lock = Lock()
|
|
model_lock = Lock()
|
|
set_deterministic(42)
|
|
set_deterministic(42)
|
|
|
|
|
|
|
|
+
|
|
|
|
class PositionalEncoding(tf.keras.layers.Layer):
    """Fixed sinusoidal positional-encoding layer (serialization-friendly).

    Adds a precomputed sine/cosine position table to the input sequence.
    NOTE(review): the sine half (even angle columns) and cosine half (odd
    angle columns) are CONCATENATED along the feature axis, not interleaved
    as in the original Transformer paper — confirm this matches models that
    were saved with this layer before changing it.

    Args:
        max_len: maximum sequence length the encoding table covers.
        d_model: feature dimension of the inputs this layer is added to.
    """

    def __init__(self, max_len, d_model, **kwargs):
        super().__init__(**kwargs)
        # Keep constructor args as instance attributes so get_config()
        # can serialize them (required for load_model round-trips).
        self.max_len = max_len
        self.d_model = d_model
        # The encoding table is built once at construction time.
        self.position_embedding = self.positional_encoding(max_len, d_model)

    def get_angles(self, pos, i, d_model):
        # Angle rate: 1 / 10000^(2*floor(i/2)/d_model); `pos * angles`
        # broadcasts (max_len, 1) x (1, d_model) -> (max_len, d_model).
        angles = 1 / tf.pow(10000., (2 * (i // 2)) / tf.cast(d_model, tf.float32))
        return pos * angles

    def positional_encoding(self, max_len, d_model):
        # Build the full (max_len, d_model) angle matrix.
        angle_rads = self.get_angles(
            pos=tf.range(max_len, dtype=tf.float32)[:, tf.newaxis],
            i=tf.range(d_model, dtype=tf.float32)[tf.newaxis, :],
            d_model=d_model
        )
        # sin of even-indexed columns, cos of odd-indexed columns,
        # concatenated (not interleaved) along the last axis.
        sines = tf.math.sin(angle_rads[:, 0::2])
        cosines = tf.math.cos(angle_rads[:, 1::2])
        pos_encoding = tf.concat([sines, cosines], axis=-1)
        return pos_encoding[tf.newaxis, ...]  # add leading batch dimension

    def call(self, inputs):
        # Slice the table to the runtime sequence length.
        # NOTE(review): assumes seq_len <= max_len — a longer input would
        # broadcast-fail on the add below; verify callers respect this.
        seq_len = tf.shape(inputs)[1]
        return inputs + self.position_embedding[:, :seq_len, :]

    def get_config(self):
        # Serialize constructor args so the layer can be rebuilt by
        # load_model with custom_objects (the key serialization step).
        config = super().get_config()
        config.update({
            'max_len': self.max_len,
            'd_model': self.d_model,
        })
        return config
|
|
|
|
+
|
|
|
|
+
|
|
class TransformerHandler(object):
|
|
class TransformerHandler(object):
|
|
def __init__(self, logger, args):
|
|
def __init__(self, logger, args):
|
|
self.logger = logger
|
|
self.logger = logger
|
|
@@ -32,7 +75,7 @@ class TransformerHandler(object):
|
|
try:
|
|
try:
|
|
with model_lock:
|
|
with model_lock:
|
|
loss = region_loss(self.opt)
|
|
loss = region_loss(self.opt)
|
|
- self.model, self.model_params = get_keras_model_from_mongo(args, {type(loss).__name__: loss})
|
|
|
|
|
|
+ self.model, self.model_params = get_keras_model_from_mongo(args, {type(loss).__name__: loss, 'PositionalEncoding': PositionalEncoding})
|
|
except Exception as e:
|
|
except Exception as e:
|
|
self.logger.info("加载模型权重失败:{}".format(e.args))
|
|
self.logger.info("加载模型权重失败:{}".format(e.args))
|
|
|
|
|
|
@@ -41,53 +84,45 @@ class TransformerHandler(object):
|
|
hidden_size = opt.Model.get('hidden_size', 64)
|
|
hidden_size = opt.Model.get('hidden_size', 64)
|
|
num_heads = opt.Model.get('num_heads', 4)
|
|
num_heads = opt.Model.get('num_heads', 4)
|
|
ff_dim = opt.Model.get('ff_dim', 128)
|
|
ff_dim = opt.Model.get('ff_dim', 128)
|
|
- l2_reg = regularizers.l2(opt.Model.get('lambda_value_2', 0.0))
|
|
|
|
|
|
+ l2_reg = regularizers.l2(opt.Model.get('lambda_value_2', 0.01))
|
|
|
|
|
|
- nwp_input = Input(shape=(opt.Model['time_step'] * time_series, opt.Model['input_size']), name='nwp')
|
|
|
|
|
|
+ nwp_input = Input(shape=(opt.Model['time_step'] * time_series, opt.Model['input_size']))
|
|
|
|
|
|
- # 输入嵌入
|
|
|
|
- x = Conv1D(hidden_size, 1, kernel_regularizer=l2_reg)(nwp_input)
|
|
|
|
|
|
+ # 嵌入层 + 位置编码
|
|
|
|
+ x = Conv1D(hidden_size, kernel_size=3, padding='same', kernel_regularizer=l2_reg)(nwp_input)
|
|
|
|
+ x = PositionalEncoding(opt.Model['time_step'], hidden_size)(x)
|
|
|
|
|
|
- # Transformer编码器层
|
|
|
|
|
|
+ # Transformer编码层(带残差连接)
|
|
for _ in range(opt.Model.get('num_layers', 2)):
|
|
for _ in range(opt.Model.get('num_layers', 2)):
|
|
- # 多头自注意力
|
|
|
|
- x = tf.keras.layers.MultiHeadAttention(
|
|
|
|
- num_heads=num_heads, key_dim=hidden_size,
|
|
|
|
- kernel_regularizer=l2_reg
|
|
|
|
- )(x, x)
|
|
|
|
|
|
+ # 自注意力
|
|
|
|
+ residual = x
|
|
|
|
+ x = MultiHeadAttention(num_heads=num_heads, key_dim=hidden_size)(x, x)
|
|
|
|
+ x = Dropout(0.1)(x)
|
|
|
|
+ x = Add()([residual, x])
|
|
x = LayerNormalization()(x)
|
|
x = LayerNormalization()(x)
|
|
- x = tf.keras.layers.Dropout(0.1)(x)
|
|
|
|
|
|
|
|
# 前馈网络
|
|
# 前馈网络
|
|
- x = tf.keras.layers.Dense(ff_dim, activation='relu', kernel_regularizer=l2_reg)(x)
|
|
|
|
- x = tf.keras.layers.Dense(hidden_size, kernel_regularizer=l2_reg)(x)
|
|
|
|
|
|
+ residual = x
|
|
|
|
+ x = Dense(ff_dim, activation='relu')(x)
|
|
|
|
+ x = Dense(hidden_size)(x)
|
|
|
|
+ x = Dropout(0.1)(x)
|
|
|
|
+ x = Add()([residual, x])
|
|
x = LayerNormalization()(x)
|
|
x = LayerNormalization()(x)
|
|
- x = tf.keras.layers.Dropout(0.1)(x)
|
|
|
|
|
|
|
|
- # 提取中间时间步
|
|
|
|
- # start_idx = (time_steps - output_steps) // 2
|
|
|
|
- # x = x[:, start_idx:start_idx + output_steps, :]
|
|
|
|
|
|
+ # 输出层(预测每个时间步)
|
|
|
|
+ output = Dense(1, activation='linear')(x)
|
|
|
|
+ # output = tf.keras.layers.Lambda(lambda x: tf.squeeze(x, axis=-1))(output)
|
|
|
|
+ output = Flatten(name='Flatten')(output)
|
|
|
|
|
|
- # 输出层
|
|
|
|
- output = Dense(1, name='cdq_output')(x) # 或者使用所有时间步
|
|
|
|
- output = Flatten(name='flatten')(output)
|
|
|
|
model = Model(nwp_input, output)
|
|
model = Model(nwp_input, output)
|
|
-
|
|
|
|
- # 编译模型
|
|
|
|
- adam = optimizers.Adam(
|
|
|
|
- learning_rate=opt.Model.get('learning_rate', 0.001),
|
|
|
|
- beta_1=0.9, beta_2=0.999, epsilon=1e-7, amsgrad=True
|
|
|
|
- )
|
|
|
|
- loss = region_loss(opt)
|
|
|
|
- model.compile(loss=loss, optimizer=adam)
|
|
|
|
-
|
|
|
|
|
|
+ model.compile(loss='mse', optimizer=optimizers.Adam(learning_rate=1e-4))
|
|
return model
|
|
return model
|
|
|
|
|
|
def train_init(self):
|
|
def train_init(self):
|
|
try:
|
|
try:
|
|
# 进行加强训练,支持修模
|
|
# 进行加强训练,支持修模
|
|
loss = region_loss(self.opt)
|
|
loss = region_loss(self.opt)
|
|
- base_train_model, self.model_params = get_keras_model_from_mongo(vars(self.opt), {type(loss).__name__: loss})
|
|
|
|
|
|
+ base_train_model, self.model_params = get_keras_model_from_mongo(vars(self.opt), {type(loss).__name__: loss, 'PositionalEncoding': PositionalEncoding})
|
|
base_train_model.summary()
|
|
base_train_model.summary()
|
|
self.logger.info("已加载加强训练基础模型")
|
|
self.logger.info("已加载加强训练基础模型")
|
|
return base_train_model
|
|
return base_train_model
|