@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding:utf-8 -*-
-# @FileName :tf_lstm.py
-# @Time :2025/2/12 14:03
+# @FileName :tf_transformer.py
+# @Time :2025/5/08 14:03
 # @Author :David
 # @Company: shenyang JY
 
@@ -18,7 +18,7 @@ import argparse
 model_lock = Lock()
 set_deterministic(42)
 
-class TSHandler(object):
+class TransformerHandler(object):
     def __init__(self, logger, args):
         self.logger = logger
         self.opt = argparse.Namespace(**args)
@@ -57,58 +57,53 @@ class TSHandler(object):
         return model
 
     @staticmethod
-    def get_tcn_model(opt, time_series=1):
-        # Parameter setup
-        loss = region_loss(opt)
-        time_steps = 48  # input time steps (16*3)
-        output_steps = 16  # output time steps
+    def get_transformer_model(opt, time_series=1):
+        time_steps = 48
+        input_features = 21  # currently unused; the input width comes from opt.Model['input_size']
+        output_steps = 16
         hidden_size = opt.Model.get('hidden_size', 64)
-        l2_reg = regularizers.l2(opt.Model['lambda_value_2'])
-        dropout_rate = opt.Model.get('dropout_rate', 0.2)
-
-        # Input layer
-        nwp_input = Input(shape=(opt.Model['time_step']*time_series, opt.Model['input_size']), name='nwp')
+        num_heads = opt.Model.get('num_heads', 4)
+        ff_dim = opt.Model.get('ff_dim', 128)
+        l2_reg = regularizers.l2(opt.Model.get('lambda_value_2', 0.0))
+
+        nwp_input = Input(shape=(opt.Model['time_step'] * time_series, opt.Model['input_size']), name='nwp')
+
+        # Input embedding: 1x1 convolution projects the input features to hidden_size
+        x = Conv1D(hidden_size, 1, kernel_regularizer=l2_reg)(nwp_input)
+
+        # Transformer encoder layers
+        for _ in range(opt.Model.get('num_layers', 2)):
+            # Multi-head self-attention
+            x = tf.keras.layers.MultiHeadAttention(
+                num_heads=num_heads, key_dim=hidden_size,
+                kernel_regularizer=l2_reg
+            )(x, x)
+            x = LayerNormalization()(x)
+            x = tf.keras.layers.Dropout(0.1)(x)
 
-        # Initial convolution layer (expand the channel count to hidden_size)
-        x = Conv1D(filters=hidden_size, kernel_size=3, strides=1, padding='causal', activation='relu', kernel_regularizer=l2_reg)(nwp_input)
-
-        # Temporal convolution blocks (TCN blocks)
-        for d in [1, 2, 4, 8]:  # dilation rates
-            # Dilated causal convolution
-            conv = Conv1D(filters=hidden_size, kernel_size=3, strides=1,
-                          padding='causal', activation='relu',
-                          dilation_rate=d,
-                          kernel_regularizer=l2_reg)
-            x = conv(x)
-            # Residual connection
-            skip = Conv1D(filters=hidden_size, kernel_size=1,
-                          padding='same')(x)
-            # Layer normalization
+            # Feed-forward network
+            x = tf.keras.layers.Dense(ff_dim, activation='relu', kernel_regularizer=l2_reg)(x)
+            x = tf.keras.layers.Dense(hidden_size, kernel_regularizer=l2_reg)(x)
             x = LayerNormalization()(x)
-            x = tf.keras.activations.relu(x)
-            x = Dropout(dropout_rate)(x)
-            x = x + skip  # residual connection
-
-        # Extract the representation of the middle 16 time steps
-        # Either global average pooling or direct slicing can be used here
-        # Option 1: global average pooling followed by upsampling
-        # x = tf.reduce_mean(x, axis=1, keepdims=True)
-        # x = tf.tile(x, [1, output_steps, 1])
-
-        # Option 2: slice the middle 16 time steps directly (closer to the requirement)
-        # With causal convolutions, the middle time steps roughly correspond to the middle of the input
+            x = tf.keras.layers.Dropout(0.1)(x)
+
+        # Extract the middle time steps
         start_idx = (time_steps - output_steps) // 2
         x = x[:, start_idx:start_idx + output_steps, :]
 
         # Output layer
-        output = Dense(output_steps, activation=None, name='cdq_output')(x)
+        output = Dense(output_steps, name='cdq_output')(x[:, -1, :])  # uses only the last sliced step; alternatively apply the head to all time steps
 
-        # Build the model
         model = Model(nwp_input, output)
 
-        # Optimizer
-        adam = optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-7, amsgrad=True)
+        # Compile the model
+        adam = optimizers.Adam(
+            learning_rate=opt.Model.get('learning_rate', 0.001),
+            beta_1=0.9, beta_2=0.999, epsilon=1e-7, amsgrad=True
+        )
+        loss = region_loss(opt)
         model.compile(loss=loss, optimizer=adam)
+
         return model
 
     def train_init(self):
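
For sanity-checking the new architecture in isolation, a minimal standalone sketch (not part of this change) rebuilds the same encoder stack and runs one forward pass. The cfg dict name and its values are illustrative assumptions, and 'mse' stands in for the project's region_loss; the real handler reads these keys from opt.Model.

# Standalone smoke test of the encoder architecture introduced above.
# Assumptions: illustrative config values; 'mse' replaces region_loss for self-containment.
import numpy as np
from tensorflow.keras import Input, Model, optimizers, regularizers
from tensorflow.keras.layers import Conv1D, Dense, Dropout, LayerNormalization, MultiHeadAttention

cfg = {'time_step': 16, 'input_size': 21, 'hidden_size': 64,
       'num_heads': 4, 'ff_dim': 128, 'num_layers': 2, 'lambda_value_2': 0.0}
time_series = 3                                  # 16 * 3 = 48 input steps
time_steps = cfg['time_step'] * time_series
output_steps = 16
l2_reg = regularizers.l2(cfg['lambda_value_2'])

nwp_input = Input(shape=(time_steps, cfg['input_size']), name='nwp')
x = Conv1D(cfg['hidden_size'], 1, kernel_regularizer=l2_reg)(nwp_input)   # input embedding

for _ in range(cfg['num_layers']):
    # Multi-head self-attention, then the position-wise feed-forward block
    x = MultiHeadAttention(num_heads=cfg['num_heads'], key_dim=cfg['hidden_size'],
                           kernel_regularizer=l2_reg)(x, x)
    x = LayerNormalization()(x)
    x = Dropout(0.1)(x)
    x = Dense(cfg['ff_dim'], activation='relu', kernel_regularizer=l2_reg)(x)
    x = Dense(cfg['hidden_size'], kernel_regularizer=l2_reg)(x)
    x = LayerNormalization()(x)
    x = Dropout(0.1)(x)

start_idx = (time_steps - output_steps) // 2
x = x[:, start_idx:start_idx + output_steps, :]                 # middle 16 time steps
output = Dense(output_steps, name='cdq_output')(x[:, -1, :])    # last sliced step -> 16 values

model = Model(nwp_input, output)
model.compile(loss='mse', optimizer=optimizers.Adam(learning_rate=0.001))
dummy = np.random.rand(2, time_steps, cfg['input_size']).astype('float32')
print(model.predict(dummy, verbose=0).shape)                    # expected: (2, 16)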