David committed 3 weeks ago
commit 6bd527de03
2 files changed, 85 insertions(+), 56 deletions(-)
  1. models_processing/model_tf/tf_tcn.py  (+46 -12)
  2. models_processing/model_tf/tf_transformer.py  (+39 -44)

models_processing/model_tf/tf_tcn.py  (+46 -12)

@@ -5,7 +5,7 @@
 # @Author    :David
 # @Company: shenyang JY
 
-from tensorflow.keras.layers import Input, Dense, LSTM, concatenate, Conv1D, Conv2D, MaxPooling1D, Reshape, Flatten
+from tensorflow.keras.layers import Input, Dense, LSTM, concatenate, Conv1D, Conv2D, MaxPooling1D, Reshape, Flatten, LayerNormalization, Dropout
 from tensorflow.keras.models import Model, load_model
 from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard, ReduceLROnPlateau
 from tensorflow.keras import optimizers, regularizers
@@ -18,7 +18,7 @@ import argparse
 model_lock = Lock()
 set_deterministic(42)
 
-class TSHandler(object):
+class TCNHandler(object):
     def __init__(self, logger, args):
         self.logger = logger
         self.opt = argparse.Namespace(**args)
@@ -37,21 +37,55 @@ class TSHandler(object):
             self.logger.info("加载模型权重失败:{}".format(e.args))
 
     @staticmethod
-    def get_keras_model(opt, time_series=1, lstm_type=1):
+    def get_keras_model(opt, time_series=1):
+        # Parameter setup
         loss = region_loss(opt)
-        l1_reg = regularizers.l1(opt.Model['lambda_value_1'])
+        time_steps = opt.Model['time_step']*time_series  # input time steps (16*3)
+        output_steps = opt.Model['time_step']  # output time steps
+        hidden_size = opt.Model.get('hidden_size', 64)
         l2_reg = regularizers.l2(opt.Model['lambda_value_2'])
-        nwp_input = Input(shape=(opt.Model['time_step']*time_series, opt.Model['input_size']), name='nwp')
+        dropout_rate = opt.Model.get('dropout_rate', 0.2)
 
-        con1 = Conv1D(filters=64, kernel_size=5, strides=1, padding='valid', activation='relu', kernel_regularizer=l2_reg)(nwp_input)
-        con1_p = MaxPooling1D(pool_size=5, strides=1, padding='valid', data_format='channels_last')(con1)
-        nwp_lstm = LSTM(units=opt.Model['hidden_size'], return_sequences=False, kernel_regularizer=l2_reg)(con1_p)
-        if lstm_type == 2:
-            output = Dense(opt.Model['time_step'], name='cdq_output')(nwp_lstm)
-        else:
-            output = Dense(opt.Model['time_step']*time_series, name='cdq_output')(nwp_lstm)
+        # Input layer
+        nwp_input = Input(shape=(time_steps, opt.Model['input_size']), name='nwp')
 
+        # Initial convolution (expands the channel count to hidden_size)
+        x = Conv1D(filters=hidden_size, kernel_size=3, strides=1, padding='causal', activation='relu', kernel_regularizer=l2_reg)(nwp_input)
+
+        # Temporal convolution blocks (TCN)
+        for d in [1, 2, 4, 8]:  # dilation rates
+            residual = x  # save the block input for the residual connection
+            # Dilated causal convolution
+            x = Conv1D(filters=hidden_size, kernel_size=3, strides=1,
+                       padding='causal', activation='relu',
+                       dilation_rate=d,
+                       kernel_regularizer=l2_reg)(x)
+            # Layer normalization
+            x = LayerNormalization()(x)
+            x = tf.keras.activations.relu(x)
+            x = Dropout(dropout_rate)(x)
+            x = x + residual  # residual connection (channel counts already match)
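+        # With kernel_size=3, the initial conv plus dilations 1, 2, 4 and 8 give each
+        # output position a causal receptive field of 3 + 2*(1+2+4+8) = 33 time steps.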
+
+        # Extract the representation of the middle 16 time steps.
+        # Use either global average pooling or direct slicing.
+        # Option 1: global average pooling followed by upsampling
+        # x = tf.reduce_mean(x, axis=1, keepdims=True)
+        # x = tf.tile(x, [1, output_steps, 1])
+
+        # Option 2: slice the middle 16 time steps directly.
+        # With causal convolutions, the middle steps roughly correspond to the middle of the input window.
+        start_idx = (time_steps - output_steps) // 2
+        x = x[:, start_idx:start_idx + output_steps, :]
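+        # For a 48-step input and a 16-step output, start_idx = (48-16)//2 = 16,
+        # so steps 16..31 of the convolved sequence are kept.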
+
+        # Output layer: one value per retained time step, flattened to (batch, output_steps)
+        output = Dense(1, activation=None)(x)
+        output = Flatten(name='cdq_output')(output)
+
+        # Build the model
         model = Model(nwp_input, output)
+
+        # Optimizer
         adam = optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-7, amsgrad=True)
         model.compile(loss=loss, optimizer=adam)
         return model
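
For reference, the residual pattern introduced in get_keras_model can be isolated as a standalone helper. This is a minimal sketch, not repository code; tcn_block and its defaults are illustrative names:

import tensorflow as tf
from tensorflow.keras.layers import Conv1D, Dropout, LayerNormalization

def tcn_block(x, filters, dilation, dropout_rate=0.2):
    """One residual TCN block: dilated causal conv -> LayerNorm -> ReLU -> dropout,
    with the block input added back (identity shortcut; channel counts must match)."""
    residual = x
    x = Conv1D(filters, kernel_size=3, padding='causal',
               dilation_rate=dilation, activation='relu')(x)
    x = LayerNormalization()(x)
    x = tf.keras.activations.relu(x)
    x = Dropout(dropout_rate)(x)
    return x + residual

# Shape check: a 48-step, 21-feature window keeps its length through the stack.
inp = tf.keras.Input(shape=(48, 21))
h = Conv1D(64, kernel_size=3, padding='causal', activation='relu')(inp)
for d in [1, 2, 4, 8]:
    h = tcn_block(h, filters=64, dilation=d)
print(h.shape)  # (None, 48, 64)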

models_processing/model_tf/tf_transformer.py  (+39 -44)

@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 # -*- coding:utf-8 -*-
-# @FileName  :tf_lstm.py
-# @Time      :2025/2/12 14:03
+# @FileName  :tf_transformer.py
+# @Time      :2025/5/08 14:03
 # @Author    :David
 # @Company: shenyang JY
 
@@ -18,7 +18,7 @@ import argparse
 model_lock = Lock()
 set_deterministic(42)
 
-class TSHandler(object):
+class TransformerHandler(object):
     def __init__(self, logger, args):
         self.logger = logger
         self.opt = argparse.Namespace(**args)
@@ -57,58 +57,53 @@ class TSHandler(object):
         return model
 
     @staticmethod
-    def get_tcn_model(opt, time_series=1):
-        # Parameter setup
-        loss = region_loss(opt)
-        time_steps = 48  # input time steps (16*3)
-        output_steps = 16  # output time steps
+    def get_transformer_model(opt, time_series=1):
+        time_steps = opt.Model['time_step'] * time_series  # input window length (e.g. 16*3 = 48)
+        output_steps = opt.Model['time_step']  # forecast horizon (e.g. 16)
         hidden_size = opt.Model.get('hidden_size', 64)
-        l2_reg = regularizers.l2(opt.Model['lambda_value_2'])
-        dropout_rate = opt.Model.get('dropout_rate', 0.2)
-
-        # Input layer
-        nwp_input = Input(shape=(opt.Model['time_step']*time_series, opt.Model['input_size']), name='nwp')
+        num_heads = opt.Model.get('num_heads', 4)
+        ff_dim = opt.Model.get('ff_dim', 128)
+        l2_reg = regularizers.l2(opt.Model.get('lambda_value_2', 0.0))
+
+        nwp_input = Input(shape=(opt.Model['time_step'] * time_series, opt.Model['input_size']), name='nwp')
+
+        # Input embedding (1x1 convolution projects features to hidden_size)
+        x = Conv1D(hidden_size, 1, kernel_regularizer=l2_reg)(nwp_input)
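+        # Note: no positional encoding is added after this projection, so the
+        # attention layers have no built-in notion of time-step order.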
+
+        # Transformer encoder layers
+        for _ in range(opt.Model.get('num_layers', 2)):
+            # Multi-head self-attention with a residual connection
+            attn_out = tf.keras.layers.MultiHeadAttention(
+                num_heads=num_heads, key_dim=hidden_size,
+                kernel_regularizer=l2_reg
+            )(x, x)
+            x = LayerNormalization()(x + attn_out)
+            x = tf.keras.layers.Dropout(0.1)(x)
 
-        # Initial convolution (expands the channel count to hidden_size)
-        x = Conv1D(filters=hidden_size, kernel_size=3, strides=1, padding='causal', activation='relu', kernel_regularizer=l2_reg)(nwp_input)
-
-        # Temporal convolution blocks (TCN)
-        for d in [1, 2, 4, 8]:  # dilation rates
-            # Dilated causal convolution
-            conv = Conv1D(filters=hidden_size, kernel_size=3, strides=1,
-                          padding='causal', activation='relu',
-                          dilation_rate=d,
-                          kernel_regularizer=l2_reg)
-            x = conv(x)
-            # Residual connection
-            skip = Conv1D(filters=hidden_size, kernel_size=1,
-                          padding='same')(x)
-            # Layer normalization
-            x = LayerNormalization()(x)
-            x = tf.keras.activations.relu(x)
-            x = Dropout(dropout_rate)(x)
-            x = x + skip  # residual connection
+            # Feed-forward network with a residual connection
+            ffn = tf.keras.layers.Dense(ff_dim, activation='relu', kernel_regularizer=l2_reg)(x)
+            ffn = tf.keras.layers.Dense(hidden_size, kernel_regularizer=l2_reg)(ffn)
+            x = LayerNormalization()(x + ffn)
-
-        # Extract the representation of the middle 16 time steps.
-        # Use either global average pooling or direct slicing.
-        # Option 1: global average pooling followed by upsampling
-        # x = tf.reduce_mean(x, axis=1, keepdims=True)
-        # x = tf.tile(x, [1, output_steps, 1])
-
-        # Option 2: slice the middle 16 time steps directly.
-        # With causal convolutions, the middle steps roughly correspond to the middle of the input window.
+            x = tf.keras.layers.Dropout(0.1)(x)
+
+        # Slice out the middle time steps
         start_idx = (time_steps - output_steps) // 2
         x = x[:, start_idx:start_idx + output_steps, :]
 
         # Output layer
-        output = Dense(output_steps, activation=None, name='cdq_output')(x)
+        output = Dense(output_steps, name='cdq_output')(x[:, -1, :])  # maps the last retained step to the full horizon; all retained steps could be used instead
 
-        # Build the model
         model = Model(nwp_input, output)
 
-        # Optimizer
-        adam = optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-7, amsgrad=True)
+        # Compile the model
+        adam = optimizers.Adam(
+            learning_rate=opt.Model.get('learning_rate', 0.001),
+            beta_1=0.9, beta_2=0.999, epsilon=1e-7, amsgrad=True
+        )
+        loss = region_loss(opt)
         model.compile(loss=loss, optimizer=adam)
+
         return model
 
     def train_init(self):
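
A single encoder layer as wired in get_transformer_model can be sketched standalone for shape-checking. This is a hedged sketch under assumed defaults; encoder_layer is an illustrative name, and post-norm residuals are assumed:

import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout, LayerNormalization, MultiHeadAttention

def encoder_layer(x, num_heads=4, key_dim=64, ff_dim=128, rate=0.1):
    """Post-norm Transformer encoder layer: self-attention and feed-forward
    sublayers, each wrapped in a residual connection followed by LayerNorm."""
    attn_out = MultiHeadAttention(num_heads=num_heads, key_dim=key_dim)(x, x)
    x = LayerNormalization()(x + attn_out)
    x = Dropout(rate)(x)
    ffn = Dense(ff_dim, activation='relu')(x)
    ffn = Dense(x.shape[-1])(ffn)
    x = LayerNormalization()(x + ffn)
    return Dropout(rate)(x)

# Shape check: sequence length and feature width are preserved.
inp = tf.keras.Input(shape=(48, 64))
print(encoder_layer(inp).shape)  # (None, 48, 64)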