import glob
import os
import random

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import numpy as np
import pandas as pd
from dataset.TimeDataset import TimeSeriesDataset
# from model.Transformer_base import TimeSeriesTransformer
from model.LSTM import TimeSeriesTransformer
from tqdm import tqdm
from utils.Arg import Arg
from utils import ModeTest
import matplotlib.pyplot as plt

arg = Arg()

# Hyperparameters (all pulled from the project-level Arg config object).
input_dim = arg.input_dim
output_dim = arg.output_dim
input_seq_length = arg.input_seq_length
output_seq_length = arg.output_seq_length
d_model = arg.d_model
nhead = arg.nhead
num_layers = arg.num_layers
dropout = arg.dropout
batch_size = arg.batch_size
epochs = arg.epochs

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def train_for_one_Data(model, dataloader):
    """Run one training pass over ``dataloader`` and return the mean batch loss.

    Args:
        model: the network to train (moved to ``device`` by the caller).
        dataloader: yields ``(input_seq, output_seq)`` tensor pairs.

    Returns:
        float: average MSE loss per batch over this pass (0.0-safe for an
        empty loader).

    NOTE(review): a fresh Adam optimizer is created on every call, so momentum
    state does not carry across files/epochs — this reproduces the original
    behavior; confirm it is intentional.
    """
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.MSELoss()
    model.train()

    total_loss = 0.0
    num_batches = 0
    for input_seq, output_seq in dataloader:
        input_seq, output_seq = input_seq.to(device), output_seq.to(device)

        # Forward pass.
        # input_seq = input_seq.permute(1, 0, 2)
        # tgt = input_seq[-1:]
        # predictions = model(input_seq, tgt)
        predictions = model(input_seq)

        loss = criterion(predictions, output_seq)

        # Backward pass.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # BUGFIX: the original added len(dataloader) once per batch and
        # re-accumulated the running loss sum every iteration, so the
        # returned value was neither a sum nor a mean.
        total_loss += loss.item()
        num_batches += 1

    # Guard against an empty dataloader.
    return total_loss / max(num_batches, 1)


def _train_month_files(model, year, months, epoch, best_loss, save_path):
    """Train over one monthly CSV pair per month; checkpoint on improvement.

    Returns the (possibly updated) best loss so the caller can thread it
    through successive calls.
    """
    for month in tqdm(months, desc='Training progress:'):
        file_inputs_2 = './data/training/NWP/NWP_{}_{}.csv'.format(year, month)
        file_inputs_3 = './data/training/power/power_{}_{}.csv'.format(year, month)
        dataset = TimeSeriesDataset(file_inputs_3, file_inputs_2)
        dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
        epoch_loss = train_for_one_Data(model, dataloader)
        # BUGFIX: epoch_loss is already the mean batch loss; the original
        # divided by len(dataloader) a second time before printing.
        print(f"Epoch {epoch + 1}, Loss: {epoch_loss}")
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            torch.save(model.state_dict(), save_path)
            print("Best loss model is saved")
    return best_loss


def base_train(model):
    """Train on monthly data files (2021-03..12 then 2022-01..03) for
    ``epochs`` passes, checkpointing whenever the per-file loss improves."""
    best_loss = float('inf')
    for epoch in range(epochs):
        # One year of data: months 3-12 of 2021, then months 1-3 of 2022.
        best_loss = _train_month_files(
            model, 2021, range(3, 13), epoch, best_loss, 'save/lstm_base.pt')
        best_loss = _train_month_files(
            model, 2022, range(1, 4), epoch, best_loss, 'save/lstm_base.pt')


def re_train_for_data(model, year, month):
    """Fine-tune ``model`` on a single (year, month) CSV pair for ``epochs``
    passes, checkpointing on improvement."""
    best_loss = float('inf')
    for epoch in range(epochs):
        file_inputs_2 = f'./data/training/NWP/NWP_{year}_{month}.csv'
        file_inputs_3 = f'./data/training/power/power_{year}_{month}.csv'
        dataset = TimeSeriesDataset(file_inputs_3, file_inputs_2)
        dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
        epoch_loss = train_for_one_Data(model, dataloader)
        # epoch_loss is already averaged per batch — print it directly.
        print(f"Epoch {epoch + 1}, Loss: {epoch_loss}")
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            torch.save(model.state_dict(), './save/lstm_base_pro.pt')
            print("Best loss model is saved")


def re_train_for_alotday(model, year, month, day):
    """Fine-tune on only the last ``day`` days of one month's data
    (15-minute sampling => day * 24 * 60 / 15 samples per day window)."""
    from torch.utils.data import Subset
    best_loss = float('inf')
    for epoch in range(epochs):
        file_inputs_2 = f'./data/training/NWP/NWP_{year}_{month}.csv'
        file_inputs_3 = f'./data/training/power/power_{year}_{month}.csv'
        dataset = TimeSeriesDataset(file_inputs_3, file_inputs_2)
        # Start index of the trailing window; clamped at 0 in case the
        # requested window is longer than the dataset.
        number = max(int(len(dataset) - day * 24 * 60 / 15), 0)
        # BUGFIX: the original used range(number, len(dataset) + 1), which
        # includes the out-of-range index len(dataset) and raises IndexError.
        dataset = Subset(dataset, indices=range(number, len(dataset)))
        dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
        epoch_loss = train_for_one_Data(model, dataloader)
        print(f"Epoch {epoch + 1}, Loss: {epoch_loss}")
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            torch.save(model.state_dict(), './save/lstm_base_pro.pt')
            print("Best loss model is saved")


def re_train_for_turbine_sum_power(model):
    """Fine-tune on the aggregated all-turbine power dataset; the dataset and
    loader are built once and reused across epochs."""
    best_loss = float('inf')
    file_inputs_2 = './data/all_power/NWP.csv'
    file_inputs_3 = './data/all_power/power_training.csv'
    dataset = TimeSeriesDataset(file_inputs_3, file_inputs_2)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    for epoch in range(epochs):
        epoch_loss = train_for_one_Data(model, dataloader)
        print(f"Epoch {epoch + 1}, Loss: {epoch_loss}")
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            torch.save(model.state_dict(), './save/lstm_base_pro.pt')
            print("Best loss model is saved")


def extract_date_from_path(path):
    """Parse ``(year, month)`` from a path of the form ``..._YYYY_M.csv``.

    Assumes the filename's last two underscore-separated fields are the year
    and the month (month carries the file extension).
    """
    parts = path.split('_')
    year = int(parts[-2])
    month = int(parts[-1].split('.')[0])
    return year, month