- import glob
- import os
- import random
- import torch
- import torch.nn as nn
- import torch.optim as optim
- from torch.utils.data import Dataset, DataLoader
- import numpy as np
- import pandas as pd
- from dataset.TimeDataset import TimeSeriesDataset
- #from model.Transformer_base import TimeSeriesTransformer
- from model.LSTM import TimeSeriesTransformer
- from tqdm import tqdm
- from utils.Arg import Arg
- from utils import ModeTest
- import matplotlib.pyplot as plt
# Shared configuration object; all hyperparameters below are read from it.
arg = Arg()
# Hyperparameters
input_dim = arg.input_dim
output_dim = arg.output_dim
input_seq_length = arg.input_seq_length
output_seq_length = arg.output_seq_length
d_model = arg.d_model
nhead = arg.nhead
num_layers = arg.num_layers
dropout = arg.dropout
batch_size = arg.batch_size
epochs = arg.epochs
# Prefer GPU when available; all tensors/models are moved to this device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def train_for_one_Data(model, dataloader):
    """Run one full optimization pass of *model* over *dataloader*.

    A fresh Adam optimizer (lr=1e-3) is created on every call, so optimizer
    state (moment estimates) deliberately does not persist across calls —
    this matches how the training drivers in this file invoke it per month.

    Args:
        model: torch.nn.Module mapping an input-sequence batch to predictions
            shaped like the target batch.
        dataloader: iterable of (input_seq, output_seq) tensor pairs.

    Returns:
        float: mean MSE loss per batch over the pass, or 0.0 for an empty
        dataloader.
    """
    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    criterion = nn.MSELoss()
    model.train()
    # Move batches to wherever the model actually lives, so a CPU model does
    # not crash when a CUDA device happens to be available globally.
    model_device = next(model.parameters()).device
    total_loss = 0.0
    num_batches = 0
    for input_seq, output_seq in dataloader:
        input_seq = input_seq.to(model_device)
        output_seq = output_seq.to(model_device)
        # Forward pass
        predictions = model(input_seq)
        loss = criterion(predictions, output_seq)
        # Backward pass + parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1
    # BUGFIX: the original accumulated len(dataloader) once per batch and
    # summed a running prefix-sum of losses, so the returned value was not an
    # average of anything. Return the true per-batch mean instead, guarding
    # against an empty dataloader (the original divided by zero).
    return total_loss / max(num_batches, 1)
def _train_year_months(model, year, months, epoch, best_loss):
    """Train *model* on each month of *year*, checkpointing improvements.

    Args:
        model: the network being trained.
        year: calendar year used to build the CSV filenames.
        months: iterable of month numbers to train on.
        epoch: current outer epoch index (for logging only).
        best_loss: best loss seen so far across all calls.

    Returns:
        float: updated best (lowest) loss observed so far.
    """
    for month in tqdm(months, desc='Training progress:'):
        file_inputs_2 = './data/training/NWP/NWP_{}_{}.csv'.format(year, month)
        file_inputs_3 = './data/training/power/power_{}_{}.csv'.format(year, month)
        dataset = TimeSeriesDataset(file_inputs_3, file_inputs_2)
        dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
        epoch_loss = train_for_one_Data(model, dataloader)
        # BUGFIX: train_for_one_Data already returns a normalized loss; the
        # original divided by len(dataloader) a second time here, so the
        # logged value did not match the value used for checkpointing.
        print(f"Epoch {epoch + 1}, Loss: {epoch_loss}")
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            torch.save(model.state_dict(), 'save/lstm_base.pt')
            print("Best loss model is saved")
    return best_loss


def base_train(model):
    """Base training loop: 2021 months 3-12 then 2022 months 1-3, repeated
    for `epochs` passes; the best checkpoint goes to save/lstm_base.pt."""
    best_loss = float('inf')
    for epoch in range(epochs):
        best_loss = _train_year_months(model, 2021, range(3, 13), epoch, best_loss)
        best_loss = _train_year_months(model, 2022, range(1, 4), epoch, best_loss)
def re_train_for_data(model, year, month):
    """Fine-tune *model* on a single (year, month) CSV pair for `epochs` passes.

    Saves weights to ./save/lstm_base_pro.pt whenever the pass loss improves
    on the best seen so far within this call.

    Args:
        model: the network being fine-tuned.
        year: calendar year used to build the CSV filenames.
        month: month number used to build the CSV filenames.
    """
    best_loss = float('inf')
    # The files do not change between epochs — build the dataset once instead
    # of re-reading the CSVs every pass (matches re_train_for_turbine_sum_power).
    file_inputs_2 = f'./data/training/NWP/NWP_{year}_{month}.csv'
    file_inputs_3 = f'./data/training/power/power_{year}_{month}.csv'
    dataset = TimeSeriesDataset(file_inputs_3, file_inputs_2)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    for epoch in range(epochs):
        epoch_loss = train_for_one_Data(model, dataloader)
        # BUGFIX: train_for_one_Data already returns a normalized loss; do not
        # divide by len(dataloader) a second time as the original did.
        print(f"Epoch {epoch + 1}, Loss: {epoch_loss}")
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            torch.save(model.state_dict(), './save/lstm_base_pro.pt')
            print("Best loss model is saved")
def re_train_for_alotday(model, year, month, day):
    """Fine-tune *model* on only the LAST *day* days of a (year, month) pair.

    Assumes 15-minute sampling (day * 24 * 60 / 15 samples per day) — TODO
    confirm against TimeSeriesDataset. Saves weights to
    ./save/lstm_base_pro.pt whenever the pass loss improves.

    Args:
        model: the network being fine-tuned.
        year: calendar year used to build the CSV filenames.
        month: month number used to build the CSV filenames.
        day: number of trailing days of data to keep.
    """
    from torch.utils.data import Subset
    best_loss = float('inf')
    # The files do not change between epochs — build the dataset once.
    file_inputs_2 = f'./data/training/NWP/NWP_{year}_{month}.csv'
    file_inputs_3 = f'./data/training/power/power_{year}_{month}.csv'
    dataset = TimeSeriesDataset(file_inputs_3, file_inputs_2)
    # Keep only the trailing `day` days; clamp so a request for more days
    # than available falls back to the whole dataset instead of a negative
    # start index.
    start = max(0, int(len(dataset) - day * 24 * 60 / 15))
    # BUGFIX: the original used range(start, len(dataset) + 1), which makes
    # the Subset index one past the end and raise IndexError when iterated.
    dataset = Subset(dataset, indices=range(start, len(dataset)))
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    for epoch in range(epochs):
        epoch_loss = train_for_one_Data(model, dataloader)
        # train_for_one_Data already returns a normalized loss; do not divide
        # by len(dataloader) a second time as the original did.
        print(f"Epoch {epoch + 1}, Loss: {epoch_loss}")
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            torch.save(model.state_dict(), './save/lstm_base_pro.pt')
            print("Best loss model is saved")
def re_train_for_turbine_sum_power(model):
    """Fine-tune *model* on the aggregated (all-turbine) power dataset.

    Trains for `epochs` passes over ./data/all_power/{NWP,power_training}.csv
    and saves the best checkpoint to ./save/lstm_base_pro.pt.

    Args:
        model: the network being fine-tuned.
    """
    best_loss = float('inf')
    file_inputs_2 = './data/all_power/NWP.csv'
    file_inputs_3 = './data/all_power/power_training.csv'
    dataset = TimeSeriesDataset(file_inputs_3, file_inputs_2)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    for epoch in range(epochs):
        epoch_loss = train_for_one_Data(model, dataloader)
        # BUGFIX: train_for_one_Data already returns a normalized loss; do not
        # divide by len(dataloader) a second time as the original did.
        print(f"Epoch {epoch + 1}, Loss: {epoch_loss}")
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            torch.save(model.state_dict(), './save/lstm_base_pro.pt')
            print("Best loss model is saved")
def extract_date_from_path(path):
    """Parse the trailing ``..._<year>_<month>.<ext>`` pieces of *path*.

    Args:
        path: file path whose name ends in ``_<year>_<month>.<ext>``,
            e.g. ``./data/training/NWP/NWP_2021_3.csv``.

    Returns:
        tuple[int, int]: ``(year, month)`` as integers.
    """
    stem, _, tail = path.rpartition('_')
    month = int(tail.split('.', 1)[0])
    year = int(stem.rsplit('_', 1)[-1])
    return year, month