# training_model.py
# Training entry points for the time-series power-forecasting model.
  1. import glob
  2. import os
  3. import random
  4. import torch
  5. import torch.nn as nn
  6. import torch.optim as optim
  7. from torch.utils.data import Dataset, DataLoader
  8. import numpy as np
  9. import pandas as pd
  10. from dataset.TimeDataset import TimeSeriesDataset
  11. #from model.Transformer_base import TimeSeriesTransformer
  12. from model.LSTM import TimeSeriesTransformer
  13. from tqdm import tqdm
  14. from utils.Arg import Arg
  15. from utils import ModeTest
  16. import matplotlib.pyplot as plt
  17. arg = Arg()
  18. # 超参数
  19. input_dim = arg.input_dim
  20. output_dim = arg.output_dim
  21. input_seq_length = arg.input_seq_length
  22. output_seq_length = arg.output_seq_length
  23. d_model = arg.d_model
  24. nhead = arg.nhead
  25. num_layers = arg.num_layers
  26. dropout = arg.dropout
  27. batch_size = arg.batch_size
  28. epochs = arg.epochs
  29. device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
  30. def train_for_one_Data(model,dataloader):
  31. optimizer = optim.Adam(model.parameters(), lr=1e-3)
  32. criterion = nn.MSELoss()
  33. model.train()
  34. # 训练模型
  35. data_len = 0
  36. data_loss = 0.0
  37. epoch_loss = 0.0
  38. for batch_idx, (input_seq, output_seq) in enumerate(dataloader):
  39. input_seq, output_seq = input_seq.to(device), output_seq.to(device)
  40. # 前向传播
  41. # input_seq = input_seq.permute(1, 0, 2)
  42. # tgt = input_seq[-1:]
  43. #predictions = model(input_seq,tgt)
  44. predictions = model(input_seq)
  45. # 计算损失
  46. loss = criterion(predictions, output_seq)
  47. # 反向传播
  48. optimizer.zero_grad()
  49. loss.backward()
  50. optimizer.step()
  51. data_loss += loss.item()
  52. data_len += len(dataloader)
  53. epoch_loss += data_loss
  54. return epoch_loss / data_len
  55. def base_train(model):
  56. best_loss = float('inf')
  57. for epoch in range(epochs):
  58. #训练一年的数据
  59. #print(list(range(1, 5)))
  60. for i in tqdm(range(3, 13), desc='Training progress:'):
  61. file_inputs_2 = './data/training/NWP/NWP_2021_{}.csv'.format(i)
  62. file_inputs_3 = './data/training/power/power_2021_{}.csv'.format(i)
  63. dataset = TimeSeriesDataset(file_inputs_3, file_inputs_2)
  64. dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
  65. epoch_loss = train_for_one_Data(model, dataloader)
  66. print(f"Epoch {epoch + 1}, Loss: {epoch_loss / len(dataloader)}")
  67. if epoch_loss < best_loss:
  68. best_loss = epoch_loss
  69. torch.save(model.state_dict(), 'save/lstm_base.pt')
  70. print("Best loss model is saved")
  71. for i in tqdm(range(1, 4), desc='Training progress:'):
  72. file_inputs_2 = './data/training/NWP/NWP_2022_{}.csv'.format(i)
  73. file_inputs_3 = './data/training/power/power_2022_{}.csv'.format(i)
  74. dataset = TimeSeriesDataset(file_inputs_3, file_inputs_2)
  75. dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
  76. epoch_loss = train_for_one_Data(model, dataloader)
  77. print(f"Epoch {epoch + 1}, Loss: {epoch_loss / len(dataloader)}")
  78. if epoch_loss < best_loss:
  79. best_loss = epoch_loss
  80. torch.save(model.state_dict(), 'save/lstm_base.pt')
  81. print("Best loss model is saved")
  82. def re_train_for_data(model,year,month):
  83. best_loss = float('inf')
  84. for epoch in range(epochs):
  85. # 训练一年的数据
  86. file_inputs_2 = f'./data/training/NWP/NWP_{year}_{month}.csv'
  87. file_inputs_3 = f'./data/training/power/power_{year}_{month}.csv'
  88. dataset = TimeSeriesDataset(file_inputs_3, file_inputs_2)
  89. dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
  90. epoch_loss = train_for_one_Data(model, dataloader)
  91. print(f"Epoch {epoch + 1}, Loss: {epoch_loss / len(dataloader)}")
  92. if epoch_loss < best_loss:
  93. best_loss = epoch_loss
  94. torch.save(model.state_dict(), './save/lstm_base_pro.pt')
  95. print("Best loss model is saved")
  96. def re_train_for_alotday(model,year,month,day):
  97. from torch.utils.data import Subset
  98. best_loss = float('inf')
  99. for epoch in range(epochs):
  100. # 训练一年的数据
  101. file_inputs_2 = f'./data/training/NWP/NWP_{year}_{month}.csv'
  102. file_inputs_3 = f'./data/training/power/power_{year}_{month}.csv'
  103. dataset = TimeSeriesDataset(file_inputs_3, file_inputs_2)
  104. number = int(len(dataset)-day*24*60/15)
  105. # 创建一个Subset,只包含前num_samples个样本
  106. dataset = Subset(dataset, indices=range(number,len(dataset)+1))
  107. dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
  108. epoch_loss = train_for_one_Data(model, dataloader)
  109. print(f"Epoch {epoch + 1}, Loss: {epoch_loss / len(dataloader)}")
  110. if epoch_loss < best_loss:
  111. best_loss = epoch_loss
  112. torch.save(model.state_dict(), './save/lstm_base_pro.pt')
  113. print("Best loss model is saved")
  114. def re_train_for_turbine_sum_power(model):
  115. from torch.utils.data import Subset
  116. best_loss = float('inf')
  117. # 训练一年的数据
  118. file_inputs_2 = f'./data/all_power/NWP.csv'
  119. file_inputs_3 = f'./data/all_power/power_training.csv'
  120. dataset = TimeSeriesDataset(file_inputs_3, file_inputs_2)
  121. dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
  122. for epoch in range(epochs):
  123. epoch_loss = train_for_one_Data(model, dataloader)
  124. print(f"Epoch {epoch + 1}, Loss: {epoch_loss / len(dataloader)}")
  125. if epoch_loss < best_loss:
  126. best_loss = epoch_loss
  127. torch.save(model.state_dict(), './save/lstm_base_pro.pt')
  128. print("Best loss model is saved")
  129. def extract_date_from_path(path):
  130. parts = path.split('_')
  131. year = int(parts[-2])
  132. month = int(parts[-1].split('.')[0])
  133. return year, month