1 år sedan · c31d38639b
--- a/ipfcst-forecast-LSTM-v1.0/.gitignore
+++ b/ipfcst-forecast-LSTM-v1.0/.gitignore
@@ -1,10 +0,0 @@
 
															-*/__pycache__
														
 
															-/__pycache__
														
 
															-/.idea
														
 
															-/checkpoint
														
 
															-*.log
														
 
															-*.swp
														
 
															-/log
														
 
															-/data
														
 
															-/figure
														
 
															-
														
--- a/ipfcst-forecast-LSTM-v1.0/Readme.md
+++ b/ipfcst-forecast-LSTM-v1.0/Readme.md
@@ -1,18 +0,0 @@
 
															-## 超短期功率预测系统训练端
														
 
															-
														
 
															-这个项目将LSTM长短期时序模型用于超短期电力功率预测任务，实现特性如下: 
														
 
															-
														
 
															-- 程序简洁、模块化
														
 
															-- 支持可扩展的Keras框架（LSTM，可修改网络层）
														
 
															-- 参数、模型和框架支持高度可定制和修改
														
 
															-- 支持增量训练（在预训练模型上进行微调）
														
 
															-- 支持同时预测多个指标（目前预测实际功率）
														
 
															-- 支持预测任意时间节点数（目前设置16个点）
														
 
															-- 支持训练可视化和记录日志
														
 
															-
														
 
															-
														
 
															-
														
 
															-| 训练case | 表头  |
														
 
															-|--------| ----  |
														
 
															-| 1      | 单元格 |
														
 
															-| 2      | 单元格 |
														
--- a/ipfcst-forecast-LSTM-v1.0/config.py
+++ b/ipfcst-forecast-LSTM-v1.0/config.py
@@ -1,94 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/3/17 14:46
														
 
															-# file: config.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-
														
 
															-import yaml
														
 
															-import argparse
														
 
															-
														
 
															-
														
 
															-class myargparse(argparse.ArgumentParser):
														
 
															-    def __init__(self, discription, add_help):
														
 
															-        super(myargparse, self).__init__(description=discription, add_help=add_help)
														
 
															-        # default_config_parser = parser = argparse.ArgumentParser(
														
 
															-        #     description='Training Config', add_help=False)
														
 
															-        self.add_argument(
														
 
															-            '-c',
														
 
															-            '--config_yaml',
														
 
															-            default=
														
 
															-            'config.yml',
														
 
															-            type=str,
														
 
															-            metavar='FILE',
														
 
															-            help='YAML config file specifying default arguments')
														
 
															-
														
 
															-        feature_columns = list(range(1, 16))
														
 
															-        # feature_columns = list(range(1, 28))
														
 
															-        label_columns = [2]
														
 
															-
														
 
															-        label_in_feature_index = (lambda x, y: [x.index(i) for i in y])(feature_columns, label_columns)  # 因为feature不一定从0开始
														
 
															-
														
 
															-        # 在控制台可以指定的参数， yml中没有
														
 
															-        self.add_argument('--feature_columns', type=list, default=feature_columns, help='要作为特征的列')
														
 
															-
														
 
															-        self.add_argument('--label_columns', type=list, default=label_columns, help='要预测的列')
														
 
															-
														
 
															-        self.add_argument('--label_in_feature_index', type=list, default=label_in_feature_index, help='标签在特征列的索引')
														
 
															-
														
 
															-        self.add_argument('--input_size', type=int, default=len(feature_columns), help='输入维度')
														
 
															-
														
 
															-        self.add_argument('--output_size', type=int, default=len(label_columns), help='输出维度')
														
 
															-
														
 
															-        self.add_argument("--train_data_path", type=str, default=None,help='数据集地址')  # train_data_path yml中有
														
 
															-
														
 
															-        # model_name 和 model_save_path 这两个参数根据yml中的参数拼接而成
														
 
															-
														
 
															-        self.add_argument('--model_name', type=str, default=None, help='模型名称')
														
 
															-
														
 
															-        self.add_argument('--model_save_path', type=str, default=None, help='模型保存地址')
														
 
															-
														
 
															-
														
 
															-    def _init_dir(self, opt):
														
 
															-        import os, time
														
 
															-        # 在这里给opt赋值
														
 
															-        opt.model_name = "model_" + opt.continue_flag + opt.used_frame + opt.model_postfix[opt.used_frame]
														
 
															-        opt.model_save_path = './checkpoint/' + opt.model_name + "/"
														
 
															-        if not os.path.exists(opt.model_save_path):
														
 
															-            os.makedirs(opt.model_save_path)    # makedirs 递归创建目录
														
 
															-        if not os.path.exists(opt.figure_save_path):
														
 
															-            os.mkdir(opt.figure_save_path)
														
 
															-        if opt.do_train and (opt.do_log_save_to_file or opt.do_train_visualized):
														
 
															-            cur_time = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime())
														
 
															-            log_save_path = opt.log_save_path + cur_time + '_' + opt.used_frame + "/"
														
 
															-            os.makedirs(log_save_path)
														
 
															-
														
 
															-
														
 
															-# YAML should override the argparser's content
														
 
															-    def _parse_args_and_yaml(self):
														
 
															-        given_configs, remaining = self.parse_known_args()
														
 
															-        if given_configs.config_yaml:
														
 
															-            with open(given_configs.config_yaml, 'r', encoding='utf-8') as f:
														
 
															-                cfg = yaml.safe_load(f)
														
 
															-                self.set_defaults(**cfg)
														
 
															-
														
 
															-        # The main arg parser parses the rest of the args, the usual
														
 
															-        # defaults will have been overridden if config file specified.
														
 
															-        opt = self.parse_args(remaining)
														
 
															-        self._init_dir(opt)
														
 
															-        # Cache the args as a text string to save them in the output dir later
														
 
															-        opt_text = yaml.safe_dump(opt.__dict__, default_flow_style=False)
														
 
															-        return opt, opt_text
														
 
															-
														
 
															-
														
 
															-    def parse_args_and_yaml(self):
														
 
															-        return self._parse_args_and_yaml()[0]
														
 
															-
														
 
															-
														
 
if __name__ == "__main__":
    # No-op placeholder: running this module directly does nothing; the
    # parser is meant to be imported and used by other modules.
    # opt = _parse_args_and_yaml()
    pass
														
 
															-
														
 
															-
														
 
															-
														
 
															-
														
--- a/ipfcst-forecast-LSTM-v1.0/config.yml
+++ b/ipfcst-forecast-LSTM-v1.0/config.yml
@@ -1,77 +0,0 @@
 
															-Model:
														
 
															-  batch_size: 64
														
 
															-  dropout_rate: 0.2
														
 
															-  epoch: 20
														
 
															-  hidden_size: 128
														
 
															-  learning_rate: 0.001
														
 
															-  lstm_layers: 2
														
 
															-  patience: 5
														
 
															-  random_seed: 42
														
 
															-  time_step: 16
														
 
															-add_train: false
														
 
															-continue_flag: ''
														
 
															-data_format:
														
 
															-  dq: dq.xls
														
 
															-  envir: "\u73AF\u5883\u6570\u636E.xls"
														
 
															-  nwp: nwp.xls
														
 
															-  rp: rp.xls
														
 
															-debug_model: false
														
 
															-debug_num: 500
														
 
															-do_continue_train: false
														
 
															-do_figure_save: false
														
 
															-do_log_print_to_screen: true
														
 
															-do_log_save_to_file: true
														
 
															-do_predict: true
														
 
															-do_train: true
														
 
															-do_train_visualized: True
														
 
															-excel_data_path: ./data/J00307/
														
 
															-figure_save_path: ./figure/
														
 
															-is_continuous_predict: False
														
 
															-log_save_path: ./log/
														
 
															-mean:
														
 
															-  C_AIRT: 10.305992230762874
														
 
															-  C_CELLT: 10.664897925448384
														
 
															-  C_DIFFUSER: 143.2639061079428
														
 
															-  C_DIFFUSERDA: 6.571077155136789
														
 
															-  C_DIRECTR: 68.21328208942887
														
 
															-  C_DIRECTRDA: 3.163283039920654
														
 
															-  C_FORECAST: 3.1419734966774113
														
 
															-  C_GLOBALR: 173.2587817174973
														
 
															-  C_GLOBALRDA: 7.756491280271097
														
 
															-  C_HOURDA: 1.998222150590958
														
 
															-  C_P: 947.7830440532276
														
 
															-  C_RH: 55.59672286965865
														
 
															-  C_VALUE: 3.404744648318043
														
 
															-  C_WD: 212.88300686007108
														
 
															-  C_WS: 1.802446483180428
														
 
															-model_postfix:
														
 
															-  keras: .h5
														
 
															-  pytorch: .pth
														
 
															-  tensorflow: .ckpt
														
 
															-predict_points: 16
														
 
															-shuffle_train_data: false
														
 
															-std:
														
 
															-  C_AIRT: 12.127220611319888
														
 
															-  C_CELLT: 12.654848145970181
														
 
															-  C_DIFFUSER: 230.93680419867772
														
 
															-  C_DIFFUSERDA: 6.4933162833681415
														
 
															-  C_DIRECTR: 166.61348332191056
														
 
															-  C_DIRECTRDA: 4.991297839913351
														
 
															-  C_FORECAST: 4.447082956749344
														
 
															-  C_GLOBALR: 258.87947949591955
														
 
															-  C_GLOBALRDA: 7.9174382136573955
														
 
															-  C_HOURDA: 2.9110230573747247
														
 
															-  C_P: 25.75152505719027
														
 
															-  C_RH: 22.445059526990818
														
 
															-  C_VALUE: 5.013868885103326
														
 
															-  C_WD: 112.90029001408325
														
 
															-  C_WS: 1.6575249140627502
														
 
															-train_data_path: ./data/
														
 
															-train_data_rate: 0.9
														
 
															-use_cuda: false
														
 
															-used_frame: keras
														
 
															-valid_data_rate: 0.15
														
 
															-
														
 
															-is_photovoltaic: True
														
 
															-cap: 20
														
 
															-envir_columns: 16
														
--- a/ipfcst-forecast-LSTM-v1.0/dataset.py
+++ b/ipfcst-forecast-LSTM-v1.0/dataset.py
@@ -1,182 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/3/17 10:10
														
 
															-# file: dataset.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-import pandas as pd
														
 
															-import numpy as np
														
 
															-from sklearn.model_selection import train_test_split
														
 
															-import yaml
														
 
															-
														
 
															-
														
 
															-class DataSet(object):
														
 
															-    def __init__(self, opt):
														
 
															-        self.std = None
														
 
															-        self.mean = None
														
 
															-        self.opt = opt
														
 
															-        self.time_step = self.opt.Model["time_step"]
														
 
															-        excel_data_path = opt.excel_data_path
														
 
															-        data_format = opt.data_format
														
 
															-        dq_path = excel_data_path + data_format["dq"]
														
 
															-        rp_path = excel_data_path + data_format["rp"]
														
 
															-        envir_path = excel_data_path + data_format["envir"]
														
 
															-        nwp_path = excel_data_path + data_format["nwp"]
														
 
															-
														
 
															-        dq_columns = [1, 2]
														
 
															-        rp_columns = [0, 2]
														
 
															-        envir_columns = [0, *[x for x in range(3, 16)]]
														
 
															-        nwp_columns = [x for x in range(1, 27)]
														
 
															-
														
 
															-        dq = self.read_data(dq_path, dq_columns)
														
 
															-        rp = self.read_data(rp_path, rp_columns)
														
 
															-        # nwp = self.read_data(nwp_path, nwp_columns)
														
 
															-        # rp_average(rp)    # 计算平均功率
														
 
															-        envir = self.read_data(envir_path, envir_columns)
														
 
															-
														
 
															-        self.tables, self.tables_column_name = self.tables_integra(dq, rp, envir)
														
 
															-        # 如果是光
														
 
															-        if opt.is_photovoltaic:
														
 
															-            # self.tables = self.filter_data()
														
 
															-            pass
														
 
															-        self.data_num = self.tables.shape[0]
														
 
															-        self.train_num = int(self.data_num * opt.train_data_rate)
														
 
															-
														
 
															-        # 都是在ndarray量纲下进行计算
														
 
															-        self.norm_data = (self.tables[:, 1:] - self.mean) / self.std  # 归一化，去量纲
														
 
															-        # self.norm_data.insert(0, 'C_TIME', self.tables['C_TIME'])
														
 
															-        # self.set_yml({'mean': self.mean.to_dict(), 'std': self.std.to_dict()})
														
 
															-        self.start_num_in_test = 0
														
 
															-
														
 
															-    def set_yml(self, yml_dict):
														
 
															-        with open(self.opt.config_yaml, 'r', encoding='utf-8') as f:
														
 
															-            cfg = yaml.safe_load(f)
														
 
															-        for k, v in yml_dict.items():
														
 
															-            cfg[k] = v
														
 
															-        with open(self.opt.config_yaml, 'w') as f:
														
 
															-            yaml.safe_dump(cfg, f, default_flow_style=False)
														
 
															-
														
 
															-    def read_data(self, path, cols):
														
 
															-        init_data = pd.read_excel(path, usecols=cols)
														
 
															-        return init_data
														
 
															-
														
 
															-    def filter_data(self):
														
 
															-        check_table = self.tables[:, 2]  # 实际功率不能为0，为0代表没发电
														
 
															-        preserve_index = list(np.nonzero(check_table)[0])
														
 
															-        indexs = list(range(len(self.tables)))
														
 
															-        del_index = list(set(indexs) - set(preserve_index))
														
 
															-        self.tables = np.delete(self.tables, del_index, axis=0)
														
 
															-        return self.tables
														
 
															-
														
 
															-    def norm(self, tables):
														
 
															-        """
														
 
															-        归一化操作，获取后存储于config.yml
														
 
															-        :param tables:
														
 
															-        :return:
														
 
															-        """
														
 
															-        mean = np.mean(tables.iloc[:, 1:], axis=0)  # 数据的均值
														
 
															-        std = np.std(tables.iloc[:, 1:], axis=0)  # 标准差
														
 
															-        if hasattr(self.opt, 'mean') is False or hasattr(self.opt, 'std') is False:
														
 
															-            self.set_yml({'mean': mean.to_dict(), 'std': std.to_dict()})
														
 
															-        self.mean, self.std = mean.values, std.values
														
 
															-
														
 
															-    def tables_integra(self, dq, rp, envir):
														
 
															-        """
														
 
															-        联合表
														
 
															-        :param dq: 短期预测功率
														
 
															-        :param rp: 实际功率
														
 
															-        :param envir: 环境
														
 
															-        :return: 联合表， 列集（不包含第一列时间）
														
 
															-        """
														
 
															-        # 1. 先将 dq rp envir 根据时间联立
														
 
															-        union_tables = pd.merge(dq, rp, on='C_TIME')
														
 
															-        union_tables = union_tables.merge(envir, on='C_TIME')
														
 
															-        self.norm(union_tables)
														
 
															-        return union_tables.values, union_tables.columns.tolist()[1:]
														
 
															-
														
 
															-    
														
 
															-
														
 
															-    def get_train_and_valid_data(self, case):
														
 
															-        feature_data = self.norm_data[:self.train_num]
														
 
															-        # label_data = self.norm_data[: self.train_num,
														
 
															-        #                             self.opt.label_in_feature_index]    # 将延后几天的数据作为label
														
 
															-        label_data = self.norm_data[self.opt.predict_points: self.opt.predict_points + self.train_num, self.opt.label_in_feature_index]
														
 
															-        time_step = self.opt.Model["time_step"]
														
 
															-        train_x, train_y = [], []
														
 
															-        if not self.opt.do_continue_train:
														
 
															-            # 在非连续训练模式下，每time_step行数据会作为一个样本，两个样本错开一行，比如：1-20行，2-21行。。。。
														
 
															-            if case == 1: # 相当于实际功率+气象
														
 
															-                train_x = [feature_data[i:i + time_step] for i in range(self.train_num - time_step)]
														
 
															-                train_y = [label_data[i:i + time_step] for i in range(self.train_num - time_step)]
														
 
															-            elif case == 2: # 相当于短期+实际功率+气象
														
 
															-                train_rp = [feature_data[i:i + time_step, 1:]for i in range(self.train_num - time_step*2)]
														
 
															-                train_qd = [feature_data[i + time_step: i + 2*time_step, 0][:, np.newaxis] for i in range(self.train_num - time_step*2)]
														
 
															-                train_x = [list(np.append(t[0], t[1], axis=1)) for t in zip(train_rp, train_qd)]
														
 
															-                train_y = [label_data[i:i + time_step] for i in range(self.train_num - time_step*2)]
														
 
															-        else:
														
 
															-            # 在连续训练模式下
														
 
															-            pass
														
 
															-        train_x, train_y = np.array(train_x), np.array(train_y)
														
 
															-
														
 
															-        train_x, valid_x, train_y, valid_y = train_test_split(train_x, train_y, test_size=self.opt.valid_data_rate,
														
 
															-                                                              random_state=self.opt.Model["random_seed"],
														
 
															-                                                              shuffle=self.opt.shuffle_train_data)   # 划分训练和验证集，并打乱
														
 
															-        return train_x, valid_x, train_y, valid_y
														
 
															-
														
 
															-    def get_test_data(self, return_label_data=False):
														
 
															-        feature_data = self.norm_data[self.train_num:]
														
 
															-
														
 
															-        sample_interval = min(feature_data.shape[0], self.time_step*2)     # 防止time_step大于测试集数量
														
 
															-        assert sample_interval == self.time_step*2
														
 
															-
														
 
															-
														
 
															-        test_x, test_y, dq_y = [], [], []
														
 
															-        if self.opt.is_continuous_predict:
														
 
															-            test_num = len(feature_data)
														
 
															-            test_x = [feature_data[
														
 
															-                       i : i + self.time_step]
														
 
															-                      for i in range(test_num - sample_interval)]
														
 
															-            test_y = [feature_data[
														
 
															-                       i + self.time_step: i + sample_interval, self.opt.label_in_feature_index]
														
 
															-                      for i in range(test_num - sample_interval)]
														
 
															-        else:
														
 
															-            # 在测试数据中，每time_step行数据会作为一个样本，两个样本错开time_step行
														
 
															-            # 比如：1-20行，21-40行。。。到数据末尾。
														
 
															-            # 这个地方要重新获取测试集 刘大为
														
 
															-            self.start_num_in_test = feature_data.shape[0] % sample_interval  # 这些天的数据不够一个sample_interval
														
 
															-
														
 
															-            time_step_size = feature_data.shape[0] // sample_interval
														
 
															-            test_x = [feature_data[
														
 
															-                      self.start_num_in_test + i * sample_interval: self.start_num_in_test + i * sample_interval + self.time_step]
														
 
															-                      for i in range(time_step_size)]
														
 
															-            test_y = [feature_data[
														
 
															-                      self.start_num_in_test + i * sample_interval + self.time_step: self.start_num_in_test + (
														
 
															-                                  i + 1) * sample_interval, self.opt.label_in_feature_index]
														
 
															-                      for i in range(time_step_size)]
														
 
															-            dq_y = [feature_data[
														
 
															-                      self.start_num_in_test + i * sample_interval + self.time_step: self.start_num_in_test + (
														
 
															-                                  i + 1) * sample_interval, 0][:, np.newaxis]
														
 
															-                      for i in range(time_step_size)]
														
 
															-            # test_x = [list(np.append(t[0], t[1], axis=1)) for t in zip(test_x, dq_y)]
														
 
															-
														
 
															-        print("test_x的长度为：", len(test_x))
														
 
															-        pass
														
 
															-        # 把test_x重新转换成timestamp时间步长
														
 
															-        # for i, x in enumerate(test_x):
														
 
															-        #     p1 = x[0:16, 0]
														
 
															-        #     p2 = x[16:32, 1]
														
 
															-        #     p = [list(t) for t in zip(p1, p2)]
														
 
															-        #     test_x[i] = np.array(p)
														
 
															-        if return_label_data:       # 实际应用中的测试集是没有label数据的
														
 
															-            return np.array(test_x), np.array(test_y), np.array(dq_y)
														
 
															-        return np.array(test_x)
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    ds = DataSet()
														
 
															-    # dq = ds.read_data(dq_path, dq_columns)[0]
														
 
															-    # rp = ds.read_data(rp_path, rp_columns)[0]
														
 
															-    # # rp_average(rp)    # 计算平均功率
														
 
															-    # envir = ds.read_data(envir_path, envir_columns)[0]
														
 
															-    # tables = ds.tables_integra(dq, rp, envir)
														
 
															-    # ds.tables_norm_result(tables)
														
--- a/ipfcst-forecast-LSTM-v1.0/figure.py
+++ b/ipfcst-forecast-LSTM-v1.0/figure.py
@@ -1,73 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/3/20 15:19
														
 
															-# file: figure.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-import sys
														
 
															-import numpy as np
														
 
															-import matplotlib.pyplot as plt
														
 
															-
														
 
															-
														
 
															-class Figure(object):
														
 
															-    def __init__(self, opt, logger, ds):
														
 
															-        self.opt = opt
														
 
															-        self.ds = ds
														
 
															-        self.logger = logger
														
 
															-
														
 
															-    def draw(self, label_data, dq_data, predict_norm_data):
														
 
															-        # label_data = origin_data.data[origin_data.train_num + origin_data.start_num_in_test : ,
														
 
															-        #                                         config.label_in_feature_index]
														
 
															-        dq_data = dq_data.reshape((-1, self.opt.output_size))
														
 
															-        label_data = label_data.reshape((-1, self.opt.output_size))
														
 
															-        # label_data 要进行反归一化
														
 
															-        label_data = label_data * self.ds.std[self.opt.label_in_feature_index] + \
														
 
															-                       self.ds.mean[self.opt.label_in_feature_index]
														
 
															-        predict_data = predict_norm_data * self.ds.std[self.opt.label_in_feature_index] + \
														
 
															-                       self.ds.mean[self.opt.label_in_feature_index]   # 通过保存的均值和方差还原数据
														
 
															-        dq_data = dq_data * self.ds.std[0] + self.ds.mean[0]
														
 
															-        # predict_data = predict_norm_data
														
 
															-        assert label_data.shape[0] == predict_data.shape[0], "The element number in origin and predicted data is different"
														
 
															-
														
 
															-        label_name = [self.ds.tables_column_name[i] for i in self.opt.label_in_feature_index]
														
 
															-        label_column_num = len(self.opt.label_columns)
														
 
															-
														
 
															-        # label 和 predict 是错开config.predict_day天的数据的
														
 
															-        # 下面是两种norm后的loss的计算方式，结果是一样的，可以简单手推一下
														
 
															-        # label_norm_data = origin_data.norm_data[origin_data.train_num + origin_data.start_num_in_test:,
														
 
															-        #              config.label_in_feature_index]
														
 
															-        # loss_norm = np.mean((label_norm_data[config.predict_day:] - predict_norm_data[:-config.predict_day]) ** 2, axis=0)
														
 
															-        # logger.info("The mean squared error of stock {} is ".format(label_name) + str(loss_norm))
														
 
															-
														
 
															-        loss = np.sum((label_data - predict_data) ** 2)/len(label_data)  # mse
														
 
															-        # loss = np.mean((label_data - predict_data) ** 2, axis=0)
														
 
															-        loss_sqrt = np.sqrt(loss)   # rmse
														
 
															-        loss_norm = 1 - loss_sqrt / self.opt.cap
														
 
															-        # loss_norm = loss/(ds.std[opt.label_in_feature_index] ** 2)
														
 
															-        self.logger.info("The mean squared error of power {} is ".format(label_name) + str(loss_norm))
														
 
															-
														
 
															-        # loss1 = np.sum((label_data - dq_data) ** 2) / len(label_data)  # mse
														
 
															-        # loss_sqrt1 = np.sqrt(loss1)  # rmse
														
 
															-        # loss_norm1 = 1 - loss_sqrt1 / self.opt.cap
														
 
															-        # self.logger.info("The mean squared error1 of power {} is ".format(label_name) + str(loss_norm1))
														
 
															-        if self.opt.is_continuous_predict:
														
 
															-            label_X = range(int((self.ds.data_num - self.ds.train_num - 32)))
														
 
															-        else:
														
 
															-            label_X = range(int((self.ds.data_num - self.ds.train_num - self.ds.start_num_in_test)/2))
														
 
															-        print("label_x = ", label_X)
														
 
															-        predict_X = [x for x in label_X]
														
 
															-
														
 
															-        if not sys.platform.startswith('linux'):    # 无桌面的Linux下无法输出，如果是有桌面的Linux，如Ubuntu，可去掉这一行
														
 
															-            for i in range(label_column_num):
														
 
															-                plt.figure(i+1)                     # 预测数据绘制
														
 
															-                plt.plot(label_X, label_data[:, i], label='label', color='b')
														
 
															-                plt.plot(predict_X, predict_data[:, i], label='predict', color='g')
														
 
															-                plt.plot(predict_X, dq_data[:, i], label='dq', color='y')
														
 
															-                # plt.title("Predict actual {} power with {}".format(label_name[i], self.opt.used_frame))
														
 
															-                self.logger.info("The predicted power {} for the last {} point(s) is: ".format(label_name[i], self.opt.predict_points) +
														
 
															-                      str(np.squeeze(predict_data[-self.opt.predict_points:, i])))
														
 
															-
														
 
															-                if self.opt.do_figure_save:
														
 
															-                    plt.savefig(self.opt.figure_save_path+"{}predict_{}_with_{}.png".format(self.opt.continue_flag, label_name[i], opt.used_frame))
														
 
															-
														
 
															-            plt.show()
														
--- a/ipfcst-forecast-LSTM-v1.0/logger.py
+++ b/ipfcst-forecast-LSTM-v1.0/logger.py
@@ -1,43 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/3/20 15:19
														
 
															-# file: logger.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-
														
 
															-import logging, sys
														
 
															-from logging.handlers import RotatingFileHandler
														
 
															-
														
 
															-
														
 
															-def load_logger(config):
														
 
															-    logger = logging.getLogger()
														
 
															-    logger.setLevel(level=logging.DEBUG)
														
 
															-
														
 
															-    # StreamHandler
														
 
															-    if config.do_log_print_to_screen:
														
 
															-        stream_handler = logging.StreamHandler(sys.stdout)
														
 
															-        stream_handler.setLevel(level=logging.INFO)
														
 
															-        formatter = logging.Formatter(datefmt='%Y/%m/%d %H:%M:%S',
														
 
															-                                      fmt='[ %(asctime)s ] %(message)s')
														
 
															-        stream_handler.setFormatter(formatter)
														
 
															-        logger.addHandler(stream_handler)
														
 
															-
														
 
															-    # FileHandler
														
 
															-    if config.do_log_save_to_file:
														
 
															-        file_handler = RotatingFileHandler(config.log_save_path + "out.log", maxBytes=1024000, backupCount=5)
														
 
															-        file_handler.setLevel(level=logging.INFO)
														
 
															-        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
														
 
															-        file_handler.setFormatter(formatter)
														
 
															-        logger.addHandler(file_handler)
														
 
															-
														
 
															-        # 把config信息也记录到log 文件中
														
 
															-        config_dict = {}
														
 
															-        for key in dir(config):
														
 
															-            if not key.startswith("_"):
														
 
															-                config_dict[key] = getattr(config, key)
														
 
															-        config_str = str(config_dict)
														
 
															-        config_list = config_str[1:-1].split(", '")
														
 
															-        config_save_str = "\nConfig:\n" + "\n'".join(config_list)
														
 
															-        logger.info(config_save_str)
														
 
															-
														
 
															-    return logger
														
--- a/ipfcst-forecast-LSTM-v1.0/model/__init__.py
+++ b/ipfcst-forecast-LSTM-v1.0/model/__init__.py
--- a/ipfcst-forecast-LSTM-v1.0/model/model_keras.py
+++ b/ipfcst-forecast-LSTM-v1.0/model/model_keras.py
@@ -1,48 +0,0 @@
 
															-# -*- coding: UTF-8 -*-
														
 
															-from keras.layers import Input, Dense, LSTM
														
 
															-from keras.models import Model
														
 
															-from keras.callbacks import ModelCheckpoint, EarlyStopping
														
 
															-
														
 
															-
														
 
															-def get_keras_model(opt):
														
 
															-    input1 = Input(shape=(opt.Model['time_step'], opt.input_size))
														
 
															-    lstm = input1
														
 
															-    for i in range(opt.Model['lstm_layers']):
														
 
															-        lstm = LSTM(units=opt.Model['hidden_size'],dropout=opt.Model['dropout_rate'],return_sequences=True)(lstm)
														
 
															-    output = Dense(opt.output_size)(lstm)
														
 
															-    model = Model(input1, output)
														
 
															-    model.compile(loss='mse', optimizer='adam')     # metrics=["mae"]
														
 
															-    return model
														
 
															-
														
 
															-
														
 
															-def gpu_train_init():
														
 
															-    import tensorflow as tf
														
 
															-    from keras.backend.tensorflow_backend import set_session
														
 
															-    sess_config = tf.ConfigProto(log_device_placement=True, allow_soft_placement=True)
														
 
															-    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.7  # 最多使用70%GPU内存
														
 
															-    sess_config.gpu_options.allow_growth=True   # 初始化时不全部占满GPU显存, 按需分配
														
 
															-    sess = tf.Session(config=sess_config)
														
 
															-    set_session(sess)
														
 
															-
														
 
															-
														
 
															-def train(opt, train_and_valid_data):
														
 
															-    if opt.use_cuda: gpu_train_init()
														
 
															-    train_X, train_Y, valid_X, valid_Y = train_and_valid_data
														
 
															-    model = get_keras_model(opt)
														
 
															-    model.summary()
														
 
															-    if opt.add_train:
														
 
															-        model.load_weights(opt.model_save_path + opt.model_name)
														
 
															-
														
 
															-    check_point = ModelCheckpoint(filepath=opt.model_save_path + opt.model_name, monitor='val_loss',
														
 
															-                                    save_best_only=True, mode='auto')
														
 
															-    early_stop = EarlyStopping(monitor='val_loss', patience=opt.Model['patience'], mode='auto')
														
 
															-    model.fit(train_X, train_Y, batch_size=opt.Model['batch_size'], epochs=opt.Model['epoch'], verbose=2,
														
 
															-              validation_data=(valid_X, valid_Y), callbacks=[check_point, early_stop])
														
 
															-
														
 
															-
														
 
															-def predict(config, test_X):
														
 
															-    model = get_keras_model(config)
														
 
															-    model.load_weights(config.model_save_path + config.model_name)
														
 
															-    result = model.predict(test_X, batch_size=1)
														
 
															-    result = result.reshape((-1, config.output_size))
														
 
															-    return result
														
--- a/ipfcst-forecast-LSTM-v1.0/requirements.txt
+++ b/ipfcst-forecast-LSTM-v1.0/requirements.txt
@@ -1,8 +0,0 @@
 
															-scikit-learn
														
 
															-pandas
														
 
															-argparse
														
 
															-keras
														
 
															-tensorflow==1.15
														
 
															-matplotlib>=3.0.2
														
 
															-numpy>=1.14.6
														
 
															-scipy>=1.1.0
														
--- a/ipfcst-forecast-LSTM-v1.0/run_case1.py
+++ b/ipfcst-forecast-LSTM-v1.0/run_case1.py
@@ -1,56 +0,0 @@
 
															-# -*- coding: UTF-8 -*-
														
 
															-
														
 
															-import numpy as np
														
 
															-import os
														
 
															-import sys
														
 
															-import time
														
 
															-from figure import Figure
														
 
															-from dataset import DataSet
														
 
															-from logger import load_logger
														
 
															-from config import myargparse
														
 
															-
														
 
															-frame = "keras"
														
 
															-
														
 
															-if frame == "keras":
														
 
															-    from model.model_keras import train, predict
														
 
															-    os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3'
														
 
															-else:
														
 
															-    raise Exception("Wrong frame seletion")
														
 
															-
														
 
															-
														
 
															-def main():
														
 
															-    parse = myargparse(discription="training config", add_help=False)
														
 
															-    opt = parse.parse_args_and_yaml()
														
 
															-    logger = load_logger(opt)
														
 
															-    try:
														
 
															-        np.random.seed(opt.Model["random_seed"])
														
 
															-        # 在这里获取数据集
														
 
															-        ds = DataSet(opt=opt)
														
 
															-        if opt.do_train:
														
 
															-            train_X, valid_X, train_Y, valid_Y = ds.get_train_and_valid_data(case=2)
														
 
															-            train(opt, [train_X, train_Y, valid_X, valid_Y])
														
 
															-        if opt.do_predict:
														
 
															-            test_X, test_Y, dq_Y = ds.get_test_data(return_label_data=True)
														
 
															-            result = predict(opt, test_X)       # 这里输出的是未还原的归一化预测数据
														
 
															-            fig = Figure(opt, logger, ds)
														
 
															-            fig.draw(test_Y, dq_Y, result)
														
 
															-    except Exception:
														
 
															-        logger.error("Run Error", exc_info=True)
														
 
															-
														
 
															-
														
 
															-if __name__=="__main__":
														
 
															-    import argparse
														
 
															-    # argparse方便于命令行下输入参数，可以根据需要增加更多
														
 
															-    # parser = argparse.ArgumentParser()
														
 
															-    # parser.add_argument("-t", "--do_train", default=False, type=bool, help="whether to train")
														
 
															-    # parser.add_argument("-p", "--do_predict", default=True, type=bool, help="whether to train")
														
 
															-    # parser.add_argument("-b", "--batch_size", default=64, type=int, help="batch size")
														
 
															-    # parser.add_argument("-e", "--epoch", default=20, type=int, help="epochs num")
														
 
															-    # args = parser.parse_args()
														
 
															-
														
 
															-    # con = Config()
														
 
															-    # for key in dir(args):               # dir(args) 函数获得args所有的属性
														
 
															-    #     if not key.startswith("_"):     # 去掉 args 自带属性，比如__name__等
														
 
															-    #         setattr(con, key, getattr(args, key))   # 将属性值赋给Config
														
 
															-    main()
														
 
															-
														
--- a/ipfcst-forecast-LSTM-v1.0/run_case_history.py
+++ b/ipfcst-forecast-LSTM-v1.0/run_case_history.py
@@ -1,142 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/3/20 9:23
														
 
															-# file: run_case_history.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-
														
 
															-class Data:
														
 
															-    def __init__(self, config):
														
 
															-        self.config = config
														
 
															-        self.data, self.data_column_name = self.read_data()
														
 
															-
														
 
															-        self.data_num = self.data.shape[0]
														
 
															-        self.train_num = int(self.data_num * self.config.train_data_rate)
														
 
															-
														
 
															-        self.mean = np.mean(self.data, axis=0)              # 数据的均值和方差
														
 
															-        self.std = np.std(self.data, axis=0)
														
 
															-        self.norm_data = (self.data - self.mean)/self.std   # 归一化，去量纲
														
 
															-
														
 
															-        self.start_num_in_test = 0      # 测试集中前几天的数据会被删掉，因为它不够一个time_step
														
 
															-
														
 
															-    def read_data(self):                # 读取初始数据
														
 
															-        if self.config.debug_mode:
														
 
															-            init_data = pd.read_csv(self.config.train_data_path, nrows=self.config.debug_num,
														
 
															-                                    usecols=self.config.feature_columns)
														
 
															-        else:
														
 
															-            init_data = pd.read_csv(self.config.train_data_path, usecols=self.config.feature_columns)
														
 
															-        init_data = self.filter_data(init_data)
														
 
															-        return init_data.values, init_data.columns.tolist()     # .columns.tolist() 是获取列名
														
 
															-
														
 
															-    def filter_data(self, init_data):
														
 
															-        return init_data[init_data.apply(np.sum, axis=1)!=0]
														
 
															-
														
 
															-    def get_train_and_valid_data(self):
														
 
															-        feature_data = self.norm_data[:self.train_num]
														
 
															-        label_data = self.norm_data[: self.train_num,
														
 
															-                                    self.config.label_in_feature_index]    # 将延后几天的数据作为label
														
 
															-
														
 
															-        if not self.config.do_continue_train:
														
 
															-            # 在非连续训练模式下，每time_step行数据会作为一个样本，两个样本错开一行，比如：1-20行，2-21行。。。。
														
 
															-            train_x, train_y = [], []
														
 
															-            for i in range(self.train_num-self.config.time_step*2):
														
 
															-                p1 = feature_data[:, 0][i:i+self.config.start_predict_point]
														
 
															-                p2 = feature_data[:, 1][i+self.config.start_predict_point:i+self.config.start_predict_point*2]
														
 
															-                p = [list(t) for t in zip(p1, p2)]  # 实际功率， 预测功率 是一组特征值
														
 
															-                l = label_data[i+self.config.start_predict_point:i+self.config.start_predict_point*2]
														
 
															-                train_x.append(p)
														
 
															-                train_y.append(l)
														
 
															-            # train_x = [feature_data[i:i+self.config.time_step] for i in range(self.train_num-self.config.time_step)]
														
 
															-            # train_y = [label_data[i+self.config.start_predict_point:i+self.config.time_step] for i in range(self.train_num-self.config.time_step)]
														
 
															-            # 这里选取后16个点 作为 预测及
														
 
															-        else:
														
 
															-            # 在连续训练模式下，每time_step行数据会作为一个样本，两个样本错开time_step行，
														
 
															-            # 比如：1-20行，21-40行。。。到数据末尾，然后又是 2-21行，22-41行。。。到数据末尾，……
														
 
															-            # 这样才可以把上一个样本的final_state作为下一个样本的init_state，而且不能shuffle
														
 
															-            # 目前本项目中仅能在pytorch的RNN系列模型中用
														
 
															-            train_x = [feature_data[start_index + i*self.config.time_step : start_index + (i+1)*self.config.time_step]
														
 
															-                       for start_index in range(self.config.time_step)
														
 
															-                       for i in range((self.train_num - start_index) // self.config.time_step)]
														
 
															-            train_y = [label_data[start_index + i*self.config.time_step : start_index + (i+1)*self.config.time_step]
														
 
															-                       for start_index in range(self.config.time_step)
														
 
															-                       for i in range((self.train_num - start_index) // self.config.time_step)]
														
 
															-
														
 
															-        train_x, train_y = np.array(train_x), np.array(train_y)
														
 
															-
														
 
															-        train_x, valid_x, train_y, valid_y = train_test_split(train_x, train_y, test_size=self.config.valid_data_rate,
														
 
															-                                                              random_state=self.config.random_seed,
														
 
															-                                                              shuffle=self.config.shuffle_train_data)   # 划分训练和验证集，并打乱
														
 
															-        return train_x, valid_x, train_y, valid_y
														
 
															-
														
 
															-
														
 
															-class Config:
														
 
															-    # 数据参数
														
 
															-    # feature_columns = list(range(2, 9))     # 要作为feature的列，按原数据从0开始计算，也可以用list 如 [2,4,6,8] 设置
														
 
															-    feature_columns = list(range(1, 3))
														
 
															-    # label_columns = [4, 5]                  # 要预测的列，按原数据从0开始计算, 如同时预测第四，五列 最低价和最高价
														
 
															-    label_columns = [1]
														
 
															-    # label_in_feature_index = [feature_columns.index(i) for i in label_columns]  # 这样写不行
														
 
															-    label_in_feature_index = (lambda x,y: [x.index(i) for i in y])(feature_columns, label_columns)  # 因为feature不一定从0开始
														
 
															-
														
 
															-    predict_day = 1             # 预测未来几天
														
 
															-    predict_points = 16
														
 
															-    # 网络参数
														
 
															-    input_size = len(feature_columns)
														
 
															-    output_size = len(label_columns)
														
 
															-
														
 
															-    hidden_size = 128           # LSTM的隐藏层大小，也是输出大小
														
 
															-    lstm_layers = 2             # LSTM的堆叠层数
														
 
															-    dropout_rate = 0.2          # dropout概率
														
 
															-    time_step = 16             # 这个参数很重要，是设置用前多少个点的数据来预测，也是LSTM的time step数，请保证训练数据量大于它
														
 
															-    start_predict_point = 16
														
 
															-
														
 
															-    # 训练参数
														
 
															-    do_train = True
														
 
															-    do_predict = True
														
 
															-    add_train = False           # 是否载入已有模型参数进行增量训练
														
 
															-    shuffle_train_data = False   # 是否对训练数据做shuffle
														
 
															-    use_cuda = False            # 是否使用GPU训练
														
 
															-
														
 
															-    train_data_rate = 0.95      # 训练数据占总体数据比例，测试数据就是 1-train_data_rate
														
 
															-    valid_data_rate = 0.15      # 验证数据占训练数据比例，验证集在训练过程使用，为了做模型和参数选择
														
 
															-
														
 
															-    batch_size = 64
														
 
															-    learning_rate = 0.001
														
 
															-    epoch = 20                  # 整个训练集被训练多少遍，不考虑早停的前提下
														
 
															-    patience = 5                # 训练多少epoch，验证集没提升就停掉
														
 
															-    random_seed = 42            # 随机种子，保证可复现
														
 
															-
														
 
															-    do_continue_train = False    # 每次训练把上一次的final_state作为下一次的init_state，仅用于RNN类型模型，目前仅支持pytorch
														
 
															-    continue_flag = ""           # 但实际效果不佳，可能原因：仅能以 batch_size = 1 训练
														
 
															-    if do_continue_train:
														
 
															-        shuffle_train_data = False
														
 
															-        batch_size = 1
														
 
															-        continue_flag = "continue_"
														
 
															-
														
 
															-    # 训练模式
														
 
															-    debug_mode = False  # 调试模式下，是为了跑通代码，追求快
														
 
															-    debug_num = 500  # 仅用debug_num条数据来调试
														
 
															-
														
 
															-    # 框架参数
														
 
															-    used_frame = frame  # 选择的深度学习框架，不同的框架模型保存后缀不一样
														
 
															-    model_postfix = {"pytorch": ".pth", "keras": ".h5", "tensorflow": ".ckpt"}
														
 
															-    model_name = "model_" + continue_flag + used_frame + model_postfix[used_frame]
														
 
															-
														
 
															-    # 路径参数
														
 
															-    train_data_path = "./data/J00285.csv"
														
 
															-    model_save_path = "./checkpoint/" + used_frame + "/"
														
 
															-    figure_save_path = "./figure/"
														
 
															-    log_save_path = "./log/"
														
 
															-    do_log_print_to_screen = True
														
 
															-    do_log_save_to_file = True                  # 是否将config和训练过程记录到log
														
 
															-    do_figure_save = False
														
 
															-    do_train_visualized = False          # 训练loss可视化，pytorch用visdom，tf用tensorboardX，实际上可以通用, keras没有
														
 
															-    if not os.path.exists(model_save_path):
														
 
															-        os.makedirs(model_save_path)    # makedirs 递归创建目录
														
 
															-    if not os.path.exists(figure_save_path):
														
 
															-        os.mkdir(figure_save_path)
														
 
															-    if do_train and (do_log_save_to_file or do_train_visualized):
														
 
															-        cur_time = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime())
														
 
															-        log_save_path = log_save_path + cur_time + '_' + used_frame + "/"
														
 
															-        os.makedirs(log_save_path)
														
 
															-
														
--- a/wind-LSTM-CNN-v2.0-nwp+rp+环境/.gitignore
+++ b/wind-LSTM-CNN-v2.0-nwp+rp+环境/.gitignore
@@ -1,13 +0,0 @@
 
															-*/__pycache__
														
 
															-/__pycache__
														
 
															-/.idea
														
 
															-/checkpoint
														
 
															-/log
														
 
															-/data
														
 
															-/figure
														
 
															-*.log
														
 
															-*.swp
														
 
															-/log
														
 
															-/data
														
 
															-
														
 
															-
														
--- a/wind-LSTM-CNN-v2.0-nwp+rp+环境/Readme.md
+++ b/wind-LSTM-CNN-v2.0-nwp+rp+环境/Readme.md
@@ -1,18 +0,0 @@
 
															-## 超短期功率预测系统训练端
														
 
															-
														
 
															-这个项目将LSTM长短期时序模型用于超短期电力功率预测任务，实现特性如下: 
														
 
															-
														
 
															-- 程序简洁、模块化
														
 
															-- 支持可扩展的Keras框架（LSTM，可修改网络层）
														
 
															-- 参数、模型和框架支持高度可定制和修改
														
 
															-- 支持增量训练（在预训练模型上进行微调）
														
 
															-- 支持同时预测多个指标（目前预测实际功率）
														
 
															-- 支持预测任意时间节点数（目前设置16个点）
														
 
															-- 支持训练可视化和记录日志
														
 
															-
														
 
															-
														
 
															-
														
 
															-| 训练case | 表头  |
														
 
															-|--------| ----  |
														
 
															-| 1      | 单元格 |
														
 
															-| 2      | 单元格 |
														
--- a/wind-LSTM-CNN-v2.0-nwp+rp+环境/back.py
+++ b/wind-LSTM-CNN-v2.0-nwp+rp+环境/back.py
@@ -1,76 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/4/14 15:32
														
 
															-# file: back.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-import sys
														
 
															-import numpy as np
														
 
															-import matplotlib.pyplot as plt
														
 
															-import pandas as pd
														
 
															-
														
 
															-
														
 
															-class data_analyse(object):
														
 
															-    def __init__(self, opt, logger, process):
														
 
															-        self.opt = opt
														
 
															-        self.logger = logger
														
 
															-        self.ds = process
														
 
															-
														
 
															-    def calculate_acc(self, label_data, predict_data):
														
 
															-        loss = np.sum((label_data - predict_data) ** 2) / len(label_data)  # mse
														
 
															-        loss_sqrt = np.sqrt(loss)  # rmse
														
 
															-        loss_acc = 1 - loss_sqrt / self.opt.cap
														
 
															-        return loss_acc
														
 
															-
														
 
															-    def get_16_points(self, results):
														
 
															-        # results为模型预测的一维数组，遍历，取每16个点的最后一个点
														
 
															-        preds = []
														
 
															-        for res in results:
														
 
															-            preds.append(res.iloc[-1].values)
														
 
															-        return np.array(preds)
														
 
															-
														
 
															-    def predict_acc(self, predict_data, dfy):
														
 
															-        predict_data = predict_data * self.ds.std['C_REAL_VALUE'] + self.ds.mean['C_REAL_VALUE']
														
 
															-        dfs = dfy[0]
														
 
															-        for i in range(1, len(dfy)):
														
 
															-            dfs.extend(dfy[i])
														
 
															-        for i, df in enumerate(dfs):
														
 
															-            df["PREDICT"] = predict_data[i]
														
 
															-            dfs[i] = df
														
 
															-        data = self.get_16_points(dfs)
														
 
															-        df = pd.DataFrame(data, columns=['C_TIME', 'C_REAL_VALUE', 'C_FP_VALUE', 'PREDICT'])
														
 
															-        # label_data = label_data.reshape((-1, self.opt.output_size))
														
 
															-        # label_data 要进行反归一化
														
 
															-
														
 
															-        label_name = [self.opt.feature_columns[i] for i in self.opt.label_in_feature_index]
														
 
															-        loss_norm = self.calculate_acc(df['C_REAL_VALUE'], df['PREDICT'])
														
 
															-
														
 
															-        self.logger.info("The mean squared error of power {} is ".format(label_name) + str(loss_norm))
														
 
															-
														
 
															-        loss_norm = self.calculate_acc(df['C_REAL_VALUE'], df['C_FP_VALUE'])
														
 
															-
														
 
															-        self.logger.info("The mean squared error of power {} is ".format(label_name) + str(loss_norm))
														
 
															-        self.preidct_draw(df['C_REAL_VALUE'].values, df['PREDICT'].values)
														
 
															-
														
 
															-    def preidct_draw(self, label_data, predict_data):
														
 
															-        X = list(range(label_data.shape[0]))
														
 
															-        print("label_x = ", X)
														
 
															-        label_column_num = len(self.opt.label_columns)
														
 
															-        label_name = [self.opt.feature_columns[i] for i in self.opt.label_in_feature_index]
														
 
															-        if not sys.platform.startswith('linux'):    # 无桌面的Linux下无法输出，如果是有桌面的Linux，如Ubuntu，可去掉这一行
														
 
															-            for i in range(label_column_num):
														
 
															-                plt.figure(i+1)                     # 预测数据绘制
														
 
															-                plt.plot(X, label_data[:, i], label='label', color='b')
														
 
															-                plt.plot(X, predict_data[:, i], label='predict', color='g')
														
 
															-                # plt.plot(predict_X, dq_data[:, i], label='dq', color='y')
														
 
															-                # plt.title("Predict actual {} power with {}".format(label_name[i], self.opt.used_frame))
														
 
															-                self.logger.info("The predicted power {} for the last {} point(s) is: ".format(label_name[i], self.opt.predict_points) +
														
 
															-                      str(np.squeeze(predict_data[-self.opt.predict_points:, i])))
														
 
															-
														
 
															-                if self.opt.do_figure_save:
														
 
															-                    plt.savefig(self.opt.figure_save_path+"{}predict_{}_with_{}.png".format(self.opt.continue_flag, label_name[i], self.opt.used_frame))
														
 
															-
														
 
															-            plt.show()
														
 
															-
														
 
															-    def tangle_results(self):
														
 
															-        pass
														
--- a/wind-LSTM-CNN-v2.0-nwp+rp+环境/config.py
+++ b/wind-LSTM-CNN-v2.0-nwp+rp+环境/config.py
@@ -1,96 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/3/17 14:46
														
 
															-# file: config.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-
														
 
															-import yaml
														
 
															-import argparse
														
 
															-
														
 
															-
														
 
															-class myargparse(argparse.ArgumentParser):
														
 
															-    def __init__(self, discription, add_help):
														
 
															-        super(myargparse, self).__init__(description=discription, add_help=add_help)
														
 
															-        # default_config_parser = parser = argparse.ArgumentParser(
														
 
															-        #     description='Training Config', add_help=False)
														
 
															-        self.add_argument(
														
 
															-            '-c',
														
 
															-            '--config_yaml',
														
 
															-            default=
														
 
															-            'config.yml',
														
 
															-            type=str,
														
 
															-            metavar='FILE',
														
 
															-            help='YAML config file specifying default arguments')
														
 
															-
														
 
															-        # feature_columns = list(range(1, 28))
														
 
															-        label_columns = ['C_REAL_VALUE']
														
 
															-
														
 
															-        # label_in_feature_index = (lambda x, y: [x.index(i) for i in y])(feature_columns, label_columns)  # 因为feature不一定从0开始
														
 
															-
														
 
															-
														
 
															-        # 在控制台可以指定的参数， yml中没有
														
 
															-        self.add_argument('--feature_columns', type=list, default=None, help='要作为特征的列')
														
 
															-
														
 
															-        self.add_argument('--label_columns', type=list, default=label_columns, help='要预测的列')
														
 
															-
														
 
															-        self.add_argument('--label_in_feature_index', type=list, default=None, help='标签在特征列的索引')
														
 
															-
														
 
															-        self.add_argument('--input_size', type=int, default=0, help='输入维度')
														
 
															-        self.add_argument('--input_size_lstm', type=int, default=0, help='输入维度')
														
 
															-        self.add_argument('--input_size_cnn', type=int, default=0, help='输入维度')
														
 
															-
														
 
															-        self.add_argument('--output_size', type=int, default=len(label_columns), help='输出维度')
														
 
															-
														
 
															-        self.add_argument("--train_data_path", type=str, default=None,help='数据集地址')  # train_data_path yml中有
														
 
															-
														
 
															-        # model_name 和 model_save_path 这两个参数根据yml中的参数拼接而成
														
 
															-
														
 
															-        self.add_argument('--model_name', type=str, default=None, help='模型名称')
														
 
															-
														
 
															-        self.add_argument('--model_save_path', type=str, default=None, help='模型保存地址')
														
 
															-
														
 
															-
														
 
															-    def _init_dir(self, opt):
														
 
															-        import os, time
														
 
															-        # 在这里给opt赋值
														
 
															-        opt.model_name = "model_" + opt.continue_flag + opt.used_frame + opt.model_postfix[opt.used_frame]
														
 
															-        opt.model_save_path = './checkpoint/' + opt.model_name + "/"
														
 
															-        if not os.path.exists(opt.model_save_path):
														
 
															-            os.makedirs(opt.model_save_path)    # makedirs 递归创建目录
														
 
															-        if not os.path.exists(opt.figure_save_path):
														
 
															-            os.mkdir(opt.figure_save_path)
														
 
															-        if opt.do_train and (opt.do_log_save_to_file or opt.do_train_visualized):
														
 
															-            cur_time = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime())
														
 
															-            log_save_path = opt.log_save_path + cur_time + '_' + opt.used_frame + "/"
														
 
															-            os.makedirs(log_save_path)
														
 
															-
														
 
															-
														
 
															-# YAML should override the argparser's content
														
 
															-    def _parse_args_and_yaml(self):
														
 
															-        given_configs, remaining = self.parse_known_args()
														
 
															-        if given_configs.config_yaml:
														
 
															-            with open(given_configs.config_yaml, 'r', encoding='utf-8') as f:
														
 
															-                cfg = yaml.safe_load(f)
														
 
															-                self.set_defaults(**cfg)
														
 
															-
														
 
															-        # The main arg parser parses the rest of the args, the usual
														
 
															-        # defaults will have been overridden if config file specified.
														
 
															-        opt = self.parse_args(remaining)
														
 
															-        self._init_dir(opt)
														
 
															-        # Cache the args as a text string to save them in the output dir later
														
 
															-        opt_text = yaml.safe_dump(opt.__dict__, default_flow_style=False)
														
 
															-        return opt, opt_text
														
 
															-
														
 
															-
														
 
															-    def parse_args_and_yaml(self):
														
 
															-        return self._parse_args_and_yaml()[0]
														
 
															-
														
 
															-
														
 
if __name__ == "__main__":
    # Running this module directly does nothing; the parser call below is left disabled.
    # opt = _parse_args_and_yaml()
    pass
														
 
															-
														
 
															-
														
 
															-
														
 
															-
														
--- a/wind-LSTM-CNN-v2.0-nwp+rp+环境/config.yml
+++ b/wind-LSTM-CNN-v2.0-nwp+rp+环境/config.yml
@@ -1,77 +0,0 @@
 
															-Model:
														
 
															-  batch_size: 64
														
 
															-  dropout_rate: 0.2
														
 
															-  epoch: 20
														
 
															-  hidden_size: 128
														
 
															-  learning_rate: 0.001
														
 
															-  lstm_layers: 2
														
 
															-  patience: 5
														
 
															-  random_seed: 42
														
 
															-  time_step: 16
														
 
															-add_train: false
														
 
															-continue_flag: ''
														
 
															-data_format:
														
 
															-  dq: dq.csv
														
 
															-  envir: "\u73AF\u5883\u6570\u636E.csv"
														
 
															-  nwp: nwp.csv
														
 
															-  rp: rp.csv
														
 
															-debug_model: false
														
 
															-debug_num: 500
														
 
															-do_continue_train: false
														
 
															-do_figure_save: false
														
 
															-do_log_print_to_screen: true
														
 
															-do_log_save_to_file: true
														
 
															-do_predict: true
														
 
															-do_train: true
														
 
															-do_train_visualized: True
														
 
															-excel_data_path: ./data/J00307/
														
 
															-figure_save_path: ./figure/
														
 
															-is_continuous_predict: True
														
 
															-log_save_path: ./log/
														
 
															-mean:
														
 
															-  C_AIRT: 10.305992230762874
														
 
															-  C_CELLT: 10.664897925448384
														
 
															-  C_DIFFUSER: 143.2639061079428
														
 
															-  C_DIFFUSERDA: 6.571077155136789
														
 
															-  C_DIRECTR: 68.21328208942887
														
 
															-  C_DIRECTRDA: 3.163283039920654
														
 
															-  C_FORECAST: 3.1419734966774113
														
 
															-  C_GLOBALR: 173.2587817174973
														
 
															-  C_GLOBALRDA: 7.756491280271097
														
 
															-  C_HOURDA: 1.998222150590958
														
 
															-  C_P: 947.7830440532276
														
 
															-  C_RH: 55.59672286965865
														
 
															-  C_VALUE: 3.404744648318043
														
 
															-  C_WD: 212.88300686007108
														
 
															-  C_WS: 1.802446483180428
														
 
															-model_postfix:
														
 
															-  keras: .h5
														
 
															-  pytorch: .pth
														
 
															-  tensorflow: .ckpt
														
 
															-predict_points: 16
														
 
															-shuffle_train_data: false
														
 
															-std:
														
 
															-  C_AIRT: 12.127220611319888
														
 
															-  C_CELLT: 12.654848145970181
														
 
															-  C_DIFFUSER: 230.93680419867772
														
 
															-  C_DIFFUSERDA: 6.4933162833681415
														
 
															-  C_DIRECTR: 166.61348332191056
														
 
															-  C_DIRECTRDA: 4.991297839913351
														
 
															-  C_FORECAST: 4.447082956749344
														
 
															-  C_GLOBALR: 258.87947949591955
														
 
															-  C_GLOBALRDA: 7.9174382136573955
														
 
															-  C_HOURDA: 2.9110230573747247
														
 
															-  C_P: 25.75152505719027
														
 
															-  C_RH: 22.445059526990818
														
 
															-  C_VALUE: 5.013868885103326
														
 
															-  C_WD: 112.90029001408325
														
 
															-  C_WS: 1.6575249140627502
														
 
															-train_data_path: ./data/
														
 
															-train_data_rate: 0.9
														
 
															-use_cuda: false
														
 
															-used_frame: keras
														
 
															-valid_data_rate: 0.15
														
 
															-
														
 
															-is_photovoltaic: True
														
 
															-cap: 110
														
 
															-envir_columns: 16
														
--- a/wind-LSTM-CNN-v2.0-nwp+rp+环境/data_analyse.py
+++ b/wind-LSTM-CNN-v2.0-nwp+rp+环境/data_analyse.py
@@ -1,76 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/4/12 18:57
														
 
															-# file: data_analyse.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-import sys
														
 
															-import numpy as np
														
 
															-import matplotlib.pyplot as plt
														
 
															-import pandas as pd
														
 
															-
														
 
															-
														
 
															-class data_analyse(object):
														
 
															-    def __init__(self, opt, logger, process):
														
 
															-        self.opt = opt
														
 
															-        self.logger = logger
														
 
															-        self.ds = process
														
 
															-
														
 
															-    def calculate_acc(self, label_data, predict_data):
														
 
															-        loss = np.sum((label_data - predict_data) ** 2) / len(label_data)  # mse
														
 
															-        loss_sqrt = np.sqrt(loss)  # rmse
														
 
															-        loss_acc = 1 - loss_sqrt / self.opt.cap
														
 
															-        return loss_acc
														
 
															-
														
 
															-    def get_16_points(self, results):
														
 
															-        # results为模型预测的一维数组，遍历，取每16个点的最后一个点
														
 
															-        preds = []
														
 
															-        for res in results:
														
 
															-            preds.append(res.iloc[-1].values)
														
 
															-        return np.array(preds)
														
 
															-
														
 
															-    def predict_acc(self, predict_data, dfy):
														
 
															-        predict_data = predict_data * self.ds.std['C_REAL_VALUE'] + self.ds.mean['C_REAL_VALUE']
														
 
															-        dfs = dfy[0]
														
 
															-        for i in range(1, len(dfy)):
														
 
															-            dfs.extend(dfy[i])
														
 
															-        for i, df in enumerate(dfs):
														
 
															-            df["PREDICT"] = predict_data[i]
														
 
															-            dfs[i] = df
														
 
															-        data = self.get_16_points(dfs)
														
 
															-        df = pd.DataFrame(data, columns=['C_TIME', 'C_REAL_VALUE', 'PREDICT'])
														
 
															-        # label_data = label_data.reshape((-1, self.opt.output_size))
														
 
															-        # label_data 要进行反归一化
														
 
															-        df.to_csv(self.opt.excel_data_path + "nwp+rp+环境（LSTM+CNN）.csv")
														
 
															-        label_name = [self.opt.feature_columns[i] for i in self.opt.label_in_feature_index]
														
 
															-        loss_norm = self.calculate_acc(df['C_REAL_VALUE'], df['PREDICT'])
														
 
															-
														
 
															-        self.logger.info("The mean squared error of power {} is ".format(label_name) + str(loss_norm))
														
 
															-
														
 
															-        # loss_norm = self.calculate_acc(df['C_REAL_VALUE'], df['C_FP_VALUE'])
														
 
															-
														
 
															-        # self.logger.info("The mean squared error of power {} is ".format(label_name) + str(loss_norm))
														
 
															-        self.preidct_draw(df['C_REAL_VALUE'].values, df['PREDICT'].values)
														
 
															-
														
 
															-    def preidct_draw(self, label_data, predict_data):
														
 
															-        X = list(range(label_data.shape[0]))
														
 
															-        print("label_x = ", X)
														
 
															-        label_column_num = len(self.opt.label_columns)
														
 
															-        label_name = [self.opt.feature_columns[i] for i in self.opt.label_in_feature_index]
														
 
															-        if not sys.platform.startswith('linux'):    # 无桌面的Linux下无法输出，如果是有桌面的Linux，如Ubuntu，可去掉这一行
														
 
															-            for i in range(label_column_num):
														
 
															-                plt.figure(i+1)                     # 预测数据绘制
														
 
															-                plt.plot(X, label_data, label='label', color='b')
														
 
															-                plt.plot(X, predict_data, label='predict', color='g')
														
 
															-                # plt.plot(predict_X, dq_data[:, i], label='dq', color='y')
														
 
															-                # plt.title("Predict actual {} power with {}".format(label_name[i], self.opt.used_frame))
														
 
															-                # self.logger.info("The predicted power {} for the last {} point(s) is: ".format(label_name[i], self.opt.predict_points) +
														
 
															-                #       str(np.squeeze(predict_data[-self.opt.predict_points:, i])))
														
 
															-
														
 
															-                if self.opt.do_figure_save:
														
 
															-                    plt.savefig(self.opt.figure_save_path+"{}predict_{}_with_{}.png".format(self.opt.continue_flag, label_name[i], self.opt.used_frame))
														
 
															-
														
 
															-            plt.show()
														
 
															-
														
 
															-    def tangle_results(self):
														
 
															-        pass
														
--- a/wind-LSTM-CNN-v2.0-nwp+rp+环境/data_features.py
+++ b/wind-LSTM-CNN-v2.0-nwp+rp+环境/data_features.py
@@ -1,139 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/4/12 17:42
														
 
															-# file: data_features.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-import pandas as pd
														
 
															-from sklearn.model_selection import train_test_split
														
 
															-import numpy as np
														
 
															-from data_utils import *
														
 
															-
														
 
															-
														
 
															-class data_features(object):
														
 
															-    def __init__(self, opt, mean, std):
														
 
															-        self.opt = opt
														
 
															-        self.time_step = self.opt.Model["time_step"]
														
 
															-        self.mean = mean
														
 
															-        self.std = std
														
 
															-        self.columns = list()
														
 
															-        self.columns_lstm = list()
														
 
															-        self.columns_cnn = list()
														
 
															-
														
 
															-    def get_train_data(self, dfs):
														
 
															-        train_x, valid_x, train_y, valid_y = [], [], [], []
														
 
															-        self.opt.feature_columns = dfs[0].columns.tolist()
														
 
															-        self.opt.feature_columns.insert(0, 'C_TIME')
														
 
															-        self.opt.label_in_feature_index = (lambda x, y: [x.index(i) for i in y])(self.opt.feature_columns,
														
 
															-                                                                        self.opt.label_columns)  # 因为feature不一定从0开始
														
 
															-        self.opt.input_size = len(self.opt.feature_columns)
														
 
															-        for df in dfs:
														
 
															-            datax, datay = self.get_data_features(df)
														
 
															-            trainx_ = [[np.array(x[0]), np.array(x[1])] for x in datax]
														
 
															-            # trainx = np.array(datax)
														
 
															-            trainy = [y['C_REAL_VALUE'].values for y in datay]
														
 
															-            trainy = np.expand_dims(np.array(trainy), axis=-1)  # 在最后一维加一维度
														
 
															-            tx, vx, ty, vy = train_test_split(trainx_, trainy, test_size=self.opt.valid_data_rate,
														
 
															-                                                                  random_state=self.opt.Model["random_seed"],
														
 
															-                                                                  shuffle=self.opt.shuffle_train_data)  # 划分训练和验证集
														
 
															-            # 分裂 tx 和 vx
														
 
															-            train_x.extend(tx)
														
 
															-            valid_x.extend(vx)
														
 
															-            train_y.append(ty)
														
 
															-            valid_y.append(vy)
														
 
															-        # train_x = np.concatenate(train_x, axis=0)
														
 
															-        # valid_x = np.concatenate(valid_x, axis=0)
														
 
															-        train_y = np.concatenate(train_y, axis=0)
														
 
															-        valid_y = np.concatenate(valid_y, axis=0)
														
 
															-
														
 
															-        train_x = self.norm_features(train_x)
														
 
															-        valid_x = self.norm_features(valid_x)
														
 
															-        train_y = self.norm_label(train_y)
														
 
															-        valid_y = self.norm_label(valid_y)
														
 
															-
														
 
															-        cnn_x, cnn_x1 = [], []
														
 
															-        lstm_x, lstm_x1 = [], []
														
 
															-        for i in range(0, len(train_x)):
														
 
															-            cnn_x.append(train_x[i][0])
														
 
															-            lstm_x.append(train_x[i][1])
														
 
															-        train_x = [np.array(cnn_x), np.array(lstm_x)]
														
 
															-        for i in range(0, len(valid_x)):
														
 
															-            cnn_x1.append(valid_x[i][0])
														
 
															-            lstm_x1.append(valid_x[i][1])
														
 
															-        valid_x = [np.array(cnn_x1), np.array(lstm_x1)]
														
 
															-        return train_x, valid_x, train_y, valid_y
														
 
															-
														
 
															-    def get_test_data(self, dfs):
														
 
															-        test_x, test_y, data_y = [], [], []
														
 
															-        self.opt.feature_columns = dfs[0].columns.tolist()
														
 
															-        self.opt.feature_columns.insert(0, 'C_TIME')
														
 
															-        self.opt.label_in_feature_index = (lambda x, y: [x.index(i) for i in y])(self.opt.feature_columns,
														
 
															-                                                                        self.opt.label_columns)  # 因为feature不一定从0开始
														
 
															-        self.opt.input_size = len(self.opt.feature_columns)
														
 
															-        for df in dfs:
														
 
															-            datax, datay = self.get_data_features(df)
														
 
															-            trainx_ = [[np.array(x[0]), np.array(x[1])] for x in datax]
														
 
															-            # trainx = np.array(datax)
														
 
															-            trainy = [y['C_REAL_VALUE'].values for y in datay]
														
 
															-            trainy = np.expand_dims(np.array(trainy), axis=-1)  # 在最后一维加一维度
														
 
															-            test_x.extend(trainx_)
														
 
															-            test_y.append(trainy)
														
 
															-            data_y.append(datay)
														
 
															-
														
 
															-        test_y = np.concatenate(test_y, axis=0)
														
 
															-
														
 
															-        test_x = self.norm_features(test_x)
														
 
															-        test_y = self.norm_label(test_y)
														
 
															-
														
 
															-        cnn_x, lstm_x = [], []
														
 
															-
														
 
															-        for i in range(0, len(test_x)):
														
 
															-            cnn_x.append(test_x[i][0])
														
 
															-            lstm_x.append(test_x[i][1])
														
 
															-        test_x = [np.array(cnn_x), np.array(lstm_x)]
														
 
															-
														
 
															-        return test_x, test_y, data_y
														
 
															-
														
 
															-    def get_data_features(self, df):   # 这段代码基于pandas方法的优化
														
 
															-        norm_data = df.reset_index()
														
 
															-        feature_data = norm_data[:-self.opt.predict_points]
														
 
															-        label_data = norm_data[self.opt.predict_points:].reset_index(drop=True)
														
 
															-        time_step = self.opt.Model["time_step"]
														
 
															-        time_step_loc = time_step - 1
														
 
															-        train_num = int(len(feature_data))
														
 
															-        time_rp = [feature_data.loc[i:i + time_step_loc, 'C_TIME':'C_WD_INST50'] for i in range(train_num - time_step)]
														
 
															-        nwp = [label_data.loc[i:i + time_step_loc, 'C_T':] for i in range(train_num - time_step)]
														
 
															-        features_x, features_x1, features_y = [], [], []
														
 
															-        for row in zip(time_rp, nwp):
														
 
															-            row0 = row[0]   # row0是时间+rp+环境
														
 
															-            row1 = row[1]  # row1是nwp
														
 
															-            row0.set_index('C_TIME', inplace=True, drop=False)
														
 
															-            row0["C_TIME"] = row0["C_TIME"].apply(datetime_to_timestr)
														
 
															-            row0_ = row0.loc[:, ['C_TIME', 'C_REAL_VALUE']]
														
 
															-            row0_.reset_index(drop=True, inplace=True)
														
 
															-            row1.reset_index(drop=True, inplace=True)
														
 
															-            rowx = pd.concat([row0_, row1], axis=1)  # rowx是时间+rp+nwp
														
 
															-            features_x.append([row0, rowx])
														
 
															-        self.columns = row0.columns.tolist()
														
 
															-        self.columns_cnn = row0.columns.tolist()
														
 
															-        self.columns_lstm = rowx.columns.tolist()
														
 
															-
														
 
															-        features_y = [label_data.loc[i:i + time_step_loc, ['C_TIME', 'C_REAL_VALUE']] for i in range(train_num - time_step)]
														
 
															-
														
 
															-        return features_x, features_y
														
 
															-
														
 
															-    def norm_features(self, data: np.ndarray):
														
 
															-        for i, d in enumerate(data):
														
 
															-            mean = np.array([self.mean[col] for col in self.columns_cnn])
														
 
															-            std = np.array([self.std[col] for col in self.columns_cnn])
														
 
															-            d[0] = (d[0] - mean) / std  # 归一化
														
 
															-            mean = np.array([self.mean[col] for col in self.columns_lstm])
														
 
															-            std = np.array([self.std[col] for col in self.columns_lstm])
														
 
															-            d[1] = (d[1] - mean) / std  # 归一化
														
 
															-            data[i] = d
														
 
															-            self.opt.input_size_lstm = len(self.columns_lstm)
														
 
															-            self.opt.input_size_cnn = len(self.columns_cnn)
														
 
															-        return data
														
 
															-
														
 
															-    def norm_label(self, label_data: np.ndarray):
														
 
															-        return (label_data - self.mean['C_REAL_VALUE']) / self.std['C_REAL_VALUE']
														
--- a/wind-LSTM-CNN-v2.0-nwp+rp+环境/data_process.py
+++ b/wind-LSTM-CNN-v2.0-nwp+rp+环境/data_process.py
@@ -1,144 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/3/17 10:10
														
 
															-# file: main.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-import pandas as pd
														
 
															-import numpy as np
														
 
															-from data_utils import *
														
 
															-import yaml
														
 
															-
														
 
															-
														
 
															-class data_process(object):
														
 
															-    def __init__(self, opt):
														
 
															-        self.std = None
														
 
															-        self.mean = None
														
 
															-        self.opt = opt
														
 
															-        # 都是在ndarray量纲下进行计算
														
 
															-        # self.norm_data = (self.tables[:, 1:] - self.mean) / self.std  # 归一化，去量纲
														
 
															-        # self.norm_data.insert(0, 'C_TIME', self.tables['C_TIME'])
														
 
															-        # self.set_yml({'mean': self.mean.to_dict(), 'std': self.std.to_dict()})
														
 
															-        # self.start_num_in_test = 0
														
 
															-
														
 
															-    def get_processed_data(self):
														
 
															-        excel_data_path = self.opt.excel_data_path
														
 
															-        data_format = self.opt.data_format
														
 
															-        dq_path = excel_data_path + data_format["dq"]
														
 
															-        rp_path = excel_data_path + data_format["rp"]
														
 
															-        nwp_path = excel_data_path + data_format["nwp"]
														
 
															-        envir_path = excel_data_path + data_format["envir"]
														
 
															-
														
 
															-
														
 
															-        dq_columns = ['C_FORECAST_TIME', 'C_FP_VALUE']
														
 
															-        rp_columns = ['C_TIME', 'C_REAL_VALUE']  # 待优化 ["'C_TIME'", "'C_REAL_VALUE'"] 原因：csv 字符串是单引号''，read_csv带单引号
														
 
															-
														
 
															-        nwp = self.read_data(nwp_path).loc[:, "C_PRE_TIME":]  # 待优化 导出csv按照表的列顺序 read_csv按照csv列顺序读取
														
 
															-        nwp = self.data_cleaning(nwp)
														
 
															-        nwp.drop(['C_FARM_ID', 'C_SC_DATE', 'C_SC_TIME', 'C_PRE_DATE'], axis=1, inplace=True)
														
 
															-        nwp["C_PRE_TIME"] = nwp["C_PRE_TIME"].apply(timestr_to_datetime)
														
 
															-        nwp.rename({"C_PRE_TIME": "C_TIME"}, axis=1, inplace=True)
														
 
															-        nwp.set_index('C_TIME', inplace=True)
														
 
															-        nwp = self.drop_duplicated(nwp)
														
 
															-
														
 
															-        envir = self.read_data(envir_path).loc[:, "C_TIME":]  # 待优化 导出csv按照表的列顺序 read_csv按照csv列顺序读取
														
 
															-        envir = self.data_cleaning(envir)
														
 
															-        envir["C_TIME"] = envir["C_TIME"].apply(timestr_to_datetime)
														
 
															-        envir.set_index('C_TIME', inplace=True)
														
 
															-        envir = self.drop_duplicated(envir)
														
 
															-
														
 
															-        rp = self.read_data(rp_path, rp_columns)
														
 
															-        rp["C_TIME"] = rp["C_TIME"].apply(timestr_to_datetime)
														
 
															-        rp.set_index('C_TIME', inplace=True)  # nan也可以设置索引列
														
 
															-        rp = self.data_cleaning(rp)
														
 
															-        rp = self.drop_duplicated(rp)
														
 
															-
														
 
															-        df = self.tables_unite(rp, envir)
														
 
															-        df = self.tables_unite(df, nwp)
														
 
															-        dfs = self.missing_time_splite(df)
														
 
															-        dfs = [self.data_fill(df) for df in dfs]
														
 
															-        self.norm(dfs)  # 归一化 待解决
														
 
															-        return dfs
														
 
															-
														
 
															-    def norm(self, dfs):
														
 
															-        df = pd.concat(dfs, axis=0)
														
 
															-        df = df.reset_index()
														
 
															-        df["C_TIME"] = df["C_TIME"].apply(datetime_to_timestr)
														
 
															-        mean = np.mean(df, axis=0)  # 数据的均值
														
 
															-        std = np.std(df, axis=0)  # 标准差
														
 
															-        if hasattr(self.opt, 'mean') is False or hasattr(self.opt, 'std') is False:
														
 
															-            self.set_yml({'mean': mean.to_dict(), 'std': std.to_dict()})
														
 
															-        print("归一化参数，均值为：{}，方差为：{}".format(mean.to_dict(), std.to_dict()))
														
 
															-        self.mean, self.std = mean.to_dict(), std.to_dict()
														
 
															-
														
 
															-    def data_cleaning(self, data):
														
 
															-        data = data.replace(-99, np.nan)
														
 
															-        # nan 超过30% 删除
														
 
															-        data = data.dropna(axis=1, thresh=len(data)*0.8)
														
 
															-        # 删除取值全部相同的列
														
 
															-        data = data.loc[:, (data != data.iloc[0]).any()]
														
 
															-        # nan 替换成0 本周问题 1.卷积学习，0是否合适？
														
 
															-        data = data.replace(np.nan, 0)
														
 
															-        return data
														
 
															-
														
 
															-    def missing_time_splite(self, df):
														
 
															-        dt = pd.Timedelta(minutes=15)
														
 
															-        day1 = pd.Timedelta(days=1)
														
 
															-        cnt = 0
														
 
															-        cnt1 = 0
														
 
															-        start_index = 0
														
 
															-        dfs = []
														
 
															-        for i in range(1, len(df)):
														
 
															-            if df.index[i] - df.index[i-1] >= day1:
														
 
															-                df_x = df.iloc[start_index:i, ]
														
 
															-                dfs.append(df_x)
														
 
															-                start_index = i
														
 
															-                cnt1 += 1
														
 
															-            if df.index[i] - df.index[i-1] != dt:
														
 
															-                print(df.index[i-1], end=" ~ ")
														
 
															-                print(df.index[i])
														
 
															-                cnt += 1
														
 
															-        dfs.append(df.iloc[start_index:, ])
														
 
															-        print("数据总数：", len(df), "，缺失段数：", cnt, "其中，超过一天的段数：", cnt1)
														
 
															-        return dfs
														
 
															-
														
 
															-    def data_fill(self, df):
														
 
															-        df = df.resample('15T').bfill()
														
 
															-        return df
														
 
															-
														
 
															-    def set_yml(self, yml_dict):
														
 
															-        with open(self.opt.config_yaml, 'r', encoding='utf-8') as f:
														
 
															-            cfg = yaml.safe_load(f)
														
 
															-        for k, v in yml_dict.items():
														
 
															-            cfg[k] = v
														
 
															-        with open(self.opt.config_yaml, 'w') as f:
														
 
															-            yaml.safe_dump(cfg, f, default_flow_style=False)
														
 
															-
														
 
															-    def read_data(self, path, cols=None, index_col=None):
														
 
															-        init_data = pd.read_csv(path, usecols=cols, index_col=index_col)
														
 
															-        return init_data
														
 
															-
														
 
															-    def filter_data(self):
														
 
															-        check_table = self.tables[:, 2]  # 实际功率不能为0，为0代表没发电
														
 
															-        preserve_index = list(np.nonzero(check_table)[0])
														
 
															-        indexs = list(range(len(self.tables)))
														
 
															-        del_index = list(set(indexs) - set(preserve_index))
														
 
															-        self.tables = np.delete(self.tables, del_index, axis=0)
														
 
															-        return self.tables
														
 
															-
														
 
															-    def drop_duplicated(self, df):
														
 
															-        df = df.groupby(level=0).mean()  # DatetimeIndex时间索引去重
														
 
															-        return df
														
 
															-
														
 
															-    def tables_unite(self, t1, t2):
														
 
															-        return pd.merge(t1, t2, left_index=True, right_index=True)
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    ds = DataSet()
														
 
															-    # dq = ds.read_data(dq_path, dq_columns)[0]
														
 
															-    # rp = ds.read_data(rp_path, rp_columns)[0]
														
 
															-    # # rp_average(rp)    # 计算平均功率
														
 
															-    # envir = ds.read_data(envir_path, envir_columns)[0]
														
 
															-    # tables = ds.tables_integra(dq, rp, envir)
														
 
															-    # ds.tables_norm_result(tables)
														
--- a/wind-LSTM-CNN-v2.0-nwp+rp+环境/data_utils.py
+++ b/wind-LSTM-CNN-v2.0-nwp+rp+环境/data_utils.py
@@ -1,65 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/3/22 17:17
														
 
															-# file: dpdUtils.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-
														
 
															-
														
 
															-import time, datetime
														
 
															-
														
 
															-
														
 
															-class ValidationError(Exception):
														
 
															-    def __init__(self, message):
														
 
															-        self.message = message
														
 
															-
														
 
															-
														
 
															-def timestamp_to_datetime(ts):
														
 
															-    if type(ts) is not int:
														
 
															-        raise ValueError("timestamp-时间格式必须是整型")
														
 
															-    if len(str(ts)) == 13:
														
 
															-        return datetime.datetime.fromtimestamp(ts/1000)
														
 
															-    elif len(str(ts)) == 10:
														
 
															-        return datetime.datetime.fromtimestamp(ts)
														
 
															-    else:
														
 
															-        raise ValueError("timestamp-时间格式长度错误")
														
 
															-
														
 
															-
														
 
															-def datetime_to_timestamp(dt, len):
														
 
															-    if len not in (10, 13):
														
 
															-        raise ValueError("timestamp-时间戳转换长度错误")
														
 
															-    if len == 10:
														
 
															-        return int(round(time.mktime(dt.timetuple())))
														
 
															-    else:
														
 
															-        return int(round(time.mktime(dt.timetuple()))*1000)
														
 
															-
														
 
															-
														
 
															-def datetime_to_timestr(dt):
														
 
															-    return int(dt.strftime('%m%d%H%M'))
														
 
															-
														
 
															-
														
 
															-def timestr_to_datetime(time_data):
														
 
															-    """
														
 
															-    将时间戳或时间字符串转换为datetime.datetime类型
														
 
															-    :param time_data: int or str
														
 
															-    :return:datetime.datetime
														
 
															-    """
														
 
															-    if isinstance(time_data, float):
														
 
															-        result = timestamp_to_datetime(int(time_data))
														
 
															-    elif isinstance(time_data, int):
														
 
															-        result = timestamp_to_datetime(time_data)
														
 
															-    elif isinstance(time_data, str):
														
 
															-        if len(time_data) == 10:
														
 
															-            result = datetime.datetime.strptime(time_data, '%d/%m/%Y')
														
 
															-            # result = datetime.datetime.strptime(time_data, '%Y-%m-%d')
														
 
															-        elif len(time_data) in {17, 18, 19}:
														
 
															-            result = datetime.datetime.strptime(time_data, '%d/%m/%Y %H:%M:%S')   # strptime字符串解析必须严格按照字符串中的格式
														
 
															-            # result = datetime.datetime.strptime(time_data, '%Y-%m-%d %H:%M:%S')
														
 
															-        else:
														
 
															-            raise ValidationError("时间字符串长度不满足要求！")
														
 
															-    return result
														
 
															-
														
 
															-
														
 
															-def timestamp_to_timestr(t):
														
 
															-    return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(t))
														
 
															-
														
--- a/wind-LSTM-CNN-v2.0-nwp+rp+环境/figure.py
+++ b/wind-LSTM-CNN-v2.0-nwp+rp+环境/figure.py
@@ -1,83 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/3/20 15:19
														
 
															-# file: figure.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-import sys
														
 
															-import numpy as np
														
 
															-import matplotlib.pyplot as plt
														
 
															-
														
 
															-
														
 
															-class Figure(object):
														
 
															-    def __init__(self, opt, logger, process):
														
 
															-        self.opt = opt
														
 
															-        self.ds = process
														
 
															-        self.logger = logger
														
 
															-
														
 
															-    def get_16_points(self, results):
														
 
															-        # results为模型预测的一维数组，遍历，取每16个点的最后一个点
														
 
															-        preds = []
														
 
															-        for res in results:
														
 
															-            preds.append(res[-1])
														
 
															-        return np.array(preds)
														
 
															-
														
 
															-    def draw(self, label_data, predict_norm_data, numbers):
														
 
															-        # label_data = origin_data.data[origin_data.train_num + origin_data.start_num_in_test : ,
														
 
															-        #                                         config.label_in_feature_index]
														
 
															-        # dq_data = dq_data.reshape((-1, self.opt.output_size))
														
 
															-        predict_norm_data = self.get_16_points(predict_norm_data)
														
 
															-        label_data = self.get_16_points(label_data)
														
 
															-        label_data = label_data.reshape((-1, self.opt.output_size))
														
 
															-        # label_data 要进行反归一化
														
 
															-        label_data = label_data * self.ds.std[self.opt.label_in_feature_index] + \
														
 
															-                       self.ds.mean[self.opt.label_in_feature_index]
														
 
															-        predict_data = predict_norm_data * self.ds.std[self.opt.label_in_feature_index] + \
														
 
															-                       self.ds.mean[self.opt.label_in_feature_index]   # 通过保存的均值和方差还原数据
														
 
															-        # dq_data = dq_data * self.ds.std[0] + self.ds.mean[0]
														
 
															-        # predict_data = predict_norm_data
														
 
															-        assert label_data.shape[0] == predict_data.shape[0], "The element number in origin and predicted data is different"
														
 
															-
														
 
															-        label_name = [self.ds.tables_column_name[i] for i in self.opt.label_in_feature_index]
														
 
															-        label_column_num = len(self.opt.label_columns)
														
 
															-
														
 
															-        # label 和 predict 是错开config.predict_day天的数据的
														
 
															-        # 下面是两种norm后的loss的计算方式，结果是一样的，可以简单手推一下
														
 
															-        # label_norm_data = origin_data.norm_data[origin_data.train_num + origin_data.start_num_in_test:,
														
 
															-        #              config.label_in_feature_index]
														
 
															-        # loss_norm = np.mean((label_norm_data[config.predict_day:] - predict_norm_data[:-config.predict_day]) ** 2, axis=0)
														
 
															-        # logger.info("The mean squared error of stock {} is ".format(label_name) + str(loss_norm))
														
 
															-
														
 
															-        loss = np.sum((label_data - predict_data) ** 2)/len(label_data)  # mse
														
 
															-        # loss = np.mean((label_data - predict_data) ** 2, axis=0)
														
 
															-        loss_sqrt = np.sqrt(loss)   # rmse
														
 
															-        loss_norm = 1 - loss_sqrt / self.opt.cap
														
 
															-        # loss_norm = loss/(ds.std[opt.label_in_feature_index] ** 2)
														
 
															-        self.logger.info("The mean squared error of power {} is ".format(label_name) + str(loss_norm))
														
 
															-
														
 
															-        # loss1 = np.sum((label_data - dq_data) ** 2) / len(label_data)  # mse
														
 
															-        # loss_sqrt1 = np.sqrt(loss1)  # rmse
														
 
															-        # loss_norm1 = 1 - loss_sqrt1 / self.opt.cap
														
 
															-        # self.logger.info("The mean squared error1 of power {} is ".format(label_name) + str(loss_norm1))
														
 
															-        if self.opt.is_continuous_predict:
														
 
															-            # label_X = range(int((self.ds.data_num - self.ds.train_num - 32)))
														
 
															-            label_X = list(range(numbers))
														
 
															-        else:
														
 
															-            label_X = range(int((self.ds.data_num - self.ds.train_num - self.ds.start_num_in_test)/2))
														
 
															-        print("label_x = ", label_X)
														
 
															-        predict_X = [x for x in label_X]
														
 
															-
														
 
															-        if not sys.platform.startswith('linux'):    # 无桌面的Linux下无法输出，如果是有桌面的Linux，如Ubuntu，可去掉这一行
														
 
															-            for i in range(label_column_num):
														
 
															-                plt.figure(i+1)                     # 预测数据绘制
														
 
															-                plt.plot(label_X, label_data[:, i], label='label', color='b')
														
 
															-                plt.plot(predict_X, predict_data[:, i], label='predict', color='g')
														
 
															-                # plt.plot(predict_X, dq_data[:, i], label='dq', color='y')
														
 
															-                # plt.title("Predict actual {} power with {}".format(label_name[i], self.opt.used_frame))
														
 
															-                self.logger.info("The predicted power {} for the last {} point(s) is: ".format(label_name[i], self.opt.predict_points) +
														
 
															-                      str(np.squeeze(predict_data[-self.opt.predict_points:, i])))
														
 
															-
														
 
															-                if self.opt.do_figure_save:
														
 
															-                    plt.savefig(self.opt.figure_save_path+"{}predict_{}_with_{}.png".format(self.opt.continue_flag, label_name[i], opt.used_frame))
														
 
															-
														
 
															-            plt.show()
														
--- a/wind-LSTM-CNN-v2.0-nwp+rp+环境/logger.py
+++ b/wind-LSTM-CNN-v2.0-nwp+rp+环境/logger.py
@@ -1,43 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/3/20 15:19
														
 
															-# file: logger.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-
														
 
															-import logging, sys
														
 
															-from logging.handlers import RotatingFileHandler
														
 
															-
														
 
															-
														
 
															-def load_logger(config):
														
 
															-    logger = logging.getLogger()
														
 
															-    logger.setLevel(level=logging.DEBUG)
														
 
															-
														
 
															-    # StreamHandler
														
 
															-    if config.do_log_print_to_screen:
														
 
															-        stream_handler = logging.StreamHandler(sys.stdout)
														
 
															-        stream_handler.setLevel(level=logging.INFO)
														
 
															-        formatter = logging.Formatter(datefmt='%Y/%m/%d %H:%M:%S',
														
 
															-                                      fmt='[ %(asctime)s ] %(message)s')
														
 
															-        stream_handler.setFormatter(formatter)
														
 
															-        logger.addHandler(stream_handler)
														
 
															-
														
 
															-    # FileHandler
														
 
															-    if config.do_log_save_to_file:
														
 
															-        file_handler = RotatingFileHandler(config.log_save_path + "out.log", maxBytes=1024000, backupCount=5)
														
 
															-        file_handler.setLevel(level=logging.INFO)
														
 
															-        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
														
 
															-        file_handler.setFormatter(formatter)
														
 
															-        logger.addHandler(file_handler)
														
 
															-
														
 
															-        # 把config信息也记录到log 文件中
														
 
															-        config_dict = {}
														
 
															-        for key in dir(config):
														
 
															-            if not key.startswith("_"):
														
 
															-                config_dict[key] = getattr(config, key)
														
 
															-        config_str = str(config_dict)
														
 
															-        config_list = config_str[1:-1].split(", '")
														
 
															-        config_save_str = "\nConfig:\n" + "\n'".join(config_list)
														
 
															-        logger.info(config_save_str)
														
 
															-
														
 
															-    return logger
														
--- a/wind-LSTM-CNN-v2.0-nwp+rp+环境/model/__init__.py
+++ b/wind-LSTM-CNN-v2.0-nwp+rp+环境/model/__init__.py
--- a/wind-LSTM-CNN-v2.0-nwp+rp+环境/model/model_keras.py
+++ b/wind-LSTM-CNN-v2.0-nwp+rp+环境/model/model_keras.py
@@ -1,57 +0,0 @@
 
															-# -*- coding: UTF-8 -*-
														
 
															-from keras.layers import Input, Dense, LSTM, concatenate, Conv1D, Conv2D, MaxPooling1D
														
 
															-from keras.models import Model
														
 
															-from keras.callbacks import ModelCheckpoint, EarlyStopping
														
 
															-
														
 
															-
														
 
															-def get_keras_model(opt):
														
 
															-    lstm_input = Input(shape=(opt.Model['time_step'], opt.input_size_lstm))
														
 
															-    cnn_input = Input(shape=(opt.Model['time_step'], opt.input_size_cnn))
														
 
															-    cnn = cnn_input
														
 
															-    cnn = Conv1D(filters=64, kernel_size=1, strides=1, padding='valid', activation='relu')(cnn)
														
 
															-
														
 
															-    cnn = MaxPooling1D(pool_size=64, strides=1, padding='valid', data_format='channels_first')(cnn)  # trides = None，那么默认值是pool_size
														
 
															-    lstm = lstm_input
														
 
															-    for i in range(opt.Model['lstm_layers']):
														
 
															-        lstm = LSTM(units=opt.Model['hidden_size'], dropout=opt.Model['dropout_rate'], return_sequences=True)(lstm)
														
 
															-    lstm = concatenate([lstm, cnn])
														
 
															-    output = Dense(opt.output_size)(lstm)
														
 
															-    model = Model([cnn_input, lstm_input], output)
														
 
															-    model.compile(loss='mse', optimizer='adam')     # metrics=["mae"]
														
 
															-    return model
														
 
															-
														
 
															-
														
 
															-def gpu_train_init():
														
 
															-    import tensorflow as tf
														
 
															-    from keras.backend.tensorflow_backend import set_session
														
 
															-    sess_config = tf.ConfigProto(log_device_placement=True, allow_soft_placement=True)
														
 
															-    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.7  # 最多使用70%GPU内存
														
 
															-    sess_config.gpu_options.allow_growth=True   # 初始化时不全部占满GPU显存, 按需分配
														
 
															-    sess = tf.Session(config=sess_config)
														
 
															-    set_session(sess)
														
 
															-
														
 
															-
														
 
															-def train(opt, train_and_valid_data):
														
 
															-    if opt.use_cuda: gpu_train_init()
														
 
															-    train_X, train_Y, valid_X, valid_Y = train_and_valid_data
														
 
															-    import numpy as np
														
 
															-    print("----------", np.array(train_X[0]).shape)
														
 
															-    print("++++++++++", np.array(train_X[1]).shape)
														
 
															-    model = get_keras_model(opt)
														
 
															-    model.summary()
														
 
															-    if opt.add_train:
														
 
															-        model.load_weights(opt.model_save_path + opt.model_name)
														
 
															-
														
 
															-    check_point = ModelCheckpoint(filepath=opt.model_save_path + opt.model_name, monitor='val_loss',
														
 
															-                                    save_best_only=True, mode='auto')
														
 
															-    early_stop = EarlyStopping(monitor='val_loss', patience=opt.Model['patience'], mode='auto')
														
 
															-    model.fit(train_X, train_Y, batch_size=opt.Model['batch_size'], epochs=opt.Model['epoch'], verbose=2,
														
 
															-              validation_data=(valid_X, valid_Y), callbacks=[check_point, early_stop])
														
 
															-
														
 
															-
														
 
															-def predict(config, test_X):
														
 
															-    model = get_keras_model(config)
														
 
															-    model.load_weights(config.model_save_path + config.model_name)
														
 
															-    result = model.predict(test_X, batch_size=1)
														
 
															-    # result = result.reshape((-1, config.output_size))
														
 
															-    return result
														
--- a/wind-LSTM-CNN-v2.0-nwp+rp+环境/requirements.txt
+++ b/wind-LSTM-CNN-v2.0-nwp+rp+环境/requirements.txt
@@ -1,8 +0,0 @@
 
															-sklearn
														
 
															-pandas
														
 
															-argparse
														
 
															-keras
														
 
															-tensorflow==1.15
														
 
															-matplotlib>=3.0.2
														
 
															-numpy>=1.14.6
														
 
															-scipy>=1.1.0
														
--- a/wind-LSTM-CNN-v2.0-nwp+rp+环境/run_case.py
+++ b/wind-LSTM-CNN-v2.0-nwp+rp+环境/run_case.py
@@ -1,58 +0,0 @@
 
															-# -*- coding: UTF-8 -*-
														
 
															-
														
 
															-import numpy as np
														
 
															-import os
														
 
															-import sys
														
 
															-import time
														
 
															-from figure import Figure
														
 
															-from data_process import data_process
														
 
															-from data_features import data_features
														
 
															-from logger import load_logger
														
 
															-from config import myargparse
														
 
															-from data_analyse import data_analyse
														
 
															-frame = "keras"
														
 
															-
														
 
															-if frame == "keras":
														
 
															-    from model.model_keras import train, predict
														
 
															-    os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3'
														
 
															-else:
														
 
															-    raise Exception("Wrong frame seletion")
														
 
															-
														
 
															-
														
 
															-def main():
														
 
															-    parse = myargparse(discription="training config", add_help=False)
														
 
															-    opt = parse.parse_args_and_yaml()
														
 
															-    logger = load_logger(opt)
														
 
															-    try:
														
 
															-        np.random.seed(opt.Model["random_seed"])
														
 
															-        process = data_process(opt=opt)
														
 
															-        dfs = process.get_processed_data()
														
 
															-        features = data_features(opt=opt, mean=process.mean, std=process.std)
														
 
															-        if opt.do_train:
														
 
															-            train_X, valid_X, train_Y, valid_Y = features.get_train_data([dfs[0][:'2021/8/1'], dfs[1][:'2022/3/1']])
														
 
															-            train(opt, [train_X, train_Y, valid_X, valid_Y])
														
 
															-        if opt.do_predict:
														
 
															-            test_X, test_Y, df_Y = features.get_test_data([dfs[0]['2021/8/1':'2021/9/6'], dfs[1]['2022/3/1':'2022/4/4']])
														
 
															-            result = predict(opt, test_X)       # 这里输出的是未还原的归一化预测数据
														
 
															-            analyse = data_analyse(opt, logger, process)
														
 
															-            analyse.predict_acc(result, df_Y)
														
 
															-    except Exception:
														
 
															-        logger.error("Run Error", exc_info=True)
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    import argparse
														
 
															-    # argparse方便于命令行下输入参数，可以根据需要增加更多
														
 
															-    # parser = argparse.ArgumentParser()
														
 
															-    # parser.add_argument("-t", "--do_train", default=False, type=bool, help="whether to train")
														
 
															-    # parser.add_argument("-p", "--do_predict", default=True, type=bool, help="whether to train")
														
 
															-    # parser.add_argument("-b", "--batch_size", default=64, type=int, help="batch size")
														
 
															-    # parser.add_argument("-e", "--epoch", default=20, type=int, help="epochs num")
														
 
															-    # args = parser.parse_args()
														
 
															-
														
 
															-    # con = Config()
														
 
															-    # for key in dir(args):               # dir(args) 函数获得args所有的属性
														
 
															-    #     if not key.startswith("_"):     # 去掉 args 自带属性，比如__name__等
														
 
															-    #         setattr(con, key, getattr(args, key))   # 将属性值赋给Config
														
 
															-    main()
														
 
															-
														
--- a/wind-LSTM-CNN-v2.0-nwp+rp+环境/run_case_history.py
+++ b/wind-LSTM-CNN-v2.0-nwp+rp+环境/run_case_history.py
@@ -1,142 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/3/20 9:23
														
 
															-# file: run_case_history.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-
														
 
															-class Data:
														
 
															-    def __init__(self, config):
														
 
															-        self.config = config
														
 
															-        self.data, self.data_column_name = self.read_data()
														
 
															-
														
 
															-        self.data_num = self.data.shape[0]
														
 
															-        self.train_num = int(self.data_num * self.config.train_data_rate)
														
 
															-
														
 
															-        self.mean = np.mean(self.data, axis=0)              # 数据的均值和方差
														
 
															-        self.std = np.std(self.data, axis=0)
														
 
															-        self.norm_data = (self.data - self.mean)/self.std   # 归一化，去量纲
														
 
															-
														
 
															-        self.start_num_in_test = 0      # 测试集中前几天的数据会被删掉，因为它不够一个time_step
														
 
															-
														
 
															-    def read_data(self):                # 读取初始数据
														
 
															-        if self.config.debug_mode:
														
 
															-            init_data = pd.read_csv(self.config.train_data_path, nrows=self.config.debug_num,
														
 
															-                                    usecols=self.config.feature_columns)
														
 
															-        else:
														
 
															-            init_data = pd.read_csv(self.config.train_data_path, usecols=self.config.feature_columns)
														
 
															-        init_data = self.filter_data(init_data)
														
 
															-        return init_data.values, init_data.columns.tolist()     # .columns.tolist() 是获取列名
														
 
															-
														
 
															-    def filter_data(self, init_data):
														
 
															-        return init_data[init_data.apply(np.sum, axis=1)!=0]
														
 
															-
														
 
															-    def get_train_and_valid_data(self):
														
 
															-        feature_data = self.norm_data[:self.train_num]
														
 
															-        label_data = self.norm_data[: self.train_num,
														
 
															-                                    self.config.label_in_feature_index]    # 将延后几天的数据作为label
														
 
															-
														
 
															-        if not self.config.do_continue_train:
														
 
															-            # 在非连续训练模式下，每time_step行数据会作为一个样本，两个样本错开一行，比如：1-20行，2-21行。。。。
														
 
															-            train_x, train_y = [], []
														
 
															-            for i in range(self.train_num-self.config.time_step*2):
														
 
															-                p1 = feature_data[:, 0][i:i+self.config.start_predict_point]
														
 
															-                p2 = feature_data[:, 1][i+self.config.start_predict_point:i+self.config.start_predict_point*2]
														
 
															-                p = [list(t) for t in zip(p1, p2)]  # 实际功率， 预测功率 是一组特征值
														
 
															-                l = label_data[i+self.config.start_predict_point:i+self.config.start_predict_point*2]
														
 
															-                train_x.append(p)
														
 
															-                train_y.append(l)
														
 
															-            # train_x = [feature_data[i:i+self.config.time_step] for i in range(self.train_num-self.config.time_step)]
														
 
															-            # train_y = [label_data[i+self.config.start_predict_point:i+self.config.time_step] for i in range(self.train_num-self.config.time_step)]
														
 
															-            # 这里选取后16个点 作为 预测及
														
 
															-        else:
														
 
															-            # 在连续训练模式下，每time_step行数据会作为一个样本，两个样本错开time_step行，
														
 
															-            # 比如：1-20行，21-40行。。。到数据末尾，然后又是 2-21行，22-41行。。。到数据末尾，……
														
 
															-            # 这样才可以把上一个样本的final_state作为下一个样本的init_state，而且不能shuffle
														
 
															-            # 目前本项目中仅能在pytorch的RNN系列模型中用
														
 
															-            train_x = [feature_data[start_index + i*self.config.time_step : start_index + (i+1)*self.config.time_step]
														
 
															-                       for start_index in range(self.config.time_step)
														
 
															-                       for i in range((self.train_num - start_index) // self.config.time_step)]
														
 
															-            train_y = [label_data[start_index + i*self.config.time_step : start_index + (i+1)*self.config.time_step]
														
 
															-                       for start_index in range(self.config.time_step)
														
 
															-                       for i in range((self.train_num - start_index) // self.config.time_step)]
														
 
															-
														
 
															-        train_x, train_y = np.array(train_x), np.array(train_y)
														
 
															-
														
 
															-        train_x, valid_x, train_y, valid_y = train_test_split(train_x, train_y, test_size=self.config.valid_data_rate,
														
 
															-                                                              random_state=self.config.random_seed,
														
 
															-                                                              shuffle=self.config.shuffle_train_data)   # 划分训练和验证集，并打乱
														
 
															-        return train_x, valid_x, train_y, valid_y
														
 
															-
														
 
															-
														
 
															-class Config:
														
 
															-    # 数据参数
														
 
															-    # feature_columns = list(range(2, 9))     # 要作为feature的列，按原数据从0开始计算，也可以用list 如 [2,4,6,8] 设置
														
 
															-    feature_columns = list(range(1, 3))
														
 
															-    # label_columns = [4, 5]                  # 要预测的列，按原数据从0开始计算, 如同时预测第四，五列 最低价和最高价
														
 
															-    label_columns = [1]
														
 
															-    # label_in_feature_index = [feature_columns.index(i) for i in label_columns]  # 这样写不行
														
 
															-    label_in_feature_index = (lambda x,y: [x.index(i) for i in y])(feature_columns, label_columns)  # 因为feature不一定从0开始
														
 
															-
														
 
															-    predict_day = 1             # 预测未来几天
														
 
															-    predict_points = 16
														
 
															-    # 网络参数
														
 
															-    input_size = len(feature_columns)
														
 
															-    output_size = len(label_columns)
														
 
															-
														
 
															-    hidden_size = 128           # LSTM的隐藏层大小，也是输出大小
														
 
															-    lstm_layers = 2             # LSTM的堆叠层数
														
 
															-    dropout_rate = 0.2          # dropout概率
														
 
															-    time_step = 16             # 这个参数很重要，是设置用前多少个点的数据来预测，也是LSTM的time step数，请保证训练数据量大于它
														
 
															-    start_predict_point = 16
														
 
															-
														
 
															-    # 训练参数
														
 
															-    do_train = True
														
 
															-    do_predict = True
														
 
															-    add_train = False           # 是否载入已有模型参数进行增量训练
														
 
															-    shuffle_train_data = False   # 是否对训练数据做shuffle
														
 
															-    use_cuda = False            # 是否使用GPU训练
														
 
															-
														
 
															-    train_data_rate = 0.95      # 训练数据占总体数据比例，测试数据就是 1-train_data_rate
														
 
															-    valid_data_rate = 0.15      # 验证数据占训练数据比例，验证集在训练过程使用，为了做模型和参数选择
														
 
															-
														
 
															-    batch_size = 64
														
 
															-    learning_rate = 0.001
														
 
															-    epoch = 20                  # 整个训练集被训练多少遍，不考虑早停的前提下
														
 
															-    patience = 5                # 训练多少epoch，验证集没提升就停掉
														
 
															-    random_seed = 42            # 随机种子，保证可复现
														
 
															-
														
 
															-    do_continue_train = False    # 每次训练把上一次的final_state作为下一次的init_state，仅用于RNN类型模型，目前仅支持pytorch
														
 
															-    continue_flag = ""           # 但实际效果不佳，可能原因：仅能以 batch_size = 1 训练
														
 
															-    if do_continue_train:
														
 
															-        shuffle_train_data = False
														
 
															-        batch_size = 1
														
 
															-        continue_flag = "continue_"
														
 
															-
														
 
															-    # 训练模式
														
 
															-    debug_mode = False  # 调试模式下，是为了跑通代码，追求快
														
 
															-    debug_num = 500  # 仅用debug_num条数据来调试
														
 
															-
														
 
															-    # 框架参数
														
 
															-    used_frame = frame  # 选择的深度学习框架，不同的框架模型保存后缀不一样
														
 
															-    model_postfix = {"pytorch": ".pth", "keras": ".h5", "tensorflow": ".ckpt"}
														
 
															-    model_name = "model_" + continue_flag + used_frame + model_postfix[used_frame]
														
 
															-
														
 
															-    # 路径参数
														
 
															-    train_data_path = "./data/J00285.csv"
														
 
															-    model_save_path = "./checkpoint/" + used_frame + "/"
														
 
															-    figure_save_path = "./figure/"
														
 
															-    log_save_path = "./log/"
														
 
															-    do_log_print_to_screen = True
														
 
															-    do_log_save_to_file = True                  # 是否将config和训练过程记录到log
														
 
															-    do_figure_save = False
														
 
															-    do_train_visualized = False          # 训练loss可视化，pytorch用visdom，tf用tensorboardX，实际上可以通用, keras没有
														
 
															-    if not os.path.exists(model_save_path):
														
 
															-        os.makedirs(model_save_path)    # makedirs 递归创建目录
														
 
															-    if not os.path.exists(figure_save_path):
														
 
															-        os.mkdir(figure_save_path)
														
 
															-    if do_train and (do_log_save_to_file or do_train_visualized):
														
 
															-        cur_time = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime())
														
 
															-        log_save_path = log_save_path + cur_time + '_' + used_frame + "/"
														
 
															-        os.makedirs(log_save_path)
														
 
															-
														
--- a/wind-LSTM-CNN-v2.0-nwp+rp+环境/test.py
+++ b/wind-LSTM-CNN-v2.0-nwp+rp+环境/test.py
@@ -1,43 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/4/11 15:58
														
 
															-# file: test.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-import pandas as pd
														
 
															-import numpy as np
														
 
															-# index = pd.date_range('1/1/2000', periods=9, freq='T')
														
 
															-# series = pd.Series(range(9), index=index)
														
 
															-# df = pd.DataFrame({'value': series})
														
 
															-# series1 = series.resample('3T').sum()
														
 
															-# series2 = series.resample('3T', label='right').sum()
														
 
															-# series3 = series.resample('3T', label='right', closed='right').sum()
														
 
															-# series4 = series.resample('30S').asfreq()
														
 
															-# series5 = series.resample('30S').bfill()
														
 
															-# print(series)
														
 
															-# print(series1)
														
 
															-# print(series2)
														
 
															-# print(series3)
														
 
															-# print(series4)
														
 
															-# print("---", series5)
														
 
															-
														
 
															-# x = np.random.randint(1,100,20).reshape((10,2))
														
 
															-# print(x)
														
 
															-# from sklearn.model_selection import train_test_split
														
 
															-#
														
 
															-# x_train, x_test = train_test_split(x, test_size=0.2, random_state=1, shuffle=False)
														
 
															-# print("x_train", x_train)
														
 
															-# print("x_test", x_test)
														
 
															-
														
 
															-
														
 
															-import numpy as np
														
 
															-import pandas as pd
														
 
															-#创建一组数据
														
 
															-data = {'name': ['John', 'Mike', 'Mozla', 'Rose', 'David', 'Marry', 'Wansi', 'Sidy', 'Jack', 'Alic'],
														
 
															-        'age': [20, 32, 29, np.nan, 15, 28, 21, 30, 37, 25],
														
 
															-        'gender': [0, 0, 1, 1, 0, 1, 0, 0, 1, 1],
														
 
															-        'isMarried': ['yes', 'yes', 'no', 'yes', 'no', 'no', 'no', 'yes', 'no', 'no']}
														
 
															-label = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
														
 
															-df = pd.DataFrame(data, index=label)
														
 
															-print(df.loc[:,'name'])
														
 
															-pass
														
--- a/wind-LSTM-v2.0-dq+rp/.gitignore
+++ b/wind-LSTM-v2.0-dq+rp/.gitignore
@@ -1,13 +0,0 @@
 
															-*/__pycache__
														
 
															-/__pycache__
														
 
															-/.idea
														
 
															-/checkpoint
														
 
															-/log
														
 
															-/data
														
 
															-/figure
														
 
															-*.log
														
 
															-*.swp
														
 
															-/log
														
 
															-/data
														
 
															-
														
 
															-
														
--- a/wind-LSTM-v2.0-dq+rp/Readme.md
+++ b/wind-LSTM-v2.0-dq+rp/Readme.md
@@ -1,18 +0,0 @@
 
															-## 超短期功率预测系统训练端
														
 
															-
														
 
															-这个项目将LSTM长短期时序模型用于超短期电力功率预测任务，实现特性如下: 
														
 
															-
														
 
															-- 程序简洁、模块化
														
 
															-- 支持可扩展的Keras框架（LSTM，可修改网络层）
														
 
															-- 参数、模型和框架支持高度可定制和修改
														
 
															-- 支持增量训练（在预训练模型上进行微调）
														
 
															-- 支持同时预测多个指标（目前预测实际功率）
														
 
															-- 支持预测任意时间节点数（目前设置16个点）
														
 
															-- 支持训练可视化和记录日志
														
 
															-
														
 
															-
														
 
															-
														
 
															-| 训练case | 表头  |
														
 
															-|--------| ----  |
														
 
															-| 1      | 单元格 |
														
 
															-| 2      | 单元格 |
														
--- a/wind-LSTM-v2.0-dq+rp/back.py
+++ b/wind-LSTM-v2.0-dq+rp/back.py
@@ -1,76 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/4/14 15:32
														
 
															-# file: back.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-import sys
														
 
															-import numpy as np
														
 
															-import matplotlib.pyplot as plt
														
 
															-import pandas as pd
														
 
															-
														
 
															-
														
 
															-class data_analyse(object):
														
 
															-    def __init__(self, opt, logger, process):
														
 
															-        self.opt = opt
														
 
															-        self.logger = logger
														
 
															-        self.ds = process
														
 
															-
														
 
															-    def calculate_acc(self, label_data, predict_data):
														
 
															-        loss = np.sum((label_data - predict_data) ** 2) / len(label_data)  # mse
														
 
															-        loss_sqrt = np.sqrt(loss)  # rmse
														
 
															-        loss_acc = 1 - loss_sqrt / self.opt.cap
														
 
															-        return loss_acc
														
 
															-
														
 
															-    def get_16_points(self, results):
														
 
															-        # results为模型预测的一维数组，遍历，取每16个点的最后一个点
														
 
															-        preds = []
														
 
															-        for res in results:
														
 
															-            preds.append(res.iloc[-1].values)
														
 
															-        return np.array(preds)
														
 
															-
														
 
															-    def predict_acc(self, predict_data, dfy):
														
 
															-        predict_data = predict_data * self.ds.std['C_REAL_VALUE'] + self.ds.mean['C_REAL_VALUE']
														
 
															-        dfs = dfy[0]
														
 
															-        for i in range(1, len(dfy)):
														
 
															-            dfs.extend(dfy[i])
														
 
															-        for i, df in enumerate(dfs):
														
 
															-            df["PREDICT"] = predict_data[i]
														
 
															-            dfs[i] = df
														
 
															-        data = self.get_16_points(dfs)
														
 
															-        df = pd.DataFrame(data, columns=['C_TIME', 'C_REAL_VALUE', 'C_FP_VALUE', 'PREDICT'])
														
 
															-        # label_data = label_data.reshape((-1, self.opt.output_size))
														
 
															-        # label_data 要进行反归一化
														
 
															-
														
 
															-        label_name = [self.opt.feature_columns[i] for i in self.opt.label_in_feature_index]
														
 
															-        loss_norm = self.calculate_acc(df['C_REAL_VALUE'], df['PREDICT'])
														
 
															-
														
 
															-        self.logger.info("The mean squared error of power {} is ".format(label_name) + str(loss_norm))
														
 
															-
														
 
															-        loss_norm = self.calculate_acc(df['C_REAL_VALUE'], df['C_FP_VALUE'])
														
 
															-
														
 
															-        self.logger.info("The mean squared error of power {} is ".format(label_name) + str(loss_norm))
														
 
															-        self.preidct_draw(df['C_REAL_VALUE'].values, df['PREDICT'].values)
														
 
															-
														
 
															-    def preidct_draw(self, label_data, predict_data):
														
 
															-        X = list(range(label_data.shape[0]))
														
 
															-        print("label_x = ", X)
														
 
															-        label_column_num = len(self.opt.label_columns)
														
 
															-        label_name = [self.opt.feature_columns[i] for i in self.opt.label_in_feature_index]
														
 
															-        if not sys.platform.startswith('linux'):    # 无桌面的Linux下无法输出，如果是有桌面的Linux，如Ubuntu，可去掉这一行
														
 
															-            for i in range(label_column_num):
														
 
															-                plt.figure(i+1)                     # 预测数据绘制
														
 
															-                plt.plot(X, label_data[:, i], label='label', color='b')
														
 
															-                plt.plot(X, predict_data[:, i], label='predict', color='g')
														
 
															-                # plt.plot(predict_X, dq_data[:, i], label='dq', color='y')
														
 
															-                # plt.title("Predict actual {} power with {}".format(label_name[i], self.opt.used_frame))
														
 
															-                self.logger.info("The predicted power {} for the last {} point(s) is: ".format(label_name[i], self.opt.predict_points) +
														
 
															-                      str(np.squeeze(predict_data[-self.opt.predict_points:, i])))
														
 
															-
														
 
															-                if self.opt.do_figure_save:
														
 
															-                    plt.savefig(self.opt.figure_save_path+"{}predict_{}_with_{}.png".format(self.opt.continue_flag, label_name[i], self.opt.used_frame))
														
 
															-
														
 
															-            plt.show()
														
 
															-
														
 
															-    def tangle_results(self):
														
 
															-        pass
														
--- a/wind-LSTM-v2.0-dq+rp/config.py
+++ b/wind-LSTM-v2.0-dq+rp/config.py
@@ -1,94 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/3/17 14:46
														
 
															-# file: config.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-
														
 
															-import yaml
														
 
															-import argparse
														
 
															-
														
 
															-
														
 
															-class myargparse(argparse.ArgumentParser):
														
 
															-    def __init__(self, discription, add_help):
														
 
															-        super(myargparse, self).__init__(description=discription, add_help=add_help)
														
 
															-        # default_config_parser = parser = argparse.ArgumentParser(
														
 
															-        #     description='Training Config', add_help=False)
														
 
															-        self.add_argument(
														
 
															-            '-c',
														
 
															-            '--config_yaml',
														
 
															-            default=
														
 
															-            'config.yml',
														
 
															-            type=str,
														
 
															-            metavar='FILE',
														
 
															-            help='YAML config file specifying default arguments')
														
 
															-
														
 
															-        feature_columns = ['C_TIME', 'C_REAL_VALUE', 'C_FP_VALUE']
														
 
															-        # feature_columns = list(range(1, 28))
														
 
															-        label_columns = ['C_REAL_VALUE']
														
 
															-
														
 
															-        label_in_feature_index = (lambda x, y: [x.index(i) for i in y])(feature_columns, label_columns)  # 因为feature不一定从0开始
														
 
															-
														
 
															-        # 在控制台可以指定的参数， yml中没有
														
 
															-        self.add_argument('--feature_columns', type=list, default=feature_columns, help='要作为特征的列')
														
 
															-
														
 
															-        self.add_argument('--label_columns', type=list, default=label_columns, help='要预测的列')
														
 
															-
														
 
															-        self.add_argument('--label_in_feature_index', type=list, default=label_in_feature_index, help='标签在特征列的索引')
														
 
															-
														
 
															-        self.add_argument('--input_size', type=int, default=len(feature_columns), help='输入维度')
														
 
															-
														
 
															-        self.add_argument('--output_size', type=int, default=len(label_columns), help='输出维度')
														
 
															-
														
 
															-        self.add_argument("--train_data_path", type=str, default=None,help='数据集地址')  # train_data_path yml中有
														
 
															-
														
 
															-        # model_name 和 model_save_path 这两个参数根据yml中的参数拼接而成
														
 
															-
														
 
															-        self.add_argument('--model_name', type=str, default=None, help='模型名称')
														
 
															-
														
 
															-        self.add_argument('--model_save_path', type=str, default=None, help='模型保存地址')
														
 
															-
														
 
															-
														
 
															-    def _init_dir(self, opt):
														
 
															-        import os, time
														
 
															-        # 在这里给opt赋值
														
 
															-        opt.model_name = "model_" + opt.continue_flag + opt.used_frame + opt.model_postfix[opt.used_frame]
														
 
															-        opt.model_save_path = './checkpoint/' + opt.model_name + "/"
														
 
															-        if not os.path.exists(opt.model_save_path):
														
 
															-            os.makedirs(opt.model_save_path)    # makedirs 递归创建目录
														
 
															-        if not os.path.exists(opt.figure_save_path):
														
 
															-            os.mkdir(opt.figure_save_path)
														
 
															-        if opt.do_train and (opt.do_log_save_to_file or opt.do_train_visualized):
														
 
															-            cur_time = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime())
														
 
															-            log_save_path = opt.log_save_path + cur_time + '_' + opt.used_frame + "/"
														
 
															-            os.makedirs(log_save_path)
														
 
															-
														
 
															-
														
 
															-# YAML should override the argparser's content
														
 
															-    def _parse_args_and_yaml(self):
														
 
															-        given_configs, remaining = self.parse_known_args()
														
 
															-        if given_configs.config_yaml:
														
 
															-            with open(given_configs.config_yaml, 'r', encoding='utf-8') as f:
														
 
															-                cfg = yaml.safe_load(f)
														
 
															-                self.set_defaults(**cfg)
														
 
															-
														
 
															-        # The main arg parser parses the rest of the args, the usual
														
 
															-        # defaults will have been overridden if config file specified.
														
 
															-        opt = self.parse_args(remaining)
														
 
															-        self._init_dir(opt)
														
 
															-        # Cache the args as a text string to save them in the output dir later
														
 
															-        opt_text = yaml.safe_dump(opt.__dict__, default_flow_style=False)
														
 
															-        return opt, opt_text
														
 
															-
														
 
															-
														
 
															-    def parse_args_and_yaml(self):
														
 
															-        return self._parse_args_and_yaml()[0]
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    # opt = _parse_args_and_yaml()
														
 
															-    pass
														
 
															-
														
 
															-
														
 
															-
														
 
															-
														
--- a/wind-LSTM-v2.0-dq+rp/config.yml
+++ b/wind-LSTM-v2.0-dq+rp/config.yml
@@ -1,78 +0,0 @@
 
															-Model:
														
 
															-  batch_size: 64
														
 
															-  dropout_rate: 0.2
														
 
															-  epoch: 20
														
 
															-  hidden_size: 128
														
 
															-  learning_rate: 0.001
														
 
															-  lstm_layers: 2
														
 
															-  patience: 5
														
 
															-  random_seed: 42
														
 
															-  time_step: 16
														
 
															-add_train: false
														
 
															-continue_flag: ''
														
 
															-data_format:
														
 
															-  dq: dq.csv
														
 
															-  envir: "\u73AF\u5883\u6570\u636E.csv"
														
 
															-  nwp: nwp.csv
														
 
															-  rp: rp.csv
														
 
															-  formula: t_forecast_power_ultra_short_term_his.csv
														
 
															-debug_model: false
														
 
															-debug_num: 500
														
 
															-do_continue_train: false
														
 
															-do_figure_save: false
														
 
															-do_log_print_to_screen: true
														
 
															-do_log_save_to_file: true
														
 
															-do_predict: true
														
 
															-do_train: false
														
 
															-do_train_visualized: true
														
 
															-excel_data_path: ./data/J00307/
														
 
															-figure_save_path: ./figure/
														
 
															-is_continuous_predict: True
														
 
															-log_save_path: ./log/
														
 
															-mean:
														
 
															-  C_AIRT: 10.305992230762874
														
 
															-  C_CELLT: 10.664897925448384
														
 
															-  C_DIFFUSER: 143.2639061079428
														
 
															-  C_DIFFUSERDA: 6.571077155136789
														
 
															-  C_DIRECTR: 68.21328208942887
														
 
															-  C_DIRECTRDA: 3.163283039920654
														
 
															-  C_FORECAST: 3.1419734966774113
														
 
															-  C_GLOBALR: 173.2587817174973
														
 
															-  C_GLOBALRDA: 7.756491280271097
														
 
															-  C_HOURDA: 1.998222150590958
														
 
															-  C_P: 947.7830440532276
														
 
															-  C_RH: 55.59672286965865
														
 
															-  C_VALUE: 3.404744648318043
														
 
															-  C_WD: 212.88300686007108
														
 
															-  C_WS: 1.802446483180428
														
 
															-model_postfix:
														
 
															-  keras: .h5
														
 
															-  pytorch: .pth
														
 
															-  tensorflow: .ckpt
														
 
															-predict_points: 16
														
 
															-shuffle_train_data: false
														
 
															-std:
														
 
															-  C_AIRT: 12.127220611319888
														
 
															-  C_CELLT: 12.654848145970181
														
 
															-  C_DIFFUSER: 230.93680419867772
														
 
															-  C_DIFFUSERDA: 6.4933162833681415
														
 
															-  C_DIRECTR: 166.61348332191056
														
 
															-  C_DIRECTRDA: 4.991297839913351
														
 
															-  C_FORECAST: 4.447082956749344
														
 
															-  C_GLOBALR: 258.87947949591955
														
 
															-  C_GLOBALRDA: 7.9174382136573955
														
 
															-  C_HOURDA: 2.9110230573747247
														
 
															-  C_P: 25.75152505719027
														
 
															-  C_RH: 22.445059526990818
														
 
															-  C_VALUE: 5.013868885103326
														
 
															-  C_WD: 112.90029001408325
														
 
															-  C_WS: 1.6575249140627502
														
 
															-train_data_path: ./data/
														
 
															-train_data_rate: 0.9
														
 
															-use_cuda: false
														
 
															-used_frame: keras
														
 
															-valid_data_rate: 0.15
														
 
															-
														
 
															-is_photovoltaic: True
														
 
															-cap: 110
														
 
															-envir_columns: 16
														
--- a/wind-LSTM-v2.0-dq+rp/data_analyse.py
+++ b/wind-LSTM-v2.0-dq+rp/data_analyse.py
@@ -1,93 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/4/12 18:57
														
 
															-# file: data_analyse.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-import sys
														
 
															-import numpy as np
														
 
															-import matplotlib.pyplot as plt
														
 
															-import pandas as pd
														
 
															-from data_utils import *
														
 
															-
														
 
															-class data_analyse(object):
														
 
															-    def __init__(self, opt, logger, process):
														
 
															-        self.opt = opt
														
 
															-        self.logger = logger
														
 
															-        self.ds = process
														
 
															-
														
 
															-    def formula_acc(self):
														
 
															-        excel_data_path = self.opt.excel_data_path
														
 
															-        data_format = self.opt.data_format
														
 
															-        formula_path = excel_data_path + data_format["formula"]
														
 
															-        formula = pd.read_csv(formula_path, usecols=['C_ABLE_VALUE', 'C_FORECAST_HOW_LONG_AGO', 'C_FORECAST_TIME'])
														
 
															-        formula["C_FORECAST_TIME"] = formula["C_FORECAST_TIME"].apply(timestr_to_datetime)
														
 
															-        formula = formula.rename(columns={"C_FORECAST_TIME": "C_TIME"})
														
 
															-        formula = formula.loc[formula['C_FORECAST_HOW_LONG_AGO'] == 16]
														
 
															-        return formula
														
 
															-
														
 
															-    def calculate_acc(self, label_data, predict_data):
														
 
															-        loss = np.sum((label_data - predict_data) ** 2) / len(label_data)  # mse
														
 
															-        loss_sqrt = np.sqrt(loss)  # rmse
														
 
															-        loss_acc = 1 - loss_sqrt / self.opt.cap
														
 
															-        return loss_acc
														
 
															-
														
 
															-    def get_16_points(self, results):
														
 
															-        # results为模型预测的一维数组，遍历，取每16个点的最后一个点
														
 
															-        preds = []
														
 
															-        for res in results:
														
 
															-            preds.append(res.iloc[-1].values)
														
 
															-        return np.array(preds)
														
 
															-
														
 
															-    def predict_acc(self, predict_data, dfy):
														
 
															-        predict_data = predict_data * self.ds.std['C_REAL_VALUE'] + self.ds.mean['C_REAL_VALUE']
														
 
															-        dfs = dfy[0]
														
 
															-        for i in range(1, len(dfy)):
														
 
															-            dfs.extend(dfy[i])
														
 
															-        for i, df in enumerate(dfs):
														
 
															-            df["PREDICT"] = predict_data[i]
														
 
															-            dfs[i] = df
														
 
															-        data = self.get_16_points(dfs)
														
 
															-        df = pd.DataFrame(data, columns=['C_TIME', 'C_REAL_VALUE', 'C_FP_VALUE', 'PREDICT'])
														
 
															-        # label_data = label_data.reshape((-1, self.opt.output_size))
														
 
															-        # label_data 要进行反归一化
														
 
															-        df.to_csv(self.opt.excel_data_path + "dq+rp.csv")
														
 
															-        formula = self.formula_acc()
														
 
															-        df = pd.merge(df, formula, on='C_TIME')
														
 
															-
														
 
															-        label_name = [self.opt.feature_columns[i] for i in self.opt.label_in_feature_index]
														
 
															-        loss_norm = self.calculate_acc(df['C_REAL_VALUE'], df['PREDICT'])
														
 
															-
														
 
															-        self.logger.info("The mean squared error of power {} is ".format(label_name) + str(loss_norm))
														
 
															-
														
 
															-        loss_norm = self.calculate_acc(df['C_REAL_VALUE'], df['C_FP_VALUE'])
														
 
															-
														
 
															-        self.logger.info("The mean squared error of power {} is ".format(label_name) + str(loss_norm))
														
 
															-
														
 
															-        loss_norm = self.calculate_acc(df['C_REAL_VALUE'], df['C_ABLE_VALUE'])
														
 
															-
														
 
															-        self.logger.info("The mean squared error of power {} is ".format(label_name) + str(loss_norm))
														
 
															-        self.preidct_draw(df['C_REAL_VALUE'].values, df['PREDICT'].values)
														
 
															-
														
 
															-    def preidct_draw(self, label_data, predict_data):
														
 
															-        X = list(range(label_data.shape[0]))
														
 
															-        print("label_x = ", X)
														
 
															-        label_column_num = len(self.opt.label_columns)
														
 
															-        label_name = [self.opt.feature_columns[i] for i in self.opt.label_in_feature_index]
														
 
															-        if not sys.platform.startswith('linux'):    # 无桌面的Linux下无法输出，如果是有桌面的Linux，如Ubuntu，可去掉这一行
														
 
															-            for i in range(label_column_num):
														
 
															-                plt.figure(i+1)                     # 预测数据绘制
														
 
															-                plt.plot(X, label_data, label='label', color='b')
														
 
															-                plt.plot(X, predict_data, label='predict', color='g')
														
 
															-                # plt.plot(predict_X, dq_data[:, i], label='dq', color='y')
														
 
															-                # plt.title("Predict actual {} power with {}".format(label_name[i], self.opt.used_frame))
														
 
															-                # self.logger.info("The predicted power {} for the last {} point(s) is: ".format(label_name[i], self.opt.predict_points) +
														
 
															-                #       str(np.squeeze(predict_data[-self.opt.predict_points:, i])))
														
 
															-
														
 
															-                if self.opt.do_figure_save:
														
 
															-                    plt.savefig(self.opt.figure_save_path+"{}predict_{}_with_{}.png".format(self.opt.continue_flag, label_name[i], self.opt.used_frame))
														
 
															-
														
 
															-            plt.show()
														
 
															-
														
 
															-    def tangle_results(self):
														
 
															-        pass
														
--- a/wind-LSTM-v2.0-dq+rp/data_features.py
+++ b/wind-LSTM-v2.0-dq+rp/data_features.py
@@ -1,96 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/4/12 17:42
														
 
															-# file: data_features.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-import pandas as pd
														
 
															-from sklearn.model_selection import train_test_split
														
 
															-import numpy as np
														
 
															-from data_utils import *
														
 
															-
														
 
															-
														
 
															-class data_features(object):
														
 
															-    def __init__(self, opt, mean, std):
														
 
															-        self.opt = opt
														
 
															-        self.time_step = self.opt.Model["time_step"]
														
 
															-        self.mean = mean
														
 
															-        self.std = std
														
 
															-        self.columns = list()
														
 
															-
														
 
															-    def get_train_data(self, dfs):
														
 
															-        train_x, valid_x, train_y, valid_y = [], [], [], []
														
 
															-        for df in dfs:
														
 
															-            datax, datay = self.get_data_features(df)
														
 
															-            trainx = np.array(datax)
														
 
															-            trainy = [y['C_REAL_VALUE'].values for y in datay]
														
 
															-            trainy = np.expand_dims(np.array(trainy), axis=-1)  # 在最后一维加一维度
														
 
															-            tx, vx, ty, vy = train_test_split(trainx, trainy, test_size=self.opt.valid_data_rate,
														
 
															-                                                                  random_state=self.opt.Model["random_seed"],
														
 
															-                                                                  shuffle=self.opt.shuffle_train_data)  # 划分训练和验证集
														
 
															-            train_x.append(tx)
														
 
															-            valid_x.append(vx)
														
 
															-            train_y.append(ty)
														
 
															-            valid_y.append(vy)
														
 
															-
														
 
															-        train_x = np.concatenate(train_x, axis=0)
														
 
															-        valid_x = np.concatenate(valid_x, axis=0)
														
 
															-        train_y = np.concatenate(train_y, axis=0)
														
 
															-        valid_y = np.concatenate(valid_y, axis=0)
														
 
															-
														
 
															-        train_x = self.norm_features(train_x)
														
 
															-        valid_x = self.norm_features(valid_x)
														
 
															-        train_y = self.norm_label(train_y)
														
 
															-        valid_y = self.norm_label(valid_y)
														
 
															-
														
 
															-        return train_x, valid_x, train_y, valid_y
														
 
															-
														
 
															-    def get_test_data(self, dfs):
														
 
															-        test_x, test_y, data_y = [], [], []
														
 
															-        for df in dfs:
														
 
															-            datax, datay = self.get_data_features(df)
														
 
															-            trainx = np.array(datax)
														
 
															-            trainy = [y['C_REAL_VALUE'].values for y in datay]
														
 
															-            trainy = np.expand_dims(np.array(trainy), axis=-1)  # 在最后一维加一维度
														
 
															-            test_x.append(trainx)
														
 
															-            test_y.append(trainy)
														
 
															-            data_y.append(datay)
														
 
															-
														
 
															-        test_x = np.concatenate(test_x, axis=0)
														
 
															-        test_y = np.concatenate(test_y, axis=0)
														
 
															-
														
 
															-        test_x = self.norm_features(test_x)
														
 
															-        test_y = self.norm_label(test_y)
														
 
															-
														
 
															-        return test_x, test_y, data_y
														
 
															-
														
 
															-    def get_data_features(self, df):
														
 
															-        norm_data = df.reset_index()
														
 
															-        feature_data = norm_data[:-self.opt.predict_points]
														
 
															-        label_data = norm_data[self.opt.predict_points:].reset_index(drop=True)
														
 
															-        time_step = self.opt.Model["time_step"]
														
 
															-        time_step_loc = time_step - 1
														
 
															-        train_num = int(len(feature_data))
														
 
															-        time_rp = [feature_data.loc[i:i + time_step_loc, ['C_TIME', 'C_REAL_VALUE']] for i in range(train_num - time_step)]
														
 
															-        dq = [label_data.loc[i:i + time_step_loc, 'C_FP_VALUE'] for i in range(train_num - time_step)]
														
 
															-        features_x, features_y = [], []
														
 
															-        for row in zip(time_rp, dq):
														
 
															-            row0 = row[0]
														
 
															-            row1 = row[1]
														
 
															-            row0['C_FP_VALUE'] = row1
														
 
															-            row0.set_index('C_TIME', inplace=True, drop=False)
														
 
															-            row0["C_TIME"] = row0["C_TIME"].apply(datetime_to_timestr)
														
 
															-            features_x.append(row0)
														
 
															-        self.columns = row0.columns.tolist()
														
 
															-
														
 
															-        features_y = [label_data.loc[i:i + time_step_loc, ['C_TIME', 'C_REAL_VALUE', 'C_FP_VALUE']] for i in range(train_num - time_step)]
														
 
															-        return features_x, features_y
														
 
															-
														
 
															-    def norm_features(self, data: np.ndarray):
														
 
															-        mean = np.array([self.mean[col] for col in self.columns])
														
 
															-        std = np.array([self.std[col] for col in self.columns])
														
 
															-        data = (data - mean) / std  # 归一化
														
 
															-        return data
														
 
															-
														
 
															-    def norm_label(self, label_data: np.ndarray):
														
 
															-        return (label_data - self.mean['C_REAL_VALUE']) / self.std['C_REAL_VALUE']
														
--- a/wind-LSTM-v2.0-dq+rp/data_process.py
+++ b/wind-LSTM-v2.0-dq+rp/data_process.py
@@ -1,140 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/3/17 10:10
														
 
															-# file: main.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-import pandas as pd
														
 
															-import numpy as np
														
 
															-from data_utils import *
														
 
															-import yaml
														
 
															-
														
 
															-
														
 
															-class data_process(object):
														
 
															-    def __init__(self, opt):
														
 
															-        self.std = None
														
 
															-        self.mean = None
														
 
															-        self.opt = opt
														
 
															-        # 都是在ndarray量纲下进行计算
														
 
															-        # self.norm_data = (self.tables[:, 1:] - self.mean) / self.std  # 归一化，去量纲
														
 
															-        # self.norm_data.insert(0, 'C_TIME', self.tables['C_TIME'])
														
 
															-        # self.set_yml({'mean': self.mean.to_dict(), 'std': self.std.to_dict()})
														
 
															-        # self.start_num_in_test = 0
														
 
															-
														
 
															-    def get_processed_data(self):
														
 
															-        excel_data_path = self.opt.excel_data_path
														
 
															-        data_format = self.opt.data_format
														
 
															-        dq_path = excel_data_path + data_format["dq"]
														
 
															-        rp_path = excel_data_path + data_format["rp"]
														
 
															-        envir_path = excel_data_path + data_format["envir"]
														
 
															-
														
 
															-        dq_columns = ['C_FORECAST_TIME', 'C_FP_VALUE']
														
 
															-        rp_columns = ['C_TIME', 'C_REAL_VALUE']  # 待优化 ["'C_TIME'", "'C_REAL_VALUE'"] 原因：csv 字符串是单引号''，read_csv带单引号
														
 
															-
														
 
															-        # envir = self.read_data(envir_path).loc[:, "C_TIME":]  # 待优化 导出csv按照表的列顺序 read_csv按照csv列顺序读取
														
 
															-        # envir = self.data_cleaning(envir)
														
 
															-        # envir["C_TIME"] = envir["C_TIME"].apply(timestr_to_datetime)
														
 
															-        # envir.set_index('C_TIME', inplace=True)
														
 
															-        # envir = self.drop_duplicated(envir)
														
 
															-
														
 
															-        # 读取的df，经过时序转换、清洗、去重三部曲
														
 
															-        dq = self.read_data(dq_path, dq_columns)
														
 
															-        dq = dq.rename(columns={"C_FORECAST_TIME": "C_TIME"})
														
 
															-        dq["C_TIME"] = dq["C_TIME"].apply(timestr_to_datetime)
														
 
															-        dq.set_index('C_TIME', inplace=True)
														
 
															-        dq = self.data_cleaning(dq)
														
 
															-        dq = self.drop_duplicated(dq)
														
 
															-
														
 
															-        rp = self.read_data(rp_path, rp_columns)
														
 
															-        rp["C_TIME"] = rp["C_TIME"].apply(timestr_to_datetime)
														
 
															-        rp.set_index('C_TIME', inplace=True)  # nan也可以设置索引列
														
 
															-        rp = self.data_cleaning(rp)
														
 
															-        rp = self.drop_duplicated(rp)
														
 
															-
														
 
															-        df = self.tables_unite(rp, dq)
														
 
															-        dfs = self.missing_time_splite(df)
														
 
															-        dfs = [self.data_fill(df) for df in dfs]
														
 
															-        self.norm(dfs)  # 归一化 待解决
														
 
															-        return dfs
														
 
															-
														
 
															-    def norm(self, dfs):
														
 
															-        df = pd.concat(dfs, axis=0)
														
 
															-        df = df.reset_index()
														
 
															-        df["C_TIME"] = df["C_TIME"].apply(datetime_to_timestr)
														
 
															-        mean = np.mean(df, axis=0)  # 数据的均值
														
 
															-        std = np.std(df, axis=0)  # 标准差
														
 
															-        if hasattr(self.opt, 'mean') is False or hasattr(self.opt, 'std') is False:
														
 
															-            self.set_yml({'mean': mean.to_dict(), 'std': std.to_dict()})
														
 
															-        self.mean, self.std = mean.to_dict(), std.to_dict()
														
 
															-
														
 
															-    def data_cleaning(self, data):
														
 
															-        data = data.replace(-99, np.nan)
														
 
															-        # nan 超过30% 删除
														
 
															-        data = data.dropna(axis=1, thresh=len(data)*0.7)
														
 
															-        # nan 替换成0 本周问题 1.卷积学习，0是否合适？
														
 
															-        data = data.replace(np.nan, 0)
														
 
															-        # 删除取值全部相同的列
														
 
															-        data = data.loc[:, (data != data.iloc[0]).any()]
														
 
															-        return data
														
 
															-
														
 
															-    def missing_time_splite(self, df):
														
 
															-        dt = pd.Timedelta(minutes=15)
														
 
															-        day1 = pd.Timedelta(days=1)
														
 
															-        cnt = 0
														
 
															-        cnt1 = 0
														
 
															-        start_index = 0
														
 
															-        dfs = []
														
 
															-        for i in range(1, len(df)):
														
 
															-            if df.index[i] - df.index[i-1] >= day1:
														
 
															-                df_x = df.iloc[start_index:i, ]
														
 
															-                dfs.append(df_x)
														
 
															-                start_index = i
														
 
															-                cnt1 += 1
														
 
															-            if df.index[i] - df.index[i-1] != dt:
														
 
															-                print(df.index[i-1], end=" ~ ")
														
 
															-                print(df.index[i])
														
 
															-                cnt += 1
														
 
															-        dfs.append(df.iloc[start_index:, ])
														
 
															-        print("数据总数：", len(df), "，缺失段数：", cnt, "其中，超过一天的段数：", cnt1)
														
 
															-        return dfs
														
 
															-
														
 
															-    def data_fill(self, df):
														
 
															-        df = df.resample('15T').bfill()
														
 
															-        return df
														
 
															-
														
 
															-    def set_yml(self, yml_dict):
														
 
															-        with open(self.opt.config_yaml, 'r', encoding='utf-8') as f:
														
 
															-            cfg = yaml.safe_load(f)
														
 
															-        for k, v in yml_dict.items():
														
 
															-            cfg[k] = v
														
 
															-        with open(self.opt.config_yaml, 'w') as f:
														
 
															-            yaml.safe_dump(cfg, f, default_flow_style=False)
														
 
															-
														
 
															-    def read_data(self, path, cols=None, index_col=None):
														
 
															-        init_data = pd.read_csv(path, usecols=cols, index_col=index_col)
														
 
															-        return init_data
														
 
															-
														
 
															-    def filter_data(self):
														
 
															-        check_table = self.tables[:, 2]  # 实际功率不能为0，为0代表没发电
														
 
															-        preserve_index = list(np.nonzero(check_table)[0])
														
 
															-        indexs = list(range(len(self.tables)))
														
 
															-        del_index = list(set(indexs) - set(preserve_index))
														
 
															-        self.tables = np.delete(self.tables, del_index, axis=0)
														
 
															-        return self.tables
														
 
															-
														
 
															-    def drop_duplicated(self, df):
														
 
															-        df = df.groupby(level=0).mean()  # DatetimeIndex时间索引去重
														
 
															-        return df
														
 
															-
														
 
															-    def tables_unite(self, t1, t2):
														
 
															-        return pd.merge(t1, t2, left_index=True, right_index=True)
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    ds = DataSet()
														
 
															-    # dq = ds.read_data(dq_path, dq_columns)[0]
														
 
															-    # rp = ds.read_data(rp_path, rp_columns)[0]
														
 
															-    # # rp_average(rp)    # 计算平均功率
														
 
															-    # envir = ds.read_data(envir_path, envir_columns)[0]
														
 
															-    # tables = ds.tables_integra(dq, rp, envir)
														
 
															-    # ds.tables_norm_result(tables)
														
--- a/wind-LSTM-v2.0-dq+rp/data_utils.py
+++ b/wind-LSTM-v2.0-dq+rp/data_utils.py
@@ -1,65 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/3/22 17:17
														
 
															-# file: dpdUtils.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-
														
 
															-
														
 
															-import time, datetime
														
 
															-
														
 
															-
														
 
															-class ValidationError(Exception):
														
 
															-    def __init__(self, message):
														
 
															-        self.message = message
														
 
															-
														
 
															-
														
 
															-def timestamp_to_datetime(ts):
														
 
															-    if type(ts) is not int:
														
 
															-        raise ValueError("timestamp-时间格式必须是整型")
														
 
															-    if len(str(ts)) == 13:
														
 
															-        return datetime.datetime.fromtimestamp(ts/1000)
														
 
															-    elif len(str(ts)) == 10:
														
 
															-        return datetime.datetime.fromtimestamp(ts)
														
 
															-    else:
														
 
															-        raise ValueError("timestamp-时间格式长度错误")
														
 
															-
														
 
															-
														
 
															-def datetime_to_timestamp(dt, len):
														
 
															-    if len not in (10, 13):
														
 
															-        raise ValueError("timestamp-时间戳转换长度错误")
														
 
															-    if len == 10:
														
 
															-        return int(round(time.mktime(dt.timetuple())))
														
 
															-    else:
														
 
															-        return int(round(time.mktime(dt.timetuple()))*1000)
														
 
															-
														
 
															-
														
 
															-def datetime_to_timestr(dt):
														
 
															-    return int(dt.strftime('%m%d%H%M'))
														
 
															-
														
 
															-
														
 
															-def timestr_to_datetime(time_data):
														
 
															-    """
														
 
															-    将时间戳或时间字符串转换为datetime.datetime类型
														
 
															-    :param time_data: int or str
														
 
															-    :return:datetime.datetime
														
 
															-    """
														
 
															-    if isinstance(time_data, float):
														
 
															-        result = timestamp_to_datetime(int(time_data))
														
 
															-    elif isinstance(time_data, int):
														
 
															-        result = timestamp_to_datetime(time_data)
														
 
															-    elif isinstance(time_data, str):
														
 
															-        if len(time_data) == 10:
														
 
															-            result = datetime.datetime.strptime(time_data, '%d/%m/%Y')
														
 
															-            # result = datetime.datetime.strptime(time_data, '%Y-%m-%d')
														
 
															-        elif len(time_data) in {17, 18, 19}:
														
 
															-            result = datetime.datetime.strptime(time_data, '%d/%m/%Y %H:%M:%S')   # strptime字符串解析必须严格按照字符串中的格式
														
 
															-            # result = datetime.datetime.strptime(time_data, '%Y-%m-%d %H:%M:%S')
														
 
															-        else:
														
 
															-            raise ValidationError("时间字符串长度不满足要求！")
														
 
															-    return result
														
 
															-
														
 
															-
														
 
															-def timestamp_to_timestr(t):
														
 
															-    return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(t))
														
 
															-
														
--- a/wind-LSTM-v2.0-dq+rp/logger.py
+++ b/wind-LSTM-v2.0-dq+rp/logger.py
@@ -1,43 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/3/20 15:19
														
 
															-# file: logger.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-
														
 
															-import logging, sys
														
 
															-from logging.handlers import RotatingFileHandler
														
 
															-
														
 
															-
														
 
															-def load_logger(config):
														
 
															-    logger = logging.getLogger()
														
 
															-    logger.setLevel(level=logging.DEBUG)
														
 
															-
														
 
															-    # StreamHandler
														
 
															-    if config.do_log_print_to_screen:
														
 
															-        stream_handler = logging.StreamHandler(sys.stdout)
														
 
															-        stream_handler.setLevel(level=logging.INFO)
														
 
															-        formatter = logging.Formatter(datefmt='%Y/%m/%d %H:%M:%S',
														
 
															-                                      fmt='[ %(asctime)s ] %(message)s')
														
 
															-        stream_handler.setFormatter(formatter)
														
 
															-        logger.addHandler(stream_handler)
														
 
															-
														
 
															-    # FileHandler
														
 
															-    if config.do_log_save_to_file:
														
 
															-        file_handler = RotatingFileHandler(config.log_save_path + "out.log", maxBytes=1024000, backupCount=5)
														
 
															-        file_handler.setLevel(level=logging.INFO)
														
 
															-        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
														
 
															-        file_handler.setFormatter(formatter)
														
 
															-        logger.addHandler(file_handler)
														
 
															-
														
 
															-        # 把config信息也记录到log 文件中
														
 
															-        config_dict = {}
														
 
															-        for key in dir(config):
														
 
															-            if not key.startswith("_"):
														
 
															-                config_dict[key] = getattr(config, key)
														
 
															-        config_str = str(config_dict)
														
 
															-        config_list = config_str[1:-1].split(", '")
														
 
															-        config_save_str = "\nConfig:\n" + "\n'".join(config_list)
														
 
															-        logger.info(config_save_str)
														
 
															-
														
 
															-    return logger
														
--- a/wind-LSTM-v2.0-dq+rp/model/__init__.py
+++ b/wind-LSTM-v2.0-dq+rp/model/__init__.py
--- a/wind-LSTM-v2.0-dq+rp/model/model_keras.py
+++ b/wind-LSTM-v2.0-dq+rp/model/model_keras.py
@@ -1,48 +0,0 @@
 
															-# -*- coding: UTF-8 -*-
														
 
															-from keras.layers import Input, Dense, LSTM
														
 
															-from keras.models import Model
														
 
															-from keras.callbacks import ModelCheckpoint, EarlyStopping
														
 
															-
														
 
															-
														
 
															-def get_keras_model(opt):
														
 
															-    input1 = Input(shape=(opt.Model['time_step'], opt.input_size))
														
 
															-    lstm = input1
														
 
															-    for i in range(opt.Model['lstm_layers']):
														
 
															-        lstm = LSTM(units=opt.Model['hidden_size'],dropout=opt.Model['dropout_rate'],return_sequences=True)(lstm)
														
 
															-    output = Dense(opt.output_size)(lstm)
														
 
															-    model = Model(input1, output)
														
 
															-    model.compile(loss='mse', optimizer='adam')     # metrics=["mae"]
														
 
															-    return model
														
 
															-
														
 
															-
														
 
															-def gpu_train_init():
														
 
															-    import tensorflow as tf
														
 
															-    from keras.backend.tensorflow_backend import set_session
														
 
															-    sess_config = tf.ConfigProto(log_device_placement=True, allow_soft_placement=True)
														
 
															-    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.7  # 最多使用70%GPU内存
														
 
															-    sess_config.gpu_options.allow_growth=True   # 初始化时不全部占满GPU显存, 按需分配
														
 
															-    sess = tf.Session(config=sess_config)
														
 
															-    set_session(sess)
														
 
															-
														
 
															-
														
 
															-def train(opt, train_and_valid_data):
														
 
															-    if opt.use_cuda: gpu_train_init()
														
 
															-    train_X, train_Y, valid_X, valid_Y = train_and_valid_data
														
 
															-    model = get_keras_model(opt)
														
 
															-    model.summary()
														
 
															-    if opt.add_train:
														
 
															-        model.load_weights(opt.model_save_path + opt.model_name)
														
 
															-
														
 
															-    check_point = ModelCheckpoint(filepath=opt.model_save_path + opt.model_name, monitor='val_loss',
														
 
															-                                    save_best_only=True, mode='auto')
														
 
															-    early_stop = EarlyStopping(monitor='val_loss', patience=opt.Model['patience'], mode='auto')
														
 
															-    model.fit(train_X, train_Y, batch_size=opt.Model['batch_size'], epochs=opt.Model['epoch'], verbose=2,
														
 
															-              validation_data=(valid_X, valid_Y), callbacks=[check_point, early_stop])
														
 
															-
														
 
															-
														
 
															-def predict(config, test_X):
														
 
															-    model = get_keras_model(config)
														
 
															-    model.load_weights(config.model_save_path + config.model_name)
														
 
															-    result = model.predict(test_X, batch_size=1)
														
 
															-    # result = result.reshape((-1, config.output_size))
														
 
															-    return result
														
--- a/wind-LSTM-v2.0-dq+rp/requirements.txt
+++ b/wind-LSTM-v2.0-dq+rp/requirements.txt
@@ -1,8 +0,0 @@
 
															-sklearn
														
 
															-pandas
														
 
															-argparse
														
 
															-keras
														
 
															-tensorflow==1.15
														
 
															-matplotlib>=3.0.2
														
 
															-numpy>=1.14.6
														
 
															-scipy>=1.1.0
														
--- a/wind-LSTM-v2.0-dq+rp/run_case1.py
+++ b/wind-LSTM-v2.0-dq+rp/run_case1.py
@@ -1,55 +0,0 @@
 
															-# -*- coding: UTF-8 -*-
														
 
															-
														
 
															-import numpy as np
														
 
															-import os
														
 
															-from data_process import data_process
														
 
															-from data_features import data_features
														
 
															-from logger import load_logger
														
 
															-from config import myargparse
														
 
															-from data_analyse import data_analyse
														
 
															-frame = "keras"
														
 
															-
														
 
															-if frame == "keras":
														
 
															-    from model.model_keras import train, predict
														
 
															-    os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3'
														
 
															-else:
														
 
															-    raise Exception("Wrong frame seletion")
														
 
															-
														
 
															-
														
 
															-def main():
														
 
															-    parse = myargparse(discription="training config", add_help=False)
														
 
															-    opt = parse.parse_args_and_yaml()
														
 
															-    logger = load_logger(opt)
														
 
															-    try:
														
 
															-        np.random.seed(opt.Model["random_seed"])
														
 
															-        process = data_process(opt=opt)
														
 
															-        dfs = process.get_processed_data()
														
 
															-        features = data_features(opt=opt, mean=process.mean, std=process.std)
														
 
															-        if opt.do_train:
														
 
															-            train_X, valid_X, train_Y, valid_Y = features.get_train_data([dfs[0][:'2021/8/1'], dfs[1][:'2022/3/1']])
														
 
															-            train(opt, [train_X, train_Y, valid_X, valid_Y])
														
 
															-        if opt.do_predict:
														
 
															-            test_X, test_Y, df_Y = features.get_test_data([dfs[0]['2021/8/1':'2021/9/6'], dfs[1]['2022/3/1':'2022/4/4']])
														
 
															-            result = predict(opt, test_X)       # 这里输出的是未还原的归一化预测数据
														
 
															-            analyse = data_analyse(opt, logger, process)
														
 
															-            analyse.predict_acc(result, df_Y)
														
 
															-    except Exception:
														
 
															-        logger.error("Run Error", exc_info=True)
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    import argparse
														
 
															-    # argparse方便于命令行下输入参数，可以根据需要增加更多
														
 
															-    # parser = argparse.ArgumentParser()
														
 
															-    # parser.add_argument("-t", "--do_train", default=False, type=bool, help="whether to train")
														
 
															-    # parser.add_argument("-p", "--do_predict", default=True, type=bool, help="whether to train")
														
 
															-    # parser.add_argument("-b", "--batch_size", default=64, type=int, help="batch size")
														
 
															-    # parser.add_argument("-e", "--epoch", default=20, type=int, help="epochs num")
														
 
															-    # args = parser.parse_args()
														
 
															-
														
 
															-    # con = Config()
														
 
															-    # for key in dir(args):               # dir(args) 函数获得args所有的属性
														
 
															-    #     if not key.startswith("_"):     # 去掉 args 自带属性，比如__name__等
														
 
															-    #         setattr(con, key, getattr(args, key))   # 将属性值赋给Config
														
 
															-    main()
														
 
															-
														
--- a/wind-LSTM-v2.0-dq+rp/run_case_history.py
+++ b/wind-LSTM-v2.0-dq+rp/run_case_history.py
@@ -1,142 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/3/20 9:23
														
 
															-# file: run_case_history.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-
														
 
															-class Data:
														
 
															-    def __init__(self, config):
														
 
															-        self.config = config
														
 
															-        self.data, self.data_column_name = self.read_data()
														
 
															-
														
 
															-        self.data_num = self.data.shape[0]
														
 
															-        self.train_num = int(self.data_num * self.config.train_data_rate)
														
 
															-
														
 
															-        self.mean = np.mean(self.data, axis=0)              # 数据的均值和方差
														
 
															-        self.std = np.std(self.data, axis=0)
														
 
															-        self.norm_data = (self.data - self.mean)/self.std   # 归一化，去量纲
														
 
															-
														
 
															-        self.start_num_in_test = 0      # 测试集中前几天的数据会被删掉，因为它不够一个time_step
														
 
															-
														
 
															-    def read_data(self):                # 读取初始数据
														
 
															-        if self.config.debug_mode:
														
 
															-            init_data = pd.read_csv(self.config.train_data_path, nrows=self.config.debug_num,
														
 
															-                                    usecols=self.config.feature_columns)
														
 
															-        else:
														
 
															-            init_data = pd.read_csv(self.config.train_data_path, usecols=self.config.feature_columns)
														
 
															-        init_data = self.filter_data(init_data)
														
 
															-        return init_data.values, init_data.columns.tolist()     # .columns.tolist() 是获取列名
														
 
															-
														
 
															-    def filter_data(self, init_data):
														
 
															-        return init_data[init_data.apply(np.sum, axis=1)!=0]
														
 
															-
														
 
															-    def get_train_and_valid_data(self):
														
 
															-        feature_data = self.norm_data[:self.train_num]
														
 
															-        label_data = self.norm_data[: self.train_num,
														
 
															-                                    self.config.label_in_feature_index]    # 将延后几天的数据作为label
														
 
															-
														
 
															-        if not self.config.do_continue_train:
														
 
															-            # 在非连续训练模式下，每time_step行数据会作为一个样本，两个样本错开一行，比如：1-20行，2-21行。。。。
														
 
															-            train_x, train_y = [], []
														
 
															-            for i in range(self.train_num-self.config.time_step*2):
														
 
															-                p1 = feature_data[:, 0][i:i+self.config.start_predict_point]
														
 
															-                p2 = feature_data[:, 1][i+self.config.start_predict_point:i+self.config.start_predict_point*2]
														
 
															-                p = [list(t) for t in zip(p1, p2)]  # 实际功率， 预测功率 是一组特征值
														
 
															-                l = label_data[i+self.config.start_predict_point:i+self.config.start_predict_point*2]
														
 
															-                train_x.append(p)
														
 
															-                train_y.append(l)
														
 
															-            # train_x = [feature_data[i:i+self.config.time_step] for i in range(self.train_num-self.config.time_step)]
														
 
															-            # train_y = [label_data[i+self.config.start_predict_point:i+self.config.time_step] for i in range(self.train_num-self.config.time_step)]
														
 
															-            # 这里选取后16个点 作为 预测及
														
 
															-        else:
														
 
															-            # 在连续训练模式下，每time_step行数据会作为一个样本，两个样本错开time_step行，
														
 
															-            # 比如：1-20行，21-40行。。。到数据末尾，然后又是 2-21行，22-41行。。。到数据末尾，……
														
 
															-            # 这样才可以把上一个样本的final_state作为下一个样本的init_state，而且不能shuffle
														
 
															-            # 目前本项目中仅能在pytorch的RNN系列模型中用
														
 
															-            train_x = [feature_data[start_index + i*self.config.time_step : start_index + (i+1)*self.config.time_step]
														
 
															-                       for start_index in range(self.config.time_step)
														
 
															-                       for i in range((self.train_num - start_index) // self.config.time_step)]
														
 
															-            train_y = [label_data[start_index + i*self.config.time_step : start_index + (i+1)*self.config.time_step]
														
 
															-                       for start_index in range(self.config.time_step)
														
 
															-                       for i in range((self.train_num - start_index) // self.config.time_step)]
														
 
															-
														
 
															-        train_x, train_y = np.array(train_x), np.array(train_y)
														
 
															-
														
 
															-        train_x, valid_x, train_y, valid_y = train_test_split(train_x, train_y, test_size=self.config.valid_data_rate,
														
 
															-                                                              random_state=self.config.random_seed,
														
 
															-                                                              shuffle=self.config.shuffle_train_data)   # 划分训练和验证集，并打乱
														
 
															-        return train_x, valid_x, train_y, valid_y
														
 
															-
														
 
															-
														
 
															-class Config:
														
 
															-    # 数据参数
														
 
															-    # feature_columns = list(range(2, 9))     # 要作为feature的列，按原数据从0开始计算，也可以用list 如 [2,4,6,8] 设置
														
 
															-    feature_columns = list(range(1, 3))
														
 
															-    # label_columns = [4, 5]                  # 要预测的列，按原数据从0开始计算, 如同时预测第四，五列 最低价和最高价
														
 
															-    label_columns = [1]
														
 
															-    # label_in_feature_index = [feature_columns.index(i) for i in label_columns]  # 这样写不行
														
 
															-    label_in_feature_index = (lambda x,y: [x.index(i) for i in y])(feature_columns, label_columns)  # 因为feature不一定从0开始
														
 
															-
														
 
															-    predict_day = 1             # 预测未来几天
														
 
															-    predict_points = 16
														
 
															-    # 网络参数
														
 
															-    input_size = len(feature_columns)
														
 
															-    output_size = len(label_columns)
														
 
															-
														
 
															-    hidden_size = 128           # LSTM的隐藏层大小，也是输出大小
														
 
															-    lstm_layers = 2             # LSTM的堆叠层数
														
 
															-    dropout_rate = 0.2          # dropout概率
														
 
															-    time_step = 16             # 这个参数很重要，是设置用前多少个点的数据来预测，也是LSTM的time step数，请保证训练数据量大于它
														
 
															-    start_predict_point = 16
														
 
															-
														
 
															-    # 训练参数
														
 
															-    do_train = True
														
 
															-    do_predict = True
														
 
															-    add_train = False           # 是否载入已有模型参数进行增量训练
														
 
															-    shuffle_train_data = False   # 是否对训练数据做shuffle
														
 
															-    use_cuda = False            # 是否使用GPU训练
														
 
															-
														
 
															-    train_data_rate = 0.95      # 训练数据占总体数据比例，测试数据就是 1-train_data_rate
														
 
															-    valid_data_rate = 0.15      # 验证数据占训练数据比例，验证集在训练过程使用，为了做模型和参数选择
														
 
															-
														
 
															-    batch_size = 64
														
 
															-    learning_rate = 0.001
														
 
															-    epoch = 20                  # 整个训练集被训练多少遍，不考虑早停的前提下
														
 
															-    patience = 5                # 训练多少epoch，验证集没提升就停掉
														
 
															-    random_seed = 42            # 随机种子，保证可复现
														
 
															-
														
 
															-    do_continue_train = False    # 每次训练把上一次的final_state作为下一次的init_state，仅用于RNN类型模型，目前仅支持pytorch
														
 
															-    continue_flag = ""           # 但实际效果不佳，可能原因：仅能以 batch_size = 1 训练
														
 
															-    if do_continue_train:
														
 
															-        shuffle_train_data = False
														
 
															-        batch_size = 1
														
 
															-        continue_flag = "continue_"
														
 
															-
														
 
															-    # 训练模式
														
 
															-    debug_mode = False  # 调试模式下，是为了跑通代码，追求快
														
 
															-    debug_num = 500  # 仅用debug_num条数据来调试
														
 
															-
														
 
															-    # 框架参数
														
 
															-    used_frame = frame  # 选择的深度学习框架，不同的框架模型保存后缀不一样
														
 
															-    model_postfix = {"pytorch": ".pth", "keras": ".h5", "tensorflow": ".ckpt"}
														
 
															-    model_name = "model_" + continue_flag + used_frame + model_postfix[used_frame]
														
 
															-
														
 
															-    # 路径参数
														
 
															-    train_data_path = "./data/J00285.csv"
														
 
															-    model_save_path = "./checkpoint/" + used_frame + "/"
														
 
															-    figure_save_path = "./figure/"
														
 
															-    log_save_path = "./log/"
														
 
															-    do_log_print_to_screen = True
														
 
															-    do_log_save_to_file = True                  # 是否将config和训练过程记录到log
														
 
															-    do_figure_save = False
														
 
															-    do_train_visualized = False          # 训练loss可视化，pytorch用visdom，tf用tensorboardX，实际上可以通用, keras没有
														
 
															-    if not os.path.exists(model_save_path):
														
 
															-        os.makedirs(model_save_path)    # makedirs 递归创建目录
														
 
															-    if not os.path.exists(figure_save_path):
														
 
															-        os.mkdir(figure_save_path)
														
 
															-    if do_train and (do_log_save_to_file or do_train_visualized):
														
 
															-        cur_time = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime())
														
 
															-        log_save_path = log_save_path + cur_time + '_' + used_frame + "/"
														
 
															-        os.makedirs(log_save_path)
														
 
															-
														
--- a/wind-LSTM-v2.0-dq+rp/test.py
+++ b/wind-LSTM-v2.0-dq+rp/test.py
@@ -1,43 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/4/11 15:58
														
 
															-# file: test.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-import pandas as pd
														
 
															-import numpy as np
														
 
															-# index = pd.date_range('1/1/2000', periods=9, freq='T')
														
 
															-# series = pd.Series(range(9), index=index)
														
 
															-# df = pd.DataFrame({'value': series})
														
 
															-# series1 = series.resample('3T').sum()
														
 
															-# series2 = series.resample('3T', label='right').sum()
														
 
															-# series3 = series.resample('3T', label='right', closed='right').sum()
														
 
															-# series4 = series.resample('30S').asfreq()
														
 
															-# series5 = series.resample('30S').bfill()
														
 
															-# print(series)
														
 
															-# print(series1)
														
 
															-# print(series2)
														
 
															-# print(series3)
														
 
															-# print(series4)
														
 
															-# print("---", series5)
														
 
															-
														
 
															-# x = np.random.randint(1,100,20).reshape((10,2))
														
 
															-# print(x)
														
 
															-# from sklearn.model_selection import train_test_split
														
 
															-#
														
 
															-# x_train, x_test = train_test_split(x, test_size=0.2, random_state=1, shuffle=False)
														
 
															-# print("x_train", x_train)
														
 
															-# print("x_test", x_test)
														
 
															-
														
 
															-
														
 
															-import numpy as np
														
 
															-import pandas as pd
														
 
															-#创建一组数据
														
 
															-data = {'name': ['John', 'Mike', 'Mozla', 'Rose', 'David', 'Marry', 'Wansi', 'Sidy', 'Jack', 'Alic'],
														
 
															-        'age': [20, 32, 29, np.nan, 15, 28, 21, 30, 37, 25],
														
 
															-        'gender': [0, 0, 1, 1, 0, 1, 0, 0, 1, 1],
														
 
															-        'isMarried': ['yes', 'yes', 'no', 'yes', 'no', 'no', 'no', 'yes', 'no', 'no']}
														
 
															-label = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
														
 
															-df = pd.DataFrame(data, index=label)
														
 
															-print(df.loc[:,'name'])
														
 
															-pass
														
--- a/wind-LSTM-v2.0-nwp+rp+环境/.gitignore
+++ b/wind-LSTM-v2.0-nwp+rp+环境/.gitignore
@@ -1,13 +0,0 @@
 
															-*/__pycache__
														
 
															-/__pycache__
														
 
															-/.idea
														
 
															-/checkpoint
														
 
															-/log
														
 
															-/data
														
 
															-/figure
														
 
															-*.log
														
 
															-*.swp
														
 
															-/log
														
 
															-/data
														
 
															-
														
 
															-
														
--- a/wind-LSTM-v2.0-nwp+rp+环境/Readme.md
+++ b/wind-LSTM-v2.0-nwp+rp+环境/Readme.md
@@ -1,18 +0,0 @@
 
															-## 超短期功率预测系统训练端
														
 
															-
														
 
															-这个项目将LSTM长短期时序模型用于超短期电力功率预测任务，实现特性如下: 
														
 
															-
														
 
															-- 程序简洁、模块化
														
 
															-- 支持可扩展的Keras框架（LSTM，可修改网络层）
														
 
															-- 参数、模型和框架支持高度可定制和修改
														
 
															-- 支持增量训练（在预训练模型上进行微调）
														
 
															-- 支持同时预测多个指标（目前预测实际功率）
														
 
															-- 支持预测任意时间节点数（目前设置16个点）
														
 
															-- 支持训练可视化和记录日志
														
 
															-
														
 
															-
														
 
															-
														
 
															-| 训练case | 表头  |
														
 
															-|--------| ----  |
														
 
															-| 1      | 单元格 |
														
 
															-| 2      | 单元格 |
														
--- a/wind-LSTM-v2.0-nwp+rp+环境/back.py
+++ b/wind-LSTM-v2.0-nwp+rp+环境/back.py
@@ -1,76 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/4/14 15:32
														
 
															-# file: back.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-import sys
														
 
															-import numpy as np
														
 
															-import matplotlib.pyplot as plt
														
 
															-import pandas as pd
														
 
															-
														
 
															-
														
 
															-class data_analyse(object):
														
 
															-    def __init__(self, opt, logger, process):
														
 
															-        self.opt = opt
														
 
															-        self.logger = logger
														
 
															-        self.ds = process
														
 
															-
														
 
															-    def calculate_acc(self, label_data, predict_data):
														
 
															-        loss = np.sum((label_data - predict_data) ** 2) / len(label_data)  # mse
														
 
															-        loss_sqrt = np.sqrt(loss)  # rmse
														
 
															-        loss_acc = 1 - loss_sqrt / self.opt.cap
														
 
															-        return loss_acc
														
 
															-
														
 
															-    def get_16_points(self, results):
														
 
															-        # results为模型预测的一维数组，遍历，取每16个点的最后一个点
														
 
															-        preds = []
														
 
															-        for res in results:
														
 
															-            preds.append(res.iloc[-1].values)
														
 
															-        return np.array(preds)
														
 
															-
														
 
															-    def predict_acc(self, predict_data, dfy):
														
 
															-        predict_data = predict_data * self.ds.std['C_REAL_VALUE'] + self.ds.mean['C_REAL_VALUE']
														
 
															-        dfs = dfy[0]
														
 
															-        for i in range(1, len(dfy)):
														
 
															-            dfs.extend(dfy[i])
														
 
															-        for i, df in enumerate(dfs):
														
 
															-            df["PREDICT"] = predict_data[i]
														
 
															-            dfs[i] = df
														
 
															-        data = self.get_16_points(dfs)
														
 
															-        df = pd.DataFrame(data, columns=['C_TIME', 'C_REAL_VALUE', 'C_FP_VALUE', 'PREDICT'])
														
 
															-        # label_data = label_data.reshape((-1, self.opt.output_size))
														
 
															-        # label_data 要进行反归一化
														
 
															-
														
 
															-        label_name = [self.opt.feature_columns[i] for i in self.opt.label_in_feature_index]
														
 
															-        loss_norm = self.calculate_acc(df['C_REAL_VALUE'], df['PREDICT'])
														
 
															-
														
 
															-        self.logger.info("The mean squared error of power {} is ".format(label_name) + str(loss_norm))
														
 
															-
														
 
															-        loss_norm = self.calculate_acc(df['C_REAL_VALUE'], df['C_FP_VALUE'])
														
 
															-
														
 
															-        self.logger.info("The mean squared error of power {} is ".format(label_name) + str(loss_norm))
														
 
															-        self.preidct_draw(df['C_REAL_VALUE'].values, df['PREDICT'].values)
														
 
															-
														
 
															-    def preidct_draw(self, label_data, predict_data):
														
 
															-        X = list(range(label_data.shape[0]))
														
 
															-        print("label_x = ", X)
														
 
															-        label_column_num = len(self.opt.label_columns)
														
 
															-        label_name = [self.opt.feature_columns[i] for i in self.opt.label_in_feature_index]
														
 
															-        if not sys.platform.startswith('linux'):    # 无桌面的Linux下无法输出，如果是有桌面的Linux，如Ubuntu，可去掉这一行
														
 
															-            for i in range(label_column_num):
														
 
															-                plt.figure(i+1)                     # 预测数据绘制
														
 
															-                plt.plot(X, label_data[:, i], label='label', color='b')
														
 
															-                plt.plot(X, predict_data[:, i], label='predict', color='g')
														
 
															-                # plt.plot(predict_X, dq_data[:, i], label='dq', color='y')
														
 
															-                # plt.title("Predict actual {} power with {}".format(label_name[i], self.opt.used_frame))
														
 
															-                self.logger.info("The predicted power {} for the last {} point(s) is: ".format(label_name[i], self.opt.predict_points) +
														
 
															-                      str(np.squeeze(predict_data[-self.opt.predict_points:, i])))
														
 
															-
														
 
															-                if self.opt.do_figure_save:
														
 
															-                    plt.savefig(self.opt.figure_save_path+"{}predict_{}_with_{}.png".format(self.opt.continue_flag, label_name[i], self.opt.used_frame))
														
 
															-
														
 
															-            plt.show()
														
 
															-
														
 
															-    def tangle_results(self):
														
 
															-        pass
														
--- a/wind-LSTM-v2.0-nwp+rp+环境/config.py
+++ b/wind-LSTM-v2.0-nwp+rp+环境/config.py
@@ -1,94 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/3/17 14:46
														
 
															-# file: config.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-
														
 
															-import yaml
														
 
															-import argparse
														
 
															-
														
 
															-
														
 
															-class myargparse(argparse.ArgumentParser):
														
 
															-    def __init__(self, discription, add_help):
														
 
															-        super(myargparse, self).__init__(description=discription, add_help=add_help)
														
 
															-        # default_config_parser = parser = argparse.ArgumentParser(
														
 
															-        #     description='Training Config', add_help=False)
														
 
															-        self.add_argument(
														
 
															-            '-c',
														
 
															-            '--config_yaml',
														
 
															-            default=
														
 
															-            'config.yml',
														
 
															-            type=str,
														
 
															-            metavar='FILE',
														
 
															-            help='YAML config file specifying default arguments')
														
 
															-
														
 
															-        # feature_columns = list(range(1, 28))
														
 
															-        label_columns = ['C_REAL_VALUE']
														
 
															-
														
 
															-        # label_in_feature_index = (lambda x, y: [x.index(i) for i in y])(feature_columns, label_columns)  # 因为feature不一定从0开始
														
 
															-
														
 
															-
														
 
															-        # 在控制台可以指定的参数， yml中没有
														
 
															-        self.add_argument('--feature_columns', type=list, default=None, help='要作为特征的列')
														
 
															-
														
 
															-        self.add_argument('--label_columns', type=list, default=label_columns, help='要预测的列')
														
 
															-
														
 
															-        self.add_argument('--label_in_feature_index', type=list, default=None, help='标签在特征列的索引')
														
 
															-
														
 
															-        self.add_argument('--input_size', type=int, default=0, help='输入维度')
														
 
															-
														
 
															-        self.add_argument('--output_size', type=int, default=len(label_columns), help='输出维度')
														
 
															-
														
 
															-        self.add_argument("--train_data_path", type=str, default=None,help='数据集地址')  # train_data_path yml中有
														
 
															-
														
 
															-        # model_name 和 model_save_path 这两个参数根据yml中的参数拼接而成
														
 
															-
														
 
															-        self.add_argument('--model_name', type=str, default=None, help='模型名称')
														
 
															-
														
 
															-        self.add_argument('--model_save_path', type=str, default=None, help='模型保存地址')
														
 
															-
														
 
															-
														
 
															-    def _init_dir(self, opt):
														
 
															-        import os, time
														
 
															-        # 在这里给opt赋值
														
 
															-        opt.model_name = "model_" + opt.continue_flag + opt.used_frame + opt.model_postfix[opt.used_frame]
														
 
															-        opt.model_save_path = './checkpoint/' + opt.model_name + "/"
														
 
															-        if not os.path.exists(opt.model_save_path):
														
 
															-            os.makedirs(opt.model_save_path)    # makedirs 递归创建目录
														
 
															-        if not os.path.exists(opt.figure_save_path):
														
 
															-            os.mkdir(opt.figure_save_path)
														
 
															-        if opt.do_train and (opt.do_log_save_to_file or opt.do_train_visualized):
														
 
															-            cur_time = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime())
														
 
															-            log_save_path = opt.log_save_path + cur_time + '_' + opt.used_frame + "/"
														
 
															-            os.makedirs(log_save_path)
														
 
															-
														
 
															-
														
 
															-# YAML should override the argparser's content
														
 
															-    def _parse_args_and_yaml(self):
														
 
															-        given_configs, remaining = self.parse_known_args()
														
 
															-        if given_configs.config_yaml:
														
 
															-            with open(given_configs.config_yaml, 'r', encoding='utf-8') as f:
														
 
															-                cfg = yaml.safe_load(f)
														
 
															-                self.set_defaults(**cfg)
														
 
															-
														
 
															-        # The main arg parser parses the rest of the args, the usual
														
 
															-        # defaults will have been overridden if config file specified.
														
 
															-        opt = self.parse_args(remaining)
														
 
															-        self._init_dir(opt)
														
 
															-        # Cache the args as a text string to save them in the output dir later
														
 
															-        opt_text = yaml.safe_dump(opt.__dict__, default_flow_style=False)
														
 
															-        return opt, opt_text
														
 
															-
														
 
															-
														
 
															-    def parse_args_and_yaml(self):
														
 
															-        return self._parse_args_and_yaml()[0]
														
 
															-
														
 
															-
														
 
if __name__ == "__main__":
    # Running this module directly does nothing; the commented call below is
    # left over from a manual check.
    # opt = _parse_args_and_yaml()
    pass
														
 
															-
														
 
															-
														
 
															-
														
 
															-
														
--- a/wind-LSTM-v2.0-nwp+rp+环境/config.yml
+++ b/wind-LSTM-v2.0-nwp+rp+环境/config.yml
@@ -1,77 +0,0 @@
 
															-Model:
														
 
															-  batch_size: 64
														
 
															-  dropout_rate: 0.2
														
 
															-  epoch: 20
														
 
															-  hidden_size: 128
														
 
															-  learning_rate: 0.001
														
 
															-  lstm_layers: 2
														
 
															-  patience: 5
														
 
															-  random_seed: 42
														
 
															-  time_step: 16
														
 
															-add_train: false
														
 
															-continue_flag: ''
														
 
															-data_format:
														
 
															-  dq: dq.csv
														
 
															-  envir: "\u73AF\u5883\u6570\u636E.csv"
														
 
															-  nwp: nwp.csv
														
 
															-  rp: rp.csv
														
 
															-debug_model: false
														
 
															-debug_num: 500
														
 
															-do_continue_train: false
														
 
															-do_figure_save: false
														
 
															-do_log_print_to_screen: true
														
 
															-do_log_save_to_file: true
														
 
															-do_predict: true
														
 
															-do_train: true
														
 
															-do_train_visualized: True
														
 
															-excel_data_path: ./data/J00307/
														
 
															-figure_save_path: ./figure/
														
 
															-is_continuous_predict: True
														
 
															-log_save_path: ./log/
														
 
															-mean:
														
 
															-  C_AIRT: 10.305992230762874
														
 
															-  C_CELLT: 10.664897925448384
														
 
															-  C_DIFFUSER: 143.2639061079428
														
 
															-  C_DIFFUSERDA: 6.571077155136789
														
 
															-  C_DIRECTR: 68.21328208942887
														
 
															-  C_DIRECTRDA: 3.163283039920654
														
 
															-  C_FORECAST: 3.1419734966774113
														
 
															-  C_GLOBALR: 173.2587817174973
														
 
															-  C_GLOBALRDA: 7.756491280271097
														
 
															-  C_HOURDA: 1.998222150590958
														
 
															-  C_P: 947.7830440532276
														
 
															-  C_RH: 55.59672286965865
														
 
															-  C_VALUE: 3.404744648318043
														
 
															-  C_WD: 212.88300686007108
														
 
															-  C_WS: 1.802446483180428
														
 
															-model_postfix:
														
 
															-  keras: .h5
														
 
															-  pytorch: .pth
														
 
															-  tensorflow: .ckpt
														
 
															-predict_points: 16
														
 
															-shuffle_train_data: false
														
 
															-std:
														
 
															-  C_AIRT: 12.127220611319888
														
 
															-  C_CELLT: 12.654848145970181
														
 
															-  C_DIFFUSER: 230.93680419867772
														
 
															-  C_DIFFUSERDA: 6.4933162833681415
														
 
															-  C_DIRECTR: 166.61348332191056
														
 
															-  C_DIRECTRDA: 4.991297839913351
														
 
															-  C_FORECAST: 4.447082956749344
														
 
															-  C_GLOBALR: 258.87947949591955
														
 
															-  C_GLOBALRDA: 7.9174382136573955
														
 
															-  C_HOURDA: 2.9110230573747247
														
 
															-  C_P: 25.75152505719027
														
 
															-  C_RH: 22.445059526990818
														
 
															-  C_VALUE: 5.013868885103326
														
 
															-  C_WD: 112.90029001408325
														
 
															-  C_WS: 1.6575249140627502
														
 
															-train_data_path: ./data/
														
 
															-train_data_rate: 0.9
														
 
															-use_cuda: false
														
 
															-used_frame: keras
														
 
															-valid_data_rate: 0.15
														
 
															-
														
 
															-is_photovoltaic: True
														
 
															-cap: 110
														
 
															-envir_columns: 16
														
--- a/wind-LSTM-v2.0-nwp+rp+环境/data_analyse.py
+++ b/wind-LSTM-v2.0-nwp+rp+环境/data_analyse.py
@@ -1,76 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/4/12 18:57
														
 
															-# file: data_analyse.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-import sys
														
 
															-import numpy as np
														
 
															-import matplotlib.pyplot as plt
														
 
															-import pandas as pd
														
 
															-
														
 
															-
														
 
															-class data_analyse(object):
														
 
															-    def __init__(self, opt, logger, process):
														
 
															-        self.opt = opt
														
 
															-        self.logger = logger
														
 
															-        self.ds = process
														
 
															-
														
 
															-    def calculate_acc(self, label_data, predict_data):
														
 
															-        loss = np.sum((label_data - predict_data) ** 2) / len(label_data)  # mse
														
 
															-        loss_sqrt = np.sqrt(loss)  # rmse
														
 
															-        loss_acc = 1 - loss_sqrt / self.opt.cap
														
 
															-        return loss_acc
														
 
															-
														
 
															-    def get_16_points(self, results):
														
 
															-        # results为模型预测的一维数组，遍历，取每16个点的最后一个点
														
 
															-        preds = []
														
 
															-        for res in results:
														
 
															-            preds.append(res.iloc[-1].values)
														
 
															-        return np.array(preds)
														
 
															-
														
 
															-    def predict_acc(self, predict_data, dfy):
														
 
															-        predict_data = predict_data * self.ds.std['C_REAL_VALUE'] + self.ds.mean['C_REAL_VALUE']
														
 
															-        dfs = dfy[0]
														
 
															-        for i in range(1, len(dfy)):
														
 
															-            dfs.extend(dfy[i])
														
 
															-        for i, df in enumerate(dfs):
														
 
															-            df["PREDICT"] = predict_data[i]
														
 
															-            dfs[i] = df
														
 
															-        data = self.get_16_points(dfs)
														
 
															-        df = pd.DataFrame(data, columns=['C_TIME', 'C_REAL_VALUE', 'PREDICT'])
														
 
															-        # label_data = label_data.reshape((-1, self.opt.output_size))
														
 
															-        # label_data 要进行反归一化
														
 
															-        df.to_csv(self.opt.excel_data_path + "nwp+rp+环境.csv")
														
 
															-        label_name = [self.opt.feature_columns[i] for i in self.opt.label_in_feature_index]
														
 
															-        loss_norm = self.calculate_acc(df['C_REAL_VALUE'], df['PREDICT'])
														
 
															-
														
 
															-        self.logger.info("The mean squared error of power {} is ".format(label_name) + str(loss_norm))
														
 
															-
														
 
															-        # loss_norm = self.calculate_acc(df['C_REAL_VALUE'], df['C_FP_VALUE'])
														
 
															-
														
 
															-        # self.logger.info("The mean squared error of power {} is ".format(label_name) + str(loss_norm))
														
 
															-        self.preidct_draw(df['C_REAL_VALUE'].values, df['PREDICT'].values)
														
 
															-
														
 
															-    def preidct_draw(self, label_data, predict_data):
														
 
															-        X = list(range(label_data.shape[0]))
														
 
															-        print("label_x = ", X)
														
 
															-        label_column_num = len(self.opt.label_columns)
														
 
															-        label_name = [self.opt.feature_columns[i] for i in self.opt.label_in_feature_index]
														
 
															-        if not sys.platform.startswith('linux'):    # 无桌面的Linux下无法输出，如果是有桌面的Linux，如Ubuntu，可去掉这一行
														
 
															-            for i in range(label_column_num):
														
 
															-                plt.figure(i+1)                     # 预测数据绘制
														
 
															-                plt.plot(X, label_data, label='label', color='b')
														
 
															-                plt.plot(X, predict_data, label='predict', color='g')
														
 
															-                # plt.plot(predict_X, dq_data[:, i], label='dq', color='y')
														
 
															-                # plt.title("Predict actual {} power with {}".format(label_name[i], self.opt.used_frame))
														
 
															-                # self.logger.info("The predicted power {} for the last {} point(s) is: ".format(label_name[i], self.opt.predict_points) +
														
 
															-                #       str(np.squeeze(predict_data[-self.opt.predict_points:, i])))
														
 
															-
														
 
															-                if self.opt.do_figure_save:
														
 
															-                    plt.savefig(self.opt.figure_save_path+"{}predict_{}_with_{}.png".format(self.opt.continue_flag, label_name[i], self.opt.used_frame))
														
 
															-
														
 
															-            plt.show()
														
 
															-
														
 
															-    def tangle_results(self):
														
 
															-        pass
														
--- a/wind-LSTM-v2.0-nwp+rp+环境/data_features.py
+++ b/wind-LSTM-v2.0-nwp+rp+环境/data_features.py
@@ -1,106 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/4/12 17:42
														
 
															-# file: data_features.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-import pandas as pd
														
 
															-from sklearn.model_selection import train_test_split
														
 
															-import numpy as np
														
 
															-from data_utils import *
														
 
															-
														
 
															-
														
 
															-class data_features(object):
														
 
															-    def __init__(self, opt, mean, std):
														
 
															-        self.opt = opt
														
 
															-        self.time_step = self.opt.Model["time_step"]
														
 
															-        self.mean = mean
														
 
															-        self.std = std
														
 
															-        self.columns = list()
														
 
															-
														
 
															-    def get_train_data(self, dfs):
														
 
															-        train_x, valid_x, train_y, valid_y = [], [], [], []
														
 
															-        self.opt.feature_columns = dfs[0].columns.tolist()
														
 
															-        self.opt.feature_columns.insert(0, 'C_TIME')
														
 
															-        self.opt.label_in_feature_index = (lambda x, y: [x.index(i) for i in y])(self.opt.feature_columns,
														
 
															-                                                                        self.opt.label_columns)  # 因为feature不一定从0开始
														
 
															-        self.opt.input_size = len(self.opt.feature_columns)
														
 
															-        for df in dfs:
														
 
															-            datax, datay = self.get_data_features(df)
														
 
															-            trainx = np.array(datax)
														
 
															-            trainy = [y['C_REAL_VALUE'].values for y in datay]
														
 
															-            trainy = np.expand_dims(np.array(trainy), axis=-1)  # 在最后一维加一维度
														
 
															-            tx, vx, ty, vy = train_test_split(trainx, trainy, test_size=self.opt.valid_data_rate,
														
 
															-                                                                  random_state=self.opt.Model["random_seed"],
														
 
															-                                                                  shuffle=self.opt.shuffle_train_data)  # 划分训练和验证集
														
 
															-            train_x.append(tx)
														
 
															-            valid_x.append(vx)
														
 
															-            train_y.append(ty)
														
 
															-            valid_y.append(vy)
														
 
															-
														
 
															-        train_x = np.concatenate(train_x, axis=0)
														
 
															-        valid_x = np.concatenate(valid_x, axis=0)
														
 
															-        train_y = np.concatenate(train_y, axis=0)
														
 
															-        valid_y = np.concatenate(valid_y, axis=0)
														
 
															-
														
 
															-        train_x = self.norm_features(train_x)
														
 
															-        valid_x = self.norm_features(valid_x)
														
 
															-        train_y = self.norm_label(train_y)
														
 
															-        valid_y = self.norm_label(valid_y)
														
 
															-
														
 
															-        return train_x, valid_x, train_y, valid_y
														
 
															-
														
 
															-    def get_test_data(self, dfs):
														
 
															-        test_x, test_y, data_y = [], [], []
														
 
															-        self.opt.feature_columns = dfs[0].columns.tolist()
														
 
															-        self.opt.feature_columns.insert(0, 'C_TIME')
														
 
															-        self.opt.label_in_feature_index = (lambda x, y: [x.index(i) for i in y])(self.opt.feature_columns,
														
 
															-                                                                        self.opt.label_columns)  # 因为feature不一定从0开始
														
 
															-        self.opt.input_size = len(self.opt.feature_columns)
														
 
															-        for df in dfs:
														
 
															-            datax, datay = self.get_data_features(df)
														
 
															-            trainx = np.array(datax)
														
 
															-            trainy = [y['C_REAL_VALUE'].values for y in datay]
														
 
															-            trainy = np.expand_dims(np.array(trainy), axis=-1)  # 在最后一维加一维度
														
 
															-            test_x.append(trainx)
														
 
															-            test_y.append(trainy)
														
 
															-            data_y.append(datay)
														
 
															-
														
 
															-        test_x = np.concatenate(test_x, axis=0)
														
 
															-        test_y = np.concatenate(test_y, axis=0)
														
 
															-
														
 
															-        test_x = self.norm_features(test_x)
														
 
															-        test_y = self.norm_label(test_y)
														
 
															-
														
 
															-        return test_x, test_y, data_y
														
 
															-
														
 
															-    def get_data_features(self, df):   # 这段代码基于pandas方法的优化
														
 
															-        norm_data = df.reset_index()
														
 
															-        feature_data = norm_data[:-self.opt.predict_points]
														
 
															-        label_data = norm_data[self.opt.predict_points:].reset_index(drop=True)
														
 
															-        time_step = self.opt.Model["time_step"]
														
 
															-        time_step_loc = time_step - 1
														
 
															-        train_num = int(len(feature_data))
														
 
															-        time_rp = [feature_data.loc[i:i + time_step_loc, 'C_TIME':'C_WD_INST120'] for i in range(train_num - time_step)]
														
 
															-        nwp = [label_data.loc[i:i + time_step_loc, 'C_T':] for i in range(train_num - time_step)]
														
 
															-        features_x, features_y = [], []
														
 
															-        for row in zip(time_rp, nwp):
														
 
															-            row0 = row[0]
														
 
															-            row1 = row[1]
														
 
															-            row0 = pd.concat([row0, row1], axis=1)
														
 
															-            row0.set_index('C_TIME', inplace=True, drop=False)
														
 
															-            row0["C_TIME"] = row0["C_TIME"].apply(datetime_to_timestr)
														
 
															-            features_x.append(row0)
														
 
															-        self.columns = row0.columns.tolist()
														
 
															-
														
 
															-        features_y = [label_data.loc[i:i + time_step_loc, ['C_TIME', 'C_REAL_VALUE']] for i in range(train_num - time_step)]
														
 
															-        return features_x, features_y
														
 
															-
														
 
															-    def norm_features(self, data: np.ndarray):
														
 
															-        mean = np.array([self.mean[col] for col in self.columns])
														
 
															-        std = np.array([self.std[col] for col in self.columns])
														
 
															-        data = (data - mean) / std  # 归一化
														
 
															-        return data
														
 
															-
														
 
															-    def norm_label(self, label_data: np.ndarray):
														
 
															-        return (label_data - self.mean['C_REAL_VALUE']) / self.std['C_REAL_VALUE']
														
--- a/wind-LSTM-v2.0-nwp+rp+环境/data_process.py
+++ b/wind-LSTM-v2.0-nwp+rp+环境/data_process.py
@@ -1,144 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/3/17 10:10
														
 
															-# file: main.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-import pandas as pd
														
 
															-import numpy as np
														
 
															-from data_utils import *
														
 
															-import yaml
														
 
															-
														
 
															-
														
 
															-class data_process(object):
														
 
															-    def __init__(self, opt):
														
 
															-        self.std = None
														
 
															-        self.mean = None
														
 
															-        self.opt = opt
														
 
															-        # 都是在ndarray量纲下进行计算
														
 
															-        # self.norm_data = (self.tables[:, 1:] - self.mean) / self.std  # 归一化，去量纲
														
 
															-        # self.norm_data.insert(0, 'C_TIME', self.tables['C_TIME'])
														
 
															-        # self.set_yml({'mean': self.mean.to_dict(), 'std': self.std.to_dict()})
														
 
															-        # self.start_num_in_test = 0
														
 
															-
														
 
															-    def get_processed_data(self):
														
 
															-        excel_data_path = self.opt.excel_data_path
														
 
															-        data_format = self.opt.data_format
														
 
															-        dq_path = excel_data_path + data_format["dq"]
														
 
															-        rp_path = excel_data_path + data_format["rp"]
														
 
															-        nwp_path = excel_data_path + data_format["nwp"]
														
 
															-        envir_path = excel_data_path + data_format["envir"]
														
 
															-
														
 
															-
														
 
															-        dq_columns = ['C_FORECAST_TIME', 'C_FP_VALUE']
														
 
															-        rp_columns = ['C_TIME', 'C_REAL_VALUE']  # 待优化 ["'C_TIME'", "'C_REAL_VALUE'"] 原因：csv 字符串是单引号''，read_csv带单引号
														
 
															-
														
 
															-        nwp = self.read_data(nwp_path).loc[:, "C_PRE_TIME":]  # 待优化 导出csv按照表的列顺序 read_csv按照csv列顺序读取
														
 
															-        nwp = self.data_cleaning(nwp)
														
 
															-        nwp.drop(['C_FARM_ID', 'C_SC_DATE', 'C_SC_TIME', 'C_PRE_DATE'], axis=1, inplace=True)
														
 
															-        nwp["C_PRE_TIME"] = nwp["C_PRE_TIME"].apply(timestr_to_datetime)
														
 
															-        nwp.rename({"C_PRE_TIME": "C_TIME"}, axis=1, inplace=True)
														
 
															-        nwp.set_index('C_TIME', inplace=True)
														
 
															-        nwp = self.drop_duplicated(nwp)
														
 
															-
														
 
															-        envir = self.read_data(envir_path).loc[:, "C_TIME":]  # 待优化 导出csv按照表的列顺序 read_csv按照csv列顺序读取
														
 
															-        envir = self.data_cleaning(envir)
														
 
															-        envir["C_TIME"] = envir["C_TIME"].apply(timestr_to_datetime)
														
 
															-        envir.set_index('C_TIME', inplace=True)
														
 
															-        envir = self.drop_duplicated(envir)
														
 
															-
														
 
															-        rp = self.read_data(rp_path, rp_columns)
														
 
															-        rp["C_TIME"] = rp["C_TIME"].apply(timestr_to_datetime)
														
 
															-        rp.set_index('C_TIME', inplace=True)  # nan也可以设置索引列
														
 
															-        rp = self.data_cleaning(rp)
														
 
															-        rp = self.drop_duplicated(rp)
														
 
															-
														
 
															-        df = self.tables_unite(rp, envir)
														
 
															-        df = self.tables_unite(df, nwp)
														
 
															-        dfs = self.missing_time_splite(df)
														
 
															-        dfs = [self.data_fill(df) for df in dfs]
														
 
															-        self.norm(dfs)  # 归一化 待解决
														
 
															-        return dfs
														
 
															-
														
 
															-    def norm(self, dfs):
														
 
															-        df = pd.concat(dfs, axis=0)
														
 
															-        df = df.reset_index()
														
 
															-        df["C_TIME"] = df["C_TIME"].apply(datetime_to_timestr)
														
 
															-        mean = np.mean(df, axis=0)  # 数据的均值
														
 
															-        std = np.std(df, axis=0)  # 标准差
														
 
															-        if hasattr(self.opt, 'mean') is False or hasattr(self.opt, 'std') is False:
														
 
															-            self.set_yml({'mean': mean.to_dict(), 'std': std.to_dict()})
														
 
															-        print("归一化参数，均值为：{}，方差为：{}".format(mean.to_dict(), std.to_dict()))
														
 
															-        self.mean, self.std = mean.to_dict(), std.to_dict()
														
 
															-
														
 
															-    def data_cleaning(self, data):
														
 
															-        data = data.replace(-99, np.nan)
														
 
															-        # nan 超过30% 删除
														
 
															-        data = data.dropna(axis=1, thresh=len(data)*0.7)
														
 
															-        # 删除取值全部相同的列
														
 
															-        data = data.loc[:, (data != data.iloc[0]).any()]
														
 
															-        # nan 替换成0 本周问题 1.卷积学习，0是否合适？
														
 
															-        data = data.replace(np.nan, 0)
														
 
															-        return data
														
 
															-
														
 
															-    def missing_time_splite(self, df):
														
 
															-        dt = pd.Timedelta(minutes=15)
														
 
															-        day1 = pd.Timedelta(days=1)
														
 
															-        cnt = 0
														
 
															-        cnt1 = 0
														
 
															-        start_index = 0
														
 
															-        dfs = []
														
 
															-        for i in range(1, len(df)):
														
 
															-            if df.index[i] - df.index[i-1] >= day1:
														
 
															-                df_x = df.iloc[start_index:i, ]
														
 
															-                dfs.append(df_x)
														
 
															-                start_index = i
														
 
															-                cnt1 += 1
														
 
															-            if df.index[i] - df.index[i-1] != dt:
														
 
															-                print(df.index[i-1], end=" ~ ")
														
 
															-                print(df.index[i])
														
 
															-                cnt += 1
														
 
															-        dfs.append(df.iloc[start_index:, ])
														
 
															-        print("数据总数：", len(df), "，缺失段数：", cnt, "其中，超过一天的段数：", cnt1)
														
 
															-        return dfs
														
 
															-
														
 
															-    def data_fill(self, df):
														
 
															-        df = df.resample('15T').bfill()
														
 
															-        return df
														
 
															-
														
 
															-    def set_yml(self, yml_dict):
														
 
															-        with open(self.opt.config_yaml, 'r', encoding='utf-8') as f:
														
 
															-            cfg = yaml.safe_load(f)
														
 
															-        for k, v in yml_dict.items():
														
 
															-            cfg[k] = v
														
 
															-        with open(self.opt.config_yaml, 'w') as f:
														
 
															-            yaml.safe_dump(cfg, f, default_flow_style=False)
														
 
															-
														
 
															-    def read_data(self, path, cols=None, index_col=None):
														
 
															-        init_data = pd.read_csv(path, usecols=cols, index_col=index_col)
														
 
															-        return init_data
														
 
															-
														
 
															-    def filter_data(self):
														
 
															-        check_table = self.tables[:, 2]  # 实际功率不能为0，为0代表没发电
														
 
															-        preserve_index = list(np.nonzero(check_table)[0])
														
 
															-        indexs = list(range(len(self.tables)))
														
 
															-        del_index = list(set(indexs) - set(preserve_index))
														
 
															-        self.tables = np.delete(self.tables, del_index, axis=0)
														
 
															-        return self.tables
														
 
															-
														
 
															-    def drop_duplicated(self, df):
														
 
															-        df = df.groupby(level=0).mean()  # DatetimeIndex时间索引去重
														
 
															-        return df
														
 
															-
														
 
															-    def tables_unite(self, t1, t2):
														
 
															-        return pd.merge(t1, t2, left_index=True, right_index=True)
														
 
															-
														
 
															-
														
 
if __name__ == "__main__":
    # NOTE(review): `DataSet` is not defined in the visible part of this module
    # (the class defined here is `data_process`); presumably a leftover from an
    # earlier version. Confirm before running this file as a script.
    ds = DataSet()
    # dq = ds.read_data(dq_path, dq_columns)[0]
    # rp = ds.read_data(rp_path, rp_columns)[0]
    # # rp_average(rp)    # compute the average power
    # envir = ds.read_data(envir_path, envir_columns)[0]
    # tables = ds.tables_integra(dq, rp, envir)
    # ds.tables_norm_result(tables)
														
--- a/wind-LSTM-v2.0-nwp+rp+环境/data_utils.py
+++ b/wind-LSTM-v2.0-nwp+rp+环境/data_utils.py
@@ -1,65 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/3/22 17:17
														
 
															-# file: dpdUtils.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-
														
 
															-
														
 
															-import time, datetime
														
 
															-
														
 
															-
														
 
															-class ValidationError(Exception):
														
 
															-    def __init__(self, message):
														
 
															-        self.message = message
														
 
															-
														
 
															-
														
 
															-def timestamp_to_datetime(ts):
														
 
															-    if type(ts) is not int:
														
 
															-        raise ValueError("timestamp-时间格式必须是整型")
														
 
															-    if len(str(ts)) == 13:
														
 
															-        return datetime.datetime.fromtimestamp(ts/1000)
														
 
															-    elif len(str(ts)) == 10:
														
 
															-        return datetime.datetime.fromtimestamp(ts)
														
 
															-    else:
														
 
															-        raise ValueError("timestamp-时间格式长度错误")
														
 
															-
														
 
															-
														
 
															-def datetime_to_timestamp(dt, len):
														
 
															-    if len not in (10, 13):
														
 
															-        raise ValueError("timestamp-时间戳转换长度错误")
														
 
															-    if len == 10:
														
 
															-        return int(round(time.mktime(dt.timetuple())))
														
 
															-    else:
														
 
															-        return int(round(time.mktime(dt.timetuple()))*1000)
														
 
															-
														
 
															-
														
 
															-def datetime_to_timestr(dt):
														
 
															-    return int(dt.strftime('%m%d%H%M'))
														
 
															-
														
 
															-
														
 
															-def timestr_to_datetime(time_data):
														
 
															-    """
														
 
															-    将时间戳或时间字符串转换为datetime.datetime类型
														
 
															-    :param time_data: int or str
														
 
															-    :return:datetime.datetime
														
 
															-    """
														
 
															-    if isinstance(time_data, float):
														
 
															-        result = timestamp_to_datetime(int(time_data))
														
 
															-    elif isinstance(time_data, int):
														
 
															-        result = timestamp_to_datetime(time_data)
														
 
															-    elif isinstance(time_data, str):
														
 
															-        if len(time_data) == 10:
														
 
															-            result = datetime.datetime.strptime(time_data, '%d/%m/%Y')
														
 
															-            # result = datetime.datetime.strptime(time_data, '%Y-%m-%d')
														
 
															-        elif len(time_data) in {17, 18, 19}:
														
 
															-            result = datetime.datetime.strptime(time_data, '%d/%m/%Y %H:%M:%S')   # strptime字符串解析必须严格按照字符串中的格式
														
 
															-            # result = datetime.datetime.strptime(time_data, '%Y-%m-%d %H:%M:%S')
														
 
															-        else:
														
 
															-            raise ValidationError("时间字符串长度不满足要求！")
														
 
															-    return result
														
 
															-
														
 
															-
														
 
															-def timestamp_to_timestr(t):
														
 
															-    return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(t))
														
 
															-
														
--- a/wind-LSTM-v2.0-nwp+rp+环境/figure.py
+++ b/wind-LSTM-v2.0-nwp+rp+环境/figure.py
@@ -1,83 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/3/20 15:19
														
 
															-# file: figure.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-import sys
														
 
															-import numpy as np
														
 
															-import matplotlib.pyplot as plt
														
 
															-
														
 
															-
														
 
															-class Figure(object):
														
 
															-    def __init__(self, opt, logger, process):
														
 
															-        self.opt = opt
														
 
															-        self.ds = process
														
 
															-        self.logger = logger
														
 
															-
														
 
															-    def get_16_points(self, results):
														
 
															-        # results为模型预测的一维数组，遍历，取每16个点的最后一个点
														
 
															-        preds = []
														
 
															-        for res in results:
														
 
															-            preds.append(res[-1])
														
 
															-        return np.array(preds)
														
 
															-
														
 
															-    def draw(self, label_data, predict_norm_data, numbers):
														
 
															-        # label_data = origin_data.data[origin_data.train_num + origin_data.start_num_in_test : ,
														
 
															-        #                                         config.label_in_feature_index]
														
 
															-        # dq_data = dq_data.reshape((-1, self.opt.output_size))
														
 
															-        predict_norm_data = self.get_16_points(predict_norm_data)
														
 
															-        label_data = self.get_16_points(label_data)
														
 
															-        label_data = label_data.reshape((-1, self.opt.output_size))
														
 
															-        # label_data 要进行反归一化
														
 
															-        label_data = label_data * self.ds.std[self.opt.label_in_feature_index] + \
														
 
															-                       self.ds.mean[self.opt.label_in_feature_index]
														
 
															-        predict_data = predict_norm_data * self.ds.std[self.opt.label_in_feature_index] + \
														
 
															-                       self.ds.mean[self.opt.label_in_feature_index]   # 通过保存的均值和方差还原数据
														
 
															-        # dq_data = dq_data * self.ds.std[0] + self.ds.mean[0]
														
 
															-        # predict_data = predict_norm_data
														
 
															-        assert label_data.shape[0] == predict_data.shape[0], "The element number in origin and predicted data is different"
														
 
															-
														
 
															-        label_name = [self.ds.tables_column_name[i] for i in self.opt.label_in_feature_index]
														
 
															-        label_column_num = len(self.opt.label_columns)
														
 
															-
														
 
															-        # label 和 predict 是错开config.predict_day天的数据的
														
 
															-        # 下面是两种norm后的loss的计算方式，结果是一样的，可以简单手推一下
														
 
															-        # label_norm_data = origin_data.norm_data[origin_data.train_num + origin_data.start_num_in_test:,
														
 
															-        #              config.label_in_feature_index]
														
 
															-        # loss_norm = np.mean((label_norm_data[config.predict_day:] - predict_norm_data[:-config.predict_day]) ** 2, axis=0)
														
 
															-        # logger.info("The mean squared error of stock {} is ".format(label_name) + str(loss_norm))
														
 
															-
														
 
															-        loss = np.sum((label_data - predict_data) ** 2)/len(label_data)  # mse
														
 
															-        # loss = np.mean((label_data - predict_data) ** 2, axis=0)
														
 
															-        loss_sqrt = np.sqrt(loss)   # rmse
														
 
															-        loss_norm = 1 - loss_sqrt / self.opt.cap
														
 
															-        # loss_norm = loss/(ds.std[opt.label_in_feature_index] ** 2)
														
 
															-        self.logger.info("The mean squared error of power {} is ".format(label_name) + str(loss_norm))
														
 
															-
														
 
															-        # loss1 = np.sum((label_data - dq_data) ** 2) / len(label_data)  # mse
														
 
															-        # loss_sqrt1 = np.sqrt(loss1)  # rmse
														
 
															-        # loss_norm1 = 1 - loss_sqrt1 / self.opt.cap
														
 
															-        # self.logger.info("The mean squared error1 of power {} is ".format(label_name) + str(loss_norm1))
														
 
															-        if self.opt.is_continuous_predict:
														
 
															-            # label_X = range(int((self.ds.data_num - self.ds.train_num - 32)))
														
 
															-            label_X = list(range(numbers))
														
 
															-        else:
														
 
															-            label_X = range(int((self.ds.data_num - self.ds.train_num - self.ds.start_num_in_test)/2))
														
 
															-        print("label_x = ", label_X)
														
 
															-        predict_X = [x for x in label_X]
														
 
															-
														
 
															-        if not sys.platform.startswith('linux'):    # 无桌面的Linux下无法输出，如果是有桌面的Linux，如Ubuntu，可去掉这一行
														
 
															-            for i in range(label_column_num):
														
 
															-                plt.figure(i+1)                     # 预测数据绘制
														
 
															-                plt.plot(label_X, label_data[:, i], label='label', color='b')
														
 
															-                plt.plot(predict_X, predict_data[:, i], label='predict', color='g')
														
 
															-                # plt.plot(predict_X, dq_data[:, i], label='dq', color='y')
														
 
															-                # plt.title("Predict actual {} power with {}".format(label_name[i], self.opt.used_frame))
														
 
															-                self.logger.info("The predicted power {} for the last {} point(s) is: ".format(label_name[i], self.opt.predict_points) +
														
 
															-                      str(np.squeeze(predict_data[-self.opt.predict_points:, i])))
														
 
															-
														
 
															-                if self.opt.do_figure_save:
														
 
															-                    plt.savefig(self.opt.figure_save_path+"{}predict_{}_with_{}.png".format(self.opt.continue_flag, label_name[i], opt.used_frame))
														
 
															-
														
 
															-            plt.show()
														
--- a/wind-LSTM-v2.0-nwp+rp+环境/logger.py
+++ b/wind-LSTM-v2.0-nwp+rp+环境/logger.py
@@ -1,43 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/3/20 15:19
														
 
															-# file: logger.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-
														
 
															-import logging, sys
														
 
															-from logging.handlers import RotatingFileHandler
														
 
															-
														
 
															-
														
 
															-def load_logger(config):
														
 
															-    logger = logging.getLogger()
														
 
															-    logger.setLevel(level=logging.DEBUG)
														
 
															-
														
 
															-    # StreamHandler
														
 
															-    if config.do_log_print_to_screen:
														
 
															-        stream_handler = logging.StreamHandler(sys.stdout)
														
 
															-        stream_handler.setLevel(level=logging.INFO)
														
 
															-        formatter = logging.Formatter(datefmt='%Y/%m/%d %H:%M:%S',
														
 
															-                                      fmt='[ %(asctime)s ] %(message)s')
														
 
															-        stream_handler.setFormatter(formatter)
														
 
															-        logger.addHandler(stream_handler)
														
 
															-
														
 
															-    # FileHandler
														
 
															-    if config.do_log_save_to_file:
														
 
															-        file_handler = RotatingFileHandler(config.log_save_path + "out.log", maxBytes=1024000, backupCount=5)
														
 
															-        file_handler.setLevel(level=logging.INFO)
														
 
															-        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
														
 
															-        file_handler.setFormatter(formatter)
														
 
															-        logger.addHandler(file_handler)
														
 
															-
														
 
															-        # 把config信息也记录到log 文件中
														
 
															-        config_dict = {}
														
 
															-        for key in dir(config):
														
 
															-            if not key.startswith("_"):
														
 
															-                config_dict[key] = getattr(config, key)
														
 
															-        config_str = str(config_dict)
														
 
															-        config_list = config_str[1:-1].split(", '")
														
 
															-        config_save_str = "\nConfig:\n" + "\n'".join(config_list)
														
 
															-        logger.info(config_save_str)
														
 
															-
														
 
															-    return logger
														
--- a/wind-LSTM-v2.0-nwp+rp+环境/model/__init__.py
+++ b/wind-LSTM-v2.0-nwp+rp+环境/model/__init__.py
--- a/wind-LSTM-v2.0-nwp+rp+环境/model/model_keras.py
+++ b/wind-LSTM-v2.0-nwp+rp+环境/model/model_keras.py
@@ -1,48 +0,0 @@
 
															-# -*- coding: UTF-8 -*-
														
 
															-from keras.layers import Input, Dense, LSTM
														
 
															-from keras.models import Model
														
 
															-from keras.callbacks import ModelCheckpoint, EarlyStopping
														
 
															-
														
 
															-
														
 
															-def get_keras_model(opt):
														
 
															-    input1 = Input(shape=(opt.Model['time_step'], opt.input_size))
														
 
															-    lstm = input1
														
 
															-    for i in range(opt.Model['lstm_layers']):
														
 
															-        lstm = LSTM(units=opt.Model['hidden_size'],dropout=opt.Model['dropout_rate'],return_sequences=True)(lstm)
														
 
															-    output = Dense(opt.output_size)(lstm)
														
 
															-    model = Model(input1, output)
														
 
															-    model.compile(loss='mse', optimizer='adam')     # metrics=["mae"]
														
 
															-    return model
														
 
															-
														
 
															-
														
 
															-def gpu_train_init():
														
 
															-    import tensorflow as tf
														
 
															-    from keras.backend.tensorflow_backend import set_session
														
 
															-    sess_config = tf.ConfigProto(log_device_placement=True, allow_soft_placement=True)
														
 
															-    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.7  # 最多使用70%GPU内存
														
 
															-    sess_config.gpu_options.allow_growth=True   # 初始化时不全部占满GPU显存, 按需分配
														
 
															-    sess = tf.Session(config=sess_config)
														
 
															-    set_session(sess)
														
 
															-
														
 
															-
														
 
															-def train(opt, train_and_valid_data):
														
 
															-    if opt.use_cuda: gpu_train_init()
														
 
															-    train_X, train_Y, valid_X, valid_Y = train_and_valid_data
														
 
															-    model = get_keras_model(opt)
														
 
															-    model.summary()
														
 
															-    if opt.add_train:
														
 
															-        model.load_weights(opt.model_save_path + opt.model_name)
														
 
															-
														
 
															-    check_point = ModelCheckpoint(filepath=opt.model_save_path + opt.model_name, monitor='val_loss',
														
 
															-                                    save_best_only=True, mode='auto')
														
 
															-    early_stop = EarlyStopping(monitor='val_loss', patience=opt.Model['patience'], mode='auto')
														
 
															-    model.fit(train_X, train_Y, batch_size=opt.Model['batch_size'], epochs=opt.Model['epoch'], verbose=2,
														
 
															-              validation_data=(valid_X, valid_Y), callbacks=[check_point, early_stop])
														
 
															-
														
 
															-
														
 
															-def predict(config, test_X):
														
 
															-    model = get_keras_model(config)
														
 
															-    model.load_weights(config.model_save_path + config.model_name)
														
 
															-    result = model.predict(test_X, batch_size=1)
														
 
															-    # result = result.reshape((-1, config.output_size))
														
 
															-    return result
														
--- a/wind-LSTM-v2.0-nwp+rp+环境/requirements.txt
+++ b/wind-LSTM-v2.0-nwp+rp+环境/requirements.txt
@@ -1,8 +0,0 @@
 
															-sklearn
														
 
															-pandas
														
 
															-argparse
														
 
															-keras
														
 
															-tensorflow==1.15
														
 
															-matplotlib>=3.0.2
														
 
															-numpy>=1.14.6
														
 
															-scipy>=1.1.0
														
--- a/wind-LSTM-v2.0-nwp+rp+环境/run_case.py
+++ b/wind-LSTM-v2.0-nwp+rp+环境/run_case.py
@@ -1,58 +0,0 @@
 
															-# -*- coding: UTF-8 -*-
														
 
															-
														
 
															-import numpy as np
														
 
															-import os
														
 
															-import sys
														
 
															-import time
														
 
															-from figure import Figure
														
 
															-from data_process import data_process
														
 
															-from data_features import data_features
														
 
															-from logger import load_logger
														
 
															-from config import myargparse
														
 
															-from data_analyse import data_analyse
														
 
															-frame = "keras"
														
 
															-
														
 
															-if frame == "keras":
														
 
															-    from model.model_keras import train, predict
														
 
															-    os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3'
														
 
															-else:
														
 
															-    raise Exception("Wrong frame seletion")
														
 
															-
														
 
															-
														
 
															-def main():
														
 
															-    parse = myargparse(discription="training config", add_help=False)
														
 
															-    opt = parse.parse_args_and_yaml()
														
 
															-    logger = load_logger(opt)
														
 
															-    try:
														
 
															-        np.random.seed(opt.Model["random_seed"])
														
 
															-        process = data_process(opt=opt)
														
 
															-        dfs = process.get_processed_data()
														
 
															-        features = data_features(opt=opt, mean=process.mean, std=process.std)
														
 
															-        if opt.do_train:
														
 
															-            train_X, valid_X, train_Y, valid_Y = features.get_train_data([dfs[0][:'2021/8/1'], dfs[1][:'2022/3/1']])
														
 
															-            train(opt, [train_X, train_Y, valid_X, valid_Y])
														
 
															-        if opt.do_predict:
														
 
															-            test_X, test_Y, df_Y = features.get_test_data([dfs[0]['2021/8/1':'2021/9/6'], dfs[1]['2022/3/1':'2022/4/4']])
														
 
															-            result = predict(opt, test_X)       # 这里输出的是未还原的归一化预测数据
														
 
															-            analyse = data_analyse(opt, logger, process)
														
 
															-            analyse.predict_acc(result, df_Y)
														
 
															-    except Exception:
														
 
															-        logger.error("Run Error", exc_info=True)
														
 
															-
														
 
															-
														
 
if __name__ == "__main__":
    import argparse
    # argparse makes it easy to pass parameters on the command line; more
    # options can be added as needed. The block below is disabled.
    # parser = argparse.ArgumentParser()
    # parser.add_argument("-t", "--do_train", default=False, type=bool, help="whether to train")
    # parser.add_argument("-p", "--do_predict", default=True, type=bool, help="whether to train")
    # parser.add_argument("-b", "--batch_size", default=64, type=int, help="batch size")
    # parser.add_argument("-e", "--epoch", default=20, type=int, help="epochs num")
    # args = parser.parse_args()

    # con = Config()
    # for key in dir(args):               # dir(args) lists every attribute of args
    #     if not key.startswith("_"):     # skip built-in attributes such as __name__
    #         setattr(con, key, getattr(args, key))   # copy the value onto Config
    main()
														
 
															-
														
--- a/wind-LSTM-v2.0-nwp+rp+环境/run_case_history.py
+++ b/wind-LSTM-v2.0-nwp+rp+环境/run_case_history.py
@@ -1,142 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/3/20 9:23
														
 
															-# file: run_case_history.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-
														
 
															-class Data:
														
 
															-    def __init__(self, config):
														
 
															-        self.config = config
														
 
															-        self.data, self.data_column_name = self.read_data()
														
 
															-
														
 
															-        self.data_num = self.data.shape[0]
														
 
															-        self.train_num = int(self.data_num * self.config.train_data_rate)
														
 
															-
														
 
															-        self.mean = np.mean(self.data, axis=0)              # 数据的均值和方差
														
 
															-        self.std = np.std(self.data, axis=0)
														
 
															-        self.norm_data = (self.data - self.mean)/self.std   # 归一化，去量纲
														
 
															-
														
 
															-        self.start_num_in_test = 0      # 测试集中前几天的数据会被删掉，因为它不够一个time_step
														
 
															-
														
 
															-    def read_data(self):                # 读取初始数据
														
 
															-        if self.config.debug_mode:
														
 
															-            init_data = pd.read_csv(self.config.train_data_path, nrows=self.config.debug_num,
														
 
															-                                    usecols=self.config.feature_columns)
														
 
															-        else:
														
 
															-            init_data = pd.read_csv(self.config.train_data_path, usecols=self.config.feature_columns)
														
 
															-        init_data = self.filter_data(init_data)
														
 
															-        return init_data.values, init_data.columns.tolist()     # .columns.tolist() 是获取列名
														
 
															-
														
 
															-    def filter_data(self, init_data):
														
 
															-        return init_data[init_data.apply(np.sum, axis=1)!=0]
														
 
															-
														
 
															-    def get_train_and_valid_data(self):
														
 
															-        feature_data = self.norm_data[:self.train_num]
														
 
															-        label_data = self.norm_data[: self.train_num,
														
 
															-                                    self.config.label_in_feature_index]    # 将延后几天的数据作为label
														
 
															-
														
 
															-        if not self.config.do_continue_train:
														
 
															-            # 在非连续训练模式下，每time_step行数据会作为一个样本，两个样本错开一行，比如：1-20行，2-21行。。。。
														
 
															-            train_x, train_y = [], []
														
 
															-            for i in range(self.train_num-self.config.time_step*2):
														
 
															-                p1 = feature_data[:, 0][i:i+self.config.start_predict_point]
														
 
															-                p2 = feature_data[:, 1][i+self.config.start_predict_point:i+self.config.start_predict_point*2]
														
 
															-                p = [list(t) for t in zip(p1, p2)]  # 实际功率， 预测功率 是一组特征值
														
 
															-                l = label_data[i+self.config.start_predict_point:i+self.config.start_predict_point*2]
														
 
															-                train_x.append(p)
														
 
															-                train_y.append(l)
														
 
															-            # train_x = [feature_data[i:i+self.config.time_step] for i in range(self.train_num-self.config.time_step)]
														
 
															-            # train_y = [label_data[i+self.config.start_predict_point:i+self.config.time_step] for i in range(self.train_num-self.config.time_step)]
														
 
															-            # 这里选取后16个点 作为 预测及
														
 
															-        else:
														
 
															-            # 在连续训练模式下，每time_step行数据会作为一个样本，两个样本错开time_step行，
														
 
															-            # 比如：1-20行，21-40行。。。到数据末尾，然后又是 2-21行，22-41行。。。到数据末尾，……
														
 
															-            # 这样才可以把上一个样本的final_state作为下一个样本的init_state，而且不能shuffle
														
 
															-            # 目前本项目中仅能在pytorch的RNN系列模型中用
														
 
															-            train_x = [feature_data[start_index + i*self.config.time_step : start_index + (i+1)*self.config.time_step]
														
 
															-                       for start_index in range(self.config.time_step)
														
 
															-                       for i in range((self.train_num - start_index) // self.config.time_step)]
														
 
															-            train_y = [label_data[start_index + i*self.config.time_step : start_index + (i+1)*self.config.time_step]
														
 
															-                       for start_index in range(self.config.time_step)
														
 
															-                       for i in range((self.train_num - start_index) // self.config.time_step)]
														
 
															-
														
 
															-        train_x, train_y = np.array(train_x), np.array(train_y)
														
 
															-
														
 
															-        train_x, valid_x, train_y, valid_y = train_test_split(train_x, train_y, test_size=self.config.valid_data_rate,
														
 
															-                                                              random_state=self.config.random_seed,
														
 
															-                                                              shuffle=self.config.shuffle_train_data)   # 划分训练和验证集，并打乱
														
 
															-        return train_x, valid_x, train_y, valid_y
														
 
															-
														
 
															-
														
 
															-class Config:
														
 
															-    # 数据参数
														
 
															-    # feature_columns = list(range(2, 9))     # 要作为feature的列，按原数据从0开始计算，也可以用list 如 [2,4,6,8] 设置
														
 
															-    feature_columns = list(range(1, 3))
														
 
															-    # label_columns = [4, 5]                  # 要预测的列，按原数据从0开始计算, 如同时预测第四，五列 最低价和最高价
														
 
															-    label_columns = [1]
														
 
															-    # label_in_feature_index = [feature_columns.index(i) for i in label_columns]  # 这样写不行
														
 
															-    label_in_feature_index = (lambda x,y: [x.index(i) for i in y])(feature_columns, label_columns)  # 因为feature不一定从0开始
														
 
															-
														
 
															-    predict_day = 1             # 预测未来几天
														
 
															-    predict_points = 16
														
 
															-    # 网络参数
														
 
															-    input_size = len(feature_columns)
														
 
															-    output_size = len(label_columns)
														
 
															-
														
 
															-    hidden_size = 128           # LSTM的隐藏层大小，也是输出大小
														
 
															-    lstm_layers = 2             # LSTM的堆叠层数
														
 
															-    dropout_rate = 0.2          # dropout概率
														
 
															-    time_step = 16             # 这个参数很重要，是设置用前多少个点的数据来预测，也是LSTM的time step数，请保证训练数据量大于它
														
 
															-    start_predict_point = 16
														
 
															-
														
 
															-    # 训练参数
														
 
															-    do_train = True
														
 
															-    do_predict = True
														
 
															-    add_train = False           # 是否载入已有模型参数进行增量训练
														
 
															-    shuffle_train_data = False   # 是否对训练数据做shuffle
														
 
															-    use_cuda = False            # 是否使用GPU训练
														
 
															-
														
 
															-    train_data_rate = 0.95      # 训练数据占总体数据比例，测试数据就是 1-train_data_rate
														
 
															-    valid_data_rate = 0.15      # 验证数据占训练数据比例，验证集在训练过程使用，为了做模型和参数选择
														
 
															-
														
 
															-    batch_size = 64
														
 
															-    learning_rate = 0.001
														
 
															-    epoch = 20                  # 整个训练集被训练多少遍，不考虑早停的前提下
														
 
															-    patience = 5                # 训练多少epoch，验证集没提升就停掉
														
 
															-    random_seed = 42            # 随机种子，保证可复现
														
 
															-
														
 
															-    do_continue_train = False    # 每次训练把上一次的final_state作为下一次的init_state，仅用于RNN类型模型，目前仅支持pytorch
														
 
															-    continue_flag = ""           # 但实际效果不佳，可能原因：仅能以 batch_size = 1 训练
														
 
															-    if do_continue_train:
														
 
															-        shuffle_train_data = False
														
 
															-        batch_size = 1
														
 
															-        continue_flag = "continue_"
														
 
															-
														
 
															-    # 训练模式
														
 
															-    debug_mode = False  # 调试模式下，是为了跑通代码，追求快
														
 
															-    debug_num = 500  # 仅用debug_num条数据来调试
														
 
															-
														
 
															-    # 框架参数
														
 
															-    used_frame = frame  # 选择的深度学习框架，不同的框架模型保存后缀不一样
														
 
															-    model_postfix = {"pytorch": ".pth", "keras": ".h5", "tensorflow": ".ckpt"}
														
 
															-    model_name = "model_" + continue_flag + used_frame + model_postfix[used_frame]
														
 
															-
														
 
															-    # 路径参数
														
 
															-    train_data_path = "./data/J00285.csv"
														
 
															-    model_save_path = "./checkpoint/" + used_frame + "/"
														
 
															-    figure_save_path = "./figure/"
														
 
															-    log_save_path = "./log/"
														
 
															-    do_log_print_to_screen = True
														
 
															-    do_log_save_to_file = True                  # 是否将config和训练过程记录到log
														
 
															-    do_figure_save = False
														
 
															-    do_train_visualized = False          # 训练loss可视化，pytorch用visdom，tf用tensorboardX，实际上可以通用, keras没有
														
 
															-    if not os.path.exists(model_save_path):
														
 
															-        os.makedirs(model_save_path)    # makedirs 递归创建目录
														
 
															-    if not os.path.exists(figure_save_path):
														
 
															-        os.mkdir(figure_save_path)
														
 
															-    if do_train and (do_log_save_to_file or do_train_visualized):
														
 
															-        cur_time = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime())
														
 
															-        log_save_path = log_save_path + cur_time + '_' + used_frame + "/"
														
 
															-        os.makedirs(log_save_path)
														
 
															-
														
--- a/wind-LSTM-v2.0-nwp+rp+环境/test.py
+++ b/wind-LSTM-v2.0-nwp+rp+环境/test.py
@@ -1,43 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/4/11 15:58
														
 
															-# file: test.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-import pandas as pd
														
 
															-import numpy as np
														
 
															-# index = pd.date_range('1/1/2000', periods=9, freq='T')
														
 
															-# series = pd.Series(range(9), index=index)
														
 
															-# df = pd.DataFrame({'value': series})
														
 
															-# series1 = series.resample('3T').sum()
														
 
															-# series2 = series.resample('3T', label='right').sum()
														
 
															-# series3 = series.resample('3T', label='right', closed='right').sum()
														
 
															-# series4 = series.resample('30S').asfreq()
														
 
															-# series5 = series.resample('30S').bfill()
														
 
															-# print(series)
														
 
															-# print(series1)
														
 
															-# print(series2)
														
 
															-# print(series3)
														
 
															-# print(series4)
														
 
															-# print("---", series5)
														
 
															-
														
 
															-# x = np.random.randint(1,100,20).reshape((10,2))
														
 
															-# print(x)
														
 
															-# from sklearn.model_selection import train_test_split
														
 
															-#
														
 
															-# x_train, x_test = train_test_split(x, test_size=0.2, random_state=1, shuffle=False)
														
 
															-# print("x_train", x_train)
														
 
															-# print("x_test", x_test)
														
 
															-
														
 
															-
														
 
															-import numpy as np
														
 
															-import pandas as pd
														
 
															-#创建一组数据
														
 
															-data = {'name': ['John', 'Mike', 'Mozla', 'Rose', 'David', 'Marry', 'Wansi', 'Sidy', 'Jack', 'Alic'],
														
 
															-        'age': [20, 32, 29, np.nan, 15, 28, 21, 30, 37, 25],
														
 
															-        'gender': [0, 0, 1, 1, 0, 1, 0, 0, 1, 1],
														
 
															-        'isMarried': ['yes', 'yes', 'no', 'yes', 'no', 'no', 'no', 'yes', 'no', 'no']}
														
 
															-label = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
														
 
															-df = pd.DataFrame(data, index=label)
														
 
															-print(df.loc[:,'name'])
														
 
															-pass
														
--- a/wind-LSTM-v2.0-nwp+rp/.gitignore
+++ b/wind-LSTM-v2.0-nwp+rp/.gitignore
@@ -1,13 +0,0 @@
 
															-*/__pycache__
														
 
															-/__pycache__
														
 
															-/.idea
														
 
															-/checkpoint
														
 
															-/log
														
 
															-/data
														
 
															-/figure
														
 
															-*.log
														
 
															-*.swp
														
 
															-/log
														
 
															-/data
														
 
															-
														
 
															-
														
--- a/wind-LSTM-v2.0-nwp+rp/Readme.md
+++ b/wind-LSTM-v2.0-nwp+rp/Readme.md
@@ -1,18 +0,0 @@
 
															-## 超短期功率预测系统训练端
														
 
															-
														
 
															-这个项目将LSTM长短期时序模型用于超短期电力功率预测任务，实现特性如下: 
														
 
															-
														
 
															-- 程序简洁、模块化
														
 
															-- 支持可扩展的Keras框架（LSTM，可修改网络层）
														
 
															-- 参数、模型和框架支持高度可定制和修改
														
 
															-- 支持增量训练（在预训练模型上进行微调）
														
 
															-- 支持同时预测多个指标（目前预测实际功率）
														
 
															-- 支持预测任意时间节点数（目前设置16个点）
														
 
															-- 支持训练可视化和记录日志
														
 
															-
														
 
															-
														
 
															-
														
 
															-| 训练case | 表头  |
														
 
															-|--------| ----  |
														
 
															-| 1      | 单元格 |
														
 
															-| 2      | 单元格 |
														
--- a/wind-LSTM-v2.0-nwp+rp/back.py
+++ b/wind-LSTM-v2.0-nwp+rp/back.py
@@ -1,76 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/4/14 15:32
														
 
															-# file: back.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-import sys
														
 
															-import numpy as np
														
 
															-import matplotlib.pyplot as plt
														
 
															-import pandas as pd
														
 
															-
														
 
															-
														
 
															-class data_analyse(object):
														
 
															-    def __init__(self, opt, logger, process):
														
 
															-        self.opt = opt
														
 
															-        self.logger = logger
														
 
															-        self.ds = process
														
 
															-
														
 
															-    def calculate_acc(self, label_data, predict_data):
														
 
															-        loss = np.sum((label_data - predict_data) ** 2) / len(label_data)  # mse
														
 
															-        loss_sqrt = np.sqrt(loss)  # rmse
														
 
															-        loss_acc = 1 - loss_sqrt / self.opt.cap
														
 
															-        return loss_acc
														
 
															-
														
 
															-    def get_16_points(self, results):
														
 
															-        # results为模型预测的一维数组，遍历，取每16个点的最后一个点
														
 
															-        preds = []
														
 
															-        for res in results:
														
 
															-            preds.append(res.iloc[-1].values)
														
 
															-        return np.array(preds)
														
 
															-
														
 
															-    def predict_acc(self, predict_data, dfy):
														
 
															-        predict_data = predict_data * self.ds.std['C_REAL_VALUE'] + self.ds.mean['C_REAL_VALUE']
														
 
															-        dfs = dfy[0]
														
 
															-        for i in range(1, len(dfy)):
														
 
															-            dfs.extend(dfy[i])
														
 
															-        for i, df in enumerate(dfs):
														
 
															-            df["PREDICT"] = predict_data[i]
														
 
															-            dfs[i] = df
														
 
															-        data = self.get_16_points(dfs)
														
 
															-        df = pd.DataFrame(data, columns=['C_TIME', 'C_REAL_VALUE', 'C_FP_VALUE', 'PREDICT'])
														
 
															-        # label_data = label_data.reshape((-1, self.opt.output_size))
														
 
															-        # label_data 要进行反归一化
														
 
															-
														
 
															-        label_name = [self.opt.feature_columns[i] for i in self.opt.label_in_feature_index]
														
 
															-        loss_norm = self.calculate_acc(df['C_REAL_VALUE'], df['PREDICT'])
														
 
															-
														
 
															-        self.logger.info("The mean squared error of power {} is ".format(label_name) + str(loss_norm))
														
 
															-
														
 
															-        loss_norm = self.calculate_acc(df['C_REAL_VALUE'], df['C_FP_VALUE'])
														
 
															-
														
 
															-        self.logger.info("The mean squared error of power {} is ".format(label_name) + str(loss_norm))
														
 
															-        self.preidct_draw(df['C_REAL_VALUE'].values, df['PREDICT'].values)
														
 
															-
														
 
															-    def preidct_draw(self, label_data, predict_data):
														
 
															-        X = list(range(label_data.shape[0]))
														
 
															-        print("label_x = ", X)
														
 
															-        label_column_num = len(self.opt.label_columns)
														
 
															-        label_name = [self.opt.feature_columns[i] for i in self.opt.label_in_feature_index]
														
 
															-        if not sys.platform.startswith('linux'):    # 无桌面的Linux下无法输出，如果是有桌面的Linux，如Ubuntu，可去掉这一行
														
 
															-            for i in range(label_column_num):
														
 
															-                plt.figure(i+1)                     # 预测数据绘制
														
 
															-                plt.plot(X, label_data[:, i], label='label', color='b')
														
 
															-                plt.plot(X, predict_data[:, i], label='predict', color='g')
														
 
															-                # plt.plot(predict_X, dq_data[:, i], label='dq', color='y')
														
 
															-                # plt.title("Predict actual {} power with {}".format(label_name[i], self.opt.used_frame))
														
 
															-                self.logger.info("The predicted power {} for the last {} point(s) is: ".format(label_name[i], self.opt.predict_points) +
														
 
															-                      str(np.squeeze(predict_data[-self.opt.predict_points:, i])))
														
 
															-
														
 
															-                if self.opt.do_figure_save:
														
 
															-                    plt.savefig(self.opt.figure_save_path+"{}predict_{}_with_{}.png".format(self.opt.continue_flag, label_name[i], self.opt.used_frame))
														
 
															-
														
 
															-            plt.show()
														
 
															-
														
 
															-    def tangle_results(self):
														
 
															-        pass
														
--- a/wind-LSTM-v2.0-nwp+rp/config.py
+++ b/wind-LSTM-v2.0-nwp+rp/config.py
@@ -1,94 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/3/17 14:46
														
 
															-# file: config.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-
														
 
															-import yaml
														
 
															-import argparse
														
 
															-
														
 
															-
														
 
															-class myargparse(argparse.ArgumentParser):
														
 
															-    def __init__(self, discription, add_help):
														
 
															-        super(myargparse, self).__init__(description=discription, add_help=add_help)
														
 
															-        # default_config_parser = parser = argparse.ArgumentParser(
														
 
															-        #     description='Training Config', add_help=False)
														
 
															-        self.add_argument(
														
 
															-            '-c',
														
 
															-            '--config_yaml',
														
 
															-            default=
														
 
															-            'config.yml',
														
 
															-            type=str,
														
 
															-            metavar='FILE',
														
 
															-            help='YAML config file specifying default arguments')
														
 
															-
														
 
															-        # feature_columns = list(range(1, 28))
														
 
															-        label_columns = ['C_REAL_VALUE']
														
 
															-
														
 
															-        # label_in_feature_index = (lambda x, y: [x.index(i) for i in y])(feature_columns, label_columns)  # 因为feature不一定从0开始
														
 
															-
														
 
															-
														
 
															-        # 在控制台可以指定的参数， yml中没有
														
 
															-        self.add_argument('--feature_columns', type=list, default=None, help='要作为特征的列')
														
 
															-
														
 
															-        self.add_argument('--label_columns', type=list, default=label_columns, help='要预测的列')
														
 
															-
														
 
															-        self.add_argument('--label_in_feature_index', type=list, default=None, help='标签在特征列的索引')
														
 
															-
														
 
															-        self.add_argument('--input_size', type=int, default=0, help='输入维度')
														
 
															-
														
 
															-        self.add_argument('--output_size', type=int, default=len(label_columns), help='输出维度')
														
 
															-
														
 
															-        self.add_argument("--train_data_path", type=str, default=None,help='数据集地址')  # train_data_path yml中有
														
 
															-
														
 
															-        # model_name 和 model_save_path 这两个参数根据yml中的参数拼接而成
														
 
															-
														
 
															-        self.add_argument('--model_name', type=str, default=None, help='模型名称')
														
 
															-
														
 
															-        self.add_argument('--model_save_path', type=str, default=None, help='模型保存地址')
														
 
															-
														
 
															-
														
 
															-    def _init_dir(self, opt):
														
 
															-        import os, time
														
 
															-        # 在这里给opt赋值
														
 
															-        opt.model_name = "model_" + opt.continue_flag + opt.used_frame + opt.model_postfix[opt.used_frame]
														
 
															-        opt.model_save_path = './checkpoint/' + opt.model_name + "/"
														
 
															-        if not os.path.exists(opt.model_save_path):
														
 
															-            os.makedirs(opt.model_save_path)    # makedirs 递归创建目录
														
 
															-        if not os.path.exists(opt.figure_save_path):
														
 
															-            os.mkdir(opt.figure_save_path)
														
 
															-        if opt.do_train and (opt.do_log_save_to_file or opt.do_train_visualized):
														
 
															-            cur_time = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime())
														
 
															-            log_save_path = opt.log_save_path + cur_time + '_' + opt.used_frame + "/"
														
 
															-            os.makedirs(log_save_path)
														
 
															-
														
 
															-
														
 
															-# YAML should override the argparser's content
														
 
															-    def _parse_args_and_yaml(self):
														
 
															-        given_configs, remaining = self.parse_known_args()
														
 
															-        if given_configs.config_yaml:
														
 
															-            with open(given_configs.config_yaml, 'r', encoding='utf-8') as f:
														
 
															-                cfg = yaml.safe_load(f)
														
 
															-                self.set_defaults(**cfg)
														
 
															-
														
 
															-        # The main arg parser parses the rest of the args, the usual
														
 
															-        # defaults will have been overridden if config file specified.
														
 
															-        opt = self.parse_args(remaining)
														
 
															-        self._init_dir(opt)
														
 
															-        # Cache the args as a text string to save them in the output dir later
														
 
															-        opt_text = yaml.safe_dump(opt.__dict__, default_flow_style=False)
														
 
															-        return opt, opt_text
														
 
															-
														
 
															-
														
 
															-    def parse_args_and_yaml(self):
														
 
															-        return self._parse_args_and_yaml()[0]
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    # opt = _parse_args_and_yaml()
														
 
															-    pass
														
 
															-
														
 
															-
														
 
															-
														
 
															-
														
--- a/wind-LSTM-v2.0-nwp+rp/config.yml
+++ b/wind-LSTM-v2.0-nwp+rp/config.yml
@@ -1,77 +0,0 @@
 
															-Model:
														
 
															-  batch_size: 64
														
 
															-  dropout_rate: 0.2
														
 
															-  epoch: 20
														
 
															-  hidden_size: 128
														
 
															-  learning_rate: 0.001
														
 
															-  lstm_layers: 2
														
 
															-  patience: 5
														
 
															-  random_seed: 42
														
 
															-  time_step: 16
														
 
															-add_train: false
														
 
															-continue_flag: ''
														
 
															-data_format:
														
 
															-  dq: dq.csv
														
 
															-  envir: "\u73AF\u5883\u6570\u636E.csv"
														
 
															-  nwp: nwp.csv
														
 
															-  rp: rp.csv
														
 
															-debug_model: false
														
 
															-debug_num: 500
														
 
															-do_continue_train: false
														
 
															-do_figure_save: false
														
 
															-do_log_print_to_screen: true
														
 
															-do_log_save_to_file: true
														
 
															-do_predict: true
														
 
															-do_train: true
														
 
															-do_train_visualized: True
														
 
															-excel_data_path: ./data/J00307/
														
 
															-figure_save_path: ./figure/
														
 
															-is_continuous_predict: True
														
 
															-log_save_path: ./log/
														
 
															-mean:
														
 
															-  C_AIRT: 10.305992230762874
														
 
															-  C_CELLT: 10.664897925448384
														
 
															-  C_DIFFUSER: 143.2639061079428
														
 
															-  C_DIFFUSERDA: 6.571077155136789
														
 
															-  C_DIRECTR: 68.21328208942887
														
 
															-  C_DIRECTRDA: 3.163283039920654
														
 
															-  C_FORECAST: 3.1419734966774113
														
 
															-  C_GLOBALR: 173.2587817174973
														
 
															-  C_GLOBALRDA: 7.756491280271097
														
 
															-  C_HOURDA: 1.998222150590958
														
 
															-  C_P: 947.7830440532276
														
 
															-  C_RH: 55.59672286965865
														
 
															-  C_VALUE: 3.404744648318043
														
 
															-  C_WD: 212.88300686007108
														
 
															-  C_WS: 1.802446483180428
														
 
															-model_postfix:
														
 
															-  keras: .h5
														
 
															-  pytorch: .pth
														
 
															-  tensorflow: .ckpt
														
 
															-predict_points: 16
														
 
															-shuffle_train_data: false
														
 
															-std:
														
 
															-  C_AIRT: 12.127220611319888
														
 
															-  C_CELLT: 12.654848145970181
														
 
															-  C_DIFFUSER: 230.93680419867772
														
 
															-  C_DIFFUSERDA: 6.4933162833681415
														
 
															-  C_DIRECTR: 166.61348332191056
														
 
															-  C_DIRECTRDA: 4.991297839913351
														
 
															-  C_FORECAST: 4.447082956749344
														
 
															-  C_GLOBALR: 258.87947949591955
														
 
															-  C_GLOBALRDA: 7.9174382136573955
														
 
															-  C_HOURDA: 2.9110230573747247
														
 
															-  C_P: 25.75152505719027
														
 
															-  C_RH: 22.445059526990818
														
 
															-  C_VALUE: 5.013868885103326
														
 
															-  C_WD: 112.90029001408325
														
 
															-  C_WS: 1.6575249140627502
														
 
															-train_data_path: ./data/
														
 
															-train_data_rate: 0.9
														
 
															-use_cuda: false
														
 
															-used_frame: keras
														
 
															-valid_data_rate: 0.15
														
 
															-
														
 
															-is_photovoltaic: True
														
 
															-cap: 110
														
 
															-envir_columns: 16
														
--- a/wind-LSTM-v2.0-nwp+rp/data_analyse.py
+++ b/wind-LSTM-v2.0-nwp+rp/data_analyse.py
@@ -1,77 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/4/12 18:57
														
 
															-# file: data_analyse.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-import sys
														
 
															-import numpy as np
														
 
															-import matplotlib.pyplot as plt
														
 
															-import pandas as pd
														
 
															-
														
 
															-
														
 
															-class data_analyse(object):
														
 
															-    def __init__(self, opt, logger, process):
														
 
															-        self.opt = opt
														
 
															-        self.logger = logger
														
 
															-        self.ds = process
														
 
															-
														
 
															-    def calculate_acc(self, label_data, predict_data):
														
 
															-        loss = np.sum((label_data - predict_data) ** 2) / len(label_data)  # mse
														
 
															-        loss_sqrt = np.sqrt(loss)  # rmse
														
 
															-        loss_acc = 1 - loss_sqrt / self.opt.cap
														
 
															-        return loss_acc
														
 
															-
														
 
															-    def get_16_points(self, results):
														
 
															-        # results为模型预测的一维数组，遍历，取每16个点的最后一个点
														
 
															-        preds = []
														
 
															-        for res in results:
														
 
															-            preds.append(res.iloc[-1].values)
														
 
															-        return np.array(preds)
														
 
															-
														
 
															-    def predict_acc(self, predict_data, dfy):
														
 
															-        predict_data = predict_data * self.ds.std['C_REAL_VALUE'] + self.ds.mean['C_REAL_VALUE']
														
 
															-        dfs = dfy[0]
														
 
															-        for i in range(1, len(dfy)):
														
 
															-            dfs.extend(dfy[i])
														
 
															-        for i, df in enumerate(dfs):
														
 
															-            df["PREDICT"] = predict_data[i]
														
 
															-            dfs[i] = df
														
 
															-        data = self.get_16_points(dfs)
														
 
															-        df = pd.DataFrame(data, columns=['C_TIME', 'C_REAL_VALUE', 'PREDICT'])
														
 
															-        # label_data = label_data.reshape((-1, self.opt.output_size))
														
 
															-        # label_data 要进行反归一化
														
 
															-        df.to_csv(self.opt.excel_data_path + "nwp+rp.csv")
														
 
															-
														
 
															-        label_name = [self.opt.feature_columns[i] for i in self.opt.label_in_feature_index]
														
 
															-        loss_norm = self.calculate_acc(df['C_REAL_VALUE'], df['PREDICT'])
														
 
															-
														
 
															-        self.logger.info("The mean squared error of power {} is ".format(label_name) + str(loss_norm))
														
 
															-
														
 
															-        # loss_norm = self.calculate_acc(df['C_REAL_VALUE'], df['C_FP_VALUE'])
														
 
															-
														
 
															-        # self.logger.info("The mean squared error of power {} is ".format(label_name) + str(loss_norm))
														
 
															-        self.preidct_draw(df['C_REAL_VALUE'].values, df['PREDICT'].values)
														
 
															-
														
 
															-    def preidct_draw(self, label_data, predict_data):
														
 
															-        X = list(range(label_data.shape[0]))
														
 
															-        print("label_x = ", X)
														
 
															-        label_column_num = len(self.opt.label_columns)
														
 
															-        label_name = [self.opt.feature_columns[i] for i in self.opt.label_in_feature_index]
														
 
															-        if not sys.platform.startswith('linux'):    # 无桌面的Linux下无法输出，如果是有桌面的Linux，如Ubuntu，可去掉这一行
														
 
															-            for i in range(label_column_num):
														
 
															-                plt.figure(i+1)                     # 预测数据绘制
														
 
															-                plt.plot(X, label_data, label='label', color='b')
														
 
															-                plt.plot(X, predict_data, label='predict', color='g')
														
 
															-                # plt.plot(predict_X, dq_data[:, i], label='dq', color='y')
														
 
															-                # plt.title("Predict actual {} power with {}".format(label_name[i], self.opt.used_frame))
														
 
															-                # self.logger.info("The predicted power {} for the last {} point(s) is: ".format(label_name[i], self.opt.predict_points) +
														
 
															-                #       str(np.squeeze(predict_data[-self.opt.predict_points:, i])))
														
 
															-
														
 
															-                if self.opt.do_figure_save:
														
 
															-                    plt.savefig(self.opt.figure_save_path+"{}predict_{}_with_{}.png".format(self.opt.continue_flag, label_name[i], self.opt.used_frame))
														
 
															-
														
 
															-            plt.show()
														
 
															-
														
 
															-    def tangle_results(self):
														
 
															-        pass
														
--- a/wind-LSTM-v2.0-nwp+rp/data_features.py
+++ b/wind-LSTM-v2.0-nwp+rp/data_features.py
@@ -1,106 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/4/12 17:42
														
 
															-# file: data_features.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-import pandas as pd
														
 
															-from sklearn.model_selection import train_test_split
														
 
															-import numpy as np
														
 
															-from data_utils import *
														
 
															-
														
 
															-
														
 
															-class data_features(object):
														
 
															-    def __init__(self, opt, mean, std):
														
 
															-        self.opt = opt
														
 
															-        self.time_step = self.opt.Model["time_step"]
														
 
															-        self.mean = mean
														
 
															-        self.std = std
														
 
															-        self.columns = list()
														
 
															-
														
 
															-    def get_train_data(self, dfs):
														
 
															-        train_x, valid_x, train_y, valid_y = [], [], [], []
														
 
															-        self.opt.feature_columns = dfs[0].columns.tolist()
														
 
															-        self.opt.feature_columns.insert(0, 'C_TIME')
														
 
															-        self.opt.label_in_feature_index = (lambda x, y: [x.index(i) for i in y])(self.opt.feature_columns,
														
 
															-                                                                        self.opt.label_columns)  # 因为feature不一定从0开始
														
 
															-        self.opt.input_size = len(self.opt.feature_columns)
														
 
															-        for df in dfs:
														
 
															-            datax, datay = self.get_data_features(df)
														
 
															-            trainx = np.array(datax)
														
 
															-            trainy = [y['C_REAL_VALUE'].values for y in datay]
														
 
															-            trainy = np.expand_dims(np.array(trainy), axis=-1)  # 在最后一维加一维度
														
 
															-            tx, vx, ty, vy = train_test_split(trainx, trainy, test_size=self.opt.valid_data_rate,
														
 
															-                                                                  random_state=self.opt.Model["random_seed"],
														
 
															-                                                                  shuffle=self.opt.shuffle_train_data)  # 划分训练和验证集
														
 
															-            train_x.append(tx)
														
 
															-            valid_x.append(vx)
														
 
															-            train_y.append(ty)
														
 
															-            valid_y.append(vy)
														
 
															-
														
 
															-        train_x = np.concatenate(train_x, axis=0)
														
 
															-        valid_x = np.concatenate(valid_x, axis=0)
														
 
															-        train_y = np.concatenate(train_y, axis=0)
														
 
															-        valid_y = np.concatenate(valid_y, axis=0)
														
 
															-
														
 
															-        train_x = self.norm_features(train_x)
														
 
															-        valid_x = self.norm_features(valid_x)
														
 
															-        train_y = self.norm_label(train_y)
														
 
															-        valid_y = self.norm_label(valid_y)
														
 
															-
														
 
															-        return train_x, valid_x, train_y, valid_y
														
 
															-
														
 
															-    def get_test_data(self, dfs):
														
 
															-        test_x, test_y, data_y = [], [], []
														
 
															-        self.opt.feature_columns = dfs[0].columns.tolist()
														
 
															-        self.opt.feature_columns.insert(0, 'C_TIME')
														
 
															-        self.opt.label_in_feature_index = (lambda x, y: [x.index(i) for i in y])(self.opt.feature_columns,
														
 
															-                                                                        self.opt.label_columns)  # 因为feature不一定从0开始
														
 
															-        self.opt.input_size = len(self.opt.feature_columns)
														
 
															-        for df in dfs:
														
 
															-            datax, datay = self.get_data_features(df)
														
 
															-            trainx = np.array(datax)
														
 
															-            trainy = [y['C_REAL_VALUE'].values for y in datay]
														
 
															-            trainy = np.expand_dims(np.array(trainy), axis=-1)  # 在最后一维加一维度
														
 
															-            test_x.append(trainx)
														
 
															-            test_y.append(trainy)
														
 
															-            data_y.append(datay)
														
 
															-
														
 
															-        test_x = np.concatenate(test_x, axis=0)
														
 
															-        test_y = np.concatenate(test_y, axis=0)
														
 
															-
														
 
															-        test_x = self.norm_features(test_x)
														
 
															-        test_y = self.norm_label(test_y)
														
 
															-
														
 
															-        return test_x, test_y, data_y
														
 
															-
														
 
															-    def get_data_features(self, df):   # 这段代码基于pandas方法的优化
														
 
															-        norm_data = df.reset_index()
														
 
															-        feature_data = norm_data[:-self.opt.predict_points]
														
 
															-        label_data = norm_data[self.opt.predict_points:].reset_index(drop=True)
														
 
															-        time_step = self.opt.Model["time_step"]
														
 
															-        time_step_loc = time_step - 1
														
 
															-        train_num = int(len(feature_data))
														
 
															-        time_rp = [feature_data.loc[i:i + time_step_loc, ['C_TIME', 'C_REAL_VALUE']] for i in range(train_num - time_step)]
														
 
															-        nwp = [label_data.loc[i:i + time_step_loc, 'C_T':] for i in range(train_num - time_step)]
														
 
															-        features_x, features_y = [], []
														
 
															-        for row in zip(time_rp, nwp):
														
 
															-            row0 = row[0]
														
 
															-            row1 = row[1]
														
 
															-            row0 = pd.concat([row0, row1], axis=1)
														
 
															-            row0.set_index('C_TIME', inplace=True, drop=False)
														
 
															-            row0["C_TIME"] = row0["C_TIME"].apply(datetime_to_timestr)
														
 
															-            features_x.append(row0)
														
 
															-        self.columns = row0.columns.tolist()
														
 
															-
														
 
															-        features_y = [label_data.loc[i:i + time_step_loc, ['C_TIME', 'C_REAL_VALUE']] for i in range(train_num - time_step)]
														
 
															-        return features_x, features_y
														
 
															-
														
 
															-    def norm_features(self, data: np.ndarray):
														
 
															-        mean = np.array([self.mean[col] for col in self.columns])
														
 
															-        std = np.array([self.std[col] for col in self.columns])
														
 
															-        data = (data - mean) / std  # 归一化
														
 
															-        return data
														
 
															-
														
 
															-    def norm_label(self, label_data: np.ndarray):
														
 
															-        return (label_data - self.mean['C_REAL_VALUE']) / self.std['C_REAL_VALUE']
														
--- a/wind-LSTM-v2.0-nwp+rp/data_process.py
+++ b/wind-LSTM-v2.0-nwp+rp/data_process.py
@@ -1,134 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/3/17 10:10
														
 
															-# file: main.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-import pandas as pd
														
 
															-import numpy as np
														
 
															-from data_utils import *
														
 
															-import yaml
														
 
															-
														
 
															-
														
 
															-class data_process(object):
														
 
															-    def __init__(self, opt):
														
 
															-        self.std = None
														
 
															-        self.mean = None
														
 
															-        self.opt = opt
														
 
															-        # 都是在ndarray量纲下进行计算
														
 
															-        # self.norm_data = (self.tables[:, 1:] - self.mean) / self.std  # 归一化，去量纲
														
 
															-        # self.norm_data.insert(0, 'C_TIME', self.tables['C_TIME'])
														
 
															-        # self.set_yml({'mean': self.mean.to_dict(), 'std': self.std.to_dict()})
														
 
															-        # self.start_num_in_test = 0
														
 
															-
														
 
															-    def get_processed_data(self):
														
 
															-        excel_data_path = self.opt.excel_data_path
														
 
															-        data_format = self.opt.data_format
														
 
															-        dq_path = excel_data_path + data_format["dq"]
														
 
															-        rp_path = excel_data_path + data_format["rp"]
														
 
															-        nwp_path = excel_data_path + data_format["nwp"]
														
 
															-
														
 
															-        rp_columns = ['C_TIME', 'C_REAL_VALUE']  # 待优化 ["'C_TIME'", "'C_REAL_VALUE'"] 原因：csv 字符串是单引号''，read_csv带单引号
														
 
															-
														
 
															-        nwp = self.read_data(nwp_path).loc[:, "C_PRE_TIME":]  # 待优化 导出csv按照表的列顺序 read_csv按照csv列顺序读取
														
 
															-        nwp = self.data_cleaning(nwp)
														
 
															-        nwp.drop(['C_FARM_ID', 'C_SC_DATE', 'C_SC_TIME', 'C_PRE_DATE'], axis=1, inplace=True)
														
 
															-        nwp["C_PRE_TIME"] = nwp["C_PRE_TIME"].apply(timestr_to_datetime)
														
 
															-        nwp.rename({"C_PRE_TIME": "C_TIME"}, axis=1, inplace=True)
														
 
															-        nwp.set_index('C_TIME', inplace=True)
														
 
															-        nwp = self.drop_duplicated(nwp)
														
 
															-
														
 
															-        rp = self.read_data(rp_path, rp_columns)
														
 
															-        rp["C_TIME"] = rp["C_TIME"].apply(timestr_to_datetime)
														
 
															-        rp.set_index('C_TIME', inplace=True)  # nan也可以设置索引列
														
 
															-        rp = self.data_cleaning(rp)
														
 
															-        rp = self.drop_duplicated(rp)
														
 
															-
														
 
															-        df = self.tables_unite(rp, nwp)
														
 
															-        dfs = self.missing_time_splite(df)
														
 
															-        dfs = [self.data_fill(df) for df in dfs]
														
 
															-        self.norm(dfs)  # 归一化 待解决
														
 
															-        return dfs
														
 
															-
														
 
															-    def norm(self, dfs):
														
 
															-        df = pd.concat(dfs, axis=0)
														
 
															-        df = df.reset_index()
														
 
															-        df["C_TIME"] = df["C_TIME"].apply(datetime_to_timestr)
														
 
															-        mean = np.mean(df, axis=0)  # 数据的均值
														
 
															-        std = np.std(df, axis=0)  # 标准差
														
 
															-        if hasattr(self.opt, 'mean') is False or hasattr(self.opt, 'std') is False:
														
 
															-            self.set_yml({'mean': mean.to_dict(), 'std': std.to_dict()})
														
 
															-        print("归一化参数，均值为：{}，方差为：{}".format(mean.to_dict(), std.to_dict()))
														
 
															-        self.mean, self.std = mean.to_dict(), std.to_dict()
														
 
															-
														
 
															-    def data_cleaning(self, data):
														
 
															-        data = data.replace(-99, np.nan)
														
 
															-        # nan 超过30% 删除
														
 
															-        data = data.dropna(axis=1, thresh=len(data)*0.7)
														
 
															-        # 删除取值全部相同的列
														
 
															-        data = data.loc[:, (data != data.iloc[0]).any()]
														
 
															-        # nan 替换成0 本周问题 1.卷积学习，0是否合适？
														
 
															-        data = data.replace(np.nan, 0)
														
 
															-        return data
														
 
															-
														
 
															-    def missing_time_splite(self, df):
														
 
															-        dt = pd.Timedelta(minutes=15)
														
 
															-        day1 = pd.Timedelta(days=1)
														
 
															-        cnt = 0
														
 
															-        cnt1 = 0
														
 
															-        start_index = 0
														
 
															-        dfs = []
														
 
															-        for i in range(1, len(df)):
														
 
															-            if df.index[i] - df.index[i-1] >= day1:
														
 
															-                df_x = df.iloc[start_index:i, ]
														
 
															-                dfs.append(df_x)
														
 
															-                start_index = i
														
 
															-                cnt1 += 1
														
 
															-            if df.index[i] - df.index[i-1] != dt:
														
 
															-                print(df.index[i-1], end=" ~ ")
														
 
															-                print(df.index[i])
														
 
															-                cnt += 1
														
 
															-        dfs.append(df.iloc[start_index:, ])
														
 
															-        print("数据总数：", len(df), "，缺失段数：", cnt, "其中，超过一天的段数：", cnt1)
														
 
															-        return dfs
														
 
															-
														
 
															-    def data_fill(self, df):
														
 
															-        df = df.resample('15T').bfill()
														
 
															-        return df
														
 
															-
														
 
															-    def set_yml(self, yml_dict):
														
 
															-        with open(self.opt.config_yaml, 'r', encoding='utf-8') as f:
														
 
															-            cfg = yaml.safe_load(f)
														
 
															-        for k, v in yml_dict.items():
														
 
															-            cfg[k] = v
														
 
															-        with open(self.opt.config_yaml, 'w') as f:
														
 
															-            yaml.safe_dump(cfg, f, default_flow_style=False)
														
 
															-
														
 
															-    def read_data(self, path, cols=None, index_col=None):
														
 
															-        init_data = pd.read_csv(path, usecols=cols, index_col=index_col)
														
 
															-        return init_data
														
 
															-
														
 
															-    def filter_data(self):
														
 
															-        check_table = self.tables[:, 2]  # 实际功率不能为0，为0代表没发电
														
 
															-        preserve_index = list(np.nonzero(check_table)[0])
														
 
															-        indexs = list(range(len(self.tables)))
														
 
															-        del_index = list(set(indexs) - set(preserve_index))
														
 
															-        self.tables = np.delete(self.tables, del_index, axis=0)
														
 
															-        return self.tables
														
 
															-
														
 
															-    def drop_duplicated(self, df):
														
 
															-        df = df.groupby(level=0).mean()  # DatetimeIndex时间索引去重
														
 
															-        return df
														
 
															-
														
 
															-    def tables_unite(self, t1, t2):
														
 
															-        return pd.merge(t1, t2, left_index=True, right_index=True)
														
 
															-
														
 
															-
														
 
if __name__ == "__main__":
    # Manual entry point left over from an earlier version of this module.
    # NOTE(review): `DataSet` is not defined in this file; unless the
    # star-import from data_utils supplies it, running this file as a script
    # raises NameError. Presumably this should build `data_process(opt)` - confirm.
    ds = DataSet()
    # dq = ds.read_data(dq_path, dq_columns)[0]
    # rp = ds.read_data(rp_path, rp_columns)[0]
    # # rp_average(rp)    # compute the average power
    # envir = ds.read_data(envir_path, envir_columns)[0]
    # tables = ds.tables_integra(dq, rp, envir)
    # ds.tables_norm_result(tables)
														
--- a/wind-LSTM-v2.0-nwp+rp/data_process1.py
+++ b/wind-LSTM-v2.0-nwp+rp/data_process1.py
@@ -1,140 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-
														
 
															-# J00226超短期功率预测，数据清洗：删除和填充
														
 
															-
														
 
															-# %% 调库，定义函数
														
 
															-import pandas as pd
														
 
															-import numpy as np
														
 
															-import matplotlib.pyplot as plt
														
 
															-import lightgbm as lgb
														
 
															-from os.path import isfile
														
 
															-from sklearn.model_selection import train_test_split
														
 
															-from sklearn.metrics import mean_squared_error
														
 
															-from pickle import dump, load
														
 
															-from datetime import datetime
														
 
															-from time import mktime, strptime, strftime
														
 
															-import time
														
 
															-
														
 
															-
														
 
															-root_path = "./"
														
 
															-
														
 
															-
														
 
															-# 计算相邻两条数据时间差不是15分钟的数量
														
 
															-def missing(df):
														
 
															-    # dt = pd.Timedelta(hours=1)
														
 
															-    dt = pd.Timedelta(minutes=15)
														
 
															-    cnt = 0
														
 
															-
														
 
															-    if type(df["C_TIME"][0]) == str:
														
 
															-        df['C_TIME'] = pd.to_datetime(df['C_TIME'])
														
 
															-
														
 
															-    for i in range(1, len(df)):
														
 
															-        if df["C_TIME"][i] - df["C_TIME"][i-1] != dt:
														
 
															-            print(df.iloc[i - 1, 0], end=" ~ ")
														
 
															-            print(df.iloc[i, 0])
														
 
															-            # print(df["C_TIME"][i] - df["C_TIME"][i-1])
														
 
															-
														
 
															-            cnt += 1
														
 
															-
														
 
															-    print("数据总数：", len(df), "，缺失段数：", cnt)
														
 
															-
														
 
															-
														
 
															-def show_curve(series):
														
 
															-    plt.figure(dpi=100, figsize=(18, 6))
														
 
															-    # plt.plot(df["预测功率"], color="red", label="预测功率")
														
 
															-    # plt.plot(df["实际功率"], color="blue", label="实际功率")
														
 
															-    # plt.plot(df["短期预测功率"], color="green", label="短期预测功率")
														
 
															-    plt.plot(series)
														
 
															-
														
 
															-    # plt.legend(loc="upper right")
														
 
															-
														
 
															-    plt.show()
														
 
															-
														
 
															-
														
 
															-def data_process():
														
 
															-    df_nwp = pd.read_excel(root_path + "nwp.xls", usecols="B:AA")
														
 
															-    df_dq = pd.read_excel(root_path + "dq.xls", usecols="B,C")
														
 
															-    df_rp = pd.read_excel(root_path + "rp.xls", usecols="A,C")
														
 
															-
														
 
															-    # missing(df_nwp)   # 34848 1
														
 
															-    # missing(df_dq)    # 34368 6
														
 
															-    # missing(df_rp)    # 33602 13
														
 
															-
														
 
															-    # plt.figure(dpi=100, figsize=(18, 6))
														
 
															-    # plt.plot(df_dq["C_FORECAST"][0:2000], color="red", label="forecast")
														
 
															-    # plt.plot(df_rp["C_VALUE"][0:2000], color="blue", label="value")
														
 
															-
														
 
															-    # plt.legend(loc="upper right")
														
 
															-    # plt.show()
														
 
															-
														
 
															-    # show_curve(df_dq["C_FORECAST"][0:2000])
														
 
															-    # show_curve(df_rp["C_VALUE"][0:2000])
														
 
															-
														
 
															-    # 使用concat合并三个表，并将索引reset
														
 
															-    df_nwp_dq_rp = pd.concat([df_nwp.set_index("C_TIME"), df_dq.set_index("C_TIME"),
														
 
															-                              df_rp.set_index("C_TIME")], axis=1, join="inner").reset_index()
														
 
															-
														
 
															-    # missing(df_env_nwp_rp)  # 24557 4341
														
 
															-    missing(df_nwp_dq_rp)   # 32738 20
														
 
															-
														
 
															-    df_nwp_dq_rp.to_csv(root_path + "nwp_dq_rp.csv", index=False)
														
 
															-
														
 
															-
														
 
															-# 数据填充和删除
														
 
															-def data_fill(df):
														
 
															-    # 将缺省数据较多的那一天数据直接删除
														
 
															-    # del_day = "2021-12-15"
														
 
															-    idx1 = df[df.C_TIME == "2021-12-15 14:45:00"].index.tolist()[0]
														
 
															-    idx2 = df[df.C_TIME == "2021-12-16 00:00:00"].index.tolist()[0]
														
 
															-    df.drop(list(range(idx1, idx2)), inplace=True)
														
 
															-
														
 
															-    idx1 = df[df.C_TIME == "2021-10-14 00:00:00"].index.tolist()[0]
														
 
															-    idx2 = df[df.C_TIME == "2021-10-16 00:00:00"].index.tolist()[0]
														
 
															-    df.drop(list(range(idx1, idx2)), inplace=True)
														
 
															-
														
 
															-    idx1 = df[df.C_TIME == "2021-11-13 00:00:00"].index.tolist()[0]
														
 
															-    idx2 = df[df.C_TIME == "2021-11-14 00:00:00"].index.tolist()[0]
														
 
															-    df.drop(list(range(idx1, idx2)), inplace=True)
														
 
															-
														
 
															-    #
														
 
															-    df.to_csv(root_path + "nwp_dq_rp_1.csv", index=False)
														
 
															-    df = pd.read_csv(root_path + "nwp_dq_rp_1.csv")
														
 
															-
														
 
															-    missing(df)
														
 
															-
														
 
															-    # 数据填充
														
 
															-    cols = df.columns
														
 
															-
														
 
															-    if type(df["C_TIME"][0]) == str:
														
 
															-        df['C_TIME'] = pd.to_datetime(df['C_TIME'])
														
 
															-
														
 
															-    dt15 = pd.Timedelta(minutes=15)
														
 
															-    dt3 = pd.Timedelta(hours=3)
														
 
															-
														
 
															-    for i in range(1, len(df)):
														
 
															-        tdt = df["C_TIME"][i] - df["C_TIME"][i-1]
														
 
															-
														
 
															-        if tdt > dt15 and tdt <= dt3:
														
 
															-            num = int(tdt / dt15)
														
 
															-            last_time = df["C_TIME"][i-1]
														
 
															-            mp = {col: df[col][i] - df[col][i-1] for col in cols}
														
 
															-
														
 
															-            for j in range(1, num):
														
 
															-                df_temp = {cols[k]: (df[cols[k]][i-1] + mp[cols[k]] / num * j)
														
 
															-                           for k in range(1, len(cols))}
														
 
															-                df_temp["C_TIME"] = last_time + dt15
														
 
															-                last_time = df_temp["C_TIME"]
														
 
															-
														
 
															-                df_temp = pd.DataFrame(df_temp, index=[len(df) + 1])
														
 
															-                df = df.append(df_temp, ignore_index=True)
														
 
															-
														
 
															-    df.sort_values(by="C_TIME", inplace=True, ascending=True)
														
 
															-    df.to_csv(root_path + "nwp_dq_rp_1.csv", index=False)
														
 
															-
														
 
															-
														
 
															-# %%
														
 
															-if __name__ == "__main__":
														
 
															-    df_nwp_dq_rp = pd.read_csv(
														
 
															-        root_path + "nwp_dq_rp.csv", usecols=list(range(0, 27)))
														
 
															-    df = df_nwp_dq_rp
														
 
															-    # data_fill(df_nwp_dq_rp)
														
--- a/wind-LSTM-v2.0-nwp+rp/data_utils.py
+++ b/wind-LSTM-v2.0-nwp+rp/data_utils.py
@@ -1,65 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/3/22 17:17
														
 
															-# file: dpdUtils.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-
														
 
															-
														
 
															-import time, datetime
														
 
															-
														
 
															-
														
 
															-class ValidationError(Exception):
														
 
															-    def __init__(self, message):
														
 
															-        self.message = message
														
 
															-
														
 
															-
														
 
															-def timestamp_to_datetime(ts):
														
 
															-    if type(ts) is not int:
														
 
															-        raise ValueError("timestamp-时间格式必须是整型")
														
 
															-    if len(str(ts)) == 13:
														
 
															-        return datetime.datetime.fromtimestamp(ts/1000)
														
 
															-    elif len(str(ts)) == 10:
														
 
															-        return datetime.datetime.fromtimestamp(ts)
														
 
															-    else:
														
 
															-        raise ValueError("timestamp-时间格式长度错误")
														
 
															-
														
 
															-
														
 
															-def datetime_to_timestamp(dt, len):
														
 
															-    if len not in (10, 13):
														
 
															-        raise ValueError("timestamp-时间戳转换长度错误")
														
 
															-    if len == 10:
														
 
															-        return int(round(time.mktime(dt.timetuple())))
														
 
															-    else:
														
 
															-        return int(round(time.mktime(dt.timetuple()))*1000)
														
 
															-
														
 
															-
														
 
															-def datetime_to_timestr(dt):
														
 
															-    return int(dt.strftime('%m%d%H%M'))
														
 
															-
														
 
															-
														
 
															-def timestr_to_datetime(time_data):
														
 
															-    """
														
 
															-    将时间戳或时间字符串转换为datetime.datetime类型
														
 
															-    :param time_data: int or str
														
 
															-    :return:datetime.datetime
														
 
															-    """
														
 
															-    if isinstance(time_data, float):
														
 
															-        result = timestamp_to_datetime(int(time_data))
														
 
															-    elif isinstance(time_data, int):
														
 
															-        result = timestamp_to_datetime(time_data)
														
 
															-    elif isinstance(time_data, str):
														
 
															-        if len(time_data) == 10:
														
 
															-            result = datetime.datetime.strptime(time_data, '%d/%m/%Y')
														
 
															-            # result = datetime.datetime.strptime(time_data, '%Y-%m-%d')
														
 
															-        elif len(time_data) in {17, 18, 19}:
														
 
															-            result = datetime.datetime.strptime(time_data, '%d/%m/%Y %H:%M:%S')   # strptime字符串解析必须严格按照字符串中的格式
														
 
															-            # result = datetime.datetime.strptime(time_data, '%Y-%m-%d %H:%M:%S')
														
 
															-        else:
														
 
															-            raise ValidationError("时间字符串长度不满足要求！")
														
 
															-    return result
														
 
															-
														
 
															-
														
 
															-def timestamp_to_timestr(t):
														
 
															-    return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(t))
														
 
															-
														
--- a/wind-LSTM-v2.0-nwp+rp/figure.py
+++ b/wind-LSTM-v2.0-nwp+rp/figure.py
@@ -1,83 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/3/20 15:19
														
 
															-# file: figure.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-import sys
														
 
															-import numpy as np
														
 
															-import matplotlib.pyplot as plt
														
 
															-
														
 
															-
														
 
															-class Figure(object):
														
 
															-    def __init__(self, opt, logger, process):
														
 
															-        self.opt = opt
														
 
															-        self.ds = process
														
 
															-        self.logger = logger
														
 
															-
														
 
															-    def get_16_points(self, results):
														
 
															-        # results为模型预测的一维数组，遍历，取每16个点的最后一个点
														
 
															-        preds = []
														
 
															-        for res in results:
														
 
															-            preds.append(res[-1])
														
 
															-        return np.array(preds)
														
 
															-
														
 
															-    def draw(self, label_data, predict_norm_data, numbers):
														
 
															-        # label_data = origin_data.data[origin_data.train_num + origin_data.start_num_in_test : ,
														
 
															-        #                                         config.label_in_feature_index]
														
 
															-        # dq_data = dq_data.reshape((-1, self.opt.output_size))
														
 
															-        predict_norm_data = self.get_16_points(predict_norm_data)
														
 
															-        label_data = self.get_16_points(label_data)
														
 
															-        label_data = label_data.reshape((-1, self.opt.output_size))
														
 
															-        # label_data 要进行反归一化
														
 
															-        label_data = label_data * self.ds.std[self.opt.label_in_feature_index] + \
														
 
															-                       self.ds.mean[self.opt.label_in_feature_index]
														
 
															-        predict_data = predict_norm_data * self.ds.std[self.opt.label_in_feature_index] + \
														
 
															-                       self.ds.mean[self.opt.label_in_feature_index]   # 通过保存的均值和方差还原数据
														
 
															-        # dq_data = dq_data * self.ds.std[0] + self.ds.mean[0]
														
 
															-        # predict_data = predict_norm_data
														
 
															-        assert label_data.shape[0] == predict_data.shape[0], "The element number in origin and predicted data is different"
														
 
															-
														
 
															-        label_name = [self.ds.tables_column_name[i] for i in self.opt.label_in_feature_index]
														
 
															-        label_column_num = len(self.opt.label_columns)
														
 
															-
														
 
															-        # label 和 predict 是错开config.predict_day天的数据的
														
 
															-        # 下面是两种norm后的loss的计算方式，结果是一样的，可以简单手推一下
														
 
															-        # label_norm_data = origin_data.norm_data[origin_data.train_num + origin_data.start_num_in_test:,
														
 
															-        #              config.label_in_feature_index]
														
 
															-        # loss_norm = np.mean((label_norm_data[config.predict_day:] - predict_norm_data[:-config.predict_day]) ** 2, axis=0)
														
 
															-        # logger.info("The mean squared error of stock {} is ".format(label_name) + str(loss_norm))
														
 
															-
														
 
															-        loss = np.sum((label_data - predict_data) ** 2)/len(label_data)  # mse
														
 
															-        # loss = np.mean((label_data - predict_data) ** 2, axis=0)
														
 
															-        loss_sqrt = np.sqrt(loss)   # rmse
														
 
															-        loss_norm = 1 - loss_sqrt / self.opt.cap
														
 
															-        # loss_norm = loss/(ds.std[opt.label_in_feature_index] ** 2)
														
 
															-        self.logger.info("The mean squared error of power {} is ".format(label_name) + str(loss_norm))
														
 
															-
														
 
															-        # loss1 = np.sum((label_data - dq_data) ** 2) / len(label_data)  # mse
														
 
															-        # loss_sqrt1 = np.sqrt(loss1)  # rmse
														
 
															-        # loss_norm1 = 1 - loss_sqrt1 / self.opt.cap
														
 
															-        # self.logger.info("The mean squared error1 of power {} is ".format(label_name) + str(loss_norm1))
														
 
															-        if self.opt.is_continuous_predict:
														
 
															-            # label_X = range(int((self.ds.data_num - self.ds.train_num - 32)))
														
 
															-            label_X = list(range(numbers))
														
 
															-        else:
														
 
															-            label_X = range(int((self.ds.data_num - self.ds.train_num - self.ds.start_num_in_test)/2))
														
 
															-        print("label_x = ", label_X)
														
 
															-        predict_X = [x for x in label_X]
														
 
															-
														
 
															-        if not sys.platform.startswith('linux'):    # 无桌面的Linux下无法输出，如果是有桌面的Linux，如Ubuntu，可去掉这一行
														
 
															-            for i in range(label_column_num):
														
 
															-                plt.figure(i+1)                     # 预测数据绘制
														
 
															-                plt.plot(label_X, label_data[:, i], label='label', color='b')
														
 
															-                plt.plot(predict_X, predict_data[:, i], label='predict', color='g')
														
 
															-                # plt.plot(predict_X, dq_data[:, i], label='dq', color='y')
														
 
															-                # plt.title("Predict actual {} power with {}".format(label_name[i], self.opt.used_frame))
														
 
															-                self.logger.info("The predicted power {} for the last {} point(s) is: ".format(label_name[i], self.opt.predict_points) +
														
 
															-                      str(np.squeeze(predict_data[-self.opt.predict_points:, i])))
														
 
															-
														
 
															-                if self.opt.do_figure_save:
														
 
															-                    plt.savefig(self.opt.figure_save_path+"{}predict_{}_with_{}.png".format(self.opt.continue_flag, label_name[i], opt.used_frame))
														
 
															-
														
 
															-            plt.show()
														
--- a/wind-LSTM-v2.0-nwp+rp/logger.py
+++ b/wind-LSTM-v2.0-nwp+rp/logger.py
@@ -1,43 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/3/20 15:19
														
 
															-# file: logger.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-
														
 
															-import logging, sys
														
 
															-from logging.handlers import RotatingFileHandler
														
 
															-
														
 
															-
														
 
															-def load_logger(config):
														
 
															-    logger = logging.getLogger()
														
 
															-    logger.setLevel(level=logging.DEBUG)
														
 
															-
														
 
															-    # StreamHandler
														
 
															-    if config.do_log_print_to_screen:
														
 
															-        stream_handler = logging.StreamHandler(sys.stdout)
														
 
															-        stream_handler.setLevel(level=logging.INFO)
														
 
															-        formatter = logging.Formatter(datefmt='%Y/%m/%d %H:%M:%S',
														
 
															-                                      fmt='[ %(asctime)s ] %(message)s')
														
 
															-        stream_handler.setFormatter(formatter)
														
 
															-        logger.addHandler(stream_handler)
														
 
															-
														
 
															-    # FileHandler
														
 
															-    if config.do_log_save_to_file:
														
 
															-        file_handler = RotatingFileHandler(config.log_save_path + "out.log", maxBytes=1024000, backupCount=5)
														
 
															-        file_handler.setLevel(level=logging.INFO)
														
 
															-        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
														
 
															-        file_handler.setFormatter(formatter)
														
 
															-        logger.addHandler(file_handler)
														
 
															-
														
 
															-        # 把config信息也记录到log 文件中
														
 
															-        config_dict = {}
														
 
															-        for key in dir(config):
														
 
															-            if not key.startswith("_"):
														
 
															-                config_dict[key] = getattr(config, key)
														
 
															-        config_str = str(config_dict)
														
 
															-        config_list = config_str[1:-1].split(", '")
														
 
															-        config_save_str = "\nConfig:\n" + "\n'".join(config_list)
														
 
															-        logger.info(config_save_str)
														
 
															-
														
 
															-    return logger
														
--- a/wind-LSTM-v2.0-nwp+rp/model/__init__.py
+++ b/wind-LSTM-v2.0-nwp+rp/model/__init__.py
--- a/wind-LSTM-v2.0-nwp+rp/model/model_keras.py
+++ b/wind-LSTM-v2.0-nwp+rp/model/model_keras.py
@@ -1,48 +0,0 @@
 
															-# -*- coding: UTF-8 -*-
														
 
															-from keras.layers import Input, Dense, LSTM
														
 
															-from keras.models import Model
														
 
															-from keras.callbacks import ModelCheckpoint, EarlyStopping
														
 
															-
														
 
															-
														
 
															-def get_keras_model(opt):
														
 
															-    input1 = Input(shape=(opt.Model['time_step'], opt.input_size))
														
 
															-    lstm = input1
														
 
															-    for i in range(opt.Model['lstm_layers']):
														
 
															-        lstm = LSTM(units=opt.Model['hidden_size'],dropout=opt.Model['dropout_rate'],return_sequences=True)(lstm)
														
 
															-    output = Dense(opt.output_size)(lstm)
														
 
															-    model = Model(input1, output)
														
 
															-    model.compile(loss='mse', optimizer='adam')     # metrics=["mae"]
														
 
															-    return model
														
 
															-
														
 
															-
														
 
															-def gpu_train_init():
														
 
															-    import tensorflow as tf
														
 
															-    from keras.backend.tensorflow_backend import set_session
														
 
															-    sess_config = tf.ConfigProto(log_device_placement=True, allow_soft_placement=True)
														
 
															-    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.7  # 最多使用70%GPU内存
														
 
															-    sess_config.gpu_options.allow_growth=True   # 初始化时不全部占满GPU显存, 按需分配
														
 
															-    sess = tf.Session(config=sess_config)
														
 
															-    set_session(sess)
														
 
															-
														
 
															-
														
 
															-def train(opt, train_and_valid_data):
														
 
															-    if opt.use_cuda: gpu_train_init()
														
 
															-    train_X, train_Y, valid_X, valid_Y = train_and_valid_data
														
 
															-    model = get_keras_model(opt)
														
 
															-    model.summary()
														
 
															-    if opt.add_train:
														
 
															-        model.load_weights(opt.model_save_path + opt.model_name)
														
 
															-
														
 
															-    check_point = ModelCheckpoint(filepath=opt.model_save_path + opt.model_name, monitor='val_loss',
														
 
															-                                    save_best_only=True, mode='auto')
														
 
															-    early_stop = EarlyStopping(monitor='val_loss', patience=opt.Model['patience'], mode='auto')
														
 
															-    model.fit(train_X, train_Y, batch_size=opt.Model['batch_size'], epochs=opt.Model['epoch'], verbose=2,
														
 
															-              validation_data=(valid_X, valid_Y), callbacks=[check_point, early_stop])
														
 
															-
														
 
															-
														
 
															-def predict(config, test_X):
														
 
															-    model = get_keras_model(config)
														
 
															-    model.load_weights(config.model_save_path + config.model_name)
														
 
															-    result = model.predict(test_X, batch_size=1)
														
 
															-    # result = result.reshape((-1, config.output_size))
														
 
															-    return result
														
--- a/wind-LSTM-v2.0-nwp+rp/requirements.txt
+++ b/wind-LSTM-v2.0-nwp+rp/requirements.txt
@@ -1,8 +0,0 @@
 
															-sklearn
														
 
															-pandas
														
 
															-argparse
														
 
															-keras
														
 
															-tensorflow==1.15
														
 
															-matplotlib>=3.0.2
														
 
															-numpy>=1.14.6
														
 
															-scipy>=1.1.0
														
--- a/wind-LSTM-v2.0-nwp+rp/run_case1.py
+++ b/wind-LSTM-v2.0-nwp+rp/run_case1.py
@@ -1,58 +0,0 @@
 
															-# -*- coding: UTF-8 -*-
														
 
															-
														
 
															-import numpy as np
														
 
															-import os
														
 
															-import sys
														
 
															-import time
														
 
															-from figure import Figure
														
 
															-from data_process import data_process
														
 
															-from data_features import data_features
														
 
															-from logger import load_logger
														
 
															-from config import myargparse
														
 
															-from data_analyse import data_analyse
														
 
															-frame = "keras"
														
 
															-
														
 
															-if frame == "keras":
														
 
															-    from model.model_keras import train, predict
														
 
															-    os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3'
														
 
															-else:
														
 
															-    raise Exception("Wrong frame seletion")
														
 
															-
														
 
															-
														
 
															-def main():
														
 
															-    parse = myargparse(discription="training config", add_help=False)
														
 
															-    opt = parse.parse_args_and_yaml()
														
 
															-    logger = load_logger(opt)
														
 
															-    try:
														
 
															-        np.random.seed(opt.Model["random_seed"])
														
 
															-        process = data_process(opt=opt)
														
 
															-        dfs = process.get_processed_data()
														
 
															-        features = data_features(opt=opt, mean=process.mean, std=process.std)
														
 
															-        if opt.do_train:
														
 
															-            train_X, valid_X, train_Y, valid_Y = features.get_train_data([dfs[0][:'2021/8/1'], dfs[1][:'2022/3/1']])
														
 
															-            train(opt, [train_X, train_Y, valid_X, valid_Y])
														
 
															-        if opt.do_predict:
														
 
															-            test_X, test_Y, df_Y = features.get_test_data([dfs[0]['2021/8/1':'2021/9/6'], dfs[1]['2022/3/1':'2022/4/4']])
														
 
															-            result = predict(opt, test_X)       # 这里输出的是未还原的归一化预测数据
														
 
															-            analyse = data_analyse(opt, logger, process)
														
 
															-            analyse.predict_acc(result, df_Y)
														
 
															-    except Exception:
														
 
															-        logger.error("Run Error", exc_info=True)
														
 
															-
														
 
															-
														
 
															-if __name__ == "__main__":
														
 
															-    import argparse
														
 
															-    # argparse方便于命令行下输入参数，可以根据需要增加更多
														
 
															-    # parser = argparse.ArgumentParser()
														
 
															-    # parser.add_argument("-t", "--do_train", default=False, type=bool, help="whether to train")
														
 
															-    # parser.add_argument("-p", "--do_predict", default=True, type=bool, help="whether to train")
														
 
															-    # parser.add_argument("-b", "--batch_size", default=64, type=int, help="batch size")
														
 
															-    # parser.add_argument("-e", "--epoch", default=20, type=int, help="epochs num")
														
 
															-    # args = parser.parse_args()
														
 
															-
														
 
															-    # con = Config()
														
 
															-    # for key in dir(args):               # dir(args) 函数获得args所有的属性
														
 
															-    #     if not key.startswith("_"):     # 去掉 args 自带属性，比如__name__等
														
 
															-    #         setattr(con, key, getattr(args, key))   # 将属性值赋给Config
														
 
															-    main()
														
 
															-
														
--- a/wind-LSTM-v2.0-nwp+rp/run_case_history.py
+++ b/wind-LSTM-v2.0-nwp+rp/run_case_history.py
@@ -1,142 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/3/20 9:23
														
 
															-# file: run_case_history.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-
														
 
															-class Data:
														
 
															-    def __init__(self, config):
														
 
															-        self.config = config
														
 
															-        self.data, self.data_column_name = self.read_data()
														
 
															-
														
 
															-        self.data_num = self.data.shape[0]
														
 
															-        self.train_num = int(self.data_num * self.config.train_data_rate)
														
 
															-
														
 
															-        self.mean = np.mean(self.data, axis=0)              # 数据的均值和方差
														
 
															-        self.std = np.std(self.data, axis=0)
														
 
															-        self.norm_data = (self.data - self.mean)/self.std   # 归一化，去量纲
														
 
															-
														
 
															-        self.start_num_in_test = 0      # 测试集中前几天的数据会被删掉，因为它不够一个time_step
														
 
															-
														
 
															-    def read_data(self):                # 读取初始数据
														
 
															-        if self.config.debug_mode:
														
 
															-            init_data = pd.read_csv(self.config.train_data_path, nrows=self.config.debug_num,
														
 
															-                                    usecols=self.config.feature_columns)
														
 
															-        else:
														
 
															-            init_data = pd.read_csv(self.config.train_data_path, usecols=self.config.feature_columns)
														
 
															-        init_data = self.filter_data(init_data)
														
 
															-        return init_data.values, init_data.columns.tolist()     # .columns.tolist() 是获取列名
														
 
															-
														
 
															-    def filter_data(self, init_data):
														
 
															-        return init_data[init_data.apply(np.sum, axis=1)!=0]
														
 
															-
														
 
															-    def get_train_and_valid_data(self):
														
 
															-        feature_data = self.norm_data[:self.train_num]
														
 
															-        label_data = self.norm_data[: self.train_num,
														
 
															-                                    self.config.label_in_feature_index]    # 将延后几天的数据作为label
														
 
															-
														
 
															-        if not self.config.do_continue_train:
														
 
															-            # 在非连续训练模式下，每time_step行数据会作为一个样本，两个样本错开一行，比如：1-20行，2-21行。。。。
														
 
															-            train_x, train_y = [], []
														
 
															-            for i in range(self.train_num-self.config.time_step*2):
														
 
															-                p1 = feature_data[:, 0][i:i+self.config.start_predict_point]
														
 
															-                p2 = feature_data[:, 1][i+self.config.start_predict_point:i+self.config.start_predict_point*2]
														
 
															-                p = [list(t) for t in zip(p1, p2)]  # 实际功率， 预测功率 是一组特征值
														
 
															-                l = label_data[i+self.config.start_predict_point:i+self.config.start_predict_point*2]
														
 
															-                train_x.append(p)
														
 
															-                train_y.append(l)
														
 
															-            # train_x = [feature_data[i:i+self.config.time_step] for i in range(self.train_num-self.config.time_step)]
														
 
															-            # train_y = [label_data[i+self.config.start_predict_point:i+self.config.time_step] for i in range(self.train_num-self.config.time_step)]
														
 
															-            # 这里选取后16个点 作为 预测及
														
 
															-        else:
														
 
															-            # 在连续训练模式下，每time_step行数据会作为一个样本，两个样本错开time_step行，
														
 
															-            # 比如：1-20行，21-40行。。。到数据末尾，然后又是 2-21行，22-41行。。。到数据末尾，……
														
 
															-            # 这样才可以把上一个样本的final_state作为下一个样本的init_state，而且不能shuffle
														
 
															-            # 目前本项目中仅能在pytorch的RNN系列模型中用
														
 
															-            train_x = [feature_data[start_index + i*self.config.time_step : start_index + (i+1)*self.config.time_step]
														
 
															-                       for start_index in range(self.config.time_step)
														
 
															-                       for i in range((self.train_num - start_index) // self.config.time_step)]
														
 
															-            train_y = [label_data[start_index + i*self.config.time_step : start_index + (i+1)*self.config.time_step]
														
 
															-                       for start_index in range(self.config.time_step)
														
 
															-                       for i in range((self.train_num - start_index) // self.config.time_step)]
														
 
															-
														
 
															-        train_x, train_y = np.array(train_x), np.array(train_y)
														
 
															-
														
 
															-        train_x, valid_x, train_y, valid_y = train_test_split(train_x, train_y, test_size=self.config.valid_data_rate,
														
 
															-                                                              random_state=self.config.random_seed,
														
 
															-                                                              shuffle=self.config.shuffle_train_data)   # 划分训练和验证集，并打乱
														
 
															-        return train_x, valid_x, train_y, valid_y
														
 
															-
														
 
															-
														
 
															-class Config:
														
 
															-    # 数据参数
														
 
															-    # feature_columns = list(range(2, 9))     # 要作为feature的列，按原数据从0开始计算，也可以用list 如 [2,4,6,8] 设置
														
 
															-    feature_columns = list(range(1, 3))
														
 
															-    # label_columns = [4, 5]                  # 要预测的列，按原数据从0开始计算, 如同时预测第四，五列 最低价和最高价
														
 
															-    label_columns = [1]
														
 
															-    # label_in_feature_index = [feature_columns.index(i) for i in label_columns]  # 这样写不行
														
 
															-    label_in_feature_index = (lambda x,y: [x.index(i) for i in y])(feature_columns, label_columns)  # 因为feature不一定从0开始
														
 
															-
														
 
															-    predict_day = 1             # 预测未来几天
														
 
															-    predict_points = 16
														
 
															-    # 网络参数
														
 
															-    input_size = len(feature_columns)
														
 
															-    output_size = len(label_columns)
														
 
															-
														
 
															-    hidden_size = 128           # LSTM的隐藏层大小，也是输出大小
														
 
															-    lstm_layers = 2             # LSTM的堆叠层数
														
 
															-    dropout_rate = 0.2          # dropout概率
														
 
															-    time_step = 16             # 这个参数很重要，是设置用前多少个点的数据来预测，也是LSTM的time step数，请保证训练数据量大于它
														
 
															-    start_predict_point = 16
														
 
															-
														
 
															-    # 训练参数
														
 
															-    do_train = True
														
 
															-    do_predict = True
														
 
															-    add_train = False           # 是否载入已有模型参数进行增量训练
														
 
															-    shuffle_train_data = False   # 是否对训练数据做shuffle
														
 
															-    use_cuda = False            # 是否使用GPU训练
														
 
															-
														
 
															-    train_data_rate = 0.95      # 训练数据占总体数据比例，测试数据就是 1-train_data_rate
														
 
															-    valid_data_rate = 0.15      # 验证数据占训练数据比例，验证集在训练过程使用，为了做模型和参数选择
														
 
															-
														
 
															-    batch_size = 64
														
 
															-    learning_rate = 0.001
														
 
															-    epoch = 20                  # 整个训练集被训练多少遍，不考虑早停的前提下
														
 
															-    patience = 5                # 训练多少epoch，验证集没提升就停掉
														
 
															-    random_seed = 42            # 随机种子，保证可复现
														
 
															-
														
 
															-    do_continue_train = False    # 每次训练把上一次的final_state作为下一次的init_state，仅用于RNN类型模型，目前仅支持pytorch
														
 
															-    continue_flag = ""           # 但实际效果不佳，可能原因：仅能以 batch_size = 1 训练
														
 
															-    if do_continue_train:
														
 
															-        shuffle_train_data = False
														
 
															-        batch_size = 1
														
 
															-        continue_flag = "continue_"
														
 
															-
														
 
															-    # 训练模式
														
 
															-    debug_mode = False  # 调试模式下，是为了跑通代码，追求快
														
 
															-    debug_num = 500  # 仅用debug_num条数据来调试
														
 
															-
														
 
															-    # 框架参数
														
 
															-    used_frame = frame  # 选择的深度学习框架，不同的框架模型保存后缀不一样
														
 
															-    model_postfix = {"pytorch": ".pth", "keras": ".h5", "tensorflow": ".ckpt"}
														
 
															-    model_name = "model_" + continue_flag + used_frame + model_postfix[used_frame]
														
 
															-
														
 
															-    # 路径参数
														
 
															-    train_data_path = "./data/J00285.csv"
														
 
															-    model_save_path = "./checkpoint/" + used_frame + "/"
														
 
															-    figure_save_path = "./figure/"
														
 
															-    log_save_path = "./log/"
														
 
															-    do_log_print_to_screen = True
														
 
															-    do_log_save_to_file = True                  # 是否将config和训练过程记录到log
														
 
															-    do_figure_save = False
														
 
															-    do_train_visualized = False          # 训练loss可视化，pytorch用visdom，tf用tensorboardX，实际上可以通用, keras没有
														
 
															-    if not os.path.exists(model_save_path):
														
 
															-        os.makedirs(model_save_path)    # makedirs 递归创建目录
														
 
															-    if not os.path.exists(figure_save_path):
														
 
															-        os.mkdir(figure_save_path)
														
 
															-    if do_train and (do_log_save_to_file or do_train_visualized):
														
 
															-        cur_time = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime())
														
 
															-        log_save_path = log_save_path + cur_time + '_' + used_frame + "/"
														
 
															-        os.makedirs(log_save_path)
														
 
															-
														
--- a/wind-LSTM-v2.0-nwp+rp/test.py
+++ b/wind-LSTM-v2.0-nwp+rp/test.py
@@ -1,43 +0,0 @@
 
															-#!/usr/bin/env python
														
 
															-# -*- coding: utf-8 -*-
														
 
															-# time: 2023/4/11 15:58
														
 
															-# file: test.py
														
 
															-# author: David
														
 
															-# company: shenyang JY
														
 
															-import pandas as pd
														
 
															-import numpy as np
														
 
															-# index = pd.date_range('1/1/2000', periods=9, freq='T')
														
 
															-# series = pd.Series(range(9), index=index)
														
 
															-# df = pd.DataFrame({'value': series})
														
 
															-# series1 = series.resample('3T').sum()
														
 
															-# series2 = series.resample('3T', label='right').sum()
														
 
															-# series3 = series.resample('3T', label='right', closed='right').sum()
														
 
															-# series4 = series.resample('30S').asfreq()
														
 
															-# series5 = series.resample('30S').bfill()
														
 
															-# print(series)
														
 
															-# print(series1)
														
 
															-# print(series2)
														
 
															-# print(series3)
														
 
															-# print(series4)
														
 
															-# print("---", series5)
														
 
															-
														
 
															-# x = np.random.randint(1,100,20).reshape((10,2))
														
 
															-# print(x)
														
 
															-# from sklearn.model_selection import train_test_split
														
 
															-#
														
 
															-# x_train, x_test = train_test_split(x, test_size=0.2, random_state=1, shuffle=False)
														
 
															-# print("x_train", x_train)
														
 
															-# print("x_test", x_test)
														
 
															-
														
 
															-
														
 
															-import numpy as np
														
 
															-import pandas as pd
														
 
															-#创建一组数据
														
 
															-data = {'name': ['John', 'Mike', 'Mozla', 'Rose', 'David', 'Marry', 'Wansi', 'Sidy', 'Jack', 'Alic'],
														
 
															-        'age': [20, 32, 29, np.nan, 15, 28, 21, 30, 37, 25],
														
 
															-        'gender': [0, 0, 1, 1, 0, 1, 0, 0, 1, 1],
														
 
															-        'isMarried': ['yes', 'yes', 'no', 'yes', 'no', 'no', 'no', 'yes', 'no', 'no']}
														
 
															-label = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']
														
 
															-df = pd.DataFrame(data, index=label)
														
 
															-print(df.loc[:,'name'])
														
 
															-pass