liudawei 1 year ago
parent
commit
78b2149aa2
63 changed files with 3694 additions and 0 deletions
  1. 丁浩测试/.gitignore (+5 -0)
  2. 丁浩测试/JiaYueTest.py (+221 -0)
  3. 丁浩测试/ReadMe.md (+29 -0)
  4. 丁浩测试/metrics.py (+370 -0)
  5. 加强训练/.DS_Store (BIN)
  6. 加强训练/.gitignore (+13 -0)
  7. 加强训练/dataset/TimeDataset.py (+58 -0)
  8. 加强训练/model/.DS_Store (BIN)
  9. 加强训练/model/LSTM.py (+39 -0)
  10. 加强训练/model/Transformer_base.py (+77 -0)
  11. 加强训练/readme.md (+37 -0)
  12. 加强训练/save/2021_03_2022_03.pt (BIN)
  13. 加强训练/save/best_acc_short.pt (BIN)
  14. 加强训练/save/best_loss_short.pt (BIN)
  15. 加强训练/save/best_loss_short_encoder.pt (BIN)
  16. 加强训练/save/lstm_base.pt (BIN)
  17. 加强训练/save/lstm_base_pro.pt (BIN)
  18. 加强训练/save/短期对比.xlsx (BIN)
  19. 加强训练/training.py (+123 -0)
  20. 加强训练/training_model.py (+143 -0)
  21. 加强训练/utils/.DS_Store (BIN)
  22. 加强训练/utils/Arg.py (+16 -0)
  23. 加强训练/utils/ModeTest.py (+127 -0)
  24. 加强训练/utils/dataSplit.py (+50 -0)
  25. 加强训练/utils/datapro.py (+40 -0)
  26. 劳店分区/.gitignore (+13 -0)
  27. 劳店分区/Readme.md (+18 -0)
  28. 劳店分区/back.py (+76 -0)
  29. 劳店分区/config.py (+95 -0)
  30. 劳店分区/config.yml (+79 -0)
  31. 劳店分区/data_analyse.py (+90 -0)
  32. 劳店分区/data_features.py (+97 -0)
  33. 劳店分区/data_process.py (+123 -0)
  34. 劳店分区/data_utils.py (+65 -0)
  35. 劳店分区/figure.py (+83 -0)
  36. 劳店分区/logger.py (+43 -0)
  37. 劳店分区/model/__init__.py (+0 -0)
  38. 劳店分区/model/model_keras.py (+68 -0)
  39. 劳店分区/model/model_keras_1.py (+68 -0)
  40. 劳店分区/model/sloss.py (+33 -0)
  41. 劳店分区/requirements.txt (+8 -0)
  42. 劳店分区/run_case_分区.py (+63 -0)
  43. 劳店分区/run_case_直接.py (+63 -0)
  44. 秀水分区/.DS_Store (BIN)
  45. 秀水分区/.gitignore (+13 -0)
  46. 秀水分区/README.md (+10 -0)
  47. 秀水分区/calculate.py (+86 -0)
  48. 秀水分区/config.py (+86 -0)
  49. 秀水分区/config_xiushui.yml (+54 -0)
  50. 秀水分区/data_analyse.py (+107 -0)
  51. 秀水分区/data_features.py (+72 -0)
  52. 秀水分区/data_process.py (+219 -0)
  53. 秀水分区/data_utils.py (+67 -0)
  54. 秀水分区/figure.py (+83 -0)
  55. 秀水分区/logger.py (+43 -0)
  56. 秀水分区/model/__init__.py (+0 -0)
  57. 秀水分区/model/model_keras_base.py (+125 -0)
  58. 秀水分区/model/model_keras_fenqu.py (+96 -0)
  59. 秀水分区/model/sloss.py (+38 -0)
  60. 秀水分区/requirements.txt (+8 -0)
  61. 秀水分区/run_case_分区.py (+65 -0)
  62. 秀水分区/run_case_直接.py (+62 -0)
  63. 秀水分区/test.py (+27 -0)

+ 5 - 0
丁浩测试/.gitignore

@@ -0,0 +1,5 @@
+/.idea
+/__pycache__
+/dataset/shandong/Dataset_training/NWP
+/dataset/shandong/Dataset_training/power
+/dataset

+ 221 - 0
丁浩测试/JiaYueTest.py

@@ -0,0 +1,221 @@
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from scipy.optimize import curve_fit
+from metrics import RMSE
+import lightgbm as lgb
+
+# root_path = './dataset/data/'
+root_path = './dataset/shandong/Dataset_training/'
+# 设置图大小
+width = 16
+height = 9
+plt.figure(figsize=(width, height))
+# 设置全局字体样式
+plt.rcParams['font.family'] = 'Microsoft YaHei'
+# max_power = 22
+# 定义sigmoid函数
+def sigmoid(x, k, x0,max_power,min_power):
+    y = max_power / (1 + np.exp(-k * (x - x0)))+min_power
+    return y
+
+
+def contact_all():
+    df = None
+    csvpath = (r"./dataset/shandong/Dataset_training/concat_all.csv")
+    for id in range(6):
+        df_NWP = pd.read_csv(r"./dataset/shandong/Dataset_training/NWP/NWP_{}.csv".format(id))
+        df_PowerRE = pd.read_csv(r"./dataset/shandong/Dataset_training/power/power_{}.csv".format(id))
+        df_raw = pd.merge(df_NWP, df_PowerRE, on="C_TIME")
+        all_features = ['C_TIME', 'C_WS90', 'C_WS100',  'C_REAL_VALUE']
+        df_raw = df_raw[all_features]
+        print(len(df_raw))
+        if df is None:
+            df = df_raw
+        else:
+            df = pd.concat([df, df_raw], axis=0)
+    print(len(df))
+    df.to_csv(csvpath, index=False)
+
+
+def show_cz_plot():
+    df = pd.read_csv(root_path + 'concat102.csv')
+    # 创建一些样本数据
+    x1 = df['C_WS100'].values
+    dif = df['C_WS'].values - x1
+    x1 = np.arange(1, len(df)+1)
+    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(64, 14))
+    # 绘制散点图
+    ax1.plot(x1, dif, color='red', label='turbine-102')
+    # 添加标题和标签
+    ax1.set_title('折线图')
+    ax1.set_xlabel('时间点')
+    ax1.set_ylabel('NWP与机头风速差值')
+    ax1.legend()
+
+    # plt.scatter(dif, y1, color='blue', label='数据集2')
+    # plt.scatter(x3, y3, color='green', label='数据集3')
+    # 绘制散点图
+
+    df = pd.read_csv(root_path + 'concat103.csv')
+    # 创建一些样本数据
+    x1 = df['C_WS100'].values
+    y1 = df['C_ACTIVE_POWER'].values
+    dif = df['C_WS'].values - x1
+    x1 = np.arange(1, len(df) + 1)
+    ax2.plot(x1, dif, color='blue', label='turbine-103')
+    # 添加标题和标签
+    ax2.set_title('折线图')
+    ax2.set_xlabel('时间点')
+    ax2.set_ylabel('NWP与机头风速差值')
+    ax2.legend()
+    plt.show()
+
+
+def show_cz_scatter():
+    df = pd.read_csv(root_path + 'concat102.csv')
+    # 创建一些样本数据
+    x1 = df['C_WS100'].values
+    dif = df['C_WS'].values - x1
+    x1 = np.arange(1, len(df)+1)
+    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(64, 14))
+    # 绘制散点图
+    ax1.scatter(x1, dif, color='red', label='turbine-102')
+    # 添加标题和标签
+    ax1.set_title('散点图')
+    ax1.set_xlabel('时间点')
+    ax1.set_ylabel('NWP与机头风速差值')
+    ax1.legend()
+
+    # plt.scatter(dif, y1, color='blue', label='数据集2')
+    # plt.scatter(x3, y3, color='green', label='数据集3')
+    # 绘制散点图
+
+    df = pd.read_csv(root_path + 'concat103.csv')
+    # 创建一些样本数据
+    x1 = df['C_WS100'].values
+    y1 = df['C_ACTIVE_POWER'].values
+    dif = df['C_WS'].values - x1
+    x1 = np.arange(1, len(df) + 1)
+    ax2.scatter(x1, dif, color='blue', label='turbine-103')
+    # 添加标题和标签
+    ax2.set_title('散点图')
+    ax2.set_xlabel('时间点')
+    ax2.set_ylabel('NWP与机头风速差值')
+    ax2.legend()
+    plt.show()
+
+
+
+def show_WSandPower_nihe():
+    df = pd.read_csv(root_path + 'concat_all.csv')
+    # 读样本数据
+    x1 = df['C_WS100'].values
+    y1 = df['C_REAL_VALUE'].values
+
+    # 拟合数据
+    # p0 = [1, np.median(x1),max(y1),min(y1)]
+    p0 = [1, 7.3,max(y1),min(y1)]
+    popt, pcov = curve_fit(sigmoid, x1, y1,p0=p0)
+    y_fit = sigmoid(x1, *p0)  # note: evaluated with the initial guess p0; the fitted popt is only printed below
+    trusts = []
+    for i in range(len(y1)):
+        if y1[i] > y_fit[i]-2 and y1[i]<y_fit[i]+2:
+            trusts.append(i)
+    # 绘制散点图
+    plt.scatter(x1, y1, color='blue',  label='data')
+    plt.scatter(x1, y_fit, color='red', label='fit')
+    plt.scatter(x1[trusts],y1[trusts],color='green', label='trust')
+
+    print("k = %f, x0 = %f" % (popt[0], popt[1]))
+    plt.title('散点图')
+    plt.xlabel('NWP100米风速值')
+    plt.ylabel('功率值')
+    # 添加图例
+    plt.legend()
+    # 显示图形
+    plt.show()
+
+
+def data_clean(df):
+    # 读样本数据
+    x1 = df['C_WS100'].values
+    y1 = df['C_REAL_VALUE'].values
+    # 拟合数据
+    # p0 = [1, np.median(x1),max(y1),min(y1)]
+    p0 = [1, 7.3,max(y1),min(y1)]
+    # p0 = [1, (max(x1)+min(x1))/2, max(y1), min(y1)]
+    popt, pcov = curve_fit(sigmoid, x1, y1,p0=p0)
+    y_fit = sigmoid(x1, *p0)  # note: points are filtered against the p0 curve; popt from curve_fit is unused here
+    trusts = []
+    for i in range(len(y1)):
+        if y1[i] > y_fit[i]-2 and y1[i]<y_fit[i]+2:
+            trusts.append(i)
+    df = df.loc[trusts]
+    df.reset_index(drop=True, inplace=True)
+    return df
+
+
+# 23/04/26 风电实验基准
+def train_lgbmodel_feng(clean=False):
+    print('------------generate features-----------------')
+    index = 'shandong'
+    df = pd.read_csv('./dataset/'+index+'/Dataset_training/concat_all.csv')
+    all_features = ['C_WS100']
+    border = int(len(df) * 0.8)
+    valid_border = border+int(len(df) * 0.1)
+    start =0 #int(len(df) * 0.5)
+    df_train = df.loc[:border]
+    df_valid = df.loc[border:valid_border]
+    df_test = df.loc[valid_border:]
+    if clean:
+        df_test.reset_index(drop=True, inplace=True)
+        df_train = data_clean(df_train)
+    X_train = df_train[all_features]
+    Y_train = df_train[['C_REAL_VALUE']]
+    X_valid = df_valid[all_features]
+    Y_valid = df_valid[['C_REAL_VALUE']]
+    X_test = df_test[all_features]
+    Y_test = df_test[['C_REAL_VALUE']]
+    print("train shape{}{} test shape{}{}".format(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape))
+    print('------------training-----------------')
+    model = lgb.LGBMRegressor(objective='regression', n_estimators=1000,
+                              learning_rate=0.025, n_jobs=-1, random_state=630)
+    # Train the model
+    # model.fit(X_train, Y_train, eval_metric='rmse',
+    #           eval_set=[(X_train, Y_train), (X_temp,Y_temp)],
+    #           eval_names=['train', 'val'],
+    #           early_stopping_rounds=20, verbose=0)
+    model.fit(X_train, Y_train, eval_metric='rmse',
+              eval_set=[(X_valid, Y_valid)],
+              eval_names=['valid'],
+              early_stopping_rounds=20, verbose=0)
+    best_iteration = model.best_iteration_
+    print(best_iteration)
+    # --------------feature importance----------------
+    '''
+    feature_importance = pd.DataFrame()
+    feature_importance['fea_name'] = all_features
+    feature_importance['fea_imp'] = model.feature_importances_
+    feature_importance = feature_importance.sort_values('fea_imp', ascending=False)
+    print(feature_importance)
+    '''
+    # --------------feature importance----------------
+    Y_pred = model.predict(X_test, num_iteration=best_iteration)
+    Y_pred = np.maximum(Y_pred, 0)
+    Y_pred = Y_pred.reshape([-1])
+    Y_true = Y_test.values.reshape([-1])
+    for li in range(len(Y_pred)):
+        if Y_pred[li]<0.016:
+            Y_pred[li] = 0
+    print('------------testing-----------------')
+    print("test shape{}{}".format( Y_pred.shape, Y_true.shape))
+    mean = RMSE(Y_pred, Y_true)
+    print("模型RMSE:{}".format(mean))
+    print("模型准确率:{}%".format(100-mean/22.5*100))
+    
+if __name__=="__main__":
+    print("main:")
+    # contact_all()
+    train_lgbmodel_feng(True)
+    # show_WSandPower_nihe()

+ 29 - 0
丁浩测试/ReadMe.md

@@ -0,0 +1,29 @@
+# Data cleaning and plotting code
+
+Run JiaYueTest.py directly.
+
+Mainly the experiment code for cleaning the training data with a physical (sigmoid power-curve) model, plus assorted plotting and analysis code.
+
+- dataset directory: holds the datasets produced by data processing; analysis and training use the datasets under Dataset_training
+- metrics.py: utility functions for accuracy and other metrics
+
+### Usage:
+
+- Cleaning (a minimal sketch of this step appears right after this file)
+    - In the main block of JiaYueTest.py, set the clean argument of train_lgbmodel_feng(clean=False)
+    - clean = True: clean the training data
+    - clean = False: skip cleaning
+- Plotting
+    - Call the various plotting functions (show_cz_plot, show_cz_scatter, show_WSandPower_nihe) directly
+
+### Cleaning results:
+
+Before cleaning:
+
+- Model RMSE: 5.4043335719494
+- Model accuracy: 75.98073968022489%
+
+After cleaning:
+
+- Model RMSE: 4.56214414966847
+- Model accuracy: 79.72380377925124%
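As a quick orientation (editorial sketch, not part of the commit): the "physical model" cleaning fits a sigmoid power curve to NWP 100 m wind speed versus measured power and keeps only points close to that curve, and the accuracy figures above are 100 - RMSE / 22.5 * 100, with 22.5 the capacity hard-coded in train_lgbmodel_feng. A minimal sketch of the cleaning step, assuming the concat_all.csv layout produced by contact_all():

```python
# Minimal sketch of the sigmoid-based cleaning in data_clean()/train_lgbmodel_feng(clean=True).
import numpy as np
import pandas as pd
from scipy.optimize import curve_fit

def sigmoid(x, k, x0, max_power, min_power):
    return max_power / (1 + np.exp(-k * (x - x0))) + min_power

df = pd.read_csv("./dataset/shandong/Dataset_training/concat_all.csv")
x, y = df["C_WS100"].values, df["C_REAL_VALUE"].values
p0 = [1, 7.3, y.max(), y.min()]            # initial guess, same as JiaYueTest.py
popt, _ = curve_fit(sigmoid, x, y, p0=p0)  # fitted parameters (printed in the original, not used for filtering)
y_fit = sigmoid(x, *p0)                    # the original filters against the p0 curve, not popt
keep = np.abs(y - y_fit) < 2               # keep points within +/-2 of the power curve
df_clean = df[keep].reset_index(drop=True)
print(f"kept {keep.sum()} of {len(df)} samples")
```

Only the retained rows are then used to train the LightGBM regressor when clean=True.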

+ 370 - 0
丁浩测试/metrics.py

@@ -0,0 +1,370 @@
+import numpy as np
+
+def RSE(pred, true):
+    return np.sqrt(np.sum((true-pred)**2)) / np.sqrt(np.sum((true-true.mean())**2))
+
+def CORR(pred, true):
+    u = ((true-true.mean(0))*(pred-pred.mean(0))).sum(0) 
+    d = np.sqrt(((true-true.mean(0))**2*(pred-pred.mean(0))**2).sum(0))
+    return (u/d).mean(-1)
+
+def MAE(pred, true):
+    return np.mean(np.abs(pred-true))
+
+def MSE(pred, true):
+    return np.mean((pred-true)**2)
+
+def RMSE(pred, true):
+    return np.sqrt(MSE(pred, true))
+
+def MAPE(pred, true):
+    return np.mean(np.abs((pred - true) / true))
+
+def MSPE(pred, true):
+    return np.mean(np.square((pred - true) / true))
+
+def metric(pred, true):
+    mae = MAE(pred, true)
+    mse = MSE(pred, true)
+    rmse = RMSE(pred, true)
+    mape = MAPE(pred, true)
+    mspe = MSPE(pred, true)
+    
+    return mae,mse,rmse,mape,mspe
+
+
+# ShortTerm percentage
+def ST(pred, true):
+    return np.abs((pred-true)/pred*100)
+
+
+# ShortTerm percentage 西北  把不计入的点准确率赋值为-1
+def ST_xibei(pred, true, cap):
+    # pred_8 = pred
+    a = np.abs(true-pred)
+    b = true-pred
+    precentage = []
+    sum = 0
+    num1 =0 #过低预测计数
+    num2 = 0 #过高预测计数
+    for i in range(len(pred)):
+        if pred[i] == 0:
+            if true[i] < cap*0.03:
+                precentage.append(-1)
+                pass
+            else:
+                precentage.append(0)
+        elif true[i] == 0:
+            if pred[i] < cap*0.03:
+                precentage.append(-1)
+                pass
+            else:
+                precentage.append(0)
+        # 小于装机容量 0.03不计
+        elif pred[i] < cap*0.03 and true[i] < cap*0.03:
+            precentage.append(-1)
+            pass
+        else:
+            tmp = 1-a[i]/pred[i]
+            if tmp<0:
+                # print("超低预测 差值{} 真实值{} 预测值{}".format(a[i], true[i], pred[i]))
+                num1+=1
+                precentage.append(0)
+            else:
+                if tmp<0.5 and b[i]>0:
+                    # print("过低预测 差值{} 真实值{} 预测值{}".format(a[i], true[i], pred[i]))
+                    num1+=1
+                elif tmp < 0.5 and b[i] < 0:
+                    # print("过高预测 差值{} 真实值{} 预测值{}".format(a[i], true[i], pred[i]))
+                    num2+=1
+                precentage.append(tmp*100)
+    print("过低预测{} 过高预测{}".format(num1, num2))
+    for i in precentage:
+        sum += i
+    mean = sum / len(precentage)
+    return precentage,mean
+
+# ShortTerm percentage 西北  输出低预测和高预测
+def ST_xibei_2(pred, true,cap):
+    # pred_8 = pred
+    a = np.abs(true-pred)
+    b = true-pred
+    precentage = []
+    sum = 0
+    num1 =0
+    num2 = 0
+    for i in range(len(pred)):
+        if pred[i] == 0:
+            if true[i] < cap*0.03:
+                # precentage.append(100)
+                pass
+            else:
+                precentage.append(0)
+        elif true[i] == 0:
+            if pred[i] < cap*0.03:
+                # precentage.append(100)
+                pass
+            else:
+                precentage.append(0)
+        # 小于装机容量 0.03不计
+        elif pred[i] < cap*0.03 and true[i] < cap*0.03:
+            pass
+        else:
+            tmp = 1-a[i]/pred[i]
+            if tmp<0:
+                # print("超低预测 差值{} 真实值{} 预测值{}".format(a[i], true[i], pred[i]))
+                num1+=1
+                precentage.append(0)
+            else:
+                if tmp<0.5 and b[i]>0:
+                    # print("过低预测 差值{} 真实值{} 预测值{}".format(a[i], true[i], pred[i]))
+                    num1+=1
+                elif tmp < 0.5 and b[i] < 0:
+                    # print("过高预测 差值{} 真实值{} 预测值{}".format(a[i], true[i], pred[i]))
+                    num2+=1
+                precentage.append(tmp*100)
+    print("过低预测{} 过高预测{}".format(num1, num2))
+    for i in precentage:
+        sum += i
+    mean = sum / len(precentage)
+    return precentage,mean
+
+# ShortTerm percentage 西北  输出低预测和高预测 超低预测(超低预测准确率直接为0)
+def ST_xibei_xiangxi(pred, true,cap):
+    # pred_8 = pred
+    a = np.abs(true-pred)
+    b = true-pred
+    precentage = []
+    sum = 0
+    num1 =0
+    num2 = 0
+    num3 = 0 #超低预测 预测值不为0
+    num4 = 0 #超高预测
+    num5 = 0 #超低预测 预测值为0 真实值大于0.03*cap
+    print('-----------------评测---------------')
+    for i in range(len(pred)):
+        if pred[i] == 0:
+            if true[i] < cap*0.03:
+                pass
+            else:
+                precentage.append(0)
+                num5 += 1
+                print('预测值为0 超低点实际值:',true[i])
+        elif true[i] == 0:
+            if pred[i] < cap*0.03:
+                pass
+            else:
+                precentage.append(0)
+                # print('超高点预测值:',pred[i])
+                num4 +=1
+        # 小于装机容量 0.03不计
+        elif pred[i] < cap*0.03 and true[i] < cap*0.03:
+            pass
+        else:
+            tmp = 1-a[i]/pred[i]
+            if tmp<0:
+                # print("超低预测 差值{} 真实值{} 预测值{}".format(a[i], true[i], pred[i]))
+                num3+=1
+                # print('超低点预测值:', pred[i])
+                precentage.append(0)
+            else:
+                if tmp<0.5 and b[i]>0:
+                    # print("过低预测 差值{} 真实值{} 预测值{}".format(a[i], true[i], pred[i]))
+                    num1+=1
+                elif tmp < 0.5 and b[i] < 0:
+                    # print("过高预测 差值{} 真实值{} 预测值{}".format(a[i], true[i], pred[i]))
+                    num2+=1
+                precentage.append(tmp*100)
+    print("过低预测{} 过高预测{} 超低{} 超高{} 超低为0点{}".format(num1, num2,num3,num4,num5))
+    for i in precentage:
+        sum += i
+    mean = sum / len(precentage)
+    return precentage,mean
+
+
+
+# ShortTerm percentage 西北  输出详细信息
+def ST_xibei_1(pred, true):
+    # pred_8 = pred
+    cap = 1000
+    a = np.abs(true-pred)
+    b = true-pred
+    precentage = []
+    sum = 0
+    num1 =0
+    num2 = 0
+    sub_index=[]
+    for i in range(len(pred)):
+        if pred[i] == 0:
+            if true[i] < cap*0.03:
+                # precentage.append(100)
+                pass
+            else:
+                precentage.append(0)
+                sub_index.append(i)
+        elif true[i] == 0:
+            if pred[i] < cap*0.03:
+                # precentage.append(100)
+                pass
+            else:
+                precentage.append(0)
+                sub_index.append(i)
+        # 小于装机容量 0.03不计
+        elif pred[i] < cap*0.03 and true[i] < cap*0.03:
+            pass
+        else:
+            tmp = 1-a[i]/pred[i]
+            if tmp<0:
+                # print("超低预测 差值{} 真实值{} 预测值{}".format(a[i], true[i], pred[i]))
+                num1+=1
+                precentage.append(0)
+                sub_index.append(i)
+            else:
+                if tmp<0.5 and b[i]>0:
+                    # print("过低预测 差值{} 真实值{} 预测值{}".format(a[i], true[i], pred[i]))
+                    num1+=1
+                    sub_index.append(i)
+                    precentage.append(tmp * 100)
+                elif tmp < 0.5 and b[i] < 0:
+                    # print("过高预测 差值{} 真实值{} 预测值{}".format(a[i], true[i], pred[i]))
+                    num2+=1
+                    sub_index.append(i)
+                    precentage.append(tmp * 100)
+                # precentage.append(tmp*100)
+    print("过低预测{} 过高预测{}".format(num1, num2))
+    for i in precentage:
+        sum += i
+    mean = sum / len(precentage)
+    return precentage,mean,sub_index
+
+# UltraShortTerm percentage
+def UST_huabei(pred, true):
+    a = np.abs(pred-true)
+    ust4hour=[]
+    ust1day=[]
+    count = 0
+    buji = 0
+    for i in range(15,len(true),1):
+        num1 = 0
+        num2 = 0
+        for j in range(16):
+            num1 = num1 + a[i-j, j, 0]
+            num2 = num2 + pred[i-j, j, 0]
+        if (num2 / 16) < 0.6 and true[i,0,0] < 0.6:
+            ele = 0
+            buji += 1
+        else:
+            ele = num1 / num2 if num1 / num2 < 1 else 1
+        ust4hour.append(ele)
+        count += 1
+        if count % 96 == 0:
+            ele = 0
+            for k in range(len(ust4hour)-96,len(ust4hour)):
+                ele = ele + ust4hour[k]
+            ust1day.append((1-ele/(96-buji))*100)
+            buji = 0
+    return ust4hour, ust1day
+
+
+# UltraShortTerm percentage 西北地区
+def UST_XiBei(pred, true, capacity):
+    pt_sub = np.abs(pred - true)
+    ust1day = []
+    denominator = 0
+    ele = 0
+    for i in range(0, len(true), 1):
+        if pred[i]>=capacity*0.03 or true[i]>=capacity*0.03:
+            ele += abs((true[i]/(true[i]+pred[i]))-0.5)*pt_sub[i]
+            denominator += pt_sub[i] #分母累加
+        #每遍历96点 计算
+        if (i+1)%96==0:
+            ele = 1-(2*ele/denominator)
+            ust1day.append(ele)
+            # 清零重新累加
+            ele = 0
+            denominator = 0
+    ust_sum = 0
+    for i in ust1day:
+        ust_sum += i
+    mean = ust_sum / len(ust1day) * 100
+    return ust1day, mean
+
+
+
+
+# UltraShortTerm percentage 西北地区
+def UST_XiBei_old(pred, true, capacity):
+    a = np.abs(pred - true)
+    pos = 8
+    ust1day = []
+    count = 0
+    num1 = []
+    num2 = 0
+    num3 = []
+    jilu = []
+
+    for i in range(0, len(true), 1):
+        if pred[i] >= capacity*0.03 or true[i] >= capacity*0.03:
+            jilu.append(i)
+            num1.append(abs(true[i] / (pred[i] + true[i]) - 0.5))
+            num2 += a[i]
+        count += 1
+        if count % 96 == 0:
+            ele = 0
+            for j in jilu:
+                num3.append(a[j] / num2)
+            for k in range(len(num1)):
+                ele += num1[k] * num3[k]
+            ust1day.append(1 - 2 * ele)
+            num1.clear()
+            num2 = 0
+            num3.clear()
+            jilu.clear()
+    sum =0
+    for i in ust1day:
+        sum += i
+    mean = sum / len(ust1day)
+    return ust1day, mean
+
+
+def UST_ShanDong_wind(pred, true, capacity):
+    tmp_s = []
+    for i in range(len(pred)//96):
+        sub_pred = pred[i*96:(i+1)*96]
+        sub_true = true[i*96:(i+1)*96]
+        tmp = (1-np.sqrt(np.mean((sub_pred - sub_true)**2))/capacity)*100
+        tmp_s.append(tmp)
+    # for i in range(20):
+    #     print(tmp_s[i])
+    mean = np.mean(tmp_s)
+    return mean
+
+#每个时点对未来16个点做出预测
+def UST_XiBei_16(pred, true, capacity):
+    a = np.abs(pred - true)
+    pos = 8
+    ust1day = []
+    count = 0
+    num1 = []
+    num2 = 0
+    num3 = []
+    jilu = []
+
+    for i in range(0, len(true), 1):
+        if pred[i][pos] >= capacity*0.03 or true[i][pos] >= capacity*0.03:
+            jilu.append(i)
+            num1.append(abs(true[i][pos] / (pred[i][pos] + true[i][pos]) - 0.5))
+            num2 += a[i][pos]
+        count += 1
+        if count % 96 == 0:
+            ele = 0
+            for j in jilu:
+                num3.append(a[j][pos] / num2)
+            for k in range(len(num1)):
+                ele += num1[k] * num3[k]
+            ust1day.append(1 - 2 * ele)
+            num1.clear()
+            num2 = 0
+            num3.clear()
+            jilu.clear()
+    return ust1day

BIN
加强训练/.DS_Store


+ 13 - 0
加强训练/.gitignore

@@ -0,0 +1,13 @@
+*/__pycache__
+/__pycache__
+/.idea
+/checkpoint
+/log
+/data
+/figure
+*.log
+*.swp
+/log
+/data
+
+

+ 58 - 0
加强训练/dataset/TimeDataset.py

@@ -0,0 +1,58 @@
+from torch.utils.data import Dataset, DataLoader
+import pandas as pd
+import torch
+import numpy as np
+from utils.Arg import Arg
+arg = Arg()
+time_split = arg.time_split
+powermax = arg.power_max
+powermin = arg.power_min
+
+class TimeSeriesDataset(Dataset):
+    def __init__(self, file_inputs_2, file_inputs_3):
+        self.inputs_3 = pd.read_csv(file_inputs_3)#18维度
+        self.inputs_3 = self.inputs_3.iloc[:,1:-1]
+        self.target = pd.read_csv(file_inputs_2 ,usecols=[1])#1维
+        self.time = pd.read_csv(file_inputs_2,usecols=[0])
+        self.inputs_3.iloc[:,4:12] = np.sin(self.inputs_3.iloc[:,4:12] / 180 * np.pi)
+        self.inputs_3.iloc[:,:4]  = (self.inputs_3.iloc[:,:4]  - self.inputs_3.iloc[:,:4].min()) / (self.inputs_3.iloc[:,:4].max() - self.inputs_3.iloc[:,:4] .min())
+        self.inputs_3.iloc[:, 12:] = (self.inputs_3.iloc[:, 12:] - self.inputs_3.iloc[:, 12:].min()) / (
+                    self.inputs_3.iloc[:, 12:].max() - self.inputs_3.iloc[:, 12:].min())
+        # print(self.inputs_3.iloc[:, 13].max())
+        # print(self.inputs_3.iloc[:, 13].min())
+        self.target.iloc[:, 0] = (self.target.iloc[:, 0] - powermin) / (powermax)
+        assert len(self.inputs_3) == len(self.target) == len(self.time)
+
+    def __len__(self):
+        return len(self.target) - time_split
+
+    def __getitem__(self, idx):
+        inputs_3 = self.inputs_3.iloc[idx:idx + time_split, :]
+        #print(inputs_3)
+        target = self.target.iloc[idx + time_split - 1, :]
+        inputs_3 = torch.tensor(inputs_3.values, dtype=torch.float32).unsqueeze(1)
+        inputs_3 = (inputs_3.view(inputs_3.size()[0],inputs_3.size()[2]))
+        target = torch.tensor(target.values, dtype=torch.float32)
+        return inputs_3, target
+
+# class TimeSeriesDataset(Dataset):
+#     def __init__(self,  file_inputs_2,file_inputs_3):
+#         self.inputs_3 = pd.read_csv(file_inputs_3,usecols=[2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19])#18维度
+#         self.target = pd.read_csv(file_inputs_2,usecols=[2])#1维
+#         self.time = pd.read_csv(file_inputs_2,usecols=[0])
+#         self.inputs_3.iloc[:,0:6] = np.sin(self.inputs_3.iloc[:,0:6] / 180 * np.pi)
+#         self.inputs_3.iloc[:,7:]  = (self.inputs_3.iloc[:,7:]  - self.inputs_3.iloc[:,7:].min()) / (self.inputs_3.iloc[:,7:].max() - self.inputs_3.iloc[:,7:] .min())
+#         self.target.iloc[:, 0] = (self.target.iloc[:, 0] - powermin) / (powermax - powermin)
+#         assert len(self.inputs_3) == len(self.target) == len(self.time)
+#
+#     def __len__(self):
+#         return len(self.target) - 16
+#
+#     def __getitem__(self, idx):
+#         inputs_3 = self.inputs_3.iloc[idx:idx+16,:]
+#         target=self.target.iloc[idx+7,:]
+#         inputs_3 = torch.tensor(inputs_3.values, dtype=torch.float32)
+#         target = torch.tensor(target.values, dtype=torch.float32)
+#         # print(inputs_3.size())
+#         # print(target.size())
+#         return inputs_3, target

BIN
加强训练/model/.DS_Store


+ 39 - 0
加强训练/model/LSTM.py

@@ -0,0 +1,39 @@
+import torch
+from torch import nn
+
+
+class TimeSeriesTransformer(nn.Module):
+    def __init__(self, input_size_3=18, num_layers=1):
+        super(TimeSeriesTransformer, self).__init__()
+        self.hidden_size3 = 128
+        self.num_layers = num_layers
+        self.GRU3 = nn.GRU(input_size_3, self.hidden_size3, num_layers, batch_first=True, bidirectional=True)
+        self.attention_C = nn.Linear(2*self.hidden_size3,1)
+        self.fc1 = nn.Linear(2 * self.hidden_size3, 128)
+        self.bn1 = nn.BatchNorm1d(128)
+        self.relu1 = nn.ReLU()
+        self.fc2 = nn.Linear(128, 64)
+        self.bn2 = nn.BatchNorm1d(64)
+        self.relu2 = nn.ReLU()
+        self.fc3 = nn.Linear(64,1)
+    def attention_layer(self, lstm_out, attention_w):
+        attention_scores = attention_w(lstm_out)
+        attention_weights = torch.softmax(attention_scores, dim=1)
+        context_vector = torch.sum(attention_weights * lstm_out, dim=1)
+        return context_vector
+    #def forward(self, inputs_1, inputs_2, inputs_3):
+    def forward(self, inputs_3):
+        # inputs_1.shape = (batch_size, seq_len, input_size_1)
+        # inputs_2.shape = (batch_size, seq_len, input_size_2)
+        # inputs_3.shape = (batch_size, seq_len, input_size_3)
+        # 初始化hidden state和cell state
+        h2 = torch.zeros(self.num_layers*2, inputs_3.size(0), self.hidden_size3).to(inputs_3.device)
+        output_3, _ = self.GRU3(inputs_3, h2)
+        context_C = self.attention_layer(output_3, self.attention_C)
+        h = torch.cat([context_C], dim=1)
+        h = self.fc1(h)
+        h = self.relu1(h)
+        h = self.fc2(h)
+        h = self.relu2(h)
+        output = self.fc3(h)
+        return output
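A quick shape check of the attention-GRU model above (editorial sketch, not part of the commit); the batch size and window length follow utils/Arg.py (batch_size=32, time_split=16), and the 18 input features match the default input_size_3:

```python
import torch
from model.LSTM import TimeSeriesTransformer

model = TimeSeriesTransformer(input_size_3=18)
x = torch.randn(32, 16, 18)       # (batch, time steps, NWP features)
with torch.no_grad():
    y = model(x)
print(y.shape)                    # expected: torch.Size([32, 1]) - one power value per window
```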

+ 77 - 0
加强训练/model/Transformer_base.py

@@ -0,0 +1,77 @@
+import math
+
+import torch
+from torch import nn
+from utils.Arg import Arg
+
+arg = Arg()
+
+class PositionalEncoding(nn.Module):
+    def __init__(self, d_model, dropout=0.1, max_len=5000):
+        super(PositionalEncoding, self).__init__()
+        self.dropout = nn.Dropout(p=dropout)
+
+        pe = torch.zeros(max_len, d_model)
+        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
+        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
+        pe[:, 0::2] = torch.sin(position * div_term)
+        pe[:, 1::2] = torch.cos(position * div_term)
+        pe = pe.unsqueeze(0).transpose(0, 1)
+        self.register_buffer('pe', pe)
+
+    def forward(self, x):
+        x = x + self.pe[:x.size(0), :]
+        return self.dropout(x)
+
+
+# class TimeSeriesTransformer(nn.Module):
+#     def __init__(self, input_dim, output_dim, d_model, nhead, num_layers, dropout=0.1):
+#         super().__init__()
+#         self.input_dim = input_dim
+#         self.output_dim = output_dim
+#         self.pos_enc = PositionalEncoding(d_model, dropout)
+#         self.lstm = nn.LSTM(input_dim, d_model, batch_first=True) # 添加LSTM层
+#         self.transformer = nn.Transformer(d_model, nhead, num_layers, dropout=dropout)
+#         self.act = nn.GELU()  # 尝试使用GELU作为激活函数
+#         self.hidden_dim = 128  # 定义一个隐藏层的维度
+#         self.linear = nn.Linear(d_model, self.hidden_dim)
+#         self.output_proj = nn.Linear(self.hidden_dim, output_dim)
+#         self.layer_norm = nn.LayerNorm(d_model)
+#
+#     def forward(self, src, tgt):
+#         src = self.pos_enc(src)  # 在输入数据通过LSTM和Transformer之前,先进行位置编码
+#         src, _ = self.lstm(src)
+#         tgt = self.pos_enc(tgt)  # 在目标数据通过LSTM和Transformer之前,先进行位置编码
+#         tgt, _ = self.lstm(tgt)
+#
+#         output = self.transformer(src, tgt)
+#         output = self.layer_norm(output)
+#         output = self.act(self.linear(output))
+#         output = self.output_proj(output)
+#         output = output.squeeze(0)
+#         return output
+
+class TimeSeriesTransformer(nn.Module):
+    def __init__(self, input_dim, output_dim, d_model, nhead, num_layers, dropout=0.1):
+        super().__init__()
+        self.input_dim = input_dim
+        self.output_dim = output_dim
+        self.pos_enc = PositionalEncoding(d_model, dropout)
+        self.transformer = nn.Transformer(d_model, nhead, num_layers, dropout=dropout)
+        self.act = nn.GELU()  # 尝试使用GELU作为激活函数
+        self.hidden_dim = 64  # 降低隐藏层维度
+        self.linear = nn.Linear(d_model, self.hidden_dim)
+        self.output_proj = nn.Linear(self.hidden_dim, output_dim)
+        self.layer_norm = nn.LayerNorm(d_model)
+        self.sigmoid = nn.Tanh()  # output activation; despite the attribute name, this is Tanh
+
+    def forward(self, src, tgt):
+        src = self.pos_enc(src)  # 在输入数据通过Transformer之前,先进行位置编码
+        tgt = self.pos_enc(tgt)  # 在目标数据通过Transformer之前,先进行位置编码
+
+        output = self.transformer(src, tgt)
+        output = self.layer_norm(output)
+        output = self.act(self.linear(output))
+        output = self.output_proj(output)
+        output = self.sigmoid(output.squeeze(0))
+        return output

+ 37 - 0
加强训练/readme.md

@@ -0,0 +1,37 @@
+# Reinforced training code
+
+## How to run
+
+Run training.py directly.
+
+The code already includes base training, model testing, and related routines.
+
+## Contents
+
+- data directory: datasets produced by data processing. Reinforced training mainly uses the datasets under the training directory, which split the database data by month and drop the long all-zero stretch at the start; the other datasets match the earlier imports
+- dataset directory: a PyTorch Dataset rewritten for this project's data
+- model directory: deep-learning models; only the LSTM model has been tested
+- save directory: models and other artifacts produced by the runs
+- utils directory: helper code
+
+## Notes
+
+training_model.py implements the training modes: training on a specific dataset, base training, training on a few selected days, training on turbine data, and so on.
+
+## Debugging and running the code
+
+- Produce the datasets with the data-processing code and replace the files under /data
+- Pick the model to use under /model
+- Set the model parameters and training configuration in /utils/Arg.py
+- Start base training from training.py (a condensed sketch follows this list)
+  - Set model_use to False
+  - Comment out everything below training_model.base_train(model)
+  - Run training.py
+  - When it finishes, the model is saved automatically to save/lstm_base.pt
+- Model testing: ModeTest.test_model(2022, i, "lstm_base_pro.pt") evaluates the accuracy for month i of 2022
+- Reinforced training
+  - Set model_use to True
+  - Comment out training_model.base_train(model)
+  - Pick one of the re_train_* routines to fine-tune with
+  - Test the model accuracy
+
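A condensed sketch of the workflow above (editorial, not part of the commit), mirroring the __main__ block of training.py; it assumes the ./data/training/... CSVs described in this readme are in place:

```python
import torch
from model.LSTM import TimeSeriesTransformer
from utils import ModeTest
import training_model

model = TimeSeriesTransformer()

# Base training (model_use = False in training.py): saves save/lstm_base.pt
training_model.base_train(model)

# Reinforced training (model_use = True): reload the base model and fine-tune on one month,
# which saves save/lstm_base_pro.pt
model.load_state_dict(torch.load("save/lstm_base.pt"))
training_model.re_train_for_data(model, 2022, 5)

# Test the fine-tuned model on month i of 2022
for i in range(4, 5):
    ModeTest.test_model(2022, i, "lstm_base_pro.pt")
```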

BIN
加强训练/save/2021_03_2022_03.pt


BIN
加强训练/save/best_acc_short.pt


BIN
加强训练/save/best_loss_short.pt


BIN
加强训练/save/best_loss_short_encoder.pt


BIN
加强训练/save/lstm_base.pt


BIN
加强训练/save/lstm_base_pro.pt


BIN
加强训练/save/短期对比.xlsx


+ 123 - 0
加强训练/training.py

@@ -0,0 +1,123 @@
+import random
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import Dataset, DataLoader
+import numpy as np
+import pandas as pd
+from dataset.TimeDataset import TimeSeriesDataset
+#from model.Transformer_base import TimeSeriesTransformer
+from model.LSTM import TimeSeriesTransformer
+from tqdm import tqdm
+from utils.Arg import Arg
+from utils import ModeTest
+import matplotlib.pyplot as plt
+import training_model
+
+arg = Arg()
+# 超参数
+input_dim = arg.input_dim
+output_dim = arg.output_dim
+input_seq_length = arg.input_seq_length
+output_seq_length = arg.output_seq_length
+d_model = arg.d_model
+nhead = arg.nhead
+num_layers = arg.num_layers
+dropout = arg.dropout
+batch_size = arg.batch_size
+epochs = arg.epochs
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+
+def setup_seed(seed):
+    torch.manual_seed( seed )
+    torch.cuda.manual_seed_all( seed )
+    np.random.seed( seed )
+    random.seed( seed )
+
+def plot_fig(time,out,tag):
+    plt.plot(time,out)
+    plt.plot(time,tag)
+    plt.show()
+
+def train(model_use = False):
+    #model = TimeSeriesTransformer(input_dim, output_dim, d_model, nhead, num_layers, dropout).to(device)
+    model = TimeSeriesTransformer()
+    #model = torch.compile(model, mode="reduce-overhead")
+    if model_use:
+        print("载入历史训练的模型")
+        model.load_state_dict(torch.load('./save/best_loss_short_encoder.pt'))
+    optimizer = optim.Adam(model.parameters(), lr=1e-3)
+    criterion = nn.MSELoss()#nn.L1Loss()#nn.MSELoss()
+    best_loss = float('inf')
+
+    model.train()
+    for epoch in range(epochs):
+        epoch_loss = 0.0
+        data_len = 0
+        for i in tqdm(range(6), desc='Training progress:'):
+            if i == 4 :
+                continue
+            file_inputs_2 = './data/Dataset_training/NWP/NWP_{}.csv'.format(i)
+            file_inputs_3 = './data/Dataset_training/power/power_{}.csv'.format(i)
+            dataset = TimeSeriesDataset(file_inputs_3, file_inputs_2)
+            dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
+            # 训练模型
+            data_loss = 0.0
+            for batch_idx, (input_seq, output_seq) in enumerate(dataloader):
+                input_seq, output_seq = input_seq.to(device), output_seq.to(device)
+                # 前向传播
+                # input_seq = input_seq.permute(1, 0, 2)
+                # tgt = input_seq[-1:]
+                #predictions = model(input_seq,tgt)
+                predictions = model(input_seq)
+                # 计算损失
+                loss = criterion(predictions, output_seq)
+                # 反向传播
+                optimizer.zero_grad()
+                loss.backward()
+                optimizer.step()
+                data_loss += loss.item()
+            data_len += len(dataloader)
+            epoch_loss += data_loss
+            #print(f"Datasate is {i} ,Loss is {data_loss/data_len}")
+        print(f"Epoch {epoch+1}, Loss: {epoch_loss / data_len}")
+        # save the model only when the epoch loss improves (matches the log message)
+        if epoch_loss < best_loss:
+            best_loss = epoch_loss
+            print("Best loss model is saved")
+            torch.save(model.state_dict(), './save/best_loss_short_encoder.pt')
+
+
+
+
+if __name__ == '__main__':
+    setup_seed(50)
+    model_use = True
+    model = TimeSeriesTransformer()
+    #model = torch.compile(model, mode="reduce-overhead")
+    if model_use:
+        print("载入历史训练的模型")
+        model.load_state_dict(torch.load('save/lstm_base.pt'))
+
+    #training_model.base_train(model)
+
+
+    # re_train_for_data(model, 2022,11)
+    # re_train_for_data(model, 2022,9)
+    #train(model_use = True)
+    #training_model.re_train_for_data(model, 2022, 5)
+    #training_model.re_train_for_data(model, 2022, 10)
+
+    # for i in [5]:
+    #     #training_model.re_train_for_data(model, 2022,11)
+    #     training_model.re_train_for_data(model,2023,4)
+    #     ModeTest.test_model(2023,i,"lstm_base_pro.pt") #69
+    #     break
+
+    for i in range(4,5):
+        #training_model.re_train_for_turbine_sum_power(model)
+        #training_model.re_train_for_data(model, 2023,1)
+        #training_model.re_train_for_data(model, 2022,1)
+        #training_model.re_train_for_data(model, 2023, 2)
+        ModeTest.test_model(2022, i, "lstm_base_pro.pt")

+ 143 - 0
加强训练/training_model.py

@@ -0,0 +1,143 @@
+import glob
+import os
+import random
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.utils.data import Dataset, DataLoader
+import numpy as np
+import pandas as pd
+from dataset.TimeDataset import TimeSeriesDataset
+#from model.Transformer_base import TimeSeriesTransformer
+from model.LSTM import TimeSeriesTransformer
+from tqdm import tqdm
+from utils.Arg import Arg
+from utils import ModeTest
+import matplotlib.pyplot as plt
+
+arg = Arg()
+# 超参数
+input_dim = arg.input_dim
+output_dim = arg.output_dim
+input_seq_length = arg.input_seq_length
+output_seq_length = arg.output_seq_length
+d_model = arg.d_model
+nhead = arg.nhead
+num_layers = arg.num_layers
+dropout = arg.dropout
+batch_size = arg.batch_size
+epochs = arg.epochs
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+def train_for_one_Data(model,dataloader):
+    optimizer = optim.Adam(model.parameters(), lr=1e-3)
+    criterion = nn.MSELoss()
+    model.train()
+
+    # 训练模型
+    data_len = 0
+    data_loss = 0.0
+    epoch_loss = 0.0
+    for batch_idx, (input_seq, output_seq) in enumerate(dataloader):
+        input_seq, output_seq = input_seq.to(device), output_seq.to(device)
+        # 前向传播
+        # input_seq = input_seq.permute(1, 0, 2)
+        # tgt = input_seq[-1:]
+        #predictions = model(input_seq,tgt)
+        predictions = model(input_seq)
+        # 计算损失
+        loss = criterion(predictions, output_seq)
+        # 反向传播
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+        data_loss += loss.item()
+    data_len += len(dataloader)
+    epoch_loss += data_loss
+    return epoch_loss / data_len
+
+def base_train(model):
+    best_loss = float('inf')
+    for epoch in range(epochs):
+        #训练一年的数据
+        #print(list(range(1, 5)))
+        for i in tqdm(range(3, 13), desc='Training progress:'):
+            file_inputs_2 = './data/training/NWP/NWP_2021_{}.csv'.format(i)
+            file_inputs_3 = './data/training/power/power_2021_{}.csv'.format(i)
+            dataset = TimeSeriesDataset(file_inputs_3, file_inputs_2)
+            dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
+            epoch_loss = train_for_one_Data(model, dataloader)
+            print(f"Epoch {epoch + 1}, Loss: {epoch_loss / len(dataloader)}")
+            if epoch_loss < best_loss:
+                best_loss = epoch_loss
+                torch.save(model.state_dict(), 'save/lstm_base.pt')
+                print("Best loss model is saved")
+        for i in tqdm(range(1, 4), desc='Training progress:'):
+            file_inputs_2 = './data/training/NWP/NWP_2022_{}.csv'.format(i)
+            file_inputs_3 = './data/training/power/power_2022_{}.csv'.format(i)
+            dataset = TimeSeriesDataset(file_inputs_3, file_inputs_2)
+            dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
+            epoch_loss = train_for_one_Data(model, dataloader)
+            print(f"Epoch {epoch + 1}, Loss: {epoch_loss / len(dataloader)}")
+            if epoch_loss < best_loss:
+                best_loss = epoch_loss
+                torch.save(model.state_dict(), 'save/lstm_base.pt')
+                print("Best loss model is saved")
+
+def re_train_for_data(model,year,month):
+    best_loss = float('inf')
+    for epoch in range(epochs):
+        # 训练一年的数据
+        file_inputs_2 = f'./data/training/NWP/NWP_{year}_{month}.csv'
+        file_inputs_3 = f'./data/training/power/power_{year}_{month}.csv'
+        dataset = TimeSeriesDataset(file_inputs_3, file_inputs_2)
+        dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
+        epoch_loss = train_for_one_Data(model, dataloader)
+        print(f"Epoch {epoch + 1}, Loss: {epoch_loss / len(dataloader)}")
+        if epoch_loss < best_loss:
+            best_loss = epoch_loss
+            torch.save(model.state_dict(), './save/lstm_base_pro.pt')
+            print("Best loss model is saved")
+
+def re_train_for_alotday(model,year,month,day):
+    from torch.utils.data import Subset
+
+    best_loss = float('inf')
+    for epoch in range(epochs):
+        # 训练一年的数据
+        file_inputs_2 = f'./data/training/NWP/NWP_{year}_{month}.csv'
+        file_inputs_3 = f'./data/training/power/power_{year}_{month}.csv'
+        dataset = TimeSeriesDataset(file_inputs_3, file_inputs_2)
+        number = int(len(dataset)-day*24*60/15)
+        # 创建一个Subset,只包含前num_samples个样本
+        dataset = Subset(dataset, indices=range(number,len(dataset)+1))
+
+        dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
+        epoch_loss = train_for_one_Data(model, dataloader)
+        print(f"Epoch {epoch + 1}, Loss: {epoch_loss / len(dataloader)}")
+        if epoch_loss < best_loss:
+            best_loss = epoch_loss
+            torch.save(model.state_dict(), './save/lstm_base_pro.pt')
+            print("Best loss model is saved")
+
+def re_train_for_turbine_sum_power(model):
+    from torch.utils.data import Subset
+    best_loss = float('inf')
+    # 训练一年的数据
+    file_inputs_2 = f'./data/all_power/NWP.csv'
+    file_inputs_3 = f'./data/all_power/power_training.csv'
+    dataset = TimeSeriesDataset(file_inputs_3, file_inputs_2)
+    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
+    for epoch in range(epochs):
+        epoch_loss = train_for_one_Data(model, dataloader)
+        print(f"Epoch {epoch + 1}, Loss: {epoch_loss / len(dataloader)}")
+        if epoch_loss < best_loss:
+            best_loss = epoch_loss
+            torch.save(model.state_dict(), './save/lstm_base_pro.pt')
+            print("Best loss model is saved")
+
+def extract_date_from_path(path):
+    parts = path.split('_')
+    year = int(parts[-2])
+    month = int(parts[-1].split('.')[0])
+    return year, month

BIN
加强训练/utils/.DS_Store


+ 16 - 0
加强训练/utils/Arg.py

@@ -0,0 +1,16 @@
+class Arg():
+    def __init__(self):
+        self.input_dim = 18
+        self.output_dim = 1
+        self.input_seq_length = 16
+        self.output_seq_length = 1
+        self.d_model = 18
+        self.nhead = 3
+        self.num_layers = 4
+        self.dropout = 0.05
+        self.batch_size = 32
+        self.epochs = 20
+        self.time_split = 16
+        self.power_max = 110
+        self.power_min = 0
+        self.hidden = 6

+ 127 - 0
加强训练/utils/ModeTest.py

@@ -0,0 +1,127 @@
+import pandas as pd
+import numpy as np
+from torch import nn
+from torch.utils.data import DataLoader
+from dataset.TimeDataset import TimeSeriesDataset
+from training import TimeSeriesTransformer
+import torch
+import matplotlib.pyplot as plt
+from utils.Arg import Arg
+arg = Arg()
+
+input_dim = arg.input_dim
+output_dim = arg.output_dim
+input_seq_length = arg.input_seq_length
+output_seq_length = arg.output_seq_length
+d_model = arg.d_model
+nhead = arg.nhead
+num_layers = arg.num_layers
+dropout = arg.dropout
+batch_size = arg.batch_size
+epochs = arg.epochs
+time_split = arg.time_split
+
+def rmse(predictions, targets):
+    return np.sqrt(((predictions - targets) ** 2).mean())
+
+def calculate_rmse_for_intervals(list1, list2, step=96):
+    assert len(list1) == len(list2), "两个列表的长度必须相等"
+    rmse_results = []
+    for i in range(0, len(list1), step):
+        predictions = np.array(list1[i:i + step])
+        targets = np.array(list2[i:i + step])
+        rmse_val = rmse(predictions, targets)
+        rmse_results.append(rmse_val)
+    return rmse_results
+cap = arg.power_max
+
+def calculate_accuracy(output_values, target_values,powermax,powermin):
+    output_values_i = output_values[0:len(output_values)]
+    target_values_i = target_values[0:len(target_values)]
+    for i in range(len(output_values)):
+        output_values_i[i] = output_values_i[i] * (powermax - powermin) + powermin
+        if output_values[i] < 0: output_values[i] = 0
+        target_values_i[i] = target_values_i[i] * (powermax - powermin) + powermin
+    cal = calculate_rmse_for_intervals(output_values_i,target_values_i)
+    totalacc = 0
+    acc_list = []
+    for i in range(len(cal)):
+        accuracy = (1-cal[i]/cap)*100
+        #print("第{}天的准确率为{}".format(i,accuracy))
+        acc_list.extend([accuracy])
+        totalacc = totalacc+accuracy
+    return acc_list,totalacc/len(cal)
+
+def test(test_loader,C_TIME,powermax,powermin,name):
+    test_loss = 0.0
+    criterion = nn.MSELoss()
+    with torch.no_grad():
+        #model = TimeSeriesTransformer(input_dim, output_dim, d_model, nhead, num_layers, dropout)
+        model = TimeSeriesTransformer()
+        model.load_state_dict(torch.load(f'./save/{name}'))
+        output_values = []
+        target_values = []
+        datetime_values = C_TIME
+
+        for i, (inputs_3, target) in enumerate(test_loader):
+            #inputs_3 = inputs_3.permute(1, 0, 2)
+            #target = target.unsqueeze(0)
+            #tgt = inputs_3[-1].unsqueeze(0)
+            #output = model(inputs_3, tgt)
+            #print(output.shape)
+            #inputs_3 = inputs_3.permute(1, 0, 2)
+            #tgt = inputs_3[-1:]
+            output = model(inputs_3)#,tgt)
+            test_loss += criterion(output, target).item()
+            test_loss /= len(test_loader)
+            output_values.extend(output.squeeze().tolist())
+            target_values.extend(target.squeeze().tolist())
+            acclist,acc = calculate_accuracy(output_values, target_values,powermax,powermin)
+        print('Test Loss: {:.4f},Test accuracy:{:.4f}'.format(test_loss,acc))
+
+    print(np.min(output_values))
+    print(np.max(output_values))
+    for i in range(len(output_values)):
+        output_values[i] = output_values[i] * (powermax - powermin) + powermin
+        if output_values[i] < 0: output_values[i] = 0
+        target_values[i] = target_values[i] * (powermax - powermin) + powermin
+
+
+    plt.plot(output_values, label='model Output')
+    plt.plot(target_values, label='Actual Value')
+    for i in range(0, len(output_values) // 96 + 1):
+        plt.axvline(x=96 * i, linestyle='--', color='gray')
+        plt.annotate(round(acclist[i],2), xy=(96 * i+48, plt.ylim()[1]-5), xytext=(96 * i, plt.ylim()[1]-5), va='top', ha='left',fontsize=8)
+
+    plt.axvline(x=len(output_values), linestyle='--', color='gray')
+    plt.xlabel('Sample Index')
+    plt.ylabel('Value')
+    plt.legend()
+    plt.show()
+    df1 = pd.DataFrame(output_values)
+    df2 = pd.DataFrame(target_values)
+    df3 = pd.DataFrame(datetime_values)
+    df = pd.concat([df1, df2], axis=1)
+    df = pd.concat([df3, df], axis=1)
+    df.columns=["时间","预测功率","实际功率"]
+    df.to_excel('./save/短期对比.xlsx', index=False, sheet_name='对比数据')
+    return test_loss,acc
+
+def data_prase(NWP_test,power_test):
+    data_NWP = pd.read_csv(NWP_test)
+    data_power = pd.read_csv(power_test).iloc[:,1]
+    C_TIME = data_NWP.iloc[:,0]
+
+    dataset = TimeSeriesDataset(power_test, NWP_test)
+    test_loader = DataLoader(dataset, batch_size=arg.batch_size)
+    powermax = arg.power_max
+    powermin = arg.power_min
+    return test_loader,C_TIME,powermax,powermin
+
+def test_model(year,month,name):
+    NWP_test = f'./data/training/NWP/NWP_{year}_{month}.csv'
+    power_test = f'./data/training/power/power_{year}_{month}.csv'
+    test_loader, C_TIME, powermax, powermin = data_prase(NWP_test,power_test)
+    test_loss,acc = test(test_loader, C_TIME, powermax, powermin,name)
+    return test_loss,acc
+

+ 50 - 0
加强训练/utils/dataSplit.py

@@ -0,0 +1,50 @@
+import glob
+import os
+from Arg import Arg
+import pandas as pd
+arg = Arg()
+
+def data_split_by_month(getpath,savepath,name):
+    # 读取你的大数据集
+    df = pd.read_csv(getpath)
+    # 15分钟一个点,看看数据集是否存在超过半个月的数据集,如果不存在就不导出
+    if len(df) < 15*24*60/15:
+        print("当前数据集个数不足半个月,不予导出!!")
+        return
+    # 确保你的日期列是 datetime 类型
+    df['C_TIME'] = pd.to_datetime(df['C_TIME'])
+    # 设置日期列为索引
+    df = df.set_index('C_TIME')
+    # 按照年份和月份进行分组
+    grouped = df.groupby([df.index.year, df.index.month])
+
+    # 遍历分组后的数据,将每个月的数据保存为一个新的 csv 文件
+    for (year, month), group in grouped:
+        group.to_csv(savepath + f'{name}_{year}_{month}.csv')
+
+    return grouped
+
+
+
+def split_for_month():
+    path = "../data/Dataset_training/NWP"
+    csv_files = glob.glob(os.path.join(path, '*.csv'))
+    data_len = len(csv_files)
+    for i in range(data_len-1):
+
+        getpath = f'../data/Dataset_training/NWP/NWP_{i}.csv'
+        savepath = '../data/training/NWP/'
+        if not os.path.exists(savepath):  # 如果路径不存在
+            os.makedirs(savepath)
+        data_split_by_month(getpath, savepath,"NWP")
+
+
+        getpath = f'../data/Dataset_training/power/power_{i}.csv'
+        savepath = '../data/training/power/'
+        if not os.path.exists(savepath):  # 如果路径不存在
+            os.makedirs(savepath)
+        data_split_by_month(getpath, savepath,"power")
+
+
+if __name__ == '__main__':
+    split_for_month()

+ 40 - 0
加强训练/utils/datapro.py

@@ -0,0 +1,40 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+
+def split_test():
+    # 读取大型数据集
+    large_dataset = pd.read_csv("../data/Dataset_test/power/power_test.csv")
+
+    # 将数据集等分为5个较小的数据集
+    num_splits = 5
+    split_size = len(large_dataset) // num_splits
+
+    small_datasets = []
+    for i in range(num_splits):
+        start = i * split_size
+        end = (i + 1) * split_size if i < num_splits - 1 else len(large_dataset)
+        small_datasets.append(large_dataset[start:end])
+
+    # 保存5个较小的数据集为.csv文件
+    for i, small_dataset in enumerate(small_datasets):
+        small_dataset.to_csv(f"../data/Dataset_test/power/power_dataset_{i + 1}.csv", index=False)
+
+
+if __name__ == '__main__':
+    # 读取Excel文件
+    # df = pd.read_excel("./data/nwp1.xlsx")
+    #
+    # # 计算分段数量
+    # num_windows = df.shape[0] // 16
+    #
+    # # 循环每一个分段
+    # for i in range(num_windows):
+    #     window = df.iloc[i * 16:(i + 1) * 16,2:-2]
+    #     data = window.astype(float)
+    #
+    #     # 将数据绘制为图像
+    #     plt.imshow(data, cmap="gray")
+    #
+    #     # 保存图像
+    #     plt.savefig("./wind/window_{}.png".format(i))
+    split_test()

+ 13 - 0
劳店分区/.gitignore

@@ -0,0 +1,13 @@
+*/__pycache__
+/__pycache__
+/.idea
+/checkpoint
+/log
+/data
+/figure
+*.log
+*.swp
+/log
+/data
+
+

+ 18 - 0
劳店分区/Readme.md

@@ -0,0 +1,18 @@
+## Training side of the ultra-short-term power forecasting system
+
+This project applies an LSTM (long short-term memory) time-series model to ultra-short-term power forecasting. Features:
+
+- Compact, modular code
+- Extensible Keras framework (LSTM; the network layers can be modified)
+- Parameters, models, and the framework are highly customizable
+- Incremental training (fine-tuning a pretrained model); a sketch follows the table below
+- Predicting several targets at once (currently the actual power)
+- Predicting an arbitrary number of time points (currently 16)
+- Training visualization and logging
+
+
+
+| Training case | Header |
+|--------| ----  |
+| 1      | Cell |
+| 2      | Cell |
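As a rough illustration of the incremental-training feature above (editorial sketch; the project's actual entry points, run_case_分区.py / run_case_直接.py and model/model_keras.py, are outside this excerpt), a saved Keras model can be reloaded and fine-tuned on new data. The checkpoint path and data shapes below are assumptions; the window length and batch size follow config.yml (time_step: 16, batch_size: 64):

```python
# Hypothetical incremental-training sketch with Keras; paths and shapes are placeholders.
import numpy as np
from tensorflow.keras.models import load_model

model = load_model("./checkpoint/model_keras.h5")        # assumed checkpoint path
# If the model was compiled with a custom loss (see model/sloss.py), pass it via custom_objects=...

x_new = np.random.rand(256, 16, 15).astype("float32")    # (samples, time_step, features) - placeholder data
y_new = np.random.rand(256, 16).astype("float32")        # 16 predicted points per sample - placeholder data
model.fit(x_new, y_new, epochs=5, batch_size=64)         # continue training from the saved weights
model.save("./checkpoint/model_keras.h5")
```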

+ 76 - 0
劳店分区/back.py

@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/4/14 15:32
+# file: back.py
+# author: David
+# company: shenyang JY
+import sys
+import numpy as np
+import matplotlib.pyplot as plt
+import pandas as pd
+
+
+class data_analyse(object):
+    def __init__(self, opt, logger, process):
+        self.opt = opt
+        self.logger = logger
+        self.ds = process
+
+    def calculate_acc(self, label_data, predict_data):
+        loss = np.sum((label_data - predict_data) ** 2) / len(label_data)  # mse
+        loss_sqrt = np.sqrt(loss)  # rmse
+        loss_acc = 1 - loss_sqrt / self.opt.cap
+        return loss_acc
+
+    def get_16_points(self, results):
+        # results为模型预测的一维数组,遍历,取每16个点的最后一个点
+        preds = []
+        for res in results:
+            preds.append(res.iloc[-1].values)
+        return np.array(preds)
+
+    def predict_acc(self, predict_data, dfy):
+        predict_data = predict_data * self.ds.std['C_REAL_VALUE'] + self.ds.mean['C_REAL_VALUE']
+        dfs = dfy[0]
+        for i in range(1, len(dfy)):
+            dfs.extend(dfy[i])
+        for i, df in enumerate(dfs):
+            df["PREDICT"] = predict_data[i]
+            dfs[i] = df
+        data = self.get_16_points(dfs)
+        df = pd.DataFrame(data, columns=['C_TIME', 'C_REAL_VALUE', 'C_FP_VALUE', 'PREDICT'])
+        # label_data = label_data.reshape((-1, self.opt.output_size))
+        # label_data 要进行反归一化
+
+        label_name = [self.opt.feature_columns[i] for i in self.opt.label_in_feature_index]
+        loss_norm = self.calculate_acc(df['C_REAL_VALUE'], df['PREDICT'])
+
+        self.logger.info("The mean squared error of power {} is ".format(label_name) + str(loss_norm))
+
+        loss_norm = self.calculate_acc(df['C_REAL_VALUE'], df['C_FP_VALUE'])
+
+        self.logger.info("The mean squared error of power {} is ".format(label_name) + str(loss_norm))
+        self.preidct_draw(df['C_REAL_VALUE'].values, df['PREDICT'].values)
+
+    def preidct_draw(self, label_data, predict_data):
+        X = list(range(label_data.shape[0]))
+        print("label_x = ", X)
+        label_column_num = len(self.opt.label_columns)
+        label_name = [self.opt.feature_columns[i] for i in self.opt.label_in_feature_index]
+        if not sys.platform.startswith('linux'):    # 无桌面的Linux下无法输出,如果是有桌面的Linux,如Ubuntu,可去掉这一行
+            for i in range(label_column_num):
+                plt.figure(i+1)                     # 预测数据绘制
+                plt.plot(X, label_data[:, i], label='label', color='b')
+                plt.plot(X, predict_data[:, i], label='predict', color='g')
+                # plt.plot(predict_X, dq_data[:, i], label='dq', color='y')
+                # plt.title("Predict actual {} power with {}".format(label_name[i], self.opt.used_frame))
+                self.logger.info("The predicted power {} for the last {} point(s) is: ".format(label_name[i], self.opt.predict_points) +
+                      str(np.squeeze(predict_data[-self.opt.predict_points:, i])))
+
+                if self.opt.do_figure_save:
+                    plt.savefig(self.opt.figure_save_path+"{}predict_{}_with_{}.png".format(self.opt.continue_flag, label_name[i], self.opt.used_frame))
+
+            plt.show()
+
+    def tangle_results(self):
+        pass

+ 95 - 0
劳店分区/config.py

@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/3/17 14:46
+# file: config.py
+# author: David
+# company: shenyang JY
+
+import yaml
+import argparse
+
+
+class myargparse(argparse.ArgumentParser):
+    def __init__(self, discription, add_help):
+        super(myargparse, self).__init__(description=discription, add_help=add_help)
+        # default_config_parser = parser = argparse.ArgumentParser(
+        #     description='Training Config', add_help=False)
+        self.add_argument(
+            '-c',
+            '--config_yaml',
+            default=
+            'config.yml',
+            type=str,
+            metavar='FILE',
+            help='YAML config file specifying default arguments')
+
+        # feature_columns = list(range(1, 28))
+        label_columns = ['C_REAL_VALUE']
+
+        # label_in_feature_index = (lambda x, y: [x.index(i) for i in y])(feature_columns, label_columns)  # 因为feature不一定从0开始
+
+        # 在控制台可以指定的参数, yml中没有
+        self.add_argument('--feature_columns', type=list, default=None, help='要作为特征的列')
+
+        self.add_argument('--label_columns', type=list, default=label_columns, help='要预测的列')
+
+        self.add_argument('--label_in_feature_index', type=list, default=None, help='标签在特征列的索引')
+
+        self.add_argument('--input_size', type=int, default=0, help='输入维度')
+        self.add_argument('--input_size_lstm', type=int, default=0, help='输入维度')
+        self.add_argument('--input_size_cnn', type=int, default=0, help='输入维度')
+
+        self.add_argument('--output_size', type=int, default=16, help='输出维度')
+
+        self.add_argument("--train_data_path", type=str, default=None,help='数据集地址')  # train_data_path yml中有
+
+        # model_name 和 model_save_path 这两个参数根据yml中的参数拼接而成
+
+        self.add_argument('--model_name', type=str, default=None, help='模型名称')
+
+        self.add_argument('--model_save_path', type=str, default=None, help='模型保存地址')
+
+
+    def _init_dir(self, opt):
+        import os, time
+        # 在这里给opt赋值
+        opt.model_name = "model_" + opt.continue_flag + opt.used_frame + opt.model_postfix[opt.used_frame]
+        opt.model_save_path = './checkpoint/' + opt.model_name + "/"
+        if not os.path.exists(opt.model_save_path):
+            os.makedirs(opt.model_save_path)    # makedirs 递归创建目录
+        if not os.path.exists(opt.figure_save_path):
+            os.mkdir(opt.figure_save_path)
+        if opt.do_train and (opt.do_log_save_to_file or opt.do_train_visualized):
+            cur_time = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime())
+            log_save_path = opt.log_save_path + cur_time + '_' + opt.used_frame + "/"
+            os.makedirs(log_save_path)
+
+
+# YAML should override the argparser's content
+    def _parse_args_and_yaml(self):
+        given_configs, remaining = self.parse_known_args()
+        if given_configs.config_yaml:
+            with open(given_configs.config_yaml, 'r', encoding='utf-8') as f:
+                cfg = yaml.safe_load(f)
+                self.set_defaults(**cfg)
+
+        # The main arg parser parses the rest of the args, the usual
+        # defaults will have been overridden if config file specified.
+        opt = self.parse_args(remaining)
+        self._init_dir(opt)
+        # Cache the args as a text string to save them in the output dir later
+        opt_text = yaml.safe_dump(opt.__dict__, default_flow_style=False)
+        return opt, opt_text
+
+
+    def parse_args_and_yaml(self):
+        return self._parse_args_and_yaml()[0]
+
+
+if __name__ == "__main__":
+    # opt = _parse_args_and_yaml()
+    pass
+
+
+
+
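A minimal usage sketch of the YAML-override pattern implemented above (illustrative only; it assumes a config.yml sits next to the script, as in this repository):

    from config import myargparse

    # Precedence: argparse defaults < values from config.yml (applied via set_defaults) < explicit command-line flags.
    parse = myargparse(discription="training config", add_help=False)
    opt = parse.parse_args_and_yaml()
    print(opt.model_name, opt.model_save_path)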

+ 79 - 0
劳店分区/config.yml

@@ -0,0 +1,79 @@
+Model:
+  batch_size: 64
+  dropout_rate: 0.2
+  epoch: 20
+  hidden_size: 128
+  learning_rate: 0.001
+  lstm_layers: 2
+  patience: 5
+  random_seed: 42
+  time_step: 16
+add_train: false
+continue_flag: ''
+data_format:
+  dq: dq.csv
+  envir: "\u73AF\u5883\u6570\u636E.csv"
+  nwp: NWP.csv
+  rp: power.csv
+  turbine: turbine-15
+  cluter_power: cluster_power_分区后/cluster_data.csv
+debug_model: false
+debug_num: 500
+do_continue_train: false
+do_figure_save: false
+do_log_print_to_screen: true
+do_log_save_to_file: true
+do_predict: true
+do_train: false
+do_train_visualized: True
+csv_data_path: ./data/
+figure_save_path: ./figure/
+is_continuous_predict: True
+log_save_path: ./log/
+mean:
+  C_AIRT: 10.305992230762874
+  C_CELLT: 10.664897925448384
+  C_DIFFUSER: 143.2639061079428
+  C_DIFFUSERDA: 6.571077155136789
+  C_DIRECTR: 68.21328208942887
+  C_DIRECTRDA: 3.163283039920654
+  C_FORECAST: 3.1419734966774113
+  C_GLOBALR: 173.2587817174973
+  C_GLOBALRDA: 7.756491280271097
+  C_HOURDA: 1.998222150590958
+  C_P: 947.7830440532276
+  C_RH: 55.59672286965865
+  C_VALUE: 3.404744648318043
+  C_WD: 212.88300686007108
+  C_WS: 1.802446483180428
+model_postfix:
+  keras: .h5
+  pytorch: .pth
+  tensorflow: .ckpt
+predict_points: 16
+shuffle_train_data: false
+std:
+  C_AIRT: 12.127220611319888
+  C_CELLT: 12.654848145970181
+  C_DIFFUSER: 230.93680419867772
+  C_DIFFUSERDA: 6.4933162833681415
+  C_DIRECTR: 166.61348332191056
+  C_DIRECTRDA: 4.991297839913351
+  C_FORECAST: 4.447082956749344
+  C_GLOBALR: 258.87947949591955
+  C_GLOBALRDA: 7.9174382136573955
+  C_HOURDA: 2.9110230573747247
+  C_P: 25.75152505719027
+  C_RH: 22.445059526990818
+  C_VALUE: 5.013868885103326
+  C_WD: 112.90029001408325
+  C_WS: 1.6575249140627502
+train_data_path: ./data/
+train_data_rate: 0.9
+use_cuda: false
+used_frame: keras
+valid_data_rate: 0.15
+
+is_photovoltaic: True
+cap: 150
+envir_columns: 16

+ 90 - 0
劳店分区/data_analyse.py

@@ -0,0 +1,90 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/4/12 18:57
+# file: data_analyse.py
+# author: David
+# company: shenyang JY
+import sys
+import numpy as np
+import pandas as pd
+
+
+class data_analyse(object):
+    def __init__(self, opt, logger, process):
+        self.opt = opt
+        self.logger = logger
+        self.ds = process
+
+    def dq_acc(self):
+        excel_data_path = self.opt.csv_data_path
+        data_format = self.opt.data_format
+        dq_path = excel_data_path + data_format["dq"]
+        dq_columns = ['C_TIME', 'C_ABLE_VALUE']
+        dq = pd.read_csv(dq_path, usecols=dq_columns)
+        dq['C_TIME'] = dq['C_TIME'].astype(float)/1000
+        dq['C_TIME'] = pd.to_datetime(dq['C_TIME'], unit='s')
+        return dq
+
+    def cdq_acc(self):
+        excel_data_path = self.opt.excel_data_path
+        data_format = self.opt.data_format
+        dq_path = excel_data_path + data_format["cdq"]
+        dq_columns = ['C_TIME', 'C_ABLE_VALUE']
+        cdq = pd.read_excel(dq_path, usecols=dq_columns)
+        cdq['C_TIME'] = pd.to_datetime(cdq['C_TIME'], format='%Y-%m-%d %H:%M:%S')
+        return cdq
+
+    def calculate_acc(self, label_data, predict_data):
+        loss = np.sum((label_data - predict_data) ** 2) / len(label_data)  # mse
+        loss_sqrt = np.sqrt(loss)  # rmse
+        loss_acc = 1 - loss_sqrt / self.opt.cap
+        return loss_acc
+
+    def calculate_acc_307(self, label_data, predict_data):
+        p1 = label_data - predict_data
+        p2 = p1 / self.opt.cap
+        p3 = p2 ** 2
+        p4 = np.sum(p3)
+        p5 = p4 / len(label_data)
+        p6 = np.sqrt(p5)
+        p7 = 1 - p6
+        return p7
+
+    def get_16_points(self, results):
+        # results is the list of model predictions; iterate and keep the last of every 16 points
+        preds = []
+        for res in results:
+            preds.append(res.iloc[-1].values)
+        return np.array(preds)
+
+    def predict_acc(self, predict_data, dfy, predict_all=False):
+        if predict_all is True:
+            predict_data = predict_data * self.ds.std['SUM'] + self.ds.mean['SUM']
+        else:
+            predict_data0 = predict_data[0]
+            predict_data1 = predict_data[1]
+            predict_data0 = predict_data0 * self.ds.std['C_ACTIVE_POWER1'] + self.ds.mean['C_ACTIVE_POWER1']
+            predict_data1 = predict_data1 * self.ds.std['C_ACTIVE_POWER2'] + self.ds.mean['C_ACTIVE_POWER2']
+            predict_data = predict_data0 + predict_data1
+            # predict_data = predict_data[2] * self.ds.std['C_REAL_VALUE'] + self.ds.mean['C_REAL_VALUE']
+
+        # dfs = dfy[0]
+        # for i in range(1, len(dfy)):
+        #     dfs.extend(dfy[i])
+        for i, df in enumerate(dfy):
+            df["PREDICT"] = predict_data[i]
+            dfy[i] = df
+        data = self.get_16_points(dfy)
+        df = pd.DataFrame(data, columns=['C_TIME', 'C_ACTIVE_POWER1', 'C_ACTIVE_POWER2', 'SUM', 'C_REAL_VALUE', 'PREDICT'])
+        # label_data = label_data.reshape((-1, self.opt.output_size))
+        # label_data still needs to be de-normalized
+        dq = self.dq_acc()
+        df = pd.merge(df, dq, on='C_TIME')
+        # df.to_csv(self.opt.excel_data_path + "nwp+rp+环境(LSTM+CNN).csv")
+        # label_name = [self.opt.feature_columns[i] for i in self.opt.label_in_feature_index]
+
+        loss_norm = self.calculate_acc(df['SUM'], df['PREDICT'])
+        self.logger.info("The accuracy (1 - RMSE/cap) of {} is ".format('power') + str(loss_norm))
+
+        loss_norm = self.calculate_acc(df['C_REAL_VALUE'], df['C_ABLE_VALUE'])
+        self.logger.info("The accuracy (1 - RMSE/cap) of {} is ".format('CDQ') + str(loss_norm))

+ 97 - 0
劳店分区/data_features.py

@@ -0,0 +1,97 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/4/12 17:42
+# file: data_features.py
+# author: David
+# company: shenyang JY
+import pandas as pd
+from sklearn.model_selection import train_test_split
+import numpy as np
+from data_utils import *
+
+
+class data_features(object):
+    def __init__(self, opt, mean, std):
+        self.opt = opt
+        self.time_step = self.opt.Model["time_step"]
+        self.mean = mean
+        self.std = std
+        self.columns_lstm = list()
+        self.labels = list()
+
+    def get_train_data(self, dfs):
+        train_x, valid_x, train_y, valid_y = [], [], [], []
+        for i, df in enumerate(dfs):
+            if len(df) <= self.opt.Model["time_step"]:
+                continue
+            trainx_, trainy = self.get_data_features(df)
+            trainx_ = [np.array(x) for x in trainx_]
+            trainy_ = [y.iloc[:, 1:].values for y in trainy]
+            tx, vx, ty, vy = train_test_split(trainx_, trainy_, test_size=self.opt.valid_data_rate,
+                                                                  random_state=self.opt.Model["random_seed"],
+                                                                  shuffle=self.opt.shuffle_train_data)  # 划分训练和验证集
+            # accumulate tx and vx across segments
+            train_x.extend(tx)
+            valid_x.extend(vx)
+            train_y.extend(ty)
+            valid_y.extend(vy)
+
+        # train_y = np.concatenate(train_y, axis=0)
+        # valid_y = np.concatenate(valid_y, axis=0)
+
+        train_x = self.norm_features(train_x)
+        valid_x = self.norm_features(valid_x)
+        train_y = self.norm_label(train_y)
+        valid_y = self.norm_label(valid_y)
+
+        print("训练的数据集有{}个点".format(len(train_x)))
+        return np.array(train_x), np.array(valid_x), np.array(train_y), np.array(valid_y)
+
+    def get_test_data(self, dfs):
+        test_x, test_y, data_y = [], [], []
+        for df in dfs:
+            if len(df) <= self.opt.Model["time_step"]:
+                continue
+            testx_, testy = self.get_data_features(df)
+            testx_ = [np.array(x) for x in testx_]
+            testy_ = [y.iloc[:, 1:].values for y in testy]
+            test_x.extend(testx_)
+            test_y.extend(testy_)
+            data_y.extend(testy)
+
+        test_y = np.concatenate(test_y, axis=0)
+
+        test_x = self.norm_features(test_x)
+        test_y = self.norm_label(test_y)
+
+        print("测试的数据集有{}个点".format(len(test_x)))
+        return np.array(test_x), test_y, data_y
+
+    def get_data_features(self, feature_data):   # this implementation is a pandas-based optimization
+        time_step = self.opt.Model["time_step"]
+        time_step_loc = time_step - 1
+        train_num = int(len(feature_data))
+        features_x = [feature_data.loc[i:i + time_step_loc, 'C_T':'C_WS170'] for i in range(train_num - time_step)]
+        features_y = [feature_data.loc[i:i + time_step_loc, ['C_TIME', 'C_ACTIVE_POWER1', 'C_ACTIVE_POWER2', 'SUM', 'C_REAL_VALUE']] for i in range(train_num - time_step)]
+        self.columns_lstm = features_x[0].columns.tolist()
+        self.labels = features_y[0].columns.tolist()
+        self.labels.remove('C_TIME')
+        self.opt.input_size_lstm = len(self.columns_lstm)
+        # self.opt.input_size_lstm = len(self.columns_lstm)
+        return features_x, features_y
+
+    def norm_features(self, data: np.ndarray):
+        for i, d in enumerate(data):
+            mean = np.array([self.mean[col] for col in self.columns_lstm])
+            std = np.array([self.std[col] for col in self.columns_lstm])
+            d = (d - mean) / std  # normalize
+            data[i] = d
+        return data
+
+    def norm_label(self, label_data: np.ndarray):
+        for i, d in enumerate(label_data):
+            mean = np.array([self.mean[col] for col in self.labels])
+            std = np.array([self.std[col] for col in self.labels])
+            d = (d - mean) / std  # normalize
+            label_data[i] = d
+        return label_data
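A toy illustration of the sliding-window slicing performed by get_data_features above (hypothetical column name and window length; only the windowing logic mirrors the code):

    import pandas as pd

    time_step = 4                      # the real config uses Model["time_step"] = 16
    df = pd.DataFrame({'feat': range(10)})
    windows = [df.loc[i:i + time_step - 1, 'feat'] for i in range(len(df) - time_step)]
    print(len(windows), len(windows[0]))   # 6 windows, each 4 rows long (loc slicing is inclusive)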

+ 123 - 0
劳店分区/data_process.py

@@ -0,0 +1,123 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/3/17 10:10
+# file: main.py
+# author: David
+# company: shenyang JY
+import pandas as pd
+import os
+import numpy as np
+from data_utils import *
+import yaml
+
+
+class data_process(object):
+    def __init__(self, opt):
+        self.std = None
+        self.mean = None
+        self.opt = opt
+
+    # Mainly the gap-filling that happens after the tables are joined
+    def get_processed_data(self, test_list):
+        """
+        Join NWP and cluster power, split on missing-time gaps and fill the short gaps.
+        :param test_list: indices of the segments reserved as the test set
+        :return: the segmented, gap-filled training and test DataFrames
+        """
+        csv_data_path = self.opt.csv_data_path
+        nwp = pd.read_csv(os.path.join(csv_data_path, self.opt.data_format["nwp"]))
+        cluster_power = pd.read_csv(os.path.join(csv_data_path, self.opt.data_format["cluter_power"]))
+        cluster_power['C_TIME'] = pd.to_datetime(cluster_power['C_TIME'])
+        cluster_power['C_TIME'] = cluster_power['C_TIME'].dt.strftime('%Y-%m-%d %H:%M:%S')
+        # Step 1: join the tables
+        unite = pd.merge(nwp, cluster_power, on='C_TIME')
+        # Step 2: compute the time gaps
+        unite['C_TIME'] = pd.to_datetime(unite['C_TIME'])
+        unite['time_diff'] = unite['C_TIME'].diff()
+        dt_short = pd.Timedelta(minutes=15)
+        dt_long = pd.Timedelta(minutes=15 * 10)
+        dfs = self.missing_time_splite(unite, dt_short, dt_long)
+        miss_points = unite[(unite['time_diff'] > dt_short) & (unite['time_diff'] < dt_long)]
+        miss_number = miss_points['time_diff'].dt.total_seconds().sum(axis=0)/(15*60) - len(miss_points)
+        print("再次测算,需要插值的总点数为:", miss_number)
+        dfs_train, dfs_test = self.data_fill(dfs, test_list)
+        self.normalize(dfs_train)  # normalize
+        return dfs_train, dfs_test
+
+    def normalize(self, dfs):
+        """
+        Compute the normalization statistics; C_TIME is not normalized for now
+        :param dfs: list of training DataFrames
+        :return:
+        """
+        df = pd.concat(dfs, axis=0)
+        # df = df.reset_index()
+        # df["C_TIME"] = df["C_TIME"].apply(datetime_to_timestr)
+        mean = np.mean(df.iloc[1:, :], axis=0)  # column means of the numeric columns
+        std = np.std(df.iloc[1:, :], axis=0)  # column standard deviations
+        # if hasattr(self.opt, 'mean') is False or hasattr(self.opt, 'std') is False:
+        #     self.set_yml({'mean': mean.to_dict(), 'std': std.to_dict()})
+        print("归一化参数,均值为:{},方差为:{}".format(mean.to_dict(), std.to_dict()))
+        self.mean, self.std = mean.to_dict(), std.to_dict()
+
+    def missing_time_splite(self, df, dt_short, dt_long):
+        n_long, n_short, n_points = 0, 0, 0
+        start_index = 0
+        dfs = []
+        for i in range(1, len(df)):
+            if df['time_diff'][i] >= dt_long:
+                df_long = df.iloc[start_index:i, :-1]
+                dfs.append(df_long)
+                start_index = i
+                n_long += 1
+            if df['time_diff'][i] > dt_short:
+                print(df['C_TIME'][i-1], end=" ~ ")
+                print(df['C_TIME'][i], end=" ")
+                points = df['time_diff'].dt.total_seconds()[i]/(60*15)-1
+                print("缺失点数:", points)
+                if df['time_diff'][i] < dt_long:
+                    n_short += 1
+                    n_points += points
+                    print("需要补值的点数:", points)
+        dfs.append(df.iloc[start_index:, :-1])
+        print("数据总数:", len(df), ",时序缺失的间隔:", n_short, "其中,较长的时间间隔:", n_long)
+        print("需要补值的总点数:", n_points)
+        return dfs[1:]
+
+    def data_fill(self, dfs, test):
+        dfs_train, dfs_test = [], []
+        for i, df in enumerate(dfs):
+            df1 = df.set_index('C_TIME', inplace=False)
+            dff = df1.resample('15T').bfill()
+            dff.reset_index(inplace=True)
+            points = len(dff) - len(df1)
+            if i not in test:
+                if i == 0:
+                    dff = dff.iloc[8:, :].reset_index(drop=True)
+                dfs_train.append(dff)
+                print("{} ~ {} 有 {} 个点, 填补 {} 个点.".format(dff.iloc[0, 0], dff.iloc[-1, 0], len(dff), points))
+            else:
+                print("{} ~ {} 有 {} 个点, 缺失 {} 个点.(测试集)".format(dff.iloc[0, 0], dff.iloc[-1, 0], len(dff), points))
+                dfs_test.append(dfs[i].reset_index(drop=True))
+        return dfs_train, dfs_test
+
+    def set_yml(self, yml_dict):
+        with open(self.opt.config_yaml, 'r', encoding='utf-8') as f:
+            cfg = yaml.safe_load(f)
+        for k, v in yml_dict.items():
+            cfg[k] = v
+        with open(self.opt.config_yaml, 'w') as f:
+            yaml.safe_dump(cfg, f, default_flow_style=False)
+
+    def drop_duplicated(self, df):
+        df = df.groupby(level=0).mean()  # de-duplicate the DatetimeIndex by averaging
+        return df
+
+if __name__ == "__main__":
+    # dq = ds.read_data(dq_path, dq_columns)[0]
+    # rp = ds.read_data(rp_path, rp_columns)[0]
+    # # rp_average(rp)    # 计算平均功率
+    # envir = ds.read_data(envir_path, envir_columns)[0]
+    # tables = ds.tables_integra(dq, rp, envir)
+    # ds.tables_norm_result(tables)
+    pass
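A small, self-contained sketch of the gap-filling idea used in data_fill above (hypothetical data; only the resample('15T').bfill() call is taken from the code):

    import pandas as pd

    # Three quarter-hour points with one missing step at 00:15.
    df = pd.DataFrame({
        'C_TIME': pd.to_datetime(['2023-01-01 00:00', '2023-01-01 00:30', '2023-01-01 00:45']),
        'power': [1.0, 3.0, 4.0],
    })
    filled = df.set_index('C_TIME').resample('15T').bfill().reset_index()
    print(len(filled) - len(df))  # 1 point was back-filled (00:15 takes the 00:30 value)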

+ 65 - 0
劳店分区/data_utils.py

@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/3/22 17:17
+# file: dpdUtils.py
+# author: David
+# company: shenyang JY
+
+
+import time, datetime
+
+
+class ValidationError(Exception):
+    def __init__(self, message):
+        self.message = message
+
+
+def timestamp_to_datetime(ts):
+    if type(ts) is not int:
+        raise ValueError("timestamp-时间格式必须是整型")
+    if len(str(ts)) == 13:
+        return datetime.datetime.fromtimestamp(ts/1000)
+    elif len(str(ts)) == 10:
+        return datetime.datetime.fromtimestamp(ts)
+    else:
+        raise ValueError("timestamp-时间格式长度错误")
+
+
+def datetime_to_timestamp(dt, length):
+    # length selects 10-digit (second) or 13-digit (millisecond) timestamps
+    if length not in (10, 13):
+        raise ValueError("timestamp-时间戳转换长度错误")
+    if length == 10:
+        return int(round(time.mktime(dt.timetuple())))
+    else:
+        return int(round(time.mktime(dt.timetuple()))*1000)
+
+
+def datetime_to_timestr(dt):
+    return int(dt.strftime('%m%d%H%M'))
+
+
+def timestr_to_datetime(time_data):
+    """
+    Convert a timestamp or time string to datetime.datetime
+    :param time_data: int, float or str
+    :return: datetime.datetime
+    """
+    if isinstance(time_data, float):
+        result = timestamp_to_datetime(int(time_data))
+    elif isinstance(time_data, int):
+        result = timestamp_to_datetime(time_data)
+    elif isinstance(time_data, str):
+        if len(time_data) == 10:
+            result = datetime.datetime.strptime(time_data, '%d/%m/%Y')
+            # result = datetime.datetime.strptime(time_data, '%Y-%m-%d')
+        elif len(time_data) in {17, 18, 19}:
+            # result = datetime.datetime.strptime(time_data, '%d/%m/%Y %H:%M:%S')   # strptime parsing must match the string format exactly
+            result = datetime.datetime.strptime(time_data, '%Y-%m-%d %H:%M:%S')
+        else:
+            raise ValidationError("时间字符串长度不满足要求!")
+    else:
+        raise ValidationError("unsupported time type: {}".format(type(time_data)))
+    return result
+
+
+def timestamp_to_timestr(t):
+    return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(t))
+

+ 83 - 0
劳店分区/figure.py

@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/3/20 15:19
+# file: figure.py
+# author: David
+# company: shenyang JY
+import sys
+import numpy as np
+import matplotlib.pyplot as plt
+
+
+class Figure(object):
+    def __init__(self, opt, logger, process):
+        self.opt = opt
+        self.ds = process
+        self.logger = logger
+
+    def get_16_points(self, results):
+        # results is the list of model predictions; iterate and keep the last of every 16 points
+        preds = []
+        for res in results:
+            preds.append(res[-1])
+        return np.array(preds)
+
+    def draw(self, label_data, predict_norm_data, numbers):
+        # label_data = origin_data.data[origin_data.train_num + origin_data.start_num_in_test : ,
+        #                                         config.label_in_feature_index]
+        # dq_data = dq_data.reshape((-1, self.opt.output_size))
+        predict_norm_data = self.get_16_points(predict_norm_data)
+        label_data = self.get_16_points(label_data)
+        label_data = label_data.reshape((-1, self.opt.output_size))
+        # label_data still needs to be de-normalized
+        label_data = label_data * self.ds.std[self.opt.label_in_feature_index] + \
+                       self.ds.mean[self.opt.label_in_feature_index]
+        predict_data = predict_norm_data * self.ds.std[self.opt.label_in_feature_index] + \
+                       self.ds.mean[self.opt.label_in_feature_index]   # 通过保存的均值和方差还原数据
+        # dq_data = dq_data * self.ds.std[0] + self.ds.mean[0]
+        # predict_data = predict_norm_data
+        assert label_data.shape[0] == predict_data.shape[0], "The element number in origin and predicted data is different"
+
+        label_name = [self.ds.tables_column_name[i] for i in self.opt.label_in_feature_index]
+        label_column_num = len(self.opt.label_columns)
+
+        # label and predict are offset from each other by config.predict_day days
+        # below are two ways of computing the loss on normalized data; they give the same result (easy to verify by hand)
+        # label_norm_data = origin_data.norm_data[origin_data.train_num + origin_data.start_num_in_test:,
+        #              config.label_in_feature_index]
+        # loss_norm = np.mean((label_norm_data[config.predict_day:] - predict_norm_data[:-config.predict_day]) ** 2, axis=0)
+        # logger.info("The mean squared error of stock {} is ".format(label_name) + str(loss_norm))
+
+        loss = np.sum((label_data - predict_data) ** 2)/len(label_data)  # mse
+        # loss = np.mean((label_data - predict_data) ** 2, axis=0)
+        loss_sqrt = np.sqrt(loss)   # rmse
+        loss_norm = 1 - loss_sqrt / self.opt.cap
+        # loss_norm = loss/(ds.std[opt.label_in_feature_index] ** 2)
+        self.logger.info("The accuracy (1 - RMSE/cap) of power {} is ".format(label_name) + str(loss_norm))
+
+        # loss1 = np.sum((label_data - dq_data) ** 2) / len(label_data)  # mse
+        # loss_sqrt1 = np.sqrt(loss1)  # rmse
+        # loss_norm1 = 1 - loss_sqrt1 / self.opt.cap
+        # self.logger.info("The mean squared error1 of power {} is ".format(label_name) + str(loss_norm1))
+        if self.opt.is_continuous_predict:
+            # label_X = range(int((self.ds.data_num - self.ds.train_num - 32)))
+            label_X = list(range(numbers))
+        else:
+            label_X = range(int((self.ds.data_num - self.ds.train_num - self.ds.start_num_in_test)/2))
+        print("label_x = ", label_X)
+        predict_X = [x for x in label_X]
+
+        if not sys.platform.startswith('linux'):    # headless Linux cannot show plots; on desktop Linux (e.g. Ubuntu) this check can be removed
+            for i in range(label_column_num):
+                plt.figure(i+1)                     # plot the predictions
+                plt.plot(label_X, label_data[:, i], label='label', color='b')
+                plt.plot(predict_X, predict_data[:, i], label='predict', color='g')
+                # plt.plot(predict_X, dq_data[:, i], label='dq', color='y')
+                # plt.title("Predict actual {} power with {}".format(label_name[i], self.opt.used_frame))
+                self.logger.info("The predicted power {} for the last {} point(s) is: ".format(label_name[i], self.opt.predict_points) +
+                      str(np.squeeze(predict_data[-self.opt.predict_points:, i])))
+
+                if self.opt.do_figure_save:
+                    plt.savefig(self.opt.figure_save_path+"{}predict_{}_with_{}.png".format(self.opt.continue_flag, label_name[i], self.opt.used_frame))
+
+            plt.show()

+ 43 - 0
劳店分区/logger.py

@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/3/20 15:19
+# file: logger.py
+# author: David
+# company: shenyang JY
+
+import logging, sys
+from logging.handlers import RotatingFileHandler
+
+
+def load_logger(config):
+    logger = logging.getLogger()
+    logger.setLevel(level=logging.DEBUG)
+
+    # StreamHandler
+    if config.do_log_print_to_screen:
+        stream_handler = logging.StreamHandler(sys.stdout)
+        stream_handler.setLevel(level=logging.INFO)
+        formatter = logging.Formatter(datefmt='%Y/%m/%d %H:%M:%S',
+                                      fmt='[ %(asctime)s ] %(message)s')
+        stream_handler.setFormatter(formatter)
+        logger.addHandler(stream_handler)
+
+    # FileHandler
+    if config.do_log_save_to_file:
+        file_handler = RotatingFileHandler(config.log_save_path + "out.log", maxBytes=1024000, backupCount=5)
+        file_handler.setLevel(level=logging.INFO)
+        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
+        file_handler.setFormatter(formatter)
+        logger.addHandler(file_handler)
+
+        # also record the config info in the log file
+        config_dict = {}
+        for key in dir(config):
+            if not key.startswith("_"):
+                config_dict[key] = getattr(config, key)
+        config_str = str(config_dict)
+        config_list = config_str[1:-1].split(", '")
+        config_save_str = "\nConfig:\n" + "\n'".join(config_list)
+        logger.info(config_save_str)
+
+    return logger

+ 0 - 0
劳店分区/model/__init__.py


+ 68 - 0
劳店分区/model/model_keras.py

@@ -0,0 +1,68 @@
+# -*- coding: UTF-8 -*-
+from keras.layers import Input, Dense, LSTM, Lambda, concatenate, Conv1D, Conv2D, MaxPooling1D, Reshape, Flatten
+from keras.models import Model
+from keras.callbacks import ModelCheckpoint, EarlyStopping
+
+
+def get_keras_model(opt):
+    lstm_input = Input(shape=(opt.predict_points, opt.input_size_lstm))
+    lstm = lstm_input
+    for i in range(opt.Model['lstm_layers']):
+        rs = True
+        # if i == opt.Model['lstm_layers']-1:
+        #     rs = False
+        lstm = LSTM(units=opt.Model['hidden_size'], dropout=opt.Model['dropout_rate'], return_sequences=rs)(lstm)
+    output = Dense(1)(lstm)
+    output = Flatten(data_format='channels_last')(output)
+
+    lstm1 = lstm_input
+    for i in range(opt.Model['lstm_layers']):
+        rs = True
+        # if i == opt.Model['lstm_layers']-1:
+        #     rs = False
+        lstm1 = LSTM(units=opt.Model['hidden_size'], dropout=opt.Model['dropout_rate'], return_sequences=rs)(lstm1)
+    output1 = Dense(1)(lstm1)
+    output1 = Flatten(data_format='channels_last')(output1)
+
+    outputs = Lambda(sum)([output, output1])
+    # outputs = Dense(opt.output_size)(outputs)
+    model = Model(lstm_input, [output, output1])
+    # model = Model(lstm_input, outputs)
+    model.compile(loss='mse', optimizer='adam')     # metrics=["mae"]
+    return model
+
+
+def gpu_train_init():
+    import tensorflow as tf
+    from keras.backend.tensorflow_backend import set_session
+    sess_config = tf.ConfigProto(log_device_placement=True, allow_soft_placement=True)
+    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.7  # use at most 70% of GPU memory
+    sess_config.gpu_options.allow_growth=True   # do not grab all GPU memory at startup; allocate on demand
+    sess = tf.Session(config=sess_config)
+    set_session(sess)
+
+
+def train(opt, train_and_valid_data):
+    if opt.use_cuda: gpu_train_init()
+    train_X, train_Y, valid_X, valid_Y = train_and_valid_data
+    import numpy as np
+    print("----------", np.array(train_X[0]).shape)
+    print("++++++++++", np.array(train_X[1]).shape)
+    model = get_keras_model(opt)
+    model.summary()
+    if opt.add_train:
+        model.load_weights(opt.model_save_path + opt.model_name)
+
+    check_point = ModelCheckpoint(filepath=opt.model_save_path + opt.model_name, monitor='val_loss',
+                                    save_best_only=True, mode='auto')
+    early_stop = EarlyStopping(monitor='val_loss', patience=opt.Model['patience'], mode='auto')
+    model.fit(train_X, train_Y, batch_size=opt.Model['batch_size'], epochs=opt.Model['epoch'], verbose=2,
+              validation_data=(valid_X, valid_Y), callbacks=[check_point, early_stop])
+
+
+def predict(config, test_X):
+    model = get_keras_model(config)
+    model.load_weights(config.model_save_path + config.model_name)
+    result = model.predict(test_X, batch_size=1)
+    # result = result.reshape((-1, config.output_size))
+    return result
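A rough, self-contained usage sketch for the train/predict helpers above (shapes, paths and option values are placeholders; only the field names match what the functions actually read):

    import os
    import numpy as np
    from model.model_keras import train, predict

    class Opt:  # stand-in for the parsed config object
        Model = {'lstm_layers': 2, 'hidden_size': 128, 'dropout_rate': 0.2,
                 'batch_size': 64, 'epoch': 2, 'patience': 5}
        use_cuda = False
        add_train = False
        model_save_path = './checkpoint/demo/'   # placeholder path
        model_name = 'model_keras.h5'            # placeholder name
        predict_points = 16
        input_size_lstm = 27

    opt = Opt()
    os.makedirs(opt.model_save_path, exist_ok=True)
    x = np.random.rand(32, opt.predict_points, opt.input_size_lstm).astype('float32')
    y = [np.random.rand(32, opt.predict_points), np.random.rand(32, opt.predict_points)]  # one target per LSTM head
    train(opt, [x, y, x, y])          # fits the two-headed model and checkpoints the best weights
    result = predict(opt, x[:4])      # two normalized outputs, one per partition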

+ 68 - 0
劳店分区/model/model_keras_1.py

@@ -0,0 +1,68 @@
+# -*- coding: UTF-8 -*-
+from keras.layers import Input, Dense, LSTM, Lambda, concatenate, Conv1D, Conv2D, MaxPooling1D, Reshape, Flatten
+from keras.models import Model
+from keras.callbacks import ModelCheckpoint, EarlyStopping
+
+
+def get_keras_model(opt):
+    lstm_input = Input(shape=(opt.predict_points, opt.input_size_lstm))
+
+    lstm = lstm_input
+    for i in range(opt.Model['lstm_layers']):
+        rs = True
+        # if i == opt.Model['lstm_layers']-1:
+        #     rs = False
+        lstm = LSTM(units=opt.Model['hidden_size'], dropout=opt.Model['dropout_rate'], return_sequences=rs)(lstm)
+    output = Dense(1)(lstm)
+    output = Flatten(data_format='channels_last')(output)
+
+    # lstm1 = lstm_input
+    # for i in range(opt.Model['lstm_layers']):
+    #     rs = True
+    #     # if i == opt.Model['lstm_layers']-1:
+    #     #     rs = False
+    #     lstm1 = LSTM(units=opt.Model['hidden_size'], dropout=opt.Model['dropout_rate'], return_sequences=rs)(lstm1)
+    # output1 = Dense(1)(lstm1)
+    # output1 = Flatten(data_format='channels_last')(output1)
+    #
+    # outputs = Lambda(sum, output_shape=())([output, output1])
+
+    model = Model(lstm_input, output)
+    model.compile(loss='mse', optimizer='adam')     # metrics=["mae"]
+    return model
+
+
+def gpu_train_init():
+    import tensorflow as tf
+    from keras.backend.tensorflow_backend import set_session
+    sess_config = tf.ConfigProto(log_device_placement=True, allow_soft_placement=True)
+    sess_config.gpu_options.per_process_gpu_memory_fraction = 0.7  # use at most 70% of GPU memory
+    sess_config.gpu_options.allow_growth=True   # do not grab all GPU memory at startup; allocate on demand
+    sess = tf.Session(config=sess_config)
+    set_session(sess)
+
+
+def train(opt, train_and_valid_data):
+    if opt.use_cuda: gpu_train_init()
+    train_X, train_Y, valid_X, valid_Y = train_and_valid_data
+    import numpy as np
+    print("----------", np.array(train_X[0]).shape)
+    print("++++++++++", np.array(train_X[1]).shape)
+    model = get_keras_model(opt)
+    model.summary()
+    if opt.add_train:
+        model.load_weights(opt.model_save_path + opt.model_name)
+
+    check_point = ModelCheckpoint(filepath=opt.model_save_path + opt.model_name, monitor='val_loss',
+                                    save_best_only=True, mode='auto')
+    early_stop = EarlyStopping(monitor='val_loss', patience=opt.Model['patience'], mode='auto')
+    model.fit(train_X, train_Y, batch_size=opt.Model['batch_size'], epochs=opt.Model['epoch'], verbose=2,
+              validation_data=(valid_X, valid_Y), callbacks=[check_point, early_stop])
+
+
+def predict(config, test_X):
+    model = get_keras_model(config)
+    model.load_weights(config.model_save_path + config.model_name)
+    result = model.predict(test_X, batch_size=1)
+    # result = result.reshape((-1, config.output_size))
+    return result

+ 33 - 0
劳店分区/model/sloss.py

@@ -0,0 +1,33 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/5/8 13:15
+# file: loss.py.py
+# author: David
+# company: shenyang JY
+import tensorflow as tf
+from keras import backend as K
+import keras
+
+
+class SouthLoss(keras.losses.Loss):
+    def __init__(self, cap):
+        """
+        Loss function for the Southern Grid's new assessment rule
+        :param cap: installed capacity
+        """
+        self.cap = 0.2*cap
+        super().__init__()
+
+    def call(self, y_true, y_predict):
+        """
+        Called automatically by Keras
+        :param y_true: labels
+        :param y_predict: predictions
+        :return: loss value
+        """
+        print(type(y_true))
+        print("y_shape:", y_true.shape)
+        loss = K.square(y_true - y_predict)
+        loss = K.mean(loss, axis=-1)
+
+        return loss
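A minimal sketch of wiring the custom loss above into a Keras model (illustrative only; the tiny Dense model is a placeholder, the import assumes the 劳店分区 directory is the working directory, and cap=150 mirrors the value in config.yml):

    from keras.layers import Input, Dense
    from keras.models import Model
    from model.sloss import SouthLoss

    inp = Input(shape=(16,))
    out = Dense(16)(inp)
    model = Model(inp, out)
    model.compile(loss=SouthLoss(cap=150), optimizer='adam')  # cap: installed capacity; the class keeps 0.2 * cap internally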

+ 8 - 0
劳店分区/requirements.txt

@@ -0,0 +1,8 @@
+scikit-learn
+pandas
+argparse
+keras
+tensorflow==1.15
+matplotlib>=3.0.2
+numpy>=1.14.6
+scipy>=1.1.0

+ 63 - 0
劳店分区/run_case_分区.py

@@ -0,0 +1,63 @@
+# -*- coding: UTF-8 -*-
+
+import numpy as np
+import os
+import sys
+import time
+from figure import Figure
+from data_process import data_process
+from data_features import data_features
+from logger import load_logger
+from config import myargparse
+from data_analyse import data_analyse
+frame = "keras"
+
+if frame == "keras":
+    from model.model_keras import train, predict
+    os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3'
+else:
+    raise Exception("Wrong frame selection")
+
+
+def main():
+    parse = myargparse(discription="training config", add_help=False)
+    opt = parse.parse_args_and_yaml()
+    logger = load_logger(opt)
+    try:
+        np.random.seed(opt.Model["random_seed"])
+        process = data_process(opt=opt)
+        dfs_train, dfs_test = process.get_processed_data([9, 10])
+        features = data_features(opt=opt, mean=process.mean, std=process.std)
+        if opt.do_train:
+            train_X, valid_X, train_Y, valid_Y = features.get_train_data(dfs_train)
+            train_Y = [np.array([y[:, 0] for y in train_Y]), np.array([y[:, 1] for y in train_Y])]
+            valid_Y = [np.array([y[:, 0] for y in valid_Y]), np.array([y[:, 1] for y in valid_Y])]
+
+            # train_Y = [np.array([y[:, 3] for y in train_Y])]
+            # valid_Y = [np.array([y[:, 3] for y in valid_Y])]
+            train(opt, [train_X, train_Y, valid_X, valid_Y])
+        if opt.do_predict:
+            test_X, test_Y, df_Y = features.get_test_data(dfs_test)
+            result = predict(opt, test_X)       # the output here is still normalized (not yet de-normalized)
+            analyse = data_analyse(opt, logger, process)
+            analyse.predict_acc(result, df_Y, predict_all=False)
+    except Exception:
+        logger.error("Run Error", exc_info=True)
+
+
+if __name__ == "__main__":
+    import argparse
+    # argparse makes it easy to pass parameters on the command line; add more as needed
+    # parser = argparse.ArgumentParser()
+    # parser.add_argument("-t", "--do_train", default=False, type=bool, help="whether to train")
+    # parser.add_argument("-p", "--do_predict", default=True, type=bool, help="whether to train")
+    # parser.add_argument("-b", "--batch_size", default=64, type=int, help="batch size")
+    # parser.add_argument("-e", "--epoch", default=20, type=int, help="epochs num")
+    # args = parser.parse_args()
+
+    # con = Config()
+    # for key in dir(args):               # dir(args) 函数获得args所有的属性
+    # for key in dir(args):               # dir(args) returns all attributes of args
+    #     if not key.startswith("_"):     # skip built-in attributes such as __name__
+    #         setattr(con, key, getattr(args, key))   # copy the value onto Config
+

+ 63 - 0
劳店分区/run_case_直接.py

@@ -0,0 +1,63 @@
+# -*- coding: UTF-8 -*-
+
+import numpy as np
+import os
+import sys
+import time
+from figure import Figure
+from data_process import data_process
+from data_features import data_features
+from logger import load_logger
+from config import myargparse
+from data_analyse import data_analyse
+frame = "keras"
+
+if frame == "keras":
+    from model.model_keras_1 import train, predict
+    os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3'
+else:
+    raise Exception("Wrong frame selection")
+
+
+def main():
+    parse = myargparse(discription="training config", add_help=False)
+    opt = parse.parse_args_and_yaml()
+    logger = load_logger(opt)
+    try:
+        np.random.seed(opt.Model["random_seed"])
+        process = data_process(opt=opt)
+        dfs_train, dfs_test = process.get_processed_data([9, 10])
+        features = data_features(opt=opt, mean=process.mean, std=process.std)
+        if opt.do_train:
+            train_X, valid_X, train_Y, valid_Y = features.get_train_data(dfs_train)
+            # train_Y = [np.array([y[:, 0] for y in train_Y]), np.array([y[:, 1] for y in train_Y])]
+            # valid_Y = [np.array([y[:, 0] for y in valid_Y]), np.array([y[:, 1] for y in valid_Y])]
+
+            train_Y = [np.array([y[:, 3] for y in train_Y])]
+            valid_Y = [np.array([y[:, 3] for y in valid_Y])]
+            train(opt, [train_X, train_Y, valid_X, valid_Y])
+        if opt.do_predict:
+            test_X, test_Y, df_Y = features.get_test_data(dfs_test)
+            result = predict(opt, test_X)       # the output here is still normalized (not yet de-normalized)
+            analyse = data_analyse(opt, logger, process)
+            analyse.predict_acc(result, df_Y, predict_all=True)
+    except Exception:
+        logger.error("Run Error", exc_info=True)
+
+
+if __name__ == "__main__":
+    import argparse
+    # argparse makes it easy to pass parameters on the command line; add more as needed
+    # parser = argparse.ArgumentParser()
+    # parser.add_argument("-t", "--do_train", default=False, type=bool, help="whether to train")
+    # parser.add_argument("-p", "--do_predict", default=True, type=bool, help="whether to train")
+    # parser.add_argument("-b", "--batch_size", default=64, type=int, help="batch size")
+    # parser.add_argument("-e", "--epoch", default=20, type=int, help="epochs num")
+    # args = parser.parse_args()
+
+    # con = Config()
+    # for key in dir(args):               # dir(args) returns all attributes of args
+    #     if not key.startswith("_"):     # skip built-in attributes such as __name__
+    #         setattr(con, key, getattr(args, key))   # copy the value onto Config
+    main()
+

BIN
秀水分区/.DS_Store


+ 13 - 0
秀水分区/.gitignore

@@ -0,0 +1,13 @@
+*/__pycache__
+/__pycache__
+/.idea
+/checkpoint
+/log
+/data
+/figure
+*.log
+*.swp
+/log
+/data
+
+

+ 10 - 0
秀水分区/README.md

@@ -0,0 +1,10 @@
+## light-SD  signal decomposition experiment
+
+This project combines an LSTM temporal model with a CNN that extracts environment features for ultra-short-term photovoltaic power forecasting. Main characteristics:
+
+Three optimizations:
+- Enlarged look-back window
+- More convolution kernels and a stronger pooling scheme
+- Combined LSTM sequence propagation and temporal propagation
+
+
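A rough sketch of the LSTM + CNN pattern the README describes (purely illustrative: layer sizes, window length and feature counts are placeholders, not the repository's actual model):

    from keras.layers import Input, LSTM, Conv1D, MaxPooling1D, Flatten, Dense, concatenate
    from keras.models import Model

    look_back, n_nwp, n_envir = 48, 27, 5   # enlarged look-back window (placeholder sizes)

    nwp_in = Input(shape=(look_back, n_nwp))
    x = LSTM(64, return_sequences=True)(nwp_in)   # sequence propagation
    x = LSTM(64)(x)                               # temporal summary

    env_in = Input(shape=(look_back, n_envir))
    c = Conv1D(filters=64, kernel_size=3, activation='relu')(env_in)  # more kernels on the environment features
    c = MaxPooling1D(pool_size=2)(c)
    c = Flatten()(c)

    out = Dense(16)(concatenate([x, c]))          # 16 quarter-hour points ahead
    model = Model([nwp_in, env_in], out)
    model.compile(loss='mse', optimizer='adam')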

+ 86 - 0
秀水分区/calculate.py

@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/6/16 10:15
+# file: calculate.py
+# author: David
+# company: shenyang JY
+import requests
+import json, time, datetime
+
+url = 'http://49.4.78.194:17160/apiCalculate/calculate'
+'''
+How to use the accuracy interface:
+1. The entry point is calculate_acc.
+2. Pass the arguments as follows:
+   data contains C_TIME (time), realValue (actual power), ableValue (available power; reuse the actual power if no value is available) and forecastAbleValue (predicted power);
+   opt carries the station information, with fields cap (installed capacity), province, formulaType (formula type), electricType (station type) and stationCode (station code).
+   See the interface documentation for details.
+3. The formula is evaluated either per day or per point, selected via opt.formulaType; the per-day or per-point results are then averaged and returned.
+'''
+
+
+def wrap_json(df, opt):
+    """
+    Wrap the data as JSON payloads
+    :param df: DataFrame with columns C_TIME, realValue, ableValue, forecastAbleValue
+    :param opt: options object
+    :return: list of JSON strings
+    """
+    d = opt.formulaType.split('_')[0]
+    jata, dfs = [], []
+    if d == 'POINT':
+        df['time'] = df['C_TIME'].apply(datetime_to_timestamp)
+        for i, row in df.iterrows():
+            dfs.append(row.to_frame().T)
+    elif d == 'DAY':
+        df['time'] = df['C_TIME'].apply(datetime_to_timestamp)
+        df['C_TIME'] = df['C_TIME'].dt.strftime('%y%m%d')   # convert to YYMMDD so the rows can be grouped by day
+        for i, group in df.groupby('C_TIME'):
+            dfs.append(group)
+    outter_dict = {"electricCapacity": str(opt.cap), "province": opt.province, "formulaType": opt.formulaType, "electricType":opt.electricType, "stationCode": opt.stationCode}
+    timestamp = int(time.mktime(datetime.datetime.now().timetuple()) * 1000 + datetime.datetime.now().microsecond / 1000.0)
+    inner_dict = {"genTime": str(timestamp)+"L", "capacity": str(opt.cap), "openCapacity": str(opt.cap)}
+    for df in dfs:
+        calculationInfoList = df.iloc[:, 1:].to_json(orient='records')
+        outter_dict['calculationInfoList'] = [dict(calculation, **inner_dict) for calculation in json.loads(calculationInfoList)]  # calculationInfoList is the JSON string produced by to_json
+        jata.append(json.dumps(outter_dict))
+    return jata
+
+
+def send_reqest(url, jata):
+    """
+    Send the requests
+    :param url: request address
+    :param jata: list of JSON payloads
+    :return: accuracy
+    """
+    headers = {
+        'content-type': 'application/json;charset=UTF-8',
+        "Authorization": "dXNlcjoxMjM0NTY="
+    }
+    acc, number = 0, 0
+    for i in range(len(jata)):
+        res = requests.post(url, headers=headers, data=jata[i])
+        if res.json()['code'] == '500':
+            print("没通过考核标准")
+            continue
+        number += 1
+        acc += float(res.json()['data'][:-1])
+    acc /= number
+    return acc
+
+
+def calculate_acc(data, opt):
+    """
+    Compute the accuracy through the remote interface
+    :param data: DataFrame with columns C_TIME, realValue, ableValue, forecastAbleValue
+    :param opt: options object
+    :return: computed accuracy
+    """
+    jata = wrap_json(data, opt)
+    acc = send_reqest(url=url, jata=jata)
+    return acc
+
+
+def datetime_to_timestamp(dt):
+    return int(round(time.mktime(dt.timetuple()))*1000)
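A hedged usage sketch of calculate_acc following the module docstring above (the sample power values are placeholders; the station fields mirror config_xiushui.yml, and network access plus a valid token for the service are assumed):

    import pandas as pd
    from types import SimpleNamespace
    from calculate import calculate_acc

    data = pd.DataFrame({
        'C_TIME': pd.to_datetime(['2023-06-01 00:00', '2023-06-01 00:15']),
        'realValue': [12.3, 13.1],
        'ableValue': [12.3, 13.1],          # no separate available power, so the actual power is reused
        'forecastAbleValue': [11.8, 13.5],
    })
    opt = SimpleNamespace(cap=50, province='E13', formulaType='DAY_SHORT_ACCURACY',
                          electricType='E1', stationCode='J00301')
    acc = calculate_acc(data, opt)
    print(acc)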

+ 86 - 0
秀水分区/config.py

@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/3/17 14:46
+# file: config.py
+# author: David
+# company: shenyang JY
+
+import yaml
+import argparse
+
+
+class myargparse(argparse.ArgumentParser):
+    def __init__(self, discription, add_help):
+        super(myargparse, self).__init__(description=discription, add_help=add_help)
+
+        self.add_argument('-c', '--config_yaml',default='config_xiushui.yml', type=str, metavar='FILE')
+        self.add_argument('--norm_yaml', default='./data/xiushui/xiushui15/norm.yaml', type=str, metavar='FILE')
+
+        self.add_argument('--input_size', type=int, default=0, help='input dimension')
+        self.add_argument('--input_size_lstm', type=int, default=0, help='input dimension of the LSTM branch')
+        self.add_argument('--input_size_cnn', type=int, default=0, help='input dimension of the CNN branch')
+
+        self.add_argument('--output_size', type=int, default=16, help='output dimension')  # 16 points
+
+        # model_prefix and model_save_path are assembled from parameters in the yml
+
+        self.add_argument('--model_prefix', type=str, default=None, help='model name prefix')
+
+        self.add_argument('--save_name', type=str, default=None, help='save name')
+
+        self.add_argument('--model_save_path', type=str, default=None, help='model save path')
+
+        self.add_argument('--columns_lstm', type=list, default=None, help='LSTM column names')
+
+        self.add_argument('--columns_cnn', type=list, default=None, help='CNN column names')
+
+
+    def _init_dir(self, opt):
+        import os, time
+        # assign the derived values to opt here
+        opt.model_prefix = "model_" + opt.continue_flag
+        opt.model_save_path = './checkpoint/' + opt.model_prefix + "/"
+        opt.save_name = "model_" + opt.save_frame + opt.model_postfix['keras']
+        if not os.path.exists(opt.model_save_path):
+            os.makedirs(opt.model_save_path)    # makedirs creates directories recursively
+        if not os.path.exists(opt.figure_save_path):
+            os.mkdir(opt.figure_save_path)
+        if opt.do_train and (opt.do_log_save_to_file or opt.do_train_visualized):
+            cur_time = time.strftime("%Y_%m_%d_%H_%M_%S", time.localtime())
+            log_save_path = opt.log_save_path + cur_time + '_' + opt.used_frame + "/"
+            os.makedirs(log_save_path)
+
+
+# YAML should override the argparser's content
+    def _parse_args_and_yaml(self):
+        given_configs, remaining = self.parse_known_args()
+        if given_configs.config_yaml:
+            with open(given_configs.config_yaml, 'r', encoding='utf-8') as f:
+                cfg = yaml.safe_load(f)
+                self.set_defaults(**cfg)
+        if given_configs.norm_yaml:
+            with open(given_configs.norm_yaml, 'r', encoding='utf-8') as f:
+                cfg = yaml.safe_load(f)
+                print("归一化参数:", cfg)
+                self.set_defaults(**cfg)
+
+        # The main arg parser parses the rest of the args, the usual
+        # defaults will have been overridden if config file specified.
+        opt = self.parse_args(remaining)
+        self._init_dir(opt)
+        # Cache the args as a text string to save them in the output dir later
+        opt_text = yaml.safe_dump(opt.__dict__, default_flow_style=False)
+        return opt, opt_text
+
+
+    def parse_args_and_yaml(self):
+        return self._parse_args_and_yaml()[0]
+
+
+if __name__ == "__main__":
+    # opt = _parse_args_and_yaml()
+    pass
+
+
+
+

+ 54 - 0
秀水分区/config_xiushui.yml

@@ -0,0 +1,54 @@
+Model:
+  batch_size: 64
+  dropout_rate: 0.2
+  epoch: 50
+  hidden_size: 64
+  learning_rate: 0.001
+  lstm_layers: 2
+  patience: 5
+  random_seed: 42
+  time_step: 16
+add_train: false
+cap: 50
+continue_flag: 'xiushui'
+data_format:
+  cdq: cdq.csv
+  dq: dq.csv
+  test: xiushui_test.csv
+  envir: envir.xlsx
+  nwp: xiushui_train.csv
+  rp: rp.xlsx
+  rp1: power.csv
+debug_model: false
+debug_num: 500
+do_continue_train: false
+do_figure_save: false
+do_log_print_to_screen: true
+do_log_save_to_file: true
+do_predict: true
+do_train: true
+do_train_visualized: true
+electricType: E1
+envir_columns: 16
+excel_data_path: ./data/xiushui/xiushui15/
+figure_save_path: ./figure/
+formulaType: DAY_SHORT_ACCURACY
+is_continuous_predict: true
+is_photovoltaic: true
+log_save_path: ./log/
+
+model_postfix:
+  keras: .h5
+  pytorch: .pth
+  tensorflow: .ckpt
+predict_point: 0
+province: E13
+save_frame: lstm
+shuffle_train_data: false
+stationCode: J00301
+
+train_data_path: ./data/
+train_data_rate: 0.9
+use_cuda: false
+used_frame: keras
+valid_data_rate: 0.15

+ 107 - 0
秀水分区/data_analyse.py

@@ -0,0 +1,107 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/4/12 18:57
+# file: data_analyse.py
+# author: David
+# company: shenyang JY
+import sys
+import numpy as np
+np.random.seed(42)
+import matplotlib.pyplot as plt
+import pandas as pd
+from calculate import calculate_acc
+
+
+class data_analyse(object):
+    def __init__(self, opt, logger):
+        self.opt = opt
+        self.logger = logger
+
+    def dq_acc(self):
+        excel_data_path = self.opt.excel_data_path
+        data_format = self.opt.data_format
+        dq_path = excel_data_path + data_format["dq"]
+        dq_columns = ['C_TIME', 'C_ABLE_VALUE']
+        dq = pd.read_csv(dq_path, usecols=dq_columns)
+        dq['C_TIME'] = pd.to_datetime(dq['C_TIME'], format='%Y-%m-%d %H:%M:%S')
+        return dq
+
+    def cdq_acc(self):
+        excel_data_path = self.opt.excel_data_path
+        data_format = self.opt.data_format
+        dq_path = excel_data_path + data_format["cdq"]
+        dq_columns = ['C_TIME', 'C_ABLE_VALUE']
+        cdq = pd.read_csv(dq_path, usecols=dq_columns)
+        cdq['C_TIME'] = pd.to_datetime(cdq['C_TIME'], format='%Y-%m-%d %H:%M:%S')
+        return cdq
+
+    def calculate_rmse(self, label_data, predict_data):
+        # RMSE helper; a separate calculate_acc further below returns the accuracy
+        loss = np.sum((label_data - predict_data) ** 2) / len(label_data)  # mse
+        loss_sqrt = np.sqrt(loss)  # rmse
+        return loss_sqrt
+
+    def predict_acc(self, predict_data, dfs, predict_all=False):
+        if predict_all is True:
+            # predict_data = predict_data * self.opt.std['C_VALUE'] + self.opt.mean['C_VALUE']
+            predict_data = predict_data * self.opt.std['sum_power'] + self.opt.mean['sum_power']
+        else:
+            predict_data0 = predict_data[0] * self.opt.std['col1_power'] + self.opt.mean['col1_power']
+            predict_data1 = predict_data[1] * self.opt.std['col2_power'] + self.opt.mean['col2_power']
+            predict_data = predict_data0 + predict_data1
+        dfs1 = []
+        dfs2 = []
+        for i, df in enumerate(dfs):
+            df["forecastAbleValue"] = predict_data[i]
+            dfs1.append(df.iloc[0])  # first point of the window
+            dfs2.append(df.iloc[-1])  # last (16th) point of the window
+            # if df.iloc[-1, -1] < 0:
+            #     print("预测中有一个负值,为:", df.iloc[-1, -1])
+            # else:
+            #     print("预测结果为:", df.iloc[-1, -1])
+            #     dfs1.append(df.iloc[0])
+            # if df.iloc[-1, -1] < 0:
+            #     print("预测中有一个负值,为:", df.iloc[-1, -1])
+            # else:
+            #     print("预测结果为:", df.iloc[-1, -1])
+            #     dfs2.append(df.iloc[-1])
+            # dfs[i] = df.iloc[self.opt.predict_point]  # 第16个点
+        df1 = pd.concat(dfs1, axis=1).T
+        df2 = pd.concat(dfs2, axis=1).T
+        # df = pd.concat(dfs, axis=1).T
+        # df1 = df.drop(['label'], axis=1)
+        # df1 = df1.iloc[15:, :]
+        # df2 = df2.iloc[:-15, :]
+        # fig, ax = plt.subplots()
+        # ax.plot(df1["C_TIME"], df1["forecastAbleValue"], color='b')
+        # ax.plot(df2["C_TIME"], df2["forecastAbleValue"], color='r')
+        # ax.plot(df2["C_TIME"], df2["C_VALUE"], color='y')
+        # plt.show()
+        # rmse = self.calculate_acc(label_data=df['realValue'], predict_data=df['forecastAbleValue'])
+        # df1.to_csv('./figure/fenqu.csv')
+        self.logger.info("1 新模型预测准确率是: {} ".format('接口') + str(self.cal_acc(df1)))
+        self.logger.info("16 新模型预测准确率是: {} ".format('接口') + str(self.cal_acc(df2)))
+        # self.logger.info("新模型预测rmse是: {} ".format('公式') + str(rmse))
+        # self.predict_draw(df1)
+        # self.logger.info("1 新模型预测准确率是: {} ".format('接口') + str(self.calculate_acc(df1['realValue'], df1['forecastAbleValue'])))
+        # self.logger.info("16 新模型预测准确率是: {} ".format('接口') + str(self.calculate_acc(df2['realValue'], df2['forecastAbleValue'])))
+
+    def cal_acc(self, df):
+        df.rename(columns={'C_VALUE': 'realValue'}, inplace=True)
+        df['ableValue'] = df['realValue']
+        acc = calculate_acc(df, self.opt)
+        return acc
+
+    def calculate_acc(self, label_data, predict_data):
+        loss = np.sum((label_data - predict_data) ** 2) / len(label_data)  # mse
+        loss_sqrt = np.sqrt(loss)  # rmse
+        loss_acc = 1 - loss_sqrt / self.opt.cap
+        return loss_acc
+
+    def predict_draw(self, df):
+        df.realValue.plot()
+        df.forecastAbleValue.plot()
+        plt.show()
+
+
+if __name__ == '__main__':
+    pass

+ 72 - 0
秀水分区/data_features.py

@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/4/12 17:42
+# file: data_features.py
+# author: David
+# company: shenyang JY
+import pandas as pd
+from sklearn.model_selection import train_test_split
+import numpy as np
+
+class data_features(object):
+    def __init__(self, opt):
+        self.opt = opt
+        self.time_step = self.opt.Model["time_step"]
+        self.columns = list()
+
+    def get_train_data(self, dfs):
+        train_x, valid_x, train_y, valid_y = [], [], [], []
+        for i, df in enumerate(dfs, start=1):
+            datax, datay = self.get_data_features(df, is_train=True)
+            tx, vx, ty, vy = train_test_split(datax, datay, test_size=self.opt.valid_data_rate, random_state=self.opt.Model["random_seed"], shuffle=self.opt.shuffle_train_data)  # split into training and validation sets
+            train_x.extend(tx)
+            valid_x.extend(vx)
+            train_y.extend(ty)
+            valid_y.extend(vy)
+
+        train_y = np.concatenate([[y.iloc[:, 1:].values for y in train_y]], axis=0)
+        valid_y = np.concatenate([[y.iloc[:, 1:].values for y in valid_y]], axis=0)
+
+        train_x = np.array([x[0].values for x in train_x])
+        valid_x = np.array([x[0].values for x in valid_x])
+
+        return train_x, valid_x, train_y, valid_y
+
+    def get_test_data(self, dfs):
+        test_x, test_y, data_y = [], [], []
+        for i, df in enumerate(dfs, start=1):
+            datax, datay = self.get_data_features(df, is_train=False)
+
+            test_x.extend(datax)
+            test_y.extend(datay)
+            data_y.extend(datay)
+
+        test_x = np.array([x[0].values for x in test_x])
+        test_y = np.concatenate([[y.iloc[:, 1:].values for y in test_y]], axis=0)
+        return test_x, test_y, data_y
+
+    def get_data_features(self, norm_data, is_train):   # this implementation is a pandas-based optimization
+        time_step = self.opt.Model["time_step"]
+        feature_data = norm_data.reset_index(drop=True)
+        time_step_loc = time_step - 1
+        train_num = int(len(feature_data))
+        label_features = ['C_TIME', 'col1_power', 'col2_power', 'sum_power', 'C_VALUE']  # the same label columns are used for train and test
+        nwp = [feature_data.loc[i:i + time_step_loc, 'C_RADIATION':'C_TPR'].reset_index(drop=True) for i in range(train_num - time_step)]  # database field layout would be 'C_T':'C_WS170'
+        labels = [feature_data.loc[i:i + time_step_loc, label_features].reset_index(drop=True) for i in range(train_num - time_step)]
+        features_x, features_y = [], []
+        print("匹配环境前,{}组".format(len(nwp)), end=" -> ")
+        for i, row in enumerate(zip(nwp, labels)):
+            time_end = row[1]['C_TIME'][0]
+            time_start = time_end - pd.DateOffset(1)
+            # row1 = envir[(envir.C_TIME < time_end) & (envir.C_TIME > time_start)][-16:]
+            # if len(row1) < 16:
+            #     print("环境数据不足16个点:", len(row1))
+            #     continue
+            # row1 = row1.reset_index(drop=True).drop(['C_TIME'], axis=1)
+            # features_x.append([row1.iloc[:,:-4], row1.iloc[:,-4:]])
+            features_x.append([row[0]])
+            features_y.append(row[1])
+        print("匹配环境后,{}组".format(len(features_x)))
+        return features_x, features_y
+
+

+ 219 - 0
秀水分区/data_process.py

@@ -0,0 +1,219 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/3/17 10:10
+# file: main.py
+# author: David
+# company: shenyang JY
+import pandas as pd
+import os
+import numpy as np
+from data_utils import *
+import yaml
+
+
+class data_process(object):
+    def __init__(self, opt):
+        self.std = {}
+        self.mean = {}
+        self.opt = opt
+
+    # Mainly the gap-filling that happens after the tables are joined
+    def get_train_data(self):
+        """
+        :return: the cleaned and normalized training DataFrame
+        """
+        csv_data_path = self.opt.excel_data_path
+        data_train = pd.read_csv(os.path.join(csv_data_path, self.opt.data_format["nwp"]))
+        data_train['C_TIME'] = pd.to_datetime(data_train['C_TIME'])
+        data_train = self.data_cleaning(data_train)
+        # power['C_TIME'] = pd.to_datetime(power['C_TIME'])
+        envir_cols = ['C_TIME', 'C_GLOBALR', 'C_DIRECTR', 'C_DIFFUSER', 'C_AIRT', 'C_CELLT']
+        # envir_cols = ['C_TIME', 'C_GLOBALR', 'C_DIRECTR', 'C_DIFFUSER', 'C_OBLIQUER']
+
+        # envir = envir.drop(labels='C_SYNC_TIME', axis=1)
+        # Step 1: join; the MBD environment data should not be joined here
+        # unite = pd.merge(unite, envir, on='C_TIME')
+        # Step 2: compute the time gaps
+        # data_train['C_TIME'] = pd.to_datetime(data_train['C_TIME'])
+        # data_train['time_diff'] = data_train['C_TIME'].diff()
+        # dt_short = pd.Timedelta(minutes=15)
+        # dt_long = pd.Timedelta(minutes=15 * 10)
+        # dfs = self.missing_time_splite(data_train, dt_short, dt_long)
+        # miss_points = data_train[(data_train['time_diff'] > dt_short) & (data_train['time_diff'] < dt_long)]
+        # miss_number = miss_points['time_diff'].dt.total_seconds().sum(axis=0)/(15*60) - len(miss_points)
+        # print("再次测算,需要插值的总点数为:", miss_number)
+        # dfs_train, dfs_test = self.data_fill(dfs)
+
+
+        # from sklearn.model_selection import train_test_split
+        # data_train, data_test = train_test_split(data_train, test_size=(1-self.opt.train_data_rate),
+        #                                   random_state=self.opt.Model["random_seed"],
+        #                                   shuffle=self.opt.shuffle_train_data)
+
+        # envir = envir.iloc[:, :-1]
+        data_train = self.norm_features(data_train, ['C_TIME'])
+        self.feature_columns(data_train)
+        return data_train
+
+    def get_test_data(self):
+        data_test = pd.read_csv(os.path.join(self.opt.excel_data_path, self.opt.data_format["test"]))
+        data_test['C_TIME'] = pd.to_datetime(data_test['C_TIME'])
+        data_test = self.data_cleaning(data_test)        # envir_cols = ['C_TIME', 'C_GLOBALR', 'C_DIRECTR', 'C_DIFFUSER', 'C_OBLIQUER']
+        # envir = envir.iloc[:, :-1]
+        # drop_cols = ['C_TEMPERATURE120',	'C_TEMPERATURE130',	'C_TEMPERATURE140',
+        #              'C_TEMPERATURE150',	'C_TEMPERATURE160',	'C_TEMPERATURE170',
+        #              'C_TEMPERATURE180',	'C_TEMPERATURE190',	'C_TEMPERATURE200',
+        #              'C_DIRECTION120',    'C_DIRECTION130',    'C_DIRECTION140',
+        #              'C_DIRECTION150',    'C_DIRECTION160',    'C_DIRECTION170',
+        #              'C_DIRECTION180',    'C_DIRECTION190',    'C_DIRECTION200',
+        #              'C_SPEED120',    'C_SPEED130',    'C_SPEED140',    'C_SPEED150',
+        #              'C_SPEED160',    'C_SPEED170',    'C_SPEED180',
+        #              'C_SPEED190',    'C_SPEED200'
+        #
+        #              ]
+        preserve = ['C_TIME', 'C_RADIATION', 'C_SURFACE_PRESSURE', 'C_HUMIDITY2', 'C_TEMPERATURE2', 'C_TEMPERATURE10', 'C_TEMPERATURE30', 'C_TEMPERATURE50', 'C_TEMPERATURE70', 'C_TEMPERATURE80', 'C_TEMPERATURE90', 'C_TEMPERATURE110', 'C_DIRECTION10', 'C_DIRECTION30', 'C_DIRECTION50', 'C_DIRECTION70', 'C_DIRECTION80', 'C_DIRECTION90', 'C_DIRECTION110', 'C_SPEED10', 'C_SPEED30', 'C_SPEED50', 'C_SPEED70', 'C_SPEED80', 'C_SPEED90', 'C_SPEED110', 'C_DNI_CALCD', 'C_SOLAR_ZENITH', 'C_CLEARSKY_GHI', 'C_LCC', 'C_MCC', 'C_HCC', 'C_TCC', 'C_TPR', 'col1_power', 'col2_power', 'sum_power', 'C_VALUE']
+        data_test = data_test.loc[:, preserve]
+        data_test = self.norm_features(data_test, ['C_TIME', 'sum_power', 'C_VALUE'])
+        self.feature_columns(data_test)
+        return data_test
+
+    def feature_columns(self, data):
+        self.opt.columns_lstm = list(data.loc[:, 'C_RADIATION': 'C_TPR'])
+        self.opt.input_size_lstm = len(self.opt.columns_lstm)
+        self.opt.input_size_cnn = 1
+        print("cnn:", self.opt.columns_cnn)
+        print("lstm", self.opt.columns_lstm)
+        print("opt列名设置完毕!", self.opt.columns_cnn, self.opt.columns_lstm)
+
+    def norm_features(self, data, drop_list):
+        data1_ = data.drop(drop_list, axis=1, errors='ignore')
+        columns = list(data1_.columns)
+        mean = np.array([self.opt.mean[col] for col in columns])
+        std = np.array([self.opt.std[col] for col in columns])
+        new = []
+        for i, d in data1_.iterrows():
+            d = (d - mean) / std  # normalize
+            new.append(d)
+        new = pd.concat(new, axis=1).T
+        for col in new.columns:  # admittedly cumbersome rather than concise
+            data[col] = new[col]
+        return data
+
+    def data_cleaning(self, data, clean_value=[-9999.0, -99]):
+        for val in clean_value:
+            data = data.replace(val, np.nan)
+        # drop columns with too many NaNs (thresh keeps columns with at least 80% non-NaN values)
+        data = data.dropna(axis=1, thresh=len(data)*0.8)
+        # drop columns whose values are all identical
+        data = data.loc[:, (data != data.iloc[0]).any()]
+        # fill the remaining NaNs by back-filling
+        data = data.interpolate(method='bfill')
+        return data
+
+    def missing_time_splite(self, df, dt_short, dt_long):
+        n_long, n_short, n_points = 0, 0, 0
+        start_index = 0
+        dfs = []
+        for i in range(1, len(df)):
+            if df['time_diff'][i] >= dt_long:
+                df_long = df.iloc[start_index:i, :-1]
+                dfs.append(df_long)
+                start_index = i
+                n_long += 1
+            if df['time_diff'][i] > dt_short:
+                print(df['C_TIME'][i-1], end=" ~ ")
+                print(df['C_TIME'][i], end=" ")
+                points = df['time_diff'].dt.total_seconds()[i]/(60*15)-1
+                print("缺失点数:", points)
+                if df['time_diff'][i] < dt_long:
+                    n_short += 1
+                    n_points += points
+                    print("需要补值的点数:", points)
+        dfs.append(df.iloc[start_index:, :-1])
+        print("数据总数:", len(df), ",时序缺失的间隔:", n_short, "其中,较长的时间间隔:", n_long)
+        print("需要补值的总点数:", n_points)
+        return dfs
+
+    def data_fill(self, dfs, test):
+        dfs_train, dfs_test, inserts = [], [], 0
+        for i, df in enumerate(dfs):
+            df1 = df.set_index('C_TIME', inplace=False)
+            dff = df1.resample('15T').bfill()
+            dff.reset_index(inplace=True)
+            points = len(dff) - len(df1)
+            if i not in test:
+                if i == 0:
+                    dff = dff.iloc[8:, :].reset_index(drop=True)
+                dfs_train.append(dff)
+                print("{} ~ {} 有 {} 个点, 填补 {} 个点.".format(dff.iloc[0, 0], dff.iloc[-1, 0], len(dff), points))
+                inserts += points
+            else:
+                print("{} ~ {} 有 {} 个点, 缺失 {} 个点.(测试集)".format(dff.iloc[0, 0], dff.iloc[-1, 0], len(dff), points))
+                dfs_test.append(dfs[i].reset_index(drop=True))
+        print("训练集分成了{}段".format(len(dfs_train)))
+        return dfs_train, dfs_test
+
+    def drop_duplicated(self, df):
+        df = df.groupby(level=0).mean()  # deduplicate the DatetimeIndex by averaging duplicate timestamps
+        return df
+
+    def read_data(self, path, cols=None, index_col=None):
+        init_data = pd.read_excel(path, usecols=cols, index_col=index_col, engine='openpyxl')
+        return init_data
+
+
+
+
+if __name__ == "__main__":
+    from config import myargparse
+    parse = myargparse(discription="training config", add_help=False)
+    opt = parse.parse_args_and_yaml()
+    ds = data_process(opt=opt)
+    path = ds.opt.excel_data_path
+    os.makedirs(os.path.join(path, 'process'), exist_ok=True)  # the cleaned csv files below are written into the process/ subfolder
+    excel_data_path = ds.opt.excel_data_path
+    data_format = ds.opt.data_format
+    # dq_path = excel_data_path + data_format["dq"].replace('.csv', '.xlsx')
+    rp_path = excel_data_path + data_format["rp"].replace('.csv', '.xlsx')
+    nwp_path = excel_data_path + data_format["nwp"].replace('.csv', '.xlsx')
+    envir_path = excel_data_path + data_format["envir"].replace('.csv', '.xlsx')
+    rp_columns = ['C_TIME', 'C_REAL_VALUE']  # TODO: names read back from csv appear as "'C_TIME'" because the exported strings keep their single quotes
+
+    nwp = ds.read_data(nwp_path)  # TODO: the exported csv keeps the table's column order, and read_csv reads columns in that order
+    nwp = ds.data_cleaning(nwp)
+    nwp.drop(['C_SYNC_TIME'], axis=1, inplace=True)
+    # nwp.set_index('C_TIME', inplace=True)
+    # nwp = ds.drop_duplicated(nwp)
+
+    envir = ds.read_data(envir_path)  # TODO: same column-order caveat as for the NWP file
+    envir = ds.data_cleaning(envir)
+    # envir.set_index('C_TIME', inplace=True)
+    # envir.drop(['C_WS_NO', 'C_SYNC_TIME'])
+    # envir = ds.drop_duplicated(envir)
+
+    rp = ds.read_data(rp_path, rp_columns)
+    # rp.set_index('C_TIME', inplace=True)  # nan也可以设置索引列
+    rp = ds.data_cleaning(rp)
+    # rp = ds.drop_duplicated(rp)
+
+    dataframes = [nwp, rp, envir]
+    # find the latest start time and the earliest end time across the three frames
+    max_start_time = max(df['C_TIME'].min() for df in dataframes)
+    min_end_time = min(df['C_TIME'].max() for df in dataframes)
+
+    print(max_start_time)
+    print(min_end_time)
+
+    # trim every DataFrame to the common time range [max_start_time, min_end_time]
+    for i, df in enumerate(dataframes):
+        df['C_TIME'] = pd.to_datetime(df['C_TIME'])  # make sure the time column is datetime
+        df_filtered = df[(df['C_TIME'] >= max_start_time) & (df['C_TIME'] <= min_end_time)]
+
+        # keep the trimmed frame; it is written out below into the process/ folder
+        dataframes[i] = df_filtered.reset_index(drop=True)
+
+
+    dataframes[0].to_csv(os.path.join(path, 'process', 'nwp.csv'), index=False)
+    dataframes[2].to_csv(os.path.join(path, 'process', 'envir.csv'), index=False)
+    dataframes[1].to_csv(os.path.join(path, 'process', 'rp.csv'), index=False)
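
The row-by-row loop in norm_features above produces correct z-scores but is slow on long frames; pandas broadcasts column-wise, so the same result can be computed in one step. A minimal sketch, assuming the same opt.mean / opt.std dictionaries keyed by column name (norm_features_vectorized is a hypothetical helper, not part of this commit):

    import pandas as pd

    def norm_features_vectorized(data, drop_list, mean, std):
        cols = [c for c in data.columns if c not in drop_list]   # columns to normalize
        mu = pd.Series({c: mean[c] for c in cols})
        sigma = pd.Series({c: std[c] for c in cols})
        data[cols] = (data[cols] - mu) / sigma                   # broadcast z-score over all rows at once
        return data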

+ 67 - 0
秀水分区/data_utils.py

@@ -0,0 +1,67 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/3/22 17:17
+# file: data_utils.py
+# author: David
+# company: shenyang JY
+
+
+import time, datetime
+
+
+class ValidationError(Exception):
+    def __init__(self, message):
+        self.message = message
+
+
+def timestamp_to_datetime(ts):
+    if type(ts) is not int:
+        raise ValueError("timestamp-时间格式必须是整型")
+    if len(str(ts)) == 13:
+        return datetime.datetime.fromtimestamp(ts/1000)
+    elif len(str(ts)) == 10:
+        return datetime.datetime.fromtimestamp(ts)
+    else:
+        raise ValueError("timestamp-时间格式长度错误")
+
+
+def datetime_to_timestamp(dt, length):
+    # length: 10 for a second-precision timestamp, 13 for millisecond precision
+    if length not in (10, 13):
+        raise ValueError("timestamp-时间戳转换长度错误")
+    if length == 10:
+        return int(round(time.mktime(dt.timetuple())))
+    else:
+        return int(round(time.mktime(dt.timetuple()))*1000)
+
+
+def datetime_to_timestr(dt):
+    return int(dt.strftime('%m%d%H%M'))
+
+
+def timestr_to_datetime(time_data):
+    """
+    将时间戳或时间字符串转换为datetime.datetime类型
+    :param time_data: int or str
+    :return:datetime.datetime
+    """
+    if isinstance(time_data, float):
+        result = timestamp_to_datetime(int(time_data))
+    elif isinstance(time_data, int):
+        result = timestamp_to_datetime(time_data)
+    elif isinstance(time_data, str):
+        if len(time_data) == 10:
+            result = datetime.datetime.strptime(time_data, '%d/%m/%Y')
+            # result = datetime.datetime.strptime(time_data, '%Y-%m-%d')
+        elif len(time_data) in {17, 18, 19}:
+            result = datetime.datetime.strptime(time_data, '%Y-%m-%d %H:%M:%S')   # strptime must match the string's format exactly
+            # result = datetime.datetime.strptime(time_data, '%Y-%m-%d %H:%M:%S')
+        else:
+            raise ValidationError("时间字符串长度不满足要求!")
+    else:
+        return time_data
+    return result
+
+
+def timestamp_to_timestr(t):
+    return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(t))
+
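
A quick sketch of how these helpers round-trip between the supported time representations (illustrative values, assuming this data_utils module is importable):

    from data_utils import (timestr_to_datetime, datetime_to_timestamp,
                            timestamp_to_datetime, timestamp_to_timestr)

    dt = timestr_to_datetime('2023-03-22 17:17:00')   # 19-character time string -> datetime
    ts10 = datetime_to_timestamp(dt, 10)              # second-precision timestamp
    ts13 = datetime_to_timestamp(dt, 13)              # millisecond-precision timestamp
    assert timestamp_to_datetime(ts10) == timestamp_to_datetime(ts13)
    print(timestamp_to_timestr(ts10))                 # back to 'YYYY-MM-DD HH:MM:SS'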

+ 83 - 0
秀水分区/figure.py

@@ -0,0 +1,83 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/3/20 15:19
+# file: figure.py
+# author: David
+# company: shenyang JY
+import sys
+import numpy as np
+import matplotlib.pyplot as plt
+
+
+class Figure(object):
+    def __init__(self, opt, logger, process):
+        self.opt = opt
+        self.ds = process
+        self.logger = logger
+
+    def get_16_points(self, results):
+        # results holds the model predictions; keep only the last value of each 16-point window
+        preds = []
+        for res in results:
+            preds.append(res[-1])
+        return np.array(preds)
+
+    def draw(self, label_data, predict_norm_data, numbers):
+        # label_data = origin_data.data[origin_data.train_num + origin_data.start_num_in_test : ,
+        #                                         config.label_in_feature_index]
+        # dq_data = dq_data.reshape((-1, self.opt.output_size))
+        predict_norm_data = self.get_16_points(predict_norm_data)
+        label_data = self.get_16_points(label_data)
+        label_data = label_data.reshape((-1, self.opt.output_size))
+        # de-normalize label_data back to real power values
+        label_data = label_data * self.ds.std[self.opt.label_in_feature_index] + \
+                       self.ds.mean[self.opt.label_in_feature_index]
+        predict_data = predict_norm_data * self.ds.std[self.opt.label_in_feature_index] + \
+                       self.ds.mean[self.opt.label_in_feature_index]   # restore the data with the saved mean and std
+        # dq_data = dq_data * self.ds.std[0] + self.ds.mean[0]
+        # predict_data = predict_norm_data
+        assert label_data.shape[0] == predict_data.shape[0], "The element number in origin and predicted data is different"
+
+        label_name = [self.ds.tables_column_name[i] for i in self.opt.label_in_feature_index]
+        label_column_num = len(self.opt.label_columns)
+
+        # label and predict are offset by config.predict_day days
+        # the two commented-out loss computations on normalized data below are equivalent and easy to verify by hand
+        # label_norm_data = origin_data.norm_data[origin_data.train_num + origin_data.start_num_in_test:,
+        #              config.label_in_feature_index]
+        # loss_norm = np.mean((label_norm_data[config.predict_day:] - predict_norm_data[:-config.predict_day]) ** 2, axis=0)
+        # logger.info("The mean squared error of stock {} is ".format(label_name) + str(loss_norm))
+
+        loss = np.sum((label_data - predict_data) ** 2)/len(label_data)  # mse
+        # loss = np.mean((label_data - predict_data) ** 2, axis=0)
+        loss_sqrt = np.sqrt(loss)   # rmse
+        loss_norm = 1 - loss_sqrt / self.opt.cap
+        # loss_norm = loss/(ds.std[opt.label_in_feature_index] ** 2)
+        self.logger.info("The mean squared error of power {} is ".format(label_name) + str(loss_norm))
+
+        # loss1 = np.sum((label_data - dq_data) ** 2) / len(label_data)  # mse
+        # loss_sqrt1 = np.sqrt(loss1)  # rmse
+        # loss_norm1 = 1 - loss_sqrt1 / self.opt.cap
+        # self.logger.info("The mean squared error1 of power {} is ".format(label_name) + str(loss_norm1))
+        if self.opt.is_continuous_predict:
+            # label_X = range(int((self.ds.data_num - self.ds.train_num - 32)))
+            label_X = list(range(numbers))
+        else:
+            label_X = range(int((self.ds.data_num - self.ds.train_num - self.ds.start_num_in_test)/2))
+        print("label_x = ", label_X)
+        predict_X = [x for x in label_X]
+
+        if not sys.platform.startswith('linux'):    # a headless Linux cannot display figures; on a desktop Linux (e.g. Ubuntu) this check can be removed
+            for i in range(label_column_num):
+                plt.figure(i+1)                     # 预测数据绘制
+                plt.plot(label_X, label_data[:, i], label='label', color='b')
+                plt.plot(predict_X, predict_data[:, i], label='predict', color='g')
+                # plt.plot(predict_X, dq_data[:, i], label='dq', color='y')
+                # plt.title("Predict actual {} power with {}".format(label_name[i], self.opt.used_frame))
+                self.logger.info("The predicted power {} for the last {} point(s) is: ".format(label_name[i], self.opt.predict_points) +
+                      str(np.squeeze(predict_data[-self.opt.predict_points:, i])))
+
+                if self.opt.do_figure_save:
+                    plt.savefig(self.opt.figure_save_path+"{}predict_{}_with_{}.png".format(self.opt.continue_flag, label_name[i], self.opt.used_frame))
+
+            plt.show()

+ 43 - 0
秀水分区/logger.py

@@ -0,0 +1,43 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/3/20 15:19
+# file: logger.py
+# author: David
+# company: shenyang JY
+
+import logging, sys
+from logging.handlers import RotatingFileHandler
+
+
+def load_logger(config):
+    logger = logging.getLogger()
+    logger.setLevel(level=logging.DEBUG)
+
+    # StreamHandler
+    if config.do_log_print_to_screen:
+        stream_handler = logging.StreamHandler(sys.stdout)
+        stream_handler.setLevel(level=logging.INFO)
+        formatter = logging.Formatter(datefmt='%Y/%m/%d %H:%M:%S',
+                                      fmt='[ %(asctime)s ] %(message)s')
+        stream_handler.setFormatter(formatter)
+        logger.addHandler(stream_handler)
+
+    # FileHandler
+    if config.do_log_save_to_file:
+        file_handler = RotatingFileHandler(config.log_save_path + "out.log", maxBytes=1024000, backupCount=5)
+        file_handler.setLevel(level=logging.INFO)
+        formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
+        file_handler.setFormatter(formatter)
+        logger.addHandler(file_handler)
+
+        # also record the config values in the log file
+        config_dict = {}
+        for key in dir(config):
+            if not key.startswith("_"):
+                config_dict[key] = getattr(config, key)
+        config_str = str(config_dict)
+        config_list = config_str[1:-1].split(", '")
+        config_save_str = "\nConfig:\n" + "\n'".join(config_list)
+        logger.info(config_save_str)
+
+    return logger
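
A minimal sketch of calling load_logger without the full YAML config; SimpleNamespace stands in for the parsed options and only carries the attributes the function reads (do_log_print_to_screen, do_log_save_to_file, log_save_path), assuming logger.py is on the import path:

    from types import SimpleNamespace
    from logger import load_logger

    cfg = SimpleNamespace(do_log_print_to_screen=True,
                          do_log_save_to_file=False,
                          log_save_path='./log/')
    logger = load_logger(cfg)
    logger.info('logger ready')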

+ 0 - 0
秀水分区/model/__init__.py


+ 125 - 0
秀水分区/model/model_keras_base.py

@@ -0,0 +1,125 @@
+# -*- coding: UTF-8 -*-
+from keras.layers import Input, Dense, LSTM, concatenate, Conv1D, Conv2D, MaxPooling1D, Reshape, Flatten
+from keras.models import Model, load_model
+from keras.callbacks import ModelCheckpoint, EarlyStopping
+from keras import optimizers
+from keras.callbacks import TensorBoard
+import matplotlib.pyplot as plt
+import numpy as np
+from keras.callbacks import TensorBoard, EarlyStopping
+
+
+# model_history = model.fit_generator(train_generator, epochs=30, validation_data=evaluate_generator,
+#                                     callbacks=[early_stopping, tbCallBack])
+
+
+def get_keras_model(opt):
+    lstm_input = Input(shape=(opt.Model['time_step'], opt.input_size_lstm))
+    lstm = lstm_input
+    for i in range(opt.Model['lstm_layers']):
+        rs = True
+        if i == opt.Model['lstm_layers']-1:
+            rs = False
+        lstm = LSTM(units=opt.Model['hidden_size'], return_sequences=rs)(lstm)
+
+    lstm = Dense(16)(lstm)
+    # lstm = Flatten()(lstm)
+    # lstm = concatenate([lstm, cnn])
+    # lstm = Dense(16)(lstm)
+    # lstm = Flatten()(lstm)
+    # output = Dense(opt.output_size)(lstm)
+    model = Model(lstm_input, lstm)
+    adam = optimizers.Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-7, amsgrad=True)
+    model.compile(loss='mse', optimizer=adam)  # pass the configured optimizer instead of the default string 'adam'
+    return model
+
+
+def train_init(use_cuda=False):
+    import tensorflow as tf
+    from keras.backend.tensorflow_backend import set_session
+    if use_cuda:
+        # gpu init
+        sess_config = tf.ConfigProto(log_device_placement=True, allow_soft_placement=True)
+        sess_config.gpu_options.per_process_gpu_memory_fraction = 0.7  # use at most 70% of the GPU memory
+        sess_config.gpu_options.allow_growth = True   # do not grab all GPU memory at start; allocate on demand
+        sess = tf.Session(config=sess_config)
+        set_session(sess)
+    else:
+        session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
+        tf.set_random_seed(1234)
+        sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
+        set_session(sess)
+
+
+def train(opt, train_and_valid_data):
+    train_init(opt.use_cuda)
+    train_X, train_Y, valid_X, valid_Y = train_and_valid_data
+    print("----------", np.array(train_X[0]).shape)
+    print("++++++++++", np.array(train_X[1]).shape)
+    model = get_keras_model(opt)
+    model.summary()
+    weight_lstm_1, bias_lstm_1 = model.get_layer('dense_1').get_weights()
+    print("weight_lstm_1 = ", weight_lstm_1)
+    print("bias_lstm_1 = ", bias_lstm_1)
+    if opt.add_train:
+        model.load_weights(opt.model_save_path + 'model_kerass.h5')
+
+    check_point = ModelCheckpoint(filepath=opt.model_save_path + opt.save_name, monitor='val_loss',
+                                    save_best_only=True, mode='auto')
+    early_stop = EarlyStopping(monitor='val_loss', patience=opt.Model['patience'], mode='auto')
+
+    history = model.fit(train_X, train_Y, batch_size=opt.Model['batch_size'], epochs=opt.Model['epoch'], verbose=2,
+              validation_data=(valid_X, valid_Y), callbacks=[check_point, early_stop])
+
+    # acc = history.history['acc']
+    loss = history.history['loss']
+    epochs = range(1, len(loss) + 1)
+    plt.title('Loss')
+    # plt.plot(epochs, acc, 'red', label='Training acc')
+    plt.plot(epochs, loss, 'blue', label='Training loss')
+    plt.legend()
+    # plt.show()
+
+def predict(config, test_X):
+    model = get_keras_model(config)
+    model.load_weights(config.model_save_path + config.save_name)
+    result = model.predict(test_X, batch_size=1)
+    # result = result.reshape((-1, config.output_size))
+    return result
+
+
+def predict_cls(config, test_X, df_Y):
+    results, results1, results2, results3, results4 = [], [], [], [], []
+    dfy1, dfy2, dfy3, dfy4 = [], [], [],[]
+    model = get_keras_model(config)
+    model1 = get_keras_model(config)
+    for i, X in enumerate(zip(test_X[0], test_X[1])):
+        X = [np.array([X[0]]), np.array([X[1]])]
+        # X = np.array([X[1]])
+        print("label=", df_Y[i]['label'][0])
+        if df_Y[i]['label'][0] == 1:
+            model1.load_weights('./checkpoint/model_keras.h5/' + 'model_keras1.h5')
+            result = model1.predict(X, batch_size=1)
+            results1.append(result[0])
+            results.append(result[0])
+            dfy1.append(df_Y[i])
+        elif df_Y[i]['label'][0] == 2:
+            model.load_weights('./checkpoint/model_keras.h5/' + 'model_keras2.h5')
+            result = model.predict(X, batch_size=1)
+            results2.append(result[0])
+            results.append(result[0])
+            dfy2.append(df_Y[i])
+        elif df_Y[i]['label'][0] == 3:
+            model.load_weights('./checkpoint/model_keras.h5/' + 'model_keras3.h5')
+            result = model.predict(X, batch_size=1)
+            results3.append(result[0])
+            results.append(result[0])
+            dfy3.append(df_Y[i])
+        elif df_Y[i]['label'][0] == 4:
+            model.load_weights('./checkpoint/model_keras.h5/' + 'model_keras4.h5')
+            result = model.predict(X, batch_size=1)
+            results4.append(result[0])
+            results.append(result[0])
+            dfy4.append(df_Y[i])
+    # result = result.reshape((-1, config.output_size))
+    return np.array(results), np.array(results1), np.array(results2), np.array(results3), np.array(results4), dfy1, dfy2, dfy3, dfy4
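
get_keras_model above expects inputs shaped (batch, time_step, input_size_lstm). The real windows are presumably built in data_features.py (not shown here); the sketch below only illustrates that layout with made-up sizes:

    import numpy as np

    def make_windows(features, targets, time_step=16):
        # features: (n_samples, n_features) array, targets: (n_samples,) array
        X, Y = [], []
        for i in range(len(features) - time_step):
            X.append(features[i:i + time_step])       # one (time_step, n_features) window
            Y.append(targets[i + time_step - 1])      # label aligned with the last point of the window
        return np.array(X), np.array(Y)

    feats = np.random.rand(1000, 27)                  # e.g. 27 NWP channels
    power = np.random.rand(1000)
    train_X, train_Y = make_windows(feats, power)
    print(train_X.shape)                              # (984, 16, 27) matches Input(shape=(time_step, input_size_lstm))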

+ 96 - 0
秀水分区/model/model_keras_fenqu.py

@@ -0,0 +1,96 @@
+# -*- coding: UTF-8 -*-
+from keras.layers import Input, Dense, LSTM, concatenate, Conv1D, Conv2D, MaxPooling1D, Reshape, Flatten, Lambda
+from keras.models import Model, load_model
+from keras.callbacks import ModelCheckpoint, EarlyStopping
+from keras import optimizers
+from keras.callbacks import TensorBoard
+import matplotlib.pyplot as plt
+import numpy as np
+from keras.callbacks import TensorBoard, EarlyStopping
+
+
+def get_keras_model(opt):
+    lstm_input = Input(shape=(opt.Model['time_step'], opt.input_size_lstm))
+    lstm = lstm_input
+    for i in range(opt.Model['lstm_layers']):
+        rs = True
+        if i == opt.Model['lstm_layers']-1:
+            rs = False
+        lstm = LSTM(units=opt.Model['hidden_size'], dropout=opt.Model['dropout_rate'], return_sequences=rs)(lstm)
+    output = Dense(16, name='dense_1')(lstm)
+    # output = Flatten(data_format='channels_last')(output)
+
+    lstm1 = lstm_input
+    for i in range(opt.Model['lstm_layers']):
+        rs = True
+        if i == opt.Model['lstm_layers']-1:
+            rs = False
+        lstm1 = LSTM(units=opt.Model['hidden_size'], dropout=opt.Model['dropout_rate'], return_sequences=rs)(lstm1)
+    output1 = Dense(16, name='dense_2')(lstm1)
+    # output1 = Flatten(data_format='channels_last')(output1)
+
+    outputs = Lambda(sum)([output, output1])
+    # outputs = Dense(16, name='dense_3')(outputs)
+    model = Model(lstm_input, [output, output1])
+    # model = Model(lstm_input, outputs)
+    # model.compile(loss={'dense_1': 'mse', 'dense_2': 'mse', 'dense_3': 'mse'},
+    #               loss_weights={'dense_1': 500, 'dense_2': 500, 'dense_3': 0.04},
+    #               metrics={'dense_1': ['accuracy', 'mse'], 'dense_2': ['accuracy', 'mse'], 'dense_3': ['accuracy', 'mse']},
+    #               optimizer='adam')     # metrics=["mae"]
+    model.compile(loss={'dense_1': 'mse', 'dense_2': 'mse'},
+                  metrics={'dense_1': ['accuracy', 'mse'], 'dense_2': ['accuracy', 'mse'],},
+                  optimizer='adam')  # metrics=["mae"]
+    return model
+
+
+def train_init(use_cuda=False):
+    import tensorflow as tf
+    from keras.backend.tensorflow_backend import set_session
+    if use_cuda:
+        # gpu init
+        sess_config = tf.ConfigProto(log_device_placement=True, allow_soft_placement=True)
+        sess_config.gpu_options.per_process_gpu_memory_fraction = 0.7  # use at most 70% of the GPU memory
+        sess_config.gpu_options.allow_growth = True   # do not grab all GPU memory at start; allocate on demand
+        sess = tf.Session(config=sess_config)
+        set_session(sess)
+    else:
+        session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
+        tf.set_random_seed(1234)
+        sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
+        set_session(sess)
+
+
+def train(opt, train_and_valid_data):
+    train_init(opt.use_cuda)
+    train_X, train_Y, valid_X, valid_Y = train_and_valid_data
+    print("----------", np.array(train_X[0]).shape)
+    print("++++++++++", np.array(train_X[1]).shape)
+    model = get_keras_model(opt)
+    model.summary()
+    weight_lstm_1, bias_lstm_1 = model.get_layer('dense_1').get_weights()
+    print("weight_lstm_1 = ", weight_lstm_1)
+    print("bias_lstm_1 = ", bias_lstm_1)
+    if opt.add_train:
+        model.load_weights(opt.model_save_path + 'model_kerass.h5')
+
+    check_point = ModelCheckpoint(filepath=opt.model_save_path + opt.save_name, monitor='val_loss',
+                                    save_best_only=True, mode='auto')
+    early_stop = EarlyStopping(monitor='val_loss', patience=opt.Model['patience'], mode='auto')
+
+    history = model.fit(train_X, train_Y, batch_size=opt.Model['batch_size'], epochs=opt.Model['epoch'], verbose=2,
+              validation_data=(valid_X, valid_Y), callbacks=[check_point, early_stop])
+    loss = history.history['loss']
+    epochs = range(1, len(loss) + 1)
+    plt.title('Loss')
+    # plt.plot(epochs, acc, 'red', label='Training acc')
+    plt.plot(epochs, loss, 'blue', label='Training loss')
+    plt.legend()
+    # plt.show()
+
+def predict(config, test_X):
+    model = get_keras_model(config)
+    model.load_weights(config.model_save_path + 'model_' + config.save_frame + '.h5')
+    result = model.predict(test_X, batch_size=1)
+    # result = result.reshape((-1, config.output_size))
+    return result
+

+ 38 - 0
秀水分区/model/sloss.py

@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/5/8 13:15
+# file: sloss.py
+# author: David
+# company: shenyang JY
+import tensorflow as tf
+from keras import backend as K
+import keras
+
+
+class SouthLoss(keras.losses.Loss):
+    def __init__(self, cap, units=None):
+        """
+        南网新规则损失函数
+        :param cap:装机容量
+        """
+        super().__init__()
+        self.cap = 0.2 * cap
+
+    def call(self, y_true, y_predict):
+        """
+        自动调用
+        :param y_true: 标签
+        :param y_predict: 预测
+        :return: 损失值
+        """
+        # difference between the actual and predicted values
+        diff = y_true - y_predict
+        # (y_true - 0.2*cap) * 1000 is strongly negative below 0.2*cap and strongly positive above it,
+        # so after the sigmoid it is ~1 for points above 0.2*cap and ~0 for points below
+        logistic_values = tf.sigmoid(1000 * (y_true - self.cap))
+        # logistic_values masks the below-threshold points and 1 - logistic_values masks the above-threshold ones:
+        # above the threshold the base keeps the original difference, below it the base becomes 0.2*cap
+        base = logistic_values * diff + (1-logistic_values)*self.cap
+        loss = K.square(diff/base)
+        loss = K.mean(loss, axis=-1)
+        return loss
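
A small NumPy check of how the 0.2*cap threshold in SouthLoss behaves; the values are illustrative, and the hard gate below is the limit of the steep sigmoid used in the class:

    import numpy as np

    cap = 20.0
    threshold = 0.2 * cap                         # 4.0
    y_true = np.array([1.0, 10.0])
    y_pred = np.array([2.0, 12.0])

    diff = y_true - y_pred
    gate = (y_true > threshold).astype(float)     # hard version of sigmoid(1000 * (y_true - threshold))
    base = gate * diff + (1 - gate) * threshold
    print(base)                                   # [ 4. -2.]: the low-power point is clamped to 0.2*cap
    print(np.mean((diff / base) ** 2))            # 0.53125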

+ 8 - 0
秀水分区/requirements.txt

@@ -0,0 +1,8 @@
+scikit-learn
+pandas
+argparse
+keras
+tensorflow==1.15
+matplotlib>=3.0.2
+numpy>=1.14.6
+scipy>=1.1.0

+ 65 - 0
秀水分区/run_case_分区.py

@@ -0,0 +1,65 @@
+# -*- coding: UTF-8 -*-
+
+import numpy as np
+np.random.seed(42)
+import os
+from data_process import data_process
+from data_features import data_features
+from logger import load_logger
+from config import myargparse
+from data_analyse import data_analyse
+frame = "keras"
+
+if frame == "keras":
+    from model.model_keras_fenqu import train, predict
+    os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3'
+else:
+    raise Exception("Wrong frame seletion")
+
+
+def main():
+    parse = myargparse(discription="training config", add_help=False)
+    opt = parse.parse_args_and_yaml()
+    logger = load_logger(opt)
+    try:
+        process = data_process(opt=opt)
+        features = data_features(opt=opt)
+        if opt.do_train:
+            data_train = process.get_train_data()
+            train_X, valid_X, train_Y, valid_Y = features.get_train_data([data_train])
+            print("训练的数据集有{}个点".format(len(train_X[0])))
+            # train_Y = [np.array([y[:, 0] for y in train_Y])]
+            # valid_Y = [np.array([y[:, 0] for y in valid_Y])]
+            # train(opt, [train_X, train_Y, valid_X, valid_Y])
+
+            train_Y = [np.array([y[:, 0] for y in train_Y]), np.array([y[:, 1] for y in train_Y])]
+            valid_Y = [np.array([y[:, 0] for y in valid_Y]), np.array([y[:, 1] for y in valid_Y])]
+            train(opt, [train_X, train_Y, valid_X, valid_Y])
+        if opt.do_predict:
+            data_test = process.get_test_data()
+            test_X, test_Y, df_Y = features.get_test_data([data_test])
+            print("测试集有{}个点".format(len(test_X)))
+            result = predict(opt, test_X)       # the output here is still normalized (not yet de-normalized)
+            analyse = data_analyse(opt, logger)
+            # analyse.predict_acc(result, df_Y, predict_all=True)
+            analyse.predict_acc(result, df_Y, predict_all=False)
+    except Exception:
+        logger.error("Run Error", exc_info=True)
+
+
+if __name__ == "__main__":
+    import argparse
+    # argparse makes it easy to pass parameters on the command line; add more as needed
+    # parser = argparse.ArgumentParser()
+    # parser.add_argument("-t", "--do_train", default=False, type=bool, help="whether to train")
+    # parser.add_argument("-p", "--do_predict", default=True, type=bool, help="whether to train")
+    # parser.add_argument("-b", "--batch_size", default=64, type=int, help="batch size")
+    # parser.add_argument("-e", "--epoch", default=20, type=int, help="epochs num")
+    # args = parser.parse_args()
+
+    # con = Config()
+    # for key in dir(args):               # dir(args) lists all attributes of args
+    #     if not key.startswith("_"):     # skip built-in attributes such as __name__
+    #         setattr(con, key, getattr(args, key))   # copy the value onto Config
+    main()
+

+ 62 - 0
秀水分区/run_case_直接.py

@@ -0,0 +1,62 @@
+# -*- coding: UTF-8 -*-
+
+import numpy as np
+np.random.seed(42)
+import os
+from data_process import data_process
+from data_features import data_features
+from logger import load_logger
+from config import myargparse
+from data_analyse import data_analyse
+frame = "keras"
+
+if frame == "keras":
+    from model.model_keras_base import train, predict
+
+    os.environ["TF_CPP_MIN_LOG_LEVEL"] = '3'
+else:
+    raise Exception("Wrong frame seletion")
+
+
+def main():
+    parse = myargparse(discription="training config", add_help=False)
+    opt = parse.parse_args_and_yaml()
+    logger = load_logger(opt)
+    try:
+        process = data_process(opt=opt)
+        features = data_features(opt=opt)
+        if opt.do_train:
+            data_train = process.get_train_data()
+            train_X, valid_X, train_Y, valid_Y = features.get_train_data([data_train])
+            print("训练的数据集有{}个点".format(len(train_X)))
+            train_Y = np.array([y[:, 2] for y in train_Y])
+            valid_Y = np.array([y[:, 2] for y in valid_Y])
+            train(opt, [train_X, train_Y, valid_X, valid_Y])
+        if opt.do_predict:
+            data_test = process.get_test_data()
+            # dfs = [group for name, group in data_test.groupby('label')]
+            test_X, test_Y, df_Y = features.get_test_data([data_test])
+            print("测试集有{}个点".format(len(test_X)))
+            analyse = data_analyse(opt, logger)
+            result = predict(opt, test_X)
+            analyse.predict_acc(result, df_Y, predict_all=True)
+    except Exception:
+        logger.error("Run Error", exc_info=True)
+
+
+if __name__ == "__main__":
+    import argparse
+    # argparse makes it easy to pass parameters on the command line; add more as needed
+    # parser = argparse.ArgumentParser()
+    # parser.add_argument("-t", "--do_train", default=False, type=bool, help="whether to train")
+    # parser.add_argument("-p", "--do_predict", default=True, type=bool, help="whether to train")
+    # parser.add_argument("-b", "--batch_size", default=64, type=int, help="batch size")
+    # parser.add_argument("-e", "--epoch", default=20, type=int, help="epochs num")
+    # args = parser.parse_args()
+
+    # con = Config()
+    # for key in dir(args):               # dir(args) lists all attributes of args
+    #     if not key.startswith("_"):     # skip built-in attributes such as __name__
+    #         setattr(con, key, getattr(args, key))   # copy the value onto Config
+    main()
+

+ 27 - 0
秀水分区/test.py

@@ -0,0 +1,27 @@
+
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/8/3 9:24
+# file: test.py
+# author: David
+# company: shenyang JY
+
+
+import pandas as pd
+
+data = [[45, 65, 100], [56, 45, 50], [67, 68, 98]]
+index = [['张三', '李四', '王五']]
+columns = ['数学', '语文', '英语']
+df = pd.DataFrame(data=data, index=index, columns=columns)
+
+# select by column name
+print(df[['数学', '英语']])
+print('------------------------------')
+print(df.loc[:, ['数学', '英语']])   # rows before the comma, columns after it
+print('------------------------------')
+print(df.iloc[:, [0, 2]])
+print('------------------------------')
+# select a contiguous range of columns, from 语文 to the last column
+print(df.loc[:, '语文':])
+print('------------------------------')
+print(df.iloc[:, 1:])