# JiaYueTest.py
  1. import matplotlib.pyplot as plt
  2. import numpy as np
  3. import pandas as pd
  4. from scipy.optimize import curve_fit
  5. from metrics import RMSE
  6. import lightgbm as lgb
  7. # root_path = './dataset/data/'
  8. root_path = './dataset/shandong/Dataset_training/'
  9. # 设置图大小
  10. width = 16
  11. height = 9
  12. plt.figure(figsize=(width, height))
  13. # 设置全局字体样式
  14. plt.rcParams['font.family'] = 'Microsoft YaHei'
  15. # max_power = 22
  16. # 定义sigmoid函数
  17. def sigmoid(x, k, x0,max_power,min_power):
  18. y = max_power / (1 + np.exp(-k * (x - x0)))+min_power
  19. return y
  20. def contact_all():
  21. df = None
  22. csvpath = (r"./dataset/shandong/Dataset_training/concat_all.csv")
  23. for id in range(6):
  24. df_NWP = pd.read_csv(r"./dataset/shandong/Dataset_training/NWP/NWP_{}.csv".format(id))
  25. df_PowerRE = pd.read_csv(r"./dataset/shandong/Dataset_training/power/power_{}.csv".format(id))
  26. df_raw = pd.merge(df_NWP, df_PowerRE, on="C_TIME")
  27. all_features = ['C_TIME', 'C_WS90', 'C_WS100', 'C_REAL_VALUE']
  28. df_raw = df_raw[all_features]
  29. print(len(df_raw))
  30. if df is None:
  31. df = df_raw
  32. else:
  33. df = pd.concat([df, df_raw], axis=0)
  34. print(len(df))
  35. df.to_csv(csvpath, index=False)
  36. def show_cz_plot():
  37. df = pd.read_csv(root_path + 'concat102.csv')
  38. # 创建一些样本数据
  39. x1 = df['C_WS100'].values
  40. dif = df['C_WS'].values - x1
  41. x1 = np.arange(1, len(df)+1)
  42. fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(64, 14))
  43. # 绘制散点图
  44. ax1.plot(x1, dif, color='red', label='turbine-102')
  45. # 添加标题和标签
  46. ax1.set_title('折线图')
  47. ax1.set_xlabel('时间点')
  48. ax1.set_ylabel('NWP与机头风速差值')
  49. ax1.legend()
  50. # plt.scatter(dif, y1, color='blue', label='数据集2')
  51. # plt.scatter(x3, y3, color='green', label='数据集3')
  52. # 绘制散点图
  53. df = pd.read_csv(root_path + 'concat103.csv')
  54. # 创建一些样本数据
  55. x1 = df['C_WS100'].values
  56. y1 = df['C_ACTIVE_POWER'].values
  57. dif = df['C_WS'].values - x1
  58. x1 = np.arange(1, len(df) + 1)
  59. ax2.plot(x1, dif, color='blue', label='turbine-103')
  60. # 添加标题和标签
  61. ax2.set_title('折线图')
  62. ax2.set_xlabel('时间点')
  63. ax2.set_ylabel('NWP与机头风速差值')
  64. ax2.legend()
  65. plt.show()
  66. def show_cz_scatter():
  67. df = pd.read_csv(root_path + 'concat102.csv')
  68. # 创建一些样本数据
  69. x1 = df['C_WS100'].values
  70. dif = df['C_WS'].values - x1
  71. x1 = np.arange(1, len(df)+1)
  72. fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(64, 14))
  73. # 绘制散点图
  74. ax1.scatter(x1, dif, color='red', label='turbine-102')
  75. # 添加标题和标签
  76. ax1.set_title('散点图')
  77. ax1.set_xlabel('时间点')
  78. ax1.set_ylabel('NWP与机头风速差值')
  79. ax1.legend()
  80. # plt.scatter(dif, y1, color='blue', label='数据集2')
  81. # plt.scatter(x3, y3, color='green', label='数据集3')
  82. # 绘制散点图
  83. df = pd.read_csv(root_path + 'concat103.csv')
  84. # 创建一些样本数据
  85. x1 = df['C_WS100'].values
  86. y1 = df['C_ACTIVE_POWER'].values
  87. dif = df['C_WS'].values - x1
  88. x1 = np.arange(1, len(df) + 1)
  89. ax2.scatter(x1, dif, color='blue', label='turbine-103')
  90. # 添加标题和标签
  91. ax2.set_title('散点图')
  92. ax2.set_xlabel('时间点')
  93. ax2.set_ylabel('NWP与机头风速差值')
  94. ax2.legend()
  95. plt.show()
  96. def show_WSandPower_nihe():
  97. df = pd.read_csv(root_path + 'concat_all.csv')
  98. # 读样本数据
  99. x1 = df['C_WS100'].values
  100. y1 = df['C_REAL_VALUE'].values
  101. # 拟合数据
  102. # p0 = [1, np.median(x1),max(y1),min(y1)]
  103. p0 = [1, 7.3,max(y1),min(y1)]
  104. popt, pcov = curve_fit(sigmoid, x1, y1,p0=p0)
  105. y_fit = sigmoid(x1, *p0)
  106. trusts = []
  107. for i in range(len(y1)):
  108. if y1[i] > y_fit[i]-2 and y1[i]<y_fit[i]+2:
  109. trusts.append(i)
  110. # 绘制散点图
  111. plt.scatter(x1, y1, color='blue', label='data')
  112. plt.scatter(x1, y_fit, color='red', label='fit')
  113. plt.scatter(x1[trusts],y1[trusts],color='green', label='trust')
  114. print("k = %f, x0 = %f" % (popt[0], popt[1]))
  115. plt.title('散点图')
  116. plt.xlabel('NWP100米风速值')
  117. plt.ylabel('功率值')
  118. # 添加图例
  119. plt.legend()
  120. # 显示图形
  121. plt.show()
  122. def data_clean(df):
  123. # 读样本数据
  124. x1 = df['C_WS100'].values
  125. y1 = df['C_REAL_VALUE'].values
  126. # 拟合数据
  127. # p0 = [1, np.median(x1),max(y1),min(y1)]
  128. p0 = [1, 7.3,max(y1),min(y1)]
  129. # p0 = [1, (max(x1)+min(x1))/2, max(y1), min(y1)]
  130. popt, pcov = curve_fit(sigmoid, x1, y1,p0=p0)
  131. y_fit = sigmoid(x1, *p0)
  132. trusts = []
  133. for i in range(len(y1)):
  134. if y1[i] > y_fit[i]-2 and y1[i]<y_fit[i]+2:
  135. trusts.append(i)
  136. df = df.loc[trusts]
  137. df.reset_index(drop=True, inplace=True)
  138. return df
  139. # 23/04/26 风电实验基准
  140. def train_lgbmodel_feng(clean=False):
  141. print('------------generate features-----------------')
  142. index = 'shandong'
  143. df = pd.read_csv('./dataset/'+index+'/Dataset_training/concat_all.csv')
  144. all_features = ['C_WS100']
  145. border = int(len(df) * 0.8)
  146. valid_border = border+int(len(df) * 0.1)
  147. start =0 #int(len(df) * 0.5)
  148. df_train = df.loc[:border]
  149. df_valid = df.loc[border:valid_border]
  150. df_test = df.loc[valid_border:]
  151. if clean:
  152. df_test.reset_index(drop=True, inplace=True)
  153. df_train = data_clean(df_train)
  154. X_train = df_train[all_features]
  155. Y_train = df_train[['C_REAL_VALUE']]
  156. X_valid = df_valid[all_features]
  157. Y_valid = df_valid[['C_REAL_VALUE']]
  158. X_test = df_test[all_features]
  159. Y_test = df_test[['C_REAL_VALUE']]
  160. print("train shape{}{} test shape{}{}".format(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape))
  161. print('------------training-----------------')
  162. model = lgb.LGBMRegressor(objective='regression', n_estimators=1000,
  163. learning_rate=0.025, n_jobs=-1, random_state=630)
  164. # Train the model
  165. # model.fit(X_train, Y_train, eval_metric='rmse',
  166. # eval_set=[(X_train, Y_train), (X_temp,Y_temp)],
  167. # eval_names=['train', 'val'],
  168. # early_stopping_rounds=20, verbose=0)
  169. model.fit(X_train, Y_train, eval_metric='rmse',
  170. eval_set=[(X_valid, Y_valid)],
  171. eval_names=['valid'],
  172. early_stopping_rounds=20, verbose=0)
  173. best_iteration = model.best_iteration_
  174. print(best_iteration)
  175. # --------------feature importance----------------
  176. '''
  177. feature_importance = pd.DataFrame()
  178. feature_importance['fea_name'] = all_features
  179. feature_importance['fea_imp'] = model.feature_importances_
  180. feature_importance = feature_importance.sort_values('fea_imp', ascending=False)
  181. print(feature_importance)
  182. '''
  183. # --------------feature importance----------------
  184. Y_pred = model.predict(X_test, num_iteration=best_iteration)
  185. Y_pred = np.maximum(Y_pred, 0)
  186. Y_pred = Y_pred.reshape([-1])
  187. Y_true = Y_test.values.reshape([-1])
  188. for li in range(len(Y_pred)):
  189. if Y_pred[li]<0.016:
  190. Y_pred[li] = 0
  191. print('------------testing-----------------')
  192. print("test shape{}{}".format( Y_pred.shape, Y_true.shape))
  193. mean = RMSE(Y_pred, Y_true)
  194. print("模型RMSE:{}".format(mean))
  195. print("模型准确率:{}%".format(100-mean/22.5*100))
  196. if __name__=="__main__":
  197. print("main:")
  198. # contact_all()
  199. train_lgbmodel_feng(True)
  200. # show_WSandPower_nihe()