123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221 |
- import matplotlib.pyplot as plt
- import numpy as np
- import pandas as pd
- from scipy.optimize import curve_fit
- from metrics import RMSE
- import lightgbm as lgb
- # root_path = './dataset/data/'
- root_path = './dataset/shandong/Dataset_training/'
- # 设置图大小
- width = 16
- height = 9
- plt.figure(figsize=(width, height))
- # 设置全局字体样式
- plt.rcParams['font.family'] = 'Microsoft YaHei'
- # max_power = 22
- # 定义sigmoid函数
- def sigmoid(x, k, x0,max_power,min_power):
- y = max_power / (1 + np.exp(-k * (x - x0)))+min_power
- return y
- def contact_all():
- df = None
- csvpath = (r"./dataset/shandong/Dataset_training/concat_all.csv")
- for id in range(6):
- df_NWP = pd.read_csv(r"./dataset/shandong/Dataset_training/NWP/NWP_{}.csv".format(id))
- df_PowerRE = pd.read_csv(r"./dataset/shandong/Dataset_training/power/power_{}.csv".format(id))
- df_raw = pd.merge(df_NWP, df_PowerRE, on="C_TIME")
- all_features = ['C_TIME', 'C_WS90', 'C_WS100', 'C_REAL_VALUE']
- df_raw = df_raw[all_features]
- print(len(df_raw))
- if df is None:
- df = df_raw
- else:
- df = pd.concat([df, df_raw], axis=0)
- print(len(df))
- df.to_csv(csvpath, index=False)
- def show_cz_plot():
- df = pd.read_csv(root_path + 'concat102.csv')
- # 创建一些样本数据
- x1 = df['C_WS100'].values
- dif = df['C_WS'].values - x1
- x1 = np.arange(1, len(df)+1)
- fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(64, 14))
- # 绘制散点图
- ax1.plot(x1, dif, color='red', label='turbine-102')
- # 添加标题和标签
- ax1.set_title('折线图')
- ax1.set_xlabel('时间点')
- ax1.set_ylabel('NWP与机头风速差值')
- ax1.legend()
- # plt.scatter(dif, y1, color='blue', label='数据集2')
- # plt.scatter(x3, y3, color='green', label='数据集3')
- # 绘制散点图
- df = pd.read_csv(root_path + 'concat103.csv')
- # 创建一些样本数据
- x1 = df['C_WS100'].values
- y1 = df['C_ACTIVE_POWER'].values
- dif = df['C_WS'].values - x1
- x1 = np.arange(1, len(df) + 1)
- ax2.plot(x1, dif, color='blue', label='turbine-103')
- # 添加标题和标签
- ax2.set_title('折线图')
- ax2.set_xlabel('时间点')
- ax2.set_ylabel('NWP与机头风速差值')
- ax2.legend()
- plt.show()
- def show_cz_scatter():
- df = pd.read_csv(root_path + 'concat102.csv')
- # 创建一些样本数据
- x1 = df['C_WS100'].values
- dif = df['C_WS'].values - x1
- x1 = np.arange(1, len(df)+1)
- fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(64, 14))
- # 绘制散点图
- ax1.scatter(x1, dif, color='red', label='turbine-102')
- # 添加标题和标签
- ax1.set_title('散点图')
- ax1.set_xlabel('时间点')
- ax1.set_ylabel('NWP与机头风速差值')
- ax1.legend()
- # plt.scatter(dif, y1, color='blue', label='数据集2')
- # plt.scatter(x3, y3, color='green', label='数据集3')
- # 绘制散点图
- df = pd.read_csv(root_path + 'concat103.csv')
- # 创建一些样本数据
- x1 = df['C_WS100'].values
- y1 = df['C_ACTIVE_POWER'].values
- dif = df['C_WS'].values - x1
- x1 = np.arange(1, len(df) + 1)
- ax2.scatter(x1, dif, color='blue', label='turbine-103')
- # 添加标题和标签
- ax2.set_title('散点图')
- ax2.set_xlabel('时间点')
- ax2.set_ylabel('NWP与机头风速差值')
- ax2.legend()
- plt.show()
- def show_WSandPower_nihe():
- df = pd.read_csv(root_path + 'concat_all.csv')
- # 读样本数据
- x1 = df['C_WS100'].values
- y1 = df['C_REAL_VALUE'].values
- # 拟合数据
- # p0 = [1, np.median(x1),max(y1),min(y1)]
- p0 = [1, 7.3,max(y1),min(y1)]
- popt, pcov = curve_fit(sigmoid, x1, y1,p0=p0)
- y_fit = sigmoid(x1, *p0)
- trusts = []
- for i in range(len(y1)):
- if y1[i] > y_fit[i]-2 and y1[i]<y_fit[i]+2:
- trusts.append(i)
- # 绘制散点图
- plt.scatter(x1, y1, color='blue', label='data')
- plt.scatter(x1, y_fit, color='red', label='fit')
- plt.scatter(x1[trusts],y1[trusts],color='green', label='trust')
- print("k = %f, x0 = %f" % (popt[0], popt[1]))
- plt.title('散点图')
- plt.xlabel('NWP100米风速值')
- plt.ylabel('功率值')
- # 添加图例
- plt.legend()
- # 显示图形
- plt.show()
- def data_clean(df):
- # 读样本数据
- x1 = df['C_WS100'].values
- y1 = df['C_REAL_VALUE'].values
- # 拟合数据
- # p0 = [1, np.median(x1),max(y1),min(y1)]
- p0 = [1, 7.3,max(y1),min(y1)]
- # p0 = [1, (max(x1)+min(x1))/2, max(y1), min(y1)]
- popt, pcov = curve_fit(sigmoid, x1, y1,p0=p0)
- y_fit = sigmoid(x1, *p0)
- trusts = []
- for i in range(len(y1)):
- if y1[i] > y_fit[i]-2 and y1[i]<y_fit[i]+2:
- trusts.append(i)
- df = df.loc[trusts]
- df.reset_index(drop=True, inplace=True)
- return df
- # 23/04/26 风电实验基准
- def train_lgbmodel_feng(clean=False):
- print('------------generate features-----------------')
- index = 'shandong'
- df = pd.read_csv('./dataset/'+index+'/Dataset_training/concat_all.csv')
- all_features = ['C_WS100']
- border = int(len(df) * 0.8)
- valid_border = border+int(len(df) * 0.1)
- start =0 #int(len(df) * 0.5)
- df_train = df.loc[:border]
- df_valid = df.loc[border:valid_border]
- df_test = df.loc[valid_border:]
- if clean:
- df_test.reset_index(drop=True, inplace=True)
- df_train = data_clean(df_train)
- X_train = df_train[all_features]
- Y_train = df_train[['C_REAL_VALUE']]
- X_valid = df_valid[all_features]
- Y_valid = df_valid[['C_REAL_VALUE']]
- X_test = df_test[all_features]
- Y_test = df_test[['C_REAL_VALUE']]
- print("train shape{}{} test shape{}{}".format(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape))
- print('------------training-----------------')
- model = lgb.LGBMRegressor(objective='regression', n_estimators=1000,
- learning_rate=0.025, n_jobs=-1, random_state=630)
- # Train the model
- # model.fit(X_train, Y_train, eval_metric='rmse',
- # eval_set=[(X_train, Y_train), (X_temp,Y_temp)],
- # eval_names=['train', 'val'],
- # early_stopping_rounds=20, verbose=0)
- model.fit(X_train, Y_train, eval_metric='rmse',
- eval_set=[(X_valid, Y_valid)],
- eval_names=['valid'],
- early_stopping_rounds=20, verbose=0)
- best_iteration = model.best_iteration_
- print(best_iteration)
- # --------------feature importance----------------
- '''
- feature_importance = pd.DataFrame()
- feature_importance['fea_name'] = all_features
- feature_importance['fea_imp'] = model.feature_importances_
- feature_importance = feature_importance.sort_values('fea_imp', ascending=False)
- print(feature_importance)
- '''
- # --------------feature importance----------------
- Y_pred = model.predict(X_test, num_iteration=best_iteration)
- Y_pred = np.maximum(Y_pred, 0)
- Y_pred = Y_pred.reshape([-1])
- Y_true = Y_test.values.reshape([-1])
- for li in range(len(Y_pred)):
- if Y_pred[li]<0.016:
- Y_pred[li] = 0
- print('------------testing-----------------')
- print("test shape{}{}".format( Y_pred.shape, Y_true.shape))
- mean = RMSE(Y_pred, Y_true)
- print("模型RMSE:{}".format(mean))
- print("模型准确率:{}%".format(100-mean/22.5*100))
-
- if __name__=="__main__":
- print("main:")
- # contact_all()
- train_lgbmodel_feng(True)
- # show_WSandPower_nihe()
|