# -*- coding: utf-8 -*-
- import pandas as pd
- import matplotlib.pyplot as plt
- import matplotlib.dates as mdates
- from sklearn.preprocessing import MinMaxScaler
- import os
# Base data directory; the cluster_power/ folder and the input CSV files
# are looked up relative to it.
root_path = "../data-process/data/"

# Extra columns appended to every cluster table (translated from the
# original note): mean nacelle wind speed of turbine group 1 and group 2,
# overall mean nacelle wind speed, NWP wind speeds, and actual power.
# NOTE(review): C_REAL_VALUE is presumably supplied by power.csv - confirm.
add_cols = [
    "C_WS_1",
    "C_WS_2",
    "C_WS_ALL",
    "C_WS100",
    "C_WS170",
    "power",
    "C_REAL_VALUE",
]
def _accumulate_turbines(df_turbine, turbine_path, turbine_ids, ws_col):
    """Add one turbine group's readings into ``df_turbine`` in place.

    For every id in ``turbine_ids`` the file ``turbine-<id>.csv`` is read and
    its ``C_WS`` column is added to both ``ws_col`` and ``C_WS_ALL``, while
    ``C_ACTIVE_POWER`` is added to ``power``. Afterwards ``ws_col`` is divided
    by the group size so that it holds the group mean.

    Args:
        df_turbine: Frame holding the running sums; modified in place.
        turbine_path: Directory prefix (with trailing separator) of the
            per-turbine CSV files.
        turbine_ids: Sized iterable of turbine ids belonging to the group.
        ws_col: Name of the column receiving this group's wind-speed mean.
    """
    for turbine_id in turbine_ids:
        df_temp = pd.read_csv(
            turbine_path + f"turbine-{turbine_id}.csv",
            usecols=["C_WS", "C_ACTIVE_POWER"],
        )
        # pandas aligns these additions on the row index, so each file is
        # expected to have the same rows, in the same order, as
        # turbine-102.csv; rows missing from a file turn into NaN.
        df_turbine[ws_col] += df_temp["C_WS"]
        df_turbine["C_WS_ALL"] += df_temp["C_WS"]
        df_turbine["power"] += df_temp["C_ACTIVE_POWER"]
    df_turbine[ws_col] /= len(turbine_ids)


def data_process():
    """Build ``df_all.csv`` from the power, NWP and per-turbine tables.

    Reads ``power.csv``, the ``C_TIME``/``C_WS100``/``C_WS170`` columns of
    ``NWP.csv`` and the 60 per-turbine files under
    ``output_filtered_csv_files/``. The turbine files are averaged into
    ``C_WS_1`` (ids 142-161), ``C_WS_2`` (ids 102-141), ``C_WS_ALL`` and
    ``power`` (both over all turbines). The three tables are inner-joined on
    ``C_TIME``, reduced to ``["C_TIME"] + add_cols`` and written to
    ``root_path + "df_all.csv"``.
    """
    id1 = range(142, 162)  # turbine group 1: ids 142..161
    id2 = range(102, 142)  # turbine group 2: ids 102..141

    df_power = pd.read_csv(root_path + "power.csv")
    df_nwp = pd.read_csv(
        root_path + "NWP.csv", usecols=["C_TIME", "C_WS100", "C_WS170"]
    )

    turbine_path = root_path + "output_filtered_csv_files/"
    # turbine-102.csv supplies the time axis that all sums are aligned to.
    df_turbine = pd.read_csv(
        turbine_path + "turbine-102.csv", usecols=["C_TIME"]
    )
    for col in ("C_WS_1", "C_WS_2", "C_WS_ALL", "power"):
        df_turbine[col] = 0

    _accumulate_turbines(df_turbine, turbine_path, id1, "C_WS_1")
    _accumulate_turbines(df_turbine, turbine_path, id2, "C_WS_2")

    turbine_count = len(id1) + len(id2)
    df_turbine["C_WS_ALL"] /= turbine_count
    df_turbine["power"] /= turbine_count

    # Keep only timestamps present in all three tables.
    df_all = pd.concat(
        [
            df_power.set_index("C_TIME"),
            df_nwp.set_index("C_TIME"),
            df_turbine.set_index("C_TIME"),
        ],
        axis=1,
        join="inner",
    ).reset_index()
    df_all = df_all.reindex(columns=["C_TIME"] + add_cols)
    df_all.to_csv(root_path + "df_all.csv", index=False)
- # data_process()
def data_add(dirname, filename):
    """Add the ``add_cols`` columns to a cluster table.

    Reads ``filename`` from ``dirname``, inner-joins it on ``C_TIME`` with
    ``df_all.csv`` (located under ``root_path``) and writes the result to
    ``cluster_data_1.csv`` in the same directory. The output columns are
    ``C_TIME``, ``power_1``, ``power_2``, the ``add_cols`` columns and
    ``SUM``, in that order.

    Args:
        dirname: Directory containing the cluster table. A trailing path
            separator is accepted but no longer required.
        filename: Name of the cluster CSV inside ``dirname``.
    """
    df_temp = pd.read_csv(os.path.join(dirname, filename))
    df_all = pd.read_csv(root_path + "df_all.csv")
    df = pd.merge(df_all, df_temp, on="C_TIME", how="inner")
    df = df.reindex(
        columns=["C_TIME", "power_1", "power_2"] + add_cols + ["SUM"]
    )
    df.to_csv(os.path.join(dirname, "cluster_data_1.csv"), index=False)
def show_curve(dirname, filename, series1, series2):
    """Plot two min-max-normalised columns over time and save the figure.

    The CSV ``filename`` in ``dirname`` is loaded, ``series1`` and
    ``series2`` are each scaled with ``MinMaxScaler`` and drawn against the
    ``C_TIME`` column. The figure is written to
    ``curve_<series1>_<series2>.png`` in ``dirname`` and then displayed.

    Args:
        dirname: Directory holding the CSV; also receives the PNG. A
            trailing path separator is accepted but not required.
        filename: Name of the CSV file inside ``dirname``.
        series1: Name of the first column to plot.
        series2: Name of the second column to plot.
    """
    df = pd.read_csv(os.path.join(dirname, filename))

    # Min-max scaling works column by column, so scaling only the plotted
    # columns gives the same curves without requiring every other column to
    # be numeric. dict.fromkeys drops a duplicate when both names match.
    plot_cols = list(dict.fromkeys((series1, series2)))
    df[plot_cols] = MinMaxScaler().fit_transform(df[plot_cols])

    c_time = pd.to_datetime(df["C_TIME"])
    plt.figure(figsize=(12, 8), dpi=100)
    plt.plot(c_time, df[series1], label=series1)
    plt.plot(c_time, df[series2], label=series2)
    plt.legend()
    date_format = mdates.DateFormatter('%Y-%m-%d %H:%M')
    plt.gca().xaxis.set_major_formatter(date_format)
    plt.xticks(rotation=30)

    # Save before show(): once a blocking show() returns the figure window
    # has been closed, and a later savefig() would write an empty image.
    plt.savefig(
        os.path.join(dirname, "curve_" + series1 + "_" + series2 + ".png")
    )
    plt.show()
    plt.close()
def show_scatter(dirname, filename, series1, series2, series3):
    """Scatter two columns against a shared y-column and save the figure.

    The original author's note calls this the "S-shaped curve" plot.
    ``series1`` and ``series2`` are both drawn on the x-axis against
    ``series3`` on the y-axis, using the raw (un-normalised) values. The
    figure is written to ``scatter_<series1>_<series2>_<series3>.png`` in
    ``dirname`` and then displayed.

    Args:
        dirname: Directory holding the CSV; also receives the PNG. A
            trailing path separator is accepted but not required.
        filename: Name of the CSV file inside ``dirname``.
        series1: First x-axis column.
        series2: Second x-axis column.
        series3: y-axis column shared by both point clouds.
    """
    df = pd.read_csv(os.path.join(dirname, filename))

    plt.figure(figsize=(10, 8), dpi=100)
    point_size = 10
    plt.scatter(df[series1], df[series3], label=series1, s=point_size)
    plt.scatter(df[series2], df[series3], label=series2, s=point_size)
    plt.xlabel(series1 + " / " + series2)
    plt.ylabel(series3)
    plt.legend()

    # Save before show(): once a blocking show() returns the figure window
    # has been closed, and a later savefig() would write an empty image.
    plt.savefig(
        os.path.join(
            dirname,
            "scatter_" + series1 + "_" + series2 + "_" + series3 + ".png",
        )
    )
    plt.show()
    plt.close()
- # %%
def _iter_subdirs(base_dir):
    """Lazily yield each directory below ``base_dir`` with a trailing '/'.

    Directories at every depth are produced, in ``os.walk`` order.
    """
    for parent, child_dirs, _files in os.walk(base_dir):
        for child in child_dirs:
            yield os.path.join(parent, child) + "/"


if __name__ == "__main__":
    cluster_path = root_path + "cluster_power/"
    merged_name = "cluster_data_1.csv"

    # Add the extra columns to the top-level table, then to the table in
    # every sub-directory.
    data_add(cluster_path, "cluster_data.csv")
    for sub_path in _iter_subdirs(cluster_path):
        data_add(sub_path, "cluster_data.csv")

    # %% Time-series curves
    show_curve(cluster_path, merged_name, "SUM", "C_WS_ALL")
    for sub_path in _iter_subdirs(cluster_path):
        show_curve(sub_path, merged_name, "SUM", "C_WS_ALL")
        # Per-group variants can be drawn the same way with
        # ("power_1", "C_WS_1") or ("power_2", "C_WS_2").

    # %% Scatter plots (S-shaped curve)
    show_scatter(cluster_path, merged_name, "C_WS_ALL", "C_WS100", "SUM")
    for sub_path in _iter_subdirs(cluster_path):
        show_scatter(sub_path, merged_name, "C_WS_ALL", "C_WS100", "SUM")
|