data_add.py 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163
  1. # -*- coding: utf-8 -*-
  2. import pandas as pd
  3. import matplotlib.pyplot as plt
  4. import matplotlib.dates as mdates
  5. from sklearn.preprocessing import MinMaxScaler
  6. import os
  7. # cluster_power路径位置
  8. root_path = "../data-process/data/"
  9. # 1、2类平均机头风速,总平均机头风速,nwp风速,实际功率
  10. add_cols = ["C_WS_1", "C_WS_2", "C_WS_ALL",
  11. "C_WS100", "C_WS170", "power", "C_REAL_VALUE"]
  12. # 处理几个表的数据,拼接在一起,得到上述列
  13. def data_process():
  14. id1 = [142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
  15. 156, 157, 158, 159, 160, 161]
  16. id2 = [102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115,
  17. 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
  18. 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141]
  19. df_power = pd.read_csv(root_path + "power.csv")
  20. df_nwp = pd.read_csv(root_path + "NWP.csv",
  21. usecols=["C_TIME", "C_WS100", "C_WS170"])
  22. # df_nwp_power = pd.merge(df_power, df_nwp, on="C_TIME", how="inner")
  23. turbine_path = root_path + "output_filtered_csv_files/"
  24. df_turbine = pd.read_csv(
  25. turbine_path + "turbine-102.csv", usecols=["C_TIME"])
  26. df_turbine["C_WS_1"] = [0] * len(df_turbine)
  27. df_turbine["C_WS_2"] = [0] * len(df_turbine)
  28. df_turbine["C_WS_ALL"] = [0] * len(df_turbine)
  29. df_turbine["power"] = [0] * len(df_turbine)
  30. for ids in id1:
  31. df_temp = pd.read_csv(turbine_path + f"turbine-{ids}.csv")
  32. # if len(df_temp) != len(df_turbine):
  33. # print("false")
  34. df_turbine["C_WS_1"] += df_temp["C_WS"]
  35. df_turbine["C_WS_ALL"] += df_temp["C_WS"]
  36. df_turbine["power"] += df_temp["C_ACTIVE_POWER"]
  37. df_turbine["C_WS_1"] /= len(id1)
  38. for ids in id2:
  39. df_temp = pd.read_csv(turbine_path + f"turbine-{ids}.csv")
  40. # if len(df_temp) != len(df_turbine):
  41. # print("false")
  42. df_turbine["C_WS_2"] += df_temp["C_WS"]
  43. df_turbine["C_WS_ALL"] += df_temp["C_WS"]
  44. df_turbine["power"] += df_temp["C_ACTIVE_POWER"]
  45. df_turbine["C_WS_2"] /= len(id2)
  46. df_turbine["C_WS_ALL"] /= (len(id1) + len(id2))
  47. df_turbine["power"] /= (len(id1) + len(id2))
  48. df_all = pd.concat([df_power.set_index("C_TIME"), df_nwp.set_index("C_TIME"),
  49. df_turbine.set_index("C_TIME")], axis=1, join="inner").reset_index()
  50. df_all = df_all.reindex(columns=["C_TIME"] + add_cols)
  51. # df_all.drop(columns="power", inplace=True)
  52. df_all.to_csv(root_path + "df_all.csv", index=False)
  53. # data_process()
  54. # 在cluster_data.csv中新增若干列(add_cols),得到cluster_data_1.csv
  55. def data_add(dirname, filename):
  56. df_temp = pd.read_csv(dirname + filename)
  57. df_all = pd.read_csv(root_path + "df_all.csv")
  58. df = pd.merge(df_all, df_temp, on="C_TIME", how="inner")
  59. df = df.reindex(columns=["C_TIME", "power_1",
  60. "power_2"] + add_cols + ["SUM"])
  61. df.to_csv(dirname + "cluster_data_1.csv", index=False)
  62. # 画随时间变化的曲线
  63. def show_curve(dirname, filename, series1, series2):
  64. df = pd.read_csv(dirname + filename)
  65. cols = df.columns[1:]
  66. scaler = MinMaxScaler()
  67. # 最大最小归一化
  68. df[cols] = scaler.fit_transform(df[cols])
  69. c_time = pd.to_datetime(df["C_TIME"])
  70. plt.figure(figsize=(12, 8), dpi=100)
  71. plt.plot(c_time, df[series1], label=series1)
  72. plt.plot(c_time, df[series2], label=series2)
  73. plt.legend()
  74. date_format = mdates.DateFormatter('%Y-%m-%d %H:%M')
  75. plt.gca().xaxis.set_major_formatter(date_format)
  76. plt.xticks(rotation=30)
  77. plt.show()
  78. plt.savefig(dirname + "curve_" + series1 + "_" + series2 + ".png")
  79. plt.close()
  80. # 画s型曲线
  81. def show_scatter(dirname, filename, series1, series2, series3):
  82. df = pd.read_csv(dirname + filename)
  83. cols = df.columns[1:]
  84. scaler = MinMaxScaler()
  85. # 最大最小归一化
  86. # df[cols] = scaler.fit_transform(df[cols])
  87. plt.figure(figsize=(10, 8), dpi=100)
  88. point_size = 10
  89. plt.scatter(df[series1], df[series3], label=series1, s=point_size)
  90. plt.scatter(df[series2], df[series3], label=series2, s=point_size)
  91. plt.xlabel(series1 + " / " + series2)
  92. plt.ylabel(series3)
  93. plt.legend()
  94. plt.show()
  95. plt.savefig(dirname + "scatter_" + series1 +
  96. "_" + series2 + "_" + series3 + ".png")
  97. plt.close()
  98. # %%
  99. if __name__ == "__main__":
  100. cluster_path = root_path + "cluster_power/"
  101. # 新增数据
  102. data_add(cluster_path, "cluster_data.csv")
  103. for root, dirs, files in os.walk(cluster_path):
  104. for sub_dir in dirs:
  105. subdir_path = os.path.join(root, sub_dir)
  106. # print(subdir_path)
  107. # file_path = os.path.join(subdir_path, "cluster_data.csv")
  108. # print(file_path)
  109. data_add(subdir_path + '/', "cluster_data.csv")
  110. # %% 画曲线图
  111. show_curve(cluster_path, "cluster_data_1.csv", "SUM", "C_WS_ALL")
  112. for root, dirs, files in os.walk(cluster_path):
  113. for sub_dir in dirs:
  114. subdir_path = os.path.join(root, sub_dir)
  115. show_curve(subdir_path + "/",
  116. "cluster_data_1.csv", "SUM", "C_WS_ALL")
  117. # show_curve(subdir_path + "/", "cluster_data_1.csv", "power_1", "C_WS_1")
  118. # show_curve(subdir_path + "/", "cluster_data_1.csv", "power_2", "C_WS_2")
  119. # %% 画散点图(s型曲线)
  120. show_scatter(cluster_path, "cluster_data_1.csv",
  121. "C_WS_ALL", "C_WS100", "SUM")
  122. for root, dirs, files in os.walk(cluster_path):
  123. for sub_dir in dirs:
  124. subdir_path = os.path.join(root, sub_dir)
  125. show_scatter(subdir_path + "/", "cluster_data_1.csv",
  126. "C_WS_ALL", "C_WS100", "SUM")