#!/usr/bin/env python
# -*- coding: utf-8 -*-
# time: 2023/5/11 14:43
# file: cluster_power.py
# author: David
# company: shenyang JY
import os
import re

import numpy as np
import pandas as pd
# Data is taken from continuous_data.
# NOTE(review): no function visible in this file reads this module-level list;
# each of them takes its own `cluster` parameter, which shadows it. Confirm
# whether it is still needed.
cluster = []
  12. def cluster_power_list_file(cluster, turbine_id, input_path, output_path):
  13. """
  14. 从turbine-*.csv的文件列表中进行聚类功率相加
  15. cluster:聚类的结果
  16. turbine_id:风机ID
  17. input_path:输入路径 output_filtered_csv_files 所在路径
  18. output_path:输出每个聚类的功率,和所有聚类的功率cluster_data
  19. """
  20. if not os.path.exists(output_path):
  21. os.makedirs(output_path)
  22. files = os.listdir(input_path)
  23. files.remove('turbine-144.csv')
  24. assert len(cluster) == len(files)
  25. dfs = [pd.read_csv(os.path.join(input_path, f)) for f in files]
  26. cfs = {}
  27. for j in range(1, max(cluster)+1):
  28. arr_j = np.where(cluster == j)[0] # cluster中聚类j的索引列表
  29. cfs.setdefault(j, [turbine_id[k] for k in arr_j])
  30. dfs_j = [dfs[k] for k in arr_j] # cluster
  31. dfj, time_series = dfs_j[0].loc[:, ['C_TIME','C_ACTIVE_POWER']], dfs_j[0]['C_TIME']
  32. for df in dfs_j[1:]:
  33. if df['C_TIME'].equals(time_series) is False:
  34. print("风机之间的日期不一致!")
  35. raise ValueError
  36. dfj['C_ACTIVE_POWER'] += df['C_ACTIVE_POWER']
  37. dfj.to_csv(os.path.join(output_path, 'power_' + str(j) + '.csv'), index=False)
  38. paint_cluster_power(output_path)
  39. for key, value in cfs.items():
  40. print("第{}组:{}".format(key, cfs[key]))
  41. def cluster_power_list_folder(cluster, turbine_id, input_path, output_path):
  42. """
  43. 从嵌套turbine-*.csv的多个文件夹列表中进行聚类功率相加
  44. cluster:聚类的结果
  45. turbine_id:风机ID
  46. input_path:输入路径 continuous_data 所在路径
  47. output_path:输出每个聚类的功率,和所有聚类的功率cluster_data
  48. """
  49. if not os.path.exists(output_path):
  50. os.makedirs(output_path)
  51. assert len(cluster) == len(turbine_id)
  52. continuous_data_path = input_path
  53. # 遍历整个continuous_data_path文件夹
  54. continuous_list = []
  55. for dirpath, dirnames, filenames in os.walk(continuous_data_path):
  56. dirname = dirpath.split('/')[-1]
  57. filenames = [file for file in filenames if not file.startswith('turbine-144')]
  58. x = [os.path.join(dirpath, filename) for filename in filenames]
  59. continuous_list.append((x, dirname))
  60. continuous_list.pop(0)
  61. for i in range(len(continuous_list)):
  62. cfs = {}
  63. dfs = [pd.read_csv(path) for path in continuous_list[i][0]]
  64. for j in range(1, max(cluster)+1):
  65. arr_j = np.where(cluster == j)[0] # cluster中聚类j的索引列表
  66. cfs.setdefault(j, [turbine_id[k] for k in arr_j])
  67. dfs_j = [dfs[k] for k in arr_j] # cluster
  68. dfj, time_series = dfs_j[0].loc[:, ['C_TIME','C_ACTIVE_POWER']], dfs_j[0]['C_TIME']
  69. for df in dfs_j[1:]:
  70. if df['C_TIME'].equals(time_series) is False:
  71. print("风机之间的日期不一致!")
  72. raise ValueError
  73. dfj['C_ACTIVE_POWER'] += df['C_ACTIVE_POWER']
  74. output_1 = os.path.join(output_path, continuous_list[i][1])
  75. if not os.path.exists(output_1):
  76. os.makedirs(output_1)
  77. dfj.to_csv(os.path.join(output_1, 'power_' + str(j) + '.csv'), index=False)
  78. paint_cluster_power(output_1)
  79. print("------{}的分组信息----".format(continuous_list[i][1]))
  80. for key, value in cfs.items():
  81. print("第{}组:{}".format(key, cfs[key]))
  82. def paint_cluster_power(cluster_path):
  83. dfs = [pd.read_csv(os.path.join(cluster_path, file_path)).rename(columns={'C_ACTIVE_POWER':file_path.split('/')[-1][:-4]}) for file_path in os.listdir(cluster_path)]
  84. df_cluster = pd.DataFrame({df.columns[-1]: df.iloc[:, -1] for df in dfs})
  85. df_cluster.insert(loc=0, column='C_TIME', value=dfs[0]['C_TIME'])
  86. df_cluster.to_csv(os.path.join(cluster_path, 'cluster_data.csv'), index=False)