#!/usr/bin/env python
# -*- coding: utf-8 -*-
# time: 2023/5/11 14:43
# file: cluster_power.py
# author: David
# company: shenyang JY
import os
import re
import numpy as np
import pandas as pd

def read_cfs(cfs, input_path, output_path, is_folder=False):
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    dfs = {}
    for j, ids in cfs.items():
        if is_folder:
            # Folder names look like Continuous_Turbine_Data_<x>_...; the <x>
            # segment is reused in the per-turbine file names.
            dirname = input_path.split('/')[-1]
            x = re.findall('(?<=Continuous_Turbine_Data_).*?(?=_)', dirname)[0]
            dfs_j = [pd.read_csv(os.path.join(input_path, f"turbine-{id}_{int(x)}.csv")) for id in ids]
        else:
            dfs_j = [pd.read_csv(os.path.join(input_path, f"turbine-{id}.csv")) for id in ids]
        dfj, time_series = dfs_j[0].loc[:, ['C_TIME', 'C_WS', 'C_ACTIVE_POWER']], dfs_j[0]['C_TIME']
        for df in dfs_j[1:]:
            if not df['C_TIME'].equals(time_series):
                raise ValueError("Timestamps are inconsistent between turbines!")
            dfj['C_ACTIVE_POWER'] += df['C_ACTIVE_POWER']
            dfj['C_WS'] += df['C_WS']
        dfj['C_WS'] /= len(dfs_j)  # average the summed wind speed over the turbines in cluster j
        dfj.rename(columns={'C_ACTIVE_POWER': 'C_ACTIVE_POWER' + str(j), 'C_WS': 'C_WS' + str(j)}, inplace=True)
        if is_folder:
            dfj.to_csv(os.path.join(output_path, 'cluster_' + str(j) + '.csv'), index=False)
        else:
            # Drop the first 20 rows in the flat-file case.
            dfj[20:].to_csv(os.path.join(output_path, 'cluster_' + str(j) + '.csv'), index=False)
        dfs[j] = dfj
    return dfs
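
# Note on read_cfs output (derived from the code above): each dfs[j] keeps the
# cluster's shared C_TIME column together with the summed active power and the
# averaged wind speed, with the cluster index appended to the column names,
# e.g. cluster 1 yields columns C_TIME, C_WS1, C_ACTIVE_POWER1.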

def get_cfs(cluster, turbine_id):
    cfs = {}
    for j in range(1, max(cluster) + 1):
        arr_j = np.where(cluster == j)[0]  # indices of the turbines assigned to cluster j
        cfs.setdefault(j, [turbine_id[k] for k in arr_j])
    for key, value in cfs.items():
        print("Cluster {}: {}".format(key, value))
    return cfs
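
# Usage sketch for get_cfs (illustrative only; the labels and turbine IDs below
# are made-up placeholders). Labels are assumed to be 1-based, matching the
# range(1, max(cluster) + 1) loop above:
#
#   labels = np.array([1, 1, 2, 2, 2])
#   ids = [101, 102, 103, 104, 105]
#   get_cfs(labels, ids)   # -> {1: [101, 102], 2: [103, 104, 105]}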

def cluster_power_list_file(cluster, turbine_id, input_path, output_path):
    """
    Sum cluster power from a flat list of turbine-*.csv files.
    cluster: clustering result (1-based labels)
    turbine_id: turbine IDs
    input_path: input directory (where output_filtered_csv_files lives)
    output_path: output directory for each cluster's power and the combined cluster_data
    """
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    cfs = get_cfs(cluster, turbine_id)
    dfs = read_cfs(cfs, input_path, output_path)
    dfs_cluster = pd.concat([df.set_index("C_TIME") for df in dfs.values()], join='inner', axis=1)
    dfs_cluster.reset_index().to_csv(os.path.join(output_path, 'cluster_data.csv'), index=False)
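
# Usage sketch for cluster_power_list_file (the KMeans step, feature matrix and
# paths below are illustrative assumptions, not part of this module). The input
# directory is expected to hold one turbine-<id>.csv per turbine with C_TIME,
# C_WS and C_ACTIVE_POWER columns:
#
#   from sklearn.cluster import KMeans
#   turbine_id = [101, 102, 103, 104]
#   features = ...  # per-turbine feature matrix (assumed to exist)
#   cluster = KMeans(n_clusters=2).fit_predict(features) + 1  # shift to 1-based labels
#   cluster_power_list_file(cluster, turbine_id,
#                           'output_filtered_csv_files', 'cluster_power')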

def cluster_power_list_folder(cluster, turbine_id, input_path, output_path):
    """
    Sum cluster power from multiple folders, each containing nested turbine-*.csv files.
    cluster: clustering result (1-based labels)
    turbine_id: turbine IDs
    input_path: input directory (where continuous_data lives)
    output_path: output directory for each cluster's power and the combined cluster_data
    """
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    continuous_list = [os.path.join(input_path, path) for path in os.listdir(input_path)]
    cfs = get_cfs(cluster, turbine_id)
    for con in continuous_list:
        dirname = con.split('/')[-1]
        output = os.path.join(output_path, dirname)
        dfs = read_cfs(cfs, con, output, True)
        dfs_cluster = pd.concat([df.set_index("C_TIME") for df in dfs.values()], join='inner', axis=1)
        dfs_cluster.reset_index().to_csv(os.path.join(output, 'cluster_data.csv'), index=False)
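
# Usage sketch for cluster_power_list_folder (paths are illustrative
# assumptions). Each subfolder of input_path is expected to be named
# Continuous_Turbine_Data_<x>_... and to contain turbine-<id>_<x>.csv files,
# which is the pattern read_cfs parses when is_folder=True:
#
#   cluster_power_list_folder(cluster, turbine_id,
#                             'continuous_data', 'cluster_power_continuous')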

def paint_cluster_power(cluster_path, dfs):
    # `dfs` is kept for signature compatibility but is not used: the per-cluster
    # frames are re-read from cluster_path. The [-5] index picks the cluster
    # digit out of a file name such as 'cluster_3.csv'.
    dfs = [pd.read_csv(os.path.join(cluster_path, file_path)).rename(
               columns={'C_ACTIVE_POWER': 'power_' + file_path.split('/')[-1][-5],
                        'C_WS': 'C_WS_' + file_path.split('/')[-1][-5]})
           for file_path in os.listdir(cluster_path)]
    # Build a wide table from the last column of every per-cluster file, then
    # prepend C_TIME and append the row-wise SUM of all cluster columns.
    df_cluster = pd.DataFrame({df.columns[-1]: df.iloc[:, -1] for df in dfs})
    df_cluster.insert(loc=0, column='C_TIME', value=dfs[0]['C_TIME'])
    df_cluster.insert(loc=len(dfs) + 1, column='SUM', value=df_cluster.iloc[:, 1:].sum(axis=1))
    df_cluster.to_csv(os.path.join(cluster_path, 'cluster_data.csv'), index=False)
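
if __name__ == '__main__':
    # Minimal demo of paint_cluster_power, assuming 'cluster_power' already
    # contains the per-cluster cluster_<j>.csv files; the path and the None
    # placeholder for the unused dfs argument are illustrative assumptions.
    paint_cluster_power('cluster_power', None)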