@@ -11,20 +11,50 @@ import pandas as pd
 # Fetch data from continuous_data
 cluster = []
 
 
-def
-
-def cluster_power(cluster, out_put):
+def cluster_power_list_file(cluster, turbine_id, input_path, output_path):
     """
-    Take the clustering results
+    Sum power by cluster from a flat list of turbine-*.csv files.
+    cluster: clustering result, one 1-based label per turbine
+    turbine_id: turbine IDs
+    input_path: input path, where the output_filtered_csv_files folder is located
+    output_path: output path for each cluster's power and for the all-cluster power (cluster_data)
     """
-    if not os.path.exists(out_put):
-        os.makedirs(out_put)
-    output_filtered_csv_files = '../data/output_filtered_csv_files/'
-    files = os.listdir(output_filtered_csv_files)
+    if not os.path.exists(output_path):
+        os.makedirs(output_path)
+    files = os.listdir(input_path)
     files.remove('turbine-144.csv')
     assert len(cluster) == len(files)
-    continuous_data_path = '../data/continuous_data/'
+    dfs = [pd.read_csv(os.path.join(input_path, f)) for f in files]
+    cfs = {}
+    for j in range(1, max(cluster)+1):
+        arr_j = np.where(cluster == j)[0]  # indices in cluster that belong to cluster j
+        cfs.setdefault(j, [turbine_id[k] for k in arr_j])
+        dfs_j = [dfs[k] for k in arr_j]  # data frames of the turbines in cluster j
+        dfj, time_series = dfs_j[0].loc[:, ['C_TIME','C_ACTIVE_POWER']], dfs_j[0]['C_TIME']
+        for df in dfs_j[1:]:
+            if not df['C_TIME'].equals(time_series):
+                print("Timestamps are inconsistent between turbines!")
+                raise ValueError
+            dfj['C_ACTIVE_POWER'] += df['C_ACTIVE_POWER']
+        dfj.to_csv(os.path.join(output_path, 'power_' + str(j) + '.csv'), index=False)
+    paint_cluster_power(output_path)
+    for key, value in cfs.items():
+        print("Group {}: {}".format(key, value))
+
+
+def cluster_power_list_folder(cluster, turbine_id, input_path, output_path):
+    """
+    Sum power by cluster from multiple nested folders of turbine-*.csv files.
+    cluster: clustering result, one 1-based label per turbine
+    turbine_id: turbine IDs
+    input_path: input path, where the continuous_data folder is located
+    output_path: output path for each cluster's power and for the all-cluster power (cluster_data)
+    """
+    if not os.path.exists(output_path):
+        os.makedirs(output_path)
+    assert len(cluster) == len(turbine_id)
+    continuous_data_path = input_path
     # Walk the entire continuous_data_path folder
     continuous_list = []
     for dirpath, dirnames, filenames in os.walk(continuous_data_path):
@@ -38,7 +68,7 @@ def cluster_power(cluster, out_put):
         dfs = [pd.read_csv(path) for path in continuous_list[i][0]]
         for j in range(1, max(cluster)+1):
             arr_j = np.where(cluster == j)[0]  # indices in cluster that belong to cluster j
-            cfs.setdefault(j, [files[k] for k in arr_j])
+            cfs.setdefault(j, [turbine_id[k] for k in arr_j])
             dfs_j = [dfs[k] for k in arr_j]  # data frames of the turbines in cluster j
             dfj, time_series = dfs_j[0].loc[:, ['C_TIME','C_ACTIVE_POWER']], dfs_j[0]['C_TIME']
             for df in dfs_j[1:]:
@@ -46,7 +76,7 @@ def cluster_power(cluster, out_put):
                 print("Timestamps are inconsistent between turbines!")
                 raise ValueError
             dfj['C_ACTIVE_POWER'] += df['C_ACTIVE_POWER']
-            output_1 = os.path.join(out_put, continuous_list[i][1])
+            output_1 = os.path.join(output_path, continuous_list[i][1])
             if not os.path.exists(output_1):
                 os.makedirs(output_1)
             dfj.to_csv(os.path.join(output_1, 'power_' + str(j) + '.csv'), index=False)
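
Below is a minimal calling sketch for the two new helpers. The label array, turbine IDs, and output folders are hypothetical placeholders chosen for illustration (only the two input folders appear elsewhere in this module); the sketch only reflects what the code itself assumes: labels are 1-based integers, one per turbine, ordered like the turbine files/IDs.

    import numpy as np

    # Hypothetical inputs: one 1-based cluster label per turbine, ordered like the files/IDs.
    labels = np.array([1, 2, 1, 3, 2])
    ids = ['turbine-10', 'turbine-11', 'turbine-12', 'turbine-13', 'turbine-15']

    # Flat layout: every turbine-*.csv sits directly under the input folder.
    cluster_power_list_file(labels, ids,
                            '../data/output_filtered_csv_files/',
                            '../data/cluster_power/')    # hypothetical output folder

    # Nested layout: turbine-*.csv files grouped into per-period subfolders.
    cluster_power_list_folder(labels, ids,
                              '../data/continuous_data/',
                              '../data/cluster_data/')   # hypothetical output folder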