#!/usr/bin/env python # -*- coding: utf-8 -*- # time: 2023/5/11 14:43 # file: cluster_power.py # author: David # company: shenyang JY import os import numpy as np import pandas as pd # 从confinuous_data中拿数据 cluster = [] def cluster_power_list_file(cluster, turbine_id, input_path, output_path): """ 从turbine-*.csv的文件列表中进行聚类功率相加 cluster:聚类的结果 turbine_id:风机ID input_path:输入路径 output_filtered_csv_files 所在路径 output_path:输出每个聚类的功率,和所有聚类的功率cluster_data """ if not os.path.exists(output_path): os.makedirs(output_path) files = os.listdir(input_path) files.remove('turbine-144.csv') assert len(cluster) == len(files) dfs = [pd.read_csv(os.path.join(input_path, f)) for f in files] cfs = {} for j in range(1, max(cluster)+1): arr_j = np.where(cluster == j)[0] # cluster中聚类j的索引列表 cfs.setdefault(j, [turbine_id[k] for k in arr_j]) dfs_j = [dfs[k] for k in arr_j] # cluster dfj, time_series = dfs_j[0].loc[:, ['C_TIME','C_ACTIVE_POWER']], dfs_j[0]['C_TIME'] for df in dfs_j[1:]: if df['C_TIME'].equals(time_series) is False: print("风机之间的日期不一致!") raise ValueError dfj['C_ACTIVE_POWER'] += df['C_ACTIVE_POWER'] dfj.to_csv(os.path.join(output_path, 'power_' + str(j) + '.csv'), index=False) paint_cluster_power(output_path) for key, value in cfs.items(): print("第{}组:{}".format(key, cfs[key])) def cluster_power_list_folder(cluster, turbine_id, input_path, output_path): """ 从嵌套turbine-*.csv的多个文件夹列表中进行聚类功率相加 cluster:聚类的结果 turbine_id:风机ID input_path:输入路径 continuous_data 所在路径 output_path:输出每个聚类的功率,和所有聚类的功率cluster_data """ if not os.path.exists(output_path): os.makedirs(output_path) assert len(cluster) == len(turbine_id) continuous_data_path = input_path # 遍历整个continuous_data_path文件夹 continuous_list = [] for dirpath, dirnames, filenames in os.walk(continuous_data_path): dirname = dirpath.split('/')[-1] filenames = [file for file in filenames if not file.startswith('turbine-144')] x = [os.path.join(dirpath, filename) for filename in filenames] continuous_list.append((x, dirname)) continuous_list.pop(0) for i in range(len(continuous_list)): cfs = {} dfs = [pd.read_csv(path) for path in continuous_list[i][0]] for j in range(1, max(cluster)+1): arr_j = np.where(cluster == j)[0] # cluster中聚类j的索引列表 cfs.setdefault(j, [turbine_id[k] for k in arr_j]) dfs_j = [dfs[k] for k in arr_j] # cluster dfj, time_series = dfs_j[0].loc[:, ['C_TIME','C_ACTIVE_POWER']], dfs_j[0]['C_TIME'] for df in dfs_j[1:]: if df['C_TIME'].equals(time_series) is False: print("风机之间的日期不一致!") raise ValueError dfj['C_ACTIVE_POWER'] += df['C_ACTIVE_POWER'] output_1 = os.path.join(output_path, continuous_list[i][1]) if not os.path.exists(output_1): os.makedirs(output_1) dfj.to_csv(os.path.join(output_1, 'power_' + str(j) + '.csv'), index=False) paint_cluster_power(output_1) print("------{}的分组信息----".format(continuous_list[i][1])) for key, value in cfs.items(): print("第{}组:{}".format(key, cfs[key])) def paint_cluster_power(cluster_path): dfs = [pd.read_csv(os.path.join(cluster_path, file_path)).rename(columns={'C_ACTIVE_POWER':file_path.split('/')[-1][:-4]}) for file_path in os.listdir(cluster_path)] df_cluster = pd.DataFrame({df.columns[-1]: df.iloc[:, -1] for df in dfs}) df_cluster.insert(loc=0, column='C_TIME', value=dfs[0]['C_TIME']) df_cluster.to_csv(os.path.join(cluster_path, 'cluster_data.csv'), index=False)