123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110 |
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- # time: 2023/5/11 14:43
- # file: cluster_power.py
- # author: David
- # company: shenyang JY
- import os
- import re
- import numpy as np
- import pandas as pd
def read_cfs(cfs, input_path, output_path, is_folder=False, skip_rows=20):
    """Aggregate the turbine CSV files of every cluster into one frame each.

    For each cluster label in ``cfs`` the CSV file of every member turbine
    is read from ``input_path``. ``C_ACTIVE_POWER`` is summed and ``C_WS``
    is averaged across the turbines, the two columns are suffixed with the
    cluster label, and the result is written to
    ``<output_path>/cluster_<label>.csv``.

    Args:
        cfs: Mapping of cluster label -> sequence of turbine ids.
        input_path: Directory holding the turbine CSV files. When
            ``is_folder`` is true, its last path component must contain
            ``Continuous_Turbine_Data_<n>_``; ``<n>`` is parsed as an int
            and the files are looked up as ``turbine-<id>_<n>.csv``.
            Otherwise the files are looked up as ``turbine-<id>.csv``.
        output_path: Directory for the per-cluster CSV files; created when
            missing.
        is_folder: Selects the file-naming scheme described above.
        skip_rows: Number of leading rows left out of the CSV written to
            disk. The returned frames always keep every row.

    Returns:
        dict mapping cluster label -> DataFrame with the columns
        ``C_TIME``, ``C_WS<label>`` and ``C_ACTIVE_POWER<label>``.
        Clusters without any turbine are skipped.

    Raises:
        ValueError: If the folder name does not contain the expected
            ``Continuous_Turbine_Data_<n>_`` pattern, or if the ``C_TIME``
            columns of two turbines in the same cluster differ.
    """
    os.makedirs(output_path, exist_ok=True)

    # The file-name suffix depends only on the folder, so resolve it once.
    suffix = ""
    if is_folder:
        # normpath drops a trailing separator, which would otherwise make
        # the last path component empty.
        dirname = os.path.basename(os.path.normpath(input_path))
        match = re.search(r"(?<=Continuous_Turbine_Data_).*?(?=_)", dirname)
        if match is None:
            raise ValueError(
                f"folder name {dirname!r} does not contain "
                "'Continuous_Turbine_Data_<n>_'"
            )
        suffix = f"_{int(match.group())}"

    dfs = {}
    for j, ids in cfs.items():
        if len(ids) == 0:
            # A label without turbines has nothing to aggregate.
            continue
        frames = [
            pd.read_csv(os.path.join(input_path, f"turbine-{turbine}{suffix}.csv"))
            for turbine in ids
        ]
        time_series = frames[0]["C_TIME"]
        # Work on an explicit copy so the in-place sums below never touch
        # the frame that was read from disk.
        dfj = frames[0].loc[:, ["C_TIME", "C_WS", "C_ACTIVE_POWER"]].copy()
        for df in frames[1:]:
            if not df["C_TIME"].equals(time_series):
                print("风机之间的日期不一致!")
                raise ValueError(
                    f"turbines of cluster {j} do not share the same C_TIME column"
                )
            dfj["C_ACTIVE_POWER"] += df["C_ACTIVE_POWER"]
            dfj["C_WS"] += df["C_WS"]
        # Power is a sum over the turbines, wind speed is their mean.
        dfj["C_WS"] /= len(frames)
        dfj = dfj.rename(columns={
            "C_ACTIVE_POWER": "C_ACTIVE_POWER" + str(j),
            "C_WS": "C_WS" + str(j),
        })
        dfj.iloc[skip_rows:].to_csv(
            os.path.join(output_path, "cluster_" + str(j) + ".csv"), index=False
        )
        dfs[j] = dfj
    return dfs
def get_cfs(cluster, turbine_id):
    """Group turbine ids by cluster label.

    Args:
        cluster: Sequence or array of integer cluster labels; entry ``k``
            is the label of ``turbine_id[k]``.
        turbine_id: Sequence of turbine ids, indexable by position.

    Returns:
        dict mapping every label from 1 up to the largest label in
        ``cluster`` to the list of turbine ids carrying that label. Labels
        in that range that no turbine uses map to an empty list. An empty
        ``cluster`` yields an empty dict.
    """
    # Convert first: comparing a plain list with ``== j`` gives a single
    # False instead of an element-wise mask, which would silently leave
    # every group empty.
    labels = np.asarray(cluster)
    cfs = {}
    if labels.size == 0:
        return cfs
    for j in range(1, int(labels.max()) + 1):
        members = np.where(labels == j)[0]  # positions whose label is j
        cfs[j] = [turbine_id[k] for k in members]
    for key, value in cfs.items():
        print("第{}组:{}".format(key, value))
    return cfs
def cluster_data_indep(dfs_cluster, root_path):
    """Join the cluster frame with the power and NWP reference files.

    Args:
        dfs_cluster: DataFrame indexed by ``C_TIME``.
        root_path: Directory containing ``power.csv`` and ``NWP.csv``.
            A trailing path separator is optional.

    Returns:
        DataFrame indexed by ``C_TIME`` holding, side by side, the columns
        of ``power.csv``, the ``C_WS100`` and ``C_WS170`` columns of
        ``NWP.csv`` and the columns of ``dfs_cluster``. Only timestamps
        present in all three inputs are kept (inner join).
    """
    # os.path.join keeps this working whether or not root_path ends with a
    # separator; plain string concatenation did not.
    df_power = pd.read_csv(os.path.join(root_path, "power.csv"))
    df_nwp = pd.read_csv(
        os.path.join(root_path, "NWP.csv"),
        usecols=["C_TIME", "C_WS100", "C_WS170"],
    )
    df_all = pd.concat(
        [df_power.set_index("C_TIME"), df_nwp.set_index("C_TIME"), dfs_cluster],
        axis=1,
        join="inner",
    )
    return df_all
def cluster_power_list_file(cluster, turbine_id, input_path, output_path,
                            root_path='../data-process/data/'):
    """Sum the power of each cluster from a flat list of turbine-*.csv files.

    Writes one ``cluster_<label>.csv`` per cluster plus a combined
    ``cluster_data.csv`` into ``output_path``. The combined file holds the
    per-cluster columns, a ``SUM`` column (row-wise sum of every column
    whose name contains ``C_ACTIVE_POWER``) and the columns joined in by
    ``cluster_data_indep``.

    Args:
        cluster: Clustering result, one label per turbine.
        turbine_id: Turbine ids, in the same order as ``cluster``.
        input_path: Directory containing the ``turbine-<id>.csv`` files
            (the ``output_filtered_csv_files`` directory).
        output_path: Directory receiving the per-cluster files and
            ``cluster_data.csv``; created when missing.
        root_path: Location of the reference files, passed unchanged to
            ``cluster_data_indep``. Defaults to the previously hard-coded
            ``'../data-process/data/'``.
    """
    os.makedirs(output_path, exist_ok=True)
    cfs = get_cfs(cluster, turbine_id)
    dfs = read_cfs(cfs, input_path, output_path)
    dfs_cluster = pd.concat(
        [df.set_index("C_TIME") for df in dfs.values()], join='inner', axis=1
    )
    # Total power over all clusters for every timestamp.
    dfs_cluster['SUM'] = dfs_cluster.filter(like='C_ACTIVE_POWER').sum(axis=1)
    dfs_cluster = cluster_data_indep(dfs_cluster, root_path)
    dfs_cluster.reset_index().to_csv(
        os.path.join(output_path, 'cluster_data.csv'), index=False
    )
def cluster_power_list_folder(cluster, turbine_id, input_path, output_path):
    """Sum the power of each cluster for every sub-folder of turbine files.

    Every sub-directory of ``input_path`` is processed independently with
    ``read_cfs`` in folder mode. Its per-cluster files and a combined
    ``cluster_data.csv`` are written to ``<output_path>/<sub-directory>``.

    Args:
        cluster: Clustering result, one label per turbine.
        turbine_id: Turbine ids, in the same order as ``cluster``.
        input_path: Directory whose sub-directories each contain
            turbine-*.csv files (the ``continuous_data`` directory).
        output_path: Root directory for the results; created when missing.
    """
    os.makedirs(output_path, exist_ok=True)
    # Sorted so the processing order does not depend on the file system.
    dirnames = sorted(os.listdir(input_path))
    cfs = get_cfs(cluster, turbine_id)
    for dirname in dirnames:
        con = os.path.join(input_path, dirname)
        if not os.path.isdir(con):
            # Stray files next to the data folders are not turbine data.
            continue
        output = os.path.join(output_path, dirname)
        dfs = read_cfs(cfs, con, output, True)
        dfs_cluster = pd.concat(
            [df.set_index("C_TIME") for df in dfs.values()], join='inner', axis=1
        )
        dfs_cluster.reset_index().to_csv(
            os.path.join(output, 'cluster_data.csv'), index=False
        )
if __name__ == '__main__':
    # Script entry point: aggregate cluster power first from the flat file
    # list, then from the per-period folders, into the same output root.
    turbine_id = [*range(102, 162)]
    # Cluster label per turbine (presumably one entry per id above — verify
    # the length against turbine_id).
    cluster = np.array([2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
    # Manual override: the label at position 42 is forced to cluster 1.
    cluster[42] = 1

    output_path = '../data-process/data/cluster_power/'
    file_input = '../data-process/data/output_filtered_csv_files/'
    folder_input = '../data-process/data/continuous_data/'

    cluster_power_list_file(
        cluster, turbine_id, input_path=file_input, output_path=output_path
    )
    cluster_power_list_folder(
        cluster, turbine_id, input_path=folder_input, output_path=output_path
    )
|