|
@@ -6,10 +6,47 @@
|
|
|
# company: shenyang JY
|
|
|
|
|
|
import os
|
|
|
+import re
|
|
|
import numpy as np
|
|
|
import pandas as pd
|
|
|
-# 从confinuous_data中拿数据
|
|
|
-cluster = []
|
|
|
+
|
|
|
+
|
|
|
def read_cfs(cfs, input_path, output_path, is_folder=False):
    """Aggregate per-turbine CSV files into one DataFrame per cluster.

    For every cluster in ``cfs`` the member turbines' CSV files are read,
    their ``C_ACTIVE_POWER`` columns are summed, their ``C_WS`` columns are
    averaged over the member turbines, both columns are suffixed with the
    cluster key, and the result is written to
    ``<output_path>/cluster_<key>.csv``.

    Args:
        cfs: Mapping of cluster key -> sequence of turbine ids.
        input_path: Directory holding the turbine CSV files. Files are named
            ``turbine-<id>.csv``, or ``turbine-<id>_<n>.csv`` when
            ``is_folder`` is true, where ``<n>`` is the integer parsed from
            the directory name after ``Continuous_Turbine_Data_``.
        output_path: Directory receiving the cluster CSV files; created if
            it does not exist.
        is_folder: Whether ``input_path`` is one sub-directory of a
            continuous-data folder (see the file naming above).

    Returns:
        dict mapping cluster key -> aggregated DataFrame with the columns
        ``C_TIME``, ``C_WS<key>`` and ``C_ACTIVE_POWER<key>``. Clusters with
        no member turbines are skipped.

    Raises:
        ValueError: If the turbines of one cluster do not share an identical
            ``C_TIME`` column, or if ``is_folder`` is true and the directory
            name does not contain the expected segment index.
    """
    os.makedirs(output_path, exist_ok=True)

    # The file-name suffix depends only on the directory, so resolve it once.
    file_suffix = ""
    if is_folder:
        # normpath/basename tolerates trailing separators and OS-specific
        # separators, unlike splitting on '/'.
        dirname = os.path.basename(os.path.normpath(input_path))
        found = re.findall(r'(?<=Continuous_Turbine_Data_).*?(?=_)', dirname)
        if not found:
            raise ValueError(
                f"cannot parse segment index from directory name {dirname!r}"
            )
        file_suffix = f"_{int(found[0])}"

    dfs = {}
    for j, ids in cfs.items():
        if len(ids) == 0:
            # A cluster label without members has nothing to aggregate.
            continue
        frames = [
            pd.read_csv(
                os.path.join(input_path, f"turbine-{tid}{file_suffix}.csv")
            )
            for tid in ids
        ]
        time_series = frames[0]['C_TIME']
        # Copy so the accumulation below never writes into a view of the
        # first turbine's frame.
        dfj = frames[0].loc[:, ['C_TIME', 'C_WS', 'C_ACTIVE_POWER']].copy()
        for df in frames[1:]:
            if not df['C_TIME'].equals(time_series):
                raise ValueError("风机之间的日期不一致!")
            dfj['C_ACTIVE_POWER'] = dfj['C_ACTIVE_POWER'] + df['C_ACTIVE_POWER']
            dfj['C_WS'] = dfj['C_WS'] + df['C_WS']
        # Mean wind speed over the turbines in the cluster (not over rows).
        dfj['C_WS'] = dfj['C_WS'] / len(frames)
        dfj = dfj.rename(columns={
            'C_ACTIVE_POWER': 'C_ACTIVE_POWER' + str(j),
            'C_WS': 'C_WS' + str(j),
        })

        out_file = os.path.join(output_path, f'cluster_{j}.csv')
        # NOTE(review): in single-directory mode the first 20 rows are left
        # out of the written file only (the returned frame keeps them); the
        # reason is not visible in this function -- confirm with the caller.
        to_write = dfj if is_folder else dfj[20:]
        to_write.to_csv(out_file, index=False)
        dfs[j] = dfj
    return dfs
|
|
|
+
|
|
|
+
|
|
|
def get_cfs(cluster, turbine_id):
    """Group turbine ids by their cluster label.

    Args:
        cluster: Sequence or array of integer cluster labels; position ``k``
            is the label of ``turbine_id[k]``. Plain lists are accepted as
            well as NumPy arrays.
        turbine_id: Sequence of turbine ids, indexable by position.

    Returns:
        dict mapping every label ``j`` in ``1..max(cluster)`` to the list of
        turbine ids carrying that label. Labels in that range that never
        occur map to an empty list. An empty ``cluster`` yields ``{}``.
    """
    # Element-wise comparison below needs an array: comparing a plain list
    # with an int yields a single False and would make every group empty.
    labels = np.asarray(cluster)
    if labels.size == 0:
        return {}

    cfs = {}
    for j in range(1, int(labels.max()) + 1):
        members = np.where(labels == j)[0]  # positions whose label is j
        cfs[j] = [turbine_id[k] for k in members]

    for key, members in cfs.items():
        print("第{}组:{}".format(key, members))
    return cfs
|
|
|
|
|
|
|
|
|
def cluster_power_list_file(cluster, turbine_id, input_path, output_path):
|
|
@@ -22,25 +59,11 @@ def cluster_power_list_file(cluster, turbine_id, input_path, output_path):
|
|
|
"""
|
|
|
if not os.path.exists(output_path):
|
|
|
os.makedirs(output_path)
|
|
|
- files = os.listdir(input_path)
|
|
|
- files.remove('turbine-144.csv')
|
|
|
- assert len(cluster) == len(files)
|
|
|
- dfs = [pd.read_csv(os.path.join(input_path, f)) for f in files]
|
|
|
- cfs = {}
|
|
|
- for j in range(1, max(cluster)+1):
|
|
|
- arr_j = np.where(cluster == j)[0] # cluster中聚类j的索引列表
|
|
|
- cfs.setdefault(j, [turbine_id[k] for k in arr_j])
|
|
|
- dfs_j = [dfs[k] for k in arr_j] # cluster
|
|
|
- dfj, time_series = dfs_j[0].loc[:, ['C_TIME','C_ACTIVE_POWER']], dfs_j[0]['C_TIME']
|
|
|
- for df in dfs_j[1:]:
|
|
|
- if df['C_TIME'].equals(time_series) is False:
|
|
|
- print("风机之间的日期不一致!")
|
|
|
- raise ValueError
|
|
|
- dfj['C_ACTIVE_POWER'] += df['C_ACTIVE_POWER']
|
|
|
- dfj.to_csv(os.path.join(output_path, 'power_' + str(j) + '.csv'), index=False)
|
|
|
- paint_cluster_power(output_path)
|
|
|
- for key, value in cfs.items():
|
|
|
- print("第{}组:{}".format(key, cfs[key]))
|
|
|
+
|
|
|
+ cfs = get_cfs(cluster, turbine_id)
|
|
|
+ dfs = read_cfs(cfs, input_path, output_path)
|
|
|
+ dfs_cluster = pd.concat([df.set_index("C_TIME") for df in dfs.values()], join='inner', axis=1)
|
|
|
+ dfs_cluster.reset_index().to_csv(os.path.join(output_path, 'cluster_data.csv'), index=False)
|
|
|
|
|
|
|
|
|
def cluster_power_list_folder(cluster, turbine_id, input_path, output_path):
|
|
@@ -53,42 +76,23 @@ def cluster_power_list_folder(cluster, turbine_id, input_path, output_path):
|
|
|
"""
|
|
|
if not os.path.exists(output_path):
|
|
|
os.makedirs(output_path)
|
|
|
- assert len(cluster) == len(turbine_id)
|
|
|
- continuous_data_path = input_path
|
|
|
- # 遍历整个continuous_data_path文件夹
|
|
|
- continuous_list = []
|
|
|
- for dirpath, dirnames, filenames in os.walk(continuous_data_path):
|
|
|
- dirname = dirpath.split('/')[-1]
|
|
|
- filenames = [file for file in filenames if not file.startswith('turbine-144')]
|
|
|
- x = [os.path.join(dirpath, filename) for filename in filenames]
|
|
|
- continuous_list.append((x, dirname))
|
|
|
- continuous_list.pop(0)
|
|
|
- for i in range(len(continuous_list)):
|
|
|
- cfs = {}
|
|
|
- dfs = [pd.read_csv(path) for path in continuous_list[i][0]]
|
|
|
- for j in range(1, max(cluster)+1):
|
|
|
- arr_j = np.where(cluster == j)[0] # cluster中聚类j的索引列表
|
|
|
- cfs.setdefault(j, [turbine_id[k] for k in arr_j])
|
|
|
- dfs_j = [dfs[k] for k in arr_j] # cluster
|
|
|
- dfj, time_series = dfs_j[0].loc[:, ['C_TIME','C_ACTIVE_POWER']], dfs_j[0]['C_TIME']
|
|
|
- for df in dfs_j[1:]:
|
|
|
- if df['C_TIME'].equals(time_series) is False:
|
|
|
- print("风机之间的日期不一致!")
|
|
|
- raise ValueError
|
|
|
- dfj['C_ACTIVE_POWER'] += df['C_ACTIVE_POWER']
|
|
|
- output_1 = os.path.join(output_path, continuous_list[i][1])
|
|
|
- if not os.path.exists(output_1):
|
|
|
- os.makedirs(output_1)
|
|
|
- dfj.to_csv(os.path.join(output_1, 'power_' + str(j) + '.csv'), index=False)
|
|
|
- paint_cluster_power(output_1)
|
|
|
- print("------{}的分组信息----".format(continuous_list[i][1]))
|
|
|
- for key, value in cfs.items():
|
|
|
- print("第{}组:{}".format(key, cfs[key]))
|
|
|
-
|
|
|
-def paint_cluster_power(cluster_path):
|
|
|
- dfs = [pd.read_csv(os.path.join(cluster_path, file_path)).rename(columns={'C_ACTIVE_POWER':file_path.split('/')[-1][:-4]}) for file_path in os.listdir(cluster_path)]
|
|
|
+ continuous_list = [os.path.join(input_path, path) for path in os.listdir(input_path)]
|
|
|
+ cfs = get_cfs(cluster, turbine_id)
|
|
|
+ for con in continuous_list:
|
|
|
+ dirname = con.split('/')[-1]
|
|
|
+ output = os.path.join(output_path, dirname)
|
|
|
+ dfs = read_cfs(cfs, con, output, True)
|
|
|
+ dfs_cluster = pd.concat([df.set_index("C_TIME") for df in dfs.values()], join='inner', axis=1)
|
|
|
+ dfs_cluster.reset_index().to_csv(os.path.join(output, 'cluster_data.csv'), index=False)
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
def paint_cluster_power(cluster_path, dfs=None):
    """Combine the per-cluster CSV files of a directory into one summary CSV.

    Every ``*_<number>.csv`` file in ``cluster_path`` is read, its
    ``C_ACTIVE_POWER`` / ``C_WS`` columns (when present under those exact
    names) are renamed to ``power_<number>`` / ``C_WS_<number>``, and the
    last column of each file is collected into one table together with the
    ``C_TIME`` column of the first file and a row-wise ``SUM`` column. The
    table is written to ``<cluster_path>/cluster_data.csv``.

    Args:
        cluster_path: Directory containing the per-cluster CSV files; also
            receives ``cluster_data.csv``.
        dfs: Unused. Accepted (and optional) so both the one-argument and
            the two-argument call forms keep working; the data is always
            re-read from ``cluster_path``.

    Raises:
        ValueError: If ``cluster_path`` contains no ``*_<number>.csv`` file.
    """
    # Keep only files whose stem ends in "_<number>"; this also skips a
    # previously written 'cluster_data.csv' so it is never fed back in.
    numbered = []
    for name in os.listdir(cluster_path):
        stem, ext = os.path.splitext(name)
        suffix = stem.rsplit('_', 1)[-1]
        if ext.lower() != '.csv' or not suffix.isdecimal():
            continue
        numbered.append((int(suffix), suffix, name))
    if not numbered:
        raise ValueError(
            f"no per-cluster CSV files found in {cluster_path!r}"
        )
    # Numeric order keeps the output columns stable across platforms
    # (os.listdir order is arbitrary) and puts cluster 2 before cluster 10.
    numbered.sort()

    frames = [
        pd.read_csv(os.path.join(cluster_path, name)).rename(columns={
            # Use the whole numeric suffix so ids >= 10 stay distinct.
            'C_ACTIVE_POWER': 'power_' + suffix,
            'C_WS': 'C_WS_' + suffix,
        })
        for _, suffix, name in numbered
    ]

    # One column per cluster file: the last column of each frame.
    df_cluster = pd.DataFrame({df.columns[-1]: df.iloc[:, -1] for df in frames})
    df_cluster.insert(loc=0, column='C_TIME', value=frames[0]['C_TIME'])
    # Row-wise total over every cluster column (everything after C_TIME).
    df_cluster['SUM'] = df_cluster.iloc[:, 1:].sum(axis=1)
    df_cluster.to_csv(os.path.join(cluster_path, 'cluster_data.csv'), index=False)
|
|
|
|
|
|
|
|
@@ -96,4 +100,3 @@ def paint_cluster_power(cluster_path):
|
|
|
|
|
|
|
|
|
|
|
|
-
|