Door bron bladeren

cluster-power

liudawei 2 jaar geleden
bovenliggende
commit
7e1866aff6
5 gewijzigde bestanden met 53 toevoegingen en 20 verwijderingen
  1. BIN
      analysis_img/121_122_turbine.png
  2. BIN
      analysis_img/cluster/cluster_1.png
  3. BIN
      analysis_img/cluster/cluster_2.png
  4. 41 11
      cluster_power.py
  5. 12 9
      data_analysis.py

BIN
analysis_img/121_122_turbine.png


BIN
analysis_img/cluster/cluster_1.png


BIN
analysis_img/cluster/cluster_2.png


+ 41 - 11
cluster_power.py

@@ -11,20 +11,50 @@ import pandas as pd
 # 从confinuous_data中拿数据
 cluster = []
 
-def
 
-
-def cluster_power(cluster, out_put):
+def cluster_power_list_file(cluster, turbine_id, input_path, output_path):
     """
-    把聚类的结果
+    从turbine-*.csv的文件列表中进行聚类功率相加
+    cluster:聚类的结果
+    turbine_id:风机ID
+    input_path:输入路径 output_filtered_csv_files 所在路径
+    output_path:输出每个聚类的功率,和所有聚类的功率cluster_data
     """
-    if not os.path.exists(out_put):
-        os.makedirs(out_put)
-    output_filtered_csv_files = '../data/output_filtered_csv_files/'
-    files = os.listdir(output_filtered_csv_files)
+    if not os.path.exists(output_path):
+        os.makedirs(output_path)
+    files = os.listdir(input_path)
     files.remove('turbine-144.csv')
     assert len(cluster) == len(files)
-    continuous_data_path = '../data/continuous_data/'
+    dfs = [pd.read_csv(os.path.join(input_path, f)) for f in files]
+    cfs = {}
+    for j in range(1, max(cluster)+1):
+        arr_j = np.where(cluster == j)[0]   # cluster中聚类j的索引列表
+        cfs.setdefault(j, [turbine_id[k] for k in arr_j])
+        dfs_j = [dfs[k] for k in arr_j]   # cluster
+        dfj, time_series = dfs_j[0].loc[:, ['C_TIME','C_ACTIVE_POWER']], dfs_j[0]['C_TIME']
+        for df in dfs_j[1:]:
+            if df['C_TIME'].equals(time_series) is False:
+                print("风机之间的日期不一致!")
+                raise ValueError
+            dfj['C_ACTIVE_POWER'] += df['C_ACTIVE_POWER']
+        dfj.to_csv(os.path.join(output_path, 'power_' + str(j) + '.csv'), index=False)
+        paint_cluster_power(output_path)
+        for key, value in cfs.items():
+            print("第{}组:{}".format(key, cfs[key]))
+
+
+def cluster_power_list_folder(cluster, turbine_id, input_path, output_path):
+    """
+    从嵌套turbine-*.csv的多个文件夹列表中进行聚类功率相加
+    cluster:聚类的结果
+    turbine_id:风机ID
+    input_path:输入路径 continuous_data 所在路径
+    output_path:输出每个聚类的功率,和所有聚类的功率cluster_data
+    """
+    if not os.path.exists(output_path):
+        os.makedirs(output_path)
+    assert len(cluster) == len(turbine_id)
+    continuous_data_path = input_path
     # 遍历整个continuous_data_path文件夹
     continuous_list = []
     for dirpath, dirnames, filenames in os.walk(continuous_data_path):
@@ -38,7 +68,7 @@ def cluster_power(cluster, out_put):
         dfs = [pd.read_csv(path) for path in continuous_list[i][0]]
         for j in range(1, max(cluster)+1):
             arr_j = np.where(cluster == j)[0]   # cluster中聚类j的索引列表
-            cfs.setdefault(j, [files[k] for k in arr_j])
+            cfs.setdefault(j, [turbine_id[k] for k in arr_j])
             dfs_j = [dfs[k] for k in arr_j]   # cluster
             dfj, time_series = dfs_j[0].loc[:, ['C_TIME','C_ACTIVE_POWER']], dfs_j[0]['C_TIME']
             for df in dfs_j[1:]:
@@ -46,7 +76,7 @@ def cluster_power(cluster, out_put):
                     print("风机之间的日期不一致!")
                     raise ValueError
                 dfj['C_ACTIVE_POWER'] += df['C_ACTIVE_POWER']
-            output_1 = os.path.join(out_put,  continuous_list[i][1])
+            output_1 = os.path.join(output_path,  continuous_list[i][1])
             if not os.path.exists(output_1):
                 os.makedirs(output_1)
             dfj.to_csv(os.path.join(output_1, 'power_' + str(j) + '.csv'), index=False)

+ 12 - 9
data_analysis.py

@@ -7,6 +7,8 @@
 @Time:2023/4/24 15:16
 
 """
+import os.path
+
 import pandas as pd
 #from mpl_toolkits.basemap import Basemap
 from scipy.signal import savgol_filter
@@ -125,17 +127,17 @@ class DataAnalysis:
         加载数据
         :return:
         """
-        self.info = pd.read_csv('../data/风机信息.csv', encoding='utf-8')
+        self.info = pd.read_csv('../data-process/data/风机信息.csv', encoding='utf-8')
         # power_15min = pd.read_csv('../data/power_15min.csv')
         # for i in range(len(power_15min)):
         #     if power_15min.loc[i, 'C_REAL_VALUE'] == -9999:
         #         # 方便在曲线中看出缺失数据位置
         #         power_15min.loc[i, 'C_REAL_VALUE'] = -34.56789
         # self.power_15min = power_15min
-        turbine_path = '../data/output_filtered_csv_files/turbine-{}.csv'
+        turbine_path = '../data-process/data/output_filtered_csv_files/turbine-{}.csv'
         self.turbine = {}
         for i in self.turbine_id:
-            self.turbine[i] = pd.read_csv(turbine_path.format(i))[21:]
+            self.turbine[i] = pd.read_csv(turbine_path.format(i))[20:].reset_index(drop=True)
 
     def compute_turbine_diff(self):
         """
@@ -321,7 +323,6 @@ class DataAnalysis:
 
         # 添加图例
         plt.legend()
-
         # 显示图形
         plt.savefig('analysis_img/cluster/cluster_{}.png'.format(k))
         plt.show()
@@ -412,14 +413,16 @@ class DataAnalysis:
             cluster = hierarchical_clustering(data, threshold=1,
                                               similarity_func=compute_pearsonr)
         self.cluster = cluster
-        from cluster_power import cluster_power
-        out_put = '../data/cluester_power/'
-        cluster_power(self.cluster, out_put)
+        from cluster_power import cluster_power_list_file, cluster_power_list_folder
+
+        output_path = '../data-process/data/cluester_power/'
+        cluster_power_list_file(self.cluster, self.turbine_id, input_path='../data-process/data/output_filtered_csv_files/', output_path=output_path)
+        cluster_power_list_folder(self.cluster, self.turbine_id, input_path='../data-process/data/continuous_data/', output_path=output_path)
 
 
-data_analysis = DataAnalysis(data_length=9771,
+data_analysis = DataAnalysis(data_length=9773,
                              data_start=0,
-                             data_end=9771)
+                             data_end=9773)
 
 data_analysis.process_ori_data()
 data_analysis.paint_double(20, 21)