David 1 月之前
父節點
當前提交
aa1a133ae2
共有 3 個文件被更改,包括 126 次插入173 次删除
  1. 35 167
      DataBase/turbine-cluster/data_analysis.py
  2. 76 0
      DataBase/turbine-cluster/draw_curve.py
  3. 15 6
      DataBase/turbine-cluster/z_power.py

+ 35 - 167
DataBase/turbine-cluster/data_analysis.py

@@ -8,8 +8,6 @@
 
 """
 import os.path
-import matplotlib.colors as mcolors
-colors = list(mcolors.XKCD_COLORS.keys())
 import pandas as pd
 # from mpl_toolkits.basemap import Basemap
 from scipy.signal import savgol_filter
@@ -57,6 +55,22 @@ def compute_distance(a, b):
     """
     return np.linalg.norm(a - b)
 
+def similarity_score(turbine_diff, threshold=0.5):
+    """
+    For every series, list the series whose cosine similarity to it exceeds a threshold.
+
+    :param turbine_diff: sequence of n series (n rows of m values each) compared pairwise
+    :param threshold: a pair is kept only when its cosine similarity is strictly greater than this
+    :return: dict mapping each row index i to the list of row indices j (i itself is
+             compared as well) for which the cosine similarity of rows i and j is > threshold
+    """
+    # Size the comparison from the input rather than a fixed 49 rows, so shorter
+    # inputs do not raise IndexError and longer inputs are not silently truncated.
+    similarity_index = {i: [] for i in range(len(turbine_diff))}
+    for turbine_i, row_i in enumerate(turbine_diff):
+        for turbine_j, row_j in enumerate(turbine_diff):
+            if compute_cos_similarity(row_i, row_j) > threshold:
+                similarity_index[turbine_i].append(turbine_j)
+    return similarity_index
 
 def hierarchical_clustering(data, threshold, similarity_func):
     """
@@ -89,7 +103,7 @@ def hierarchical_clustering(data, threshold, similarity_func):
     return clusters
 
 
-class DataAnalysis:
+class DataAnalysis(object):
     """
     数据分析类
     """
@@ -118,6 +132,8 @@ class DataAnalysis:
         self.turbine = None
         # 风机的标号顺序
         self.turbine_id = [x for x in range(76, 151, 1)]
+        self.c_names = ['G01', 'G02', 'G03', 'G04', 'G05', 'G06', 'G07', 'G08', 'G09', 'G10'] + ['G' + str(x) for x in range(11, 76)]
+
         # b1b4 = [142, 143, 144, 145]
         # self.turbine_id = [id for id in self.turbine_id if id not in b1b4]
         # 风机功率数据15分钟级别
@@ -171,12 +187,12 @@ class DataAnalysis:
             smoothness_value = np.std(diff_array)
             print("turbine-{}的平滑度是:{}".format(turbine_i, round(smoothness_value, 2)))
             turbine_diff.append(diff_array)
-            ori_turbine_pic.append(self.turbine[turbine_i]['C_WS'].values[self.data_start:self.data_end])
-            ori_turbine_power.append(self.turbine[turbine_i]['C_ACTIVE_POWER'].values[self.data_start:self.data_end])
+            ori_turbine_pic.append(self.turbine[turbine_i]['C_WS'].values[self.data_start:self.data_end + 1])
+            ori_turbine_power.append(self.turbine[turbine_i]['C_ACTIVE_POWER'].values[self.data_start:self.data_end + 1])
 
-        self.ori_turbine_power = ori_turbine_power
-        self.ori_turbine_pic = ori_turbine_pic
-        self.turbine_diff = turbine_diff
+        self.ori_turbine_power = ori_turbine_power      # 风机功率
+        self.ori_turbine_pic = ori_turbine_pic      # 风机风速
+        self.turbine_diff = turbine_diff    # 风机风速差分
 
         diff_change = []
         for diff_i in turbine_diff:
@@ -189,172 +205,25 @@ class DataAnalysis:
                 else:
                     single_diff_change.append(0)
             diff_change.append(single_diff_change)
-        self.diff_change = diff_change
+        self.diff_change = diff_change      # 风机风速差分1 -1 0 标值化
+
+        # 风机风速傅里叶变换
         self.ori_turbine_fft = [self.turbine_fft(i + 1) for i in range(len(self.ori_turbine_pic))]
 
-        # 平滑
+        # 风机风速差分平滑
         self.turbine_smooth(window_size=21)
 
-    def paint_map(self):
-        """
-        绘制经纬度地图
-        :return:
-        """
-        lats = self.info['纬度'].values
-        lons = self.info['经度'].values
-        map = Basemap()
-
-        # 绘制海岸线和国家边界
-        map.drawcoastlines()
-        map.drawcountries()
-
-        # 绘制经纬度坐标
-        map.drawmeridians(range(0, 360, 30))
-        map.drawparallels(range(-90, 90, 30))
-
-        # 绘制点
-
-        x, y = map(lons, lats)
-        map.plot(x, y, 'bo', markersize=10)
-
-        # 显示图表
-        plt.show()
-
-    def paint_power15min(self):
-        """
-        绘制15分钟功率曲线
-        :return:
-        """
-
-        plt.plot(self.power_15min['C_REAL_VALUE'])
-
-        # 设置图表标题和轴标签
-        plt.title('Data Time Change Curve')
-        plt.xlabel('Date')
-        plt.ylabel('Value')
-
-        # 显示图表
-        plt.show()
-
-    def paint_lats_lons(self):
-        """
-        绘制经纬度图
-        :return:
-        """
-        x = self.info['纬度'].values
-        y = self.info['经度'].values
-
-        # 绘制散点图
-        fig, ax = plt.subplots()
-        plt.scatter(x, y)
-
-        for i, txt in enumerate(self.info['id'].values):
-            ax.annotate(txt, (x[i], y[i]))
-
-        # 设置图表标题和轴标签
-        plt.xlabel('lats')
-        plt.ylabel('lons')
-
-        # 显示图表
-        plt.show()
-
-    def similarity_score(self, turbine_diff, threshold=0.5):
-        """
-        使用余弦相似度计算相似度分数并返回相似度大于阈值的index矩阵
-        :param turbine_diff: 需要计算相似的矩阵,数据格式n*m,n为数据条数,m为数据维数
-        :param threshold: 相似度阈值
-        :return: 返回相似计算后的矩阵
-        """
-        similarity = {i: [] for i in range(49)}
-        similarity_index = {i: [] for i in range(49)}
-        for turbine_i in range(49):
-            for turbine_j in range(49):
-                cos_similarity = compute_cos_similarity(turbine_diff[turbine_i], turbine_diff[turbine_j])
-                similarity[turbine_i].append(cos_similarity)
-                if cos_similarity > threshold:
-                    similarity_index[turbine_i].append(turbine_j)
-        return similarity_index
-
     def mapping_turbines(self):
+        """
+        Group turbine display names by cluster label and pickle the mapping.
+
+        Pairs self.turbine_id with self.cluster position by position, prints
+        each pairing, and writes the {cluster label: [names]} dict to
+        'turbine_cls.pickle' in the directory of self.turbine_path.
+        :return: None
+        """
         turbine_clus = {}
+        # Numeric turbine id -> display name, paired by position with self.c_names.
+        id_names = {id: self.c_names[x] for x, id in enumerate(self.turbine_id)}
         import pickle
         for a, b in zip(self.turbine_id, self.cluster):
             print("风机编号:{},类别:{}".format(a, b))
-            turbine_clus.setdefault(b, []).append(a)
+            # Store the display name rather than the numeric id.
+            turbine_clus.setdefault(b, []).append(id_names[a])
+            # NOTE(review): the path/dump statements sit inside the loop, so the
+            # pickle file is rewritten on every iteration with the mapping built
+            # so far; the final write holds the full mapping.
             path = os.path.join(os.path.dirname(self.turbine_path), 'turbine_cls.pickle')
             with open(path, 'wb') as file:
                 pickle.dump(turbine_clus, file)
 
-    def paint_turbine(self, paint_default=True):
-        """
-        绘制风机地理位置图
-        :param paint_default:默认True,绘制聚类后每个类别的数据折线图
-        :return: None
-        """
-
-        # y = self.info['纬度'].values
-        # x = self.info['经度'].values
-        #
-        # fig, ax = plt.subplots(figsize=(15, 15))
-        #
-        # plt.scatter(x, y, c=self.cluster)
-        # for i, txt in enumerate(self.info['C_ID'].values):
-        #     ax.annotate(txt, (x[i], y[i]))
-
-        # 设置图表标题和轴标签
-        # plt.xlabel('lons')
-        # plt.ylabel('lats')
-        # plt.legend()
-        #
-        # # 显示图表
-        # plt.savefig('analysis_img/turbine_cluster.png')
-        # plt.show()
-
-        plt.figure(figsize=(60, 40))
-        cmap = plt.get_cmap('viridis')
-        linestyle= ['solid', 'dashed', 'dotted', 'dashdot']
-        for i in range(max(self.cluster)):
-            cluster, cluster_fft, cluster_power, = [], [], []
-            for j, item in enumerate(self.cluster):
-                if item == i + 1:
-                    cluster.append(self.ori_turbine_pic[j])
-                    cluster_fft.append(self.ori_turbine_fft[j])
-                    cluster_power.append(self.ori_turbine_power[j])
-            cluster_power = np.average(cluster_power, axis=0)
-            cluster_fft = np.average(cluster_fft, axis=0)
-            cluster = np.average(cluster, axis=0)
-            diff_array = np.diff(cluster)
-            smoothness_value = np.std(diff_array)
-            print("聚类-{}的平滑度是:{}".format(i+1, smoothness_value))
-            color = cmap(i*200)
-            plt.figure(1)
-            # plt.subplot(max(self.cluster), 1, 1)
-            # print("----", cluster, linestyle[i])
-            # plt.plot([j for j in range(len(cluster))], cluster, color=color, label='cluster'+str(i))
-            # plt.subplot(max(self.cluster), 1, 2)
-            # plt.plot([j for j in range(len(cluster_fft))], cluster_fft, color=color, label='cluster'+str(i))
-
-            # ws_power_dict = {}
-            # for c, p in zip(cluster, cluster_power):
-            #     ws_power_dict.setdefault(round(c, 2), []).append(round(p, 2))
-            #
-            # for key, value in ws_power_dict.items():
-            #     ws_power_dict[key] = round(np.average(value), 2)
-            # print(ws_power_dict)
-            # plt.scatter(cluster, cluster_power, color=color, label='cluster' + str(i),
-            #          linestyle=linestyle[i], s=1, alpha=0.2)
-
-        # 添加图例
-        # plt.legend()
-        # # 显示图形
-        # plt.savefig('./clusters.png')
-        # plt.show()
-        # if paint_default:
-        #     for i in range(max(self.cluster)):
-        #         self.paint_turbine_k(i + 1)  # 画出聚类中每个风机的曲线
-
-
-
     def turbine_smooth(self, window_size=50):
         """
         使用滑动平均平滑数据。
@@ -400,7 +269,7 @@ class DataAnalysis:
         # 添加图例
         plt.legend()
         # 显示图形
-        plt.savefig('analysis_img/cluster/cluster_{}.png'.format(k))
+        plt.savefig('analysis_img/cluster_{}.png'.format(k))
         plt.show()
 
     def turbine_fft(self, k):
@@ -473,7 +342,7 @@ class DataAnalysis:
         :return:
         """
         self.turbine_clusters(self.ori_turbine_fft)
-        self.paint_turbine()
+        # self.paint_turbine()
 
     def turbine_clusters(self, data=None):
         """
@@ -483,11 +352,9 @@ class DataAnalysis:
         :return: None
         """
         if data is None:
-            cluster = hierarchical_clustering(self.turbine_diff, threshold=1.4,
-                                              similarity_func=compute_pearsonr)  # 层次聚类
+            cluster = hierarchical_clustering(self.turbine_diff, threshold=1.4,  similarity_func=compute_pearsonr)  # 层次聚类
         else:
-            cluster = hierarchical_clustering(data, threshold=0.6,
-                                              similarity_func=compute_pearsonr)
+            cluster = hierarchical_clustering(data, threshold=0.6,  similarity_func=compute_pearsonr)
         self.cluster = cluster
         # 在这里保存cluster变量
         # from cluster_analysis import cluster_power_list_file, cluster_power_list_folder
@@ -523,3 +390,4 @@ if __name__ == '__main__':
     data_analysis.process_ori_data()
     data_analysis.mapping_turbines()
     # data_analysis.paint_double(1, 56)
+    # data_analysis.paint_turbine()

+ 76 - 0
DataBase/turbine-cluster/draw_curve.py

@@ -0,0 +1,76 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+# @FileName  :draw_curve.py
+# @Time      :2025/5/12 13:46
+# @Author    :David
+# @Company: shenyang JY
+import matplotlib.pyplot as plt
+
+
+def paint_map(self):
+    """
+    Draw the turbines on a Basemap from their longitude/latitude columns.
+
+    NOTE(review): Basemap is not imported in this module as shown, so calling
+    this raises NameError until `from mpl_toolkits.basemap import Basemap`
+    (or an equivalent) is added -- confirm the intended dependency.
+    :param self: object whose `info` table has '纬度' (latitude) and '经度' (longitude) columns
+    :return: None
+    """
+    latitudes = self.info['纬度'].values
+    longitudes = self.info['经度'].values
+    world = Basemap()
+
+    # Coastlines and country borders.
+    world.drawcoastlines()
+    world.drawcountries()
+
+    # Meridians and parallels every 30 degrees.
+    world.drawmeridians(range(0, 360, 30))
+    world.drawparallels(range(-90, 90, 30))
+
+    # Convert lon/lat through the Basemap instance and plot each turbine as a 'bo' marker.
+    px, py = world(longitudes, latitudes)
+    world.plot(px, py, 'bo', markersize=10)
+
+    # Show the figure.
+    plt.show()
+
+
+def paint_power15min(self):
+    """
+    Plot the 15-minute power curve.
+
+    :param self: object whose `power_15min` table has a 'C_REAL_VALUE' column
+    :return: None
+    """
+    real_power = self.power_15min['C_REAL_VALUE']
+    plt.plot(real_power)
+
+    # Title and axis labels.
+    plt.title('Data Time Change Curve')
+    plt.xlabel('Date')
+    plt.ylabel('Value')
+
+    # Show the figure.
+    plt.show()
+
+def paint_lats_lons(self):
+    """
+    Scatter-plot the turbines with latitude on x and longitude on y, labelling each point with its id.
+
+    :param self: object whose `info` table has '纬度' (latitude), '经度' (longitude) and 'id' columns
+    :return: None
+    """
+    lats = self.info['纬度'].values
+    lons = self.info['经度'].values
+
+    # New figure with a single axes; the scatter is drawn on it as the current axes.
+    _, ax = plt.subplots()
+    plt.scatter(lats, lons)
+
+    # Write each turbine id next to its point.
+    for label, lat, lon in zip(self.info['id'].values, lats, lons):
+        ax.annotate(label, (lat, lon))
+
+    # Axis labels.
+    plt.xlabel('lats')
+    plt.ylabel('lons')
+
+    # Show the figure.
+    plt.show()
+
+
+if __name__ == "__main__":
+    # Placeholder entry point: running this file as a script only sets a dummy variable.
+    run_code = 0

+ 15 - 6
DataBase/turbine-cluster/z_power.py

@@ -4,28 +4,37 @@
 # @Time      :2024/12/30 14:16
 # @Author    :David
 # @Company: shenyang JY
-import os
-import pandas as pd
+import os, pickle
 
+# Turbine display names 'G01'..'G75'.
 c_names = ['G01', 'G02', 'G03', 'G04', 'G05', 'G06', 'G07', 'G08', 'G09', 'G10'] + ['G' + str(x) for x in range(11, 76)]
+# Numeric turbine ids 76..150, in the same order as c_names.
 turbineloc = [x for x in range(76, 151, 1)]
+# Numeric id -> display name, paired by position.
 id_names = {id: c_names[x] for x, id in enumerate(turbineloc)}
-import pickle
-with open('../../cluster/260/turbine_cls.pickle', 'rb') as f:
+# Cluster pickle, relative to the working directory; read here and rewritten by turbine_cls_adjust.
+path = '../../cluster/260/turbine_cls.pickle'
+# NOTE(review): this load runs at import time, and pickle.load should only be used on trusted files.
+with open(path, 'rb') as f:
     turbine_cls = pickle.load(f)
+
+def turbine_cls_adjust(turbine_cls):
+    """
+    Apply a fixed manual correction to the cluster mapping and save it back to `path`.
+
+    Removes 'G54' from cluster 2 and appends it to cluster 1, appends 'G14' to
+    cluster 2, drops cluster 3, then pickles the result. The mapping is modified in place.
+    :param turbine_cls: mapping of cluster label -> list of turbine names
+    :return: None
+    """
+    without_g54 = [name for name in turbine_cls[2] if name != 'G54']
+    turbine_cls[2] = without_g54
+    turbine_cls[1].append('G54')
+    turbine_cls[2].append('G14')
+    turbine_cls.pop(3)
+    with open(path, 'wb') as out:
+        pickle.dump(turbine_cls, out)
+
 def zone_powers(input_dir):
+    """
+    Sum the active power of the turbines in each cluster and write the totals to a CSV.
+
+    For every cluster in the module-level `turbine_cls`, reads `turbine-<id>.csv`
+    from `input_dir` for each id in `turbineloc` whose display name belongs to the
+    cluster, adds their 'C_ACTIVE_POWER' columns row by row and divides by 1000
+    (presumably a unit conversion -- TODO confirm). 'C_TIME' is taken from the
+    first file read. Values are rounded to 2 decimals and written to
+    "../../cluster/260/z-power.csv".
+    :param input_dir: directory containing the per-turbine CSV files
+    :return: None
+    """
+    # Imported locally: the module only imports pandas under `__main__`, so `pd`
+    # would be undefined when this function is called after a plain import.
+    import pandas as pd
+
     z_power = {}
     for zone, turbines in turbine_cls.items():
-        dfs = [pd.read_csv(os.path.join(input_dir, f"turbine-{z}.csv")) for z in turbineloc if z in turbines]
+        # Cluster members are stored as display names, so map each numeric id before testing membership.
+        dfs = [pd.read_csv(os.path.join(input_dir, f"turbine-{z}.csv")) for z in turbineloc if id_names[z] in turbines]
         z_power['C_TIME'] = dfs[0]['C_TIME']
         sum_power = pd.concat([df['C_ACTIVE_POWER'] for df in dfs], ignore_index=True, axis=1).sum(axis=1)
         z_power[zone] = sum_power/1000
     z_power = pd.DataFrame(z_power)
+    # Round every column after the first ('C_TIME').
     z_power.iloc[:, 1:] = z_power.iloc[:, 1:].round(2)
-    z_power.to_csv("./turbines-time/z-power-new.csv", index=False)
+    z_power.to_csv("../../cluster/260/z-power.csv", index=False)
 
 
 
 if __name__ == "__main__":
+    # pandas is bound to the module-level name `pd` only on this script path.
     import pandas as pd
+    # One-off manual cluster correction, left disabled.
+    # turbine_cls_adjust(turbine_cls)
+    # Aggregate per-cluster power from the CSV files under ../../cluster/260/.
     zone_powers("../../cluster/260/")