Ver Fonte

first commit

liudawei há 2 anos atrás
commit
63cdf9f914
66 ficheiros alterados com 625 adições e 0 exclusões
  1. 6 0
      clustering/README.md
  2. 69 0
      clustering/cluster_power.py
  3. 425 0
      clustering/data_analysis.py
  4. 78 0
      clustering/data_clean.py
  5. BIN
      clustering/聚类结果/analysis_power_img_3_month.zip
  6. BIN
      clustering/聚类结果/analysis_ws_img_1_month.zip
  7. BIN
      clustering/聚类结果/analysis_ws_img_2_day.zip
  8. BIN
      clustering/聚类结果/analysis_ws_img_3_month.zip
  9. 21 0
      clustering/聚类结果说明/README.md
  10. 11 0
      clustering/聚类结果说明/cluster/README.md
  11. BIN
      clustering/聚类结果说明/cluster/cluster_1.png
  12. BIN
      clustering/聚类结果说明/cluster/cluster_2.png
  13. BIN
      clustering/聚类结果说明/cluster/cluster_3.png
  14. BIN
      clustering/聚类结果说明/cluster/cluster_4.png
  15. BIN
      clustering/聚类结果说明/fft/10_turbine_fft.png
  16. BIN
      clustering/聚类结果说明/fft/11_turbine_fft.png
  17. BIN
      clustering/聚类结果说明/fft/12_turbine_fft.png
  18. BIN
      clustering/聚类结果说明/fft/13_turbine_fft.png
  19. BIN
      clustering/聚类结果说明/fft/14_turbine_fft.png
  20. BIN
      clustering/聚类结果说明/fft/15_turbine_fft.png
  21. BIN
      clustering/聚类结果说明/fft/16_turbine_fft.png
  22. BIN
      clustering/聚类结果说明/fft/17_turbine_fft.png
  23. BIN
      clustering/聚类结果说明/fft/18_turbine_fft.png
  24. BIN
      clustering/聚类结果说明/fft/19_turbine_fft.png
  25. BIN
      clustering/聚类结果说明/fft/1_turbine_fft.png
  26. BIN
      clustering/聚类结果说明/fft/20_turbine_fft.png
  27. BIN
      clustering/聚类结果说明/fft/21_turbine_fft.png
  28. BIN
      clustering/聚类结果说明/fft/22_turbine_fft.png
  29. BIN
      clustering/聚类结果说明/fft/23_turbine_fft.png
  30. BIN
      clustering/聚类结果说明/fft/24_turbine_fft.png
  31. BIN
      clustering/聚类结果说明/fft/25_turbine_fft.png
  32. BIN
      clustering/聚类结果说明/fft/26_turbine_fft.png
  33. BIN
      clustering/聚类结果说明/fft/27_turbine_fft.png
  34. BIN
      clustering/聚类结果说明/fft/28_turbine_fft.png
  35. BIN
      clustering/聚类结果说明/fft/29_turbine_fft.png
  36. BIN
      clustering/聚类结果说明/fft/2_turbine_fft.png
  37. BIN
      clustering/聚类结果说明/fft/30_turbine_fft.png
  38. BIN
      clustering/聚类结果说明/fft/31_turbine_fft.png
  39. BIN
      clustering/聚类结果说明/fft/32_turbine_fft.png
  40. BIN
      clustering/聚类结果说明/fft/33_turbine_fft.png
  41. BIN
      clustering/聚类结果说明/fft/34_turbine_fft.png
  42. BIN
      clustering/聚类结果说明/fft/35_turbine_fft.png
  43. BIN
      clustering/聚类结果说明/fft/36_turbine_fft.png
  44. BIN
      clustering/聚类结果说明/fft/37_turbine_fft.png
  45. BIN
      clustering/聚类结果说明/fft/38_turbine_fft.png
  46. BIN
      clustering/聚类结果说明/fft/39_turbine_fft.png
  47. BIN
      clustering/聚类结果说明/fft/3_turbine_fft.png
  48. BIN
      clustering/聚类结果说明/fft/40_turbine_fft.png
  49. BIN
      clustering/聚类结果说明/fft/41_turbine_fft.png
  50. BIN
      clustering/聚类结果说明/fft/42_turbine_fft.png
  51. BIN
      clustering/聚类结果说明/fft/43_turbine_fft.png
  52. BIN
      clustering/聚类结果说明/fft/44_turbine_fft.png
  53. BIN
      clustering/聚类结果说明/fft/45_turbine_fft.png
  54. BIN
      clustering/聚类结果说明/fft/46_turbine_fft.png
  55. BIN
      clustering/聚类结果说明/fft/47_turbine_fft.png
  56. BIN
      clustering/聚类结果说明/fft/48_turbine_fft.png
  57. BIN
      clustering/聚类结果说明/fft/49_turbine_fft.png
  58. BIN
      clustering/聚类结果说明/fft/4_turbine_fft.png
  59. BIN
      clustering/聚类结果说明/fft/5_turbine_fft.png
  60. BIN
      clustering/聚类结果说明/fft/6_turbine_fft.png
  61. BIN
      clustering/聚类结果说明/fft/7_turbine_fft.png
  62. BIN
      clustering/聚类结果说明/fft/8_turbine_fft.png
  63. BIN
      clustering/聚类结果说明/fft/9_turbine_fft.png
  64. 15 0
      clustering/聚类结果说明/fft/README.md
  65. BIN
      clustering/聚类结果说明/turbine_cluster.png
  66. BIN
      clustering/聚类结果说明/风机标签与风机名称对应表.xlsx

+ 6 - 0
clustering/README.md

@@ -0,0 +1,6 @@
+## 功率预测系统
+
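+该模块用于风机聚类,找出风机之间的规律,已实现的特性如下:
+
+- 数据清洗:将 -9999、-99 等异常值置空后做插值处理(data_clean.py)
+- 特征提取与聚类:对风机功率做差分和傅里叶变换滤波,并基于皮尔逊相关系数进行层次聚类(data_analysis.py)
+- 聚类功率汇总:按聚类结果累加每组风机的功率并输出 CSV 文件(cluster_power.py)
+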

+ 69 - 0
clustering/cluster_power.py

@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/5/11 14:43
+# file: cluster_power.py
+# author: David
+# company: shenyang JY
+
+import os
+import numpy as np
+import pandas as pd
+# 从confinuous_data中拿数据
+cluster = []
+
+def
+
+
+def cluster_power(cluster, out_put):
+    """
+    把聚类的结果
+    """
+    if not os.path.exists(out_put):
+        os.makedirs(out_put)
+    output_filtered_csv_files = '../data/output_filtered_csv_files/'
+    files = os.listdir(output_filtered_csv_files)
+    files.remove('turbine-144.csv')
+    assert len(cluster) == len(files)
+    continuous_data_path = '../data/continuous_data/'
+    # 遍历整个continuous_data_path文件夹
+    continuous_list = []
+    for dirpath, dirnames, filenames in os.walk(continuous_data_path):
+        dirname = dirpath.split('/')[-1]
+        filenames = [file for file in filenames if not file.startswith('turbine-144')]
+        x = [os.path.join(dirpath, filename) for filename in filenames]
+        continuous_list.append((x, dirname))
+    continuous_list.pop(0)
+    for i in range(len(continuous_list)):
+        cfs = {}
+        dfs = [pd.read_csv(path) for path in continuous_list[i][0]]
+        for j in range(1, max(cluster)+1):
+            arr_j = np.where(cluster == j)[0]   # cluster中聚类j的索引列表
+            cfs.setdefault(j, [files[k] for k in arr_j])
+            dfs_j = [dfs[k] for k in arr_j]   # cluster
+            dfj, time_series = dfs_j[0].loc[:, ['C_TIME','C_ACTIVE_POWER']], dfs_j[0]['C_TIME']
+            for df in dfs_j[1:]:
+                if df['C_TIME'].equals(time_series) is False:
+                    print("风机之间的日期不一致!")
+                    raise ValueError
+                dfj['C_ACTIVE_POWER'] += df['C_ACTIVE_POWER']
+            output_1 = os.path.join(out_put,  continuous_list[i][1])
+            if not os.path.exists(output_1):
+                os.makedirs(output_1)
+            dfj.to_csv(os.path.join(output_1, 'power_' + str(j) + '.csv'), index=False)
+        paint_cluster_power(output_1)
+        print("------{}的分组信息----".format(continuous_list[i][1]))
+        for key, value in cfs.items():
+            print("第{}组:{}".format(key, cfs[key]))
+
+def paint_cluster_power(cluster_path):
+    dfs = [pd.read_csv(os.path.join(cluster_path, file_path)).rename(columns={'C_ACTIVE_POWER':file_path.split('/')[-1][:-4]}) for file_path in os.listdir(cluster_path)]
+    df_cluster = pd.DataFrame({df.columns[-1]: df.iloc[:, -1] for df in dfs})
+    df_cluster.insert(loc=0, column='C_TIME', value=dfs[0]['C_TIME'])
+    df_cluster.to_csv(os.path.join(cluster_path, 'cluster_data.csv'), index=False)
+
+
+
+
+
+
+

+ 425 - 0
clustering/data_analysis.py

@@ -0,0 +1,425 @@
+# !usr/bin/env python
+# -*- coding:utf-8 _*-
+"""
+@Author:Lijiaxing
+ 
+@File:data_analysis.py
+@Time:2023/4/24 15:16
+
+"""
+import pandas as pd
+#from mpl_toolkits.basemap import Basemap
+from scipy.signal import savgol_filter
+import numpy as np
+import matplotlib.pyplot as plt
+from scipy.cluster.hierarchy import linkage, fcluster
+
+
+def paint_others(y):
+    """ 绘制其他数据 """
+    plt.plot([j for j in range(y)], y)
+    # 添加标题和标签
+    plt.xlabel('x')
+    plt.ylabel('y')
+
+    # 显示图形
+    plt.show()
+
+
+def compute_cos_similarity(a, b):
+    """
+    计算两个向量的余弦相似度
+    :param a: 向量a
+    :param b: 向量b
+    :return: 余弦相似度值
+    """
+    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))
+
+
+def compute_pearsonr(a):
+    """
+    计算数据皮尔逊相关系数并返回相似度矩阵
+    :param a: 数据格式为n*m的矩阵,n为数据个数,m为数据维度
+    :return: 返回相似度矩阵,数据格式为n*n的矩阵
+    """
+    return np.corrcoef(a)
+
+
+def compute_distance(a, b):
+    """
+    计算两个向量的欧式距离
+    :param a:
+    :param b:
+    :return: 返回两个向量的欧式距离
+    """
+    return np.linalg.norm(a - b)
+
+
+def hierarchical_clustering(data, threshold, similarity_func):
+    """
+    层次聚类,使用工具包scipy.cluster.hierarchy中的linkage和fcluster函数进行层次聚类
+    :param data: 二维数据,格式为n*m的矩阵,n为数据个数,m为数据维度
+    :param threshold: 阈值,当两个数据的距离小于阈值时,将两个数据归为一类,阈值为根据相似度矩阵层次聚类后的类别距离阈值,可根据需求进行调整,可大于1
+    :param similarity_func: 相似度计算函数,用于计算两个数据的相似度,可以进行替换,若替换为计算距离的函数需对内部进行修改
+    :return: 返回聚类结果,格式为n*1的矩阵,n为数据个数,每个数据的值为该数据所属的类别
+    """
+    # 计算数据的相似度矩阵
+    similarity_matrix = similarity_func(data)
+
+    # 计算数据的距离矩阵
+    distance_matrix = 1 - similarity_matrix
+
+    # 进行层次聚类返回聚类结果
+    Z = linkage(distance_matrix, method='ward')
+    # 根据相似度阈值获取聚类结果
+    clusters = fcluster(Z, t=threshold, criterion='distance')
+    return clusters
+
+
+class DataAnalysis:
+    """
+    数据分析类
+    """
+
+    def __init__(self, data_length, data_start, data_end):
+        """
+        初始化
+        :param data_length: 分析数据段长度
+        :param data_start: 分析数据段开始位置
+        :param data_end: 分析数据段结束位置
+        """
+        # 原始风机功率数据傅里叶变换滤波后的数据
+        self.ori_turbine_fft = None
+        # 原始风机功率数据片段
+        self.ori_turbine_pic = None
+        # 聚类结果
+        self.cluster = None
+        # 风机功率差分平滑后的结果
+        self.smooth_turbine_diff = None
+        # 风机功率差分变化情况
+        self.diff_change = None
+        # 风机功率差分
+        self.turbine_diff = None
+        # 全部风机数据
+        self.turbine = None
+        # 风机的标号顺序
+        self.turbine_id = list(range(102, 162))
+        self.turbine_id.remove(144)
+        # 风机功率数据15分钟级别
+        self.power_15min = None
+        # 风机经纬度信息
+        self.info = None
+        # 使用数据长度
+        self.data_length = data_length
+        # 使用数据开始位置
+        self.data_start = data_start
+        # 使用数据结束位置
+        self.data_end = data_end
+        # 导入数据
+        self.load_data()
+        # 计算风机功率差分
+        self.compute_turbine_diff()
+
+    def load_data(self):
+        """
+        加载数据
+        :return:
+        """
+        self.info = pd.read_csv('../data/风机信息.csv', encoding='utf-8')
+        # power_15min = pd.read_csv('../data/power_15min.csv')
+        # for i in range(len(power_15min)):
+        #     if power_15min.loc[i, 'C_REAL_VALUE'] == -9999:
+        #         # 方便在曲线中看出缺失数据位置
+        #         power_15min.loc[i, 'C_REAL_VALUE'] = -34.56789
+        # self.power_15min = power_15min
+        turbine_path = '../data/output_filtered_csv_files/turbine-{}.csv'
+        self.turbine = {}
+        for i in self.turbine_id:
+            self.turbine[i] = pd.read_csv(turbine_path.format(i))[21:]
+
+    def compute_turbine_diff(self):
+        """
+        计算风机功率差分
+        :return:
+        """
+        turbine_diff = []
+        ori_turbine_pic = []
+        for turbine_i in self.turbine_id:
+            diff_array = np.diff(
+                np.array(self.turbine[turbine_i]['C_ACTIVE_POWER'].values[self.data_start:self.data_end+1]))
+            turbine_diff.append(diff_array)
+            ori_turbine_pic.append(self.turbine[turbine_i]['C_ACTIVE_POWER'].values[self.data_start:self.data_end])
+        self.ori_turbine_pic = ori_turbine_pic
+        self.turbine_diff = turbine_diff
+
+        diff_change = []
+        for diff_i in turbine_diff:
+            single_diff_change = []
+            for diff_i_i in diff_i:
+                if diff_i_i > 0:
+                    single_diff_change.append(1)
+                elif diff_i_i < 0:
+                    single_diff_change.append(-1)
+                else:
+                    single_diff_change.append(0)
+            diff_change.append(single_diff_change)
+        self.diff_change = diff_change
+        self.ori_turbine_fft = [self.turbine_fft(i + 1) for i in range(len(self.ori_turbine_pic))]
+
+        # 平滑
+        self.turbine_smooth(window_size=21)
+
+    def paint_map(self):
+        """
+        绘制经纬度地图
+        :return:
+        """
+        lats = self.info['纬度'].values
+        lons = self.info['经度'].values
+        map = Basemap()
+
+        # 绘制海岸线和国家边界
+        map.drawcoastlines()
+        map.drawcountries()
+
+        # 绘制经纬度坐标
+        map.drawmeridians(range(0, 360, 30))
+        map.drawparallels(range(-90, 90, 30))
+
+        # 绘制点
+
+        x, y = map(lons, lats)
+        map.plot(x, y, 'bo', markersize=10)
+
+        # 显示图表
+        plt.show()
+
+    def paint_power15min(self):
+        """
+        绘制15分钟功率曲线
+        :return:
+        """
+
+        plt.plot(self.power_15min['C_REAL_VALUE'])
+
+        # 设置图表标题和轴标签
+        plt.title('Data Time Change Curve')
+        plt.xlabel('Date')
+        plt.ylabel('Value')
+
+        # 显示图表
+        plt.show()
+
+    def paint_lats_lons(self):
+        """
+        绘制经纬度图
+        :return:
+        """
+        x = self.info['纬度'].values
+        y = self.info['经度'].values
+
+        # 绘制散点图
+        fig, ax = plt.subplots()
+        plt.scatter(x, y)
+
+        for i, txt in enumerate(self.info['id'].values):
+            ax.annotate(txt, (x[i], y[i]))
+
+        # 设置图表标题和轴标签
+        plt.xlabel('lats')
+        plt.ylabel('lons')
+
+        # 显示图表
+        plt.show()
+
+    def similarity_score(self, turbine_diff, threshold=0.5):
+        """
+        使用余弦相似度计算相似度分数并返回相似度大于阈值的index矩阵
+        :param turbine_diff: 需要计算相似的矩阵,数据格式n*m,n为数据条数,m为数据维数
+        :param threshold: 相似度阈值
+        :return: 返回相似计算后的矩阵
+        """
+        similarity = {i: [] for i in range(49)}
+        similarity_index = {i: [] for i in range(49)}
+        for turbine_i in range(49):
+            for turbine_j in range(49):
+                cos_similarity = compute_cos_similarity(turbine_diff[turbine_i], turbine_diff[turbine_j])
+                similarity[turbine_i].append(cos_similarity)
+                if cos_similarity > threshold:
+                    similarity_index[turbine_i].append(turbine_j)
+        return similarity_index
+
+    def paint_turbine(self, paint_default=True):
+        """
+        绘制风机地理位置图
+        :param paint_default:默认True,绘制聚类后每个类别的数据折线图
+        :return: None
+        """
+
+        # y = self.info['纬度'].values
+        # x = self.info['经度'].values
+        #
+        # fig, ax = plt.subplots(figsize=(15, 15))
+        #
+        # plt.scatter(x, y, c=self.cluster)
+        # for i, txt in enumerate(self.info['C_ID'].values):
+        #     ax.annotate(txt, (x[i], y[i]))
+
+        # 设置图表标题和轴标签
+        # plt.xlabel('lons')
+        # plt.ylabel('lats')
+        # plt.legend()
+        #
+        # # 显示图表
+        # plt.savefig('analysis_img/turbine_cluster.png')
+        # plt.show()
+        if paint_default:
+            for i in range(max(self.cluster)):
+                self.paint_turbine_k(i + 1)
+
+    def turbine_smooth(self, window_size=50):
+        """
+        使用滑动平均平滑数据。
+
+        参数:
+        data -- 需要平滑的数据,numpy数组类型
+        window_size -- 滑动窗口大小,整数类型
+
+        返回值:
+        smooth_data -- 平滑后的数据,numpy数组类型
+        """
+
+        # weights = np.repeat(1.0, window_size) / window_size
+        smooth_data = []
+        for turbine_diff_i in self.turbine_diff:
+            smooth_y = savgol_filter(turbine_diff_i, window_length=window_size, polyorder=3)
+            smooth_data.append(smooth_y)
+        #     smooth_data.append(np.convolve(turbine_diff_i, weights, 'valid'))
+        self.smooth_turbine_diff = smooth_data
+
+    def paint_turbine_k(self, k):
+        """
+        绘制第k聚类的风机数据折线图
+        :param k:
+        :return:
+        """
+        pic_label = []
+        y = []
+        plt.figure(figsize=(20, 10))
+        cmap = plt.get_cmap('viridis')
+        for i, item in enumerate(self.cluster):
+            if item == k:
+                pic_label.append('turbine-'+str(self.turbine_id[i]))
+                y.append(self.ori_turbine_fft[i])
+        for i in range(len(y)):
+            color = cmap(i / 10)
+            plt.plot([j for j in range(len(y[i]))], y[i], color=color, label=pic_label[i])
+        # 添加标签和标题
+        plt.xlabel('x')
+        plt.ylabel('y')
+        plt.title('Cluster {}'.format(k))
+
+        # 添加图例
+        plt.legend()
+
+        # 显示图形
+        plt.savefig('analysis_img/cluster/cluster_{}.png'.format(k))
+        plt.show()
+
+    def turbine_fft(self, k):
+        """
+        对第k台原始风机数据进行傅里叶变换,并绘制变换前后曲线
+        :param k: 数据读入时的风机顺序index,从1开始
+        :return: 傅里叶变换清洗后的数据,数据格式
+        """
+        y = self.ori_turbine_pic
+        t = np.linspace(0, 1, self.data_length)
+        signal = y[k - 1]
+
+        # 进行傅里叶变换
+        freq = np.fft.fftfreq(len(signal), t[1] - t[0])
+        spectrum = np.fft.fft(signal)
+        spectrum_abs = np.abs(spectrum)
+        threshold = np.percentile(spectrum_abs, 98)
+        indices = spectrum_abs > threshold
+        spectrum_clean = indices * spectrum
+
+        # 进行傅里叶逆变换
+        signal_clean = np.fft.ifft(spectrum_clean)
+        # plt.figure(figsize=(20, 10))
+        #
+        # # 绘制时域信号
+        # plt.subplot(4, 1, 1)
+        # plt.plot(t, signal)
+        # plt.title(k)
+        #
+        # # 绘制频域信号
+        # plt.subplot(4, 1, 2)
+        # plt.plot(freq, np.abs(spectrum))
+        #
+        # # 绘制过滤后的频域信号
+        # plt.subplot(4, 1, 3)
+        # plt.plot(freq, np.abs(spectrum_clean))
+        #
+        # # 绘制过滤后的时域信号
+        # plt.subplot(4, 1, 4)
+        # plt.plot(t, signal_clean)
+        #
+        # plt.savefig('analysis_img/fft/{}_turbine_fft.png'.format(k))
+        # plt.show()
+        return signal_clean
+
+    def paint_double(self, i, j):
+        """
+        绘制两台风机的数据变换对比
+        :param i: 风机数据读入时数据编号,从1开始
+        :param j: 风机数据读入时数据编号,从1开始
+        :return:
+        """
+        y = self.ori_turbine_fft
+        x = [index for index in range(self.data_length)]
+        data_i = y[i - 1]
+        data_j = y[j - 1]
+
+        plt.figure(figsize=(20, 10))
+        plt.plot(x, data_i, label='turbine {}'.format(self.turbine_id[i-1]), linestyle='solid')
+        plt.plot(x, data_j, label='turbine {}'.format(self.turbine_id[j-1]), linestyle='dashed')
+
+        plt.title('{} and {}'.format(i, j))
+        plt.legend()
+        plt.savefig('analysis_img/{}_{}_turbine.png'.format(self.turbine_id[i-1], self.turbine_id[j-1]))
+        plt.show()
+
+    def process_ori_data(self):
+        """
+        对原始数据进行处理,聚类和绘图
+        :return:
+        """
+        self.turbine_clusters(self.ori_turbine_fft)
+        self.paint_turbine()
+
+    def turbine_clusters(self, data=None):
+        """
+        风机数据聚类,聚类信息保存在self.cluster中
+        :param data: 默认为空,也可以使用其他数据聚类,并体现在绘图中,
+        数据格式:二维数据n*m,n为数据条数,m为每条数据维数
+        :return: None
+        """
+        if data is None:
+            cluster = hierarchical_clustering(self.turbine_diff, threshold=1.4,
+                                              similarity_func=compute_pearsonr)  # 层次聚类
+        else:
+            cluster = hierarchical_clustering(data, threshold=1,
+                                              similarity_func=compute_pearsonr)
+        self.cluster = cluster
+        from cluster_power import cluster_power
+        out_put = '../data/cluester_power/'
+        cluster_power(self.cluster, out_put)
+
+
+data_analysis = DataAnalysis(data_length=9771,
+                             data_start=0,
+                             data_end=9771)
+
+data_analysis.process_ori_data()
+data_analysis.paint_double(20, 21)

+ 78 - 0
clustering/data_clean.py

@@ -0,0 +1,78 @@
+# !usr/bin/env python
+# -*- coding:utf-8 _*-
+"""
+@Author:Lijiaxing
+ 
+@File:data_clean.py
+@Time:2023/4/26 18:06
+
+"""
+import os.path
+
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+
+
+def paint_data(clean_data, clean_index=1):
+    x = [index for index in range(len(clean_data))]
+    plt.figure(figsize=(20, 10))
+    plt.title('clean_{}'.format(clean_index))
+
+    # 绘制曲线
+    plt.plot(x, clean_data, color='red', label='clean_data')
+
+    plt.savefig('data_{}.png'.format(clean_index))
+    plt.show()
+
+
+class clean_file:
+    """
+        清洗数据
+    """
+
+    def __init__(self, output_path='./'):
+        """
+        :param output_path: 清洗后的数据存放路径 ,只传入路径,不包括文件名
+        """
+        self.data = []
+        output_path = os.path.join(output_path, 'clean_data')
+        if not os.path.exists(output_path):
+            os.makedirs(output_path)
+        self.output_path = output_path
+
+    def clean_data(self, file_path, clean_name, clean_value, multi_value=False, clean_index=1, paint=True):
+        """
+        数据清洗
+        将-9999或-99数据进行插值处理,并绘制处理后的数据图像保存至output_path路径下
+        :param paint: 是否绘制图像
+        :param multi_value: 若为True,则clean_value为list
+        :param clean_value: 清洗数据中异常值
+        :param clean_name: 清洗数据中异常值列名
+        :param file_path: 需要清洗的数据,csv格式
+        :param clean_index: 清洗数据输出文件名格式为 clean_${clean_index}.csv
+        :return: None
+        """
+        data = pd.read_csv(file_path)
+        if paint:
+            paint_old_data = [item for item in data[clean_name].values]
+        old_data = data[clean_name].values
+        if multi_value:
+            for clean_value_i in clean_value:
+                data[clean_name][old_data == clean_value_i] = np.nan
+        else:
+            data[clean_name][old_data == clean_value] = np.nan
+
+        data[clean_name] = data[clean_name].interpolate()
+        data.to_csv(os.path.join(self.output_path, 'clean_{}.csv'.format(clean_index)), index=False)
+        already_clean = data[clean_name].values
+        if paint:
+            paint_data(already_clean, clean_index)
+
+
+# 使用示例
+cleaner = clean_file(output_path='Dataset_training/power/')
+for i in range(6):
+    cleaner.clean_data(file_path='Dataset_training/power/power_{}.csv'.format(i), clean_name='C_REAL_VALUE',
+                       clean_value=[-9999.0, -99], multi_value=True,
+                       clean_index=i)

BIN
clustering/聚类结果/analysis_power_img_3_month.zip


BIN
clustering/聚类结果/analysis_ws_img_1_month.zip


BIN
clustering/聚类结果/analysis_ws_img_2_day.zip


BIN
clustering/聚类结果/analysis_ws_img_3_month.zip


+ 21 - 0
clustering/聚类结果说明/README.md

@@ -0,0 +1,21 @@
+# 聚类结果
+
+---
+
+## 目录结构
+
+```
+|------ cluster                              # 聚类结果目录
+|------------- cluster_1.png                 # 聚类数据趋势图片,index为类别标签
+|------------- cluster_2.png
+|------------- cluster_3.png
+|------------- cluster_4.png
+|------ fft                                  # 傅里叶变换滤波结果目录
+|------------- index_turbine_fft.png         # 傅里叶变换滤波前后数据对比
+|------------- index_turbine_fft.png
+|------------- index_turbine_fft.png
+|------------- ......
+|------ turbine_cluster.png                  # 聚类体现在经纬度位置图中的表现
+|------ 风机标签与风机名称对应表.xlsx        # 使用的index与风机名称对应表
+```
+
+
+

+ 11 - 0
clustering/聚类结果说明/cluster/README.md

@@ -0,0 +1,11 @@
+# 聚类文件说明
+
+---
+
+![样例图片,对应类目1](./cluster_1.png)
+
+## Title:1对应类目编号
+
+## 图例:该类目下对应的风机编号,从1开始,1-49
+
+## 纵坐标:对应功率值

BIN
clustering/聚类结果说明/cluster/cluster_1.png


BIN
clustering/聚类结果说明/cluster/cluster_2.png


BIN
clustering/聚类结果说明/cluster/cluster_3.png


BIN
clustering/聚类结果说明/cluster/cluster_4.png


BIN
clustering/聚类结果说明/fft/10_turbine_fft.png


BIN
clustering/聚类结果说明/fft/11_turbine_fft.png


BIN
clustering/聚类结果说明/fft/12_turbine_fft.png


BIN
clustering/聚类结果说明/fft/13_turbine_fft.png


BIN
clustering/聚类结果说明/fft/14_turbine_fft.png


BIN
clustering/聚类结果说明/fft/15_turbine_fft.png


BIN
clustering/聚类结果说明/fft/16_turbine_fft.png


BIN
clustering/聚类结果说明/fft/17_turbine_fft.png


BIN
clustering/聚类结果说明/fft/18_turbine_fft.png


BIN
clustering/聚类结果说明/fft/19_turbine_fft.png


BIN
clustering/聚类结果说明/fft/1_turbine_fft.png


BIN
clustering/聚类结果说明/fft/20_turbine_fft.png


BIN
clustering/聚类结果说明/fft/21_turbine_fft.png


BIN
clustering/聚类结果说明/fft/22_turbine_fft.png


BIN
clustering/聚类结果说明/fft/23_turbine_fft.png


BIN
clustering/聚类结果说明/fft/24_turbine_fft.png


BIN
clustering/聚类结果说明/fft/25_turbine_fft.png


BIN
clustering/聚类结果说明/fft/26_turbine_fft.png


BIN
clustering/聚类结果说明/fft/27_turbine_fft.png


BIN
clustering/聚类结果说明/fft/28_turbine_fft.png


BIN
clustering/聚类结果说明/fft/29_turbine_fft.png


BIN
clustering/聚类结果说明/fft/2_turbine_fft.png


BIN
clustering/聚类结果说明/fft/30_turbine_fft.png


BIN
clustering/聚类结果说明/fft/31_turbine_fft.png


BIN
clustering/聚类结果说明/fft/32_turbine_fft.png


BIN
clustering/聚类结果说明/fft/33_turbine_fft.png


BIN
clustering/聚类结果说明/fft/34_turbine_fft.png


BIN
clustering/聚类结果说明/fft/35_turbine_fft.png


BIN
clustering/聚类结果说明/fft/36_turbine_fft.png


BIN
clustering/聚类结果说明/fft/37_turbine_fft.png


BIN
clustering/聚类结果说明/fft/38_turbine_fft.png


BIN
clustering/聚类结果说明/fft/39_turbine_fft.png


BIN
clustering/聚类结果说明/fft/3_turbine_fft.png


BIN
clustering/聚类结果说明/fft/40_turbine_fft.png


BIN
clustering/聚类结果说明/fft/41_turbine_fft.png


BIN
clustering/聚类结果说明/fft/42_turbine_fft.png


BIN
clustering/聚类结果说明/fft/43_turbine_fft.png


BIN
clustering/聚类结果说明/fft/44_turbine_fft.png


BIN
clustering/聚类结果说明/fft/45_turbine_fft.png


BIN
clustering/聚类结果说明/fft/46_turbine_fft.png


BIN
clustering/聚类结果说明/fft/47_turbine_fft.png


BIN
clustering/聚类结果说明/fft/48_turbine_fft.png


BIN
clustering/聚类结果说明/fft/49_turbine_fft.png


BIN
clustering/聚类结果说明/fft/4_turbine_fft.png


BIN
clustering/聚类结果说明/fft/5_turbine_fft.png


BIN
clustering/聚类结果说明/fft/6_turbine_fft.png


BIN
clustering/聚类结果说明/fft/7_turbine_fft.png


BIN
clustering/聚类结果说明/fft/8_turbine_fft.png


BIN
clustering/聚类结果说明/fft/9_turbine_fft.png


+ 15 - 0
clustering/聚类结果说明/fft/README.md

@@ -0,0 +1,15 @@
+# FFT 图片说明
+
+---
+
+![样例图片,对应风机1](1_turbine_fft.png)
+
+## Title:1对应风机编号
+
+## 图1:原始数据曲线
+
+## 图2:使用傅里叶变换后将原始数据曲线从时域转换为频域
+
+## 图3:频域曲线过滤噪声后结果
+
+## 图4:将滤噪后的曲线转换回时域的结果

BIN
clustering/聚类结果说明/turbine_cluster.png


BIN
clustering/聚类结果说明/风机标签与风机名称对应表.xlsx