|
@@ -14,8 +14,8 @@ import pandas as pd
|
|
from scipy.signal import savgol_filter
|
|
from scipy.signal import savgol_filter
|
|
import numpy as np
|
|
import numpy as np
|
|
import matplotlib.pyplot as plt
|
|
import matplotlib.pyplot as plt
|
|
-from scipy.cluster.hierarchy import linkage, fcluster
|
|
|
|
-
|
|
|
|
|
|
+from scipy.cluster.hierarchy import dendrogram, linkage, fcluster
|
|
|
|
+from sklearn.metrics import silhouette_samples, silhouette_score
|
|
|
|
|
|
def paint_others(y):
|
|
def paint_others(y):
|
|
""" 绘制其他数据 """
|
|
""" 绘制其他数据 """
|
|
@@ -75,6 +75,14 @@ def hierarchical_clustering(data, threshold, similarity_func):
|
|
Z = linkage(distance_matrix, method='ward')
|
|
Z = linkage(distance_matrix, method='ward')
|
|
# 根据相似度阈值获取聚类结果
|
|
# 根据相似度阈值获取聚类结果
|
|
clusters = fcluster(Z, t=threshold, criterion='distance')
|
|
clusters = fcluster(Z, t=threshold, criterion='distance')
|
|
|
|
+ # 画出层次聚类树形结构
|
|
|
|
+ fig = plt.figure(figsize=(5, 3))
|
|
|
|
+ dn = dendrogram(Z)
|
|
|
|
+ plt.show()
|
|
|
|
+ # clusters[42] = 1
|
|
|
|
+ silhouette = silhouette_samples(np.abs(distance_matrix), clusters, metric='euclidean')
|
|
|
|
+ silhouette1 = silhouette_score(np.abs(distance_matrix), clusters, metric='euclidean')
|
|
|
|
+ print(f"平均轮廓系数为:{silhouette1}, 单个样本的轮廓系数:{silhouette}")
|
|
return clusters
|
|
return clusters
|
|
|
|
|
|
|
|
|
|
@@ -106,8 +114,8 @@ class DataAnalysis:
|
|
self.turbine = None
|
|
self.turbine = None
|
|
# 风机的标号顺序
|
|
# 风机的标号顺序
|
|
self.turbine_id = list(range(102, 162))
|
|
self.turbine_id = list(range(102, 162))
|
|
- b1b4 = [142, 143, 144, 145]
|
|
|
|
- self.turbine_id = [id for id in self.turbine_id if id not in b1b4]
|
|
|
|
|
|
+ # b1b4 = [142, 143, 144, 145]
|
|
|
|
+ # self.turbine_id = [id for id in self.turbine_id if id not in b1b4]
|
|
# 风机功率数据15分钟级别
|
|
# 风机功率数据15分钟级别
|
|
self.power_15min = None
|
|
self.power_15min = None
|
|
# 风机经纬度信息
|
|
# 风机经纬度信息
|
|
@@ -119,7 +127,7 @@ class DataAnalysis:
|
|
# 使用数据结束位置
|
|
# 使用数据结束位置
|
|
self.data_end = data_end
|
|
self.data_end = data_end
|
|
# 导入数据
|
|
# 导入数据
|
|
- self.load_data()
|
|
|
|
|
|
+ self.load_data(normalize=True)
|
|
# 计算风机功率差分
|
|
# 计算风机功率差分
|
|
self.compute_turbine_diff()
|
|
self.compute_turbine_diff()
|
|
|
|
|
|
@@ -139,7 +147,7 @@ class DataAnalysis:
|
|
self.turbine, turbines = {}, []
|
|
self.turbine, turbines = {}, []
|
|
for i in self.turbine_id:
|
|
for i in self.turbine_id:
|
|
self.turbine[i] = pd.read_csv(turbine_path.format(i))[20:].reset_index(drop=True)
|
|
self.turbine[i] = pd.read_csv(turbine_path.format(i))[20:].reset_index(drop=True)
|
|
- if normalize:
|
|
|
|
|
|
+ if normalize is True:
|
|
self.normalize()
|
|
self.normalize()
|
|
|
|
|
|
def normalize(self):
|
|
def normalize(self):
|
|
@@ -160,10 +168,12 @@ class DataAnalysis:
|
|
turbine_diff = []
|
|
turbine_diff = []
|
|
ori_turbine_pic = []
|
|
ori_turbine_pic = []
|
|
for turbine_i in self.turbine_id:
|
|
for turbine_i in self.turbine_id:
|
|
- diff_array = np.diff(
|
|
|
|
- np.array(self.turbine[turbine_i]['C_ACTIVE_POWER'].values[self.data_start:self.data_end + 1]))
|
|
|
|
|
|
+ ori = np.array(self.turbine[turbine_i]['C_WS'].values[self.data_start:self.data_end + 1])
|
|
|
|
+ diff_array = np.diff(ori)
|
|
|
|
+ smoothness_value = np.std(diff_array)
|
|
|
|
+ print("turbine-{}的平滑度是:{}".format(turbine_i, round(smoothness_value, 2)))
|
|
turbine_diff.append(diff_array)
|
|
turbine_diff.append(diff_array)
|
|
- ori_turbine_pic.append(self.turbine[turbine_i]['C_ACTIVE_POWER'].values[self.data_start:self.data_end])
|
|
|
|
|
|
+ ori_turbine_pic.append(self.turbine[turbine_i]['C_WS'].values[self.data_start:self.data_end])
|
|
self.ori_turbine_pic = ori_turbine_pic
|
|
self.ori_turbine_pic = ori_turbine_pic
|
|
self.turbine_diff = turbine_diff
|
|
self.turbine_diff = turbine_diff
|
|
|
|
|
|
@@ -288,9 +298,37 @@ class DataAnalysis:
|
|
# # 显示图表
|
|
# # 显示图表
|
|
# plt.savefig('analysis_img/turbine_cluster.png')
|
|
# plt.savefig('analysis_img/turbine_cluster.png')
|
|
# plt.show()
|
|
# plt.show()
|
|
|
|
+
|
|
|
|
+ plt.figure(figsize=(20, 10))
|
|
|
|
+ cmap = plt.get_cmap('viridis')
|
|
|
|
+ linestyle= ['solid', 'dashed']
|
|
|
|
+ for i in range(max(self.cluster)):
|
|
|
|
+ cluster, cluster_fft = [], []
|
|
|
|
+ for j, item in enumerate(self.cluster):
|
|
|
|
+ if item == i + 1:
|
|
|
|
+ cluster.append(self.ori_turbine_pic[j])
|
|
|
|
+ cluster_fft.append(self.ori_turbine_fft[j])
|
|
|
|
+ cluster_fft = np.average(cluster_fft, axis=0)
|
|
|
|
+ cluster = np.average(cluster, axis=0)
|
|
|
|
+ diff_array = np.diff(cluster)
|
|
|
|
+ smoothness_value = np.std(diff_array)
|
|
|
|
+ print("聚类-{}的平滑度是:{}".format(i+1, smoothness_value))
|
|
|
|
+ color = cmap(i*200)
|
|
|
|
+ plt.subplot(2, 1, 1)
|
|
|
|
+ plt.plot([j for j in range(len(cluster))], cluster, color=color, label='cluster'+str(i), linestyle=linestyle[i])
|
|
|
|
+ plt.subplot(2, 1, 2)
|
|
|
|
+ plt.plot([j for j in range(len(cluster_fft))], cluster_fft, color=color, label='cluster'+str(i), linestyle=linestyle[i])
|
|
|
|
+
|
|
|
|
+ # 添加图例
|
|
|
|
+ plt.legend()
|
|
|
|
+ # 显示图形
|
|
|
|
+ plt.savefig('analysis_img/cluster/clusters.png')
|
|
|
|
+ plt.show()
|
|
if paint_default:
|
|
if paint_default:
|
|
for i in range(max(self.cluster)):
|
|
for i in range(max(self.cluster)):
|
|
- self.paint_turbine_k(i + 1)
|
|
|
|
|
|
+ self.paint_turbine_k(i + 1) # 画出聚类中每个风机的曲线
|
|
|
|
+
|
|
|
|
+
|
|
|
|
|
|
def turbine_smooth(self, window_size=50):
|
|
def turbine_smooth(self, window_size=50):
|
|
"""
|
|
"""
|
|
@@ -365,7 +403,7 @@ class DataAnalysis:
|
|
# # 绘制时域信号
|
|
# # 绘制时域信号
|
|
# plt.subplot(4, 1, 1)
|
|
# plt.subplot(4, 1, 1)
|
|
# plt.plot(t, signal)
|
|
# plt.plot(t, signal)
|
|
- # plt.title(k)
|
|
|
|
|
|
+ # plt.title(self.turbine_id[k-1])
|
|
#
|
|
#
|
|
# # 绘制频域信号
|
|
# # 绘制频域信号
|
|
# plt.subplot(4, 1, 2)
|
|
# plt.subplot(4, 1, 2)
|
|
@@ -379,7 +417,7 @@ class DataAnalysis:
|
|
# plt.subplot(4, 1, 4)
|
|
# plt.subplot(4, 1, 4)
|
|
# plt.plot(t, signal_clean)
|
|
# plt.plot(t, signal_clean)
|
|
#
|
|
#
|
|
- # plt.savefig('analysis_img/fft/{}_turbine_fft.png'.format(k))
|
|
|
|
|
|
+ # plt.savefig('analysis_img/fft/{}_turbine_fft.png'.format(self.turbine_id[k-1]))
|
|
# plt.show()
|
|
# plt.show()
|
|
return signal_clean
|
|
return signal_clean
|
|
|
|
|
|
@@ -423,11 +461,11 @@ class DataAnalysis:
|
|
cluster = hierarchical_clustering(self.turbine_diff, threshold=1.4,
|
|
cluster = hierarchical_clustering(self.turbine_diff, threshold=1.4,
|
|
similarity_func=compute_pearsonr) # 层次聚类
|
|
similarity_func=compute_pearsonr) # 层次聚类
|
|
else:
|
|
else:
|
|
- cluster = hierarchical_clustering(data, threshold=1.2,
|
|
|
|
|
|
+ cluster = hierarchical_clustering(data, threshold=0.8,
|
|
similarity_func=compute_pearsonr)
|
|
similarity_func=compute_pearsonr)
|
|
self.cluster = cluster
|
|
self.cluster = cluster
|
|
# 在这里保存cluster变量
|
|
# 在这里保存cluster变量
|
|
- from cluster_power import cluster_power_list_file, cluster_power_list_folder
|
|
|
|
|
|
+ from cluster_analysis import cluster_power_list_file, cluster_power_list_folder
|
|
|
|
|
|
output_path = '../data-process/data/cluster_power/'
|
|
output_path = '../data-process/data/cluster_power/'
|
|
|
|
|
|
@@ -442,4 +480,4 @@ data_analysis = DataAnalysis(data_length=9773,
|
|
data_end=9773)
|
|
data_end=9773)
|
|
|
|
|
|
data_analysis.process_ori_data()
|
|
data_analysis.process_ori_data()
|
|
-data_analysis.paint_double(20, 21)
|
|
|
|
|
|
+data_analysis.paint_double(1, 56)
|