|
@@ -8,8 +8,6 @@
|
|
|
|
|
|
"""
|
|
|
import os.path
|
|
|
-import matplotlib.colors as mcolors
|
|
|
-colors = list(mcolors.XKCD_COLORS.keys())
|
|
|
import pandas as pd
|
|
|
# from mpl_toolkits.basemap import Basemap
|
|
|
from scipy.signal import savgol_filter
|
|
@@ -57,6 +55,22 @@ def compute_distance(a, b):
|
|
|
"""
|
|
|
return np.linalg.norm(a - b)
|
|
|
|
|
|
+def similarity_score(turbine_diff, threshold=0.5):
|
|
|
+ """
|
|
|
+ 使用余弦相似度计算相似度分数并返回相似度大于阈值的index矩阵
|
|
|
+ :param turbine_diff: 需要计算相似的矩阵,数据格式n*m,n为数据条数,m为数据维数
|
|
|
+ :param threshold: 相似度阈值
|
|
|
+ :return: 返回相似计算后的矩阵
|
|
|
+ """
|
|
|
+ similarity = {i: [] for i in range(49)}
|
|
|
+ similarity_index = {i: [] for i in range(49)}
|
|
|
+ for turbine_i in range(49):
|
|
|
+ for turbine_j in range(49):
|
|
|
+ cos_similarity = compute_cos_similarity(turbine_diff[turbine_i], turbine_diff[turbine_j])
|
|
|
+ similarity[turbine_i].append(cos_similarity)
|
|
|
+ if cos_similarity > threshold:
|
|
|
+ similarity_index[turbine_i].append(turbine_j)
|
|
|
+ return similarity_index
|
|
|
|
|
|
def hierarchical_clustering(data, threshold, similarity_func):
|
|
|
"""
|
|
@@ -89,7 +103,7 @@ def hierarchical_clustering(data, threshold, similarity_func):
|
|
|
return clusters
|
|
|
|
|
|
|
|
|
-class DataAnalysis:
|
|
|
+class DataAnalysis(object):
|
|
|
"""
|
|
|
数据分析类
|
|
|
"""
|
|
@@ -118,6 +132,8 @@ class DataAnalysis:
|
|
|
self.turbine = None
|
|
|
# 风机的标号顺序
|
|
|
self.turbine_id = [x for x in range(76, 151, 1)]
|
|
|
+ self.c_names = ['G01', 'G02', 'G03', 'G04', 'G05', 'G06', 'G07', 'G08', 'G09', 'G10'] + ['G' + str(x) for x in range(11, 76)]
|
|
|
+
|
|
|
# b1b4 = [142, 143, 144, 145]
|
|
|
# self.turbine_id = [id for id in self.turbine_id if id not in b1b4]
|
|
|
# 风机功率数据15分钟级别
|
|
@@ -171,12 +187,12 @@ class DataAnalysis:
|
|
|
smoothness_value = np.std(diff_array)
|
|
|
print("turbine-{}的平滑度是:{}".format(turbine_i, round(smoothness_value, 2)))
|
|
|
turbine_diff.append(diff_array)
|
|
|
- ori_turbine_pic.append(self.turbine[turbine_i]['C_WS'].values[self.data_start:self.data_end])
|
|
|
- ori_turbine_power.append(self.turbine[turbine_i]['C_ACTIVE_POWER'].values[self.data_start:self.data_end])
|
|
|
+ ori_turbine_pic.append(self.turbine[turbine_i]['C_WS'].values[self.data_start:self.data_end + 1])
|
|
|
+ ori_turbine_power.append(self.turbine[turbine_i]['C_ACTIVE_POWER'].values[self.data_start:self.data_end + 1])
|
|
|
|
|
|
- self.ori_turbine_power = ori_turbine_power
|
|
|
- self.ori_turbine_pic = ori_turbine_pic
|
|
|
- self.turbine_diff = turbine_diff
|
|
|
+ self.ori_turbine_power = ori_turbine_power # 风机功率
|
|
|
+ self.ori_turbine_pic = ori_turbine_pic # 风机风速
|
|
|
+ self.turbine_diff = turbine_diff # 风机风速差分
|
|
|
|
|
|
diff_change = []
|
|
|
for diff_i in turbine_diff:
|
|
@@ -189,172 +205,25 @@ class DataAnalysis:
|
|
|
else:
|
|
|
single_diff_change.append(0)
|
|
|
diff_change.append(single_diff_change)
|
|
|
- self.diff_change = diff_change
|
|
|
+ self.diff_change = diff_change # 风机风速差分1 -1 0 标值化
|
|
|
+
|
|
|
+ # 风机风速傅里叶变换
|
|
|
self.ori_turbine_fft = [self.turbine_fft(i + 1) for i in range(len(self.ori_turbine_pic))]
|
|
|
|
|
|
- # 平滑
|
|
|
+ # 风机风速差分平滑化
|
|
|
self.turbine_smooth(window_size=21)
|
|
|
|
|
|
- def paint_map(self):
|
|
|
- """
|
|
|
- 绘制经纬度地图
|
|
|
- :return:
|
|
|
- """
|
|
|
- lats = self.info['纬度'].values
|
|
|
- lons = self.info['经度'].values
|
|
|
- map = Basemap()
|
|
|
-
|
|
|
- # 绘制海岸线和国家边界
|
|
|
- map.drawcoastlines()
|
|
|
- map.drawcountries()
|
|
|
-
|
|
|
- # 绘制经纬度坐标
|
|
|
- map.drawmeridians(range(0, 360, 30))
|
|
|
- map.drawparallels(range(-90, 90, 30))
|
|
|
-
|
|
|
- # 绘制点
|
|
|
-
|
|
|
- x, y = map(lons, lats)
|
|
|
- map.plot(x, y, 'bo', markersize=10)
|
|
|
-
|
|
|
- # 显示图表
|
|
|
- plt.show()
|
|
|
-
|
|
|
- def paint_power15min(self):
|
|
|
- """
|
|
|
- 绘制15分钟功率曲线
|
|
|
- :return:
|
|
|
- """
|
|
|
-
|
|
|
- plt.plot(self.power_15min['C_REAL_VALUE'])
|
|
|
-
|
|
|
- # 设置图表标题和轴标签
|
|
|
- plt.title('Data Time Change Curve')
|
|
|
- plt.xlabel('Date')
|
|
|
- plt.ylabel('Value')
|
|
|
-
|
|
|
- # 显示图表
|
|
|
- plt.show()
|
|
|
-
|
|
|
- def paint_lats_lons(self):
|
|
|
- """
|
|
|
- 绘制经纬度图
|
|
|
- :return:
|
|
|
- """
|
|
|
- x = self.info['纬度'].values
|
|
|
- y = self.info['经度'].values
|
|
|
-
|
|
|
- # 绘制散点图
|
|
|
- fig, ax = plt.subplots()
|
|
|
- plt.scatter(x, y)
|
|
|
-
|
|
|
- for i, txt in enumerate(self.info['id'].values):
|
|
|
- ax.annotate(txt, (x[i], y[i]))
|
|
|
-
|
|
|
- # 设置图表标题和轴标签
|
|
|
- plt.xlabel('lats')
|
|
|
- plt.ylabel('lons')
|
|
|
-
|
|
|
- # 显示图表
|
|
|
- plt.show()
|
|
|
-
|
|
|
- def similarity_score(self, turbine_diff, threshold=0.5):
|
|
|
- """
|
|
|
- 使用余弦相似度计算相似度分数并返回相似度大于阈值的index矩阵
|
|
|
- :param turbine_diff: 需要计算相似的矩阵,数据格式n*m,n为数据条数,m为数据维数
|
|
|
- :param threshold: 相似度阈值
|
|
|
- :return: 返回相似计算后的矩阵
|
|
|
- """
|
|
|
- similarity = {i: [] for i in range(49)}
|
|
|
- similarity_index = {i: [] for i in range(49)}
|
|
|
- for turbine_i in range(49):
|
|
|
- for turbine_j in range(49):
|
|
|
- cos_similarity = compute_cos_similarity(turbine_diff[turbine_i], turbine_diff[turbine_j])
|
|
|
- similarity[turbine_i].append(cos_similarity)
|
|
|
- if cos_similarity > threshold:
|
|
|
- similarity_index[turbine_i].append(turbine_j)
|
|
|
- return similarity_index
|
|
|
-
|
|
|
def mapping_turbines(self):
|
|
|
turbine_clus = {}
|
|
|
+ id_names = {id: self.c_names[x] for x, id in enumerate(self.turbine_id)}
|
|
|
import pickle
|
|
|
for a, b in zip(self.turbine_id, self.cluster):
|
|
|
print("风机编号:{},类别:{}".format(a, b))
|
|
|
- turbine_clus.setdefault(b, []).append(a)
|
|
|
+ turbine_clus.setdefault(b, []).append(id_names[a])
|
|
|
path = os.path.join(os.path.dirname(self.turbine_path), 'turbine_cls.pickle')
|
|
|
with open(path, 'wb') as file:
|
|
|
pickle.dump(turbine_clus, file)
|
|
|
|
|
|
- def paint_turbine(self, paint_default=True):
|
|
|
- """
|
|
|
- 绘制风机地理位置图
|
|
|
- :param paint_default:默认True,绘制聚类后每个类别的数据折线图
|
|
|
- :return: None
|
|
|
- """
|
|
|
-
|
|
|
- # y = self.info['纬度'].values
|
|
|
- # x = self.info['经度'].values
|
|
|
- #
|
|
|
- # fig, ax = plt.subplots(figsize=(15, 15))
|
|
|
- #
|
|
|
- # plt.scatter(x, y, c=self.cluster)
|
|
|
- # for i, txt in enumerate(self.info['C_ID'].values):
|
|
|
- # ax.annotate(txt, (x[i], y[i]))
|
|
|
-
|
|
|
- # 设置图表标题和轴标签
|
|
|
- # plt.xlabel('lons')
|
|
|
- # plt.ylabel('lats')
|
|
|
- # plt.legend()
|
|
|
- #
|
|
|
- # # 显示图表
|
|
|
- # plt.savefig('analysis_img/turbine_cluster.png')
|
|
|
- # plt.show()
|
|
|
-
|
|
|
- plt.figure(figsize=(60, 40))
|
|
|
- cmap = plt.get_cmap('viridis')
|
|
|
- linestyle= ['solid', 'dashed', 'dotted', 'dashdot']
|
|
|
- for i in range(max(self.cluster)):
|
|
|
- cluster, cluster_fft, cluster_power, = [], [], []
|
|
|
- for j, item in enumerate(self.cluster):
|
|
|
- if item == i + 1:
|
|
|
- cluster.append(self.ori_turbine_pic[j])
|
|
|
- cluster_fft.append(self.ori_turbine_fft[j])
|
|
|
- cluster_power.append(self.ori_turbine_power[j])
|
|
|
- cluster_power = np.average(cluster_power, axis=0)
|
|
|
- cluster_fft = np.average(cluster_fft, axis=0)
|
|
|
- cluster = np.average(cluster, axis=0)
|
|
|
- diff_array = np.diff(cluster)
|
|
|
- smoothness_value = np.std(diff_array)
|
|
|
- print("聚类-{}的平滑度是:{}".format(i+1, smoothness_value))
|
|
|
- color = cmap(i*200)
|
|
|
- plt.figure(1)
|
|
|
- # plt.subplot(max(self.cluster), 1, 1)
|
|
|
- # print("----", cluster, linestyle[i])
|
|
|
- # plt.plot([j for j in range(len(cluster))], cluster, color=color, label='cluster'+str(i))
|
|
|
- # plt.subplot(max(self.cluster), 1, 2)
|
|
|
- # plt.plot([j for j in range(len(cluster_fft))], cluster_fft, color=color, label='cluster'+str(i))
|
|
|
-
|
|
|
- # ws_power_dict = {}
|
|
|
- # for c, p in zip(cluster, cluster_power):
|
|
|
- # ws_power_dict.setdefault(round(c, 2), []).append(round(p, 2))
|
|
|
- #
|
|
|
- # for key, value in ws_power_dict.items():
|
|
|
- # ws_power_dict[key] = round(np.average(value), 2)
|
|
|
- # print(ws_power_dict)
|
|
|
- # plt.scatter(cluster, cluster_power, color=color, label='cluster' + str(i),
|
|
|
- # linestyle=linestyle[i], s=1, alpha=0.2)
|
|
|
-
|
|
|
- # 添加图例
|
|
|
- # plt.legend()
|
|
|
- # # 显示图形
|
|
|
- # plt.savefig('./clusters.png')
|
|
|
- # plt.show()
|
|
|
- # if paint_default:
|
|
|
- # for i in range(max(self.cluster)):
|
|
|
- # self.paint_turbine_k(i + 1) # 画出聚类中每个风机的曲线
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
def turbine_smooth(self, window_size=50):
|
|
|
"""
|
|
|
使用滑动平均平滑数据。
|
|
@@ -400,7 +269,7 @@ class DataAnalysis:
|
|
|
# 添加图例
|
|
|
plt.legend()
|
|
|
# 显示图形
|
|
|
- plt.savefig('analysis_img/cluster/cluster_{}.png'.format(k))
|
|
|
+ plt.savefig('analysis_img/cluster_{}.png'.format(k))
|
|
|
plt.show()
|
|
|
|
|
|
def turbine_fft(self, k):
|
|
@@ -473,7 +342,7 @@ class DataAnalysis:
|
|
|
:return:
|
|
|
"""
|
|
|
self.turbine_clusters(self.ori_turbine_fft)
|
|
|
- self.paint_turbine()
|
|
|
+ # self.paint_turbine()
|
|
|
|
|
|
def turbine_clusters(self, data=None):
|
|
|
"""
|
|
@@ -483,11 +352,9 @@ class DataAnalysis:
|
|
|
:return: None
|
|
|
"""
|
|
|
if data is None:
|
|
|
- cluster = hierarchical_clustering(self.turbine_diff, threshold=1.4,
|
|
|
- similarity_func=compute_pearsonr) # 层次聚类
|
|
|
+ cluster = hierarchical_clustering(self.turbine_diff, threshold=1.4, similarity_func=compute_pearsonr) # 层次聚类
|
|
|
else:
|
|
|
- cluster = hierarchical_clustering(data, threshold=0.6,
|
|
|
- similarity_func=compute_pearsonr)
|
|
|
+ cluster = hierarchical_clustering(data, threshold=0.6, similarity_func=compute_pearsonr)
|
|
|
self.cluster = cluster
|
|
|
# 在这里保存cluster变量
|
|
|
# from cluster_analysis import cluster_power_list_file, cluster_power_list_folder
|
|
@@ -523,3 +390,4 @@ if __name__ == '__main__':
|
|
|
data_analysis.process_ori_data()
|
|
|
data_analysis.mapping_turbines()
|
|
|
# data_analysis.paint_double(1, 56)
|
|
|
+ # data_analysis.paint_turbine()
|