#!/usr/bin/env python
# -*- coding: utf-8 -*-
# time: 2023/4/12 18:57
# file: data_analyse.py
# author: David
# company: shenyang JY
import sys

import numpy as np
import pandas as pd


class data_analyse(object):
    """Accuracy evaluation helpers for power-forecast results.

    The instance keeps three collaborators supplied by the caller:

    * ``opt``    - configuration object; the methods below read
      ``csv_data_path``, ``excel_data_path``, ``data_format`` and ``cap``.
    * ``logger`` - object exposing ``info(msg)``.
    * ``ds``     - stored from the ``process`` argument; ``predict_acc``
      reads its ``std`` and ``mean`` mappings to undo normalisation.
    """

    def __init__(self, opt, logger, process):
        self.opt = opt
        self.logger = logger
        self.ds = process

    def dq_acc(self):
        """Load the "dq" CSV and return it with ``C_TIME`` as datetimes.

        The file path is ``opt.csv_data_path + opt.data_format["dq"]``
        (plain string concatenation, so the directory must already end with
        a path separator).  Only ``C_TIME`` and ``C_ABLE_VALUE`` are read.

        Returns:
            DataFrame with columns ``C_TIME`` and ``C_ABLE_VALUE``.
        """
        dq_path = self.opt.csv_data_path + self.opt.data_format["dq"]
        dq = pd.read_csv(dq_path, usecols=['C_TIME', 'C_ABLE_VALUE'])
        # C_TIME is divided by 1000 and then parsed as seconds, i.e. the raw
        # column is treated as an epoch timestamp in milliseconds.
        dq['C_TIME'] = dq['C_TIME'].astype(float) / 1000
        dq['C_TIME'] = pd.to_datetime(dq['C_TIME'], unit='s')
        return dq

    def cdq_acc(self):
        """Load the "cdq" Excel sheet and return it with parsed ``C_TIME``.

        The file path is ``opt.excel_data_path + opt.data_format["cdq"]``.
        ``C_TIME`` is parsed with the format ``%Y-%m-%d %H:%M:%S``.

        Returns:
            DataFrame with columns ``C_TIME`` and ``C_ABLE_VALUE``.
        """
        cdq_path = self.opt.excel_data_path + self.opt.data_format["cdq"]
        cdq = pd.read_excel(cdq_path, usecols=['C_TIME', 'C_ABLE_VALUE'])
        cdq['C_TIME'] = pd.to_datetime(cdq['C_TIME'], format='%Y-%m-%d %H:%M:%S')
        return cdq

    def calculate_acc(self, label_data, predict_data):
        """Return ``1 - RMSE / opt.cap`` for the two value sequences.

        Args:
            label_data: reference values.
            predict_data: predicted values, same length as ``label_data``.

        Returns:
            The accuracy score ``1 - sqrt(mean((label - predict)^2)) / cap``.
        """
        mse = np.sum((label_data - predict_data) ** 2) / len(label_data)
        rmse = np.sqrt(mse)
        return 1 - rmse / self.opt.cap

    def calculate_acc_307(self, label_data, predict_data):
        """Return ``1 - sqrt(mean(((label - predict) / opt.cap)^2))``.

        Same quantity as ``calculate_acc`` for a positive ``cap``, but the
        error is scaled by ``cap`` before squaring instead of after the
        square root.

        Args:
            label_data: reference values.
            predict_data: predicted values, same length as ``label_data``.
        """
        scaled_error = (label_data - predict_data) / self.opt.cap
        mean_square = np.sum(scaled_error ** 2) / len(label_data)
        return 1 - np.sqrt(mean_square)

    def get_16_points(self, results):
        """Collect the last row of every frame in ``results``.

        Args:
            results: iterable of DataFrames (the original note describes
                each one as a 16-point prediction window - TODO confirm).

        Returns:
            ``numpy`` array with one entry per frame, holding that frame's
            final row values.
        """
        return np.array([res.iloc[-1].values for res in results])

    def predict_acc(self, predict_data, dfy, predict_all=False):
        """De-normalise predictions, attach them to ``dfy`` and log accuracy.

        Args:
            predict_data: normalised model output.  When ``predict_all`` is
                truthy it is rescaled with the ``'SUM'`` statistics of
                ``self.ds``; otherwise ``predict_data[0]`` and
                ``predict_data[1]`` are rescaled with the
                ``'C_ACTIVE_POWER1'`` / ``'C_ACTIVE_POWER2'`` statistics and
                added together.
            dfy: indexable sequence of DataFrames.  Each frame receives a
                ``PREDICT`` column in place.  After that column is added the
                frames are expected to have exactly the columns ``C_TIME``,
                ``C_ACTIVE_POWER1``, ``C_ACTIVE_POWER2``, ``SUM``,
                ``C_REAL_VALUE``, ``PREDICT`` in this order.
            predict_all: selects the de-normalisation branch (see above).

        Returns:
            None.  The two accuracy values are written to ``self.logger``.
        """
        stats = self.ds
        # Plain truthiness so that numpy booleans select the same branch as
        # the built-in ``True``.
        if predict_all:
            predict_data = predict_data * stats.std['SUM'] + stats.mean['SUM']
        else:
            part1 = predict_data[0] * stats.std['C_ACTIVE_POWER1'] + stats.mean['C_ACTIVE_POWER1']
            part2 = predict_data[1] * stats.std['C_ACTIVE_POWER2'] + stats.mean['C_ACTIVE_POWER2']
            predict_data = part1 + part2

        # Attach the i-th prediction to the i-th frame (mutates the caller's
        # frames, as the original code did).
        for i, frame in enumerate(dfy):
            frame["PREDICT"] = predict_data[i]
            dfy[i] = frame

        last_rows = self.get_16_points(dfy)
        df = pd.DataFrame(
            last_rows,
            columns=['C_TIME', 'C_ACTIVE_POWER1', 'C_ACTIVE_POWER2',
                     'SUM', 'C_REAL_VALUE', 'PREDICT'],
        )

        # Inner join: only timestamps present in both tables are scored.
        df = pd.merge(df, self.dq_acc(), on='C_TIME')

        # calculate_acc returns 1 - RMSE / cap, so the value is reported as
        # an accuracy rather than as a mean squared error.
        model_acc = self.calculate_acc(df['SUM'], df['PREDICT'])
        self.logger.info("The accuracy of power {} is ".format('power') + str(model_acc))

        # NOTE(review): the label says 'CDQ' but the C_ABLE_VALUE column comes
        # from dq_acc(); confirm which data set is intended.
        baseline_acc = self.calculate_acc(df['C_REAL_VALUE'], df['C_ABLE_VALUE'])
        self.logger.info("The accuracy of power {} is ".format('CDQ') + str(baseline_acc))