#!/usr/bin/env python
# -*- coding: utf-8 -*-
# time: 2023/4/12 18:57
# file: data_analyse.py
# author: David
# company: shenyang JY
"""Load forecast CSV files and log accuracy figures for model predictions."""
import sys

import numpy as np

# Kept ahead of the remaining imports to preserve the original execution order.
np.random.seed(42)
import matplotlib.pyplot as plt
import pandas as pd

from calculate import calculate_acc


class data_analyse(object):
    """Read forecast CSVs and score de-normalised model predictions.

    The methods read ``excel_data_path``, ``data_format``, ``mean``, ``std``
    and ``cap`` from ``opt``; ``logger`` is only used through ``info``.
    """

    # Columns loaded from the forecast CSV files.
    _FORECAST_COLUMNS = ['C_TIME', 'C_ABLE_VALUE']
    # Timestamp layout of the ``C_TIME`` column.
    _TIME_FORMAT = '%Y-%m-%d %H:%M:%S'

    def __init__(self, opt, logger):
        """Store the configuration object and the logger."""
        self.opt = opt
        self.logger = logger

    def _read_forecast_csv(self, format_key):
        """Read the CSV named by ``opt.data_format[format_key]``.

        The file path is ``opt.excel_data_path`` concatenated with the
        configured file name (plain string concatenation, as before).

        Returns:
            A DataFrame with ``C_TIME`` parsed to datetimes and
            ``C_ABLE_VALUE``.
        """
        csv_path = self.opt.excel_data_path + self.opt.data_format[format_key]
        frame = pd.read_csv(csv_path, usecols=self._FORECAST_COLUMNS)
        frame['C_TIME'] = pd.to_datetime(frame['C_TIME'], format=self._TIME_FORMAT)
        return frame

    def dq_acc(self):
        """Return the ``"dq"`` forecast CSV as a DataFrame."""
        return self._read_forecast_csv("dq")

    def cdq_acc(self):
        """Return the ``"cdq"`` forecast CSV as a DataFrame."""
        return self._read_forecast_csv("cdq")

    def predict_acc(self, predict_data, dfs, predict_all=False):
        """De-normalise predictions, attach them to ``dfs`` and log accuracy.

        Args:
            predict_data: Normalised predictions.  When ``predict_all`` is
                ``True`` it is scaled with the ``sum_power`` statistics;
                otherwise ``predict_data[0]`` and ``predict_data[1]`` are
                scaled with the ``col1_power`` / ``col2_power`` statistics
                and summed.
            dfs: Iterable of DataFrames; each one gets a new
                ``forecastAbleValue`` column (the frames are modified in
                place) taken from ``predict_data[i]``.
            predict_all: Selects the scaling branch described above.  Only
                the literal ``True`` selects the ``sum_power`` branch.

        Raises:
            ValueError: If ``dfs`` yields no frames.
        """
        std, mean = self.opt.std, self.opt.mean
        if predict_all is True:
            predict_data = predict_data * std['sum_power'] + mean['sum_power']
        else:
            col1 = predict_data[0] * std['col1_power'] + mean['col1_power']
            col2 = predict_data[1] * std['col2_power'] + mean['col2_power']
            predict_data = col1 + col2

        first_rows = []
        last_rows = []
        for i, df in enumerate(dfs):
            df["forecastAbleValue"] = predict_data[i]
            first_rows.append(df.iloc[0])   # 1st point of the window
            last_rows.append(df.iloc[-1])   # last point of the window

        if not first_rows:
            # pd.concat would raise ValueError("No objects to concatenate");
            # fail with the same exception type but a message naming the cause.
            raise ValueError("predict_acc requires at least one DataFrame in dfs")

        df1 = pd.concat(first_rows, axis=1).T
        df2 = pd.concat(last_rows, axis=1).T

        # Log text (kept verbatim): "new model prediction accuracy: interface".
        # The last row is labelled "16" -- presumably each window holds
        # 16 points; TODO confirm against the caller.
        self.logger.info("1 新模型预测准确率是: {} ".format('接口') + str(self.cal_acc(df1)))
        self.logger.info("16 新模型预测准确率是: {} ".format('接口') + str(self.cal_acc(df2)))

    def cal_acc(self, df):
        """Score ``df`` with the imported ``calculate.calculate_acc``.

        ``df`` is modified in place: ``C_VALUE`` is renamed to ``realValue``
        and an ``ableValue`` column is added as a copy of ``realValue``.
        """
        df.rename(columns={'C_VALUE': 'realValue'}, inplace=True)
        df['ableValue'] = df['realValue']
        # Inside a method the bare name resolves to the module-level import,
        # not to the ``calculate_acc`` method of this class.
        return calculate_acc(df, self.opt)

    @staticmethod
    def _rmse(label_data, predict_data):
        """Return the root-mean-square error between the two sequences."""
        mse = np.sum((label_data - predict_data) ** 2) / len(label_data)
        return np.sqrt(mse)

    def calculate_acc(self, label_data, predict_data):
        """Return ``1 - rmse / opt.cap`` for the given labels and predictions.

        The class previously defined this method twice; the earlier
        definition (which returned the bare RMSE) was overwritten by this one
        and never ran, so only this behaviour is kept.
        """
        return 1 - self._rmse(label_data, predict_data) / self.opt.cap

    def predict_draw(self, df):
        """Plot ``realValue`` and ``forecastAbleValue`` and show the figure."""
        df.realValue.plot()
        df.forecastAbleValue.plot()
        plt.show()


if __name__ == '__main__':
    pass