123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107 |
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- # time: 2023/4/12 18:57
- # file: data_analyse.py
- # author: David
- # company: shenyang JY
- import sys
- import numpy as np
- np.random.seed(42)
- import matplotlib.pyplot as plt
- import pandas as pd
- from calculate import calculate_acc
class data_analyse(object):
    """Helpers for loading forecast CSV files and scoring model predictions.

    The instance keeps only the run configuration (``opt``) and a logger;
    every method reads the settings it needs from ``self.opt``.

    NOTE(review): ``dq`` / ``cdq`` presumably denote two different forecast
    horizons -- confirm against the data provider's naming.
    """

    def __init__(self, opt, logger):
        """Store the configuration object and the logger.

        Args:
            opt: Configuration object. Methods of this class read
                ``excel_data_path``, ``data_format``, ``std``, ``mean`` and
                ``cap`` from it.
            logger: Object exposing ``info(message)``.
        """
        self.opt = opt
        self.logger = logger

    def _read_forecast_csv(self, key):
        """Read the CSV registered under ``key`` in ``opt.data_format``.

        Only the ``C_TIME`` and ``C_ABLE_VALUE`` columns are loaded, and
        ``C_TIME`` is parsed with the ``%Y-%m-%d %H:%M:%S`` format.

        Args:
            key: Key into ``self.opt.data_format`` (e.g. ``"dq"``).

        Returns:
            pandas.DataFrame with a datetime ``C_TIME`` column and the
            ``C_ABLE_VALUE`` column.
        """
        # Plain concatenation (not os.path.join) is kept so existing
        # ``excel_data_path`` values keep resolving to the same file.
        csv_path = self.opt.excel_data_path + self.opt.data_format[key]
        frame = pd.read_csv(csv_path, usecols=['C_TIME', 'C_ABLE_VALUE'])
        frame['C_TIME'] = pd.to_datetime(frame['C_TIME'], format='%Y-%m-%d %H:%M:%S')
        return frame

    def dq_acc(self):
        """Load the ``dq`` forecast file.

        Returns:
            pandas.DataFrame with columns ``C_TIME`` (datetime) and
            ``C_ABLE_VALUE``.
        """
        return self._read_forecast_csv("dq")

    def cdq_acc(self):
        """Load the ``cdq`` forecast file.

        Returns:
            pandas.DataFrame with columns ``C_TIME`` (datetime) and
            ``C_ABLE_VALUE``.
        """
        return self._read_forecast_csv("cdq")

    @staticmethod
    def calculate_rmse(label_data, predict_data):
        """Return the root-mean-square error between labels and predictions.

        Args:
            label_data: Array-like of reference values supporting
                element-wise arithmetic (numpy array or pandas Series).
            predict_data: Array-like of predicted values, same length.

        Returns:
            ``sqrt(sum((label - predict) ** 2) / len(label))``.
        """
        mse = np.sum((label_data - predict_data) ** 2) / len(label_data)
        return np.sqrt(mse)

    def calculate_acc(self, label_data, predict_data):
        """Return the capacity-normalised accuracy ``1 - rmse / opt.cap``.

        Args:
            label_data: Array-like of reference values.
            predict_data: Array-like of predicted values, same length.

        Returns:
            ``1 - rmse / self.opt.cap`` where ``rmse`` comes from
            :meth:`calculate_rmse`.
        """
        return 1 - self.calculate_rmse(label_data, predict_data) / self.opt.cap

    def predict_acc(self, predict_data, dfs, predict_all=False):
        """De-normalise predictions, attach them to ``dfs`` and log accuracy.

        Each DataFrame in ``dfs`` receives a ``forecastAbleValue`` column
        (the caller's frames are modified in place). The first row of every
        frame and the last row of every frame are then collected into two
        tables, each of which is scored with :meth:`cal_acc` and logged.

        Args:
            predict_data: Normalised model output. When ``predict_all`` is
                truthy it is rescaled with the ``sum_power`` statistics;
                otherwise ``predict_data[0]`` and ``predict_data[1]`` are
                rescaled with the ``col1_power`` / ``col2_power`` statistics
                and added together.
            dfs: Iterable of DataFrames, one per entry of the rescaled
                prediction.
            predict_all: Selects the rescaling branch described above.

        Raises:
            ValueError: If ``dfs`` yields no DataFrame.
        """
        std, mean = self.opt.std, self.opt.mean
        if predict_all:
            predict_data = predict_data * std['sum_power'] + mean['sum_power']
        else:
            part_one = predict_data[0] * std['col1_power'] + mean['col1_power']
            part_two = predict_data[1] * std['col2_power'] + mean['col2_power']
            predict_data = part_one + part_two

        first_rows = []
        last_rows = []
        for i, df in enumerate(dfs):
            df["forecastAbleValue"] = predict_data[i]
            first_rows.append(df.iloc[0])   # first point of the window
            last_rows.append(df.iloc[-1])   # last point of the window

        if not first_rows:
            raise ValueError("predict_acc needs at least one DataFrame in dfs")

        # Each collected row is a Series; concatenating along axis=1 and
        # transposing turns them back into one row per window.
        df1 = pd.concat(first_rows, axis=1).T
        df2 = pd.concat(last_rows, axis=1).T

        # NOTE(review): the "1" / "16" labels suggest 16-point windows --
        # confirm; the code itself just uses the first and last rows.
        self.logger.info("1 新模型预测准确率是: {} ".format('接口') + str(self.cal_acc(df1)))
        self.logger.info("16 新模型预测准确率是: {} ".format('接口') + str(self.cal_acc(df2)))

    def cal_acc(self, df):
        """Score ``df`` with the imported ``calculate_acc`` function.

        ``df`` is modified in place: a ``C_VALUE`` column, if present, is
        renamed to ``realValue``, and ``ableValue`` is set to a copy of
        ``realValue``.

        Args:
            df: DataFrame holding ``C_VALUE`` (or already ``realValue``).

        Returns:
            Whatever ``calculate.calculate_acc(df, self.opt)`` returns.
        """
        df.rename(columns={'C_VALUE': 'realValue'}, inplace=True)
        df['ableValue'] = df['realValue']
        # Inside a method the bare name resolves to the module-level import
        # ``from calculate import calculate_acc``, not to the method of the
        # same name on this class.
        return calculate_acc(df, self.opt)

    def predict_draw(self, df):
        """Plot ``realValue`` and ``forecastAbleValue`` and show the figure.

        Args:
            df: DataFrame with ``realValue`` and ``forecastAbleValue``
                columns.
        """
        df.realValue.plot()
        df.forecastAbleValue.plot()
        plt.show()
if __name__ == "__main__":
    # No script behaviour is defined; executing this file directly is a no-op.
    pass
|