data_analyse.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. # time: 2023/4/12 18:57
  4. # file: data_analyse.py
  5. # author: David
  6. # company: shenyang JY
  7. import sys
  8. import numpy as np
  9. import matplotlib.pyplot as plt
  10. import pandas as pd
  11. from data_utils import *
  12. class data_analyse(object):
  13. def __init__(self, opt, logger, process):
  14. self.opt = opt
  15. self.logger = logger
  16. self.ds = process
  17. def formula_acc(self):
  18. excel_data_path = self.opt.excel_data_path
  19. data_format = self.opt.data_format
  20. formula_path = excel_data_path + data_format["formula"]
  21. formula = pd.read_csv(formula_path, usecols=['C_ABLE_VALUE', 'C_FORECAST_HOW_LONG_AGO', 'C_FORECAST_TIME'])
  22. formula["C_FORECAST_TIME"] = formula["C_FORECAST_TIME"].apply(timestr_to_datetime)
  23. formula = formula.rename(columns={"C_FORECAST_TIME": "C_TIME"})
  24. formula = formula.loc[formula['C_FORECAST_HOW_LONG_AGO'] == 16]
  25. return formula
  26. def calculate_acc(self, label_data, predict_data):
  27. loss = np.sum((label_data - predict_data) ** 2) / len(label_data) # mse
  28. loss_sqrt = np.sqrt(loss) # rmse
  29. loss_acc = 1 - loss_sqrt / self.opt.cap
  30. return loss_acc
  31. def get_16_points(self, results):
  32. # results为模型预测的一维数组,遍历,取每16个点的最后一个点
  33. preds = []
  34. for res in results:
  35. preds.append(res.iloc[-1].values)
  36. return np.array(preds)
  37. def predict_acc(self, predict_data, dfy):
  38. predict_data = predict_data * self.ds.std['C_REAL_VALUE'] + self.ds.mean['C_REAL_VALUE']
  39. dfs = dfy[0]
  40. for i in range(1, len(dfy)):
  41. dfs.extend(dfy[i])
  42. for i, df in enumerate(dfs):
  43. df["PREDICT"] = predict_data[i]
  44. dfs[i] = df
  45. data = self.get_16_points(dfs)
  46. df = pd.DataFrame(data, columns=['C_TIME', 'C_REAL_VALUE', 'C_FP_VALUE', 'PREDICT'])
  47. # label_data = label_data.reshape((-1, self.opt.output_size))
  48. # label_data 要进行反归一化
  49. df.to_csv(self.opt.excel_data_path + "dq+rp.csv")
  50. formula = self.formula_acc()
  51. df = pd.merge(df, formula, on='C_TIME')
  52. label_name = [self.opt.feature_columns[i] for i in self.opt.label_in_feature_index]
  53. loss_norm = self.calculate_acc(df['C_REAL_VALUE'], df['PREDICT'])
  54. self.logger.info("The mean squared error of power {} is ".format(label_name) + str(loss_norm))
  55. loss_norm = self.calculate_acc(df['C_REAL_VALUE'], df['C_FP_VALUE'])
  56. self.logger.info("The mean squared error of power {} is ".format(label_name) + str(loss_norm))
  57. loss_norm = self.calculate_acc(df['C_REAL_VALUE'], df['C_ABLE_VALUE'])
  58. self.logger.info("The mean squared error of power {} is ".format(label_name) + str(loss_norm))
  59. self.preidct_draw(df['C_REAL_VALUE'].values, df['PREDICT'].values)
  60. def preidct_draw(self, label_data, predict_data):
  61. X = list(range(label_data.shape[0]))
  62. print("label_x = ", X)
  63. label_column_num = len(self.opt.label_columns)
  64. label_name = [self.opt.feature_columns[i] for i in self.opt.label_in_feature_index]
  65. if not sys.platform.startswith('linux'): # 无桌面的Linux下无法输出,如果是有桌面的Linux,如Ubuntu,可去掉这一行
  66. for i in range(label_column_num):
  67. plt.figure(i+1) # 预测数据绘制
  68. plt.plot(X, label_data, label='label', color='b')
  69. plt.plot(X, predict_data, label='predict', color='g')
  70. # plt.plot(predict_X, dq_data[:, i], label='dq', color='y')
  71. # plt.title("Predict actual {} power with {}".format(label_name[i], self.opt.used_frame))
  72. # self.logger.info("The predicted power {} for the last {} point(s) is: ".format(label_name[i], self.opt.predict_points) +
  73. # str(np.squeeze(predict_data[-self.opt.predict_points:, i])))
  74. if self.opt.do_figure_save:
  75. plt.savefig(self.opt.figure_save_path+"{}predict_{}_with_{}.png".format(self.opt.continue_flag, label_name[i], self.opt.used_frame))
  76. plt.show()
  77. def tangle_results(self):
  78. pass