data_analyse.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
# time: 2023/4/12 18:57
# file: data_analyse.py
# author: David
# company: shenyang JY
import sys
import numpy as np
import pandas as pd


class data_analyse(object):
    def __init__(self, opt, logger, process):
        self.opt = opt
        self.logger = logger
        self.ds = process
    def dq_acc(self):
        # Read the "dq" series from the configured CSV file; C_TIME is stored as a
        # millisecond epoch timestamp, so divide by 1000 before parsing with unit='s'.
        csv_data_path = self.opt.csv_data_path
        data_format = self.opt.data_format
        dq_path = csv_data_path + data_format["dq"]
        dq_columns = ['C_TIME', 'C_ABLE_VALUE']
        dq = pd.read_csv(dq_path, usecols=dq_columns)
        dq['C_TIME'] = dq['C_TIME'].astype(float) / 1000
        dq['C_TIME'] = pd.to_datetime(dq['C_TIME'], unit='s')
        return dq
    def cdq_acc(self):
        # Read the "cdq" series from the configured Excel file; here C_TIME is
        # already a formatted datetime string.
        excel_data_path = self.opt.excel_data_path
        data_format = self.opt.data_format
        cdq_path = excel_data_path + data_format["cdq"]
        cdq_columns = ['C_TIME', 'C_ABLE_VALUE']
        cdq = pd.read_excel(cdq_path, usecols=cdq_columns)
        cdq['C_TIME'] = pd.to_datetime(cdq['C_TIME'], format='%Y-%m-%d %H:%M:%S')
        return cdq
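    # The two loaders above read their paths from the runtime configuration. A
    # minimal sketch of the assumed `opt` fields (file names are illustrative
    # placeholders, not the project's real values):
    #
    #   opt.csv_data_path   = "./data/"
    #   opt.excel_data_path = "./data/"
    #   opt.data_format     = {"dq": "dq.csv", "cdq": "cdq.xls"}
    #   opt.cap             = 100.0   # capacity used by the accuracy metrics below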
    def calculate_acc(self, label_data, predict_data):
        loss = np.sum((label_data - predict_data) ** 2) / len(label_data)  # MSE
        loss_sqrt = np.sqrt(loss)  # RMSE
        loss_acc = 1 - loss_sqrt / self.opt.cap  # accuracy: 1 - RMSE / capacity
        return loss_acc
    def calculate_acc_307(self, label_data, predict_data):
        # Step-by-step variant of the accuracy metric: each error is normalised
        # by the capacity before squaring.
        p1 = label_data - predict_data
        p2 = p1 / self.opt.cap
        p3 = p2 ** 2
        p4 = np.sum(p3)
        p5 = p4 / len(label_data)
        p6 = np.sqrt(p5)
        p7 = 1 - p6
        return p7
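    # Note: normalising each error by self.opt.cap before squaring is algebraically
    # the same as dividing the final RMSE by self.opt.cap, so calculate_acc_307
    # returns the same value as calculate_acc.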
    def get_16_points(self, results):
        # results holds the model's predictions; iterate over it and keep only the
        # last point out of every block of 16 points.
        preds = []
        for res in results:
            preds.append(res.iloc[-1].values)
        return np.array(preds)
    def predict_acc(self, predict_data, dfy, predict_all=False):
        # De-normalise the model outputs back to physical power values.
        if predict_all:
            predict_data = predict_data * self.ds.std['SUM'] + self.ds.mean['SUM']
        else:
            predict_data0 = predict_data[0]
            predict_data1 = predict_data[1]
            predict_data0 = predict_data0 * self.ds.std['C_ACTIVE_POWER1'] + self.ds.mean['C_ACTIVE_POWER1']
            predict_data1 = predict_data1 * self.ds.std['C_ACTIVE_POWER2'] + self.ds.mean['C_ACTIVE_POWER2']
            predict_data = predict_data0 + predict_data1
            # predict_data = predict_data[2] * self.ds.std['C_REAL_VALUE'] + self.ds.mean['C_REAL_VALUE']
        # dfs = dfy[0]
        # for i in range(1, len(dfy)):
        #     dfs.extend(dfy[i])
        # Attach the de-normalised prediction to each per-window DataFrame.
        for i, df in enumerate(dfy):
            df["PREDICT"] = predict_data[i]
            dfy[i] = df
        data = self.get_16_points(dfy)
        df = pd.DataFrame(data, columns=['C_TIME', 'C_ACTIVE_POWER1', 'C_ACTIVE_POWER2', 'SUM', 'C_REAL_VALUE', 'PREDICT'])
        # label_data = label_data.reshape((-1, self.opt.output_size))
        # label_data must be de-normalised as well
        dq = self.dq_acc()
        df = pd.merge(df, dq, on='C_TIME')
        # df.to_csv(self.opt.excel_data_path + "nwp+rp+环境(LSTM+CNN).csv")
        # label_name = [self.opt.feature_columns[i] for i in self.opt.label_in_feature_index]
        loss_norm = self.calculate_acc(df['SUM'], df['PREDICT'])
        self.logger.info("The accuracy of {} is ".format('power') + str(loss_norm))
        loss_norm = self.calculate_acc(df['C_REAL_VALUE'], df['C_ABLE_VALUE'])
        self.logger.info("The accuracy of {} is ".format('CDQ') + str(loss_norm))
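    # ---------------------------------------------------------------------
    # Illustrative usage sketch (comment-only, values are hypothetical): the
    # real `opt`, `logger` and `process` objects are assembled elsewhere in
    # the project, so a dummy `opt` carrying only `cap` is assumed here.
    #
    #   import logging
    #   from types import SimpleNamespace
    #
    #   demo_opt = SimpleNamespace(cap=100.0)          # assumed station capacity
    #   analyser = data_analyse(demo_opt, logging.getLogger("demo"), process=None)
    #   y_true = np.array([50.0, 60.0, 70.0])          # hypothetical actuals
    #   y_pred = np.array([45.0, 65.0, 70.0])          # hypothetical predictions
    #   analyser.calculate_acc(y_true, y_pred)         # -> ~0.9592
    #   analyser.calculate_acc_307(y_true, y_pred)     # same value (equivalent formula)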