data_analyse.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. # time: 2023/4/12 18:57
  4. # file: data_analyse.py
  5. # author: David
  6. # company: shenyang JY
  7. import sys
  8. import numpy as np
  9. np.random.seed(42)
  10. import matplotlib.pyplot as plt
  11. import pandas as pd
  12. from calculate import calculate_acc
  13. class data_analyse(object):
  14. def __init__(self, opt, logger):
  15. self.opt = opt
  16. self.logger = logger
  17. def dq_acc(self):
  18. excel_data_path = self.opt.excel_data_path
  19. data_format = self.opt.data_format
  20. dq_path = excel_data_path + data_format["dq"]
  21. dq_columns = ['C_TIME', 'C_ABLE_VALUE']
  22. dq = pd.read_csv(dq_path, usecols=dq_columns)
  23. dq['C_TIME'] = pd.to_datetime(dq['C_TIME'], format='%Y-%m-%d %H:%M:%S')
  24. return dq
  25. def cdq_acc(self):
  26. excel_data_path = self.opt.excel_data_path
  27. data_format = self.opt.data_format
  28. dq_path = excel_data_path + data_format["cdq"]
  29. dq_columns = ['C_TIME', 'C_ABLE_VALUE']
  30. cdq = pd.read_csv(dq_path, usecols=dq_columns)
  31. cdq['C_TIME'] = pd.to_datetime(cdq['C_TIME'], format='%Y-%m-%d %H:%M:%S')
  32. return cdq
  33. def calculate_acc(self, label_data, predict_data):
  34. loss = np.sum((label_data - predict_data) ** 2) / len(label_data) # mse
  35. loss_sqrt = np.sqrt(loss) # rmse
  36. return loss_sqrt
  37. def predict_acc(self, predict_data, dfs, predict_all=False):
  38. if predict_all is True:
  39. # predict_data = predict_data * self.opt.std['C_VALUE'] + self.opt.mean['C_VALUE']
  40. predict_data = predict_data * self.opt.std['sum_power'] + self.opt.mean['sum_power']
  41. else:
  42. predict_data0 = predict_data[0] * self.opt.std['col1_power'] + self.opt.mean['col1_power']
  43. predict_data1 = predict_data[1] * self.opt.std['col2_power'] + self.opt.mean['col2_power']
  44. predict_data = predict_data0 + predict_data1
  45. dfs1 = []
  46. dfs2 = []
  47. for i, df in enumerate(dfs):
  48. df["forecastAbleValue"] = predict_data[i]
  49. dfs1.append(df.iloc[0]) # 第1个点
  50. dfs2.append(df.iloc[-1])
  51. # if df.iloc[-1, -1] < 0:
  52. # print("预测中有一个负值,为:", df.iloc[-1, -1])
  53. # else:
  54. # print("预测结果为:", df.iloc[-1, -1])
  55. # dfs1.append(df.iloc[0])
  56. # if df.iloc[-1, -1] < 0:
  57. # print("预测中有一个负值,为:", df.iloc[-1, -1])
  58. # else:
  59. # print("预测结果为:", df.iloc[-1, -1])
  60. # dfs2.append(df.iloc[-1])
  61. # dfs[i] = df.iloc[self.opt.predict_point] # 第16个点
  62. df1 = pd.concat(dfs1, axis=1).T
  63. df2 = pd.concat(dfs2, axis=1).T
  64. # df = pd.concat(dfs, axis=1).T
  65. # df1 = df.drop(['label'], axis=1)
  66. # df1 = df1.iloc[15:, :]
  67. # df2 = df2.iloc[:-15, :]
  68. # fig, ax = plt.subplots()
  69. # ax.plot(df1["C_TIME"], df1["forecastAbleValue"], color='b')
  70. # ax.plot(df2["C_TIME"], df2["forecastAbleValue"], color='r')
  71. # ax.plot(df2["C_TIME"], df2["C_VALUE"], color='y')
  72. # plt.show()
  73. # rmse = self.calculate_acc(label_data=df['realValue'], predict_data=df['forecastAbleValue'])
  74. # df1.to_csv('./figure/fenqu.csv')
  75. self.logger.info("1 新模型预测准确率是: {} ".format('接口') + str(self.cal_acc(df1)))
  76. self.logger.info("16 新模型预测准确率是: {} ".format('接口') + str(self.cal_acc(df2)))
  77. # self.logger.info("新模型预测rmse是: {} ".format('公式') + str(rmse))
  78. # self.predict_draw(df1)
  79. # self.logger.info("1 新模型预测准确率是: {} ".format('接口') + str(self.calculate_acc(df1['realValue'], df1['forecastAbleValue'])))
  80. # self.logger.info("16 新模型预测准确率是: {} ".format('接口') + str(self.calculate_acc(df2['realValue'], df2['forecastAbleValue'])))
  81. def cal_acc(self, df):
  82. df.rename(columns={'C_VALUE': 'realValue'}, inplace=True)
  83. df['ableValue'] = df['realValue']
  84. acc = calculate_acc(df, self.opt)
  85. return acc
  86. def calculate_acc(self, label_data, predict_data):
  87. loss = np.sum((label_data - predict_data) ** 2) / len(label_data) # mse
  88. loss_sqrt = np.sqrt(loss) # rmse
  89. loss_acc = 1 - loss_sqrt / self.opt.cap
  90. return loss_acc
  91. def predict_draw(self, df):
  92. df.realValue.plot()
  93. df.forecastAbleValue.plot()
  94. plt.show()
if __name__ == '__main__':
    # Running this file directly does nothing; the module is meant to be imported.
    pass