env_data.py 3.1 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. # time: 2023/6/12 13:24
  4. # file: env_data.py
  5. # author: David
  6. # company: shenyang JY
  7. import datetime
  8. import math
  9. import pandas as pd
  10. import numpy as np
  11. def process_env_data():
  12. path = './xiangzhou/weather/weather-1-process.csv'
  13. envn = pd.read_csv(path, usecols=['C_TIME', 'C_GLOBALR', 'C_DIFFUSER', 'C_RH']) # C_GLOBALR 总辐射 C_DIFFUSER 散辐射 C_RH 湿度
  14. envn['C_TIME'] = pd.to_datetime(envn['C_TIME'])
  15. path1 = './xiangzhou/power5.csv'
  16. power = pd.read_csv(path1, usecols=['C_TIME', 'C_REAL_VALUE'])
  17. power['C_TIME'] = pd.to_datetime(power['C_TIME'])
  18. envn = pd.merge(envn, power, on='C_TIME')
  19. envn_filter = envn[envn['C_GLOBALR'] > 0].reset_index(drop=True) # 过滤夜间环境 (总辐射大于0)
  20. envn = normalize(envn)
  21. pre = envn_filter.iloc[0, 0].hour
  22. envn.set_index('C_TIME', inplace=True)
  23. envs, env = [], []
  24. for index, row in envn_filter.iterrows():
  25. if pre != row[0].hour:
  26. con = pd.concat(env, axis=1).T
  27. # 数据不齐,要么联立后缺点,要不是日出或日落时分
  28. if len(con) != 12:
  29. con = envn.loc[str(con.iloc[0, 0])[:-6]].reset_index()
  30. print("数据不齐,该时间点为:", row[0], "新的长度为:", len(con))
  31. envs.append(con.reset_index(drop=True))
  32. pre = row[0].hour
  33. env = [row]
  34. else:
  35. env.append(row)
  36. return envs
  37. def envn_features(envs, path):
  38. for i, env in enumerate(envs):
  39. zero_indexs = env[env['C_GLOBALR'] == 0].index
  40. print("----", env)
  41. if len(zero_indexs) > 0:
  42. env.iloc[zero_indexs, env.columns.get_loc('C_GLOBALR')] = 0.1
  43. print("++++", env)
  44. x = list(map(lambda x,y: x/y, env['C_DIFFUSER'], env['C_GLOBALR']))
  45. f1 = round(np.mean(x), 2)
  46. env['diff1'] = env['C_REAL_VALUE'].diff()
  47. env['diff_1'] = env['C_REAL_VALUE'].diff(-1)
  48. ei = (env['diff1']*env['diff_1']).tolist()[1:-1]
  49. ei = [1 if e > 0 else 0 for e in ei]
  50. f2 = round(np.mean(ei), 2)
  51. f3 = round(np.mean(env['C_RH'].tolist()), 2)
  52. time = env.iloc[-1]['C_TIME'].replace(minute=0)
  53. time += datetime.timedelta(hours=1) # 反应的是下一个小时的环境特征
  54. envs[i] = [time, f1, f2, f3]
  55. envn_features = pd.DataFrame(envs, columns=['C_TIME', 'f1', 'f2', 'f3'])
  56. # envn_features = normalize(envn_features)
  57. envn_features.to_csv(path, index=False)
  58. def normalize(df):
  59. """
  60. 暂时不将C_TIME归一化
  61. :param dfs:
  62. :return: 归一化后的DataFrame
  63. """
  64. df1 = df.iloc[:, 1:]
  65. mean = np.mean(df1, axis=0) # 数据的均值
  66. std = np.std(df1, axis=0) # 标准差
  67. print("归一化参数,均值为:{},方差为:{}".format(mean.to_dict(), std.to_dict()))
  68. df_Zscore = df1.apply(lambda x: np.around((x - x.mean())/math.sqrt(sum((x-x.mean())**2/len(x))), decimals=2))
  69. df_Zscore.insert(0, 'C_TIME', df["C_TIME"])
  70. return df_Zscore
  71. if __name__ == '__main__':
  72. feaP = './xiangzhou/features.csv'
  73. envs = process_env_data()
  74. envn_features(envs, feaP)