12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182 |
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- # time: 2023/6/12 13:24
- # file: env_data.py
- # author: David
- # company: shenyang JY
- import datetime
- import math
- import pandas as pd
- import numpy as np
def process_env_data():
    """Load weather and power data and group daytime samples by hour.

    Reads the weather CSV (``C_GLOBALR`` total radiation, ``C_DIFFUSER``
    diffuse radiation, ``C_RH`` humidity) and the power CSV
    (``C_REAL_VALUE``), inner-joins them on ``C_TIME`` and keeps the rows
    whose total radiation is positive (night-time rows are filtered out).
    The remaining rows are walked in file order and cut into runs of
    consecutive rows that share the same hour of day.

    A run that does not contain exactly 12 rows is replaced by *all* rows of
    that clock hour taken from the z-score-normalised merged frame.

    Returns:
        list[pandas.DataFrame]: one frame per finished run, each with a
        fresh 0..n-1 index. Empty when no daytime rows exist.
    """
    weather_path = './xiangzhou/weather/weather-1-process.csv'
    power_path = './xiangzhou/power5.csv'

    envn = pd.read_csv(weather_path, usecols=['C_TIME', 'C_GLOBALR', 'C_DIFFUSER', 'C_RH'])
    envn['C_TIME'] = pd.to_datetime(envn['C_TIME'])
    power = pd.read_csv(power_path, usecols=['C_TIME', 'C_REAL_VALUE'])
    power['C_TIME'] = pd.to_datetime(power['C_TIME'])
    envn = pd.merge(envn, power, on='C_TIME')

    # read_csv returns columns in file order, not in `usecols` order, so pin
    # C_TIME to the front explicitly: normalize() and the grouping below
    # both expect the timestamp to be the first column.
    envn = envn[['C_TIME'] + [col for col in envn.columns if col != 'C_TIME']]

    # Drop night-time samples (total radiation must be > 0).
    envn_filter = envn[envn['C_GLOBALR'] > 0].reset_index(drop=True)
    envn = normalize(envn)
    if envn_filter.empty:
        # Nothing to group; avoids an IndexError on the first-row lookup.
        return []
    envn.set_index('C_TIME', inplace=True)

    pre = envn_filter['C_TIME'].iloc[0].hour
    envs, env = [], []
    for _, row in envn_filter.iterrows():
        # Label access: integer keys on a label-indexed Series are deprecated.
        hour = row['C_TIME'].hour
        if pre != hour:
            con = pd.concat(env, axis=1).T
            # Incomplete hour: either samples were lost in the join, or this
            # is the sunrise / sunset hour.
            # assumes 12 samples per hour, i.e. 5-minute data — TODO confirm
            if len(con) != 12:
                # NOTE(review): the replacement rows come from the normalised
                # frame (and include rows with C_GLOBALR <= 0), while complete
                # hours keep raw values — confirm this mix is intended.
                hour_key = con['C_TIME'].iloc[0].strftime('%Y-%m-%d %H')
                con = envn.loc[hour_key].reset_index()
                print("数据不齐,该时间点为:", row['C_TIME'], "新的长度为:", len(con))
            envs.append(con.reset_index(drop=True))
            pre = hour
            env = [row]
        else:
            env.append(row)
    # NOTE(review): the final run is never appended because a run is only
    # flushed when the hour changes — kept as in the original; confirm.
    return envs
def envn_features(envs, path):
    """Compute one feature row per hourly frame and write them to a CSV.

    For every frame in ``envs`` the following values are derived:

    * ``f1`` - mean of ``C_DIFFUSER / C_GLOBALR`` (zeros in ``C_GLOBALR``
      are replaced by 0.1 first so the ratio is defined), rounded to 2 dp.
    * ``f2`` - share of interior samples where ``C_REAL_VALUE`` is a strict
      local extremum (backward and forward differences have the same sign),
      rounded to 2 dp. NaN when the frame has fewer than three rows.
    * ``f3`` - mean of ``C_RH``, rounded to 2 dp.
    * ``C_TIME`` - timestamp of the last row with its minute set to 0, plus
      one hour (the features describe the *next* hour).

    The inputs are not modified: neither the ``envs`` list nor the frames in
    it are written to.

    Args:
        envs: iterable of DataFrames with columns ``C_TIME``, ``C_GLOBALR``,
            ``C_DIFFUSER``, ``C_RH`` and ``C_REAL_VALUE``.
        path: destination of the CSV file (written without the index).

    Returns:
        pandas.DataFrame: the table that was written, with columns
        ``C_TIME``, ``f1``, ``f2``, ``f3``.
    """
    rows = []
    for source in envs:
        # Work on a copy so the caller's frame keeps its original values.
        env = source.copy()
        print("----", env)
        global_r = env['C_GLOBALR'].astype(float)
        # Avoid dividing by zero radiation.
        env['C_GLOBALR'] = global_r.mask(global_r == 0, 0.1)
        print("++++", env)

        ratio = env['C_DIFFUSER'].to_numpy(dtype=float) / env['C_GLOBALR'].to_numpy(dtype=float)
        f1 = round(float(np.mean(ratio)), 2)

        real = env['C_REAL_VALUE'].astype(float)
        # diff() is x[i]-x[i-1], diff(-1) is x[i]-x[i+1]; their product is
        # positive exactly at strict local maxima/minima. The first and last
        # samples have only one neighbour and are skipped.
        is_extremum = (real.diff() * real.diff(-1)).iloc[1:-1] > 0
        f2 = round(float(is_extremum.mean()), 2)

        f3 = round(float(np.mean(env['C_RH'].to_numpy(dtype=float))), 2)

        stamp = env['C_TIME'].iloc[-1].replace(minute=0)
        stamp += datetime.timedelta(hours=1)  # features describe the next hour
        rows.append([stamp, f1, f2, f3])

    features = pd.DataFrame(rows, columns=['C_TIME', 'f1', 'f2', 'f3'])
    features.to_csv(path, index=False)
    return features
def normalize(df):
    """Z-score normalise every column of ``df`` except ``C_TIME``.

    Each column is centred on its mean and divided by its population
    standard deviation (``ddof=0``); results are rounded to 2 decimals.
    Missing values are skipped when computing the parameters and stay NaN in
    the output. A column with zero standard deviation is mapped to 0.0
    instead of NaN. ``C_TIME`` is not normalised and is placed first in the
    result. The parameters are printed to stdout.

    :param df: DataFrame with a ``C_TIME`` column and numeric columns
    :return: new normalised DataFrame; ``df`` itself is left unchanged
    """
    values = df.drop(columns='C_TIME')
    mean = values.mean(axis=0)
    std = values.std(axis=0, ddof=0)  # population standard deviation
    # The log label says "方差" (variance) but the value shown is the
    # standard deviation; the text is kept as-is for log compatibility.
    print("归一化参数,均值为:{},方差为:{}".format(mean.to_dict(), std.to_dict()))
    # Guard against division by zero for constant columns.
    scale = std.mask(std == 0, 1.0)
    df_zscore = ((values - mean) / scale).round(2)
    df_zscore.insert(0, 'C_TIME', df['C_TIME'])
    return df_zscore
if __name__ == '__main__':
    # Script entry point: build the hourly groups from the raw CSV files,
    # then derive their features and store them as a CSV.
    hourly_groups = process_env_data()
    output_csv = './xiangzhou/features.csv'
    envn_features(hourly_groups, output_csv)
|