#!/usr/bin/env python
# -*- coding: utf-8 -*-
# time: 2023/6/12 13:24
# file: env_data.py
# author: David
# company: shenyang JY
"""Build hourly environment features from weather and power CSV exports."""
import datetime
import math

import pandas as pd
import numpy as np

# Number of rows an hourly group must contain to be treated as complete.
# NOTE(review): presumably the inputs are sampled every 5 minutes - confirm.
SAMPLES_PER_HOUR = 12


def process_env_data():
    """Load weather and power data and split the daytime samples by hour.

    The weather and power CSV files are inner-joined on ``C_TIME``. Rows with
    ``C_GLOBALR <= 0`` (night) are dropped and the remaining rows are split
    into runs of consecutive samples that fall in the same calendar hour.

    A run that does not contain exactly ``SAMPLES_PER_HOUR`` rows (points lost
    in the join, or a sunrise/sunset hour) is replaced by every row of that
    hour taken from the z-score normalised table, night rows included.

    NOTE(review): complete hours therefore carry raw values while patched
    hours carry normalised values - confirm this mix is intended.

    :return: list of DataFrames, one per hourly run, in input order. The list
        is empty when there is no daytime sample at all.
    """
    weather_path = './xiangzhou/weather/weather-1-process.csv'
    power_path = './xiangzhou/power5.csv'

    # C_GLOBALR: global radiation, C_DIFFUSER: diffuse radiation, C_RH: humidity
    envn = pd.read_csv(weather_path, usecols=['C_TIME', 'C_GLOBALR', 'C_DIFFUSER', 'C_RH'])
    envn['C_TIME'] = pd.to_datetime(envn['C_TIME'])
    power = pd.read_csv(power_path, usecols=['C_TIME', 'C_REAL_VALUE'])
    power['C_TIME'] = pd.to_datetime(power['C_TIME'])
    envn = pd.merge(envn, power, on='C_TIME')

    # Filter out night-time samples (global radiation must be positive).
    envn_filter = envn[envn['C_GLOBALR'] > 0].reset_index(drop=True)
    if envn_filter.empty:
        return []

    envn = normalize(envn)
    envn.set_index('C_TIME', inplace=True)

    # Label every row with its calendar hour (date included, so the same hour
    # on two different days never merges) and number the consecutive runs.
    hour_keys = envn_filter['C_TIME'].dt.strftime('%Y-%m-%d %H')
    run_ids = (hour_keys != hour_keys.shift()).cumsum()

    envs = []
    # Grouping by run id also emits the final hour, which a flush-on-change
    # loop would leave behind.
    for _, group in envn_filter.groupby(run_ids, sort=False):
        con = group.reset_index(drop=True)
        # Incomplete data: points were lost in the join, or this is a
        # sunrise/sunset hour.
        if len(con) != SAMPLES_PER_HOUR:
            first_time = con['C_TIME'].iloc[0]
            # Partial-string lookup selects the whole hour from the
            # normalised, time-indexed table.
            con = envn.loc[first_time.strftime('%Y-%m-%d %H')].reset_index()
            print("数据不齐,该时间点为:", first_time, "新的长度为:", len(con))
        envs.append(con.reset_index(drop=True))
    return envs


def envn_features(envs, path):
    """Compute one feature row per hourly frame and write them to a CSV file.

    Features written for every frame:

    * ``f1`` - mean of ``C_DIFFUSER / C_GLOBALR`` (zeros in ``C_GLOBALR`` are
      replaced by 0.1 first so the ratio is always defined).
    * ``f2`` - share of interior samples where ``C_REAL_VALUE`` is a strict
      local extremum (both neighbours lower, or both higher); NaN when the
      frame has fewer than three rows.
    * ``f3`` - mean of ``C_RH``.

    All three are rounded to two decimals. ``C_TIME`` of the row is the start
    of the hour following the frame's last sample.

    The input list and its frames are not modified.

    :param envs: iterable of DataFrames with the columns ``C_TIME``,
        ``C_GLOBALR``, ``C_DIFFUSER``, ``C_RH`` and ``C_REAL_VALUE``.
    :param path: destination of the CSV file (columns C_TIME, f1, f2, f3).
    :return: None
    """
    rows = []
    for env in envs:
        # Work on a copy so the caller's frames stay untouched.
        env = env.copy()
        print("----", env)

        env['C_GLOBALR'] = env['C_GLOBALR'].astype(float)
        zero_mask = env['C_GLOBALR'] == 0
        if zero_mask.any():
            # Avoid a division by zero in the diffuse/global ratio below.
            env.loc[zero_mask, 'C_GLOBALR'] = 0.1
            print("++++", env)

        ratio = env['C_DIFFUSER'].astype(float) / env['C_GLOBALR']
        f1 = round(np.mean(ratio.to_numpy()), 2)

        # (x[i] - x[i-1]) * (x[i] - x[i+1]) > 0 marks a local peak or valley;
        # the first and last samples have only one neighbour and are skipped.
        power = env['C_REAL_VALUE'].astype(float)
        turning = (power.diff() * power.diff(-1)).iloc[1:-1] > 0
        if len(turning):
            f2 = round(np.mean(turning.to_numpy()), 2)
        else:
            f2 = float('nan')

        f3 = round(np.mean(env['C_RH'].astype(float).to_numpy()), 2)

        # The features describe the environment of the following hour.
        last_time = pd.Timestamp(env['C_TIME'].iloc[-1])
        next_hour = last_time.replace(minute=0, second=0, microsecond=0, nanosecond=0)
        next_hour += datetime.timedelta(hours=1)
        rows.append([next_hour, f1, f2, f3])

    features = pd.DataFrame(rows, columns=['C_TIME', 'f1', 'f2', 'f3'])
    features.to_csv(path, index=False)


def normalize(df):
    """Z-score normalise every column except ``C_TIME``.

    Each column is centred on its mean, divided by its population standard
    deviation (ddof=0) and rounded to two decimals. ``C_TIME`` is not
    normalised; it is copied unchanged and placed first in the result.
    A constant column has a standard deviation of 0 and becomes NaN.

    :param df: DataFrame with a ``C_TIME`` column plus numeric columns.
    :return: the normalised DataFrame
    """
    values = df.drop(columns='C_TIME')
    mean = values.mean(axis=0)  # per-column mean
    std = values.std(axis=0, ddof=0)  # per-column population standard deviation
    # NOTE: the second value printed is the standard deviation, although the
    # message labels it as variance.
    print("归一化参数,均值为:{},方差为:{}".format(mean.to_dict(), std.to_dict()))
    df_zscore = ((values - mean) / std).round(2)
    df_zscore.insert(0, 'C_TIME', df['C_TIME'])
    return df_zscore


if __name__ == '__main__':
    feaP = './xiangzhou/features.csv'
    envs = process_env_data()
    envn_features(envs, feaP)