12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152 |
- import random
- from datetime import date, timedelta
- import pandas as pd
def str_to_list(arg):
    """Split a comma-separated string into a list of its parts.

    An empty string yields an empty list (a plain ``''.split(',')`` would
    give ``['']`` instead).
    """
    return [] if arg == '' else arg.split(',')
def generate_unique_colors(num_colors):
    """Randomly generate ``num_colors`` distinct ``rgb(r, g, b)`` strings.

    Each channel is drawn uniformly from 0..255 and duplicates are discarded
    until enough distinct colors have been collected.

    Args:
        num_colors: Number of distinct colors wanted. Zero or a negative
            value yields an empty list.

    Returns:
        A list of color strings such as ``"rgb(12, 200, 7)"``. The order is
        arbitrary because the colors are collected in a set.

    Raises:
        ValueError: If more colors are requested than exist (256 ** 3);
            without this check the loop below could never finish.
    """
    max_colors = 256 ** 3
    if num_colors > max_colors:
        raise ValueError(
            f"cannot generate {num_colors} unique colors; "
            f"only {max_colors} distinct rgb values exist"
        )

    generated_colors = set()
    while len(generated_colors) < num_colors:
        # Draw the channels in r, g, b order; the set drops any repeat.
        r, g, b = (random.randint(0, 255) for _ in range(3))
        generated_colors.add(f"rgb({r}, {g}, {b})")
    return list(generated_colors)
def missing_features(df, features, col_time, threshold=0.2):
    """Drop every day whose average feature missing rate reaches ``threshold``.

    The day of each row is the first 10 characters of ``df[col_time]``, so
    that column must hold strings (presumably ``YYYY-MM-DD ...`` timestamps;
    confirm against callers). For each day the missing rate of every column
    in ``features`` is computed and averaged over the features; days whose
    average is ``>= threshold`` are removed.

    The input frame is not modified: the day key is kept in a separate
    Series instead of being written into ``df``. Only the ``features``
    columns contribute to the average, so the result does not depend on
    whether the installed pandas passes grouping columns to ``apply``.

    Args:
        df: Input DataFrame.
        features: List of column names whose missing values are counted.
        col_time: Name of the string timestamp column.
        threshold: Average missing rate (0..1) at or above which a day is
            dropped.

    Returns:
        A new DataFrame with the rows of the offending days removed and the
        same columns as ``df``.
    """
    # Day key kept outside of df so the caller's frame is left untouched.
    day = df[col_time].str[:10].rename('day')

    # Per-row fraction of missing features, averaged per day. Every row of a
    # day weighs the same, so this equals the mean over features of each
    # feature's per-day missing rate.
    row_missing_rate = df[features].isnull().mean(axis=1)
    missing_rates = row_missing_rate.groupby(day).mean()

    # Days whose average feature missing rate reaches the threshold.
    days_with_high_missing = missing_rates[missing_rates >= threshold].index

    # Report the threshold actually in use rather than a hard-coded figure.
    print(f"特征缺失率超过{threshold * 100:g}%的日期:", days_with_high_missing)
    print()
    print("**********删除前维度", df.shape)
    kept = df[~day.isin(days_with_high_missing)]
    print("**********删除后维度", kept.shape)
    return kept
def check_nwp_data(nwp_df, features):
    """Validate an NWP DataFrame and return an error message ('' when valid).

    Two checks run in order:

    1. Every name in ``features`` must be a column of ``nwp_df``.
    2. At least 96 rows must have a ``date_time`` value containing
       tomorrow's date (``YYYY-MM-DD``, based on the local ``date.today()``).

    Args:
        nwp_df: DataFrame with a ``date_time`` column plus feature columns.
        features: Iterable of required feature column names.

    Returns:
        An empty string when both checks pass, otherwise a message that
        describes the first failed check.
    """
    # `not all(item in columns ...)` is the same as a non-empty difference.
    # The original `~all(...)` applied bitwise NOT to a bool, which is always
    # truthy, so this branch used to fire even when nothing was missing.
    diff = set(features) - set(nwp_df.columns)
    if diff:
        return f"NWP特征列缺失!features:{diff}"

    # Check whether the day-ahead short-term NWP data is incomplete.
    tomorrow = (date.today() + timedelta(days=1)).strftime('%Y-%m-%d')
    # Series has no `.contains`; go through the `.str` accessor. Casting to
    # str first also tolerates NaN and datetime-typed values.
    is_tomorrow = nwp_df['date_time'].astype(str).str.contains(tomorrow, regex=False)
    # 96 is presumably 24 h at 15-minute resolution; confirm with the data owner.
    if is_tomorrow.sum() < 96:
        return "日前数据记录缺失,不足96条!"

    return ''
def get_xxl_dq(farm_id, dt):
    """Load a farm's cached forecast CSV and return its three forecast columns.

    The file is read from
    ``data_processing/cache/data/xxl/{farm_id}/meteoforce_{farm_id}_{dt}06_power.csv``
    relative to the current working directory, with the first CSV column
    used as the index.

    Args:
        farm_id: Farm identifier used in both the directory and file name.
        dt: Date token placed before the literal ``06`` in the file name
            (presumably ``YYYYMMDD`` followed by an issuance hour; confirm
            against the cache writer).

    Returns:
        A DataFrame with the columns ``farm_id``, ``date_time`` and
        ``power_forecast``.
    """
    wanted_columns = ['farm_id', 'date_time', 'power_forecast']
    csv_path = f'data_processing/cache/data/xxl/{farm_id}/meteoforce_{farm_id}_{dt}06_power.csv'
    return pd.read_csv(csv_path, index_col=0)[wanted_columns]
|