import random
from datetime import date, timedelta

import pandas as pd

# Number of distinct "rgb(r, g, b)" strings that exist (256 values per channel).
_MAX_RGB_COLORS = 256 ** 3


def str_to_list(arg: str) -> list:
    """Split a comma-separated string into a list.

    An empty string yields an empty list (rather than ``['']``, which is
    what ``''.split(',')`` would give).
    """
    return [] if arg == '' else arg.split(',')


def generate_unique_colors(num_colors: int) -> list:
    """Return ``num_colors`` distinct random colors as ``"rgb(r, g, b)"`` strings.

    Raises:
        ValueError: if more colors are requested than distinct RGB values
            exist; without this guard the sampling loop would never finish.
    """
    if num_colors > _MAX_RGB_COLORS:
        raise ValueError(
            f"num_colors must be <= {_MAX_RGB_COLORS}, got {num_colors}"
        )
    generated_colors = set()
    # Keep sampling until enough *distinct* colors exist; the set silently
    # discards duplicates.
    while len(generated_colors) < num_colors:
        red, green, blue = (random.randint(0, 255) for _ in range(3))
        generated_colors.add(f"rgb({red}, {green}, {blue})")
    return list(generated_colors)


def missing_features(df, features, col_time, threshold=0.2):
    """Drop every day whose average feature missing rate reaches ``threshold``.

    The day is taken from the first 10 characters of the ``col_time`` string
    column. For each day the null fraction of every column in ``features`` is
    computed and averaged across those feature columns; days whose average is
    ``>= threshold`` are removed.

    Args:
        df: input DataFrame. It is not modified.
        features: names of the feature columns to inspect.
        col_time: name of the string timestamp column.
        threshold: average missing rate at or above which a day is dropped.

    Returns:
        A new DataFrame with the same columns as ``df`` and without the rows
        belonging to the dropped days.
    """
    # Keep the grouping key in a separate Series so the caller's frame does
    # not gain a temporary 'day' column.
    day = df[col_time].str[:10].rename('day')

    # Per-day null fraction of each feature, then averaged over the features
    # only. The grouping key is deliberately excluded from the average so the
    # result does not depend on how the installed pandas treats grouping
    # columns inside groupby.apply.
    missing_rates = (
        df[list(features)].isnull().groupby(day).mean().mean(axis=1)
    )

    # Days whose average feature missing rate reaches the threshold.
    days_with_high_missing = missing_rates[missing_rates >= threshold].index

    # Report the threshold actually in use (the message used to say 50%).
    print(f"特征缺失率超过{threshold:.0%}的日期:", days_with_high_missing)
    print()
    print("**********删除前维度", df.shape)
    filtered = df[~day.isin(days_with_high_missing)]
    print("**********删除后维度", filtered.shape)
    return filtered


def check_nwp_data(nwp_df, features):
    """Validate an NWP DataFrame for day-ahead use.

    Args:
        nwp_df: DataFrame with a ``date_time`` column plus feature columns.
        features: feature column names that must be present.

    Returns:
        An empty string when all checks pass, otherwise a message describing
        the first failed check: missing feature columns, or fewer than 96
        rows whose ``date_time`` contains tomorrow's date (``YYYY-MM-DD``).
    """
    tomorrow = (date.today() + timedelta(days=1)).strftime('%Y-%m-%d')

    # NOTE: the previous `~all(...)` applied bitwise NOT to a bool, which is
    # always truthy (-1 or -2), so this branch fired even with no column
    # missing. Test the set difference directly instead.
    diff = set(features) - set(nwp_df.columns)
    if diff:
        return f"NWP特征列缺失!features:{diff}"

    # Check whether the day-ahead short-term NWP data is incomplete.
    # Series has no .contains; string matching lives under the .str accessor.
    # astype(str) also lets this work when date_time is not a string column.
    is_tomorrow = nwp_df['date_time'].astype(str).str.contains(
        tomorrow, regex=False
    )
    if int(is_tomorrow.sum()) < 96:
        return "日前数据记录缺失,不足96条!"
    return ''


def get_xxl_dq(farm_id, dt):
    """Load the cached forecast CSV for ``farm_id`` and ``dt``.

    Reads ``meteoforce_{farm_id}_{dt}06_power.csv`` from the cache directory
    (path relative to the current working directory), using the first CSV
    column as the index, and returns only the ``farm_id``, ``date_time`` and
    ``power_forecast`` columns.
    """
    path_dir = f'data_processing/cache/data/xxl/{farm_id}/meteoforce_{farm_id}_{dt}06_power.csv'
    df = pd.read_csv(path_dir, index_col=0)
    return df[['farm_id', 'date_time', 'power_forecast']]