import random


def str_to_list(arg):
    """Split a comma-separated string into a list of its pieces.

    Args:
        arg: Comma-separated string, e.g. ``"a,b,c"``.

    Returns:
        ``[]`` when ``arg`` is the empty string, otherwise ``arg.split(',')``.
    """
    if arg == '':
        return []
    return arg.split(',')


def generate_unique_colors(num_colors):
    """Generate ``num_colors`` distinct random ``rgb(r, g, b)`` color strings.

    Args:
        num_colors: Number of distinct colors wanted.

    Returns:
        A list of unique strings formatted as ``"rgb(r, g, b)"`` with each
        channel in 0..255. The order of the list is unspecified.

    Raises:
        ValueError: If ``num_colors`` exceeds the number of distinct RGB
            triples (256 ** 3); the sampling loop could never finish.
    """
    max_colors = 256 ** 3
    if num_colors > max_colors:
        raise ValueError(
            f"num_colors must be <= {max_colors}, got {num_colors}"
        )

    generated_colors = set()
    # Rejection sampling: duplicates are absorbed by the set, so keep drawing
    # until enough distinct colors have been collected.
    while len(generated_colors) < num_colors:
        red = random.randint(0, 255)
        green = random.randint(0, 255)
        blue = random.randint(0, 255)
        generated_colors.add(f"rgb({red}, {green}, {blue})")
    return list(generated_colors)


def missing_features(df, features, col_time, threshold=0.2):
    """Drop every row belonging to a day whose features are mostly missing.

    The day is taken as the first 10 characters of the ``col_time`` string
    column. For each day, the missing rate is the fraction of null cells
    across the ``features`` columns (equivalently, the mean over features of
    each feature's per-day missing rate). Days whose rate is ``>= threshold``
    are removed.

    The input frame is not modified; no helper column is added to it.

    Args:
        df: Input DataFrame.
        features: List of feature column names to inspect for missing values.
        col_time: Name of a string column whose first 10 characters identify
            the day.
        threshold: Days with an average feature missing rate greater than or
            equal to this value are removed.

    Returns:
        A new DataFrame containing only rows from the retained days, with the
        same columns as ``df``.
    """
    # Keep the day key in a standalone Series instead of writing a temporary
    # column into the caller's frame.
    day = df[col_time].str[:10].rename('day')

    # Row-wise share of missing features, averaged per day. Only the feature
    # columns take part, so the grouping key cannot dilute the rate.
    missing_rates = df[features].isnull().mean(axis=1).groupby(day).mean()

    # Days whose average feature missing rate reaches the threshold.
    days_with_high_missing = missing_rates[missing_rates >= threshold].index

    # Report the result, using the threshold that was actually applied.
    print(f"特征缺失率超过{threshold:.1%}的日期:", days_with_high_missing)
    print()
    print("**********删除前维度", df.shape)
    filtered = df[~day.isin(days_with_high_missing)]
    print("**********删除后维度", filtered.shape)
    return filtered