12345678910111213141516171819202122232425262728293031323334 |
- import random
def str_to_list(arg):
    """Split a comma-separated string into a list of its items.

    Args:
        arg: The comma-separated text, e.g. ``"a,b,c"``. ``None`` and the
            empty string both mean "no items".

    Returns:
        A list of the substrings between commas. Items are returned exactly
        as written (no whitespace stripping, empty items are kept), and an
        empty list is returned when there is nothing to split.
    """
    # ''.split(',') would yield [''] rather than [], so the empty case needs
    # its own branch; None is handled here too instead of failing on .split.
    if arg is None or arg == '':
        return []
    return arg.split(',')
def generate_unique_colors(num_colors):
    """Randomly generate ``num_colors`` distinct CSS ``rgb(...)`` strings.

    Args:
        num_colors: How many distinct colours to produce. Zero or a negative
            number yields an empty list.

    Returns:
        A list of strings of the form ``"rgb(R, G, B)"`` with each channel in
        0..255, without duplicates, in the order they were generated.

    Raises:
        ValueError: If more colours are requested than there are distinct
            RGB triples (256 ** 3); the request could never be satisfied.
    """
    max_colors = 256 ** 3
    if num_colors > max_colors:
        raise ValueError(
            f"cannot generate {num_colors} unique colors; "
            f"only {max_colors} distinct RGB values exist"
        )

    # A dict is used as an insertion-ordered set: duplicates collapse onto the
    # same key, and the result keeps generation order so that a seeded
    # `random` gives the same list on every run.
    generated_colors = {}
    while len(generated_colors) < num_colors:
        color = f"rgb({random.randint(0, 255)}, {random.randint(0, 255)}, {random.randint(0, 255)})"
        generated_colors[color] = None
    return list(generated_colors)
def missing_features(df, features, col_time, threshold=0.2):
    """Remove all rows of the days on which the features are largely missing.

    For every calendar day the missing rate of each feature is computed and
    averaged over the features; days whose average reaches ``threshold`` are
    removed entirely. The input frame is left unmodified.

    Args:
        df: DataFrame holding the data. It is not mutated.
        features: Names of the columns whose missing values are counted.
        col_time: Name of the timestamp column. The day is taken as the first
            10 characters of each value, so the column must hold strings
            (presumably formatted like ``YYYY-MM-DD ...`` -- confirm against
            the caller's data).
        threshold: Average missing rate (0..1) at or above which a day is
            dropped. Defaults to 0.2.

    Returns:
        A new DataFrame with the rows of the offending days removed and the
        same columns as ``df``. Rows whose timestamp is missing belong to no
        day and are kept.
    """
    # Keep the day key in a local Series rather than writing a temporary
    # column into the caller's frame.
    day = df[col_time].str[:10]

    # Fraction of the selected features missing in each row. Averaging that
    # per day equals the mean of the per-feature daily missing rates, because
    # every feature has the same number of rows within a day. Only the
    # feature columns take part, so the grouping key cannot dilute the rate.
    row_missing = df[list(features)].isnull().mean(axis=1)
    missing_rates = row_missing.groupby(day).mean()

    # Days whose average feature missing rate is at or above the threshold.
    days_with_high_missing = missing_rates[missing_rates >= threshold].index

    # Report the affected days and the frame shape before/after the removal.
    print(f"特征缺失率超过{threshold:.1%}的日期:", days_with_high_missing)
    print()
    print("**********删除前维度", df.shape)
    kept = df[~day.isin(days_with_high_missing)]
    print("**********删除后维度", kept.shape)
    return kept
|