import pandas as pd
import numpy as np


def calculate_missing_rate(file_path, freq='15min'):
    """Report, per calendar month, which regularly spaced timestamps are absent.

    The CSV at *file_path* must contain a ``C_TIME`` column formatted as
    ``%Y-%m-%d %H:%M:%S``. Rows are grouped by year-month; for each month the
    expected grid runs from the earliest to the latest timestamp observed in
    that month, stepping by *freq*. Any grid point not present in the data
    counts as missing.

    NOTE: the grid spans only the observed range of each month, so gaps
    before the first or after the last record of a month are not counted.

    Args:
        file_path: Path (or any source accepted by ``pandas.read_csv``) of
            the CSV file to analyse.
        freq: pandas offset alias for the expected sampling interval.
            Defaults to 15 minutes.

    Returns:
        A dict keyed by ``pandas.Period`` (monthly). Each value is a dict
        with ``'missing_rate'`` (float in [0, 1]) and ``'missing_times'``
        (``DatetimeIndex`` of the absent grid points).

    Raises:
        KeyError: If the file has no ``C_TIME`` column.
        ValueError: If a ``C_TIME`` value does not match the expected format.
    """
    df = pd.read_csv(file_path)

    # Parse the timestamp column with an explicit format so malformed rows
    # fail loudly instead of being guessed at.
    df['C_TIME'] = pd.to_datetime(df['C_TIME'], format='%Y-%m-%d %H:%M:%S')

    # The year-month period is the grouping key.
    df['YearMonth'] = df['C_TIME'].dt.to_period('M')

    missing_rates = {}
    for period, group in df.groupby('YearMonth'):
        start_time = group['C_TIME'].min()
        end_time = group['C_TIME'].max()

        # Expected timestamps between the first and last observation of the
        # month. 'min' is used rather than the deprecated 'T' alias.
        full_time_range = pd.date_range(start=start_time, end=end_time, freq=freq)

        # Grid points that never appear in the data.
        missing_times = full_time_range.difference(group['C_TIME'])

        # The grid always contains at least start_time, so the denominator
        # is never zero for a non-empty group.
        missing_rate = len(missing_times) / len(full_time_range)

        missing_rates[period] = {
            'missing_rate': missing_rate,
            'missing_times': missing_times,
        }

    # Human-readable report.
    for period, data in missing_rates.items():
        print(f"Month: {period}, Missing Rate: {data['missing_rate']:.2%}")
        if len(data['missing_times']) > 0:
            print(" Missing Time Points:")
            for missing_time in data['missing_times']:
                print(f" {missing_time}")
        else:
            print(" No missing time points")

    return missing_rates


# Default input file analysed when this module is run as a script.
file_path = 'weather-1-process.csv'

if __name__ == "__main__":
    calculate_missing_rate(file_path)