import glob
import os

import pandas as pd

from Arg import Arg

# NOTE(review): `arg` is not referenced anywhere in the visible part of this
# file; the instantiation is kept in case constructing Arg has side effects
# or other modules import it from here -- confirm before removing.
arg = Arg()

# The data has one point every 15 minutes, so half a month (15 days) of data
# corresponds to 15 * 24 * 60 / 15 = 1440 rows.
_SAMPLE_INTERVAL_MINUTES = 15
_MIN_ROWS_HALF_MONTH = 15 * 24 * 60 // _SAMPLE_INTERVAL_MINUTES


def data_split_by_month(getpath: str, savepath: str, name: str):
    """Split one CSV dataset into one CSV file per calendar month.

    The CSV at ``getpath`` is loaded, its ``C_TIME`` column is parsed as
    datetimes and used as the index, and the rows are grouped by
    (year, month). Each group is written to
    ``<savepath>/<name>_<year>_<month>.csv`` (the ``C_TIME`` index is written
    as the first column).

    Args:
        getpath: Path of the CSV file to read. It must contain a ``C_TIME``
            column that ``pandas.to_datetime`` can parse.
        savepath: Directory in which the per-month files are written. It is
            not created here; the caller must make sure it exists.
        name: Prefix used for the output file names.

    Returns:
        The pandas group-by object over (year, month), or ``None`` when the
        dataset has fewer than half a month of rows and nothing is exported.
    """
    df = pd.read_csv(getpath)

    # Datasets shorter than half a month are not exported at all.
    if len(df) < _MIN_ROWS_HALF_MONTH:
        print("当前数据集个数不足半个月,不予导出!!")
        return None

    # Make sure the timestamp column is a real datetime type, then index by it
    # so that year/month can be read straight from the index.
    df['C_TIME'] = pd.to_datetime(df['C_TIME'])
    df = df.set_index('C_TIME')

    grouped = df.groupby([df.index.year, df.index.month])

    # Write every month's rows to its own CSV file. os.path.join is used so
    # the result does not depend on `savepath` ending with a path separator.
    for (year, month), group in grouped:
        group.to_csv(os.path.join(savepath, f'{name}_{year}_{month}.csv'))

    return grouped


def split_for_month():
    """Split every numbered NWP and power training CSV into monthly files.

    The number of ``*.csv`` files in ``../data/Dataset_training/NWP`` decides
    how many indices are processed. For each index ``i`` the files
    ``NWP/NWP_<i>.csv`` and ``power/power_<i>.csv`` under
    ``../data/Dataset_training`` are split by month into
    ``../data/training/NWP/`` and ``../data/training/power/`` respectively.
    All paths are relative to the current working directory.
    """
    nwp_dir = "../data/Dataset_training/NWP"
    data_len = len(glob.glob(os.path.join(nwp_dir, '*.csv')))

    # NOTE(review): range(data_len - 1) stops one short of the number of CSV
    # files found (indices 0 .. data_len - 2). Kept as in the original because
    # the intended file numbering cannot be confirmed from this file -- verify
    # whether the last file is skipped on purpose.
    for i in range(data_len - 1):
        # Same order as before: the NWP file first, then the power file.
        for kind in ("NWP", "power"):
            getpath = f'../data/Dataset_training/{kind}/{kind}_{i}.csv'
            savepath = f'../data/training/{kind}/'
            # Create the output directory if it does not exist yet.
            os.makedirs(savepath, exist_ok=True)
            data_split_by_month(getpath, savepath, kind)


if __name__ == '__main__':
    split_for_month()