"""Align NWP/power CSV data on a shared time window and carve off a test split.

All file access goes through ``utils.savedata.readData`` / ``saveData``.
"""
import pandas as pd

import utils.savedata


def a() -> None:
    """Clip NWP_0 and power_0 to their overlapping ``C_TIME`` window.

    Both files are read, their ``C_TIME`` columns are converted to datetimes,
    and each frame is reduced to the rows lying inside
    ``[latest start, earliest end]`` (both ends inclusive).  The clipped frames
    are written back under the *same* file names, i.e. the inputs are
    overwritten.
    """
    # --- Disabled earlier step, kept for reference -------------------------
    # Read the large file
    # large_file = "power_15min.csv"
    # large_df = utils.savedata.readData(large_file)
    #
    # # Read every small file and store it in a dict
    # small_files = []
    # for i in range(6):
    #     small_files.append("/Dataset_training/NWP/NWP_{}.csv".format(i))
    # small_dfs = {}
    #
    # for file in small_files:
    #     small_dfs[file] = utils.savedata.readData(file)
    #
    # # Split the large file according to each small file's time range
    # i = 0
    # for file, df in small_dfs.items():
    #     min_time = df["C_TIME"].min()
    #     max_time = df["C_TIME"].max()
    #     splitted_df = large_df[(large_df["C_TIME"] >= min_time) & (large_df["C_TIME"] <= max_time)]
    #     utils.savedata.saveData("/Dataset_training/power/power_{}.csv".format(i), splitted_df)
    #     i = i + 1
    # -----------------------------------------------------------------------

    filenames = ["Dataset_training/NWP/NWP_0.csv", "Dataset_training/power/power_0.csv"]
    dataframes = [utils.savedata.readData(name) for name in filenames]

    # Convert to datetime BEFORE computing the bounds, so min/max are
    # chronological rather than dependent on whatever type readData returned
    # (e.g. lexicographic ordering of raw strings).
    for df in dataframes:
        df['C_TIME'] = pd.to_datetime(df['C_TIME'])

    # Latest start and earliest end across all frames: the shared window.
    max_start_time = max(df['C_TIME'].min() for df in dataframes)
    min_end_time = min(df['C_TIME'].max() for df in dataframes)
    print(max_start_time)
    print(min_end_time)

    # Keep only rows inside [max_start_time, min_end_time] and write each
    # frame back to the path it was read from (this overwrites the input).
    for name, df in zip(filenames, dataframes):
        in_window = (df['C_TIME'] >= max_start_time) & (df['C_TIME'] <= min_end_time)
        utils.savedata.saveData(name, df[in_window])


def _tail_rows(df: pd.DataFrame, count: int) -> pd.DataFrame:
    """Return the last *count* rows of *df*; an empty frame when *count* is 0.

    ``df.iloc[-count:]`` cannot be used directly because ``-0`` is ``0`` and
    ``iloc[0:]`` selects every row.  A *count* larger than ``len(df)`` yields
    the whole frame.
    """
    start = max(len(df) - count, 0)
    return df.iloc[start:]


def split_test() -> None:
    """Split NWP_0 / power_0 into a trailing test part and a training remainder.

    The test size is 10% of the combined row count of the six NWP files and
    the six power files.  The last ``test_size`` rows of each source frame form
    the test set; the training set is every row whose ``C_TIME`` does not occur
    in the test set.  Four files are written: the two test sets and the two
    training remainders.
    """
    # NOTE(review): the original comment said NWP_5.csv / power_5.csv are read
    # here, but the code reads the *_0 files while the training remainder is
    # written to the *_5 files below - confirm which index is intended.
    nwp_df = utils.savedata.readData("Dataset_training/NWP/NWP_0.csv")
    power_df = utils.savedata.readData("Dataset_training/power/power_0.csv")

    # NOTE(review): these paths start with "/" while the two reads above do
    # not; left untouched because readData's path handling is not visible here.
    small_files = ["/Dataset_training/NWP/NWP_{}.csv".format(i) for i in range(6)]
    small_files += ["/Dataset_training/power/power_{}.csv".format(i) for i in range(6)]

    # Only the row counts are needed, so the frames are not kept around.
    # NOTE(review): the total counts NWP and power rows together - confirm
    # that this combined count is the intended basis for the 10% test size.
    total_rows = sum(len(utils.savedata.readData(name)) for name in small_files)
    test_size = int(total_rows * 0.1)

    nwp_test = _tail_rows(nwp_df, test_size)
    power_test = _tail_rows(power_df, test_size)

    # Training rows are selected by timestamp, not by position: any row
    # sharing a C_TIME value with a test row is excluded as well.
    nwp_train = nwp_df[~nwp_df["C_TIME"].isin(nwp_test["C_TIME"])]
    power_train = power_df[~power_df["C_TIME"].isin(power_test["C_TIME"])]

    utils.savedata.saveData("/Dataset_test/NWP/NWP_test.csv", nwp_test)
    utils.savedata.saveData("/Dataset_test/power/power_test.csv", power_test)
    utils.savedata.saveData("/Dataset_training/NWP/NWP_5.csv", nwp_train)
    utils.savedata.saveData("/Dataset_training/power/power_5.csv", power_train)


if __name__ == '__main__':
    a()
    # split_test()