1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980 |
- import pandas as pd
- import utils.savedata
def a():
    """Trim NWP_0 and power_0 to the time window they have in common.

    Both CSVs are loaded with ``utils.savedata.readData``. The overlap of
    their ``C_TIME`` ranges (latest start, earliest end) is printed, and each
    frame, restricted to that closed interval, is handed to
    ``utils.savedata.saveData`` under the same path string it was read with,
    so no separate "_filtered" file is produced.

    A disabled earlier step of this script split ``power_15min.csv`` into
    ``/Dataset_training/power/power_{i}.csv`` pieces, one per
    ``/Dataset_training/NWP/NWP_{i}.csv`` (i = 0..5), keeping the rows whose
    ``C_TIME`` lay between that NWP file's minimum and maximum ``C_TIME``.
    """
    filenames = [
        "Dataset_training/NWP/NWP_0.csv",
        "Dataset_training/power/power_0.csv",
    ]
    dataframes = [utils.savedata.readData(name) for name in filenames]

    # Parse the timestamps before computing the bounds: if readData delivers
    # C_TIME as text, min()/max() would compare lexicographically, which is
    # only chronological for zero-padded ISO-style strings.
    for df in dataframes:
        df["C_TIME"] = pd.to_datetime(df["C_TIME"])

    # Shared window = latest of the start times .. earliest of the end times.
    max_start_time = max(df["C_TIME"].min() for df in dataframes)
    min_end_time = min(df["C_TIME"].max() for df in dataframes)
    print(max_start_time)
    print(min_end_time)

    # Keep only rows inside [max_start_time, min_end_time] (both inclusive).
    for name, df in zip(filenames, dataframes):
        in_window = (df["C_TIME"] >= max_start_time) & (df["C_TIME"] <= min_end_time)
        utils.savedata.saveData(name, df[in_window])
def split_test():
    """Split the trailing rows of the NWP and power data off as a test set.

    The test size is 10% of the combined row count of the twelve files
    ``/Dataset_training/NWP/NWP_{0..5}.csv`` and
    ``/Dataset_training/power/power_{0..5}.csv``. That many trailing rows of
    the NWP frame and of the power frame form the test sets; the rows whose
    ``C_TIME`` does not occur in the respective test set form the training
    remainder. The four results are written with ``utils.savedata.saveData``.
    """
    # NOTE(review): the original comment stated that NWP_5.csv / power_5.csv
    # are read here, and the training remainder is saved as *_5.csv, yet these
    # paths point at *_0.csv and lack the leading "/" used by every other path
    # in this function. Left unchanged -- confirm which files are intended.
    nwp_df = utils.savedata.readData("Dataset_training/NWP/NWP_0.csv")
    power_df = utils.savedata.readData("Dataset_training/power/power_0.csv")

    small_files = ["/Dataset_training/NWP/NWP_{}.csv".format(i) for i in range(6)]
    small_files += ["/Dataset_training/power/power_{}.csv".format(i) for i in range(6)]

    # Row total over NWP *and* power files together, as in the original logic.
    total_rows = sum(len(utils.savedata.readData(name)) for name in small_files)
    test_size = int(total_rows * 0.1)

    # Slice from an explicit start index: ``iloc[-0:]`` selects the whole
    # frame, which would turn every row into test data (and leave the training
    # set empty) whenever test_size rounds down to 0.
    nwp_test = nwp_df.iloc[max(len(nwp_df) - test_size, 0):]
    power_test = power_df.iloc[max(len(power_df) - test_size, 0):]

    # Training remainder: every row whose timestamp is absent from the test set.
    nwp_train = nwp_df[~nwp_df["C_TIME"].isin(nwp_test["C_TIME"])]
    power_train = power_df[~power_df["C_TIME"].isin(power_test["C_TIME"])]

    utils.savedata.saveData("/Dataset_test/NWP/NWP_test.csv", nwp_test)
    utils.savedata.saveData("/Dataset_test/power/power_test.csv", power_test)
    utils.savedata.saveData("/Dataset_training/NWP/NWP_5.csv", nwp_train)
    utils.savedata.saveData("/Dataset_training/power/power_5.csv", power_train)
if __name__ == "__main__":
    # Script entry point: only the time-window trimming step runs; the
    # train/test split below is kept disabled.
    a()
    # split_test()
|