import copy import os import pandas as pd import numpy as np import matplotlib.pyplot as plt import pickle current_path = os.path.dirname(__file__) class LimitPower(object): def __init__(self, logger, args, weather_power): self.logger = logger self.args = args self.opt = self.args.parse_args_and_yaml() self.weather_power = weather_power self.step = self.opt.usable_power['step'] self.segs = np.array([x * self.step for x in range(1, 50)]) # 对风速以50为间隔进行分段 self.xs = np.array([self.segs[i - 1] + x if i > 0 else self.step / 2 for i, x in enumerate([self.step / 2 for _ in self.segs])]) # 分段的中间点 self.polynomial = None self.width = 0 self.max_ws = 50 def segment_statis(self): """ 对机头风速-实际功率进行分段处理,获取分度的中位点,四分位间距和斜率 :return: glob_rp 总辐射分段 """ glob_rp = {} # dict: key 辐照度分段中间点 value 分段内的实际功率 for index, row in self.weather_power.iterrows(): ws_ = row[self.opt.usable_power["env"]] rp = row['C_REAL_VALUE'] for i, seg in enumerate(self.segs): if ws_ <= seg: glob_rp.setdefault(self.xs[i], []).append(rp) break rms = [] for i, x in enumerate(self.xs): rps = glob_rp.get(x) if rps is None: continue rps = np.array(rps) up = self.opt.usable_power['up_fractile'] down = self.opt.usable_power['down_fractile'] offset = 0 while True: index = i-1 if i > 0 else 0 while (self.xs[index] in rms or self.xs[index] not in glob_rp) and index >0: index -= 1 x_l = self.xs[index] if index > 0 else 0 q2_l = glob_rp[self.xs[index]][0] if index > 0 else 0 down = down + offset if down > up: rms.append(x) self.logger.info("删除的坐标点为:{}".format(x)) break q1 = np.percentile(rps, down) # 下四分位点 q2 = round(np.percentile(rps, down+(up-down)/2), 3) # 中位点 q3 = np.percentile(rps, up) # 上四分为点 # q2 = np.around(np.mean(rps[(rps >= q1) & (rps <= q3)]), 3) iqr = q3 - q1 # 四分位间距 k2 = round((q2-q2_l)/(x-x_l), 3) # 趋势斜率 # std = np.around(np.std(rps[(rps >= q1) & (rps <= q3)]), 3) # mean = np.around(np.mean(rps), 3) # 实际功率均值 # std = np.around(np.std(rps), 3) # 实际功率标准差 # print("看看q2={},mean={}".format(q2, mean)) if k2 >= 0: glob_rp[x] = [q2, iqr] # 更新dict break else: offset += 1 glob_rp = {k: glob_rp[k] for k in glob_rp.keys() if k not in rms} glob_rp = {k: glob_rp[k] for k in sorted(glob_rp.keys())} return glob_rp def mapping_relation(self, glob_rp): degree = self.opt.usable_power['degree'] xs = list(glob_rp.keys()) ys = [y[0] for y in glob_rp.values()] self.width = np.median(np.array([y[1] for y in glob_rp.values()])) coefficients = np.polyfit(xs, ys, degree) self.polynomial = np.poly1d(coefficients) self.max_ws = max(xs) # y_fit = self.polynomial(xs) # plt.scatter(xs, ys, label='Data', color='red') # plt.plot(xs, y_fit, label='Fitted polynomial', color='blue') # plt.plot(xs, y_fit+self.width/2, label='up polynomial', color='purple') # plt.plot(xs, y_fit-self.width/2, label='down polynomial', color='green') # plt.legend() # plt.xlabel('x') # plt.ylabel('y') # plt.title(f'Polynomial Fit (degree {degree})') # plt.show() def saveVar(self, path, data): os.makedirs(os.path.dirname(path), exist_ok=True) with open(path, 'wb') as file: pickle.dump(data, file) def filter_unlimited_power(self, ws, real_power): """ 预测可用功主方法 :param zfs: 要预测可用功率的总辐射 :param k: 斜率 :param b: 偏移量 :return: 预测的可用功率 """ # coe = self.opt.usable_power['outliers_threshold'] # seg = self.xs[np.argmax(self.segs >= ws)] up_offset = self.opt.usable_power['up_offset'] down_offset = self.opt.usable_power['down_offset'] high = self.polynomial(ws) + self.width/up_offset if self.polynomial(ws) + self.width/up_offset < self.opt.cap else self.opt.cap low = self.polynomial(ws) - self.width/down_offset if self.polynomial(ws) - self.width/down_offset > 0 else 0 if low <= real_power <= high: return True else: return False def clean_limited_power(self, name, cluster=False): if cluster is True: glob_rp = self.segment_statis() self.saveVar(os.path.dirname(current_path) + '/var/glob_rp.pickle', glob_rp) self.mapping_relation(glob_rp) new_weather_power, number = [], 0 # fig, ax = plt.subplots() for index, row in self.weather_power.iterrows(): zfs = row[self.opt.usable_power["env"]] rp = row['C_REAL_VALUE'] if zfs < 0 or rp < 0: continue if self.filter_unlimited_power(zfs, rp) and zfs <= self.max_ws: row['c'] = 'red' new_weather_power.append(row) else: row['c'] = 'blue' new_weather_power.append(row) new_weather_power = pd.concat(new_weather_power, axis=1).T new_weather_power.plot.scatter(x=self.opt.usable_power["env"], y='C_REAL_VALUE', c='c') plt.savefig(current_path + '/figs/测风法{}.png'.format(name)) new_weather_power = new_weather_power[new_weather_power['c'] == 'red'] number = len(new_weather_power) self.logger.info("未清洗限电前,总共有:{}条数据".format(len(self.weather_power))) self.logger.info("清除限电后保留的点有:" + str(number) + " 占比:" + str(round(number / len(self.weather_power), 2))) return new_weather_power.loc[:, ['C_TIME', 'C_REAL_VALUE', 'C_ABLE_VALUE', self.opt.usable_power['env']]] def clean_limited_power_by_signal(self, name): weather_power1 = self.weather_power.copy() weather_power1["signal"] = weather_power1.apply( lambda x: self.signal_result(x["C_IS_RATIONING_BY_MANUAL_CONTROL"], x["C_IS_RATIONING_BY_AUTO_CONTROL"]), axis=1) weather_power1['c'] = weather_power1.apply(lambda x: 'cornflowerblue' if bool(x["signal"]) is True else 'pink', axis=1) weather_power1.plot.scatter(x=self.opt.usable_power["env"], y='C_REAL_VALUE', c='c') plt.savefig(current_path + '/figs/信号法{}.png'.format(name)) weather_power1 = weather_power1[weather_power1['signal'] == False] self.logger.info("信号法-未清洗限电前,总共有:{}条数据".format(len(self.weather_power))) self.logger.info("信号法-清除限电后保留的点有:" + str(len(weather_power1)) + " 占比:" + str( round(len(weather_power1) / len(self.weather_power), 2))) return weather_power1.loc[:, ['C_TIME', 'C_REAL_VALUE', 'C_ABLE_VALUE', self.opt.usable_power['env']]] def signal_result(self, manual, auto): if int(manual) == 0: if int(auto) == 0: return False else: return True else: if int(auto) == 1: return True else: return False if __name__ == '__main__': from logs import Log from config import myargparse log = Log().logger # 实例化配置类 args = myargparse(discription="场站端配置", add_help=False) power = pd.read_csv('./data/power.csv') weather = pd.read_csv('./data/tower-1-process.csv') weather_power = pd.merge(weather, power, on='C_TIME') # 联立数据 lp = LimitPower(log, args, weather_power) # glob_rp = lp.segment_statis() # lp.mapping_relation(glob_rp) lp.clean_limited_power('测试1') # glob_rp = {k: glob_rp[k] for k in sorted(glob_rp.keys())} # keys = list(glob_rp.keys()) # values = [v[0] for v in glob_rp.values()] # import matplotlib.pyplot as plt # fig, ax = plt.subplots() # ax.plot(keys, values) # plt.show()