123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194 |
- import copy
- import os
- import pandas as pd
- import numpy as np
- import matplotlib.pyplot as plt
- import pickle
- current_path = os.path.dirname(__file__)
- class LimitPower(object):
- def __init__(self, logger, args, weather_power):
- self.logger = logger
- self.args = args
- self.opt = self.args.parse_args_and_yaml()
- self.weather_power = weather_power
- self.step = self.opt.usable_power['step']
- self.segs = np.array([x * self.step for x in range(1, 50)]) # 对风速以50为间隔进行分段
- self.xs = np.array([self.segs[i - 1] + x if i > 0 else self.step / 2 for i, x in
- enumerate([self.step / 2 for _ in self.segs])]) # 分段的中间点
- self.polynomial = None
- self.width = 0
- self.max_ws = 50
- def segment_statis(self):
- """
- 对机头风速-实际功率进行分段处理,获取分度的中位点,四分位间距和斜率
- :return: glob_rp 总辐射分段
- """
- glob_rp = {} # dict: key 辐照度分段中间点 value 分段内的实际功率
- for index, row in self.weather_power.iterrows():
- ws_ = row[self.opt.usable_power["env"]]
- rp = row['C_REAL_VALUE']
- for i, seg in enumerate(self.segs):
- if ws_ <= seg:
- glob_rp.setdefault(self.xs[i], []).append(rp)
- break
- rms = []
- for i, x in enumerate(self.xs):
- rps = glob_rp.get(x)
- if rps is None:
- continue
- rps = np.array(rps)
- up = self.opt.usable_power['up_fractile']
- down = self.opt.usable_power['down_fractile']
- offset = 0
- while True:
- index = i-1 if i > 0 else 0
- while (self.xs[index] in rms or self.xs[index] not in glob_rp) and index >0:
- index -= 1
- x_l = self.xs[index] if index > 0 else 0
- q2_l = glob_rp[self.xs[index]][0] if index > 0 else 0
- down = down + offset
- if down > up:
- rms.append(x)
- self.logger.info("删除的坐标点为:{}".format(x))
- break
- q1 = np.percentile(rps, down) # 下四分位点
- q2 = round(np.percentile(rps, down+(up-down)/2), 3) # 中位点
- q3 = np.percentile(rps, up) # 上四分为点
- # q2 = np.around(np.mean(rps[(rps >= q1) & (rps <= q3)]), 3)
- iqr = q3 - q1 # 四分位间距
- k2 = round((q2-q2_l)/(x-x_l), 3) # 趋势斜率
- # std = np.around(np.std(rps[(rps >= q1) & (rps <= q3)]), 3)
- # mean = np.around(np.mean(rps), 3) # 实际功率均值
- # std = np.around(np.std(rps), 3) # 实际功率标准差
- # print("看看q2={},mean={}".format(q2, mean))
- if k2 >= 0:
- glob_rp[x] = [q2, iqr] # 更新dict
- break
- else:
- offset += 1
- glob_rp = {k: glob_rp[k] for k in glob_rp.keys() if k not in rms}
- glob_rp = {k: glob_rp[k] for k in sorted(glob_rp.keys())}
- return glob_rp
- def mapping_relation(self, glob_rp):
- degree = self.opt.usable_power['degree']
- xs = list(glob_rp.keys())
- ys = [y[0] for y in glob_rp.values()]
- self.width = np.median(np.array([y[1] for y in glob_rp.values()]))
- coefficients = np.polyfit(xs, ys, degree)
- self.polynomial = np.poly1d(coefficients)
- self.max_ws = max(xs)
- # y_fit = self.polynomial(xs)
- # plt.scatter(xs, ys, label='Data', color='red')
- # plt.plot(xs, y_fit, label='Fitted polynomial', color='blue')
- # plt.plot(xs, y_fit+self.width/2, label='up polynomial', color='purple')
- # plt.plot(xs, y_fit-self.width/2, label='down polynomial', color='green')
- # plt.legend()
- # plt.xlabel('x')
- # plt.ylabel('y')
- # plt.title(f'Polynomial Fit (degree {degree})')
- # plt.show()
- def saveVar(self, path, data):
- os.makedirs(os.path.dirname(path), exist_ok=True)
- with open(path, 'wb') as file:
- pickle.dump(data, file)
- def filter_unlimited_power(self, ws, real_power):
- """
- 预测可用功主方法
- :param zfs: 要预测可用功率的总辐射
- :param k: 斜率
- :param b: 偏移量
- :return: 预测的可用功率
- """
- # coe = self.opt.usable_power['outliers_threshold']
- # seg = self.xs[np.argmax(self.segs >= ws)]
- up_offset = self.opt.usable_power['up_offset']
- down_offset = self.opt.usable_power['down_offset']
- high = self.polynomial(ws) + self.width/up_offset if self.polynomial(ws) + self.width/up_offset < self.opt.cap else self.opt.cap
- low = self.polynomial(ws) - self.width/down_offset if self.polynomial(ws) - self.width/down_offset > 0 else 0
- if low <= real_power <= high:
- return True
- else:
- return False
- def clean_limited_power(self, name, cluster=False):
- if cluster is True:
- glob_rp = self.segment_statis()
- self.saveVar(os.path.dirname(current_path) + '/var/glob_rp.pickle', glob_rp)
- self.mapping_relation(glob_rp)
- new_weather_power, number = [], 0
- # fig, ax = plt.subplots()
- for index, row in self.weather_power.iterrows():
- zfs = row[self.opt.usable_power["env"]]
- rp = row['C_REAL_VALUE']
- if zfs < 0 or rp < 0:
- continue
- if self.filter_unlimited_power(zfs, rp) and zfs <= self.max_ws:
- row['c'] = 'red'
- new_weather_power.append(row)
- else:
- row['c'] = 'blue'
- new_weather_power.append(row)
- new_weather_power = pd.concat(new_weather_power, axis=1).T
- new_weather_power.plot.scatter(x=self.opt.usable_power["env"], y='C_REAL_VALUE', c='c')
- plt.savefig(current_path + '/figs/测风法{}.png'.format(name))
- new_weather_power = new_weather_power[new_weather_power['c'] == 'red']
- number = len(new_weather_power)
- self.logger.info("未清洗限电前,总共有:{}条数据".format(len(self.weather_power)))
- self.logger.info("清除限电后保留的点有:" + str(number) + " 占比:" + str(round(number / len(self.weather_power), 2)))
- return new_weather_power.loc[:, ['C_TIME', 'C_REAL_VALUE', 'C_ABLE_VALUE', self.opt.usable_power['env']]]
- def clean_limited_power_by_signal(self, name):
- weather_power1 = self.weather_power.copy()
- weather_power1["signal"] = weather_power1.apply(
- lambda x: self.signal_result(x["C_IS_RATIONING_BY_MANUAL_CONTROL"], x["C_IS_RATIONING_BY_AUTO_CONTROL"]),
- axis=1)
- weather_power1['c'] = weather_power1.apply(lambda x: 'cornflowerblue' if bool(x["signal"]) is True else 'pink',
- axis=1)
- weather_power1.plot.scatter(x=self.opt.usable_power["env"], y='C_REAL_VALUE', c='c')
- plt.savefig(current_path + '/figs/信号法{}.png'.format(name))
- weather_power1 = weather_power1[weather_power1['signal'] == False]
- self.logger.info("信号法-未清洗限电前,总共有:{}条数据".format(len(self.weather_power)))
- self.logger.info("信号法-清除限电后保留的点有:" + str(len(weather_power1)) + " 占比:" + str(
- round(len(weather_power1) / len(self.weather_power), 2)))
- return weather_power1.loc[:, ['C_TIME', 'C_REAL_VALUE', 'C_ABLE_VALUE', self.opt.usable_power['env']]]
- def signal_result(self, manual, auto):
- if int(manual) == 0:
- if int(auto) == 0:
- return False
- else:
- return True
- else:
- if int(auto) == 1:
- return True
- else:
- return False
- if __name__ == '__main__':
- from logs import Log
- from config import myargparse
- log = Log().logger
- # 实例化配置类
- args = myargparse(discription="场站端配置", add_help=False)
- power = pd.read_csv('./data/power.csv')
- weather = pd.read_csv('./data/tower-1-process.csv')
- weather_power = pd.merge(weather, power, on='C_TIME') # 联立数据
- lp = LimitPower(log, args, weather_power)
- # glob_rp = lp.segment_statis()
- # lp.mapping_relation(glob_rp)
- lp.clean_limited_power('测试1')
- # glob_rp = {k: glob_rp[k] for k in sorted(glob_rp.keys())}
- # keys = list(glob_rp.keys())
- # values = [v[0] for v in glob_rp.values()]
- # import matplotlib.pyplot as plt
- # fig, ax = plt.subplots()
- # ax.plot(keys, values)
- # plt.show()
|