David преди 2 месеца
родител
ревизия
6d14c687ad
променени са 4 файла, в които са добавени 320 реда и са изтрити 7 реда
  1. 130 0
      app/common/limited_power_solar.py
  2. 168 0
      app/common/limited_power_wind.py
  3. 12 3
      app/common/neu.yaml
  4. 10 4
      app/model/main.py

+ 130 - 0
app/common/limited_power_solar.py

@@ -0,0 +1,130 @@
+import pandas as pd
+import os
+import numpy as np
+np.random.seed(42)
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+current_path = os.path.dirname(__file__)
+
+
+class LimitPower(object):
+    def __init__(self, logger, args, weather_power):
+        self.logger = logger
+        self.args = args
+        self.opt = self.args.parse_args_and_yaml()
+        self.weather_power = weather_power
+
+    def segment_statis(self):
+        """
+        对总辐射-实际功率进行分段处理,获取分度的中位点,四分位间距和斜率
+        :return: glob_rp 总辐射分段
+        """
+        segs = [x for x in range(50, 2000, 100)]    # 对辐照度以100为间隔进行分段
+        xs = [segs[i-1]+x if i>0 else 25 for i, x in enumerate([50 for _ in segs])]  # 分段的中间点
+        glob_rp = {}       # dict: key 辐照度分段中间点 value 分段内的实际功率
+        for index, row in self.weather_power.iterrows():
+            glob_ = row[self.opt.usable_power_s["env"]]
+            rp = row['C_REAL_VALUE']
+            for i, seg in enumerate(segs):
+                if glob_ <= seg and not (i > 0 and rp < 1):
+                    glob_rp.setdefault(xs[i], []).append(rp)
+                    break
+        for i, x in enumerate(xs):
+            rps = glob_rp.get(x)
+            if rps is None:
+                glob_rp = {k: v for k, v in glob_rp.items() if k not in xs[xs.index(x):]}
+                break
+            x_l = xs[i-1] if i > 0 else 0
+            q2_l = glob_rp[xs[i-1]][0] if i > 0 else 0
+            q1 = np.percentile(rps, self.opt.usable_power_s['down_fractile'])     # 实际功率下四分位点
+            q2 = np.percentile(rps, 50)  # 实际功率中位点
+            q3 = np.percentile(rps, self.opt.usable_power_s['up_fractile'])     # 实际功率上四分位点
+            iqr = q3 -q1    # 四分位间距
+            k1 = round(q2/x, 5)  # 整体斜率
+            k2 = round((q2-q2_l)/(x-x_l), 5)    # 趋势斜率,相对上一个中位点
+            glob_rp[x] = [q2, iqr, k1, k2]   # 更新dict
+        return glob_rp
+
+    def mapping_relation(self, glob_rp):
+        """
+        拟合分段处理后的斜率和偏移量
+        :param glob_rp: 总辐射分段
+        :return: k_final 斜率 bias 实际功率的分布宽度, glob_rp 总辐射分段
+        """
+        ks, iqrs, delete_x, tag_x = [], [], [], []   # ks所有分段斜率集合,iqrs所有分段间距集合,delete_x删除的x坐标集合
+        for x, values in glob_rp.items():
+            k1 = values[-2]
+            k2 = values[-1]
+            iqrs.append(values[-3])
+            if k1 > 0 and k2 > 0:   # 清除趋势小于等于0的斜率
+                ks.append(k1)
+                tag_x.append(x)
+            else:
+                delete_x.append(x)
+                # print("删除的斜率:", k1, k2)
+        bias = round(np.median(iqrs), 3)  # 中位点
+        # print("++++1", ks)
+        mean = np.mean(ks)  # 均值
+        std = np.std(ks)    # 标准差
+        ks = np.array(ks)
+        z_score = (ks-mean)/std # z均值
+        # print("----", z_score)
+        outliers = np.abs(z_score) > self.opt.usable_power_s['outliers_threshold']    # 超过阈值为离群点
+        ks = ks[~outliers]  # 消除离群点
+        delete_x1 = list(np.array(tag_x)[outliers]) # 清除大于阈值的离群点
+        k_final = round(np.mean(ks), 5)  # 对清洗后的斜率做平均
+        # print("++++2:", ks)
+        delete_x.extend(delete_x1)
+        self.logger.info("拟合可用功率,删除的斜率:" + ' '.join([str(x) for x in delete_x]))
+        glob_rp = {k: v for k, v in glob_rp.items() if k not in delete_x}   # 清洗后剩下的分段点位
+        return k_final, bias, glob_rp
+
+    def filter_unlimited_power(self, zfs, real_power, k, b):
+        """
+        预测可用功主方法
+        :param zfs: 要预测可用功率的总辐射
+        :param k: 斜率
+        :param b: 偏移量
+        :return: 预测的可用功率
+        """
+        high = k*zfs+b/2 if k*zfs+b/2 < self.opt.cap else self.opt.cap
+        low = k*zfs-b/2 if k*zfs-b/2 > 0 else 0
+        if low <= real_power <= high:
+            return True
+        else:
+            return False
+
+    def clean_limited_power(self, name, is_repair=False):
+        if is_repair is True:
+            glob_rp = self.segment_statis()
+            k_final, bias, glob_rp = self.mapping_relation(glob_rp)
+            self.opt.usable_power_s['k'] = float(k_final)
+            self.opt.usable_power_s['bias'] = float(bias)
+        new_weather_power = []
+        for index, row in self.weather_power.iterrows():
+            zfs = row[self.opt.usable_power_s["env"]]
+            rp = row['C_REAL_VALUE']
+            if self.filter_unlimited_power(zfs, rp, self.opt.usable_power_s['k'], self.opt.usable_power_s['bias'] * self.opt.usable_power_s['coe']):
+                row['c'] = 'red'
+                new_weather_power.append(row)
+            else:
+                row['c'] = 'blue'
+                new_weather_power.append(row)
+        new_weather_power = pd.concat(new_weather_power, axis=1).T
+        new_weather_power.plot.scatter(x=self.opt.usable_power_s["env"], y='C_REAL_VALUE', c='c')
+        plt.savefig(current_path + '/figs/测光法{}.png'.format(name))
+        new_weather_power = new_weather_power[new_weather_power['c'] == 'red']
+        number = len(new_weather_power)
+        self.logger.info("测光法-未清洗限电前,总共有:{}条数据".format(len(self.weather_power)))
+        self.logger.info("测光法-清除限电后保留的点有:" + str(number) + " 占比:" + str(round(number / len(self.weather_power), 2)))
+        return new_weather_power.loc[:, ['C_TIME', 'C_REAL_VALUE', 'C_ABLE_VALUE']]
+
+
+
+if __name__ == '__main__':
+    power = pd.read_csv('2023-12-01至2023-12-23实际功率导出文件.csv', date_parser=['时间'])
+    weather = pd.read_csv('2023-12-01至2023-12-23气象站数据导出文件.csv', date_parser=['时间'])
+    weather_power = pd.merge(weather, power, on='时间')  # 联立数据
+    # glob_rp = segment_statis(weather_power)
+    # k_final, bias, glob_rp = mapping_relation(glob_rp)

+ 168 - 0
app/common/limited_power_wind.py

@@ -0,0 +1,168 @@
+import copy
+import os
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import pickle
+current_path = os.path.dirname(__file__)
+
+
+class LimitPower(object):
+    def __init__(self, logger, args, weather_power):
+        self.logger = logger
+        self.args = args
+        self.opt = self.args.parse_args_and_yaml()
+        self.weather_power = weather_power
+        self.step = self.opt.usable_power_w['step']
+        self.segs = np.array([x * self.step for x in range(1, 50)])  # 对风速以50为间隔进行分段
+        self.xs = np.array([self.segs[i - 1] + x if i > 0 else self.step / 2 for i, x in
+              enumerate([self.step / 2 for _ in self.segs])])  # 分段的中间点
+        self.polynomial = None
+        self.width = 0
+        self.max_ws = 50
+
+    def segment_statis(self):
+        """
+        对机头风速-实际功率进行分段处理,获取分度的中位点,四分位间距和斜率
+        :return: glob_rp 总辐射分段
+        """
+        glob_rp = {}       # dict: key 辐照度分段中间点 value 分段内的实际功率
+        for index, row in self.weather_power.iterrows():
+            ws_ = row[self.opt.usable_power_w["env"]]
+            rp = row['C_REAL_VALUE']
+            for i, seg in enumerate(self.segs):
+                if ws_ <= seg:
+                    glob_rp.setdefault(self.xs[i], []).append(rp)
+                    break
+        rms = []
+        for i, x in enumerate(self.xs):
+            rps = glob_rp.get(x)
+            if rps is None:
+                continue
+            rps = np.array(rps)
+            up = self.opt.usable_power_w['up_fractile']
+            down = self.opt.usable_power_w['down_fractile']
+            offset = 0
+            while True:
+                index = i-1 if i > 0 else 0
+                while (self.xs[index] in rms or self.xs[index] not in glob_rp) and index >0:
+                    index -= 1
+                x_l = self.xs[index] if index > 0 else 0
+                q2_l = glob_rp[self.xs[index]][0] if index > 0 else 0
+                down = down + offset
+                if down > up:
+                    rms.append(x)
+                    self.logger.info("删除的坐标点为:{}".format(x))
+                    break
+                q1 = np.percentile(rps, down)   # 下四分位点
+                q2 = round(np.percentile(rps, down+(up-down)/2), 3) # 中位点
+                q3 = np.percentile(rps, up)     # 上四分为点
+                # q2 = np.around(np.mean(rps[(rps >= q1) & (rps <= q3)]), 3)
+                iqr = q3 - q1  # 四分位间距
+                k2 = round((q2-q2_l)/(x-x_l), 3)    # 趋势斜率
+
+                # std = np.around(np.std(rps[(rps >= q1) & (rps <= q3)]), 3)
+                # mean = np.around(np.mean(rps), 3)  # 实际功率均值
+                # std = np.around(np.std(rps), 3)  # 实际功率标准差
+                # print("看看q2={},mean={}".format(q2, mean))
+                if k2 >= 0:
+                    glob_rp[x] = [q2, iqr]   # 更新dict
+                    break
+                else:
+                    offset += 1
+        glob_rp = {k: glob_rp[k] for k in glob_rp.keys() if k not in rms}
+        glob_rp = {k: glob_rp[k] for k in sorted(glob_rp.keys())}
+        return glob_rp
+
+    def mapping_relation(self, glob_rp):
+        degree = self.opt.usable_power_w['degree']
+        xs = list(glob_rp.keys())
+        ys = [y[0] for y in glob_rp.values()]
+        self.width = np.median(np.array([y[1] for y in glob_rp.values()]))
+        coefficients = np.polyfit(xs, ys, degree)
+        self.polynomial = np.poly1d(coefficients)
+        self.max_ws = max(xs)
+        # y_fit = self.polynomial(xs)
+        # plt.scatter(xs, ys, label='Data', color='red')
+        # plt.plot(xs, y_fit, label='Fitted polynomial', color='blue')
+        # plt.plot(xs, y_fit+self.width/2, label='up polynomial', color='purple')
+        # plt.plot(xs, y_fit-self.width/2, label='down polynomial', color='green')
+        # plt.legend()
+        # plt.xlabel('x')
+        # plt.ylabel('y')
+        # plt.title(f'Polynomial Fit (degree {degree})')
+        # plt.show()
+
+    def saveVar(self, path, data):
+        os.makedirs(os.path.dirname(path), exist_ok=True)
+        with open(path, 'wb') as file:
+            pickle.dump(data, file)
+
+    def filter_unlimited_power(self, ws, real_power):
+        """
+        预测可用功主方法
+        :param zfs: 要预测可用功率的总辐射
+        :param k: 斜率
+        :param b: 偏移量
+        :return: 预测的可用功率
+        """
+        # coe = self.opt.usable_power['outliers_threshold']
+        # seg = self.xs[np.argmax(self.segs >= ws)]
+        up_offset = self.opt.usable_power_w['up_offset']
+        down_offset = self.opt.usable_power_w['down_offset']
+        high = self.polynomial(ws) + self.width/up_offset if self.polynomial(ws) + self.width/up_offset < self.opt.cap else self.opt.cap
+        low = self.polynomial(ws) - self.width/down_offset if self.polynomial(ws) - self.width/down_offset > 0 else 0
+        if low <= real_power <= high:
+            return True
+        else:
+            return False
+
+    def clean_limited_power(self, name, cluster=False):
+        if cluster is True:
+            glob_rp = self.segment_statis()
+            self.saveVar(os.path.dirname(current_path) + '/var/glob_rp.pickle', glob_rp)
+            self.mapping_relation(glob_rp)
+        new_weather_power, number = [], 0
+        # fig, ax = plt.subplots()
+        for index, row in self.weather_power.iterrows():
+            zfs = row[self.opt.usable_power_w["env"]]
+            rp = row['C_REAL_VALUE']
+            if zfs < 0 or rp < 0:
+                continue
+            if self.filter_unlimited_power(zfs, rp) and zfs <= self.max_ws:
+                row['c'] = 'red'
+                new_weather_power.append(row)
+            else:
+                row['c'] = 'blue'
+                new_weather_power.append(row)
+        new_weather_power = pd.concat(new_weather_power, axis=1).T
+        new_weather_power.plot.scatter(x=self.opt.usable_power_w["env"], y='C_REAL_VALUE', c='c')
+        plt.savefig(current_path + '/figs/测风法{}.png'.format(name))
+        new_weather_power = new_weather_power[new_weather_power['c'] == 'red']
+        number = len(new_weather_power)
+        self.logger.info("未清洗限电前,总共有:{}条数据".format(len(self.weather_power)))
+        self.logger.info("清除限电后保留的点有:" + str(number) + " 占比:" + str(round(number / len(self.weather_power), 2)))
+        return new_weather_power.loc[:, ['C_TIME', 'C_REAL_VALUE', 'C_ABLE_VALUE', self.opt.usable_power_w['env']]]
+
+
+
+if __name__ == '__main__':
+    from logs import Log
+    from config import myargparse
+    log = Log().logger
+    # 实例化配置类
+    args = myargparse(discription="场站端配置", add_help=False)
+    power = pd.read_csv('./data/power.csv')
+    weather = pd.read_csv('./data/tower-1-process.csv')
+    weather_power = pd.merge(weather, power, on='C_TIME')  # 联立数据
+    lp = LimitPower(log, args, weather_power)
+    # glob_rp = lp.segment_statis()
+    # lp.mapping_relation(glob_rp)
+    lp.clean_limited_power('测试1')
+    # glob_rp = {k: glob_rp[k] for k in sorted(glob_rp.keys())}
+    # keys = list(glob_rp.keys())
+    # values = [v[0] for v in glob_rp.values()]
+    # import matplotlib.pyplot as plt
+    # fig, ax = plt.subplots()
+    # ax.plot(keys, values)
+    # plt.show()

+ 12 - 3
app/common/neu.yaml

@@ -73,16 +73,25 @@ repair_days: 81
 repair_model_cycle: 5
 update_add_train_days: 60
 update_coe_days: 3
-usable_power:
+usable_power_s:
   api_able_power: true
   bias: 2.524
-  clean_power_which: 1
   coe: 4
   down_fractile: 30
-  env: C_GLOBALR
+  env: Irradiance
   k: 0.04079
   outliers_threshold: 1.5
   up_fractile: 70
+usable_power_w:
+  api_able_power: true
+  degree: 4
+  down_fractile: 0
+  down_offset: 3
+  env: HubSpeed
+  outliers_threshold: 1.5
+  step: 0.5
+  up_fractile: 100
+  up_offset: 2
 version: solar-0.0.1.south
 mongodb_database: south_compete
 scaler_table: scaler

+ 10 - 4
app/model/main.py

@@ -29,8 +29,10 @@ def material(input_file, isDq=True):
         'DQYC_IN_FORECAST_WEATHER_WIND_H.txt', 'DQYC_IN_FORECAST_WEATHER_SOLAR_H.txt', 'DQYC_IN_HISTORY_POWER_LONG.txt')
     basi_area = 'DQYC_AREA_IN_BASIC'
     nwp_v, nwp_v_h = 'DQYC_IN_FORECAST_WEATHER.txt', 'DQYC_IN_FORECAST_WEATHER_H.txt' # 多版本气象
-    nwp_own, nwp_own_h = 'DQYC_IN_FORECAST_WEATHER_OWNER.txt', 'DQYC_IN_FORECAST_WEATHER_OWNER_H.txt',
+    nwp_own, nwp_own_h = 'DQYC_IN_FORECAST_WEATHER_OWNER.txt', 'DQYC_IN_FORECAST_WEATHER_OWNER_H.txt', # 自有气象
+    env_wf, env_sf = 'DQYC_IN_ACTUAL_WEATHER_WIND', 'DQYC_IN_ACTUAL_WEATHER_SOLAR' # 实测气象
     input_file = Path(input_file)
+    env_w, env_s = None, None
     basic = pd.read_csv(input_file / basi, sep='\s+', header=0)
     power = pd.read_csv(input_file / power, sep='\s+', header=0)
     plant_type = int(basic.loc[basic['PropertyID'].to_list().index(('PlantType')), 'Value'])
@@ -47,14 +49,18 @@ def material(input_file, isDq=True):
             station_info_d = pd.read_csv(input_file / station_info_d_w, sep='\s+', header=0)
             nwp = pd.read_csv(input_file / nwp_w, sep='\s+', header=0)
             nwp_h = pd.read_csv(input_file / nwp_w_h, sep='\s+', header=0)
-            return station_info, station_info_d, nwp, nwp_h, power, nwp_v, nwp_v_h
+            if (input_file / env_wf).exists():
+                env_w = pd.read_csv(input_file / env_wf, sep='\s+', header=0)
+            return station_info, station_info_d, nwp, nwp_h, power, nwp_v, nwp_v_h, env_w
         # 如果是光伏
         elif plant_type == 1:
             station_info = pd.read_csv(input_file / station_info_s, sep='\s+', header=0)
             station_info_d = pd.read_csv(input_file / station_info_d_s, sep='\s+', header=0)
             nwp = pd.read_csv(input_file / nwp_s, sep='\s+', header=0)
             nwp_h = pd.read_csv(input_file / nwp_s_h, sep='\s+', header=0)
-            return station_info, station_info_d, nwp, nwp_h, power, nwp_v, nwp_v_h
+            if (input_file / env_sf).exists():
+                env_s = pd.read_csv(input_file / env_sf, sep='\s+', header=0)
+            return station_info, station_info_d, nwp, nwp_h, power, nwp_v, nwp_v_h, env_s
     else:
         # 区域级预测待定,可能需要遍历获取场站数据
         basic_area = pd.read_csv(input_file / basi_area, sep='\s+', header=0)
@@ -63,7 +69,7 @@ def material(input_file, isDq=True):
 def input_file_handler(input_file: str):
     # DQYC:短期预测,qy:区域级
     if 'dqyc' in input_file.lower():
-        station_info, station_info_d, nwp, nwp_h, power, nwp_v, nwp_v_h = material(input_file, True)
+        station_info, station_info_d, nwp, nwp_h, power, nwp_v, nwp_v_h, env = material(input_file, True)
         cap = round(station_info['PlantCap'][0], 2)
         # 含有model,训练
         if 'model' in input_file.lower():