#!/usr/bin/env python
# -*- coding:utf-8 -*-
# @FileName :processing_limit_power_by_solar.py
# @Time     :2024/12/3 18:40
# @Author   :David
# @Company  :shenyang JY
import os
import time
import traceback

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pymongo import MongoClient
from flask import Flask, request

from logs import Log

app = Flask(__name__)

current_path = os.path.dirname(__file__)
parent_path = os.path.dirname(current_path)
def get_data_from_mongo(args):
    """Read the configured collection from MongoDB and return it as a DataFrame."""
    mongodb_connection = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/"
    mongodb_database = args['mongodb_database']
    mongodb_read_table = args['mongodb_read_table']
    client = MongoClient(mongodb_connection)
    # Select the database (MongoDB creates it automatically if it does not exist)
    db = client[mongodb_database]
    collection = db[mongodb_read_table]  # collection name
    data_from_db = collection.find()  # returns a cursor
    # Materialize the cursor into a list and build a pandas DataFrame
    df = pd.DataFrame(list(data_from_db))
    client.close()
    return df
def insert_data_into_mongo(res_df, args):
    """Drop the target collection if it already exists, then write the DataFrame into it."""
    mongodb_connection = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/"
    mongodb_database = args['mongodb_database']
    mongodb_write_table = args['mongodb_write_table']
    client = MongoClient(mongodb_connection)
    db = client[mongodb_database]
    if mongodb_write_table in db.list_collection_names():
        db[mongodb_write_table].drop()
        print(f"Collection '{mongodb_write_table}' already exists, dropped successfully!")
    collection = db[mongodb_write_table]  # collection name
    # Convert the DataFrame to a list of dicts, one per row
    data_dict = res_df.to_dict("records")
    # Insert into MongoDB
    collection.insert_many(data_dict)
    client.close()
    print("data inserted successfully!")
@app.route('/processing_limit_power_by_solar', methods=['POST', 'GET'])
def processing_limit_power_by_solar():
    # Record the program start time
    start_time = time.time()
    result = {}
    success = 0
    args = {}
    print("Program starts execution!")
    try:
        logger = Log().logger
        args = request.values.to_dict()
        weather_power = get_data_from_mongo(args)
        lp = LimitPower(logger, args, weather_power)
        weather_power = lp.clean_limited_power('')
        print('args', args)
        logger.info(args)
        insert_data_into_mongo(weather_power, args)
        success = 1
    except Exception:
        my_exception = traceback.format_exc()
        my_exception = my_exception.replace("\n", "\t")
        result['msg'] = my_exception
    end_time = time.time()
    result['success'] = success
    result['args'] = args
    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
    print("Program execution ends!")
    return result
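
# A minimal sketch of how this endpoint might be called. The database and collection
# names below are placeholders, not values taken from this project, and LimitPower
# additionally expects the usable_power/cap configuration to be reachable through args:
#   curl -X POST http://127.0.0.1:5000/processing_limit_power_by_solar \
#        -d mongodb_database=solar_db \
#        -d mongodb_read_table=weather_power_raw \
#        -d mongodb_write_table=weather_power_clean
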
class LimitPower(object):
    """Detect and remove curtailment-limited points from merged weather/power data
    using the relation between global radiation and actual power."""

    def __init__(self, logger, args, weather_power):
        self.logger = logger
        self.args = args
        self.weather_power = weather_power
    def segment_statis(self):
        """
        Bin the global-radiation vs. actual-power pairs and, for each bin, compute the
        median, the interquartile range and the slopes.
        :return: glob_rp, the per-bin statistics keyed by the bin midpoint
        """
        segs = [x for x in range(50, 2000, 100)]  # bin the irradiance with a step of 100
        xs = [segs[i-1] + x if i > 0 else 25 for i, x in enumerate([50 for _ in segs])]  # bin midpoints (25, 100, 200, ...)
        glob_rp = {}  # dict: key = irradiance-bin midpoint, value = actual power values inside the bin
        for index, row in self.weather_power.iterrows():
            glob_ = row[self.args.usable_power["env"]]
            rp = row['C_REAL_VALUE']
            for i, seg in enumerate(segs):
                # bins above the first one ignore near-zero power readings
                if glob_ <= seg and not (i > 0 and rp < 1):
                    glob_rp.setdefault(xs[i], []).append(rp)
                    break
        for i, x in enumerate(xs):
            rps = glob_rp.get(x)
            if rps is None:
                # once a bin has no samples, drop it and every higher bin
                glob_rp = {k: v for k, v in glob_rp.items() if k not in xs[xs.index(x):]}
                break
            x_l = xs[i-1] if i > 0 else 0
            q2_l = glob_rp[xs[i-1]][0] if i > 0 else 0
            q1 = np.percentile(rps, self.args.usable_power['down_fractile'])  # lower quantile of actual power
            q2 = np.percentile(rps, 50)  # median of actual power
            q3 = np.percentile(rps, self.args.usable_power['up_fractile'])  # upper quantile of actual power
            iqr = q3 - q1  # interquartile range
            k1 = round(q2 / x, 5)  # overall slope (median over midpoint)
            k2 = round((q2 - q2_l) / (x - x_l), 5)  # local slope, relative to the previous bin's median
            glob_rp[x] = [q2, iqr, k1, k2]  # replace the raw samples with the bin statistics
        return glob_rp
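
    # Shape of the result (the values shown are illustrative only, not real data):
    #   {25:  [q2, iqr, k1, k2],
    #    100: [q2, iqr, k1, k2],
    #    ...}
    # i.e. each irradiance-bin midpoint maps to [median power, interquartile range,
    # overall slope, local slope], which mapping_relation() below condenses into a
    # single fitted slope and band width.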
    def mapping_relation(self, glob_rp):
        """
        Fit a single slope and band width from the per-bin statistics.
        :param glob_rp: per-bin statistics from segment_statis()
        :return: k_final (fitted slope), bias (width of the actual-power band), glob_rp (cleaned bins)
        """
        # ks: slopes of the kept bins; iqrs: interquartile ranges of all bins;
        # delete_x: midpoints of discarded bins; tag_x: midpoints of kept bins
        ks, iqrs, delete_x, tag_x = [], [], [], []
        for x, values in glob_rp.items():
            k1 = values[-2]
            k2 = values[-1]
            iqrs.append(values[-3])
            if k1 > 0 and k2 > 0:  # discard bins whose overall or local slope is <= 0
                ks.append(k1)
                tag_x.append(x)
            else:
                delete_x.append(x)
        bias = round(np.median(iqrs), 3)  # median interquartile range, used as the band width
        mean = np.mean(ks)  # mean of the kept slopes
        std = np.std(ks)  # standard deviation of the kept slopes
        ks = np.array(ks)
        z_score = (ks - mean) / std  # z-scores of the slopes
        outliers = np.abs(z_score) > self.args.usable_power['outliers_threshold']  # above the threshold = outlier
        ks = ks[~outliers]  # drop the outliers
        delete_x1 = list(np.array(tag_x)[outliers])  # midpoints of the outlier bins
        k_final = round(np.mean(ks), 5)  # average of the cleaned slopes
        delete_x.extend(delete_x1)
        self.logger.info("Fitting usable power, discarded slopes at midpoints: " + ' '.join([str(x) for x in delete_x]))
        glob_rp = {k: v for k, v in glob_rp.items() if k not in delete_x}  # bins remaining after cleaning
        return k_final, bias, glob_rp
    def filter_unlimited_power(self, zfs, real_power, k, b):
        """
        Decide whether an actual-power sample lies inside the predicted usable-power band.
        :param zfs: global radiation for which the usable power is predicted
        :param real_power: measured actual power
        :param k: fitted slope
        :param b: band width (offset)
        :return: True if the sample is inside the band (not curtailed), otherwise False
        """
        high = k * zfs + b / 2 if k * zfs + b / 2 < self.args.cap else self.args.cap
        low = k * zfs - b / 2 if k * zfs - b / 2 > 0 else 0
        return low <= real_power <= high
    def clean_limited_power(self, name, is_repair=False):
        """
        Remove curtailment-limited points. When is_repair is True the slope and band
        width are refitted from the data first; otherwise the values already stored in
        args.usable_power are used.
        """
        if is_repair is True:
            glob_rp = self.segment_statis()
            k_final, bias, glob_rp = self.mapping_relation(glob_rp)
            self.args.usable_power['k'] = float(k_final)
            self.args.usable_power['bias'] = float(bias)
        new_weather_power = []
        for index, row in self.weather_power.iterrows():
            zfs = row[self.args.usable_power["env"]]
            rp = row['C_REAL_VALUE']
            if self.filter_unlimited_power(zfs, rp, self.args.usable_power['k'],
                                           self.args.usable_power['bias'] * self.args.usable_power['coe']):
                row['c'] = 'red'  # kept: inside the usable-power band
            else:
                row['c'] = 'blue'  # curtailed: outside the band
            new_weather_power.append(row)
        new_weather_power = pd.concat(new_weather_power, axis=1).T
        new_weather_power.plot.scatter(x=self.args.usable_power["env"], y='C_REAL_VALUE', c='c')
        plt.savefig(parent_path + '/figs/测光法{}.png'.format(name))
        new_weather_power = new_weather_power[new_weather_power['c'] == 'red']
        number = len(new_weather_power)
        self.logger.info("Photometric method - {} rows in total before cleaning curtailed points".format(len(self.weather_power)))
        self.logger.info("Photometric method - {} rows kept after cleaning, ratio: {}".format(
            number, round(number / len(self.weather_power), 2)))
        return new_weather_power.loc[:, ['C_TIME', 'C_REAL_VALUE', 'C_ABLE_VALUE']]
if __name__ == '__main__':
    power = pd.read_csv('2023-12-01至2023-12-23实际功率导出文件.csv', parse_dates=['时间'])
    weather = pd.read_csv('2023-12-01至2023-12-23气象站数据导出文件.csv', parse_dates=['时间'])
    weather_power = pd.merge(weather, power, on='时间')  # join the two exports on the timestamp column
    # glob_rp = segment_statis(weather_power)
    # k_final, bias, glob_rp = mapping_relation(glob_rp)
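    # A minimal sketch of how the class could be driven offline on the merged data.
    # The parameter values below (env column, fractiles, threshold, coe, cap) are
    # placeholders, not project settings; SimpleNamespace is used only because
    # LimitPower reads args via attribute access.
    # from types import SimpleNamespace
    # args = SimpleNamespace(
    #     usable_power={'env': 'C_GLOBALR', 'down_fractile': 25, 'up_fractile': 75,
    #                   'outliers_threshold': 1.5, 'coe': 1.0},
    #     cap=20.0,
    # )
    # lp = LimitPower(Log().logger, args, weather_power)
    # cleaned = lp.clean_limited_power('offline', is_repair=True)
    # print(cleaned.head())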