David 1 month ago
parent
commit
1489e91bb6

+ 209 - 0
DataCalculate/accuracy.py

@@ -0,0 +1,209 @@
+import pandas as pd
+import numpy as np
+from pymongo import MongoClient
+import requests
+import json
+from datetime import datetime
+from flask import Flask, request
+import time
+import logging
+import traceback
+app = Flask('evaluation_accuracy_service')
+url = 'http://49.4.78.194:17160/apiCalculate/calculate'
+'''
+Accuracy API usage notes:
+1. The entry point is calculate_acc.
+2. Pass the arguments as described below.
+   data must contain C_TIME (timestamp), realValue (actual power), ableValue
+   (usable power; fall back to actual power when unavailable) and forecastAbleValue (forecast power).
+   opt is a dict with the required station information: cap (installed capacity), province,
+   formulaType (formula type), electricType (station type) and stationCode (station code).
+   See the API documentation for details.
+3. Formulas are evaluated either per day or per point; set opt.formulaType accordingly.
+   The per-day or per-point results are then averaged and returned.
+   A hedged usage sketch follows below.
+'''
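+
+
+# Hedged usage sketch (not part of the service; station values below are
+# illustrative placeholders, not real configuration):
+def _example_calculate_acc():
+    df = pd.DataFrame({
+        'C_TIME': pd.date_range('2024-01-01', periods=4, freq='15min'),
+        'realValue': [10.0, 12.5, 11.0, 9.5],
+        'ableValue': [10.0, 12.5, 11.0, 9.5],   # no usable power: fall back to actual power
+        'forecastAbleValue': [9.8, 12.0, 11.5, 10.0],
+    })
+    opt = {'cap': 50, 'province': 'XX', 'formulaType': 'DAY_SHORT',
+           'electricType': 'E1', 'stationCode': 'J00000'}  # placeholder values
+    return calculate_acc(df, opt)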
+
+
+def wrap_json(df, opt):
+    """
+    包装json
+    :param df: 列名为 C_TIME realValue ableValue forecastAbleValue的DataFrame
+    :param opt: 参数字典
+    :return: json列表
+    """
+    d = opt['formulaType'].split('_')[0]
+    jata, dfs = [], []
+    if d == 'POINT':
+        df['time'] = df['C_TIME'].apply(datetime_to_timestamp)
+        for i, row in df.iterrows():
+            dfs.append(row.to_frame().T)
+    elif d == 'DAY':
+        df = df.copy()
+        df['time'] = df['C_TIME'].apply(datetime_to_timestamp)
+        df.loc[:, 'C_TIME'] = df['C_TIME'].dt.strftime('%y%m%d')  # keep only year/month/day
+        for i, group in df.groupby('C_TIME'):
+            dfs.append(group)
+    outer_dict = {"electricCapacity": str(opt['cap']), "province": opt['province'], "formulaType": opt['formulaType'], "electricType": opt['electricType'], "stationCode": opt['stationCode']}
+    timestamp = int(time.mktime(datetime.now().timetuple()) * 1000 + datetime.now().microsecond / 1000.0)
+    inner_dict = {"genTime": str(timestamp) + "L", "capacity": str(opt['cap']), "openCapacity": str(opt['cap'])}
+    for df in dfs:
+        calculation_info = json.loads(df.iloc[:, 1:].to_json(orient='records'))  # json.loads is safer than eval()
+        outer_dict['calculationInfoList'] = [dict(calculation, **inner_dict) for calculation in calculation_info]
+        jata.append(json.dumps(outer_dict))
+    return jata
+
+
+def send_request(url, jata):
+    """
+    Send the accuracy requests.
+    :param url: request URL
+    :param jata: list of JSON payloads
+    :return: average accuracy
+    """
+    headers = {
+        'content-type': 'application/json;charset=UTF-8',
+        "Authorization": "dXNlcjoxMjM0NTY="
+    }
+    acc, number = 0, 0
+    for i in range(len(jata)):
+        res = requests.post(url, headers=headers, data=jata[i])
+        if res.json()['code'] == '500':
+            print("did not pass the assessment standard", end=' ')
+            continue
+        number += 1
+        acc += float(res.json()['data'][:-1])
+    if number != 0:
+        acc /= number
+    else:
+        print("cannot average the accuracy: the denominator is 0")
+    return acc
+
+
+def calculate_acc(data, opt):
+    """
+    Compute accuracy through the remote API.
+    :param data: DataFrame with columns C_TIME, realValue, ableValue, forecastAbleValue
+    :param opt: parameter dict
+    :return: computed accuracy
+    """
+    jata = wrap_json(data, opt)
+    acc = send_request(url=url, jata=jata)
+    return acc
+
+
+def datetime_to_timestamp(dt):
+    # convert a datetime to a millisecond epoch timestamp
+    return int(round(time.mktime(dt.timetuple())) * 1000)
+
+
+
+def get_data_from_mongo(args):
+    mongodb_connection, mongodb_database, mongodb_read_table = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/", args['mongodb_database'], args['mongodb_read_table']
+    client = MongoClient(mongodb_connection)
+    # select the database (MongoDB creates it automatically if it does not exist)
+    db = client[mongodb_database]
+    collection = db[mongodb_read_table]  # collection name
+    data_from_db = collection.find()  # returns a cursor
+    # materialize the cursor into a list and build a pandas DataFrame
+    df = pd.DataFrame(list(data_from_db))
+    client.close()
+    return df
+    
+
+def insert_data_into_mongo(res_df, args):
+    mongodb_connection, mongodb_database, mongodb_write_table = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/", args['mongodb_database'], args['mongodb_write_table']
+    client = MongoClient(mongodb_connection)
+    db = client[mongodb_database]
+    if mongodb_write_table in db.list_collection_names():
+        db[mongodb_write_table].drop()
+        print(f"Collection '{mongodb_write_table}' already existed and was dropped!")
+    collection = db[mongodb_write_table]  # collection name
+    # convert the DataFrame to a list of per-row dicts
+    data_dict = res_df.to_dict("records")
+    # insert into MongoDB
+    collection.insert_many(data_dict)
+    print("data inserted successfully!")
+    
+
+# def compute_accuracy(df,args):
+#     col_time,col_rp,col_pp,formulaType = args['col_time'],args['col_rp'],args['col_pp'],args['formulaType'].split('_')[0]
+#     dates = []
+#     accuracy = []
+#     df = df[(~np.isnan(df[col_rp]))&(~np.isnan(df[col_pp]))]
+#     df = df[[col_time,col_rp,col_pp]].rename(columns={col_time:'C_TIME',col_rp:'realValue',col_pp:'forecastAbleValue'})
+#     df['ableValue'] = df['realValue']
+#     df['C_TIME'] = df['C_TIME'].apply(lambda x: datetime.strptime(x, "%Y-%m-%d %H:%M:%S"))
+#     if formulaType=='DAY':
+#         df['C_DATE'] = df['C_TIME'].apply(lambda x: x.strftime("%Y-%m-%d"))
+#         days_list = df['C_DATE'].unique().tolist()
+#         for day in days_list:
+#             df_tmp = df[df['C_DATE'] == day]
+#             dates.append(day)
+#             accuracy.append(calculate_acc(df_tmp, args))
+#     else:
+#         points = df['C_TIME'].unique().tolist()
+#         for point in points:
+#             df_tmp = df[df['C_TIME'] == point]
+#             dates.append(point)
+#             accuracy.append(calculate_acc(df_tmp, args))
+#     print("accuray compute successfully!")
+#     return pd.DataFrame({'date':dates,'accuracy':accuracy})
+
+# define the RMSE and MAE calculation functions
+def rmse(y_true, y_pred):
+    return np.sqrt(np.mean((y_true - y_pred) ** 2))
+
+def mae(y_true, y_pred):
+    return np.mean(np.abs(y_true - y_pred))
+    
+def compute_accuracy(df, args):
+    col_time, col_rp, col_pp = args['col_time'], args['col_rp'], args['col_pp']
+    df[col_time] = df[col_time].apply(lambda x: pd.to_datetime(x).strftime("%Y-%m-%d"))
+    # group by date and compute RMSE and MAE per day
+    results = df.groupby(col_time).apply(
+        lambda group: pd.Series({
+            "RMSE": rmse(group[col_rp], group[col_pp]),
+            "MAE": mae(group[col_rp], group[col_pp])
+        })
+    ).reset_index()
+    return results
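+
+# Hedged usage sketch (column names are illustrative):
+# compute_accuracy(df, {'col_time': 'C_TIME', 'col_rp': 'realValue',
+#                       'col_pp': 'forecastAbleValue'})
+# returns one RMSE/MAE row per calendar day.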
+    
+
+@app.route('/evaluation_accuracy', methods=['POST'])
+def evaluation_accuracy():
+    # record the start time
+    start_time = time.time()
+    result = {}
+    success = 0
+    args = {}  # initialize so the response can always echo the arguments
+    print("Program starts execution!")
+    try:
+        args = request.values.to_dict()
+        print('args', args)
+        logger.info(args)
+        power_df = get_data_from_mongo(args)
+        acc_result = compute_accuracy(power_df, args)
+        insert_data_into_mongo(acc_result, args)
+        success = 1
+    except Exception:
+        my_exception = traceback.format_exc()
+        my_exception = my_exception.replace("\n", "\t")  # str.replace returns a new string
+        result['msg'] = my_exception
+    end_time = time.time()
+
+    result['success'] = success
+    result['args'] = args
+    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
+    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
+    print("Program execution ends!")
+    return result
+    
+
+if __name__=="__main__":  
+    print("Program starts execution!")
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    logger = logging.getLogger("evaluation_accuracy log")
+    from waitress import serve
+    serve(app, host="0.0.0.0", port=10091)
+    print("server start!")

+ 81 - 0
DataClean/data_cleaning.py

@@ -0,0 +1,81 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/10/11 11:00
+# file: data_cleaning.py
+# author: David
+# company: shenyang JY
+import numpy as np
+np.random.seed(42)
+
+def cleaning(df, name, logger, cols=None, dup=True, col_time='dateTime'):
+    """Entry point: column cleaning, optional time dedup, then key-field row cleaning."""
+    logger.info("start cleaning: {} ...".format(name))
+    data = df.copy()
+    data = data_column_cleaning(data, logger)
+    if dup:
+        data = rm_duplicated(data, logger, col_time)
+    if cols is not None:
+        data = key_field_row_cleaning(data, cols, logger)
+    return data
+
+
+def data_column_cleaning(data, logger, clean_value=[-99.0, -99]):
+    """
+    Column cleaning.
+    :param data:
+    :param logger:
+    :param clean_value: sentinel values treated as missing
+    :return:
+    """
+    data1 = data.copy()
+    cols_pre = data.columns.to_list()
+    for val in clean_value:
+        data1 = data1.replace(val, np.nan)
+    # drop columns with fewer than 80% non-NaN values
+    data1 = data1.dropna(axis=1, thresh=len(data) * 0.8)
+    # drop columns whose values are all identical
+    data1 = data1.loc[:, (data1 != data1.iloc[0]).any()]
+    data = data[data1.columns.tolist()]
+    cols_late = data.columns.tolist()
+    if len(cols_pre) > len(cols_late):
+        logger.info("column cleaning: dropped columns {}".format(set(cols_pre) - set(cols_late)))
+    return data
+
+
+def key_field_row_cleaning(data, cols, logger):
+    """
+    Row cleaning on key fields: drop rows whose key fields are -99 or null.
+    :param data:
+    :param cols: list of key field names
+    :param logger:
+    :return:
+    """
+    rows_pre = len(data)
+    nan_cols = []
+    for col in cols:
+        begin = len(data)
+        if col in data.columns.tolist():
+            data = data[~(data[col] == -99)]
+            data = data[~data.loc[:, col].isnull()]
+        end = len(data)
+        if begin - end > 0:  # was `end - begin`, which could never be positive
+            nan_cols.append(col)
+    rows_late = len(data)
+    if rows_pre - rows_late > 0:
+        logger.info("row cleaning: dropped {} rows; columns with missing values: {}".format(rows_pre - rows_late, ', '.join(nan_cols)))
+    return data
+
+def rm_duplicated(data, logger, col_time='dateTime'):
+    """
+    Deduplicate by timestamp.
+    :param data:
+    :param logger:
+    :return:
+    """
+    rows_pre = len(data)
+    data = data.drop_duplicates(subset=col_time)
+    rows_late = len(data)
+    if rows_pre - rows_late > 0:
+        logger.info("rows removed by time dedup: {}".format(rows_pre - rows_late))
+    return data
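+
+
+# Hedged usage sketch (illustrative data; the module is normally import-only):
+if __name__ == "__main__":
+    import logging
+    import pandas as pd
+    logging.basicConfig(level=logging.INFO)
+    _df = pd.DataFrame({
+        'dateTime': pd.date_range('2024-01-01', periods=5, freq='15min'),
+        'power': [1.0, 2.0, -99, 4.0, 5.0]})
+    print(cleaning(_df, 'demo', logging.getLogger(__name__), cols=['power']))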

+ 165 - 0
DataClean/turbine_cleaning.py

@@ -0,0 +1,165 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2024/6/21 13:49
+# file: turbine_cleaning.py
+# author: David
+# company: shenyang JY
+import os
+import pandas as pd
+from datetime import timedelta
+
+
+# —————————————————————————— cleaning of -99 hub wind speed and constant-streak outliers ——————————————————————————
+def mark_abnormal_streaks(df, columns, min_streak):
+    """Mark rows where all given columns stay constant for at least min_streak rows."""
+    abnormal_mask = pd.Series(False, index=df.index)
+    streak_start = None
+    for i in range(len(df)):
+        # a streak restarts whenever any monitored column changes
+        if i == 0 or any(df.at[i - 1, col] != df.at[i, col] for col in columns):
+            streak_start = i
+
+        if i - streak_start >= min_streak - 1:
+            abnormal_mask[i - min_streak + 1:i + 1] = True
+
+    return abnormal_mask
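+
+
+# Hedged mini-demo: with min_streak=3, three identical consecutive rows across
+# all monitored columns are flagged (rows 0-2 below); the fourth row is not.
+def _example_mark_streaks():
+    df = pd.DataFrame({'C_WS': [5, 5, 5, 6], 'C_WD': [1, 1, 1, 2],
+                       'C_ACTIVE_POWER': [9, 9, 9, 10]})
+    return mark_abnormal_streaks(df, ['C_WS', 'C_WD', 'C_ACTIVE_POWER'], 3)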
+
+
+def remove_abnormal_values(df, N):
+    # mark rows whose C_ACTIVE_POWER is -99 as outliers
+    abnormal_mask1 = df['C_ACTIVE_POWER'] == -99
+    count_abnormal1 = abnormal_mask1.sum()
+
+    # mark rows where C_WS, C_WD and C_ACTIVE_POWER stay constant for N consecutive rows
+    columns = ['C_WS', 'C_WD', 'C_ACTIVE_POWER']
+    abnormal_mask2 = mark_abnormal_streaks(df, columns, N)
+    count_abnormal2 = abnormal_mask2.sum()
+    # combined boolean mask of all outliers
+    abnormal_mask = abnormal_mask1 | abnormal_mask2
+    # record the distinct values involved in constant streaks
+    removed_continuous_values = {column: df.loc[abnormal_mask2, column].unique() for column in columns}
+    # drop the outliers
+    df_clean = df[~abnormal_mask]
+    total_removed = abnormal_mask.sum()
+    return df_clean, count_abnormal1, count_abnormal2, total_removed, removed_continuous_values
+
+
+def process_csv_files(input_dir, output_dir, turbines_id, M, N):  # MBD: duplicate timestamps are not handled
+    if not os.path.exists(output_dir):
+        os.makedirs(output_dir)
+
+    for i in turbines_id:
+        input_file = os.path.join(input_dir, f"turbine-{i}.csv")
+        output_file = os.path.join(output_dir, f"turbine-{i}.csv")
+
+        # read the csv file
+        df = pd.read_csv(input_file)
+
+        # drop outliers and collect the statistics
+        df_clean, count_abnormal1, count_abnormal2, total_removed, removed_continuous_values = remove_abnormal_values(df, N)
+
+        # report the outlier statistics
+        print(f"processing file: {input_file}")
+        print(f"-99 point outliers removed: {count_abnormal1}")
+        print(f"constant-streak outliers removed: {count_abnormal2}")
+        print(f"total rows removed: {total_removed}")
+        print(f"values involved in constant streaks: {removed_continuous_values}\n")
+
+        # save the cleaned CSV file
+        df_clean.to_csv(output_file, index=False)
+
+
+# —————————————————————————— per-turbine time alignment ——————————————————————————
+def TimeMerge(input_dir, output_dir, turbines_id, M):
+    # read all CSV files
+    files = [os.path.join(input_dir, f"turbine-{i}.csv") for i in turbines_id]
+    dataframes = [pd.read_csv(f) for f in files]
+
+    # intersect the C_TIME columns
+    c_time_intersection = set(dataframes[0]["C_TIME"])
+    for df in dataframes[1:]:
+        c_time_intersection.intersection_update(df["C_TIME"])
+
+    # keep only rows whose C_TIME is in the intersection
+    filtered_dataframes = [df[df["C_TIME"].isin(c_time_intersection)] for df in dataframes]
+
+    # write each filtered DataFrame to a new CSV file
+    os.makedirs(output_dir, exist_ok=True)
+    turbines_all, names = [], ['C_TIME']
+    for (filtered_df, i) in zip(filtered_dataframes, turbines_id):
+        filtered_df.to_csv(os.path.join(output_dir, f"turbine-{i}.csv"), index=False)
+        names.append('C_ACTIVE_POWER_{}'.format(i))
+        turbines_all.append(filtered_df['C_ACTIVE_POWER'].reset_index(drop=True))
+    turbines_all.insert(0, filtered_dataframes[0]['C_TIME'].reset_index(drop=True))
+    turbines_all = pd.concat(turbines_all, axis=1)
+    turbines_all.columns = names
+    turbines_all.to_csv(os.path.join(output_dir, "turbines.csv"), index=False)
+
+
+# —————————————————————————— missing-point handling ——————————————————————————
+def MissingPointProcessing(input_dir, output_dir, turbines_id, M, N):
+    # process the file of each turbine
+    for k in turbines_id:
+        file_name = os.path.join(input_dir, f"turbine-{k}.csv")
+        # read the CSV file
+        data = pd.read_csv(file_name, parse_dates=['C_TIME'])
+
+        # compute the time difference between consecutive rows
+        data['time_diff'] = data['C_TIME'].diff().dt.total_seconds()
+
+        # find the missing time points (gaps larger than one 15-minute step)
+        missing_data_points = data[data['time_diff'] > 900]
+
+        # buffer for the interpolated timestamps and values
+        filled_data = []
+
+        # report where each gap starts and how many points are missing
+        print("gap start times:")
+        for index, row in missing_data_points.iterrows():
+            missing_start = row['C_TIME'] - timedelta(seconds=row['time_diff'])
+            missing_count = int(row['time_diff'] // 900) - 1
+
+            # fill only gaps of at most N points  MBD: the filling code is rather verbose
+            if missing_count <= N:
+                prev_values = data.iloc[index - 1][['C_WS', 'C_WD', 'C_ACTIVE_POWER']]
+                next_values = row[['C_WS', 'C_WD', 'C_ACTIVE_POWER']]
+
+                for i in range(1, missing_count + 1):
+                    t = i / (missing_count + 1)
+                    filled_time = missing_start + timedelta(minutes=15 * i)
+
+                    # linear interpolation between the rows bounding the gap
+                    filled_values = {
+                        'C_TIME': filled_time,
+                        'C_WS': prev_values['C_WS'] + (next_values['C_WS'] - prev_values['C_WS']) * t,
+                        'C_WD': prev_values['C_WD'] + (next_values['C_WD'] - prev_values['C_WD']) * t,
+                        'C_ACTIVE_POWER': prev_values['C_ACTIVE_POWER'] + (
+                                    next_values['C_ACTIVE_POWER'] - prev_values['C_ACTIVE_POWER']) * t,
+                    }
+
+                    # wrap the wind direction into the range [-180, 180)
+                    filled_values['C_WD'] = (filled_values['C_WD'] + 180) % 360 - 180
+
+                    filled_data.append(filled_values)
+                    print(f"filled time: {filled_time}, filled values: {filled_values}")
+
+            print(f"{missing_start} - number of missing points: {missing_count}")
+
+        # insert the filled rows into the original data
+        filled_df = pd.DataFrame(filled_data)
+        data = pd.concat([data, filled_df], ignore_index=True)
+        # sort by time and reset the index
+        data = data.sort_values(by='C_TIME').reset_index(drop=True)
+
+        # report the total number of missing points
+        missing_data_points = data[data['time_diff'] > 900]
+        print(f"total missing points: {int(missing_data_points['time_diff'].sum() // 900) - len(missing_data_points)}")
+        data.drop(columns=['time_diff'], inplace=True)
+        os.makedirs(output_dir, exist_ok=True)
+        output_path_name = os.path.join(output_dir, f"turbine-{k}.csv")
+        print(output_path_name)
+        # save the interpolated file
+        data.to_csv(output_path_name, index=False)
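+
+
+# Hedged pipeline sketch (paths, turbine ids, M and N are placeholders):
+if __name__ == "__main__":
+    turbines = [1, 2, 3]
+    process_csv_files("./raw", "./clean", turbines, M=len(turbines), N=5)
+    MissingPointProcessing("./clean", "./filled", turbines, M=len(turbines), N=4)
+    TimeMerge("./filled", "./aligned", turbines, M=len(turbines))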

+ 45 - 0
DataCoe/hotmap.py

@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2024/8/2 9:49
+# file: hotmap.py
+# author: David
+# company: shenyang JY
+import matplotlib.pyplot as plt
+import seaborn as sns
+import pandas as pd
+import math
+
+weather = pd.read_csv('./weather-1-process.csv')
+power = pd.read_csv('./power.csv')
+data = pd.merge(weather, power, on='C_TIME')
+data['C_TIME'] = pd.to_datetime(data['C_TIME'])
+data['时间1'] = data['C_TIME'].dt.strftime('%Y-%m')
+data = data[['C_TIME', '时间1', 'C_AIRT', 'C_CELLT', 'C_DIFFUSER', 'C_DIRECTR', 'C_GLOBALR', 'C_REAL_VALUE']]
+data.iloc[:, 2:] = data.iloc[:, 2:].astype(float)
+indexs = []
+new = {}
+for name, group in data.groupby('时间1'):
+    print("name=", name, "length:", len(group))
+    indexs.append(name)
+    for key, value in group.iloc[:, 2:].items():  # iteritems() was removed in pandas 2.0
+        # Pearson correlation of each column against actual power
+        cov = group[key].cov(group['C_REAL_VALUE'])
+        var = group['C_REAL_VALUE'].var()
+        vari = group[key].var()
+        if var * vari != 0:
+            res_pearson = cov / math.sqrt(var * vari)
+        else:
+            res_pearson = 0
+        new.setdefault(key, []).append(res_pearson)
+pearson = pd.DataFrame(new, index=indexs)
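+# Note (hedged): up to the zero-variance fallback, the loop above reproduces
+# pandas' built-in Pearson correlation; a rough equivalent would be:
+# pearson_alt = data.groupby('时间1').apply(lambda g: g.iloc[:, 2:].corrwith(g['C_REAL_VALUE']))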
+# plt.style.use('ggplot')
+# plt.rcParams['font.sans-serif'] = ['SimHei']
+# plt.rcParams['axes.unicode_minus'] = False
+fig, ax = plt.subplots(figsize=(50, 6))
+sns.set(font_scale=1.25)
+hm = sns.heatmap(pearson, cbar=True, square=True, fmt='.2f', annot=True, annot_kws={'size': 10}, cmap="hot_r")
+plt.show()

+ 37 - 0
DataLimitpower/itil_limited_power_cleaning.py

@@ -0,0 +1,37 @@
+import requests
+import pandas as pd
+url = 'http://itil.jiayuepowertech.com:9958/itil/api/power-limitation'
+
+
+def get_station_info(timeBegin, timeEnd):
+    # query the power-limitation API and keep records for station J00557
+    url_f = url + '?timeBegin=' + timeBegin + '&timeEnd=' + timeEnd
+    res = requests.get(url_f).json()
+    ele_info = list(filter(lambda x: x['stationCode'] == 'J00557', res['data']))
+    return ele_info
+
+def cleaning_powers(power, ele_info):
+    power = power.copy()
+    start = len(power)
+    print("before limited-power cleaning: {} rows".format(start))
+    for ele in ele_info:
+        begin = ele['timeBegin']
+        end = ele['timeEnd']
+        # label-based slice over the DatetimeIndex covers the curtailment window
+        ele_limits = power[begin: end].index.to_list()
+        if len(ele_limits) > 0:
+            print("cleaning window: {} - {}".format(ele_limits[0], ele_limits[-1]))
+            power.drop(ele_limits, inplace=True)
+    print("after limited-power cleaning: {} rows, {} rows removed".format(len(power), start - len(power)))
+    return power
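+
+# Hedged offline sketch: cleaning_powers only needs a DatetimeIndex'd frame and
+# a list of {'timeBegin', 'timeEnd'} dicts, so it can be exercised without the API:
+# ele_info = [{'timeBegin': '2023-06-01 00:00', 'timeEnd': '2023-06-01 06:00'}]
+# power_filter = cleaning_powers(power, ele_info)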
+
+if __name__ == '__main__':
+    power = pd.read_csv('./557/power.csv', parse_dates=['C_TIME'], index_col='C_TIME')
+    dq = pd.read_csv('./557/dq.csv', parse_dates=['C_TIME'])
+
+    ele_info = get_station_info('2023-01', '2023-10-31')
+    power_filter = cleaning_powers(power, ele_info)
+    power_filter.to_csv('./557/power_filter.csv')
+
+
+    # power has C_TIME as its index, so restore it to a column before merging
+    power_able = pd.merge(power.reset_index(), dq, on='C_TIME')
+    power_able['error'] = round(power_able['ABLE'] - power_able['C_FP_VALUE'], 3)
+    power_able.to_csv('./557/power_able.csv', index=False)

+ 194 - 0
DataLimitpower/limited_power_curve_wind.py

@@ -0,0 +1,194 @@
+import copy
+import os
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import pickle
+current_path = os.path.dirname(__file__)
+
+
+class LimitPower(object):
+    def __init__(self, logger, args, weather_power):
+        self.logger = logger
+        self.args = args
+        self.opt = self.args.parse_args_and_yaml()
+        self.weather_power = weather_power
+        self.step = self.opt.usable_power['step']
+        self.segs = np.array([x * self.step for x in range(1, 50)])  # bin wind speed into 49 step-wide segments
+        self.xs = np.array([self.segs[i - 1] + x if i > 0 else self.step / 2 for i, x in
+              enumerate([self.step / 2 for _ in self.segs])])  # bin midpoints
+        self.polynomial = None
+        self.width = 0
+        self.max_ws = 50
+
+    def segment_statis(self):
+        """
+        Bin hub wind speed against actual power; per bin, get the median,
+        interquartile range and slope.
+        :return: glob_rp dict of wind-speed bins
+        """
+        glob_rp = {}       # dict: key = bin midpoint, value = actual powers inside the bin
+        for index, row in self.weather_power.iterrows():
+            ws_ = row[self.opt.usable_power["env"]]
+            rp = row['C_REAL_VALUE']
+            for i, seg in enumerate(self.segs):
+                if ws_ <= seg:
+                    glob_rp.setdefault(self.xs[i], []).append(rp)
+                    break
+        rms = []
+        for i, x in enumerate(self.xs):
+            rps = glob_rp.get(x)
+            if rps is None:
+                continue
+            rps = np.array(rps)
+            up = self.opt.usable_power['up_fractile']
+            down = self.opt.usable_power['down_fractile']
+            offset = 0
+            while True:
+                # walk back to the previous bin that was kept
+                index = i-1 if i > 0 else 0
+                while (self.xs[index] in rms or self.xs[index] not in glob_rp) and index > 0:
+                    index -= 1
+                x_l = self.xs[index] if index > 0 else 0
+                q2_l = glob_rp[self.xs[index]][0] if index > 0 else 0
+                down = down + offset
+                if down > up:
+                    rms.append(x)
+                    self.logger.info("removed bin midpoint: {}".format(x))
+                    break
+                q1 = np.percentile(rps, down)   # lower quantile
+                q2 = round(np.percentile(rps, down + (up - down) / 2), 3)  # median of the band
+                q3 = np.percentile(rps, up)     # upper quantile
+                iqr = q3 - q1  # interquartile range
+                k2 = round((q2 - q2_l) / (x - x_l), 3)    # trend slope
+                if k2 >= 0:
+                    glob_rp[x] = [q2, iqr]   # update dict
+                    break
+                else:
+                    # raise the lower quantile and retry until the trend is non-decreasing
+                    offset += 1
+        glob_rp = {k: glob_rp[k] for k in glob_rp.keys() if k not in rms}
+        glob_rp = {k: glob_rp[k] for k in sorted(glob_rp.keys())}
+        return glob_rp
+
+    def mapping_relation(self, glob_rp):
+        degree = self.opt.usable_power['degree']
+        xs = list(glob_rp.keys())
+        ys = [y[0] for y in glob_rp.values()]
+        self.width = np.median(np.array([y[1] for y in glob_rp.values()]))
+        coefficients = np.polyfit(xs, ys, degree)
+        self.polynomial = np.poly1d(coefficients)
+        self.max_ws = max(xs)
+        # y_fit = self.polynomial(xs)
+        # plt.scatter(xs, ys, label='Data', color='red')
+        # plt.plot(xs, y_fit, label='Fitted polynomial', color='blue')
+        # plt.plot(xs, y_fit+self.width/2, label='up polynomial', color='purple')
+        # plt.plot(xs, y_fit-self.width/2, label='down polynomial', color='green')
+        # plt.legend()
+        # plt.xlabel('x')
+        # plt.ylabel('y')
+        # plt.title(f'Polynomial Fit (degree {degree})')
+        # plt.show()
+
+    def saveVar(self, path, data):
+        os.makedirs(os.path.dirname(path), exist_ok=True)
+        with open(path, 'wb') as file:
+            pickle.dump(data, file)
+
+    def filter_unlimited_power(self, ws, real_power):
+        """
+        Decide whether a point lies inside the fitted unlimited-power band.
+        :param ws: hub wind speed
+        :param real_power: actual power
+        :return: True if the point is kept (not curtailed)
+        """
+        up_offset = self.opt.usable_power['up_offset']
+        down_offset = self.opt.usable_power['down_offset']
+        base = self.polynomial(ws)
+        high = min(base + self.width / up_offset, self.opt.cap)
+        low = max(base - self.width / down_offset, 0)
+        return low <= real_power <= high
+
+    def clean_limited_power(self, name, cluster=False):
+        if cluster is True:
+            glob_rp = self.segment_statis()
+            self.saveVar(os.path.dirname(current_path) + '/var/glob_rp.pickle', glob_rp)
+            self.mapping_relation(glob_rp)
+        new_weather_power, number = [], 0
+        for index, row in self.weather_power.iterrows():
+            zfs = row[self.opt.usable_power["env"]]
+            rp = row['C_REAL_VALUE']
+            if zfs < 0 or rp < 0:
+                continue
+            if self.filter_unlimited_power(zfs, rp) and zfs <= self.max_ws:
+                row['c'] = 'red'      # kept: inside the unlimited-power band
+                new_weather_power.append(row)
+            else:
+                row['c'] = 'blue'     # flagged as curtailed
+                new_weather_power.append(row)
+        new_weather_power = pd.concat(new_weather_power, axis=1).T
+        new_weather_power.plot.scatter(x=self.opt.usable_power["env"], y='C_REAL_VALUE', c='c')
+        plt.savefig(current_path + '/figs/测风法{}.png'.format(name))
+        new_weather_power = new_weather_power[new_weather_power['c'] == 'red']
+        number = len(new_weather_power)
+        self.logger.info("before limited-power cleaning: {} rows in total".format(len(self.weather_power)))
+        self.logger.info("points kept after cleaning: " + str(number) + " ratio: " + str(round(number / len(self.weather_power), 2)))
+        return new_weather_power.loc[:, ['C_TIME', 'C_REAL_VALUE', 'C_ABLE_VALUE', self.opt.usable_power['env']]]
+
+    def clean_limited_power_by_signal(self, name):
+        weather_power1 = self.weather_power.copy()
+        weather_power1["signal"] = weather_power1.apply(
+            lambda x: self.signal_result(x["C_IS_RATIONING_BY_MANUAL_CONTROL"], x["C_IS_RATIONING_BY_AUTO_CONTROL"]),
+            axis=1)
+        weather_power1['c'] = weather_power1.apply(lambda x: 'cornflowerblue' if bool(x["signal"]) is True else 'pink',
+                                                   axis=1)
+        weather_power1.plot.scatter(x=self.opt.usable_power["env"], y='C_REAL_VALUE', c='c')
+        plt.savefig(current_path + '/figs/信号法{}.png'.format(name))
+        weather_power1 = weather_power1[weather_power1['signal'] == False]
+        self.logger.info("signal method - before limited-power cleaning: {} rows in total".format(len(self.weather_power)))
+        self.logger.info("signal method - points kept after cleaning: " + str(len(weather_power1)) + " ratio: " + str(
+            round(len(weather_power1) / len(self.weather_power), 2)))
+        return weather_power1.loc[:, ['C_TIME', 'C_REAL_VALUE', 'C_ABLE_VALUE', self.opt.usable_power['env']]]
+
+    def signal_result(self, manual, auto):
+        # curtailment signal from the manual/automatic rationing flags
+        if int(manual) == 0:
+            return int(auto) != 0
+        else:
+            return int(auto) == 1
+
+
+if __name__ == '__main__':
+    from logs import Log
+    from config import myargparse
+    log = Log().logger
+    # instantiate the configuration class
+    args = myargparse(discription="场站端配置", add_help=False)
+    power = pd.read_csv('./data/power.csv')
+    weather = pd.read_csv('./data/tower-1-process.csv')
+    weather_power = pd.merge(weather, power, on='C_TIME')  # join weather and power data
+    lp = LimitPower(log, args, weather_power)
+    # glob_rp = lp.segment_statis()
+    # lp.mapping_relation(glob_rp)
+    lp.clean_limited_power('测试1')
+    # glob_rp = {k: glob_rp[k] for k in sorted(glob_rp.keys())}
+    # keys = list(glob_rp.keys())
+    # values = [v[0] for v in glob_rp.values()]
+    # import matplotlib.pyplot as plt
+    # fig, ax = plt.subplots()
+    # ax.plot(keys, values)
+    # plt.show()

+ 152 - 0
DataLimitpower/limited_power_solar.py

@@ -0,0 +1,152 @@
+import pandas as pd
+import os
+import numpy as np
+np.random.seed(42)
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+current_path = os.path.dirname(__file__)
+
+
+class LimitPower(object):
+    def __init__(self, logger, args, weather_power):
+        self.logger = logger
+        self.args = args
+        self.opt = self.args.parse_args_and_yaml()
+        self.weather_power = weather_power
+
+    def segment_statis(self):
+        """
+        Bin global radiation against actual power; per bin, get the median,
+        interquartile range and slopes.
+        :return: glob_rp dict of radiation bins
+        """
+        segs = [x for x in range(50, 2000, 100)]    # bin irradiance in steps of 100
+        xs = [segs[i-1]+x if i > 0 else 25 for i, x in enumerate([50 for _ in segs])]  # bin midpoints
+        glob_rp = {}       # dict: key = bin midpoint, value = actual powers inside the bin
+        for index, row in self.weather_power.iterrows():
+            glob_ = row[self.opt.usable_power["env"]]
+            rp = row['C_REAL_VALUE']
+            for i, seg in enumerate(segs):
+                if glob_ <= seg and not (i > 0 and rp < 1):
+                    glob_rp.setdefault(xs[i], []).append(rp)
+                    break
+        for i, x in enumerate(xs):
+            rps = glob_rp.get(x)
+            if rps is None:
+                glob_rp = {k: v for k, v in glob_rp.items() if k not in xs[xs.index(x):]}
+                break
+            x_l = xs[i-1] if i > 0 else 0
+            q2_l = glob_rp[xs[i-1]][0] if i > 0 else 0
+            q1 = np.percentile(rps, self.opt.usable_power['down_fractile'])     # lower quantile of actual power
+            q2 = np.percentile(rps, 50)  # median of actual power
+            q3 = np.percentile(rps, self.opt.usable_power['up_fractile'])     # upper quantile of actual power
+            iqr = q3 - q1    # interquartile range
+            k1 = round(q2/x, 5)  # overall slope
+            k2 = round((q2-q2_l)/(x-x_l), 5)    # trend slope relative to the previous median
+            glob_rp[x] = [q2, iqr, k1, k2]   # update dict
+        return glob_rp
+
+    def mapping_relation(self, glob_rp):
+        """
+        Fit the slope and spread from the binned statistics.
+        :param glob_rp: radiation bins
+        :return: k_final slope, bias spread of actual power, glob_rp remaining bins
+        """
+        ks, iqrs, delete_x, tag_x = [], [], [], []   # ks: bin slopes; iqrs: bin spreads; delete_x: removed midpoints
+        for x, values in glob_rp.items():
+            k1 = values[-2]
+            k2 = values[-1]
+            iqrs.append(values[-3])
+            if k1 > 0 and k2 > 0:   # drop bins whose trend slope is non-positive
+                ks.append(k1)
+                tag_x.append(x)
+            else:
+                delete_x.append(x)
+        bias = round(np.median(iqrs), 3)  # median spread
+        mean = np.mean(ks)  # mean of the slopes
+        std = np.std(ks)    # standard deviation of the slopes
+        ks = np.array(ks)
+        z_score = (ks-mean)/std  # z-scores
+        outliers = np.abs(z_score) > self.opt.usable_power['outliers_threshold']    # beyond the threshold -> outlier
+        ks = ks[~outliers]  # drop outlier slopes
+        delete_x1 = list(np.array(tag_x)[outliers])  # midpoints removed as outliers
+        k_final = round(np.mean(ks), 5)  # average the cleaned slopes
+        delete_x.extend(delete_x1)
+        self.logger.info("usable-power fit, removed slopes at: " + ' '.join([str(x) for x in delete_x]))
+        glob_rp = {k: v for k, v in glob_rp.items() if k not in delete_x}   # bins kept after cleaning
+        return k_final, bias, glob_rp
+
+    def filter_unlimited_power(self, zfs, real_power, k, b):
+        """
+        Decide whether a point lies inside the fitted unlimited-power band.
+        :param zfs: global radiation of the point
+        :param real_power: actual power of the point
+        :param k: slope
+        :param b: band width (offset)
+        :return: True if the point is kept (not curtailed)
+        """
+        high = k*zfs + b/2 if k*zfs + b/2 < self.opt.cap else self.opt.cap
+        low = k*zfs - b/2 if k*zfs - b/2 > 0 else 0
+        return low <= real_power <= high
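+
+    # Worked example (hedged, illustrative numbers): with k=0.05, b=4 and
+    # cap=20, zfs=300 yields the band [0.05*300 - 2, 0.05*300 + 2] = [13, 17];
+    # real_power=15 is kept, real_power=5 is flagged as curtailed.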
+
+    def clean_limited_power(self, name, is_repair=False):
+        if is_repair is True:
+            glob_rp = self.segment_statis()
+            k_final, bias, glob_rp = self.mapping_relation(glob_rp)
+            self.opt.usable_power['k'] = float(k_final)
+            self.opt.usable_power['bias'] = float(bias)
+        new_weather_power = []
+        for index, row in self.weather_power.iterrows():
+            zfs = row[self.opt.usable_power["env"]]
+            rp = row['C_REAL_VALUE']
+            if self.filter_unlimited_power(zfs, rp, self.opt.usable_power['k'], self.opt.usable_power['bias'] * self.opt.usable_power['coe']):
+                row['c'] = 'red'      # kept: inside the unlimited-power band
+                new_weather_power.append(row)
+            else:
+                row['c'] = 'blue'     # flagged as curtailed
+                new_weather_power.append(row)
+        new_weather_power = pd.concat(new_weather_power, axis=1).T
+        new_weather_power.plot.scatter(x=self.opt.usable_power["env"], y='C_REAL_VALUE', c='c')
+        plt.savefig(current_path + '/figs/测光法{}.png'.format(name))
+        new_weather_power = new_weather_power[new_weather_power['c'] == 'red']
+        number = len(new_weather_power)
+        self.logger.info("irradiance method - before limited-power cleaning: {} rows in total".format(len(self.weather_power)))
+        self.logger.info("irradiance method - points kept after cleaning: " + str(number) + " ratio: " + str(round(number / len(self.weather_power), 2)))
+        return new_weather_power.loc[:, ['C_TIME', 'C_REAL_VALUE', 'C_ABLE_VALUE']]
+
+    def clean_limited_power_by_signal(self, name):
+        weather_power1 = self.weather_power.copy()
+        weather_power1["signal"] = weather_power1.apply(lambda x: self.signal_result(x["C_IS_RATIONING_BY_MANUAL_CONTROL"], x["C_IS_RATIONING_BY_AUTO_CONTROL"]), axis=1)
+        weather_power1['c'] = weather_power1.apply(lambda x: 'cornflowerblue' if bool(x["signal"]) is True else 'pink', axis=1)
+        weather_power1.plot.scatter(x=self.opt.usable_power["env"], y='C_REAL_VALUE', c='c')
+        plt.savefig(current_path + '/figs/信号法{}.png'.format(name))
+        weather_power1 = weather_power1[weather_power1['signal'] == False]
+        self.logger.info("signal method - before limited-power cleaning: {} rows in total".format(len(self.weather_power)))
+        self.logger.info("signal method - points kept after cleaning: " + str(len(weather_power1)) + " ratio: " + str(round(len(weather_power1) / len(self.weather_power), 2)))
+        return weather_power1.loc[:, ['C_TIME', 'C_REAL_VALUE', 'C_ABLE_VALUE']]
+
+    def signal_result(self, manual, auto):
+        # curtailment signal from the manual/automatic rationing flags
+        if int(manual) == 0:
+            return int(auto) != 0
+        else:
+            return int(auto) == 1
+
+
+if __name__ == '__main__':
+    power = pd.read_csv('2023-12-01至2023-12-23实际功率导出文件.csv', parse_dates=['时间'])  # parse_dates, not date_parser
+    weather = pd.read_csv('2023-12-01至2023-12-23气象站数据导出文件.csv', parse_dates=['时间'])
+    weather_power = pd.merge(weather, power, on='时间')  # join the data
+    # glob_rp = segment_statis(weather_power)
+    # k_final, bias, glob_rp = mapping_relation(glob_rp)

+ 127 - 0
DataLimitpower/limited_power_wind.py

@@ -0,0 +1,127 @@
+import os
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import pickle
+current_path = os.path.dirname(__file__)
+
+
+class LimitPower(object):
+    def __init__(self, logger, args, weather_power):
+        self.logger = logger
+        self.args = args
+        self.opt = self.args.parse_args_and_yaml()
+        self.weather_power = weather_power
+        self.step = self.opt.usable_power['step']
+        self.segs = np.array([x * self.step for x in range(1, 50)])  # bin wind speed into 49 step-wide segments
+        self.xs = np.array([self.segs[i - 1] + x if i > 0 else self.step / 2 for i, x in
+              enumerate([self.step / 2 for _ in self.segs])])  # bin midpoints
+
+    def segment_statis(self):
+        """
+        Bin hub wind speed against actual power; per bin, get the mean and
+        standard deviation of the actual power.
+        :return: glob_rp dict of wind-speed bins
+        """
+        glob_rp = {}       # dict: key = bin midpoint, value = actual powers inside the bin
+        for index, row in self.weather_power.iterrows():
+            ws_ = row[self.opt.usable_power["env"]]
+            rp = row['C_REAL_VALUE']
+            for i, seg in enumerate(self.segs):
+                if ws_ <= seg:
+                    glob_rp.setdefault(self.xs[i], []).append(rp)
+                    break
+        for i, x in enumerate(self.xs):
+            rps = glob_rp.get(x)
+            if rps is None:
+                continue
+            mean = np.around(np.mean(rps), 3)     # mean actual power
+            std = np.around(np.std(rps), 3)  # standard deviation of actual power
+
+            glob_rp[x] = [mean, std]   # update dict
+        return glob_rp
+
+    def saveVar(self, path, data):
+        os.makedirs(os.path.dirname(path), exist_ok=True)
+        with open(path, 'wb') as file:
+            pickle.dump(data, file)
+
+    def filter_unlimited_power(self, ws, real_power, glob_rp):
+        """
+        Decide whether a point lies inside the mean +/- coe*std band of its bin.
+        :param ws: hub wind speed
+        :param real_power: actual power
+        :param glob_rp: per-bin [mean, std] statistics
+        :return: True if the point is kept (not curtailed)
+        """
+        coe = self.opt.usable_power['outliers_threshold']
+        seg = self.xs[np.argmax(self.segs >= ws)]
+        if seg in glob_rp:
+            mean, std = glob_rp[seg][0], glob_rp[seg][1]
+            high = mean + std*coe if mean + std*coe < self.opt.cap else self.opt.cap
+            low = mean - std*coe if mean - std*coe > 0 else 0
+            return low <= real_power <= high
+        else:
+            return True
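+
+    # Worked example (hedged, illustrative numbers): with bin stats mean=10,
+    # std=2, coe=2 and cap=20, the band is [10 - 4, 10 + 4] = [6, 14];
+    # real_power=12 is kept, real_power=18 is flagged as curtailed.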
+
+    def clean_limited_power(self, name, cluster=False):
+        glob_rp = self.segment_statis()
+        if cluster is True:
+            self.saveVar(os.path.dirname(current_path) + '/var/glob_rp.pickle', glob_rp)
+        new_weather_power, number = [], 0
+        for index, row in self.weather_power.iterrows():
+            zfs = row[self.opt.usable_power["env"]]
+            rp = row['C_REAL_VALUE']
+            if zfs < 0 or rp < 0:
+                continue
+            if self.filter_unlimited_power(zfs, rp, glob_rp):
+                row['c'] = 'red'      # kept: inside the unlimited-power band
+                new_weather_power.append(row)
+            else:
+                row['c'] = 'blue'     # flagged as curtailed
+                new_weather_power.append(row)
+        new_weather_power = pd.concat(new_weather_power, axis=1).T
+        new_weather_power.plot.scatter(x=self.opt.usable_power["env"], y='C_REAL_VALUE', c='c')
+        plt.savefig(current_path + '/figs/测风法{}.png'.format(name))
+        new_weather_power = new_weather_power[new_weather_power['c'] == 'red']
+        number = len(new_weather_power)
+        self.logger.info("before limited-power cleaning: {} rows in total".format(len(self.weather_power)))
+        self.logger.info("points kept after cleaning: " + str(number) + " ratio: " + str(round(number / len(self.weather_power), 2)))
+        return new_weather_power.loc[:, ['C_TIME', 'C_REAL_VALUE', 'C_ABLE_VALUE']]
+
+    def clean_limited_power_by_signal(self, name):
+        weather_power1 = self.weather_power.copy()
+        weather_power1["signal"] = weather_power1.apply(
+            lambda x: self.signal_result(x["C_IS_RATIONING_BY_MANUAL_CONTROL"], x["C_IS_RATIONING_BY_AUTO_CONTROL"]),
+            axis=1)
+        weather_power1['c'] = weather_power1.apply(lambda x: 'cornflowerblue' if bool(x["signal"]) is True else 'pink',
+                                                   axis=1)
+        weather_power1.plot.scatter(x=self.opt.usable_power["env"], y='C_REAL_VALUE', c='c')
+        plt.savefig(current_path + '/figs/信号法{}.png'.format(name))
+        weather_power1 = weather_power1[weather_power1['signal'] == False]
+        self.logger.info("signal method - before limited-power cleaning: {} rows in total".format(len(self.weather_power)))
+        self.logger.info("signal method - points kept after cleaning: " + str(len(weather_power1)) + " ratio: " + str(
+            round(len(weather_power1) / len(self.weather_power), 2)))
+        return weather_power1.loc[:, ['C_TIME', 'C_REAL_VALUE', 'C_ABLE_VALUE']]
+
+    def signal_result(self, manual, auto):
+        # curtailment signal from the manual/automatic rationing flags
+        if int(manual) == 0:
+            return int(auto) != 0
+        else:
+            return int(auto) == 1
+
+
+
+if __name__ == '__main__':
+    power = pd.read_csv('2023-12-01至2023-12-23实际功率导出文件.csv', parse_dates=['时间'])  # parse_dates, not date_parser
+    weather = pd.read_csv('2023-12-01至2023-12-23气象站数据导出文件.csv', parse_dates=['时间'])
+    weather_power = pd.merge(weather, power, on='时间')  # join the data

+ 75 - 0
DataMatplot/plt.py

@@ -0,0 +1,75 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2024/4/26 11:53
+# file: plt.py
+# author: David
+# company: shenyang JY
+
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+import math
+
+
+tower = pd.read_csv('./tower-1-process.csv')
+tower = tower.apply(pd.to_numeric, errors='ignore')
+powers = pd.read_csv('./power.csv')
+tower['C_TIME'] = pd.to_datetime(tower['C_TIME'])
+powers['C_TIME'] = pd.to_datetime(powers['C_TIME'])
+union = pd.merge(powers, tower, on='C_TIME')
+
+union.set_index('C_TIME', inplace=True)
+# union = union.loc['2023-06-01': '2023-06-10']
+union = union.loc['2023-06']
+union.reset_index(inplace=True)
+union = union[['C_TIME', 'C_REAL_VALUE', 'C_WS_INST_HUB_HEIGHT']]
+union['C_TIME'] = pd.to_datetime(union['C_TIME'])
+# union.to_csv("./趋势对比1分钟.csv", index=False)
+
+
+union5, union_index = [], [0]  # 15-minute aggregates and their marker indices
+ps, wss = 0, 0
+for i, power in union.iterrows():
+    real_value = power['C_REAL_VALUE']
+    ws = power["C_WS_INST_HUB_HEIGHT"]
+    ps += real_value
+    wss += ws
+    if str(power['C_TIME'].minute) in ('0', '15', '30', '45'):
+        union_index.append(i)
+        num = union_index[-1] - union_index[-2]
+        num = num if num != 0 else 1
+        psa = round(ps / num, 2)
+        wsa = round(wss / num, 2)
+        union5.append([power['C_TIME'], psa, wsa])
+        ps = 0
+        wss = 0
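+
+# Note (hedged): the loop above approximates a right-labelled 15-minute mean;
+# a roughly equivalent pandas one-liner (edge handling differs slightly) is:
+# union.set_index('C_TIME').resample('15min', closed='right', label='right').mean()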
+
+
+union5 = pd.DataFrame(union5, columns=['C_TIME', 'C_REAL_VALUE', 'C_WS_INST_HUB_HEIGHT'])
+union5.rename(columns={'C_REAL_VALUE':"C_REAL_VALUE_AVE", 'C_WS_INST_HUB_HEIGHT': 'C_WS_INST_HUB_HEIGHT_AVE'}, inplace=True)
+union5['C_TIME'] = pd.to_datetime(union5['C_TIME'])
+UNION = pd.merge(union, union5, on='C_TIME', how='left')
+
+UNION.to_csv('./趋势对比union1.csv', index=False)
+
+fig, ax = plt.subplots()
+wss, rps, indexs = [], [], []
+indexs1, wss1, rps1 = [], [], []
+UNION = UNION.iloc[:575, :]
+for index, row in UNION.iterrows():
+    ws = round(row['C_WS_INST_HUB_HEIGHT'] * 1.5, 3)       # wind speed scaled by 1.5, plotted on the same axis as power
+    ws_ave = round(row['C_WS_INST_HUB_HEIGHT_AVE'] * 1.5, 3)
+    rp = row['C_REAL_VALUE']
+    rp_ave = row['C_REAL_VALUE_AVE']
+    wss.append(ws)
+    rps.append(rp)
+    indexs.append(index)
+    if not math.isnan(rp_ave):
+        indexs1.append(index)
+        wss1.append(ws_ave)
+        rps1.append(rp_ave)
+ax.plot(indexs, rps, color='red', label='1minRP')
+ax.plot(indexs1, wss1, color='blue', label='15minWS')
+plt.legend()
+plt.savefig('./趋势对比4.png')

+ 10 - 0
DataMiss/data_missing.py

@@ -0,0 +1,10 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+# @FileName  :data_missing.py
+# @Time      :2025/3/17 16:01
+# @Author    :David
+# @Company: shenyang JY
+
+
+if __name__ == "__main__":
+    run_code = 0