#!/usr/bin/env python
# -*- coding: utf-8 -*-
# time: 2023/11/23 14:07
# file: clocking.py
# author: David
# company: shenyang JY
import pandas as pd
import threading
import datetime
import argparse
from cache.limited_power import LimitPower
from apscheduler.schedulers.background import BackgroundScheduler
import time
import os
import pickle
import numpy as np
from datetime import timedelta
# Seed NumPy's global RNG; Clock.material() draws placeholder feature columns from it.
np.random.seed(42)
from cache.calculate import calculate_acc
from cache.formula import Formulas, Assessment
from cache.inputData import DataBase
from cache.monitor import Monitor
# Directory of this module; every CSV path below is built relative to it.
current_path = os.path.dirname(__file__)
from pytz import timezone


class Clock(object):
    """Scheduled maintenance job for the forecast-correction model.

    On a cron schedule the job reloads the configuration, refreshes the local
    data files through ``DataBase``, optionally retrains the network
    (``repairing_model``) and re-fits, for each of the 16 forecast points
    ``T1``..``T16``, the two weights ``m`` and ``n`` used to blend the network
    output (``dq_fix``) with the history-based correction (``history``).
    """

    # Number of forecast points handled per run (configuration keys 'T1'..'T16').
    _POINTS = 16
    # Candidate blend weights are k / 170 for the integer steps k below.
    _COE_DIVISOR = 170
    _COE_STEPS_ASCENDING = range(5, 210)
    _COE_STEPS_DESCENDING = range(210, 5, -1)

    def __init__(self, logger, args, process, features, fmi, fix):
        """Store the collaborators and load the current configuration.

        :param logger: logger exposing ``info`` and ``critical``
        :param args: configuration handler exposing ``parse_args_and_yaml`` and ``save_args_yml``
        :param process: data-processing helper exposing ``get_train_data`` / ``get_test_data``
        :param features: feature builder exposing ``get_train_data`` / ``get_test_data``
        :param fmi: model wrapper exposing ``training`` and ``predict``
        :param fix: helper exposing ``history_error_clock``
        """
        self.logger = logger
        self.args = args
        self.process = process
        self.features = features
        self.fmi = fmi
        self.fix = fix
        self.opt = self.args.parse_args_and_yaml()
        self.mo = Monitor(logger, args)
        # Name of the power column the model is fitted against.
        self.target = self.opt.predict
        self.logger.info("---以 {} 进行修模---".format(self.target))
        self.lp = LimitPower(self.logger, self.args, None)

    def update_thread(self):
        """Start the scheduler from a separate thread so the caller is not blocked."""
        thread = threading.Thread(target=self.start_jobs)
        thread.start()

    def start_jobs(self):
        """Register the periodic jobs and start the background scheduler.

        ``calculate_coe`` runs daily at 23:00 (Asia/Shanghai) and the monitor's
        ``update_config`` runs every 60 seconds.
        """
        scheduler = BackgroundScheduler()
        scheduler.configure({'timezone': timezone("Asia/Shanghai")})
        scheduler.add_job(func=self.calculate_coe, trigger="cron", hour=23, minute=0)
        scheduler.add_job(func=self.mo.update_config, trigger="interval", seconds=60)
        scheduler.start()

    def date_diff(self, current_dt, repair_dt):
        """Return the number of days from ``repair_dt`` to ``current_dt``.

        :param current_dt: a ``datetime.date``
        :param repair_dt: a date string formatted as ``%Y-%m-%d``
        :return: signed day count (negative when ``current_dt`` is earlier)
        """
        difference = (current_dt - datetime.datetime.strptime(repair_dt, '%Y-%m-%d').date())
        return difference.days

    def cal_acc(self, df, target, opt):
        """Compute the accuracy of column ``target`` through ``calculate_acc``.

        The frame is copied and reshaped into the column names passed on to
        ``calculate_acc``: ``realValue``, ``ableValue`` and ``forecastAbleValue``.

        :param df: frame holding ``C_TIME``, ``C_REAL_VALUE`` and ``target``
        :param target: name of the forecast column to evaluate
        :param opt: options object forwarded to ``calculate_acc``
        :return: whatever ``calculate_acc`` returns
        """
        df = df.copy()
        df.rename(columns={'C_REAL_VALUE': 'realValue'}, inplace=True)
        df['ableValue'] = df['realValue']
        df['forecastAbleValue'] = df[target]
        df = df.apply(pd.to_numeric, errors='ignore')
        # to_numeric is applied to every column, so re-parse C_TIME afterwards.
        df['C_TIME'] = pd.to_datetime(df['C_TIME'])
        acc = calculate_acc(df, opt=opt)
        return acc

    def _advance_authentication_date(self):
        """Move ``opt.authentication['date']`` forward by one day."""
        dt = datetime.datetime.strptime(self.opt.authentication['date'], '%Y-%m-%d')
        dt = dt + timedelta(days=1)
        self.opt.authentication['date'] = dt.strftime('%Y-%m-%d')

    def _stored_stats(self, columns, skip=('C_TIME',)):
        """Look up the stored mean/std of every column not listed in ``skip``.

        :return: ``(means, stds)`` as two lists in column order; columns without
            a stored value yield ``None``
        """
        kept = [name for name in columns if name not in skip]
        means = [self.opt.mean.get(name) for name in kept]
        stds = [self.opt.std.get(name) for name in kept]
        return means, stds

    def _predict_test_frames(self, nwp, env, dq, rp_his):
        """Normalise the test inputs, run the model and attach its output.

        :return: the frames produced by ``features.get_test_data``, each with a
            ``dq_fix`` column holding the matching entry of the prediction
        """
        nwp = pd.merge(nwp, dq, on='C_TIME')
        if self.opt.full_field is False:
            nwp = nwp[self.opt.nwp_columns + ['C_REAL_VALUE']]
        mean, std = self._stored_stats(nwp.columns, skip=('C_TIME', 'C_REAL_VALUE'))
        _, _, nwp_features = self.normalize(nwp, ['C_TIME', 'C_REAL_VALUE'], mean=mean, std=std)

        env = pd.merge(env, rp_his, on='C_TIME')
        env = env.loc[:, self.opt.env_columns]
        mean, std = self._stored_stats(env.columns)
        _, _, env_features = self.normalize(env, mean=mean, std=std)

        data_test, env = self.process.get_test_data(nwp_features, env_features)
        # The two original call sites unpacked this result as 3 and as 2 values
        # respectively; taking the first and last element works for both shapes.
        # NOTE(review): confirm the actual return signature of get_test_data.
        test_X, *_, data_Y = self.features.get_test_data(data_test, env)
        result = self.fmi.predict(test_X, batch_size=8)
        # The prediction does not depend on the forecast point, so attach it once.
        for i, df in enumerate(data_Y):
            df["dq_fix"] = result[i]
        return data_Y

    def _point_forecast(self, data_Y, dq, point, zero_without_forecast):
        """Assemble the de-normalised, clipped forecast frame for one point.

        :param data_Y: frames returned by ``_predict_test_frames``
        :param dq: frame providing ``C_TIME`` and ``C_FP_VALUE``
        :param point: zero-based forecast point; row ``point`` of every frame is used
        :param zero_without_forecast: also force ``dq_fix`` to 0 where ``C_FP_VALUE`` is 0
        """
        pre_data = pd.concat([df.iloc[point] for df in data_Y], axis=1).T
        pre_data[["C_REAL_VALUE", "dq_fix"]] = pre_data[["C_REAL_VALUE", "dq_fix"]].apply(pd.to_numeric, errors='ignore')
        pre_data = pd.merge(pre_data, dq[['C_TIME', 'C_FP_VALUE']], on='C_TIME')
        # Undo the normalisation using the stored C_REAL_VALUE statistics.
        pre_data['dq_fix'] = pre_data['dq_fix'] * self.opt.std['C_REAL_VALUE'] + self.opt.mean['C_REAL_VALUE']
        pre_data['dq_fix'] = pre_data['dq_fix'].round(2)
        if zero_without_forecast:
            pre_data.loc[pre_data['C_FP_VALUE'].values == 0, 'dq_fix'] = 0
        # Clip to the physical range [0, cap].
        pre_data.loc[pre_data['dq_fix'] > self.opt.cap, 'dq_fix'] = self.opt.cap
        pre_data.loc[pre_data['dq_fix'] < 0, 'dq_fix'] = 0
        return pre_data

    def calculate_coe(self, install=False):
        """Nightly job: refresh data, optionally retrain, then re-fit the blend weights.

        Any exception is caught and logged at ``critical`` level so that the
        scheduler keeps running.

        :param install: when True, force a retraining using ``opt.repair_days``
            of extra history instead of ``opt.update_add_train_days``
        """
        try:
            start = time.time()
            self.logger.info("检测系统当前的时间为:{}".format(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start))))
            self.opt = self.args.parse_args_and_yaml()
            if self.opt.algorithm_platform['switch']:
                # Platform mode replays the configured date one day at a time.
                self._advance_authentication_date()
            else:
                self.opt.authentication['date'] = time.strftime('%Y-%m-%d', time.localtime(start))
            day_end = datetime.datetime.strptime(self.opt.authentication['date'], '%Y-%m-%d')
            coe_start = day_end - pd.Timedelta(days=self.opt.update_coe_days)
            extra_days = self.opt.repair_days if install is True else self.opt.update_add_train_days
            day_start_db = coe_start - pd.Timedelta(days=extra_days)
            if self.date_diff(day_start_db.date(), self.opt.authentication['full_cap']) < 0:
                # Never reach back before the configured 'full_cap' date.
                day_start_db = datetime.datetime.strptime(self.opt.authentication['full_cap'], '%Y-%m-%d')
                self.logger.info("更新初始修模起始时间为全容量并网:{}".format(self.opt.authentication['full_cap']))
            db = DataBase(begin=day_start_db, end=day_end, opt=self.opt, logger=self.logger)
            db.data_process()
            self.opt.cap = db.opt.cap
            self.args.save_args_yml(self.opt)
            self.lp.opt = self.opt
            formula = Formulas(self.opt)
            assess = Assessment(self.opt, self.logger)

            # Retrain on every multiple of the cycle up to day 30. A non-positive
            # cycle used to loop forever; it now simply schedules no retraining.
            day = time.localtime(start).tm_mday
            cycle = int(self.opt.repair_model_cycle)
            repair_days = range(cycle, 31, cycle) if cycle > 0 else ()
            if day in repair_days or install is True:
                # ------------------- model repair ------------------------
                self.repairing_model(day_start_db, coe_start)
                self.opt.authentication['repair'] = self.opt.authentication['date']
                self.args.save_args_yml(self.opt)

            self.logger.info("------------进入测试集自动计算-------------")
            coe_start += pd.Timedelta(days=1)
            nwp, env, dq, rp, rp_his = self.material(coe_start, day_end)

            # Record the first sample of the last day whose target value is positive.
            last_day = rp_his.set_index("C_TIME").loc[self.opt.authentication['date'], self.target]
            last_day_rps, last_day_dt = last_day.values, last_day.index
            sun_up = last_day_rps > 0
            if np.any(sun_up):
                sun_up_i = np.argmax(sun_up)
                self.opt.first_point['sun_up_time'] = last_day_dt[sun_up_i].strftime('%Y-%m-%d %H:%M:%S')
                self.opt.first_point['sun_up_value'] = float(last_day_rps[sun_up_i])

            data_Y = self._predict_test_frames(nwp, env, dq, rp_his)
            # 2. historical data
            for point in range(self._POINTS):
                T = 'T' + str(point + 1)
                if self.opt.coe[T]['update'] is False:
                    continue
                pre_data = self._point_forecast(data_Y, dq, point, zero_without_forecast=True)
                pre_data['history'] = self.fix.history_error_clock(pre_data, rp_his, self._POINTS - point - 1)
                self.logger.info("第{}点的时间周期为:{}-{}".format(T, pre_data['C_TIME'][0], pre_data['C_TIME'].iloc[-1]))
                self.update_coe(pre_data, assess, formula, point, test=False)
            self.args.save_args_yml(self.opt)
            if self.opt.algorithm_platform['switch']:
                self._advance_authentication_date()
                self.algorithm_platform()
            end = time.time()
            self.logger.info("定时任务:周期频率修模,用了 %.2f 秒 " % (end - start))
        except Exception as e:
            self.logger.critical("定时任务出错:{}".format(e.args))

    def repairing_model(self, day_start, day_end):
        """Retrain the network on the data between ``day_start`` and ``day_end``.

        Also refreshes ``opt.nwp_columns``, ``opt.env_columns`` and the model
        input sizes from the normalised frames, and pushes the updated options
        to the collaborators before training.
        """
        self.logger.info("-----进入FMI神经网络修模-----")
        nwp, env, dq, rp, rp_his = self.material(day_start, day_end, is_repair=True)
        nwp = pd.merge(nwp, dq, on='C_TIME')
        if self.opt.full_field is False:
            nwp = nwp[self.opt.nwp_columns + ['C_REAL_VALUE']]
        mean, std = self._stored_stats(nwp.columns)
        _, _, nwp_features = self.normalize(nwp, mean=mean, std=std)

        env = pd.merge(env, rp_his, on='C_TIME')
        env = env.loc[:, self.opt.env_columns]
        mean, std = self._stored_stats(env.columns)
        _, _, env_features = self.normalize(env, mean=mean, std=std)

        self.opt.nwp_columns = nwp_features.columns.to_list()
        self.opt.env_columns = env_features.columns.to_list()
        if 'C_REAL_VALUE' in self.opt.nwp_columns:
            self.opt.nwp_columns.remove('C_REAL_VALUE')
        # One less than the column count; presumably this excludes C_TIME — confirm.
        self.opt.Model["input_size_nwp"] = len(self.opt.nwp_columns) - 1
        self.opt.Model["input_size_env"] = len(self.opt.env_columns) - 1
        self.update_property()
        data_train, env = self.process.get_train_data(nwp_features, env_features)
        train_X, valid_X, train_Y, valid_Y = self.features.get_train_data(data_train, env)
        self.fmi.training(self.opt, [train_X, train_Y, valid_X, valid_Y])

    def _blend(self, frame, m, n, ndigits):
        """Return ``m * dq_fix + n * history`` rounded to ``ndigits`` decimals."""
        return round(m * frame['dq_fix'] + n * frame['history'], ndigits)

    def _evaluate_grid(self, pre_data, assess, region, steps):
        """Yield ``(m, n, acc, score)`` for every weight pair on the grid.

        ``pre_data['new']`` is overwritten with each candidate blend before it
        is evaluated by ``assess.electricity_solar_cdq``.
        """
        for i in steps:
            for j in steps:
                m, n = i / self._COE_DIVISOR, j / self._COE_DIVISOR
                pre_data["new"] = self._blend(pre_data, m, n, 3)
                acc, score = assess.electricity_solar_cdq(pre_data, region, 'new', output=False)
                yield m, n, acc, score

    def update_coe(self, pre_data, assess, formula, point, test=False):
        """Fit the blend weights of one forecast point and store them in ``opt.coe``.

        ``pre_data`` is joined on ``C_TIME`` with the rows of ``data/cdq.csv``
        whose ``C_FORECAST_HOW_LONG_AGO`` equals ``point + 1``. A grid search
        over ``m`` and ``n`` then selects the blend
        ``m * dq_fix + n * history``; the result is written to
        ``data/测试集<point+1>.csv`` and to ``opt.coe['T<point+1>']``.

        :param pre_data: frame with ``C_TIME``, ``C_REAL_VALUE``, ``C_FP_VALUE``,
            ``dq_fix`` and ``history``
        :param assess: object exposing ``electricity_solar_cdq``, which returns an
            ``(accuracy, score)`` pair
        :param formula: object exposing ``calculate_acc_northeast`` (test mode only)
        :param point: zero-based forecast point
        :param test: when not ``False``, use the accuracy-only search based on
            ``calculate_acc`` / ``calculate_acc_northeast``
        :return: the merged frame including the ``new`` column; when ``test`` is
            ``False`` it also carries ``coe-acc`` and ``coe-ass``
        """
        cdq = pd.read_csv(current_path + '/data/cdq.csv', parse_dates=['C_TIME'])
        cdq['C_TIME'] = pd.to_datetime(cdq['C_TIME'])
        cdq = cdq[cdq['C_FORECAST_HOW_LONG_AGO'] == int(point + 1)]
        pre_data = pd.merge(cdq, pre_data, on='C_TIME')
        T = 'T' + str(point + 1)
        if test is False:
            region = self.opt.Model['region']
            dq_acc, dq_score = assess.electricity_solar_cdq(pre_data, region, 'C_FP_VALUE')
            dq_fix_acc, dq_fix_score = assess.electricity_solar_cdq(pre_data, region, 'dq_fix')
            his_fix_acc, his_fix_score = assess.electricity_solar_cdq(pre_data, region, 'history')
            cdq_acc, cdq_score = assess.electricity_solar_cdq(pre_data, region, 'C_ABLE_VALUE')

            # Pass 1: highest accuracy; ascending scan, first strict improvement wins.
            best_acc, best, best_coe_m, best_coe_n = 0, 0, 0, 0
            for m, n, acc, acc_score in self._evaluate_grid(pre_data, assess, region, self._COE_STEPS_ASCENDING):
                if acc > best_acc:
                    best_acc, best, best_coe_m, best_coe_n = acc, acc_score, m, n
            self.logger.info(
                "1.过去{}天的短期的准确率:{:.4f},自动确认系数后,{} 超短期的准确率:{:.4f}, 超短期公式:{:.4f},神经网络:{:.4f},历史功率:{:.4f}".format(
                    str(self.opt.update_coe_days), dq_acc, T, best_acc, cdq_acc, dq_fix_acc, his_fix_acc))
            self.logger.info(
                "2.过去{}天的短期的考核分:{:.4f},自动确认系数后,{} 超短期的考核分:{:.4f},超短期公式:{:.4f},神经网络:{:.4f},历史功率:{:.4f}".format(
                    str(self.opt.update_coe_days), dq_score, T, best, cdq_score, dq_fix_score, his_fix_score))

            # Pass 2: lowest score; descending scan, first strict improvement wins.
            best_score, best, best_score_m, best_score_n = 999, 0, 0, 0
            for m, n, acc, acc_score in self._evaluate_grid(pre_data, assess, region, self._COE_STEPS_DESCENDING):
                if acc_score < best_score:
                    best_score, best, best_score_m, best_score_n = acc_score, acc, m, n
            self.logger.info(
                "3.过去{}天的短期的准确率:{:.4f},自动确认系数后,{} 超短期的准确率:{:.4f},超短期公式:{:.4f},神经网络:{:.4f},历史功率:{:.4f}".format(
                    str(self.opt.update_coe_days), dq_acc, T, best, cdq_acc, dq_fix_acc, his_fix_acc))
            self.logger.info(
                "4.过去{}天的短期的考核分:{:.4f},自动确认系数后,{} 超短期的考核分:{:.4f},超短期公式:{:.4f},神经网络:{:.4f},历史功率:{:.4f}".format(
                    str(self.opt.update_coe_days), dq_score, T, best_score, cdq_score, dq_fix_score, his_fix_score))

            # The configuration decides which of the two optima is kept.
            if self.opt.coe[T]['score']:
                chosen_m, chosen_n = best_score_m, best_score_n
            else:
                chosen_m, chosen_n = best_coe_m, best_coe_n
            self.opt.coe[T]['m'] = round(chosen_m, 3)
            self.opt.coe[T]['n'] = round(chosen_n, 3)
            pre_data["new"] = self._blend(pre_data, chosen_m, chosen_n, 3)
            pre_data.to_csv(current_path + '/data/测试集{}.csv'.format(point + 1), index=False)

            # algorithm_platform() selects these two columns from the returned
            # frame. They are added after the CSV export so the file is unchanged.
            # NOTE(review): assumed meaning — 'coe-acc' is the accuracy-optimal
            # blend and 'coe-ass' the score-optimal blend; confirm with the
            # consumer of the uploaded data.
            pre_data['coe-acc'] = self._blend(pre_data, best_coe_m, best_coe_n, 3)
            pre_data['coe-ass'] = self._blend(pre_data, best_score_m, best_score_n, 3)
        else:
            best, best_coe_m, best_coe_n = 0, 0, 0
            pre_data['时间'] = pre_data['C_TIME'].dt.strftime("%Y-%m-%d")
            cc = {
                'formulaType': 'DAY_SHORT_ACCURACY',
                'cap': '100',
                'province': 'E46',
                'electricType': 'E1',
                'stationCode': 'J00629'
            }
            config = argparse.Namespace(**cc)
            dq_accs, dq_fix_accs, cdq_accs, his_accs = [], [], [], []
            for dt, group in pre_data.groupby('时间'):
                dq_acc = self.cal_acc(group, 'C_FP_VALUE', config)
                dq_fix_acc = self.cal_acc(group, 'dq_fix', config)
                cdq_acc = self.cal_acc(group, 'C_ABLE_VALUE', config)
                his_acc = self.cal_acc(group, 'history', config)
                dq_accs.append(dq_acc)
                dq_fix_accs.append(dq_fix_acc)
                cdq_accs.append(cdq_acc)
                his_accs.append(his_acc)
                print(dt, "这一天, 短期准确率:", dq_acc, "超短期公式准确率:", cdq_acc, "神经网络准确率:", dq_fix_acc, "历史功率准确率:", his_acc)
            # The search previously read dq_fix / history / C_REAL_VALUE from the
            # raw cdq frame; those columns only exist on the merged frame, so
            # the merged frame is used here.
            for i in self._COE_STEPS_ASCENDING:
                for j in self._COE_STEPS_ASCENDING:
                    m, n = i / self._COE_DIVISOR, j / self._COE_DIVISOR
                    pre_data["new"] = self._blend(pre_data, m, n, 2)
                    acc = formula.calculate_acc_northeast(pre_data['C_REAL_VALUE'].values, pre_data['new'].values)
                    if acc > best:
                        best, best_coe_m, best_coe_n = acc, m, n
            pre_data['new'] = self._blend(pre_data, best_coe_m, best_coe_n, 2)
            pre_data.to_csv(current_path + '/data/测试集{}.csv'.format(point + 1), index=False)
            self.logger.info(
                "过去{}天的短期的准确率:{:.4f},自动确认系数后,{} 超短期:{:.4f},超短期公式:{:.4f},神经网络:{:.4f},历史功率:{:.4f}".format(
                    str(self.opt.update_coe_days), np.mean(dq_accs), T, best, np.mean(cdq_accs), np.mean(dq_fix_accs), np.mean(his_accs)))
            self.opt.coe[T]['m'] = round(best_coe_m, 3)
            self.opt.coe[T]['n'] = round(best_coe_n, 3)
        return pre_data

    def algorithm_platform(self):
        """Evaluate the last day and upload the per-point forecasts to MongoDB.

        Uses the day ending at ``opt.authentication['date']``. For every enabled
        forecast point the frame returned by ``update_coe`` is melted to long
        format (one row per time and model) and tagged with the farm id. Any
        exception is caught and logged at ``critical`` level.
        """
        try:
            start = time.time()
            day_end = datetime.datetime.strptime(self.opt.authentication['date'], '%Y-%m-%d')
            day_start = day_end - pd.Timedelta(days=1)
            db = DataBase(begin=day_start, end=day_end, opt=self.opt, logger=self.logger)
            db.data_process()
            formula = Formulas(self.opt)
            assess = Assessment(self.opt, self.logger)
            self.logger.info("------------进入测试集自动计算-------------")
            nwp, env, dq, rp, rp_his = self.material(day_start, day_end)
            data_Y = self._predict_test_frames(nwp, env, dq, rp_his)
            mongo_data = []
            # 2. historical data
            for point in range(self._POINTS):
                T = 'T' + str(point + 1)
                if self.opt.coe[T]['update'] is False:
                    continue
                # NOTE(review): unlike calculate_coe, this path never zeroed dq_fix
                # where C_FP_VALUE is 0; that difference is kept as found.
                pre_data = self._point_forecast(data_Y, dq, point, zero_without_forecast=False)
                pre_data['history'] = self.fix.history_error_clock(pre_data, rp_his, self._POINTS - point - 1)
                self.logger.info("第{}点的时间周期为:{}-{}".format(T, pre_data['C_TIME'][0], pre_data['C_TIME'].iloc[-1]))
                pre_data = self.update_coe(pre_data, assess, formula, point, test=False)
                pre_data['howLongAgo'] = point + 1
                pre_data = pre_data.loc[:, ['C_TIME', 'dq_fix', 'C_FP_VALUE', 'history', 'coe-acc', 'coe-ass', 'howLongAgo']]
                df_melted = pre_data.melt(id_vars=['C_TIME', 'howLongAgo'], var_name='model', value_name='power_forecast')
                df_melted['farm_id'] = self.opt.algorithm_platform['farm_id']
                mongo_data.append(df_melted)
            if mongo_data:
                # Imported lazily so the Mongo dependency is only needed in platform mode.
                from cache.mongo import insert_data_into_mongo
                mongo_data = pd.concat(mongo_data, axis=0)
                insert_data_into_mongo(mongo_data, self.opt.algorithm_platform)
            else:
                # pd.concat raises on an empty list; there is nothing to upload.
                self.logger.info("algorithm platform: no forecast point is enabled for update, upload skipped")
            end = time.time()
            self.logger.info("算法平台测试,用了 %s 秒 " % (end - start))
        except Exception as e:
            self.logger.critical("算法平台定时任务出错:{}".format(e.args))

    def update_property(self):
        """Push the current options object to the collaborators that cache it."""
        self.process.opt = self.opt
        self.features.opt = self.opt
        self.fix.opt = self.opt

    def material(self, begin, end, is_repair=False):
        """Load and prepare the NWP, weather, forecast and power data for a period.

        Power and weather are read from ``his_points / 4`` hours before
        ``begin``; NWP is read from ``begin``. Unless the target is
        ``C_ABLE_VALUE``, the power series is cleaned according to
        ``opt.usable_power['clean_power_which']``. With ``is_repair`` the
        per-column mean and standard deviation are recomputed and stored in
        ``opt.mean`` / ``opt.std``.

        :param begin: first day of the period (object with ``strftime``)
        :param end: last day of the period (object with ``strftime``)
        :param is_repair: True when preparing training data
        :return: ``(nwp, env, dq, rp, rp_his)`` data frames
        """
        his_begin = (begin - pd.Timedelta(hours=self.opt.Model["his_points"] / 4)).strftime('%Y-%m-%d %H:%M:%S')
        begin, end = begin.strftime('%Y-%m-%d'), end.strftime('%Y-%m-%d')
        if is_repair is True:
            self.logger.info("进入修模的起止时间为:{}-{}".format(begin, end))
        else:
            self.logger.info("进入测试集的起止时间为:{}-{}".format(begin, end))

        nwp = pd.read_csv(current_path + '/data/NWP.csv', parse_dates=['C_TIME'])
        rp = pd.read_csv(current_path + '/data/power.csv', parse_dates=['C_TIME'])
        dq = pd.read_csv(current_path + '/data/dq.csv', parse_dates=['C_TIME'])
        env = pd.read_csv(current_path + '/data/weather-{}-process.csv'.format(self.opt.weatherloc[0]), parse_dates=['C_TIME'])

        rp['C_TIME'] = pd.to_datetime(rp['C_TIME'])
        rp.set_index('C_TIME', inplace=True)
        rp = rp.loc[his_begin: end].reset_index()
        rp_his = rp.copy()

        if self.opt.Model['fusion'] is False:
            # Replace every environment feature except the usable-power one with
            # random placeholder values aligned to the power timestamps.
            env_fill = pd.DataFrame({'C_TIME': rp['C_TIME']})
            for col in self.opt.env_columns:
                if col not in ['C_TIME', 'C_REAL_VALUE', 'C_FP_VALUE', self.opt.usable_power['env']]:
                    env_fill[col] = np.random.rand(len(env_fill))
            env = pd.merge(env_fill, env.loc[:, ['C_TIME', self.opt.usable_power['env']]], on='C_TIME', how='left')
            # The left join can leave gaps; fill forwards, then backwards.
            # NOTE(review): the fill is assumed to belong to this branch only.
            env = env.ffill()
            env = env.bfill()

        if self.target == 'C_ABLE_VALUE':
            rp.drop(['C_REAL_VALUE'], axis=1, inplace=True)
            rp.rename(columns={'C_ABLE_VALUE': 'C_REAL_VALUE'}, inplace=True)
            rp_his = rp.copy()
        else:
            plt_name = '-训练集-' + begin + '-' + end if is_repair is True else '-测试集-' + begin + '-' + end
            weather_power = pd.merge(env, rp, on='C_TIME')
            self.lp.weather_power = weather_power
            clean_mode = self.opt.usable_power["clean_power_which"]
            if clean_mode == 0:
                rp = self.lp.clean_limited_power_by_signal(plt_name)
            elif clean_mode == 1:
                rp = self.lp.clean_limited_power(plt_name, is_repair=is_repair)
                if is_repair is True:
                    self.opt.usable_power['k'] = self.lp.opt.usable_power['k']
                    self.opt.usable_power['bias'] = self.lp.opt.usable_power['bias']
            elif clean_mode == 2:
                # Keep only the timestamps accepted by both cleaning methods.
                rp_signal = self.lp.clean_limited_power_by_signal(plt_name)
                rp_solar = self.lp.clean_limited_power(plt_name, is_repair=is_repair)
                time_intersection = set(rp_signal['C_TIME'])
                time_intersection.intersection_update(rp_solar['C_TIME'])
                rp = rp_solar[rp_solar['C_TIME'].isin(time_intersection)]
                if is_repair is True:
                    self.opt.usable_power['k'] = self.lp.opt.usable_power['k']
                    self.opt.usable_power['bias'] = self.lp.opt.usable_power['bias']
            elif clean_mode == 3:
                # Keep samples whose reference power exceeds the real power by
                # less than 20% of capacity.
                rp['diff'] = rp['C_REFERENCE_POWER_BY_SAMPLE'] - rp['C_REAL_VALUE']
                rp = rp[rp['diff'] < 0.2 * self.opt.cap]
            elif clean_mode == 4:
                rp['diff'] = rp['C_REFERENCE_POWER_BY_SAMPLE'] - rp['C_REAL_VALUE']
                rp_sample = rp[rp['diff'] < 0.2 * self.opt.cap]
                rp_signal = self.lp.clean_limited_power_by_signal(plt_name)
                time_intersection = set(rp_sample['C_TIME'])
                time_intersection.intersection_update(rp_signal['C_TIME'])
                rp = rp_sample[rp_sample['C_TIME'].isin(time_intersection)]
            else:
                self.logger.info("不进行限电清洗")
            # NOTE(review): this post-processing is assumed to belong to the
            # non-C_ABLE_VALUE branch, because it needs the C_ABLE_VALUE column
            # that the other branch renames away.
            rp = rp.loc[:, ['C_TIME', 'C_REAL_VALUE']]
            rp['C_REAL_VALUE'] = rp['C_REAL_VALUE'].apply(pd.to_numeric)
            # Samples removed by the cleaning fall back to C_ABLE_VALUE.
            rp_his = pd.merge(rp_his.loc[:, ['C_TIME', 'C_ABLE_VALUE']], rp, on='C_TIME', how='left')
            rp_his['C_REAL_VALUE'] = rp_his['C_REAL_VALUE'].fillna(rp_his['C_ABLE_VALUE'])
            rp_his = pd.merge(rp_his, dq, on='C_TIME')
            rp_his = rp_his.loc[:, ['C_TIME', 'C_FP_VALUE', 'C_ABLE_VALUE', 'C_REAL_VALUE']]
            dq = rp_his[rp_his['C_TIME'].isin(rp['C_TIME'])].copy()
            dq.drop(columns=['C_ABLE_VALUE'], inplace=True)

        nwp.set_index('C_TIME', inplace=True)
        nwp = nwp.loc[begin: end].reset_index()
        env.set_index('C_TIME', inplace=True)
        env = env.loc[his_begin: end].reset_index()

        if is_repair:
            # Refresh the stored statistics from the training data.
            mean, std = {}, {}
            for df in [nwp, env, dq, rp]:
                m, s, _ = self.normalize(df)
                mean.update(m)
                std.update(s)
            self.opt.mean = {k: float(v) for k, v in mean.items()}
            self.opt.std = {k: float(v) for k, v in std.items()}
        return nwp, env, dq, rp, rp_his

    def saveVar(self, path, data):
        """Pickle ``data`` to ``path``, creating the parent directory if needed."""
        os.makedirs(os.path.dirname(path), exist_ok=True)
        with open(path, 'wb') as file:
            pickle.dump(data, file)

    def normalize(self, df, drop_list=None, mean=None, std=None):
        """Standardise the columns of ``df`` as ``(x - mean) / std``, rounded to 3 decimals.

        Columns named in ``drop_list`` are excluded from the computation and
        re-inserted unchanged at their original positions. Names that are not
        columns of ``df`` are ignored.

        :param df: input frame (left unmodified)
        :param drop_list: pass-through column names; defaults to ``['C_TIME']``
        :param mean: per-column means, as a list in column order or a Series keyed
            by column name; computed from ``df`` when ``mean`` or ``std`` is None
        :param std: per-column standard deviations, same convention as ``mean``
        :return: ``(mean, std, normalised_frame)``
        """
        drop_list = ['C_TIME'] if drop_list is None else list(drop_list)
        columns = list(df.columns)
        # Ascending positions guarantee every insert index is valid.
        passthrough = sorted((columns.index(name), name) for name in drop_list if name in columns)
        df1 = df.drop(columns=drop_list, errors='ignore')
        df1 = df1.apply(pd.to_numeric, errors='ignore')
        if mean is None or std is None:
            mean = np.mean(df1, axis=0).round(3)  # column means
            std = np.std(df1, axis=0).round(3)  # column standard deviations
        # Column-wise arithmetic: a list is matched to the columns by position,
        # a Series by label.
        new = ((df1 - mean) / std).round(3)  # standardisation
        for position, name in passthrough:
            new.insert(position, name, df[name].values)
        return mean, std, new


if __name__ == '__main__':
    pass