
Test out a good mood 0122

David 3 months ago
commit
20e6040bf0

+ 11 - 0
.gitignore

@@ -0,0 +1,11 @@
+*/__pycache__
+/__pycache__
+/.idea
+/checkpoint
+/logs
+/cache/data/*.csv
+/var
+*.log
+*.swp
+app_test.py
+app_test_fmi.py

+ 7 - 0
README.md

@@ -0,0 +1,7 @@
+## Ultra-Short-Term Power Forecasting System
+
+### Main function: API algorithm package for ultra-short-term power forecasting
+
+### Features:
+    See the interface documentation for details
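+
+A minimal request sketch (assumptions: the service is reachable over plain HTTP at the uwsgi address in app.ini; endpoint names are taken from app.py):
+
+    import requests
+    # /forecastVersion returns the configured version string; /neu (POST) serves the forecast itself
+    print(requests.get('http://127.0.0.1:9008/forecastVersion').text)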
+

+ 15 - 0
app.ini

@@ -0,0 +1,15 @@
+[uwsgi]
+http = 0.0.0.0:9008
+chdir = /root/桌面/ipfcst-forecast-solar3-tf2/
+wsgi-file = app_uwsgi.py
+callable = app
+processes = 1
+threads = 1
+# stats = 127.0.0.1:9191
+# pidfile = uwsgi.pid
+# master = true
+# daemonize = ./log/uwsgi.log
+# lazy-apps = true
+# master-fifo = /opt/mt-search/web-service/mfifo
+# touch-chain-reload = true
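+# launch (see app_uwsgi.py): uwsgi --ini app.ini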
+

+ 196 - 0
app.py

@@ -0,0 +1,196 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/3/27 16:29
+# file: app.py
+# author: David
+# company: shenyang JY
+import os
+import numpy as np
+np.random.seed(42)
+import pandas as pd
+from flask import Flask, request, g
+from startup import start_up
+from cache.clocking import Clock
+import threading
+import json, time
+from datetime import datetime
+
+app = Flask(__name__)
+
+with app.app_context():
+    import tensorflow as tf
+    global graph, sess
+    tf.compat.v1.set_random_seed(1234)
+    graph = tf.compat.v1.get_default_graph()
+    session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
+    sess = tf.compat.v1.Session(graph=graph, config=session_conf)
+
+    logger, va, args, req, process, features, fmi, fix = start_up(graph, sess)  # program initialization
+    # model = fmi.fmi_model
+    # instantiate the scheduled-task class
+    clock = Clock(logger=logger, args=args, process=process, features=features, fmi=fmi, fix=fix)
+    logger.info("定时任务类初始化")
+    # clock.calculate_coe(cluster=True)  # at a real station, repair the model first
+    clock.update_thread()  # start the scheduled jobs
+    result = {
+        "errorCode": 1,
+        "msg": "无异常",
+        "res": []
+    }
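+    # NOTE: module-level response template shared by all requests; this is only safe
+    # with the single-process/single-thread setup in app.ini (processes=1, threads=1)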
+
+
+@app.before_request
+def update_config():
+    print("-----------------beofore_request------------------")
+    opt = args.parse_args_and_yaml()
+    g.opt = opt
+    va.opt = opt
+    process.opt = opt
+    features.opt = opt
+    fix.opt = opt
+    # va.status = 0
+
+
+# class StandaloneApplication(gunicorn.app.base.BaseApplication):
+#     def __init__(self, app, options=None):
+#         self.options = options or {}
+#         self.application = app
+#         super().__init__()
+#
+#     def load_config(self):
+#         config = {key: value for key, value in self.options.items()
+#                   if key in self.cfg.settings and value is not None}
+#         for key, value in config.items():
+#             self.cfg.set(key.lower(), value)
+#
+#     def load(self):
+#         return self.application
+integral = 0
+
+@app.route('/neu', methods=['post'])
+def cdq():
+    try:
+        opt = g.opt
+        start = time.time()
+        # initialize the request-handling class
+        history_dq, history_rp, env, nwp, dq = req.get_form_data(request)
+        his = va.validate_his_data(history_rp, env, history_dq).reset_index(drop=True)
+        print("----进入预处理算法----")
+        history_rp = va.validate_power(his)
+        history_rp.rename(columns={'NEW_RP': 'C_REAL_VALUE'}, inplace=True)
+        his.drop(columns=['C_REAL_VALUE'], axis=1, inplace=True)
+        his = pd.merge(his, history_rp, on='C_TIME')
+        s1 = time.time()
+        logger.info(f"测光-信号限电处理-执行时间:{(s1 - start) * 1000:.2f}毫秒")
+
+        nwp = pd.merge(nwp, dq, on='C_TIME')
+        nwp = va.validate_nwp(nwp)
+        nwp = process.get_predict_data(nwp, dq)
+        va.status = 0
+
+        va.validate_authentic(dq, history_dq)
+        start1 = time.time()
+        logger.info(f"数据验证-执行时间:{(start1 - s1) * 1000:.2f}毫秒")
+
+        mean = [opt.mean.get(x) for x in opt.nwp_columns if x not in ['C_TIME']]
+        std = [opt.std.get(x) for x in opt.nwp_columns if x not in ['C_TIME']]
+        nwp = nwp[opt.nwp_columns]
+        _, _, nwp_features = clock.normalize(nwp, mean=mean, std=std)
+
+        if len(nwp_features) > opt.Model["output_size"]:
+            nwp_features = nwp_features.head(opt.Model["output_size"])
+            dq = dq.head(opt.Model["output_size"])
+
+        mean = [opt.mean.get(x) for x in opt.env_columns if x not in ['C_TIME']]
+        std = [opt.std.get(x) for x in opt.env_columns if x not in ['C_TIME']]
+        his = his[opt.env_columns]
+        _, _, env_features = clock.normalize(his, mean=mean, std=std)
+        start2 = time.time()
+        logger.info(f"归一化-执行时间:{(start2 - start1) * 1000:.2f}毫秒")
+
+        test_X = features.get_realtime_data([nwp_features], env_features)
+        start3 = time.time()
+        logger.info(f"预处理及特征处理-执行时间:{(start3 - start2) * 1000:.2f}毫秒")
+
+        logger.info("-----进入超短期预测算法-----")
+        res = fmi.predict(test_X)[0]
+        res = np.array([r * opt.std['C_REAL_VALUE'] + opt.mean['C_REAL_VALUE'] for r in res])
+        res[res < 0] = 0  # 如果出现负数,置为0
+        res[res > opt.cap] = opt.cap  # 出现大于实际装机量的数,置为实际装机量
+        res = np.around(res, decimals=2)
+        start4 = time.time()
+        logger.info(f"算法推理-执行时间:{(start4 - start3) * 1000:.2f}毫秒")
+
+        dq_res = fix.history_error(history_dq, history_rp, dq)
+        dq_res['dq_fix'] = res
+        res = fix.cdq(dq_res)
+        end = time.time()
+        logger.info(f"生成超短期-执行时间:{(end - start4) * 1000:.2f}毫秒")
+        logger.info(f"总时间:{(end - start) * 1000:.2f}毫秒")
+
+        logger.info("----{}".format(res))
+        va.status = 1
+        result["errorCode"] = 1
+        result["res"] = res
+        result["msg"] = "无异常"
+        return json.dumps(result, ensure_ascii=False)
+    except Exception as e:
+        global integral
+        logger.error(e.args)
+        if va.status == 2 and integral > 60:
+            va.status = 3
+            integral = 0
+        result["errorCode"] = va.status if va.status != 1 else 0
+        result["res"] = None
+        result["msg"] = e.args
+        return json.dumps(result, ensure_ascii=False)
+
+
+@app.route('/forecastVersion', methods=['get'])
+def forecast_version():
+    return g.opt.version
+
+
+def date_diff(current_dt, repair_dt):
+    format_pattern = '%Y-%m-%d'
+    difference = (datetime.strptime(current_dt, format_pattern) - datetime.strptime(repair_dt, format_pattern))
+    return difference.days
+
+@app.route('/last_model_update', methods=['get'])
+def last_model_update():
+    dt = time.strftime('%Y-%m-%d', time.localtime(time.time()))
+    repair, repair_dt = int(g.opt.repair_model_cycle), g.opt.authentication['repair']
+    if repair_dt == 'null':
+        return {"model_status": 0, "time": 'null', "msg": "neu算法:未修模"}
+    elif date_diff(dt, repair_dt) > repair*2:
+        return {"model_status": 1, "time": repair_dt, "msg": "neu算法:距上次修模{}天".format(date_diff(dt, repair_dt))}
+    elif va.status != 1:
+        status_msg = {2: "环境数据缺失", 3: "重载环境数据"}
+        global integral
+        if va.status == 2 and integral <= 60:
+            integral += 1
+        return {"model_status": 1, "time": repair_dt, "msg": "neu算法:接口状态{}".format(status_msg.get(va.status, '检查'))}
+    else:
+        return {"model_status": 2, "time": repair_dt, "msg": "neu算法:修模正常"}
+
+
+if __name__ == "__main__":
+    opt = args.parse_args_and_yaml()
+    current_path = os.path.dirname(__file__)
+    # gunicorn_config = {
+    #     'bind': '%s:%s' % ('0.0.0.0', str(opt.port)),
+    #     'certfile': current_path + '/ssl/server.pem',
+    #     'keyfile': current_path + '/ssl/server.key',
+    #     "check_config": True,
+    #     "worker_class": "gthread",
+    #     "workers": 1,
+    #     "threads": 1,
+    #     'timeout': 100,
+    #     "loglevel": "info",
+    #     "access_log_format": "gunicorn %(h)s - %(t)s - %(r)s - %(s)s - %(f)s",
+    #     "backlog": 30,
+    # }
+    threading.Thread(target=clock.calculate_coe, args=(True,)).start()
+    # # start the service
+    # StandaloneApplication(app, options=gunicorn_config).run()
+    app.run(host='0.0.0.0', port=opt.port, debug=False, ssl_context=(current_path + '/ssl/server.pem', current_path + '/ssl/server.key'))

+ 183 - 0
app_gunicorn.py

@@ -0,0 +1,183 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/3/27 16:29
+# file: app_gunicorn.py
+# author: David
+# company: shenyang JY
+import os
+import numpy as np
+np.random.seed(42)
+import pandas as pd
+from flask import Flask, request
+from startup import start_up
+from cache.clocking import Clock
+import gunicorn.app.base
+import threading
+import json, time
+from datetime import datetime
+
+app = Flask(__name__)
+
+with app.app_context():
+    # import tensorflow as tf
+    # global graph, sess
+    # tf.compat.v1.set_random_seed(1234)
+    # graph = tf.compat.v1.get_default_graph()
+    # session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
+    # sess = tf.compat.v1.Session(graph=graph, config=session_conf)
+
+    logger, va, args, req, process, features, fmi = start_up()  # program initialization
+    # model = fmi.fmi_model
+    # instantiate the scheduled-task class
+    clock = Clock(logger=logger, args=args, process=process, features=features, fmi=fmi)
+    logger.info("定时任务类初始化")
+    # clock.calculate_coe(cluster=True)  # at a real station, repair the model first
+    clock.update_thread()  # start the scheduled jobs
+    result = {
+        "errorCode": 1,
+        "msg": "无异常",
+        "res": []
+    }
+
+
+@app.before_request
+def update_config():
+    print("-----------------beofore_request------------------")
+    global opt
+    opt = args.parse_args_and_yaml()
+    va.opt = opt
+    process.opt = opt
+    va.status = 0
+
+
+class StandaloneApplication(gunicorn.app.base.BaseApplication):
+    def __init__(self, app, options=None):
+        self.options = options or {}
+        self.application = app
+        super().__init__()
+
+    def load_config(self):
+        config = {key: value for key, value in self.options.items()
+                  if key in self.cfg.settings and value is not None}
+        for key, value in config.items():
+            self.cfg.set(key.lower(), value)
+
+    def load(self):
+        return self.application
+
+
+@app.route('/neu', methods=['post'])
+def cdq():
+    try:
+        start = time.time()
+        # initialize the request-handling class
+        nwp, dq, history_dq, history_rp, env = req.get_form_data(request)
+        print("----进入预处理算法----")
+        history_rp = va.validate_power(history_rp, env)
+        history_rp.drop(['C_REAL_VALUE'], axis=1, inplace=True)
+        history_rp.rename(columns={'NEW_RP': 'C_REAL_VALUE'}, inplace=True)
+        s1 = time.time()
+        logger.info(f"1解析数据验证-执行时间:{(s1 - start) * 1000}毫秒")
+
+        nwp = pd.merge(nwp, dq, on='C_TIME')
+        his = pd.merge(history_rp, history_dq, on='C_TIME')
+        his = pd.merge(env, his, on='C_TIME')
+        nwp = va.validate_nwp(nwp)
+        his = va.validate_env(his)
+        va.validate_authentic(dq, history_dq)
+        start1 = time.time()
+        logger.info(f"2解析数据验证-执行时间:{(start1 - s1) * 1000}毫秒")
+
+        mean = [opt.mean.get(x) for x in opt.nwp_columns if x not in ['C_TIME']]
+        std = [opt.std.get(x) for x in opt.nwp_columns if x not in ['C_TIME']]
+        nwp = nwp[opt.nwp_columns]
+        _, _, nwp_features = clock.normalize(nwp, mean=mean, std=std)
+
+        mean = [opt.mean.get(x) for x in opt.env_columns if x not in ['C_TIME']]
+        std = [opt.std.get(x) for x in opt.env_columns if x not in ['C_TIME']]
+        his = his[opt.env_columns]
+        _, _, env_features = clock.normalize(his, mean=mean, std=std)
+        start2 = time.time()
+        logger.info(f"归一化-执行时间:{(start2 - start1) * 1000}毫秒")
+
+        data_test, env = process.get_test_data(nwp_features, env_features)
+        test_X = features.get_realtime_data(data_test, env)
+        start3 = time.time()
+        logger.info(f"特征处理-执行时间:{(start3 - start2) * 1000}毫秒")
+
+        logger.info("-----进入超短期预测算法-----")
+        # with graph.as_default():
+        #     with sess.as_default():
+        res = fmi.fmi_model.predict(test_X, batch_size=1)[0]
+        start4 = time.time()
+        logger.info(f"算法推理-执行时间:{(start4 - start3) * 1000}毫秒")
+
+        # res = fmi.fmi_model.predict(opt, test_X)[0]
+        res = np.array([r*opt.std['C_REAL_VALUE'] + opt.mean['C_REAL_VALUE'] for r in res])
+        res = np.array([r*opt.calculate['coe'] + opt.calculate['abs'] for r in res])
+        res[res < 0] = 0
+        res[res > opt.cap] = opt.cap
+        res = np.around(res, decimals=2)
+        times = dq['C_TIME'].dt.strftime('%Y-%m-%d %H:%M:%S').values
+        res = [{"C_TIME": times[i], "CDQ_VALUE": x} for i, x in enumerate(res)]
+        end = time.time()
+        logger.info(f"反归一化-执行时间:{(end - start4) * 1000}毫秒")
+        print(f"总时间:{(end - start) * 1000}毫秒")
+
+        logger.info("----{}".format(res))
+        result["errorCode"] = 1
+        result["res"] = res
+        result["msg"] = "无异常"
+        return json.dumps(result, ensure_ascii=False)
+    except Exception as e:
+        logger.error(e.args)
+        result["errorCode"] = va.status if va.status != 1 else 0
+        result["res"] = None
+        result["msg"] = e.args
+        return json.dumps(result, ensure_ascii=False)
+
+
+@app.route('/forecastVersion', methods=['get'])
+def forecast_version():
+    opt = args.parse_args_and_yaml()
+    return opt.version
+
+
+def date_diff(current_dt, repair_dt):
+    format_pattern = '%Y-%m-%d'
+    difference = (datetime.strptime(current_dt, format_pattern) - datetime.strptime(repair_dt, format_pattern))
+    return difference.days
+
+
+@app.route('/last_model_update', methods=['get'])
+def last_model_update():
+    dt = time.strftime('%Y-%m-%d', time.localtime(time.time()))
+    repair, repair_dt = int(opt.repair_model_cycle), opt.authentication['repair']
+    if repair_dt == 'null':
+        return {"model_status": 0, "time": 'null', "msg": "未修模"}
+    elif date_diff(dt, repair_dt) > repair*2:
+        return {"model_status": 1, "time": repair_dt, "msg": "距上次修模已过{}天".format(date_diff(dt, repair_dt))}
+    else:
+        return {"model_status": 2, "time": repair_dt, "msg": "修模正常"}
+
+
+if __name__ == "__main__":
+    opt = args.parse_args_and_yaml()
+    current_path = os.path.dirname(__file__)
+    gunicorn_config = {
+        'bind': '%s:%s' % ('0.0.0.0', str(opt.port)),
+        'certfile': current_path + '/ssl/server.pem',
+        'keyfile': current_path + '/ssl/server.key',
+        "check_config": True,
+        "worker_class": "gthread",
+        "workers": 1,
+        "threads": 1,
+        'timeout': 100,
+        "loglevel": "info",
+        "access_log_format": "gunicorn %(h)s - %(t)s - %(r)s - %(s)s - %(f)s",
+        "backlog": 30,
+    }
+    threading.Thread(target=clock.calculate_coe, args=(True,)).start()
+    # # start the service
+    StandaloneApplication(app, options=gunicorn_config).run()
+    # app.run(host='0.0.0.0', port=7999, debug=False)

+ 167 - 0
app_uwsgi.py

@@ -0,0 +1,167 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/3/27 16:29
+# file: app_uwsgi.py
+# author: David
+# company: shenyang JY
+import os
+import numpy as np
+np.random.seed(42)
+import pandas as pd
+from flask import Flask, request
+from startup import start_up
+from cache.clocking import Clock
+import threading
+import json, time
+from datetime import datetime
+
+app = Flask(__name__)
+
+with app.app_context():
+    # import tensorflow as tf
+    # global graph, sess
+    # tf.compat.v1.set_random_seed(1234)
+    # graph = tf.compat.v1.get_default_graph()
+    # session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
+    # sess = tf.compat.v1.Session(graph=graph, config=session_conf)
+
+    logger, va, args, req, process, features, fmi = start_up()  # program initialization
+    # model = fmi.fmi_model
+    # instantiate the scheduled-task class
+    clock = Clock(logger=logger, args=args, process=process, features=features, fmi=fmi)
+    logger.info("定时任务类初始化")
+    # clock.calculate_coe(cluster=True)  # at a real station, repair the model first
+    clock.update_thread()  # start the scheduled jobs
+    result = {
+        "errorCode": 1,
+        "msg": "无异常",
+        "res": []
+    }
+
+
+@app.before_request
+def update_config():
+    print("-----------------beofore_request------------------")
+    global opt
+    opt = args.parse_args_and_yaml()
+    va.opt = opt
+    process.opt = opt
+    va.status = 0
+
+
+@app.route('/neu', methods=['post'])
+def cdq():
+    try:
+        start = time.time()
+        # initialize the request-handling class
+        nwp, dq, history_dq, history_rp, env = req.get_form_data(request)
+        print("----进入预处理算法----")
+        history_rp = va.validate_power(history_rp, env)
+        history_rp.drop(['C_REAL_VALUE'], axis=1, inplace=True)
+        history_rp.rename(columns={'NEW_RP': 'C_REAL_VALUE'}, inplace=True)
+        s1 = time.time()
+        logger.info(f"1解析数据验证-执行时间:{(s1 - start) * 1000}毫秒")
+
+        nwp = pd.merge(nwp, dq, on='C_TIME')
+        his = pd.merge(history_rp, history_dq, on='C_TIME')
+        his = pd.merge(env, his, on='C_TIME')
+        nwp = va.validate_nwp(nwp)
+        his = va.validate_env(his)
+        va.validate_authentic(dq, history_dq)
+        start1 = time.time()
+        logger.info(f"2解析数据验证-执行时间:{(start1 - s1) * 1000}毫秒")
+
+        mean = [opt.mean.get(x) for x in opt.nwp_columns if x not in ['C_TIME']]
+        std = [opt.std.get(x) for x in opt.nwp_columns if x not in ['C_TIME']]
+        nwp = nwp[opt.nwp_columns]
+        _, _, nwp_features = clock.normalize(nwp, mean=mean, std=std)
+
+        mean = [opt.mean.get(x) for x in opt.env_columns if x not in ['C_TIME']]
+        std = [opt.std.get(x) for x in opt.env_columns if x not in ['C_TIME']]
+        his = his[opt.env_columns]
+        _, _, env_features = clock.normalize(his, mean=mean, std=std)
+        start2 = time.time()
+        logger.info(f"归一化-执行时间:{(start2 - start1) * 1000}毫秒")
+
+        data_test, env = process.get_test_data(nwp_features, env_features)
+        test_X = features.get_realtime_data(data_test, env)
+        start3 = time.time()
+        logger.info(f"特征处理-执行时间:{(start3 - start2) * 1000}毫秒")
+
+        logger.info("-----进入超短期预测算法-----")
+        # with graph.as_default():
+        #     with sess.as_default():
+        res = fmi.fmi_model.predict(test_X, batch_size=1)[0]
+        start4 = time.time()
+        logger.info(f"算法推理-执行时间:{(start4 - start3) * 1000}毫秒")
+
+        # res = fmi.fmi_model.predict(opt, test_X)[0]
+        res = np.array([r*opt.std['C_REAL_VALUE'] + opt.mean['C_REAL_VALUE'] for r in res])
+        res = np.array([r*opt.calculate['coe'] + opt.calculate['abs'] for r in res])
+        res[res < 0] = 0
+        res[res > opt.cap] = opt.cap
+        res = np.around(res, decimals=2)
+        times = dq['C_TIME'].dt.strftime('%Y-%m-%d %H:%M:%S').values
+        res = [{"C_TIME": times[i], "CDQ_VALUE": x} for i, x in enumerate(res)]
+        end = time.time()
+        logger.info(f"反归一化-执行时间:{(end - start4) * 1000}毫秒")
+        print(f"总时间:{(end - start) * 1000}毫秒")
+
+        logger.info("----{}".format(res))
+        result["errorCode"] = 1
+        result["res"] = res
+        result["msg"] = "无异常"
+        return json.dumps(result, ensure_ascii=False)
+    except Exception as e:
+        logger.error(e.args)
+        result["errorCode"] = va.status if va.status != 1 else 0
+        result["res"] = None
+        result["msg"] = e.args
+        return json.dumps(result, ensure_ascii=False)
+
+
+@app.route('/forecastVersion', methods=['get'])
+def forecast_version():
+    opt = args.parse_args_and_yaml()
+    return opt.version
+
+
+def date_diff(current_dt, repair_dt):
+    format_pattern = '%Y-%m-%d'
+    difference = (datetime.strptime(current_dt, format_pattern) - datetime.strptime(repair_dt, format_pattern))
+    return difference.days
+
+
+@app.route('/last_model_update', methods=['get'])
+def last_model_update():
+    dt = time.strftime('%Y-%m-%d', time.localtime(time.time()))
+    repair, repair_dt = int(opt.repair_model_cycle), opt.authentication['repair']
+    if repair_dt == 'null':
+        return {"model_status": 0, "time": 'null', "msg": "未修模"}
+    elif date_diff(dt, repair_dt) > repair*2:
+        return {"model_status": 1, "time": repair_dt, "msg": "距上次修模已过{}天".format(date_diff(dt, repair_dt))}
+    else:
+        return {"model_status": 2, "time": repair_dt, "msg": "修模正常"}
+
+
+if __name__ == "__main__":
+    opt = args.parse_args_and_yaml()
+    current_path = os.path.dirname(__file__)
+    gunicorn_config = {
+        'bind': '%s:%s' % ('0.0.0.0', str(opt.port)),
+        'certfile': current_path + '/ssl/server.pem',
+        'keyfile': current_path + '/ssl/server.key',
+        "check_config": True,
+        "worker_class": "gthread",
+        "workers": 1,
+        "threads": 1,
+        'timeout': 100,
+        "loglevel": "info",
+        "access_log_format": "gunicorn %(h)s - %(t)s - %(r)s - %(s)s - %(f)s",
+        "backlog": 30,
+    }
+    threading.Thread(target=clock.calculate_coe, args=(True,)).start()
+    # # start the service
+    # app.run(host='0.0.0.0', port=9008, debug=False)
+    init_file = './app.ini'
+    os.system("uwsgi --ini {}".format(init_file))  # uwsgi takes its config file via --ini

+ 56 - 0
app_uwsgi.spec

@@ -0,0 +1,56 @@
+# -*- mode: python ; coding: utf-8 -*-
+
+uWSGI_EXECUTABLE = '/usr/local/bin/uwsgi'
+block_cipher = None
+
+
+a = Analysis(
+    ['app_uwsgi.py'],
+    pathex=[],
+    binaries=[(uWSGI_EXECUTABLE, '.')],
+    datas=[('./config.yml', './'), ('./cache/data/*.csv', './cache/data/'), ('./var', './var'), ('./ssl', './ssl'), ('app.ini', '.')],
+    hiddenimports=[],
+    hookspath=[],
+    hooksconfig={},
+    runtime_hooks=[],
+    excludes=[],
+    win_no_prefer_redirects=False,
+    win_private_assemblies=False,
+    cipher=block_cipher,
+    noarchive=False,
+)
+# TOC entries are tuples (name, path, typecode); tuples are immutable, so rebuild
+# the list to rename the bundled uwsgi binary instead of assigning bi[1] in place
+a.binaries = [('uwsgi', path, typ) if path == uWSGI_EXECUTABLE else (name, path, typ)
+              for name, path, typ in a.binaries]
+
+pyz = PYZ(a.pure, a.zipped_data, cipher=block_cipher)
+
+exe = EXE(
+    pyz,
+    a.scripts,
+    [],
+    exclude_binaries=True,
+    name='app_uwsgi',
+    debug=False,
+    bootloader_ignore_signals=False,
+    strip=False,
+    upx=True,
+    console=True,
+    disable_windowed_traceback=False,
+    argv_emulation=False,
+    target_arch=None,
+    codesign_identity=None,
+    entitlements_file=None,
+)
+
+# exe.files.append(uWSGI_EXECUTABLE)
+coll = COLLECT(
+    exe,
+    a.binaries,
+    a.zipfiles,
+    a.datas,
+    strip=False,
+    upx=True,
+    upx_exclude=[],
+    name='app_uwsgi',
+)

+ 75 - 0
bin/cdq

@@ -0,0 +1,75 @@
+#!/bin/sh
+#
+# cdq	This shell script takes care of starting and stopping
+#		the ipfcst-cdq subsystem (cdq).
+#
+# chkconfig: 345 64 36
+# description:	ipfcst-cdq.
+# processname: cdq
+
+# Source function library.
+#. /etc/rc.d/init.d/functions
+
+# Source networking configuration.
+#. /etc/sysconfig/network
+
+# Support for extra options passed to cdq
+command=$1 && shift
+extra_opts="$@"
+
+start(){
+    sh /home/syjy/ipfcstV3/bin/start-cdq.sh
+}
+
+stop(){
+	sh /home/syjy/ipfcstV3/bin/stop-cdq.sh
+}
+
+restart(){
+    stop
+    start
+}
+
+status(){
+	sh /home/syjy/ipfcstV3/bin/status-cdq.sh
+}
+
+
+# See how we were called.
+case "$command" in
+  start)
+	start
+	;;
+  startup)
+	start
+	;;
+  stop)
+    stop
+    ;;
+  shutdown)
+    stop
+    ;;
+  status)
+    status
+    ;;
+  state)
+    status
+    ;;
+  restart)
+    restart
+    ;;
+  restartup)
+    restart
+    ;;
+  reload)
+    exit 3
+    ;;
+  force-reload)
+    restart
+    ;;
+  *)
+    echo $"Usage: $0 {start|startup|stop|shutdown|status|state|restart|restartup|reload|force-reload}"
+    exit 2
+esac
+
+exit $?

+ 13 - 0
bin/cdq.service

@@ -0,0 +1,13 @@
+[Unit]
+Description=ipfcst-cdq
+After=network.target
+
+[Service]
+Type=forking
+User=root
+ExecStart=/home/syjy/ipfcstV3/bin/start-cdq.sh
+ExecStop=/home/syjy/ipfcstV3/bin/stop-cdq.sh
+# systemd has no ExecRestart directive; ExecReload is the closest supported hook
+ExecReload=/home/syjy/ipfcstV3/bin/restart-cdq.sh
+
+[Install]
+WantedBy=multi-user.target

+ 58 - 0
bin/install-ipfcst-centos.sh

@@ -0,0 +1,58 @@
+#!/bin/sh
+
+export basedir=/home/syjy/ipfcstV3
+
+chmod -R 777 $basedir
+
+cd $basedir/jdk
+
+tar -zxvf jdk-8u261-linux-x64.tar.gz
+
+if test -e /home/syjy/patch.zip
+	then
+	cd /home/syjy
+	unzip -o patch.zip
+fi
+
+cp -f $basedir/settings/monitor /etc/init.d/
+cp -f $basedir/settings/console /etc/init.d/
+cp -f $basedir/settings/reportquery /etc/init.d/
+cp -f $basedir/settings/cdq /etc/init.d/
+
+chmod 777 /etc/init.d/monitor
+chmod 777 /etc/init.d/console
+chmod 777 /etc/init.d/reportquery
+chmod 777 /etc/init.d/cdq
+
+
+if test -x /usr/lib/systemd/system
+	then
+	cp -f $basedir/settings/monitor.service /usr/lib/systemd/system
+	cp -f $basedir/settings/console.service /usr/lib/systemd/system
+	cp -f $basedir/settings/reportquery.service /usr/lib/systemd/system
+	cp -f $basedir/settings/cdq.service /usr/lib/systemd/system
+	
+	chmod 777 /usr/lib/systemd/system/monitor.service
+	chmod 777 /usr/lib/systemd/system/console.service
+	chmod 777 /usr/lib/systemd/system/reportquery.service
+	chmod 777 /usr/lib/systemd/system/cdq.service
+	
+	systemctl daemon-reload
+	systemctl enable monitor
+	systemctl enable console
+	systemctl enable reportquery
+	systemctl enable cdq
+	
+	else
+	chkconfig monitor on
+	chkconfig console on
+	chkconfig reportquery on
+	chkconfig cdq on
+fi
+
+service monitor start
+service console start
+service reportquery start
+service cdq start
+
+echo "Installation is complete!"

+ 5 - 0
bin/restart-cdq.sh

@@ -0,0 +1,5 @@
+export basedir=/home/syjy/ipfcstV3
+export bindir=$basedir/bin
+sh $bindir/stop-cdq.sh
+sh $bindir/start-cdq.sh
+exit 0

+ 20 - 0
bin/start-cdq.sh

@@ -0,0 +1,20 @@
+#!/bin/sh
+
+export app_path=/home/syjy/ipfcstV3/cdq
+
+
+export LANG="zh_CN.UTF-8"
+export LC_ALL="zh_CN.UTF-8"
+
+cd $app_path
+echo "Starting cdq"
+ulimit -n 65535
+if test -x $app_path/app
+  then
+	nohup $app_path/app > output.out 2>&1 &
+	echo "cdq started, PID is $!" | sed -e "s/\b\(.\)/\u\1/g"
+  else
+	echo "can't find cdq server($app_path/app)"
+fi
+
+exit 0

+ 16 - 0
bin/status-cdq.sh

@@ -0,0 +1,16 @@
+#!/bin/sh
+
+pid_list="$(ps aux | grep cdq/app | grep -v grep | awk '{print $2}')"
+pid_count=`echo $pid_list | wc -w`
+if test $pid_count -eq 1 ; then
+  echo "cdq running, pid is $pid_list"
+  exit 0
+elif test $pid_count -gt 1 ; then
+  echo "Multiple cdq running $pid_list"
+  exit 1
+elif test -z "$pid_list" ; then
+  echo "cdq is not running"
+  exit 1
+fi
+
+exit 0

+ 17 - 0
bin/stop-cdq.sh

@@ -0,0 +1,17 @@
+#!/bin/sh
+
+echo "Shutting down cdq"
+pid_list="$(ps aux | grep cdq/app | grep -v grep | awk '{print $2}')"
+if test -z "$pid_list" ; then
+	echo "cdq is not running"
+else
+	for pid in $pid_list;  
+	do  
+	if kill -0 "$pid" 2>/dev/null; then
+		echo "kill pid $pid"
+		kill -9 $pid
+	fi
+	done  
+fi
+
+exit 0

+ 86 - 0
cache/calculate.py

@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/6/16 10:15
+# file: calculate.py
+# author: David
+# company: shenyang JY
+import requests
+import json, time, datetime
+
+url = 'http://49.4.78.194:17160/apiCalculate/calculate'
+'''
+How to use the accuracy interface:
+1. The entry point is calculate_acc.
+2. Arguments:
+   data must contain C_TIME (timestamp), realValue (actual power), ableValue (available power; use actual power when unavailable) and forecastAbleValue (forecast power)
+   opt is a dict with the required station information: cap (installed capacity), province, formulaType (formula type), electricType (station type), stationCode (station code)
+   see the interface documentation for details
+3. Formulas are computed either per day or per point; select the type via opt.formulaType, then the per-day or per-point results are averaged and returned
+'''
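+# A minimal usage sketch of the notes above; the station values below are hypothetical
+# (borrowed from a test config elsewhere in this repo):
+#   import pandas as pd
+#   from argparse import Namespace
+#   df = pd.DataFrame({'C_TIME': pd.to_datetime(['2023-06-16 10:15:00']),
+#                      'realValue': [42.0], 'ableValue': [42.0],
+#                      'forecastAbleValue': [40.5]})
+#   opt = Namespace(cap='100', province='E46', formulaType='DAY_SHORT_ACCURACY',
+#                   electricType='E1', stationCode='J00629')
+#   acc = calculate_acc(df, opt)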
+
+
+def wrap_json(df, opt):
+    """
+    Wrap the data as JSON.
+    :param df: DataFrame with columns C_TIME, realValue, ableValue, forecastAbleValue
+    :param opt: parameter dict
+    :return: list of JSON strings
+    """
+    d = opt.formulaType.split('_')[0]
+    jata, dfs = [], []
+    if d == 'POINT':
+        df['time'] = df['C_TIME'].apply(datetime_to_timestamp)
+        for i, row in df.iterrows():
+            dfs.append(row.to_frame().T)
+    elif d == 'DAY':
+        df['time'] = df['C_TIME'].apply(datetime_to_timestamp)
+        df['C_TIME'] = df['C_TIME'].dt.strftime('%y%m%d')   # convert to year-month-day
+        for i, group in df.groupby('C_TIME'):
+            dfs.append(group)
+    outter_dict = {"electricCapacity": str(opt.cap), "province": opt.province, "formulaType": opt.formulaType, "electricType":opt.electricType, "stationCode": opt.stationCode}
+    timestamp = int(time.mktime(datetime.datetime.now().timetuple()) * 1000 + datetime.datetime.now().microsecond / 1000.0)
+    inner_dict = {"genTime": str(timestamp)+"L", "capacity": str(opt.cap), "openCapacity": str(opt.cap)}
+    for df in dfs:
+        calculationInfoList = df.iloc[:, 1:].to_json(orient='records')
+        outter_dict['calculationInfoList'] = [dict(calculation, **inner_dict) for calculation in json.loads(calculationInfoList)]  # json.loads instead of eval on serialized text
+        jata.append(json.dumps(outter_dict))
+    return jata
+
+
+def send_reqest(url, jata):
+    """
+    Send the request.
+    :param url: request URL
+    :param jata: JSON payloads
+    :return: accuracy
+    """
+    headers = {
+        'content-type': 'application/json;charset=UTF-8',
+        "Authorization": "dXNlcjoxMjM0NTY="
+    }
+    acc, number = 0, 0
+    for i in range(len(jata)):
+        res = requests.post(url, headers=headers, data=jata[i])
+        if res.json()['code'] == '500':
+            print("没通过考核标准", res.json()['msg'])
+            continue
+        number += 1
+        acc += float(res.json()['data'][:-1])
+    acc = acc / number if number != 0 else -99
+    return acc
+
+
+def calculate_acc(data, opt):
+    """
+    Compute accuracy through the remote interface.
+    :param data: DataFrame with columns C_TIME, realValue, ableValue, forecastAbleValue
+    :param opt: parameter fields
+    :return: computed result
+    """
+    jata = wrap_json(data, opt)
+    acc = send_reqest(url=url, jata=jata)
+    return acc
+
+
+def datetime_to_timestamp(dt):
+    return int(round(time.mktime(dt.timetuple()))*1000)

+ 479 - 0
cache/clocking.py

@@ -0,0 +1,479 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/11/23 14:07
+# file: clocking.py
+# author: David
+# company: shenyang JY
+import pandas as pd
+import threading
+import datetime
+from cache.limited_power import LimitPower
+from apscheduler.schedulers.background import BackgroundScheduler
+import time
+import os
+import pickle
+import numpy as np
+from datetime import timedelta
+np.random.seed(42)
+from cache.calculate import calculate_acc
+from cache.formula import Formulas, Assessment
+from cache.inputData import DataBase
+from cache.monitor import Monitor
+current_path = os.path.dirname(__file__)
+from pytz import timezone
+
+
+class Clock(object):
+
+    def __init__(self, logger, args, process, features, fmi, fix):
+        self.logger = logger
+        self.args = args
+        self.process = process
+        self.features = features
+        self.fmi = fmi
+        self.fix = fix
+        self.opt = self.args.parse_args_and_yaml()
+        self.mo = Monitor(logger, args)
+        self.target = self.opt.predict
+        self.logger.info("---以 {} 进行修模---".format(self.target))
+        self.lp = LimitPower(self.logger, self.args, None)
+
+    def update_thread(self):
+        thread = threading.Thread(target=self.start_jobs)
+        thread.start()
+
+    def start_jobs(self):
+        scheduler = BackgroundScheduler()
+        scheduler.configure({'timezone': timezone("Asia/Shanghai")})
+        scheduler.add_job(func=self.calculate_coe, trigger="cron", hour=23, minute=0)
+        scheduler.add_job(func=self.mo.update_config, trigger="interval", seconds=60)
+        scheduler.start()
+
+    def date_diff(self, current_dt, repair_dt):
+        difference = (current_dt - datetime.datetime.strptime(repair_dt, '%Y-%m-%d').date())
+        return difference.days
+
+    def cal_acc(self, df, target, opt):
+        df = df.copy()
+        df.rename(columns={'C_REAL_VALUE': 'realValue'}, inplace=True)
+        df['ableValue'] = df['realValue']
+        df['forecastAbleValue'] = df[target]
+        df = df.apply(pd.to_numeric, errors='ignore')
+        df['C_TIME'] = pd.to_datetime(df['C_TIME'])
+        acc = calculate_acc(df, opt=opt)
+        return acc
+
+    def calculate_coe(self, install=False):
+        try:
+            start = time.time()
+            self.logger.info("检测系统当前的时间为:{}".format(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start))))
+            self.opt = self.args.parse_args_and_yaml()
+            if self.opt.algorithm_platform['switch']:
+                dt = datetime.datetime.strptime(self.opt.authentication['date'], '%Y-%m-%d')
+                dt = dt + timedelta(days=1)
+                self.opt.authentication['date'] = dt.strftime('%Y-%m-%d')
+            else:
+                self.opt.authentication['date'] = time.strftime('%Y-%m-%d', time.localtime(start))
+            day_end = datetime.datetime.strptime(self.opt.authentication['date'], '%Y-%m-%d')
+            coe_start = day_end - pd.Timedelta(days=self.opt.update_coe_days)
+            day_start_db = coe_start - pd.Timedelta(days=self.opt.repair_days) if install is True else coe_start - pd.Timedelta(days=self.opt.update_add_train_days)
+            if self.date_diff(day_start_db.date(), self.opt.authentication['full_cap']) < 0:
+                day_start_db = datetime.datetime.strptime(self.opt.authentication['full_cap'], '%Y-%m-%d')
+                self.logger.info("更新初始修模起始时间为全容量并网:{}".format(self.opt.authentication['full_cap']))
+            db = DataBase(begin=day_start_db, end=day_end, opt=self.opt, logger=self.logger)
+            db.data_process()
+            self.opt.cap = db.opt.cap
+            self.args.save_args_yml(self.opt)
+            self.lp.opt = self.opt
+            formula = Formulas(self.opt)
+            assess = Assessment(self.opt, self.logger)
+            day = int(time.strftime('%d', time.localtime(start)))
+            repair, repairs = int(self.opt.repair_model_cycle), []
+            terval = repair
+            while repair <= 30:
+                repairs.append(repair)
+                repair += terval
+            if day in repairs or install is True:
+                # ------------------- model repair ------------------------
+                self.repairing_model(day_start_db, coe_start)
+                self.opt.authentication['repair'] = self.opt.authentication['date']
+                self.args.save_args_yml(self.opt)
+            self.logger.info("------------进入测试集自动计算-------------")
+            coe_start += pd.Timedelta(days=1)
+            nwp, env, dq, rp, rp_his = self.material(coe_start, day_end)
+            rp_his.set_index("C_TIME", inplace=True)
+            last_day_rps = rp_his.loc[self.opt.authentication['date'], self.target].values
+            last_day_dt = rp_his.loc[self.opt.authentication['date'], self.target].index
+            rp_his.reset_index(drop=False, inplace=True)
+            sun_up = last_day_rps > 0
+            if np.any(sun_up):
+                sun_up_i = np.argmax(sun_up)
+                self.opt.first_point['sun_up_time'] = last_day_dt[sun_up_i].strftime('%Y-%m-%d %H:%M:%S')
+                self.opt.first_point['sun_up_value'] = float(last_day_rps[sun_up_i])
+            nwp = pd.merge(nwp, dq, on='C_TIME')
+            if self.opt.full_field is False:
+                nwp = nwp[self.opt.nwp_columns + ['C_REAL_VALUE']]
+            mean = [self.opt.mean.get(x) for x in nwp.columns.to_list() if x not in ['C_TIME', 'C_REAL_VALUE']]
+            std = [self.opt.std.get(x) for x in nwp.columns.to_list() if x not in ['C_TIME', 'C_REAL_VALUE']]
+            _, _, nwp_features = self.normalize(nwp, ['C_TIME', 'C_REAL_VALUE'], mean=mean, std=std)
+
+            env = pd.merge(env, rp_his, on='C_TIME')
+            env = env.loc[:, self.opt.env_columns]
+            mean = [self.opt.mean.get(x) for x in env.columns.to_list() if x not in ['C_TIME']]
+            std = [self.opt.std.get(x) for x in env.columns.to_list() if x not in ['C_TIME']]
+            _, _, env_features = self.normalize(env, mean=mean, std=std)
+
+            data_test, env = self.process.get_test_data(nwp_features, env_features)
+            test_X, test_Y, data_Y = self.features.get_test_data(data_test, env)
+            result = self.fmi.predict(test_X, batch_size=8)
+            # 2. historical data
+            for point in range(0, 16, 1):
+                dfs_point = []
+                for i, df in enumerate(data_Y):
+                    df["dq_fix"] = result[i]
+                    dfs_point.append(df.iloc[point])
+                pre_data = pd.concat(dfs_point, axis=1).T
+                pre_data[["C_REAL_VALUE", "dq_fix"]] = pre_data[["C_REAL_VALUE", "dq_fix"]].apply(pd.to_numeric, errors='ignore')
+                pre_data = pd.merge(pre_data, dq[['C_TIME', 'C_FP_VALUE']], on='C_TIME')
+                pre_data['dq_fix'] = pre_data['dq_fix'] * self.opt.std['C_REAL_VALUE'] + self.opt.mean['C_REAL_VALUE']
+                pre_data['dq_fix'] = pre_data['dq_fix'].round(2)
+
+                pre_data.loc[pre_data['C_FP_VALUE'].values == 0, 'dq_fix'] = 0
+                pre_data.loc[pre_data['dq_fix'] > self.opt.cap, 'dq_fix'] = self.opt.cap
+                pre_data.loc[pre_data['dq_fix'] < 0, 'dq_fix'] = 0
+
+                T = 'T' + str(point + 1)
+                if self.opt.coe[T]['update'] is False:
+                    continue
+                pre_data['history'] = self.fix.history_error_clock(pre_data, rp_his, 16 - point - 1)
+                self.logger.info("第{}点的时间周期为:{}-{}".format(T, pre_data['C_TIME'][0], pre_data['C_TIME'].iloc[-1]))
+                self.update_coe(pre_data, assess, formula, point, test=False)
+            self.args.save_args_yml(self.opt)
+            if self.opt.algorithm_platform['switch']:
+                dt = datetime.datetime.strptime(self.opt.authentication['date'], '%Y-%m-%d')
+                dt = dt + timedelta(days=1)
+                self.opt.authentication['date'] = dt.strftime('%Y-%m-%d')
+                self.algorithm_platform()
+            end = time.time()
+            self.logger.info("定时任务:周期频率修模,用了 %.2f 秒 " % (end - start))
+
+        except Exception as e:
+            self.logger.critical("定时任务出错:{}".format(e.args))
+
+    def repairing_model(self, day_start, day_end):
+        self.logger.info("-----进入FMI神经网络修模-----")
+        nwp, env, dq, rp, rp_his = self.material(day_start, day_end, is_repair=True)
+        nwp = pd.merge(nwp, dq, on='C_TIME')
+        if self.opt.full_field is False:
+            nwp = nwp[self.opt.nwp_columns+['C_REAL_VALUE']]
+        mean = [self.opt.mean.get(x) for x in nwp.columns.to_list() if x not in ['C_TIME']]
+        std = [self.opt.std.get(x) for x in nwp.columns.to_list() if x not in ['C_TIME']]
+        _, _, nwp_features = self.normalize(nwp, mean=mean, std=std)
+
+        env = pd.merge(env, rp_his, on='C_TIME')
+        env = env.loc[:, self.opt.env_columns]
+        mean = [self.opt.mean.get(x) for x in env.columns.to_list() if x not in ['C_TIME']]
+        std = [self.opt.std.get(x) for x in env.columns.to_list() if x not in ['C_TIME']]
+        _, _, env_features = self.normalize(env, mean=mean, std=std)
+
+        self.opt.nwp_columns = nwp_features.columns.to_list()
+        self.opt.env_columns = env_features.columns.to_list()
+        if 'C_REAL_VALUE' in self.opt.nwp_columns:
+            self.opt.nwp_columns.pop(self.opt.nwp_columns.index('C_REAL_VALUE'))
+        self.opt.Model["input_size_nwp"] = len(self.opt.nwp_columns) - 1
+        self.opt.Model["input_size_env"] = len(self.opt.env_columns) - 1
+
+        self.update_property()
+
+        data_train, env = self.process.get_train_data(nwp_features, env_features)
+        train_X, valid_X, train_Y, valid_Y = self.features.get_train_data(data_train, env)
+        self.fmi.training(self.opt, [train_X, train_Y, valid_X, valid_Y])
+
+    def update_coe(self, pre_data, assess, formula, point, test=False):
+        cdq = pd.read_csv(current_path + '/data/cdq.csv', parse_dates=['C_TIME'])
+        cdq['C_TIME'] = pd.to_datetime(cdq['C_TIME'])
+        cdq = cdq[cdq['C_FORECAST_HOW_LONG_AGO'] == int(point + 1)]
+        pre_data = pd.merge(cdq, pre_data, on='C_TIME')
+        T = 'T' + str(point + 1)
+        if test is False:
+            best_acc, best, best_coe_m, best_coe_n = 0, 0, 0, 0
+            region = self.opt.Model['region']
+            dq_acc, dq_score = assess.electricity_solar_cdq(pre_data, region, 'C_FP_VALUE')
+            dq_fix_acc, dq_fix_score = assess.electricity_solar_cdq(pre_data, region, 'dq_fix')
+            his_fix_acc, his_fix_score = assess.electricity_solar_cdq(pre_data, region, 'history')
+            cdq_acc, cdq_score = assess.electricity_solar_cdq(pre_data, region, 'C_ABLE_VALUE')
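+            # grid search over blend coefficients m=i/170, n=j/170:
+            # new = m*dq_fix + n*history, keeping the pair that maximizes accuracy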
+            for i in range(5, 210):
+                for j in range(5, 210):
+                    pre_data["new"] = round(i / 170 * pre_data['dq_fix'] + j / 170 * pre_data['history'], 3)
+                    acc, acc_score = assess.electricity_solar_cdq(pre_data, region, 'new', output=False)
+                    if acc > best_acc:
+                        best_acc = acc
+                        best = acc_score
+                        best_coe_m = i / 170
+                        best_coe_n = j / 170
+
+            self.logger.info(
+                "1.过去{}天的短期的准确率:{:.4f},自动确认系数后,{} 超短期的准确率:{:.4f}, 超短期公式:{:.4f},神经网络:{:.4f},历史功率:{:.4f}".format(
+                    str(self.opt.update_coe_days), dq_acc, T, best_acc, cdq_acc, dq_fix_acc, his_fix_acc))
+            self.logger.info(
+                "2.过去{}天的短期的考核分:{:.4f},自动确认系数后,{} 超短期的考核分:{:.4f},超短期公式:{:.4f},神经网络:{:.4f},历史功率:{:.4f}".format(
+                    str(self.opt.update_coe_days), dq_score, T, best, cdq_score, dq_fix_score, his_fix_score))
+            best_score, best, best_score_m, best_score_n = 999, 0, 0, 0
+            for i in range(210, 5, -1):
+                for j in range(210, 5, -1):
+                    pre_data["new"] = round(i / 170 * pre_data['dq_fix'] + j / 170 * pre_data['history'], 3)
+                    acc, acc_score = assess.electricity_solar_cdq(pre_data, region, 'new', output=False)
+                    if acc_score < best_score:
+                        best_score = acc_score
+                        best = acc
+                        best_score_m = i / 170
+                        best_score_n = j / 170
+            self.logger.info(
+                "3.过去{}天的短期的准确率:{:.4f},自动确认系数后,{} 超短期的准确率:{:.4f},超短期公式:{:.4f},神经网络:{:.4f},历史功率:{:.4f}".format(
+                    str(self.opt.update_coe_days), dq_acc, T, best, cdq_acc, dq_fix_acc, his_fix_acc))
+            self.logger.info(
+                "4.过去{}天的短期的考核分:{:.4f},自动确认系数后,{} 超短期的考核分:{:.4f},超短期公式:{:.4f},神经网络:{:.4f},历史功率:{:.4f}".format(
+                    str(self.opt.update_coe_days), dq_score, T, best_score, cdq_score, dq_fix_score, his_fix_score))
+            if self.opt.coe[T]['score']:
+                self.opt.coe[T]['m'] = round(best_score_m, 3)
+                self.opt.coe[T]['n'] = round(best_score_n, 3)
+                pre_data["new"] = round(best_score_m * pre_data['dq_fix'] + best_score_n * pre_data['history'], 3)
+            else:
+                self.opt.coe[T]['m'] = round(best_coe_m, 3)
+                self.opt.coe[T]['n'] = round(best_coe_n, 3)
+                pre_data["new"] = round(best_coe_m * pre_data['dq_fix'] + best_coe_n * pre_data['history'], 3)
+            pre_data.to_csv(current_path + '/data/测试集{}.csv'.format(point + 1), index=False)
+        else:
+            best, best_coe_m, best_coe_n = 0, 0, 0
+            pre_data['时间'] = pre_data['C_TIME'].dt.strftime("%Y-%m-%d")
+
+            cc = {
+                'formulaType': 'DAY_SHORT_ACCURACY',
+                'cap': '100',
+                'province': 'E46',
+                'electricType': 'E1',
+                'stationCode': 'J00629'
+            }
+            import argparse
+            config = argparse.Namespace(**cc)
+
+            cdqs = pre_data.groupby('时间')
+            dq_accs, dq_fix_accs, cdq_accs, his_accs = [], [], [], []
+            for dt, group in cdqs:
+                dq_acc = self.cal_acc(group, 'C_FP_VALUE', config)
+                dq_fix_acc = self.cal_acc(group, 'dq_fix', config)
+                cdq_acc = self.cal_acc(group, 'C_ABLE_VALUE', config)
+                his_acc = self.cal_acc(group, 'history', config)
+                dq_accs.append(dq_acc)
+                dq_fix_accs.append(dq_fix_acc)
+                cdq_accs.append(cdq_acc)
+                his_accs.append(his_acc)
+                print(dt, "这一天, 短期准确率:", dq_acc, "超短期公式准确率:", cdq_acc, "神经网络准确率:", dq_fix_acc,
+                      "历史功率准确率:", his_acc)
+
+            # NOTE: assumes the merged pre_data frame is the intended grid-search target;
+            # the raw cdq frame read from cdq.csv does not carry dq_fix/history columns
+            for i in range(5, 210):
+                for j in range(5, 210):
+                    pre_data["new"] = round(i / 170 * pre_data['dq_fix'] + j / 170 * pre_data['history'], 2)
+                    acc = formula.calculate_acc_northeast(pre_data['C_REAL_VALUE'].values, pre_data['new'].values)
+                    if acc > best:
+                        best = acc
+                        best_coe_m = i / 170
+                        best_coe_n = j / 170
+            pre_data['new'] = round(best_coe_m * pre_data['dq_fix'] + best_coe_n * pre_data['history'], 2)
+            pre_data.to_csv(current_path + '/data/测试集{}.csv'.format(point + 1), index=False)
+            self.logger.info(
+                "过去{}天的短期的准确率:{:.4f},自动确认系数后,{} 超短期:{:.4f},超短期公式:{:.4f},神经网络:{:.4f},历史功率:{:.4f}".format(
+                    str(self.opt.update_coe_days), np.mean(dq_accs), T, best, np.mean(cdq_accs), np.mean(dq_fix_accs),
+                    np.mean(his_accs)))
+            self.opt.coe[T]['m'] = round(best_coe_m, 3)
+            self.opt.coe[T]['n'] = round(best_coe_n, 3)
+
+    def algorithm_platform(self):
+        try:
+            start = time.time()
+            day_end = datetime.datetime.strptime(self.opt.authentication['date'], '%Y-%m-%d')
+            day_start = day_end - pd.Timedelta(days=1)
+            db = DataBase(begin=day_start, end=day_end, opt=self.opt, logger=self.logger)
+            db.data_process()
+            formula = Formulas(self.opt)
+            assess = Assessment(self.opt, self.logger)
+            self.logger.info("------------进入测试集自动计算-------------")
+            nwp, env, dq, rp, rp_his = self.material(day_start, day_end)
+            nwp = pd.merge(nwp, dq, on="C_TIME")
+            if self.opt.full_field is False:
+                nwp = nwp[self.opt.nwp_columns + ['C_REAL_VALUE']]
+            mean = [self.opt.mean.get(x) for x in nwp.columns.to_list() if x not in ['C_TIME', 'C_REAL_VALUE']]
+            std = [self.opt.std.get(x) for x in nwp.columns.to_list() if x not in ['C_TIME', 'C_REAL_VALUE']]
+            _, _, nwp_features = self.normalize(nwp, ['C_TIME', 'C_REAL_VALUE'], mean=mean, std=std)
+
+            env = pd.merge(env, rp_his, on='C_TIME')
+            env = env.loc[:, self.opt.env_columns]
+            mean = [self.opt.mean.get(x) for x in env.columns.to_list() if x not in ['C_TIME']]
+            std = [self.opt.std.get(x) for x in env.columns.to_list() if x not in ['C_TIME']]
+            _, _, env_features = self.normalize(env, mean=mean, std=std)
+
+            data_test, env = self.process.get_test_data(nwp_features, env_features)
+            test_X, data_Y = self.features.get_test_data(data_test, env)
+            result = self.fmi.predict(test_X, batch_size=8)
+            mongo_data = []
+            # 2. historical data
+            for point in range(0, 16, 1):
+                dfs_point = []
+                for i, df in enumerate(data_Y):
+                    df["dq_fix"] = result[i]
+                    dfs_point.append(df.iloc[point])
+                pre_data = pd.concat(dfs_point, axis=1).T
+                pre_data[["C_REAL_VALUE", "dq_fix"]] = pre_data[["C_REAL_VALUE", "dq_fix"]].apply(pd.to_numeric,
+                                                                                                  errors='ignore')
+                pre_data = pd.merge(pre_data, dq[['C_TIME', 'C_FP_VALUE']], on='C_TIME')
+                pre_data['dq_fix'] = pre_data['dq_fix'] * self.opt.std['C_REAL_VALUE'] + self.opt.mean['C_REAL_VALUE']
+                pre_data['dq_fix'] = pre_data['dq_fix'].round(2)
+
+                pre_data.loc[pre_data['dq_fix'] > self.opt.cap, 'dq_fix'] = self.opt.cap
+                pre_data.loc[pre_data['dq_fix'] < 0, 'dq_fix'] = 0
+
+                T = 'T' + str(point + 1)
+                if self.opt.coe[T]['update'] is False:
+                    continue
+                pre_data['history'] = self.fix.history_error_clock(pre_data, rp_his, 16 - point - 1)
+                self.logger.info("第{}点的时间周期为:{}-{}".format(T, pre_data['C_TIME'][0], pre_data['C_TIME'].iloc[-1]))
+
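+                # NOTE: the reassignment below assumes update_coe returns the frame
+                # extended with 'coe-acc'/'coe-ass' columns; as defined above it returns
+                # None for test=False, so that return value would need to be added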
+                pre_data = self.update_coe(pre_data, assess, formula, point, test=False)
+                pre_data['howLongAgo'] = point + 1
+                pre_data = pre_data.loc[:,['C_TIME', 'dq_fix', 'C_FP_VALUE', 'history', 'coe-acc', 'coe-ass', 'howLongAgo']]
+                df_melted = pre_data.melt(id_vars=['C_TIME', 'howLongAgo'], var_name='model', value_name='power_forecast')
+                df_melted['farm_id'] = self.opt.algorithm_platform['farm_id']
+                mongo_data.append(df_melted)
+            from cache.mongo import insert_data_into_mongo
+            mongo_data = pd.concat(mongo_data, axis=0)
+            insert_data_into_mongo(mongo_data, self.opt.algorithm_platform)
+            end = time.time()
+            self.logger.info("算法平台测试,用了 %s 秒 " % (end - start))
+        except Exception as e:
+            self.logger.critical("算法平台定时任务出错:{}".format(e.args))
+
+    def update_property(self):
+        self.process.opt = self.opt
+        self.features.opt = self.opt
+        self.fix.opt = self.opt
+
+    def material(self, begin, end, is_repair=False):
+        his_begin = (begin - pd.Timedelta(hours=self.opt.Model["his_points"]/4)).strftime('%Y-%m-%d %H:%M:%S')
+        begin, end = begin.strftime('%Y-%m-%d'), end.strftime('%Y-%m-%d')
+        if is_repair is True:
+            self.logger.info("进入修模的起止时间为:{}-{}".format(begin, end))
+        else:
+            self.logger.info("进入测试集的起止时间为:{}-{}".format(begin, end))
+
+        nwp = pd.read_csv(current_path + '/data/NWP.csv', parse_dates=['C_TIME'])
+        rp = pd.read_csv(current_path + '/data/power.csv', parse_dates=['C_TIME'])
+        dq = pd.read_csv(current_path + '/data/dq.csv', parse_dates=['C_TIME'])
+        env = pd.read_csv(current_path + '/data/weather-{}-process.csv'.format(self.opt.weatherloc[0]), parse_dates=['C_TIME'])
+        rp['C_TIME'] = pd.to_datetime(rp['C_TIME'])
+        rp.set_index('C_TIME', inplace=True)
+        rp = rp.loc[his_begin: end].reset_index(inplace=False)
+        rp_his = rp.copy()
+
+        if self.opt.Model['fusion'] is False:
+            env_fill = pd.DataFrame({'C_TIME': rp['C_TIME']})
+            for col in self.opt.env_columns:
+                if col not in ['C_TIME', 'C_REAL_VALUE', 'C_FP_VALUE', self.opt.usable_power['env']]:
+                    env_fill[col] = np.random.rand(len(env_fill))
+            env = pd.merge(env_fill, env.loc[:, ['C_TIME', self.opt.usable_power['env']]], on='C_TIME', how='left')
+            env = env.fillna(method='ffill')
+            env = env.fillna(method='bfill')
+
+
+        if self.target == 'C_ABLE_VALUE':
+            rp.drop(['C_REAL_VALUE'], axis=1, inplace=True)
+            rp.rename(columns={'C_ABLE_VALUE': 'C_REAL_VALUE'}, inplace=True)
+            rp_his = rp.copy()
+        else:
+            plt_name = '-训练集-' + begin + '-' + end if is_repair is True else '-测试集-' + begin + '-' + end
+            weather_power = pd.merge(env, rp, on='C_TIME')
+            self.lp.weather_power = weather_power
+            if self.opt.usable_power["clean_power_which"] == 0:
+                rp_signal = self.lp.clean_limited_power_by_signal(plt_name)
+                rp = rp_signal
+            elif self.opt.usable_power["clean_power_which"] == 1:
+                rp_solar = self.lp.clean_limited_power(plt_name, is_repair=is_repair)
+                rp = rp_solar
+                if is_repair is True:
+                    self.opt.usable_power['k'] = self.lp.opt.usable_power['k']
+                    self.opt.usable_power['bias'] = self.lp.opt.usable_power['bias']
+            elif self.opt.usable_power["clean_power_which"] == 2:
+                rp_signal = self.lp.clean_limited_power_by_signal(plt_name)
+                rp_solar = self.lp.clean_limited_power(plt_name, is_repair=is_repair)
+                time_intersection = set(rp_signal['C_TIME'])
+                time_intersection.intersection_update(rp_solar['C_TIME'])
+                rp = rp_solar[rp_solar['C_TIME'].isin(time_intersection)]
+                if is_repair is True:
+                    self.opt.usable_power['k'] = self.lp.opt.usable_power['k']
+                    self.opt.usable_power['bias'] = self.lp.opt.usable_power['bias']
+            elif self.opt.usable_power["clean_power_which"] == 3:
+                rp['diff'] = rp['C_REFERENCE_POWER_BY_SAMPLE'] - rp['C_REAL_VALUE']
+                rp = rp[rp['diff'] < 0.2*self.opt.cap]
+            elif self.opt.usable_power["clean_power_which"] == 4:
+                rp['diff'] = rp['C_REFERENCE_POWER_BY_SAMPLE'] - rp['C_REAL_VALUE']
+                rp_sample = rp[rp['diff'] < 0.2 * self.opt.cap]
+                rp_signal = self.lp.clean_limited_power_by_signal(plt_name)
+                time_intersection = set(rp_sample['C_TIME'])
+                time_intersection.intersection_update(rp_signal['C_TIME'])
+                rp = rp_sample[rp_sample['C_TIME'].isin(time_intersection)]
+            else:
+                self.logger.info("不进行限电清洗")
+
+        rp = rp.loc[:, ['C_TIME', 'C_REAL_VALUE']]
+        rp['C_REAL_VALUE'] = rp['C_REAL_VALUE'].apply(pd.to_numeric)
+        rp_his = pd.merge(rp_his.loc[:, ['C_TIME', 'C_ABLE_VALUE']], rp, on='C_TIME', how='left')
+        rp_his['C_REAL_VALUE'] = rp_his['C_REAL_VALUE'].fillna(rp_his['C_ABLE_VALUE'])
+        rp_his = pd.merge(rp_his, dq, on='C_TIME')
+        rp_his = rp_his.loc[:, ['C_TIME', 'C_FP_VALUE', 'C_ABLE_VALUE', 'C_REAL_VALUE']]
+
+        dq = rp_his[rp_his['C_TIME'].isin(rp['C_TIME'])].copy()
+        dq.drop(columns=['C_ABLE_VALUE'], inplace=True)
+
+        nwp.set_index('C_TIME', inplace=True)
+        nwp = nwp.loc[begin: end].reset_index()
+
+        env.set_index('C_TIME', inplace=True)
+        env = env.loc[his_begin: end].reset_index()
+
+        if is_repair:
+            mean, std = {}, {}
+            for df in [nwp, env, dq, rp]:
+                m, s, _ = self.normalize(df)
+                mean.update(m)
+                std.update(s)
+
+            self.opt.mean = {k: float(v) for k, v in mean.items()}
+            self.opt.std = {k: float(v) for k, v in std.items()}
+        return nwp, env, dq, rp, rp_his
+
+    def saveVar(self, path, data):
+        os.makedirs(os.path.dirname(path), exist_ok=True)
+        with open(path, 'wb') as file:
+            pickle.dump(data, file)
+
+    def normalize(self, df, drop_list=['C_TIME'], mean=None, std=None):
+        df1 = df.copy()
+        drop_index = [list(df1).index(x) for x in drop_list]
+        df1.drop(drop_list, axis=1, inplace=True, errors='ignore')
+        df1 = df1.apply(pd.to_numeric, errors='ignore')
+        if mean is None or std is None:
+            mean = np.mean(df1, axis=0).round(3)  # column means
+            std = np.std(df1, axis=0).round(3)  # column standard deviations
+        new = []
+        for i, row in df1.iterrows():
+            d = (row - mean) / std  # z-score normalization
+            new.append(d.round(3))
+        if len(new) > 0:
+            new = pd.concat(new, axis=1).T
+            for index, col_name in zip(drop_index, drop_list):
+                new.insert(index, col_name, df[col_name].values)
+        return mean, std, new
+
+
+
+if __name__ == '__main__':
+    pass

+ 45 - 0
cache/data/hotmap.py

@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2024/8/2 9:49
+# file: hotmap.py
+# author: David
+# company: shenyang JY
+import matplotlib.pyplot as plt
+import seaborn as sns
+import pandas as pd
+import math
+
+weather = pd.read_csv('./weather-1-process.csv')
+power = pd.read_csv('./power.csv')
+data = pd.merge(weather, power, on='C_TIME')
+data['C_TIME'] = pd.to_datetime(data['C_TIME'])
+data['时间1'] = data['C_TIME'].dt.strftime('%Y-%m')
+data = data[['C_TIME', '时间1', 'C_AIRT', 'C_CELLT', 'C_DIFFUSER', 'C_DIRECTR', 'C_GLOBALR', 'C_REAL_VALUE']]
+pearson = pd.DataFrame()
+data.iloc[:, 2:] = data.iloc[:, 2:].astype(float)
+indexs = []
+new = {}
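+# the loop below hand-computes the Pearson correlation per month; an equivalent
+# one-liner per group would be group.iloc[:, 2:].corrwith(group['C_REAL_VALUE'])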
+for name, group in data.groupby('时间1'):
+    print("name=", name, "长度:", len(group))
+    indexs.append(name)
+#     new = {}
+    for key, value in group.iloc[:, 2:].items():  # .iteritems() was removed in pandas 2.0; .items() is the replacement
+        cov = group[key].cov(group['C_REAL_VALUE'])
+        var = group['C_REAL_VALUE'].var()
+        vari = group[key].var()
+        if var * vari != 0:
+            res_pearson = cov/math.sqrt(var*vari)
+        else:
+            res_pearson = 0
+#         new[key] = res_pearson
+        new.setdefault(key, []).append(res_pearson)
+pearson = pd.DataFrame(new, index=indexs)
+#     pearson = pearson.append(new, ignore_index=True)
+# plt.style.use('ggplot')
+# plt.rcParams['font.sans-serif'] = ['SimHei']
+# plt.rcParams['axes.unicode_minus'] = False
+fig, ax = plt.subplots(figsize=(50, 6))
+sns.set(font_scale=1.25)
+# cmap = sns.cm.hot_r
+hm = sns.heatmap(pearson, cbar=True, square=True, fmt='.2f', annot=True, annot_kws={'size': 10}, cmap="hot_r")
+plt.show()

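The loop above computes the per-month Pearson coefficients by hand; pandas can produce the same table in one pass. A sketch continuing the script (one behavioral difference: `corrwith` returns NaN for a zero-variance group where the manual loop returns 0):

pearson_alt = data.groupby('时间1').apply(
    lambda g: g.iloc[:, 2:].corrwith(g['C_REAL_VALUE'])   # Pearson by default
)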
+ 81 - 0
cache/data_cleaning.py

@@ -0,0 +1,81 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/10/11 11:00
+# file: data_cleaning.py
+# author: David
+# company: shenyang JY
+import numpy as np
+np.random.seed(42)
+
+def cleaning(df, name, logger, cols=None, dup=True):
+    logger.info("开始清洗:{}……".format(name))
+    data = df.copy()
+    data = data_column_cleaning(data, logger)
+    if dup:
+        data = rm_duplicated(data, logger)
+    if cols is not None:
+        data = key_field_row_cleaning(data, cols, logger)
+    return data
+
+
+def data_column_cleaning(data, logger, clean_value=[-99.0, -99]):
+    """
+    列的清洗
+    :param data:
+    :param logger:
+    :param clean_value:
+    :return:
+    """
+    data1 = data.copy()
+    cols_pre = data.columns.to_list()
+    for val in clean_value:
+        data1 = data1.replace(val, np.nan)
+    # nan 列超过80% 删除
+    data1 = data1.dropna(axis=1, thresh=len(data) * 0.8)
+    # 删除取值全部相同的列
+    data1 = data1.loc[:, (data1 != data1.iloc[0]).any()]
+    data = data[data1.columns.tolist()]
+    cols_late = data.columns.tolist()
+    if len(cols_pre) > len(cols_late):
+        logger.info("列清洗:清洗的列有:{}".format(set(cols_pre) - set(cols_late)))
+    return data
+
+
+def key_field_row_cleaning(data, cols, logger):
+    """
+    Row cleaning on key fields: drop rows whose key column equals -99 or is null
+    :param data:
+    :param cols: list of key columns to check
+    :param logger:
+    :return:
+    """
+    rows_pre = len(data)
+    nan_cols = []
+    for col in cols:
+        begin = len(data)
+        if col in data.columns.tolist():
+            # data = data[~((data.loc[:, col] < 0) & (data.loc[:, col].astype(str).str.contains('99')))]
+            data = data[~(data[col] == -99)]
+            data = data[~data.loc[:, col].isnull()]
+        end = len(data)
+        if begin - end > 0:
+            nan_cols.append(col)
+    rows_late = len(data)
+    if rows_pre - rows_late > 0:
+        logger.info("行清洗:清洗的行数有:{},缺失的列有:{}".format(rows_pre-rows_late, ', '.join(nan_cols)))
+    return data
+
+def rm_duplicated(data, logger):
+    """
+    按照时间去重
+    :param data:
+    :param logger:
+    :return:
+    """
+    # 按照时间去重
+    rows_pre = len(data)
+    data = data.drop_duplicates(subset='C_TIME')
+    rows_late = len(data)
+    if rows_pre - rows_late > 0:
+        logger.info("时间去重的行数有:{}".format(rows_pre - rows_late))
+    return data

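A small usage sketch for the cleaners above; the toy DataFrame and logger setup are illustrative (a standard-library logger stands in for the project one):

import logging
import pandas as pd
from cache.data_cleaning import cleaning

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('cleaning')

nwp_raw = pd.DataFrame({
    'C_TIME': ['00:00', '00:00', '00:15', '00:30', '00:45'],
    'C_SWR': [-99, 310.5, 295.0, 280.2, 260.1],   # -99 is the sentinel the cleaner filters
})
nwp = cleaning(nwp_raw, 'NWP', logger, cols=['C_SWR'], dup=True)
# one of the duplicate 00:00 rows is dropped first, then the remaining -99 row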
BIN
cache/figs/测光法-测试集-2024-11-01-2024-11-10.png


BIN
cache/figs/测光法-测试集-2025-01-06-2025-01-08.png


BIN
cache/figs/测光法-训练集-2024-08-11-2024-10-31.png


BIN
cache/figs/测光法-训练集-2024-10-08-2024-12-28.png


BIN
cache/figs/测光法-训练集-2024-10-16-2025-01-05.png


+ 148 - 0
cache/formula.py

@@ -0,0 +1,148 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2024/9/23 15:27
+# file: formula.py
+# author: David
+# company: shenyang JY
+import numpy as np
+import pandas as pd
+'''
+Usage procedure for the accuracy module formula.py:
+① Assessment: the assessment class; Formulas: the accuracy-calculation class
+② Initialize with arguments: construct the assessment class with the opt namespace (must include cap, the installed capacity) and a logger object (print also works)
+③ Call the methods: pass arguments as documented on the assessment class to obtain the daily average accuracy and the assessment score
+----------
+Note:
+regulations > solar/wind [] list meaning (in order): short-term accuracy threshold, ultra-short-term accuracy threshold, short-term assessment coefficient, ultra-short-term assessment coefficient
+In the Northwest and Northeast regions, short-term forecasts are assessed via integral energy and deviation bands, which are not computed here
+'''
+class Assessment(object):
+    def __init__(self, opt, logger):
+        self.logger = logger
+        self.formula = Formulas(opt)
+        self.regulations = {
+            "south129":{
+                'func': self.formula.calculate_acc,
+                'solar': [65, 70, 1 * 0.2, 1 * 0.2],
+                'wind': [60, 65, 0.2 * 2, 0.2 * 2],
+                'check': True
+            },
+            "south": {
+                'func': self.formula.calculate_acc_south,
+                'solar': [65, 70, 1*0.2, 1*0.2],
+                'wind': [60, 65, 0.2*2, 0.2*2],
+                'check': True
+            },
+            "east":{
+                'func': self.formula.calculate_acc,
+                'solar': ['', 97, '', 0.09*1],
+                'wind': ['', 96, '', 0.09*1],
+                'check': False
+            },
+            "northeast":{
+                'func': self.formula.calculate_acc_northeast,
+                'solar': ['', 85, '', 0.02*0.1],
+                'wind': ['', 80, '', 0.02*0.1]
+            },
+            "northwest":{
+                'func': self.formula.calculate_acc_northwest,
+                'solar': ['', 75, '', 0.015*0.1*0.5],
+                'wind': ['', 75, '', 0.015*0.1*0.5]
+            }
+
+        }
+
+    def electricity_solar_cdq(self, df, province, predict, label='C_REAL_VALUE', output=True):
+        df['C_TIME'] = pd.to_datetime(df['C_TIME'])
+        df['C_TIME_DAY'] = df['C_TIME'].dt.strftime("%Y-%m-%d")
+        dfs = df.groupby('C_TIME_DAY')
+        limit = self.regulations[province]['solar'][1]
+        alpha = self.regulations[province]['solar'][3]
+        sum_score, sum_acc = 0, 0
+        for dt, data in dfs:
+            acc = self.regulations[province]['func'](data[label].values, data[predict].values)
+            score = (limit - acc) * alpha if acc < limit else 0
+            sum_acc += acc
+            sum_score += score
+            if output:
+                self.logger.info("预测名称:{},日期:{},区域:{},超短期的准确率:{:.4f},考核分数:{:.4f}".format(predict, str(dt), province, acc, score))
+        return round(sum_acc/len(dfs), 5), round(sum_score/len(dfs), 5)
+
+    def electricity_wind_cdq(self, df, province, predict, label='C_REAL_VALUE', output=True):
+        df['C_TIME'] = pd.to_datetime(df['C_TIME'])
+        df['C_TIME_DAY'] = df['C_TIME'].dt.strftime("%Y-%m-%d")
+        dfs = df.groupby('C_TIME_DAY')
+        limit = self.regulations[province]['wind'][1]
+        alpha = self.regulations[province]['wind'][3]
+        sum_score, sum_acc = 0, 0
+        for dt, data in dfs:
+            acc = self.regulations[province]['func'](data[label].values, data[predict].values)
+            score = (limit - acc) * alpha if acc < limit else 0
+            sum_acc += acc
+            sum_score += score
+            if output:
+                self.logger.info("预测名称:{},日期:{},区域:{},超短期的准确率:{:.4f},考核分数:{:.4f}".format(predict, str(dt), province, acc, score))
+        return round(sum_acc / len(dfs), 2), round(sum_score / len(dfs), 2)
+
+
+class Formulas(object):
+    def __init__(self, opt):
+        self.opt = opt
+
+    def calculate_acc(self, label_data, predict_data):
+        loss = np.sum((label_data - predict_data) ** 2) / len(label_data)  # mse
+        loss_sqrt = np.sqrt(loss)  # rmse
+        loss_acc = (1 - loss_sqrt / self.opt.cap) * 100
+        return loss_acc
+
+
+    def calculate_acc_south(self, label_data, predict_data):
+        cap = 0.1 * self.opt.cap
+        mask = (label_data < cap) & (predict_data < cap)
+        label_data = label_data[~mask]
+        predict_data = predict_data[~mask]
+        if len(predict_data) == 0:
+            return 0
+        diff = label_data - predict_data
+        base = np.where(label_data < self.opt.cap * 0.2, self.opt.cap * 0.2, label_data)
+        acc = np.sum((diff / base) ** 2) / len(diff)
+        acc = (1 - np.sqrt(acc)) * 100
+        return acc
+
+
+    def calculate_acc_northwest(self, label_data, predict_data):
+        cap = 0.03 * self.opt.cap
+        mask = (label_data < cap) & (predict_data < cap)
+        label_data = label_data[~mask]
+        predict_data = predict_data[~mask]
+        if len(predict_data) == 0:
+            return 0
+        diff = np.abs(label_data - predict_data)
+        base1 = label_data + predict_data + 1e-9
+        base2 = np.sum(diff) + 1e-9
+        acc = (1 - 2 * np.sum(np.abs(label_data / base1 - 0.5) * diff / base2)) * 100
+        return acc
+
+    def calculate_acc_northeast(self, label_data, predict_data):
+        cap = 0.1 * self.opt.cap
+        mask = (label_data < cap) & (predict_data < cap)
+        label_data = label_data[~mask]
+        predict_data = predict_data[~mask]
+        if len(predict_data) == 0:
+            return 0
+        diff = np.abs(predict_data - label_data)
+        deviation = diff / np.abs(predict_data + 1e-9)
+        acc = np.where(deviation >= 1, 1, deviation)
+        acc = (1 - np.mean(acc)) * 100  # as a percentage, comparable with the 85 threshold in regulations
+        return acc
+
+if __name__ == '__main__':
+    from config import myargparse
+
+    args = myargparse(discription="场站端配置", add_help=False)
+    opt = args.parse_args_and_yaml()
+    formula = Formulas(opt)
+    test = pd.read_csv('./data/测试集16.csv')
+    test = test.iloc[:96, :]
+    acc = formula.calculate_acc_northeast(test['C_REAL_VALUE'], test['C_FP_VALUE'])
+    print(acc)

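Following the usage procedure in the module docstring, a minimal sketch; the capacity value is illustrative, print replaces a logger as the docstring allows, and the test CSV is assumed to also carry a C_TIME column:

from types import SimpleNamespace
import pandas as pd
from cache.formula import Assessment

opt = SimpleNamespace(cap=100.0)          # installed capacity (assumed value)
logger = SimpleNamespace(info=print)      # print in place of a logger object
assess = Assessment(opt, logger)

df = pd.read_csv('./data/测试集16.csv')   # same test file as the __main__ block above
day_acc, day_score = assess.electricity_solar_cdq(df, 'east', predict='C_FP_VALUE')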
+ 298 - 0
cache/inputData.py

@@ -0,0 +1,298 @@
+import pandas as pd
+import datetime, time
+import yaml
+import pymysql
+import os
+from sqlalchemy import create_engine
+import pytz
+from cache.data_cleaning import cleaning, rm_duplicated
+current_path = os.path.dirname(__file__)
+dataloc = current_path + '/data/'
+
+
+def readData(name):
+    """
+    读取数据
+    :param name: 名字
+    :return:
+    """
+    path = dataloc + r"/" + name
+    return pd.read_csv(path)
+
+
+def saveData(name, data):
+    """
+    存放数据
+    :param name: 名字
+    :param data: 数据
+    :return:
+    """
+    path = dataloc + r"/" + name
+    os.makedirs(os.path.dirname(path), exist_ok=True)
+    data.to_csv(path, index=False)
+
+
+def timestamp_to_datetime(ts):
+    local_timezone = pytz.timezone('Asia/Shanghai')
+    if type(ts) is not int:
+        raise ValueError("timestamp-时间格式必须是整型")
+    if len(str(ts)) == 13:
+        dt = datetime.datetime.fromtimestamp(ts/1000, tz=pytz.utc).astimezone(local_timezone)
+        return dt
+    elif len(str(ts)) == 10:
+        dt = datetime.datetime.fromtimestamp(ts, tz=pytz.utc).astimezone(local_timezone)
+        return dt
+    else:
+        raise ValueError("timestamp-时间格式错误")
+
+
+def dt_tag(dt):
+    date = dt.replace(hour=0, minute=0, second=0)
+    delta = (dt - date) / pd.Timedelta(minutes=15)
+    return delta + 1
+
+
+def timestr_to_timestamp(time_str):
+    """
+    将时间戳或时间字符串转换为datetime.datetime类型
+    :param time_data: int or str
+    :return:datetime.datetime
+    """
+    if isinstance(time_str, str):
+        if len(time_str) == 10:
+            dt = datetime.datetime.strptime(time_str, '%Y-%m-%d')
+            return int(round(time.mktime(dt.timetuple())) * 1000)
+        elif len(time_str) in {17, 18, 19}:
+            dt = datetime.datetime.strptime(time_str, '%Y-%m-%d %H:%M:%S')   # strptime字符串解析必须严格按照字符串中的格式
+            return int(round(time.mktime(dt.timetuple())) * 1000)   # 转换成毫秒级的时间戳
+        else:
+            raise ValueError("时间字符串长度不满足要求!")
+    else:
+        return time_str
+
+
+class DataBase(object):
+    def __init__(self, begin, end, opt, logger):
+        self.opt = opt
+        self.begin = begin
+        self.his_begin = self.begin - pd.Timedelta(hours=self.opt.Model["his_points"]/4)
+        self.end = end + pd.Timedelta(days=1) - pd.Timedelta(minutes=15)
+        self.begin_stamp = timestr_to_timestamp(str(begin))
+        self.his_begin_stamp = timestr_to_timestamp(str(self.his_begin))
+        self.end_stamp = timestr_to_timestamp(str(self.end))
+        self.database = opt.database
+        self.logger = logger
+
+    def clear_data(self):
+        """
+        删除所有csv
+        :return:
+        """
+        # 设置文件夹路径
+        import glob
+        import os
+        folder_path = dataloc
+
+        # 使用 glob 获取所有的 .csv 文件路径
+        csv_files = glob.glob(os.path.join(folder_path, '**/*.csv'), recursive=True)
+
+        # 遍历所有 .csv 文件并删除
+        for file_path in csv_files:
+            os.remove(file_path)
+        self.logger.info("清除所有csv文件")
+
+    def create_database(self):
+        """
+        创建数据库连接
+        :param database: 数据库地址
+        :return:
+        """
+        engine = create_engine(self.database)
+        return engine
+
+    def exec_sql(self, sql, engine):
+        """
+        从数据库获取数据
+        :param sql: sql语句
+        :param engine: 数据库对象
+        :return:
+        """
+        df = pd.read_sql_query(sql, engine)
+        return df
+
+    def split_time(self, data):
+        data['C_TIME'] = pd.to_datetime(data["C_TIME"])
+        data.set_index('C_TIME', inplace=True)
+        data = data.sort_index().loc[self.begin: self.end]
+        data.reset_index(drop=False, inplace=True)
+        return data
+
+    def get_process_NWP(self):
+        """
+        从数据库中获取NWP数据,并进行简单处理
+        :param database:
+        :return:
+        """
+        # NWP data
+        engine = self.create_database()
+        if self.opt.new_field:
+            sql_NWP = "select C_PRE_TIME,C_T,C_RH,C_PRESSURE, C_SWR, C_TPR," \
+                      "C_DIFFUSE_RADIATION, C_DIRECT_RADIATION, C_SOLAR_ZENITH," \
+                      "C_LCC, C_MCC, C_HCC, C_TCC, C_CLEARSKY_GHI, C_DNI_CALCD," \
+                      "C_WD10,C_WD30,C_WD50,C_WD70,C_WD80,C_WD90,C_WD100,C_WD170," \
+                      "C_WS10,C_WS30,C_WS50,C_WS70,C_WS80,C_WS90,C_WS100,C_WS170 from t_nwp " \
+                      " where C_PRE_TIME between {} and {}".format(self.begin_stamp, self.end_stamp)  # 新NWP字段
+        else:
+            sql_NWP = "select C_PRE_TIME,C_T,C_RH,C_PRESSURE, C_SWR," \
+                      "C_DIFFUSE_RADIATION, C_DIRECT_RADIATION,  " \
+                      "C_WD10,C_WD30,C_WD50,C_WD70,C_WD80,C_WD90,C_WD100,C_WD170," \
+                      "C_WS10,C_WS30,C_WS50,C_WS70,C_WS80,C_WS90,C_WS100,C_WS170 from t_nwp " \
+                      " where C_PRE_TIME between {} and {}".format(self.begin_stamp, self.end_stamp)  # 老NWP字段
+        NWP = self.exec_sql(sql_NWP, engine)
+
+        NWP['C_PRE_TIME'] = NWP['C_PRE_TIME'].apply(timestamp_to_datetime)
+
+        NWP = NWP.rename(columns={'C_PRE_TIME': 'C_TIME'})
+        NWP['DT_TAG'] = NWP.apply(lambda x: dt_tag(x['C_TIME']), axis=1)
+        NWP = cleaning(NWP, 'NWP', self.logger)
+        # NWP = self.split_time(NWP)
+        NWP['C_TIME'] = NWP['C_TIME'].dt.strftime('%Y-%m-%d %H:%M:%S')
+        saveData("NWP.csv", NWP)
+        self.logger.info("导出nwp数据")
+        return NWP
+
+    def get_process_weather(self):
+        """
+        获取环境检测仪数据
+        :param database:
+        :return:
+        """
+        engine = self.create_database()
+        self.logger.info("现有环境监测仪:{}".format(self.opt.weatherloc))
+        for i in self.opt.weatherloc:
+            # columns that carry no useful signal
+            drop_columns = ["C_ID", "C_EQUIPMENT_NO", "C_DATA1","C_DATA2","C_DATA3","C_DATA4","C_DATA5","C_DATA6","C_DATA7","C_DATA8","C_DATA9","C_DATA10", "C_STATUS", "C_IS_GENERATED","C_ABNORMAL_CODE"]
+
+            # query the table's column names and keep everything not in drop_columns
+            result_set = self.exec_sql("SHOW COLUMNS FROM t_weather_station_status_data", engine)
+            get_columns = [name for name in result_set.iloc[:, 0] if name not in drop_columns]
+
+            all_columns_str = ", ".join(get_columns)
+
+            weather_sql = "select " + all_columns_str + " from t_weather_station_status_data where C_EQUIPMENT_NO=" + str(i) + " and C_TIME between '{}' and '{}'".format(self.his_begin, self.end)
+            weather = self.exec_sql(weather_sql, engine)
+            weather['C_TIME'] = pd.to_datetime(weather['C_TIME'])
+            # weather = self.split_time(weather)
+            saveData("/weather-{}.csv".format(i), weather)
+            self.logger.info("环境监测仪{}导出数据".format(i))
+
+    def get_process_power(self):
+        """
+        获取整体功率数据
+        :param database:
+        :return:
+        """
+        engine = self.create_database()
+        sql_cap = "select C_CAPACITY from t_electric_field"
+        cap = self.exec_sql(sql_cap, engine)['C_CAPACITY']
+        self.opt.cap = float(cap)
+        sql_power = "select C_TIME,C_REAL_VALUE, C_ABLE_VALUE, C_REFERENCE_POWER_BY_SAMPLE, C_IS_RATIONING_BY_MANUAL_CONTROL," \
+                    " C_IS_RATIONING_BY_AUTO_CONTROL from t_power_station_status_data" \
+                    " where C_TIME between '{}' and '{}'".format(self.his_begin, self.end)
+        powers = self.exec_sql(sql_power, engine)
+        mask2 = powers[self.opt.predict] < 0
+        mask1 = powers.loc[:, 'C_REAL_VALUE'].astype(float) > float(cap)
+        mask = powers['C_REAL_VALUE'] == -99
+
+        mask = mask | mask1 | mask2
+        self.logger.info("实际功率共{}条,要剔除功率有{}条".format(len(powers), mask.sum()))
+        powers = powers[~mask]
+        self.logger.info("剔除完后还剩{}条".format(len(powers)))
+        powers.reset_index(drop=True, inplace=True)
+        binary_map = {b'\x00': 0, b'\x01': 1}
+        powers['C_IS_RATIONING_BY_AUTO_CONTROL'] = powers['C_IS_RATIONING_BY_AUTO_CONTROL'].map(binary_map)
+        powers = rm_duplicated(powers, self.logger)
+        saveData("power.csv", powers)
+
+    def get_process_dq(self):
+        """
+        获取短期预测结果
+        :param database:
+        :return:
+        """
+        engine = self.create_database()
+        sql_dq = "select C_FORECAST_TIME AS C_TIME, C_FP_VALUE from t_forecast_power_short_term " \
+                 "where C_FORECAST_TIME between {} and {}".format(self.his_begin_stamp, self.end_stamp)
+        dq = self.exec_sql(sql_dq, engine)
+        # dq['C_TIME'] = pd.to_datetime(dq['C_TIME'], unit='ms')
+        dq['C_TIME'] = dq['C_TIME'].apply(timestamp_to_datetime)
+        # dq = dq[dq['C_FORECAST_HOW_LONG_AGO'] == 1]
+        # dq.drop('C_FORECAST_HOW_LONG_AGO', axis=1, inplace=True)
+        dq = cleaning(dq, 'dq', self.logger, cols=['C_FP_VALUE'])
+        dq['C_TIME'] = dq['C_TIME'].dt.strftime('%Y-%m-%d %H:%M:%S')
+        saveData("dq.csv", dq)
+        self.logger.info("导出dq数据")
+
+    def get_process_cdq(self):
+        """
+        获取超短期预测结果
+        :param database:
+        :return:
+        """
+        engine = self.create_database()
+        sql_cdq = "select C_FORECAST_TIME AS C_TIME, C_ABLE_VALUE, C_FORECAST_HOW_LONG_AGO from " \
+                  "t_forecast_power_ultra_short_term_his" \
+                  " where C_FORECAST_TIME between {} and {}".format(self.begin_stamp, self.end_stamp)
+        cdq = self.exec_sql(sql_cdq, engine)
+        cdq['C_TIME'] = cdq['C_TIME'].apply(timestamp_to_datetime)
+        cdq = cleaning(cdq, 'cdq', self.logger, cols=['C_ABLE_VALUE'], dup=False)
+        # cdq = cdq[cdq['C_FORECAST_HOW_LONG_AGO'] == int(str(self.opt.predict_point)[1:])]
+        cdq['C_TIME'] = cdq['C_TIME'].dt.strftime('%Y-%m-%d %H:%M:%S')
+        saveData("cdq.csv", cdq)
+
+    def indep_process(self):
+        """
+        进一步数据处理:时间统一处理等
+        :return:
+        """
+        # 环境监测仪数据处理
+        for i in self.opt.weatherloc:
+            weather = readData("/weather-{}.csv".format(i))
+            env_columns = [ele for ele in self.opt.env_columns if ele not in ['C_TIME', 'C_FP_VALUE', 'C_REAL_VALUE', 'error']]
+            weather = cleaning(weather, 'weather', self.logger, cols=env_columns)
+            weather = weather[weather[self.opt.usable_power["env"]] >= 0]
+
+            weather['C_TIME'] = pd.to_datetime(weather['C_TIME'])
+            weather_ave = weather.resample('15T', on='C_TIME').mean().reset_index()
+            weather_ave = weather_ave.dropna(subset=[self.opt.usable_power['env']])
+            weather_ave.set_index('C_TIME', inplace=True)
+            weather_ave = weather_ave.interpolate(method='linear')
+            weather_ave = weather_ave.fillna(method='ffill')
+            weather_ave = weather_ave.fillna(method='bfill')
+            weather_ave.reset_index(drop=False, inplace=True)
+            weather_ave.iloc[:, 1:] = weather_ave.iloc[:, 1:].round(2)
+            saveData("/weather-{}-process.csv".format(i), weather_ave)
+
+    def data_process(self):
+        """
+        数据导出+初步处理的总操控代码
+        :param database:
+        :return:
+        """
+        self.clear_data()
+        try:
+            self.get_process_power()
+            self.get_process_dq()
+            self.get_process_cdq()
+            self.get_process_NWP()
+            self.get_process_weather()
+            self.indep_process()
+        except Exception as e:
+            self.logger.critical("导出数据出错:{}".format(e.args))
+
+
+
+

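A quick sketch of the three time helpers above; the exact millisecond value depends on the machine's local timezone because mktime is timezone-local, and the dt_tag result of 34.0 assumes the machine runs on Asia/Shanghai:

from cache.inputData import timestr_to_timestamp, timestamp_to_datetime, dt_tag

ts = timestr_to_timestamp('2024-06-01 08:15:00')   # 13-digit millisecond timestamp
dt = timestamp_to_datetime(ts)                     # tz-aware datetime in Asia/Shanghai
tag = dt_tag(dt)                                   # 34.0: the 34th 15-minute slot of that day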
+ 152 - 0
cache/limited_power.py

@@ -0,0 +1,152 @@
+import pandas as pd
+import os
+import numpy as np
+np.random.seed(42)
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+current_path = os.path.dirname(__file__)
+
+
+class LimitPower(object):
+    def __init__(self, logger, args, weather_power):
+        self.logger = logger
+        self.args = args
+        self.opt = self.args.parse_args_and_yaml()
+        self.weather_power = weather_power
+
+    def segment_statis(self):
+        """
+        对总辐射-实际功率进行分段处理,获取分度的中位点,四分位间距和斜率
+        :return: glob_rp 总辐射分段
+        """
+        segs = list(range(50, 2000, 100))    # segment irradiance in 100 W/m² steps
+        xs = [25] + [seg + 50 for seg in segs[:-1]]  # midpoint of each irradiance segment
+        glob_rp = {}       # dict: key = segment midpoint, value = real-power samples in that segment
+        for index, row in self.weather_power.iterrows():
+            glob_ = row[self.opt.usable_power["env"]]
+            rp = row['C_REAL_VALUE']
+            for i, seg in enumerate(segs):
+                if glob_ <= seg and not (i > 0 and rp < 1):
+                    glob_rp.setdefault(xs[i], []).append(rp)
+                    break
+        for i, x in enumerate(xs):
+            rps = glob_rp.get(x)
+            if rps is None:
+                glob_rp = {k: v for k, v in glob_rp.items() if k not in xs[xs.index(x):]}
+                break
+            x_l = xs[i-1] if i > 0 else 0
+            q2_l = glob_rp[xs[i-1]][0] if i > 0 else 0
+            q1 = np.percentile(rps, self.opt.usable_power['down_fractile'])     # lower fractile of real power
+            q2 = np.percentile(rps, 50)  # median of real power
+            q3 = np.percentile(rps, self.opt.usable_power['up_fractile'])     # upper fractile of real power
+            iqr = q3 - q1    # interquartile range
+            k1 = round(q2/x, 5)  # overall slope
+            k2 = round((q2-q2_l)/(x-x_l), 5)    # trend slope, relative to the previous median point
+            glob_rp[x] = [q2, iqr, k1, k2]   # update the dict
+        return glob_rp
+
+    def mapping_relation(self, glob_rp):
+        """
+        拟合分段处理后的斜率和偏移量
+        :param glob_rp: 总辐射分段
+        :return: k_final 斜率 bias 实际功率的分布宽度, glob_rp 总辐射分段
+        """
+        ks, iqrs, delete_x, tag_x = [], [], [], []   # ks所有分段斜率集合,iqrs所有分段间距集合,delete_x删除的x坐标集合
+        for x, values in glob_rp.items():
+            k1 = values[-2]
+            k2 = values[-1]
+            iqrs.append(values[-3])
+            if k1 > 0 and k2 > 0:   # 清除趋势小于等于0的斜率
+                ks.append(k1)
+                tag_x.append(x)
+            else:
+                delete_x.append(x)
+                # print("删除的斜率:", k1, k2)
+        bias = round(np.median(iqrs), 3)  # 中位点
+        # print("++++1", ks)
+        mean = np.mean(ks)  # 均值
+        std = np.std(ks)    # 标准差
+        ks = np.array(ks)
+        z_score = (ks-mean)/std # z均值
+        # print("----", z_score)
+        outliers = np.abs(z_score) > self.opt.usable_power['outliers_threshold']    # 超过阈值为离群点
+        ks = ks[~outliers]  # 消除离群点
+        delete_x1 = list(np.array(tag_x)[outliers]) # 清除大于阈值的离群点
+        k_final = round(np.mean(ks), 5)  # 对清洗后的斜率做平均
+        # print("++++2:", ks)
+        delete_x.extend(delete_x1)
+        self.logger.info("拟合可用功率,删除的斜率:" + ' '.join([str(x) for x in delete_x]))
+        glob_rp = {k: v for k, v in glob_rp.items() if k not in delete_x}   # 清洗后剩下的分段点位
+        return k_final, bias, glob_rp
+
+    def filter_unlimited_power(self, zfs, real_power, k, b):
+        """
+        预测可用功主方法
+        :param zfs: 要预测可用功率的总辐射
+        :param k: 斜率
+        :param b: 偏移量
+        :return: 预测的可用功率
+        """
+        high = k*zfs+b/2 if k*zfs+b/2 < self.opt.cap else self.opt.cap
+        low = k*zfs-b/2 if k*zfs-b/2 > 0 else 0
+        if low <= real_power <= high:
+            return True
+        else:
+            return False
+
+    def clean_limited_power(self, name, is_repair=False):
+        if is_repair is True:
+            glob_rp = self.segment_statis()
+            k_final, bias, glob_rp = self.mapping_relation(glob_rp)
+            self.opt.usable_power['k'] = float(k_final)
+            self.opt.usable_power['bias'] = float(bias)
+        new_weather_power = []
+        for index, row in self.weather_power.iterrows():
+            zfs = row[self.opt.usable_power["env"]]
+            rp = row['C_REAL_VALUE']
+            if self.filter_unlimited_power(zfs, rp, self.opt.usable_power['k'], self.opt.usable_power['bias'] * self.opt.usable_power['coe']):
+                row['c'] = 'red'
+                new_weather_power.append(row)
+            else:
+                row['c'] = 'blue'
+                new_weather_power.append(row)
+        new_weather_power = pd.concat(new_weather_power, axis=1).T
+        new_weather_power.plot.scatter(x=self.opt.usable_power["env"], y='C_REAL_VALUE', c='c')
+        plt.savefig(current_path + '/figs/测光法{}.png'.format(name))
+        new_weather_power = new_weather_power[new_weather_power['c'] == 'red']
+        number = len(new_weather_power)
+        self.logger.info("测光法-未清洗限电前,总共有:{}条数据".format(len(self.weather_power)))
+        self.logger.info("测光法-清除限电后保留的点有:" + str(number) + " 占比:" + str(round(number / len(self.weather_power), 2)))
+        return new_weather_power.loc[:, ['C_TIME', 'C_REAL_VALUE', 'C_ABLE_VALUE']]
+
+    def clean_limited_power_by_signal(self, name):
+        weather_power1 = self.weather_power.copy()
+        weather_power1["signal"] = weather_power1.apply(lambda x: self.signal_result(x["C_IS_RATIONING_BY_MANUAL_CONTROL"], x["C_IS_RATIONING_BY_AUTO_CONTROL"]), axis=1)
+        weather_power1['c'] = weather_power1.apply(lambda x: 'cornflowerblue' if bool(x["signal"]) is True else 'pink', axis=1)
+        weather_power1.plot.scatter(x=self.opt.usable_power["env"], y='C_REAL_VALUE', c='c')
+        plt.savefig(current_path + '/figs/信号法{}.png'.format(name))
+        weather_power1 = weather_power1[weather_power1['signal'] == False]
+        self.logger.info("信号法-未清洗限电前,总共有:{}条数据".format(len(self.weather_power)))
+        self.logger.info("信号法-清除限电后保留的点有:" + str(len(weather_power1)) + " 占比:" + str(round(len(weather_power1) / len(self.weather_power), 2)))
+        return weather_power1.loc[:, ['C_TIME', 'C_REAL_VALUE', 'C_ABLE_VALUE']]
+
+    def signal_result(self, manual, auto):
+        if int(manual) == 0:
+            if int(auto) == 0:
+                return False
+            else:
+                return True
+        else:
+            if int(auto) == 1:
+                return True
+            else:
+                return False
+
+
+if __name__ == '__main__':
+    power = pd.read_csv('2023-12-01至2023-12-23实际功率导出文件.csv', parse_dates=['时间'])
+    weather = pd.read_csv('2023-12-01至2023-12-23气象站数据导出文件.csv', parse_dates=['时间'])
+    weather_power = pd.merge(weather, power, on='时间')  # 联立数据
+    # glob_rp = segment_statis(weather_power)
+    # k_final, bias, glob_rp = mapping_relation(glob_rp)

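The band test in `filter_unlimited_power` reduces to a few lines; a worked example with illustrative slope and band-width values (in practice they come from the fitting above):

cap = 100.0                        # installed capacity (illustrative)
k, b = 0.05, 8.0                   # slope and band width (illustrative, normally fitted)
zfs = 600.0                        # a global-irradiance sample
high = min(k * zfs + b / 2, cap)   # 34.0
low = max(k * zfs - b / 2, 0)      # 26.0
# a real-power sample rp is kept as un-curtailed iff low <= rp <= high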
+ 68 - 0
cache/mongo.py

@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+# -*- coding:utf-8 -*-
+# @FileName  :mongo.py
+# @Time      :2025/1/20 15:57
+# @Author    :David
+# @Company: shenyang JY
+
+from pymongo import MongoClient, UpdateOne
+import pandas as pd
+import os
+import numpy as np
+np.random.seed(42)
+current_path = os.path.dirname(__file__)
+
+def insert_data_into_mongo(res_df, args):
+    mongodb_connection = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/"
+    mongodb_database = args['mongodb_database']
+    mongodb_write_table = args['mongodb_write_table']
+    overwrite = 1
+    update_keys = None
+    if 'overwrite' in args.keys():
+        overwrite = int(args['overwrite'])
+    if 'update_keys' in args.keys():
+        update_keys = args['update_keys'].split(',')
+
+    client = MongoClient(mongodb_connection)
+    db = client[mongodb_database]
+    collection = db[mongodb_write_table]
+
+    # 覆盖模式:删除现有集合
+    if overwrite:
+        if mongodb_write_table in db.list_collection_names():
+            collection.drop()
+            print(f"Collection '{mongodb_write_table}' already exists, deleted successfully!")
+
+    # 将 DataFrame 转为字典格式
+    data_dict = res_df.to_dict("records")  # 每一行作为一个字典
+
+    # 如果没有数据,直接返回
+    if not data_dict:
+        print("No data to insert.")
+        return
+
+    # 如果指定了 update_keys,则执行 upsert(更新或插入)
+    if update_keys and not overwrite:
+        operations = []
+        for record in data_dict:
+            # 构建查询条件,用于匹配要更新的文档
+            query = {key: record[key] for key in update_keys}
+            operations.append(UpdateOne(query, {'$set': record}, upsert=True))
+
+        # 批量执行更新/插入操作
+        if operations:
+            result = collection.bulk_write(operations)
+            print(f"Matched: {result.matched_count}, Upserts: {result.upserted_count}")
+    else:
+        # 追加模式:直接插入新数据
+        collection.insert_many(data_dict)
+        print("Data inserted successfully!")
+
+
+if __name__ == "__main__":
+    pre_data = pd.read_csv('./data/测试集1.csv')
+    pre_data = pre_data.loc[:, ['C_TIME', 'dq_fix', 'C_FP_VALUE', 'history', 'coe-acc', 'coe-ass', 'howLongAgo']]
+    df_melted = pre_data.melt(id_vars=['C_TIME', 'howLongAgo'], var_name='model', value_name='power_forecast')
+    df_melted['farm_id'] = 'J00000'
+    pass

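A sketch of the `args` dictionary that `insert_data_into_mongo` expects, continuing the `__main__` block above; the database and collection names are assumptions, and the call would write to the hard-coded connection string:

args = {
    'mongodb_database': 'power_forecast',   # assumed database name
    'mongodb_write_table': 'cdq_compare',   # assumed collection name
    'overwrite': 0,                         # keep the collection and upsert instead
    'update_keys': 'C_TIME,model',          # match documents on these fields
}
insert_data_into_mongo(df_melted, args)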
+ 66 - 0
cache/monitor.py

@@ -0,0 +1,66 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2024/4/11 9:07
+# file: monitor.py
+# author: David
+# company: shenyang JY
+import json
+import os
+import subprocess
+from pathlib import Path
+
+script_path = str(Path.cwd().resolve().parent.parent.parent / 'bin' / 'restart-forecast.sh')
+config_folder = Path.cwd().resolve().parent
+
+
+class Monitor(object):
+    def __init__(self, logger, args):
+        self.args = args
+        self.logger = logger
+
+    def update_config(self):
+        files = list(config_folder.rglob("ModelCDQ_*"))
+        files.sort(key=lambda x: (str(x)[str(x).index("ModelCDQ_")+9:-1]))
+        if len(files) > 0:
+            try:
+                opt = vars(self.args.parse_args_and_yaml())
+                file = files[-1]
+                with open(file, 'r', encoding='utf-8') as f:
+                    lines = f.readlines()  # 读取第一行参数
+                    if "ModelCDQ" in lines[0].strip():
+                        content = lines[0].strip()
+                    else:
+                        content = lines[1].strip()
+                    content = eval(content[content.index("ModelCDQ")+11:])
+                    content = content[0]['str']
+                    update_opt = json.loads(content)
+                    for key, value in update_opt.items():
+                        opt[key] = value
+                        print("key:", key, "value:", value)
+                self.logger.info("下发配置文件,更新后的参数为:{}".format(opt))
+                self.args.save_args_yml(opt, isdict=True)
+                self.delete_file(files)
+                if opt['reset_sys'] is True:
+                    self.logger.info("-----重启超短期算法包-----")
+                    subprocess.run(["sh", script_path])
+            except Exception as e:
+                self.logger.critical("超短期monitor任务出错:{}".format(e.args))
+                self.delete_file(files)
+
+    def delete_file(self, files):
+        for filename in files:
+            if os.path.exists(filename):
+                os.remove(filename)
+                self.logger.info(f"文件 {filename} 删除成功!")
+            else:
+                self.logger.info(f"文件 {filename} 不存在。")
+
+
+if __name__ == '__main__':
+    from config import myargparse
+    from logs import Log
+
+    args = myargparse(discription="场站端配置", add_help=False)
+    log = Log()
+    mo = Monitor(log.logger, args)
+    mo.update_config()

+ 146 - 0
cache/nn_bp.py

@@ -0,0 +1,146 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2024/5/6 13:25
+# file: time_series.py
+# author: David
+# company: shenyang JY
+import os.path
+
+from flask import Flask
+from keras.layers import Input, Dense, LSTM, concatenate, Conv1D, Conv2D, MaxPooling1D, BatchNormalization, Flatten, Dropout, Reshape, Lambda, TimeDistributed
+from keras.models import Model, load_model
+from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
+from keras import optimizers, regularizers
+import keras.backend as K
+import numpy as np
+np.random.seed(42)
+from cache.sloss import SouthLoss, NorthEastLoss
+import tensorflow as tf
+tf.compat.v1.set_random_seed(1234)
+from threading import Lock
+model_lock = Lock()
+
+
+def rmse(y_true, y_pred):
+    return K.sqrt(K.mean(K.square(y_pred - y_true)))
+
+
+def mae(y_true, y_pred):
+    return K.mean(K.abs(y_pred - y_true), axis=-1)
+
+
+var_dir = os.path.dirname(os.path.dirname(__file__))
+
+
+class FMI(object):
+    model = None
+    train = False
+
+    def __init__(self, log, args, graph, sess):
+        self.logger = log
+        self.graph = graph
+        self.sess = sess
+        opt = args.parse_args_and_yaml()
+        with self.graph.as_default():
+            tf.compat.v1.keras.backend.set_session(self.sess)
+            FMI.get_model(opt)
+
+    @staticmethod
+    def get_model(opt):
+        """
+        Singleton pattern + thread lock, to avoid thread-safety problems when the model is reloaded asynchronously
+        """
+        try:
+            if FMI.model is None or FMI.train is True:
+                with model_lock:
+                    FMI.model = FMI.get_keras_model(opt)
+                    FMI.model.load_weights(os.path.join(var_dir, 'var', 'fmi.h5'))
+        except Exception as e:
+            print("加载模型权重失败:{}".format(e.args))
+
+    @staticmethod
+    def get_keras_model(opt):
+        db_loss = NorthEastLoss(opt)
+        south_loss = SouthLoss(opt)
+        l1_reg = regularizers.l1(opt.Model['lambda_value_1'])
+        l2_reg = regularizers.l2(opt.Model['lambda_value_2'])
+        nwp_input = Input(shape=(opt.Model['time_step'], opt.Model['input_size_nwp']), name='nwp')
+        env_input = Input(shape=(opt.Model['his_points'], opt.Model['input_size_env']), name='env')
+
+        con1 = Conv1D(filters=64, kernel_size=1, strides=1, padding='valid', activation='relu',
+                      kernel_regularizer=l2_reg)(nwp_input)
+        d1 = Dense(32, activation='relu', name='d1', kernel_regularizer=l1_reg)(con1)
+        nwp = Dense(8, activation='relu', name='d2', kernel_regularizer=l1_reg)(d1)
+
+        con2 = Conv1D(filters=64, kernel_size=5, strides=1, padding='valid', activation='relu', kernel_regularizer=l2_reg)(env_input)
+        env = MaxPooling1D(pool_size=5, strides=1, padding='valid', data_format='channels_last')(con2)
+        for i in range(opt.Model['lstm_layers']):
+            rs = True
+            if i == opt.Model['lstm_layers']-1:
+                rs = False
+            env = LSTM(units=opt.Model['hidden_size'], return_sequences=rs, name='env_lstm'+str(i), kernel_regularizer=l2_reg)(env)
+        tiao = Dense(16, name='d4', kernel_regularizer=l1_reg)(env)
+
+        if opt.Model['fusion']:
+            nwpf = Flatten()(nwp)
+            fusion = concatenate([nwpf, tiao])
+        else:
+            fusion = Flatten()(nwp)
+
+        output = Dense(opt.Model['output_size'], name='d5')(fusion)
+        model = Model([env_input, nwp_input], output)
+        adam = optimizers.Adam(learning_rate=opt.Model['learning_rate'], beta_1=0.9, beta_2=0.999, epsilon=1e-7,
+                               amsgrad=True)
+        model.compile(loss=rmse, optimizer=adam)
+        return model
+
+    def train_init(self, opt):
+        tf.compat.v1.keras.backend.set_session(self.sess)
+        model = FMI.get_keras_model(opt)
+        try:
+            if opt.Model['add_train'] and opt.authentication['repair'] != "null":
+                # 进行加强训练,支持修模
+                model.load_weights(os.path.join(var_dir, 'var', 'fmi.h5'))
+                self.logger.info("已加载加强训练基础模型")
+        except Exception as e:
+            self.logger.info("加强训练加载模型权重失败:{}".format(e.args))
+        model.summary()
+        return model
+
+    def training(self, opt, train_and_valid_data):
+        model = self.train_init(opt)
+        train_X, train_Y, valid_X, valid_Y = train_and_valid_data
+        print("----------", np.array(train_X[0]).shape)
+        print("++++++++++", np.array(train_X[1]).shape)
+        # weight_lstm_1, bias_lstm_1 = model.get_layer('d1').get_weights()
+        # print("weight_lstm_1 = ", weight_lstm_1)
+        # print("bias_lstm_1 = ", bias_lstm_1)
+
+        check_point = ModelCheckpoint(filepath='./var/' + 'fmi.h5', monitor='val_loss',
+                                      save_best_only=True, mode='auto')
+        early_stop = EarlyStopping(monitor='val_loss', patience=opt.Model['patience'], mode='auto')
+        # tbCallBack = TensorBoard(log_dir='../figure',
+        #                          histogram_freq=0,
+        #                          write_graph=True,
+        #                          write_images=True)
+        history = model.fit(train_X, train_Y, batch_size=opt.Model['batch_size'], epochs=opt.Model['epoch'], verbose=2,
+                            validation_data=(valid_X, valid_Y), callbacks=[check_point, early_stop])
+        loss = np.round(history.history['loss'], decimals=5)
+        val_loss = np.round(history.history['val_loss'], decimals=5)
+        self.logger.info("-----模型训练经过{}轮迭代-----".format(len(loss)))
+        self.logger.info("训练集损失函数为:{}".format(loss))
+        self.logger.info("验证集损失函数为:{}".format(val_loss))
+        self.logger.info("训练结束,原模型地址:{}".format(id(FMI.model)))
+        with self.graph.as_default():
+            tf.compat.v1.keras.backend.set_session(self.sess)
+            FMI.train = True
+            FMI.get_model(opt)
+            FMI.train = False
+        self.logger.info("保护线程,加载模型,地址:{}".format(id(FMI.model)))
+
+    def predict(self, test_X, batch_size=1):
+        with self.graph.as_default():
+            with self.sess.as_default():
+                result = FMI.model.predict(test_X, batch_size=batch_size)
+        self.logger.info("执行预测方法")
+        return result

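The model above takes its two inputs in the order [env, nwp]; a shape-only sketch of a prediction call, assuming `fmi` is an initialized FMI instance and `opt` the parsed config:

import numpy as np

env_x = np.zeros((1, opt.Model['his_points'], opt.Model['input_size_env']), dtype=np.float32)
nwp_x = np.zeros((1, opt.Model['time_step'], opt.Model['input_size_nwp']), dtype=np.float32)
pred = fmi.predict([env_x, nwp_x])   # shape (1, opt.Model['output_size'])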
+ 155 - 0
cache/nn_cnn_ts.py

@@ -0,0 +1,155 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2024/5/6 13:25
+# file: time_series.py
+# author: David
+# company: shenyang JY
+import os.path
+from keras.layers import Input, Dense, LSTM, concatenate, Conv1D, Conv2D, MaxPooling1D, BatchNormalization, Flatten, Dropout
+from keras.models import Model, load_model
+from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
+from keras import optimizers, regularizers
+import keras.backend as K
+import numpy as np
+np.random.seed(42)
+from cache.sloss import SouthLoss, NorthEastLoss
+import tensorflow as tf
+tf.compat.v1.set_random_seed(1234)
+from threading import Lock
+model_lock = Lock()
+
+def rmse(y_true, y_pred):
+    return K.sqrt(K.mean(K.square(y_pred - y_true)))
+
+
+var_dir = os.path.dirname(os.path.dirname(__file__))
+
+
+class FMI(object):
+    model = None
+    train = False
+
+    def __init__(self, log, args, graph, sess):
+        self.logger = log
+        self.graph = graph
+        self.sess = sess
+        opt = args.parse_args_and_yaml()
+        with self.graph.as_default():
+            tf.compat.v1.keras.backend.set_session(self.sess)
+            FMI.get_model(opt)
+
+    @staticmethod
+    def get_model(opt):
+        """
+        Singleton pattern + thread lock, to avoid thread-safety problems when the model is reloaded asynchronously
+        """
+        try:
+            if FMI.model is None or FMI.train is True:
+                with model_lock:
+                    FMI.model = FMI.get_keras_model(opt)
+                    FMI.model.load_weights(os.path.join(var_dir, 'var', 'fmi.h5'))
+        except Exception as e:
+            print("加载模型权重失败:{}".format(e.args))
+
+    @staticmethod
+    def get_keras_model(opt):
+        db_loss = NorthEastLoss(opt)
+        south_loss = SouthLoss(opt)
+        l1_reg = regularizers.l1(opt.Model['lambda_value_1'])
+        l2_reg = regularizers.l2(opt.Model['lambda_value_2'])
+        nwp_input = Input(shape=(opt.Model['time_step'], opt.Model['input_size_nwp']), name='nwp')
+        env_input = Input(shape=(opt.Model['his_points'], opt.Model['input_size_env']), name='env')
+
+        con1 = Conv1D(filters=64, kernel_size=5, strides=1, padding='valid', activation='relu', kernel_regularizer=l2_reg)(nwp_input)
+        nwp = MaxPooling1D(pool_size=5, strides=1, padding='valid', data_format='channels_last')(con1)
+        nwp_lstm = LSTM(units=opt.Model['hidden_size'], return_sequences=False, kernel_regularizer=l2_reg)(nwp)
+
+        con2 = Conv1D(filters=64, kernel_size=5, strides=1, padding='valid', activation='relu', kernel_regularizer=l2_reg)(env_input)
+        env = MaxPooling1D(pool_size=5, strides=1, padding='valid', data_format='channels_last')(con2)
+        env_lstm = LSTM(units=opt.Model['hidden_size'], return_sequences=False, name='env_lstm',kernel_regularizer=l2_reg)(env)
+        tiao = Dense(4, name='d4', kernel_regularizer=l1_reg)(env_lstm)
+
+        if opt.Model['fusion']:
+            fusion = concatenate([nwp_lstm, tiao])
+        else:
+            fusion = nwp_lstm
+
+        output = Dense(opt.Model['output_size'], name='cdq_output')(fusion)
+
+        model = Model([env_input, nwp_input], output)
+        adam = optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-7, amsgrad=True)
+        model.compile(loss=south_loss, optimizer=adam)
+        return model
+
+    def train_init(self, opt):
+        tf.compat.v1.keras.backend.set_session(self.sess)
+        model = FMI.get_keras_model(opt)
+        try:
+            if opt.Model['add_train'] and opt.authentication['repair'] != "null":
+                # 进行加强训练,支持修模
+                model.load_weights(os.path.join(var_dir, 'var', 'fmi.h5'))
+                self.logger.info("已加载加强训练基础模型")
+        except Exception as e:
+            self.logger.info("加强训练加载模型权重失败:{}".format(e.args))
+        model.summary()
+        return model
+
+    def training(self, opt, train_and_valid_data):
+        model = self.train_init(opt)
+        train_X, train_Y, valid_X, valid_Y = train_and_valid_data
+        print("----------", np.array(train_X[0]).shape)
+        print("++++++++++", np.array(train_X[1]).shape)
+        # weight_lstm_1, bias_lstm_1 = model.get_layer('d1').get_weights()
+        # print("weight_lstm_1 = ", weight_lstm_1)
+        # print("bias_lstm_1 = ", bias_lstm_1)
+
+        check_point = ModelCheckpoint(filepath='./var/' + 'fmi.h5', monitor='val_loss',
+                                      save_best_only=True, mode='auto')
+        early_stop = EarlyStopping(monitor='val_loss', patience=opt.Model['patience'], mode='auto')
+        # tbCallBack = TensorBoard(log_dir='../figure',
+        #                          histogram_freq=0,
+        #                          write_graph=True,
+        #                          write_images=True)
+        history = model.fit(train_X, train_Y, batch_size=opt.Model['batch_size'], epochs=opt.Model['epoch'], verbose=2,
+                            validation_data=(valid_X, valid_Y), callbacks=[check_point, early_stop])
+        loss = np.round(history.history['loss'], decimals=5)
+        val_loss = np.round(history.history['val_loss'], decimals=5)
+        self.logger.info("-----模型训练经过{}轮迭代-----".format(len(loss)))
+        self.logger.info("训练集损失函数为:{}".format(loss))
+        self.logger.info("验证集损失函数为:{}".format(val_loss))
+        self.logger.info("训练结束,原模型地址:{}".format(id(FMI.model)))
+        with self.graph.as_default():
+            tf.compat.v1.keras.backend.set_session(self.sess)
+            FMI.train = True
+            FMI.get_model(opt)
+            FMI.train = False
+        self.logger.info("保护线程,加载模型,地址:{}".format(id(FMI.model)))
+
+    def predict(self, test_X, batch_size=1):
+        with self.graph.as_default():
+            with self.sess.as_default():
+                result = FMI.model.predict(test_X, batch_size=batch_size)
+        self.logger.info("执行预测方法")
+        return result
+
+    def train_custom(self, train_X, train_Y, model, opt):
+        epochs = opt.Model['epoch']
+        batch_size = opt.Model['batch_size']
+        num_batches = len(train_X) // batch_size    # number of full batches
+
+        optimizer = tf.keras.optimizers.Adam(learning_rate=opt.Model['learning_rate'])
+        for epoch in range(epochs):
+            for batch_index in range(num_batches):
+                start = batch_index * batch_size
+                end = start + batch_size
+                x_batch, y_batch = train_X[start: end], train_Y[start: end]
+
+                with tf.GradientTape() as tape:
+                    res = model(x_batch)
+                    loss = rmse(y_batch, res)
+
+                gradients = tape.gradient(loss, model.trainable_variables)
+                optimizer.apply_gradients(zip(gradients, model.trainable_variables))
+
+

+ 163 - 0
cache/nn_south.py

@@ -0,0 +1,163 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2024/5/6 13:25
+# file: time_series.py
+# author: David
+# company: shenyang JY
+import os.path
+from keras.layers import Input, Dense, LSTM, concatenate, Conv1D, Conv2D, MaxPooling1D, BatchNormalization, Flatten, Dropout
+from keras.models import Model, load_model
+from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
+from keras import optimizers, regularizers
+import keras.backend as K
+import numpy as np
+np.random.seed(42)
+from cache.sloss import SouthLoss, NorthEastLoss
+import tensorflow as tf
+tf.compat.v1.set_random_seed(1234)
+from threading import Lock
+model_lock = Lock()
+
+def rmse(y_true, y_pred):
+    return K.sqrt(K.mean(K.square(y_pred - y_true)))
+
+
+var_dir = os.path.dirname(os.path.dirname(__file__))
+
+
+class FMI(object):
+    model = None
+    train = False
+
+    def __init__(self, log, args, graph, sess):
+        self.logger = log
+        self.graph = graph
+        self.sess = sess
+        opt = args.parse_args_and_yaml()
+        with self.graph.as_default():
+            tf.compat.v1.keras.backend.set_session(self.sess)
+            FMI.get_model(opt)
+
+    @staticmethod
+    def get_model(opt):
+        """
+        Singleton pattern + thread lock, to avoid thread-safety problems when the model is reloaded asynchronously
+        """
+        try:
+            if FMI.model is None or FMI.train is True:
+                with model_lock:
+                    FMI.model = FMI.get_keras_model(opt)
+                    FMI.model.load_weights(os.path.join(var_dir, 'var', 'fmi.h5'))
+        except Exception as e:
+            print("加载模型权重失败:{}".format(e.args))
+
+    @staticmethod
+    def get_keras_model(opt):
+        db_loss = NorthEastLoss(opt)
+        south_loss = SouthLoss(opt)
+        l1_reg = regularizers.l1(opt.Model['lambda_value_1'])
+        l2_reg = regularizers.l2(opt.Model['lambda_value_2'])
+        nwp_input = Input(shape=(opt.Model['time_step'], opt.Model['input_size_nwp']), name='nwp')
+        env_input = Input(shape=(opt.Model['his_points'], opt.Model['input_size_env']), name='env')
+
+        con1 = Conv1D(filters=64, kernel_size=5, strides=1, padding='valid', activation='relu', kernel_regularizer=l2_reg)(nwp_input)
+        nwp = MaxPooling1D(pool_size=5, strides=1, padding='valid', data_format='channels_last')(con1)
+        # for i in range(opt.Model['lstm_layers']):
+        #     rs = True
+        #     if i == opt.Model['lstm_layers']-1:
+        #         rs = False
+        nwp_lstm = LSTM(units=opt.Model['hidden_size'], return_sequences=False, kernel_regularizer=l2_reg)(nwp)
+
+        con2 = Conv1D(filters=64, kernel_size=5, strides=1, padding='valid', activation='relu', kernel_regularizer=l2_reg)(env_input)
+        env = MaxPooling1D(pool_size=5, strides=1, padding='valid', data_format='channels_last')(con2)
+        env_lstm = LSTM(units=opt.Model['hidden_size'], return_sequences=False, name='env_lstm',kernel_regularizer=l2_reg)(env)
+        tiao = Dense(4, name='d4', kernel_regularizer=l1_reg)(env_lstm)
+
+        if opt.Model['fusion']:
+            fusion = concatenate([nwp_lstm, tiao])
+        else:
+            fusion = nwp_lstm
+
+        output = Dense(opt.Model['output_size'], name='cdq_output')(fusion)
+
+        model = Model([env_input, nwp_input], output)
+        adam = optimizers.Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-7, amsgrad=True)
+
+        if opt.Model['region'] == 'south129':
+            south_loss = rmse
+
+        model.compile(loss=south_loss, optimizer=adam)
+        return model
+
+    def train_init(self, opt):
+        tf.compat.v1.keras.backend.set_session(self.sess)
+        model = FMI.get_keras_model(opt)
+        try:
+            if opt.Model['add_train'] and opt.authentication['repair'] != "null":
+                # 进行加强训练,支持修模
+                model.load_weights(os.path.join(var_dir, 'var', 'fmi.h5'))
+                self.logger.info("已加载加强训练基础模型")
+        except Exception as e:
+            self.logger.info("加强训练加载模型权重失败:{}".format(e.args))
+        model.summary()
+        return model
+
+    def training(self, opt, train_and_valid_data):
+        model = self.train_init(opt)
+        train_X, train_Y, valid_X, valid_Y = train_and_valid_data
+        print("----------", np.array(train_X[0]).shape)
+        print("++++++++++", np.array(train_X[1]).shape)
+        # weight_lstm_1, bias_lstm_1 = model.get_layer('d1').get_weights()
+        # print("weight_lstm_1 = ", weight_lstm_1)
+        # print("bias_lstm_1 = ", bias_lstm_1)
+
+        check_point = ModelCheckpoint(filepath='./var/' + 'fmi.h5', monitor='val_loss',
+                                      save_best_only=True, mode='auto')
+        early_stop = EarlyStopping(monitor='val_loss', patience=opt.Model['patience'], mode='auto')
+        # tbCallBack = TensorBoard(log_dir='../figure',
+        #                          histogram_freq=0,
+        #                          write_graph=True,
+        #                          write_images=True)
+        history = model.fit(train_X, train_Y, batch_size=opt.Model['batch_size'], epochs=opt.Model['epoch'], verbose=2,
+                            validation_data=(valid_X, valid_Y), callbacks=[check_point, early_stop])
+        loss = np.round(history.history['loss'], decimals=2)
+        val_loss = np.round(history.history['val_loss'], decimals=2)
+        self.logger.info("-----模型训练经过{}轮迭代-----".format(len(loss)))
+        self.logger.info("训练集损失函数为:{}".format(loss))
+        self.logger.info("验证集损失函数为:{}".format(val_loss))
+        self.logger.info("训练结束,原模型地址:{}".format(id(FMI.model)))
+        with self.graph.as_default():
+            tf.compat.v1.keras.backend.set_session(self.sess)
+            FMI.train = True
+            FMI.get_model(opt)
+            FMI.train = False
+        self.logger.info("保护线程,加载模型,地址:{}".format(id(FMI.model)))
+
+    def predict(self, test_X, batch_size=1):
+        with self.graph.as_default():
+            with self.sess.as_default():
+                result = FMI.model.predict(test_X, batch_size=batch_size)
+        self.logger.info("执行预测方法")
+        return result
+
+    def train_custom(self, train_X, train_Y, model, opt):
+        epochs = opt.Model['epoch']
+        batch_size = opt.Model['batch_size']
+        num_batches = len(train_X) // batch_size    # number of full batches
+
+        optimizer = tf.keras.optimizers.Adam(learning_rate=opt.Model['learning_rate'])
+        for epoch in range(epochs):
+            for batch_index in range(num_batches):
+                start = batch_index * batch_size
+                end = start + batch_size
+                x_batch, y_batch = train_X[start: end], train_Y[start: end]
+
+                with tf.GradientTape() as tape:
+                    res = model(x_batch)
+                    loss = rmse(y_batch, res)
+
+                gradients = tape.gradient(loss, model.trainable_variables)
+                optimizer.apply_gradients(zip(gradients, model.trainable_variables))
+
+

+ 139 - 0
cache/sloss.py

@@ -0,0 +1,139 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/5/8 13:15
+# file: loss.py.py
+# author: David
+# company: shenyang JY
+from keras import backend as K
+import tensorflow as tf
+tf.compat.v1.set_random_seed(1234)
+
+
+class SouthLoss(tf.keras.losses.Loss):
+    def __init__(self, opt, name='south_loss'):
+        """
+        南网新规则损失函数
+        :param cap:装机容量
+        """
+        super(SouthLoss, self).__init__(name=name)
+        self.cap = opt.cap*0.2    # cap is not normalized, so y_true/y_predict are de-normalized in call() before use
+        self.opt = opt
+        self.cap01 = opt.cap*0.1  # 10% threshold used by call2
+
+    def call(self, y_true, y_predict):
+        """
+        自动调用
+        :param y_true: 标签
+        :param y_predict: 预测
+        :return: 损失值
+        """
+        # 计算实际和预测的差值
+        y_true = y_true * self.opt.std['C_REAL_VALUE'] + self.opt.mean['C_REAL_VALUE']
+        y_predict = y_predict * self.opt.std['C_REAL_VALUE'] + self.opt.mean['C_REAL_VALUE']
+        y_true = y_true[:, 15]
+        y_predict = y_predict[:, 15]
+        diff = y_true - y_predict
+        logistic_values = tf.sigmoid(10000 * (y_true - self.cap))
+        base = logistic_values * y_true + (1-logistic_values)*self.cap
+        loss = K.square(diff/base)
+        # loss = K.mean(loss, axis=-1)
+        return loss
+
+    def call2(self, y_true, y_predict):
+        y_true = y_true * self.opt.std['C_REAL_VALUE'] + self.opt.mean['C_REAL_VALUE']
+        y_predict = y_predict * self.opt.std['C_REAL_VALUE'] + self.opt.mean['C_REAL_VALUE']
+        y_true = y_true[:, 15]
+        y_predict = y_predict[:, 15]
+        diff = y_true - y_predict
+        logistic_values = tf.sigmoid(10000 * (y_true - self.cap))
+        base = logistic_values * y_true + (1 - logistic_values) * self.cap
+        loss = K.square(diff / base)
+
+        mask_logical = tf.logical_and(tf.greater(y_true, self.cap01), tf.greater(y_predict, self.cap01))
+        count = tf.reduce_sum(tf.cast(mask_logical, tf.float32), axis=-1)
+        safe_count = tf.maximum(count, 1)
+        # reduce_sum_loss = tf.reduce_sum(loss, axis=-1)
+        mean_loss = loss / safe_count
+        return mean_loss
+
+    def call1(self, y_true, y_predict):
+        y_true = y_true * self.opt.std['C_REAL_VALUE'] + self.opt.mean['C_REAL_VALUE']
+        y_predict = y_predict * self.opt.std['C_REAL_VALUE'] + self.opt.mean['C_REAL_VALUE']
+        base = tf.where(y_true > self.cap, y_true, tf.ones_like(y_true)*self.cap)
+        loss = (y_true - y_predict) / base
+        squared_loss = tf.square(loss)
+        mean_squared_loss = tf.reduce_mean(squared_loss, axis=[1])
+        return mean_squared_loss
+
+
+class NorthEastLoss(tf.keras.losses.Loss):
+    def __init__(self, opt, name='northeast_loss'):
+        """
+        东北新规则超短期损失函数
+        """
+        super(NorthEastLoss, self).__init__(name=name)
+        self.opt = opt
+        self.cap = round(opt.cap*0.1, 2)
+
+    def call(self, y_true, y_predict):
+        # 这里我们添加了一个小的 epsilon 值来避免除以 0
+        y_true = y_true * self.opt.std['C_REAL_VALUE'] + self.opt.mean['C_REAL_VALUE']
+        y_predict = y_predict * self.opt.std['C_REAL_VALUE'] + self.opt.mean['C_REAL_VALUE']
+
+        mask_logical = tf.logical_and(tf.greater(y_true, self.cap), tf.greater(y_predict, self.cap))
+        # mask = tf.cast(~mask_logical, tf.float32)
+        # y_true = y_true * (1 - mask) + 0 * mask
+        # y_predict = y_predict * (1 - mask) + 0 * mask
+
+
+        epsilon = tf.keras.backend.epsilon()
+        y_predict_safe = y_predict + epsilon
+
+        # 计算 (y_true - y_predict) / y_predict_safe
+        difference_over_predict = tf.abs(y_predict - y_true) / tf.abs(y_predict_safe)
+
+        # cap values >= 1 at 1, keep the rest; tf.where is element-wise and keeps the graph differentiable
+        masked_difference = tf.where(difference_over_predict >= 1, tf.ones_like(difference_over_predict), difference_over_predict)
+
+        # 这里我们先沿着特征维度求和,但你也可以选择平均(使用 tf.reduce_mean 而不是 tf.reduce_sum)
+        count = tf.reduce_sum(tf.cast(mask_logical, tf.float32), axis=-1)
+        sum_diff = tf.reduce_sum(masked_difference, axis=-1)
+        # mean_loss = tf.reduce_mean(masked_difference, axis=[1])
+        safe_count = tf.maximum(count, 1)
+        mean = sum_diff / safe_count
+        return mean
+
+
+class NorthWestLoss(tf.keras.losses.Loss):
+    def __init__(self, name='northwest_loss'):
+        """
+        东北新规则超短期损失函数
+        """
+        super(NorthWestLoss, self).__init__(name=name)
+
+    def call(self, y_true, y_pred):
+        # make sure predictions and ground truth are floats
+        y_pred = tf.cast(y_pred, tf.float32)
+        y_true = tf.cast(y_true, tf.float32)
+
+        # guard against division by zero
+        epsilon = 1e-8
+        y_pred_adjusted = y_pred + epsilon
+        y_true_adjusted = y_true + epsilon
+
+        # |Pr - Pn|
+        abs_diff = tf.abs(y_pred - y_true)
+
+        # total of |Pr - Pn|
+        sum_abs_diff = tf.reduce_sum(abs_diff)
+
+        # weight of each deviation: |Pr - Pn| / sum(|Pr - Pn|)
+        weights = abs_diff / (sum_abs_diff + epsilon)  # epsilon again avoids division by zero
+
+        # |Pr/(Pr + Pn) - 0.5|
+        ratios = tf.abs((y_pred_adjusted / (y_pred_adjusted + y_true_adjusted)) - 0.5)
+
+        # final loss value
+        loss = 1.0 - 2.0 * tf.reduce_sum(ratios * weights)
+        return loss
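For readers wiring these losses into training, a minimal sketch of how any of the tf.keras.losses.Loss subclasses above can be attached at compile time. The model shape and toy data are illustrative assumptions, not part of this commit:

    import numpy as np
    import tensorflow as tf

    # hypothetical model: 24 NWP inputs -> 16 forecast points, matching output_size in config.yml
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation='relu', input_shape=(24,)),
        tf.keras.layers.Dense(16)
    ])
    # NorthWestLoss takes no opt/cap arguments, so it drops in directly
    model.compile(optimizer='adam', loss=NorthWestLoss())

    x = np.random.rand(32, 24).astype('float32')   # toy batch, illustration only
    y = np.random.rand(32, 16).astype('float32')
    model.fit(x, y, epochs=1, verbose=0)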

+ 72 - 0
config.py

@@ -0,0 +1,72 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/3/2 10:28
+# file: config.py
+# author: David
+# company: shenyang JY
+
+"""
+模型调参及系统功能配置
+"""
+import os
+import argparse, yaml
+
+
+class myargparse(argparse.ArgumentParser):
+    def __init__(self, description, add_help):
+        super(myargparse, self).__init__(description=description, add_help=add_help)
+        # default_config_parser = parser = argparse.ArgumentParser(
+        #     description='Training Config', add_help=False)
+        self.add_argument(
+            '-c',
+            '--config_yaml',
+            default='config.yml',
+            type=str,
+            metavar='FILE',
+            help='YAML config file specifying default arguments')
+        # self.add_argument(
+        #     '-f',
+        #     '--feature_yaml',
+        #     default='feature.yml',
+        #     type=str,
+        #     metavar='FILE',
+        #     help='YAML feature norm file for clustering'
+        # )
+
+    def _parse_args_and_yaml(self):
+        given_configs, remaining = self.parse_known_args()
+        current_path = os.path.dirname(__file__)
+        if given_configs.config_yaml:
+            with open(current_path + '/' + given_configs.config_yaml, 'r', encoding='utf-8') as f:
+                cfg = yaml.safe_load(f)
+                self.set_defaults(**cfg)
+        # if given_configs.feature_yaml:
+        #     with open(current_path + '/' + given_configs.feature_yaml, 'r', encoding='utf-8') as f:
+        #         cfg = yaml.safe_load(f)
+        #         self.set_defaults(**cfg)
+        # The main arg parser parses the rest of the args, the usual
+        # defaults will have been overridden if config file specified.
+        opt = self.parse_args(remaining)
+        # Cache the args as a text string to save them in the output dir later
+        opt_text = yaml.safe_dump(opt.__dict__, default_flow_style=False)
+        # print("opt", opt)
+        return opt, opt_text
+
+    def parse_args_and_yaml(self):
+        return self._parse_args_and_yaml()[0]
+
+    def save_args_yml(self, opt, isdict=False):
+        current_path = os.path.dirname(__file__)
+        if isdict:
+            with open(current_path + '/' + 'config.yml', mode='w', encoding='utf-8') as f:
+                yaml.safe_dump(opt, f)
+        else:
+            with open(current_path + '/' + 'config.yml', mode='w', encoding='utf-8') as f:
+                yaml.safe_dump(vars(opt), f)
+
+
+
+if __name__ == '__main__':
+    args = myargparse(discription="场站端配置", add_help=False)
+    opt = args.parse_args_and_yaml()
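As an illustration of the layering implemented above: YAML values become argparse defaults, and command-line flags override them. This snippet mirrors the __main__ block; the printed keys exist in the config.yml that follows:

    # python config.py              -> defaults come from config.yml
    # python config.py -c other.yml -> defaults come from other.yml instead
    args = myargparse(description="station-side config", add_help=False)
    opt = args.parse_args_and_yaml()          # argparse.Namespace backed by YAML defaults
    print(opt.cap, opt.Model['time_step'])    # 50.0 16 for the config.yml below
    args.save_args_yml(opt)                   # persist the merged configuration back to config.yml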

+ 258 - 0
config.yml

@@ -0,0 +1,258 @@
+Model:
+  add_train: false
+  batch_size: 64
+  dropout_rate: 0.2
+  epoch: 100
+  fusion: true
+  hidden_size: 64
+  his_points: 16
+  how_long_fill: 10
+  input_size_env: 5
+  input_size_nwp: 24
+  lambda_value_1: 0.02
+  lambda_value_2: 0.01
+  learning_rate: 0.001
+  lstm_layers: 1
+  output_size: 16
+  patience: 10
+  predict_data_fill: true
+  region: south129
+  shuffle_train_data: false
+  test_data_fill: false
+  time_step: 16
+  train_data_fill: false
+  use_cuda: false
+  valid_data_rate: 0.15
+algorithm_platform:
+  farm_id: j00314
+  mongodb_database: db_cdq
+  mongodb_write_table: j00314
+  overwrite: 1
+  switch: 1
+authentication:
+  date: '2025-01-08'
+  full_cap: '2024-04-30'
+  repair: '2025-01-08'
+calculate: []
+cap: 50.0
+coe:
+  T1:
+    m: 0.029
+    n: 0.847
+    score: false
+    update: true
+  T10:
+    m: 0.418
+    n: 0.294
+    score: true
+    update: false
+  T11:
+    m: 0.406
+    n: 0.3
+    score: true
+    update: false
+  T12:
+    m: 0.376
+    n: 0.312
+    score: true
+    update: false
+  T13:
+    m: 0.312
+    n: 0.365
+    score: true
+    update: false
+  T14:
+    m: 0.276
+    n: 0.4
+    score: true
+    update: false
+  T15:
+    m: 0.288
+    n: 0.406
+    score: true
+    update: false
+  T16:
+    m: 0.582
+    n: 0.429
+    score: false
+    update: true
+  T2:
+    m: 0.365
+    n: 0.453
+    score: true
+    update: false
+  T3:
+    m: 0.576
+    n: 0.206
+    score: true
+    update: false
+  T4:
+    m: 0.624
+    n: 0.147
+    score: true
+    update: false
+  T5:
+    m: 0.594
+    n: 0.165
+    score: true
+    update: false
+  T6:
+    m: 0.482
+    n: 0.253
+    score: true
+    update: false
+  T7:
+    m: 0.429
+    n: 0.294
+    score: true
+    update: false
+  T8:
+    m: 0.706
+    n: 0.282
+    score: false
+    update: true
+  T9:
+    m: 0.535
+    n: 0.447
+    score: true
+    update: false
+config_yaml: config.yml
+database: mysql+pymysql://root:123456@192.168.1.37:3306/ipfcst-v3-j00105-s-nx55
+dataloc: ./data
+env_columns:
+- C_TIME
+- C_CELLT
+- C_DIFFUSER
+- C_GLOBALR
+- C_RH
+- C_REAL_VALUE
+first_point:
+  sun_up_time: '2024-11-10 07:30:00'
+  sun_up_value: 0.11
+  switch: true
+full_field: true
+history_hours: 1
+mean:
+  C_AIRT: -0.568
+  C_CELLT: 2.067
+  C_DIFFUSER: 117.133
+  C_DIFFUSE_RADIATION: 56.596
+  C_DIRECTR: 77.507
+  C_DIRECT_RADIATION: 132.056
+  C_FP_VALUE: 5.097
+  C_GLOBALR: 194.641
+  C_GLOBALRDA: 7.952
+  C_HOURDA: 3.617
+  C_OBLIQUER: 252.529
+  C_OBLIQUERDA: 10.286
+  C_P: 867.866
+  C_PRESSURE: 860.412
+  C_REAL_VALUE: 6.675
+  C_RH: 38.579
+  C_SWR: 188.652
+  C_T: -1.862
+  C_WD: 211.749
+  C_WD10: 214.02
+  C_WD100: 224.432
+  C_WD170: 224.909
+  C_WD30: 218.134
+  C_WD50: 220.114
+  C_WD70: 221.475
+  C_WD80: 222.015
+  C_WD90: 222.551
+  C_WS: 1.916
+  C_WS10: 2.963
+  C_WS100: 4.467
+  C_WS170: 4.766
+  C_WS30: 3.612
+  C_WS50: 3.921
+  C_WS70: 4.131
+  C_WS80: 4.214
+  C_WS90: 4.292
+  DT_TAG: 48.5
+new_field: true
+nwp_columns:
+- C_TIME
+- C_T
+- C_RH
+- C_PRESSURE
+- C_SWR
+- C_DIFFUSE_RADIATION
+- C_DIRECT_RADIATION
+- C_WD10
+- C_WD30
+- C_WD50
+- C_WD70
+- C_WD80
+- C_WD90
+- C_WD100
+- C_WD170
+- C_WS10
+- C_WS30
+- C_WS50
+- C_WS70
+- C_WS80
+- C_WS90
+- C_WS100
+- C_WS170
+- DT_TAG
+- C_FP_VALUE
+port: 9008
+predict: C_REAL_VALUE
+repair_days: 81
+repair_model_cycle: 5
+reset_sys: false
+spot_trading: []
+std:
+  C_AIRT: 6.321
+  C_CELLT: 11.059
+  C_DIFFUSER: 172.191
+  C_DIFFUSE_RADIATION: 85.053
+  C_DIRECTR: 134.741
+  C_DIRECT_RADIATION: 198.457
+  C_FP_VALUE: 9.418
+  C_GLOBALR: 299.8
+  C_GLOBALRDA: 7.964
+  C_HOURDA: 3.584
+  C_OBLIQUER: 390.907
+  C_OBLIQUERDA: 10.47
+  C_P: 5.749
+  C_PRESSURE: 5.371
+  C_REAL_VALUE: 11.936
+  C_RH: 13.209
+  C_SWR: 283.51
+  C_T: 5.829
+  C_WD: 89.245
+  C_WD10: 61.869
+  C_WD100: 67.519
+  C_WD170: 67.519
+  C_WD30: 62.113
+  C_WD50: 63.592
+  C_WD70: 65.084
+  C_WD80: 65.77
+  C_WD90: 66.516
+  C_WS: 1.587
+  C_WS10: 1.732
+  C_WS100: 2.58
+  C_WS170: 2.6
+  C_WS30: 2.084
+  C_WS50: 2.264
+  C_WS70: 2.391
+  C_WS80: 2.442
+  C_WS90: 2.486
+  DT_TAG: 27.711
+update_add_train_days: 60
+update_coe_days: 3
+usable_power:
+  api_able_power: true
+  bias: 2.524
+  clean_power_which: 1
+  coe: 4
+  down_fractile: 30
+  env: C_GLOBALR
+  k: 0.04079
+  outliers_threshold: 1.5
+  up_fractile: 70
+version: solar-3.1.0.south
+weatherloc:
+- 1
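The mean/std blocks above are per-feature z-score statistics: inputs are normalized with them, and predictions are mapped back via y * std['C_REAL_VALUE'] + mean['C_REAL_VALUE'], exactly as the loss functions do. A small sketch of both directions, assuming opt.mean and opt.std carry these dicts:

    import pandas as pd

    def normalize(df, mean, std):
        # z-score every column that has recorded statistics
        cols = [c for c in df.columns if c in mean and c in std]
        out = df.copy()
        out[cols] = (out[cols] - pd.Series(mean)[cols]) / pd.Series(std)[cols]
        return out

    def denormalize_power(y, mean, std):
        # inverse transform for the forecast target
        return y * std['C_REAL_VALUE'] + mean['C_REAL_VALUE']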

+ 143 - 0
data_features.py

@@ -0,0 +1,143 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/4/12 17:42
+# file: data_features.py
+# author: David
+# company: shenyang JY
+import pandas as pd
+import numpy as np
+np.random.seed(42)
+
+
+class DataFeatures(object):
+    def __init__(self, log, args):
+        self.logger = log
+        self.args = args
+        self.opt = self.args.parse_args_and_yaml()
+        self.columns = list()
+
+    def train_valid_split(self, datax, datay, valid_rate, shuffle):
+        shuffle_index = np.random.permutation(len(datax))
+        indexs = shuffle_index.tolist() if shuffle else np.arange(0, len(datax)).tolist()
+        valid_size = int(len(datax)*valid_rate)
+        valid_index = indexs[-valid_size:]
+        train_index = indexs[:-valid_size]
+        tx, vx, ty, vy = [], [], [], []
+        for i, data in enumerate(zip(datax, datay)):
+            if i in train_index:
+                tx.append(data[0])
+                ty.append(data[1])
+            elif i in valid_index:
+                vx.append(data[0])
+                vy.append(data[1])
+        return tx, vx, ty, vy
+
+    def get_train_data(self, dfs, envir):
+        num = 1
+        train_x, valid_x, train_y, valid_y = [], [], [], []
+        for i, df in enumerate(dfs, start=1):
+            if len(df) < self.opt.Model["time_step"]:
+                self.logger.info("特征处理-训练数据-不满足time_step +{}".format(num))
+                num += 1
+                continue
+            datax, datay = self.get_data_features(df, envir, is_train=True)
+            if len(datax) < 10:
+                self.logger.info("特征处理-训练数据-无法进行最小分割 +{}".format(num))
+                num += 1
+                continue
+            tx, vx, ty, vy = self.train_valid_split(datax, datay, valid_rate=self.opt.Model["valid_data_rate"], shuffle=self.opt.Model['shuffle_train_data'])
+            train_x.extend(tx)
+            valid_x.extend(vx)
+            train_y.extend(ty)
+            valid_y.extend(vy)
+
+        train_y = np.concatenate([[y.iloc[:, 1].values for y in train_y]], axis=0)
+        valid_y = np.concatenate([[y.iloc[:, 1].values for y in valid_y]], axis=0)
+
+        train_x = [np.array([x[0].values for x in train_x]), np.array([x[1].values for x in train_x])]
+        valid_x = [np.array([x[0].values for x in valid_x]), np.array([x[1].values for x in valid_x])]
+
+        return train_x, valid_x, train_y, valid_y
+
+    def get_test_data(self, dfs, envir):
+        num = 0
+        test_x, test_y, data_y = [], [], []
+        for i, df in enumerate(dfs, start=1):
+            if len(df) < self.opt.Model["time_step"]:
+                self.logger.info("特征处理-测试数据-不满足time_step +{}".format(num))
+                num += 1
+                continue
+            datax, datay = self.get_data_features(df, envir, is_train=False)
+
+            test_x.extend(datax)
+            test_y.extend(datay)
+            data_y.extend(datay)
+
+        test_x = [np.array([x[0].values for x in test_x]), np.array([x[1].values for x in test_x])]
+        test_y = np.concatenate([[y.iloc[:, 1].values for y in test_y]], axis=0)
+        return test_x, test_y, data_y
+
+    def get_realtime_data(self, dfs, envir):
+        test_x = []
+        for i, df in enumerate(dfs, start=1):
+            if len(df) < self.opt.Model["time_step"]:
+                self.logger.info("特征处理-预测数据-不满足time_step")
+                continue
+            datax = self.get_realtime_data_features(df, envir)
+            test_x.extend(datax)
+
+        test_x = [np.array([x[0].values for x in test_x]), np.array([x[1].values for x in test_x])]
+        return test_x
+
+    def get_data_features(self, norm_data, envir, is_train):   # pandas-based optimization of the windowing below
+        time_step = self.opt.Model["time_step"]
+        feature_data = norm_data.reset_index(drop=True)
+        time_step_loc = time_step - 1
+        train_num = int(len(feature_data))
+        label_features = ['C_TIME', 'C_REAL_VALUE']  # identical whether training or testing
+        nwp_cs = self.opt.nwp_columns.copy()
+        if 'C_TIME' in nwp_cs:
+            nwp_cs.pop(nwp_cs.index('C_TIME'))
+        nwp = [feature_data.loc[i:i + time_step_loc, nwp_cs].reset_index(drop=True) for i in range(train_num - time_step + 1)]  # database fields 'C_T' .. 'C_WS170'
+        labels = [feature_data.loc[i:i + time_step_loc, label_features].reset_index(drop=True) for i in range(train_num - time_step + 1)]
+        features_x, features_y = [], []
+        env_fill = envir[-self.opt.Model["his_points"]:]
+        self.logger.info("匹配环境前,{}组 -> ".format(len(nwp)))
+        for i, row in enumerate(zip(nwp, labels)):
+            time_end = row[1]['C_TIME'][0]
+            time_start = time_end - pd.DateOffset(1)
+            row1 = envir[(envir.C_TIME < time_end) & (envir.C_TIME > time_start)][-self.opt.Model["his_points"]:]
+            if len(row1) < self.opt.Model["his_points"]:
+                if self.opt.Model['fusion']:
+                    row1 = env_fill
+                    self.logger.info("训练环境数据不足{}个点:{},用数据进行填充".format(self.opt.Model["his_points"], len(row1)))
+                else:
+                    self.logger.info("训练环境数据不足{}个点:{},弃用".format(self.opt.Model["his_points"], len(row1)))
+                    continue
+            row1 = row1.reset_index(drop=True).drop(['C_TIME'], axis=1)
+            features_x.append([row1, row[0]])
+            features_y.append(row[1])
+        self.logger.info("匹配环境后,{}组".format(len(features_x)))
+        return features_x, features_y
+
+    def get_realtime_data_features(self, norm_data, envir):   # pandas-based optimization of the windowing below
+        time_step = self.opt.Model["time_step"]
+        feature_data = norm_data.reset_index(drop=True)
+        time_step_loc = time_step - 1
+        nwp_cs = self.opt.nwp_columns.copy()
+        if 'C_TIME' in nwp_cs:
+            nwp_cs.pop(nwp_cs.index('C_TIME'))
+        nwp = [feature_data.loc[i:i + time_step_loc, nwp_cs].reset_index(drop=True) for i in range(1)]  # database fields 'C_T' .. 'C_WS170'
+        features_x = []
+        self.logger.info("匹配环境前,{}组 -> ".format(len(nwp)))
+        for i, row in enumerate(nwp):
+            row1 = envir[-self.opt.Model["his_points"]:]
+            if len(row1) < self.opt.Model["his_points"]:
+                self.logger.info("环境数据不足{}个点:{}".format(self.opt.Model["his_points"], len(row1)))
+                continue
+            row1 = row1.reset_index(drop=True).drop(['C_TIME'], axis=1)
+            features_x.append([row1, row])
+        self.logger.info("匹配环境后,{}组".format(len(features_x)))
+        return features_x
+
+
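To make the windowing in get_data_features concrete: with time_step = 16 the list comprehension slides a 16-row window one step at a time, yielding len(df) - 15 (NWP, label) pairs, each then matched with the trailing his_points environment rows. A toy illustration, independent of the class above:

    import pandas as pd

    time_step = 16
    df = pd.DataFrame({'v': range(20)})                 # 20 rows -> 5 windows
    windows = [df.iloc[i:i + time_step].reset_index(drop=True)
               for i in range(len(df) - time_step + 1)]
    assert len(windows) == 5 and len(windows[0]) == 16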

+ 95 - 0
data_process.py

@@ -0,0 +1,95 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2024/5/6 13:52
+# file: data_process.py
+# author: David
+# company: shenyang JY
+import os
+import numpy as np
+import pandas as pd
+from cache.data_cleaning import rm_duplicated
+np.random.seed(42)
+
+
+class DataProcess(object):
+    def __init__(self, log, args):
+        self.logger = log
+        self.args = args
+        self.opt = self.args.parse_args_and_yaml()
+
+    # mainly gap-filling after the datasets are merged
+    def get_train_data(self, unite, envir):
+        unite['C_TIME'] = pd.to_datetime(unite['C_TIME'])
+        unite['time_diff'] = unite['C_TIME'].diff()
+        dt_short = pd.Timedelta(minutes=15)
+        dt_long = pd.Timedelta(minutes=15 * self.opt.Model['how_long_fill'])
+        data_train = self.missing_time_splite(unite, dt_short, dt_long)
+        miss_points = unite[(unite['time_diff'] > dt_short) & (unite['time_diff'] < dt_long)]
+        miss_number = miss_points['time_diff'].dt.total_seconds().sum(axis=0)/(15*60) - len(miss_points)
+        self.logger.info("再次测算,需要插值的总点数为:{}".format(miss_number))
+        if miss_number > 0 and self.opt.Model["train_data_fill"]:
+            data_train = self.data_fill(data_train)
+        return data_train, envir
+
+    def get_test_data(self, unite, envir):
+        unite['C_TIME'] = pd.to_datetime(unite['C_TIME'])
+        unite['time_diff'] = unite['C_TIME'].diff()
+        dt_short = pd.Timedelta(minutes=15)
+        dt_long = pd.Timedelta(minutes=15 * self.opt.Model['how_long_fill'])
+        data_test = self.missing_time_splite(unite, dt_short, dt_long)
+        miss_points = unite[(unite['time_diff'] > dt_short) & (unite['time_diff'] < dt_long)]
+        miss_number = miss_points['time_diff'].dt.total_seconds().sum(axis=0) / (15 * 60) - len(miss_points)
+        self.logger.info("再次测算,需要插值的总点数为:{}".format(miss_number))
+        if self.opt.Model["test_data_fill"] and miss_number > 0:
+            data_test = self.data_fill(data_test, test=True)
+        return data_test, envir
+
+    def get_predict_data(self, nwp, dq):
+        if self.opt.Model["predict_data_fill"] and len(dq) > len(nwp):
+            self.logger.info("接口nwp和dq合并清洗后,需要插值的总点数为:{}".format(len(dq)-len(nwp)))
+            nwp.set_index('C_TIME', inplace=True)
+            dq.set_index('C_TIME', inplace=True)
+            nwp = nwp.resample('15T').interpolate(method='linear') # linear interpolation on the NWP first
+            nwp = nwp.reindex(dq.index, method='bfill') # then a second fill for points beyond the resampling edges
+            nwp = nwp.reindex(dq.index, method='ffill')
+            nwp.reset_index(drop=False, inplace=True)
+            dq.reset_index(drop=False, inplace=True)
+        return nwp
+
+    def missing_time_splite(self, df, dt_short, dt_long):
+        n_long, n_short, n_points = 0, 0, 0
+        start_index = 0
+        dfs = []
+        for i in range(1, len(df)):
+            if df['time_diff'][i] >= dt_long:
+                df_long = df.iloc[start_index:i, :-1]
+                dfs.append(df_long)
+                start_index = i
+                n_long += 1
+            if df['time_diff'][i] > dt_short:
+                self.logger.info(f"{df['C_TIME'][i-1]} ~ {df['C_TIME'][i]}")
+                points = df['time_diff'].dt.total_seconds()[i]/(60*15)-1
+                self.logger.info("缺失点数:{}".format(points))
+                if df['time_diff'][i] < dt_long:
+                    n_short += 1
+                    n_points += points
+                    self.logger.info("需要补值的点数:{}".format(points))
+        dfs.append(df.iloc[start_index:, :-1])
+        self.logger.info(f"数据总数:{len(df)}, 时序缺失的间隔:{n_short}, 其中,较长的时间间隔:{n_long}")
+        self.logger.info("需要补值的总点数:{}".format(n_points))
+        return dfs
+
+    def data_fill(self, dfs, test=False):
+        dfs_fill, inserts = [], 0
+        for i, df in enumerate(dfs):
+            df = rm_duplicated(df, self.logger)
+            df1 = df.set_index('C_TIME', inplace=False)
+            dff = df1.resample('15T').interpolate(method='linear')  # linear interpolation; other fill strategies still need comparison
+            dff.reset_index(inplace=True)
+            points = len(dff) - len(df1)
+            dfs_fill.append(dff)
+            self.logger.info("{} ~ {} 有 {} 个点, 填补 {} 个点.".format(dff.iloc[0, 0], dff.iloc[-1, 0], len(dff), points))
+            inserts += points
+        name = "预测数据" if test is True else "训练集"
+        self.logger.info("{}分成了{}段,实际一共补值{}点".format(name, len(dfs_fill), inserts))
+        return dfs_fill
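The fill strategy in data_fill is plain linear interpolation on the 15-minute grid. A self-contained sketch of what resample('15T').interpolate does to a gap:

    import pandas as pd

    s = pd.DataFrame({'C_REAL_VALUE': [1.0, 4.0]},
                     index=pd.to_datetime(['2024-01-01 00:00', '2024-01-01 00:45']))
    filled = s.resample('15T').interpolate(method='linear')
    # 00:00 -> 1.0, 00:15 -> 2.0, 00:30 -> 3.0, 00:45 -> 4.0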

+ 108 - 0
dev.yml

@@ -0,0 +1,108 @@
+reset_sys: true
+Model:
+  add_train: false
+  batch_size: 64
+  dropout_rate: 0.2
+  epoch: 100
+  fusion: true
+  hidden_size: 64
+  his_points: 16
+  how_long_fill: 10
+  input_size_env: 5
+  input_size_nwp: 24
+  lambda_value_1: 0.02
+  lambda_value_2: 0.01
+  learning_rate: 0.001
+  lstm_layers: 1
+  output_size: 16
+  patience: 10
+  predict_data_fill: true
+  region: south129
+  shuffle_train_data: false
+  test_data_fill: false
+  time_step: 16
+  train_data_fill: false
+  use_cuda: false
+  valid_data_rate: 0.15
+coe:
+  T1:
+    m: 0.029
+    n: 0.847
+    score: true
+    update: true
+  T10:
+    m: 0.418
+    n: 0.294
+    score: true
+    update: true
+  T11:
+    m: 0.406
+    n: 0.3
+    score: true
+    update: true
+  T12:
+    m: 0.376
+    n: 0.312
+    score: true
+    update: true
+  T13:
+    m: 0.312
+    n: 0.365
+    score: true
+    update: true
+  T14:
+    m: 0.276
+    n: 0.4
+    score: true
+    update: true
+  T15:
+    m: 0.288
+    n: 0.406
+    score: true
+    update: true
+  T16:
+    m: 0.582
+    n: 0.429
+    score: true
+    update: true
+  T2:
+    m: 0.365
+    n: 0.453
+    score: true
+    update: true
+  T3:
+    m: 0.576
+    n: 0.206
+    score: true
+    update: true
+  T4:
+    m: 0.624
+    n: 0.147
+    score: true
+    update: true
+  T5:
+    m: 0.594
+    n: 0.165
+    score: true
+    update: true
+  T6:
+    m: 0.482
+    n: 0.253
+    score: true
+    update: true
+  T7:
+    m: 0.429
+    n: 0.294
+    score: true
+    update: true
+  T8:
+    m: 0.706
+    n: 0.282
+    score: true
+    update: true
+  T9:
+    m: 0.535
+    n: 0.447
+    score: true
+    update: true
+update_coe_days: 3
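dev.yml mirrors a subset of config.yml with looser settings (every coe entry scored and updated, reset_sys flipped on). The commit does not show how it is consumed; one plausible reading, sketched with an assumed helper, is a shallow overlay on the base config:

    import yaml

    def load_with_overlay(base_path='config.yml', overlay_path='dev.yml'):
        # hypothetical helper: dev values win over base values, top-level keys only
        with open(base_path, encoding='utf-8') as f:
            cfg = yaml.safe_load(f)
        with open(overlay_path, encoding='utf-8') as f:
            cfg.update(yaml.safe_load(f) or {})
        return cfg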

+ 125 - 0
error.py

@@ -0,0 +1,125 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/3/13 9:24
+# file: error.py
+# author: David
+# company: shenyang JY
+import os
+import pandas as pd
+import numpy as np
+import datetime
+import pickle
+from request import requestHandler
+
+current_path = os.path.dirname(__file__)
+
+
+def readVar(path):
+    if os.path.exists(path):
+        with open(path, "rb") as file:
+            var = pickle.load(file)
+        return var
+    else:
+        return None
+
+
+class dqFix(object):
+    # the error history is kept as static (class-level) state
+
+    def __init__(self, log, args):
+        self.logger = log
+        self.args = args
+        self.opt = self.args.parse_args_and_yaml()
+
+    def history_error(self, history_dq, history_rp, dq):
+        his = self.opt.history_hours * 5
+        errors = pd.merge(history_dq, history_rp, on='C_TIME')
+        errors = errors.tail(his)
+        if len(errors) > 0:
+            errors = errors.apply(pd.to_numeric, errors='ignore')
+            error = errors['C_REAL_VALUE'].values - errors['C_FP_VALUE'].values
+            error = round(np.mean(error), 3)
+        else:
+            error = 0
+        self.logger.debug("history error: {} over {} rows".format(error, len(errors)))
+        dq['his_fix'] = error + dq['C_FP_VALUE']
+        dq.loc[dq['his_fix'] < 0, ['his_fix']] = 0
+        dq.loc[dq['his_fix'] > self.opt.cap, ['his_fix']] = self.opt.cap
+        dq['C_TIME'] = pd.to_datetime(dq['C_TIME'])
+        return dq.loc[:, ['C_TIME', 'his_fix', 'C_FP_VALUE']]
+
+    def history_error_clock(self, dq, rp, point):
+        history_fixes = []
+        for index, row in dq.iterrows():
+            history_fix = row['C_FP_VALUE']
+            time_end = row.iloc[0] - pd.Timedelta(hours=4) + point*pd.Timedelta(minutes=15)
+            time_start = time_end - pd.Timedelta(hours=self.opt.history_hours)
+            history = rp[(rp.C_TIME <= time_end) & (rp.C_TIME > time_start)].copy()  # rp already has curtailment periods filtered out
+            if self.opt.usable_power['api_able_power'] is True:
+                history['error'] = history['C_ABLE_VALUE'] - history['C_FP_VALUE']
+            else:
+                history['error'] = history['C_REAL_VALUE'] - history['C_FP_VALUE']
+            history_err = round(history.loc[:, 'error'].mean(), 3) if len(history) != 0 else 0
+            history_fix += history_err
+            history_fix = history_fix if history_fix > 0 else 0
+            history_fix = self.opt.cap if history_fix > self.opt.cap else history_fix
+            history_fixes.append(history_fix)
+        return history_fixes
+
+    def cdq(self, fhs):
+        cdq = []
+        sun_up_time = datetime.datetime.strptime(self.opt.first_point['sun_up_time'], '%Y-%m-%d %H:%M:%S')
+        for i, fh in fhs.iterrows():
+            if i+1 > 16:
+                T = 'T16'
+            else:
+                T = 'T' + str(i+1)
+            coe_m = float(self.opt.coe[T]['m'])
+            coe_n = float(self.opt.coe[T]['n'])
+            dt = fh['C_TIME']
+            dq = fh['C_FP_VALUE']
+            dq_fix = fh['dq_fix']
+            his_fix = fh['his_fix']
+            new_dq = round(coe_n * his_fix + coe_m * dq_fix, 2)  # blended correction (fixes the all-zero curve issue)
+
+            for j, cal in enumerate(self.opt.spot_trading):
+                if 'T' + str(cal['point']) == T:
+                    new_dq = round(new_dq * cal['coe'] + cal['abs'], 2)
+                    self.logger.info("现货交易:第{}个点,进行了乘系数加绝对值修改".format(cal['point']))
+
+            for j, cal in enumerate(self.opt.calculate):
+                interval = list(range(cal['extraMinRange'], cal['extraMaxRange'] + 1))
+                if dt.hour in interval:
+                    new_dq = round(new_dq * cal['coe'] + cal['abs'], 2)
+                    self.logger.info("第{}组,时段为:{}-{}点,时刻:{},进行了乘系数加绝对值修改".format(j + 1, cal['extraMinRange'],
+                                                                                        cal['extraMaxRange'], dt.strftime('%Y-%m-%d %H:%M:%S')))
+            new_dq = new_dq if dq > 0 else 0
+            new_dq = new_dq if new_dq > 0 else 0
+            new_dq = self.opt.cap if new_dq > self.opt.cap else new_dq
+            if T == 'T1' and self.opt.first_point['switch'] is True:
+                # East China special rule: replace the first point with actual power; at sunrise reuse
+                # yesterday's sunrise point (the system judges whether curtailed output can be fully absorbed)
+                if dt.time() != sun_up_time.time() or new_dq > 0:
+                    rpt = requestHandler.history_rp.tail(1)
+                    new_dq = rpt['C_ABLE_VALUE'].iloc[0] if rpt['LIMIT_STATUS'].iloc[0] == 1 else rpt['C_REAL_VALUE'].iloc[0]
+                else:
+                    new_dq = self.opt.first_point['sun_up_value']
+            cdq.append({"C_TIME": dt.strftime('%Y-%m-%d %H:%M:%S'), "CDQ_VALUE": new_dq})
+        return cdq
+
+
+if __name__ == "__main__":
+    import argparse
+    # argparse方便于命令行下输入参数,可以根据需要增加更多
+    parser = argparse.ArgumentParser()
+    # parser.add_argument("-t", "--do_train", default=False, type=bool, help="whether to train")
+    # parser.add_argument("-p", "--do_predict", default=True, type=bool, help="whether to train")
+    # parser.add_argument("-b", "--batch_size", default=64, type=int, help="batch size")
+    # parser.add_argument("-e", "--epoch", default=20, type=int, help="epochs num")
+    args = parser.parse_args()
+
+    #
+    # for key in dir(args):               # dir(args) 函数获得args所有的属性
+    #     if not key.startswith("_"):     # 去掉 args 自带属性,比如__name__等
+    #         setattr(con, key, getattr(args, key))   # 将属性值赋给Config
+    #
+    # # main(con)
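A worked example of the blend at the heart of cdq, using the T4 coefficients from config.yml and invented corrections:

    coe_m, coe_n = 0.624, 0.147           # config.yml, key T4
    dq_fix, his_fix = 20.0, 18.0          # illustrative values only (MW)
    new_dq = round(coe_n * his_fix + coe_m * dq_fix, 2)
    # 0.147*18.0 + 0.624*20.0 = 2.646 + 12.48 -> 15.13 MW,
    # afterwards clipped to [0, cap] and optionally rescaled by the
    # spot_trading / calculate rules shown above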

+ 205 - 0
logs.py

@@ -0,0 +1,205 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/3/4 22:28
+# file: logs.py
+# author: David
+# company: shenyang JY
+
+"""
+1. 信息流以控制台和文件形式打印
+2. 文件保存以启动日期为文件名
+3. 控制台INFO,文件DEBUG
+"""
+import codecs
+from pathlib import Path
+import logging, logging.handlers, time, os, re
+from logging.handlers import BaseRotatingHandler
+
+
+class DailyRotatingFileHandler(BaseRotatingHandler):
+    """
+    同`logging.TimedRotatingFileHandler`类似,不过这个handler:
+    - 可以支持多进程
+    - 只支持自然日分割
+    - 暂不支持UTC
+    """
+
+    def __init__(self, filename, backupCount=0, encoding=None, delay=False, utc=False, **kwargs):
+        self.backup_count = backupCount
+        self.utc = utc
+        self.suffix = "%Y-%m-%d"
+        self.base_log_path = Path(filename)
+        self.base_filename = self.base_log_path.name
+        self.current_filename = self._compute_fn()
+        self.current_log_path = self._compute_lp()
+        BaseRotatingHandler.__init__(self, filename, 'a', encoding, delay)
+
+    def shouldRollover(self, record):
+        """
+        Decide whether to roll the log: roll when the filename for the current
+        time differs from the file currently open
+        """
+        if self.current_filename != self._compute_fn():
+            # the date changed; compute the new log file
+            self.current_filename = self._compute_fn()
+            return True
+        elif os.path.getsize(self.current_log_path) > 10485760:     # is the file larger than 10 MB?
+            # over 10 MB; move on to the next segment
+            seg = int(self.current_filename.split(".")[-2]) + 1
+            self.current_filename = self._compute_fn(seg=seg)
+            return True
+        return False
+
+    def doRollover(self):
+        """
+        Roll the log file
+        """
+        # close the old log file
+        if self.stream:
+            self.stream.close()
+            self.stream = None
+
+        # self.current_log_path = self.base_log_path.with_name(self.current_filename)
+        self.current_log_path = self._compute_lp()
+        # open the new log file
+        if not self.delay:
+            self.stream = self._open()
+
+        # delete expired logs
+        # self.delete_expired_files()
+
+    def _compute_lp(self):
+        """
+        Compute the log path for the current time
+        """
+        current_log_path = self.base_log_path.parent / time.strftime(self.suffix, time.localtime())
+        if not os.path.exists(current_log_path):
+            os.mkdir(current_log_path)
+        return current_log_path / self.current_filename
+
+    def _compute_fn(self, seg=0):
+        """
+        Compute the log file name for the current time
+        """
+        return "ipfcst-forecast" + "." + time.strftime(self.suffix, time.localtime()) + '.' + str(seg) + '.log'
+
+    def _open(self):
+        """
+        Open the new log file and refresh the symlink pointed to by base_filename;
+        changing the symlink does not affect logging
+        """
+        if self.encoding is None:
+            stream = open(str(self.current_log_path), self.mode)
+        else:
+            stream = codecs.open(str(self.current_log_path), self.mode, self.encoding)
+
+        # # remove the old symlink
+        # if self.base_log_path.exists():
+        #     try:
+        #         # if base_log_path is not a symlink, or points at the wrong file, remove it first
+        #         if not self.base_log_path.is_symlink() or os.readlink(self.base_log_path) != self.current_log_path:
+        #             os.remove(self.base_log_path)
+        #     except OSError:
+        #         pass
+        #
+        # # create the new symlink
+        # try:
+        #     os.symlink(self.current_log_path, str(self.base_log_path))
+        # except OSError:
+        #     pass
+        return stream
+
+    def delete_expired_files(self):
+        """
+        Delete expired log directories
+        """
+        if self.backup_count <= 0:
+            return
+
+        file_names = os.listdir(str(self.base_log_path.parent))
+        result = []
+        for file_name in file_names:
+            if re.match(r"^\d{4}-\d{2}-\d{2}(\.\w+)?$", file_name):
+                result.append(file_name)
+        if len(result) < self.backup_count:
+            result = []
+        else:
+            result.sort()
+            result = result[:len(result) - self.backup_count]
+        import shutil
+        for file_name in result:
+            path = self.base_log_path.with_name(file_name)
+            if os.path.isdir(path):
+                shutil.rmtree(path)
+
+
+class Log(object):
+    def __init__(self):
+        # logger name for the owning module; defaults to root
+        self.logger = logging.getLogger()
+        # output levels
+        LEVELS = {'NOTSET': logging.NOTSET,
+                  'DEBUG': logging.DEBUG,
+                  'INFO': logging.INFO,
+                  'WARNING': logging.WARNING,
+                  'ERROR': logging.ERROR,
+                  'CRITICAL': logging.CRITICAL}
+
+        # must be set explicitly; otherwise everything below WARNING is filtered out by default
+        self.logger.setLevel(LEVELS['DEBUG'])
+
+        # raise the log level for matplotlib only (ERROR)
+        matplotlib_logger = logging.getLogger('matplotlib')
+        matplotlib_logger.setLevel(logging.ERROR)
+
+        # log line format
+        self.formatter = logging.Formatter(
+            '%(asctime)s - %(filename)s - %(levelname)s - %(message)s - %(funcName)s')
+
+        # create a handler that writes records to the log file
+        # fh = logging.FileHandler(self.logname, 'a', encoding='utf-8')
+        # midnight: roll the log file at midnight
+        # interval: number of 'when' units to wait before rolling to a new file
+        # backupCount: number of log files to keep, e.g. 30 keeps the latest 30 days
+        # fh = logging.handlers.TimedRotatingFileHandler(self.getLogName(), when='midnight', interval=1, backupCount=30, encoding='utf-8')
+        # fh.suffix = "%Y-%m-%d"
+        # # fh.extMatch = r"^\d{4}-\d{2}-\d{2}"
+        # # set the log level
+        # fh.setLevel(LEVELS['INFO'])
+        # # attach the formatter
+        # fh.setFormatter(self.formatter)
+        filename = self.getLogName()
+        dr_fh = DailyRotatingFileHandler(filename, backupCount=100, encoding='utf-8')
+        dr_fh.setFormatter(self.formatter)
+        # register the handler on the logger
+        self.logger.addHandler(dr_fh)
+
+        # create a StreamHandler for console output
+        ch = logging.StreamHandler()
+        ch.setLevel(LEVELS['INFO'])
+        ch.setFormatter(self.formatter)
+        self.logger.addHandler(ch)
+
+        # # close the opened file
+        dr_fh.close()
+
+    def getLogName(self):
+        # log_path is where the log files live
+        # lib_path = os.path.abspath(os.path.join(os.path.dirname(__file__), 'logs'))
+        lib_path = Path(os.path.dirname(__file__)).parent.parent.parent / 'logs'
+        self.logger.info("日志输出路径为:{}".format(lib_path))
+        # create the logs directory automatically if it does not exist
+        if not os.path.exists(lib_path):
+            os.mkdir(lib_path)
+        return lib_path / 'ipfcst_forecast_link.log'
+
+
+
+if __name__ == "__main__":
+    logger = Log()
+    logger.info("this is info")
+    logger.debug("this is debug")
+    logger.error("this is error")
+    logger.warning("this is warning")
+    logger.critical("critical")

+ 150 - 0
request.py

@@ -0,0 +1,150 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/3/31 11:14
+# file: request.py
+# author: David
+# company: shenyang JY
+import pandas as pd
+import numpy as np
+
+from cache.inputData import dt_tag
+from cache.data_cleaning import cleaning, key_field_row_cleaning, rm_duplicated
+from validate import ValidationError
+
+
+class requestHandler(object):
+    history_rp = None
+    env = None
+
+    def __init__(self, log, args):
+        self.logger = log
+        self.args = args
+        self.opt = args.parse_args_and_yaml()
+
+    def get_form_data(self, req):
+        data = req.form.get("data")
+        if data is None:
+            raise ValidationError("data为空")
+        # eval() doubles as a check that the string parses as a dict.
+        # What format does a Null field arrive in? Neither {"field": null}
+        # nor {"field": } would pass this check.
+        jata = eval(data.replace("\t", ""))
+        nwp = pd.json_normalize(jata, record_path=["nwp"])
+        dq = pd.json_normalize(jata, record_path=["dq"])
+        history_dq = pd.json_normalize(jata, record_path=["history_dq"])
+        history_rp1 = pd.json_normalize(jata, record_path=["history_rp"])
+        env1 = pd.json_normalize(jata, record_path=["env"])
+        nwp['C_TIME'] = pd.to_datetime(nwp['C_TIME'])
+        nwp['DT_TAG'] = nwp.apply(lambda x: dt_tag(x['C_TIME']), axis=1)
+        dq['C_TIME'] = pd.to_datetime(dq['C_TIME'])
+        history_dq['C_TIME'] = pd.to_datetime(history_dq['C_TIME'])
+        history_rp1['C_TIME'] = pd.to_datetime(history_rp1['C_TIME'])
+        history_rp1 = rm_duplicated(history_rp1, self.logger)
+        env1['C_TIME'] = pd.to_datetime(env1['C_TIME'])
+        self.logger.info("-----历史数据长度,实际功率:{},环境监测仪:{}-----".format(len(history_rp1), len(env1)))
+        self.logger.info("历史数据最后一个点,实际功率:{},环境监测仪:{}".format(history_rp1.iloc[-1, history_rp1.columns.get_loc('C_TIME')], env1.iloc[-1, env1.columns.get_loc('C_TIME')]))
+        # 1. append new points
+        if len(history_rp1) == 1 and requestHandler.history_rp is not None:
+            if requestHandler.history_rp.iloc[-1, requestHandler.history_rp.columns.get_loc('C_TIME')] < history_rp1.iloc[0, history_rp1.columns.get_loc('C_TIME')]:
+                self.logger.info("合并前,实际功率:{}".format(len(requestHandler.history_rp)))
+                requestHandler.history_rp = pd.concat([requestHandler.history_rp, history_rp1]).reset_index(drop=True)
+            if requestHandler.env.iloc[-1, requestHandler.env.columns.get_loc('C_TIME')] < env1.iloc[0, env1.columns.get_loc('C_TIME')]:
+                self.logger.info("合并前,环境监测仪:{}".format(len(requestHandler.env)))
+                requestHandler.env = pd.concat([requestHandler.env, env1]).reset_index(drop=True)
+            self.logger.info("新增方法:实际功率:{},环境监测仪:{}".format(len(requestHandler.history_rp), len(requestHandler.env)))
+        # 2. full refresh, or first load
+        else:
+            requestHandler.history_rp = history_rp1
+            requestHandler.env = env1
+            self.logger.info("重更或初始方法:实际功率:{},测风塔:{}".format(len(requestHandler.history_rp), len(requestHandler.env)))
+        # 3. average the met-mast data and refresh the cache
+        env_columns = [ele for ele in self.opt.env_columns if ele not in ['C_TIME', 'C_FP_VALUE', 'C_REAL_VALUE', 'error']]
+        env_columns = env_columns if len(env_columns) > 0 else [self.opt.usable_power['env']]
+        env_ave = self.his_data_ave(requestHandler.env, env_columns)
+        rp_inst = self.his_data_inst(requestHandler.history_rp, ['C_REAL_VALUE', 'C_ABLE_VALUE', 'LIMIT_STATUS'])
+        return history_dq, rp_inst, env_ave, nwp, dq
+
+    def his_data_ave(self, his, cols):
+        # From the last historical point, look back history_env_hours hours; too few points returns errorCode=3.
+        # Met-mast values are bucketed on the left edge of each 15-minute interval.
+        # 1. clean the data and select features first
+        self.logger.info("气象站处理前时间范围:{}-{}".format(his.iloc[0, his.columns.get_loc('C_TIME')],
+                                                               his.iloc[-1, his.columns.get_loc('C_TIME')]))
+        if self.opt.Model["fusion"] is False:
+            his = his.replace(-99, np.nan)
+            his.set_index('C_TIME', inplace=True)
+            his = his.fillna(method='ffill')
+            his = his.fillna(method='bfill')
+            his.reset_index(drop=False, inplace=True)
+
+        his_clean = key_field_row_cleaning(his, cols=cols, logger=self.logger)
+
+        if not his_clean.empty:
+            self.logger.info("气象站清洗后时间范围:{}-{}".format(his_clean.iloc[0, his_clean.columns.get_loc('C_TIME')], his_clean.iloc[-1, his_clean.columns.get_loc('C_TIME')]))
+            his_filter = his_clean[['C_TIME']+cols]
+            # 2. take the mean per 15-minute bucket; buckets with no data return -99
+            his_ave15 = his_filter.resample('15T', on='C_TIME', label='left').mean().reset_index()
+            self.logger.info("气象站重采样时间范围:{}-{}".format(his_ave15.iloc[0, his_ave15.columns.get_loc('C_TIME')], his_ave15.iloc[-1, his_ave15.columns.get_loc('C_TIME')]))
+            hours = self.opt.Model["his_points"]
+            his_ave15 = his_ave15.tail(hours)
+            self.logger.info("气象站处理后时间范围:{}-{}".format(his_ave15.iloc[0, his_ave15.columns.get_loc('C_TIME')], his_ave15.iloc[-1, his_ave15.columns.get_loc('C_TIME')]))
+            self.logger.info("更新缓存前,cls.env第一个点时间:{}".format(requestHandler.env.iloc[0, requestHandler.env.columns.get_loc('C_TIME')]))
+            requestHandler.update_cache_env(his_ave15.iloc[0, his_ave15.columns.get_loc('C_TIME')])
+            self.logger.info("更新缓存后,cls.env第一个点时间:{}".format(requestHandler.env.iloc[0, requestHandler.env.columns.get_loc('C_TIME')]))
+            self.logger.info("清洗后不为空,气象站:{}".format(len(his_ave15)))
+            return his_ave15
+        else:
+            return his_clean
+
+    def rp_data_ave(self, his, cols):
+        self.logger.info("实际功率处理前时间范围:{}-{}".format(his.iloc[0, his.columns.get_loc('C_TIME')],
+                                                             his.iloc[-1, his.columns.get_loc('C_TIME')]))
+        his_clean = key_field_row_cleaning(his, cols=cols, logger=self.logger)
+        if not his_clean.empty:
+            his_ave15 = his_clean.resample('15T', on='C_TIME', label='left').mean().reset_index()
+            his_ave15['LIMIT_STATUS'] = his_ave15['LIMIT_STATUS'].apply(lambda x: x if x in [0, 1] else 1)
+            hours = self.opt.Model["his_points"]
+            his_ave15 = his_ave15.tail(hours)
+            self.logger.info("实际功率处理后时间范围:{}-{}".format(his_ave15.iloc[0, his_ave15.columns.get_loc('C_TIME')], his_ave15.iloc[-1, his_ave15.columns.get_loc('C_TIME')]))
+            requestHandler.update_cache_rp(his_ave15.iloc[0, his_ave15.columns.get_loc('C_TIME')])
+            return his_ave15
+        else:
+            return his_clean
+
+    def his_data_inst(self, his, cols):
+        self.logger.info("实际功率处理前时间范围:{}-{}".format(his.iloc[0, his.columns.get_loc('C_TIME')], his.iloc[-1, his.columns.get_loc('C_TIME')]))
+        # clean the data and select key fields first
+        his_clean = key_field_row_cleaning(his, cols=cols, logger=self.logger)
+        if not his_clean.empty:
+            # new column holding the minute of each timestamp
+            his_clean['minute'] = his_clean['C_TIME'].dt.minute
+            # snap each row to the nearest target minute (see closest_minute below)
+            his_clean['closest_target_minute'] = his_clean['minute'].apply(self.closest_minute)
+            his_clean['C_TIME'] = his_clean.apply(lambda x: x['C_TIME'].replace(minute=x['closest_target_minute']), axis=1)
+            his_inst = his_clean.groupby('C_TIME').first().reset_index(drop=False)
+            his_filter = his_inst[['C_TIME'] + cols]
+            # self.logger.info(his_filter)
+            his_filter.set_index('C_TIME', inplace=True)
+            resample = pd.date_range(start=his_filter.index.min(), end=his_filter.index.max(), freq='15T', name='C_TIME')
+            his_filter = his_filter.reindex(resample, fill_value=np.nan).reset_index(drop=False)
+            # self.logger.info(his_filter)
+            hours = self.opt.Model["his_points"]
+            his_inst = his_filter.tail(hours)
+            self.logger.info(
+                "实际功率处理后时间范围:{}-{}".format(his_inst.iloc[0, his_inst.columns.get_loc('C_TIME')], his_inst.iloc[-1, his_inst.columns.get_loc('C_TIME')]))
+            # self.logger.info(his_inst)
+            requestHandler.update_cache_rp(his_inst.iloc[0, his_inst.columns.get_loc('C_TIME')])
+            return his_inst
+        else:
+            return his_clean
+
+    def closest_minute(self, minute):
+        target_minutes = [0, 15, 30, 45]
+        return min(target_minutes, key=lambda x: abs(x - minute) if x - minute <= 0 else 99)
+
+    @classmethod
+    def update_cache_rp(cls, begin):
+        cls.history_rp = cls.history_rp[cls.history_rp['C_TIME'] >= begin].reset_index(drop=True)
+
+    @classmethod
+    def update_cache_env(cls, begin):
+        cls.env = cls.env[cls.env['C_TIME'] >= begin].reset_index(drop=True)
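Despite its name, closest_minute snaps down to the most recent quarter-hour boundary: any future boundary gets the sentinel key 99 and is never chosen. A standalone copy with a few checks:

    def closest_minute(minute):
        # copy of requestHandler.closest_minute, for illustration only
        target_minutes = [0, 15, 30, 45]
        return min(target_minutes, key=lambda x: abs(x - minute) if x - minute <= 0 else 99)

    assert closest_minute(7) == 0      # 15 is in the future, so it is penalized
    assert closest_minute(29) == 15
    assert closest_minute(44) == 30
    assert closest_minute(45) == 45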

+ 13 - 0
requirements.txt

@@ -0,0 +1,13 @@
+APScheduler==3.10.4
+Flask==2.2.3
+gunicorn==20.1.0
+Keras==2.3.1
+matplotlib==3.5.3
+numpy==1.21.6
+pandas==1.3.5
+PyMySQL==1.0.2
+pytz==2022.7.1
+PyYAML==6.0.1
+SQLAlchemy==1.4.35
+tensorflow==1.15.0

+ 21 - 0
ssl/server.crt

@@ -0,0 +1,21 @@
+-----BEGIN CERTIFICATE-----
+MIIDdTCCAl2gAwIBAgIUFZ+AWdFtTILuBFLvWBENf+x3FlEwDQYJKoZIhvcNAQEL
+BQAwYjELMAkGA1UEBhMCQ04xETAPBgNVBAgMCExpYW9uaW5nMREwDwYDVQQHDAhT
+aGVueWFuZzELMAkGA1UECgwCSlkxDDAKBgNVBAsMA1ImRDESMBAGA1UEAwwJMTI3
+LjAuMC4xMCAXDTI0MDMyMjA1Mjg1MloYDzIxMjQwMjI3MDUyODUyWjBiMQswCQYD
+VQQGEwJDTjERMA8GA1UECAwITGlhb25pbmcxETAPBgNVBAcMCFNoZW55YW5nMQsw
+CQYDVQQKDAJKWTEMMAoGA1UECwwDUiZEMRIwEAYDVQQDDAkxMjcuMC4wLjEwggEi
+MA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCl4rdYKwXgt60o+z31Wvd3U+ED
+QIRqrNit+1O5HJwpFw1+tLgpa5YkNrMAhdGiOfjTRznVvqKLiyCb14PZMvnLIULn
+vL2QIADqwuKzZzYqo4Z9VCXS+sXV1l9CcH4pW9q2WYFeJUelasZjyKrevYXaHBne
+5SbcOzSobMLm9zuZ/XbTRVea3hDm66g/ca4i0s1SCeyl0ZEu1n/PLECYt6YdFvJh
+nLaBisFRP5K9cUF9Ok/vUanNWqgxvWLvcSIkJokv+/XKpwZ968wB0z+uwqo0Det7
+Vz+wa0kcuXqEFstjqfG+RnnEah4DVVU6Xd6HI8jtb+8K1sMvtymV1suGcht/AgMB
+AAGjITAfMB0GA1UdDgQWBBQB+8v4CnwPLFHF/GugB/x88zYCsDANBgkqhkiG9w0B
+AQsFAAOCAQEASSIrGNMu+8HiiArRTjlXRCtCtpcKtQERZyQpTE6Q94H8mJwMiXvK
+tpNVQm8AgfypwDJyJNofjsG6symkeNhd/POXNFboc4Vwe+hPQwy9i9hzm/3qJkh+
+CS4eyGOXNfoz/9LWDtSGZn2m1MqqkVuz/x6Xjz8BPHRh4CiIWkF1uxGXbGS80G28
+MNvBZMZt5CRBFeaDjYDcCZiypezwjRZpeYpEwgaigI05GsmBKM1otLlEz0gnr2Ra
+t7KSN4AQxvFDM9dfqHEn6b79gJODx0ETm0K6l8pulbQZxeLbo/dMQ4x2BIWUlztN
+3E8POiiS7D604EGP967y8fZdCevAN54eRw==
+-----END CERTIFICATE-----

+ 28 - 0
ssl/server.key

@@ -0,0 +1,28 @@
+-----BEGIN PRIVATE KEY-----
+MIIEvwIBADANBgkqhkiG9w0BAQEFAASCBKkwggSlAgEAAoIBAQCl4rdYKwXgt60o
++z31Wvd3U+EDQIRqrNit+1O5HJwpFw1+tLgpa5YkNrMAhdGiOfjTRznVvqKLiyCb
+14PZMvnLIULnvL2QIADqwuKzZzYqo4Z9VCXS+sXV1l9CcH4pW9q2WYFeJUelasZj
+yKrevYXaHBne5SbcOzSobMLm9zuZ/XbTRVea3hDm66g/ca4i0s1SCeyl0ZEu1n/P
+LECYt6YdFvJhnLaBisFRP5K9cUF9Ok/vUanNWqgxvWLvcSIkJokv+/XKpwZ968wB
+0z+uwqo0Det7Vz+wa0kcuXqEFstjqfG+RnnEah4DVVU6Xd6HI8jtb+8K1sMvtymV
+1suGcht/AgMBAAECggEAO7Bb03RLnaPJwWTB0w6j28KJRQ/Kdc3y+4tjAUkboadj
+WjOxzmmhcS9jcm00VMCyWWeYFmKNf1OU7K58CUPP4Pj7Z4Ig3YK+gbqVMwOQ2H/V
+rTQBlixY0kxcI35xzvT3ukRzico1QP6uGwqB/TrJtoUFBBCFTppZJL9Z1ByizBH0
+/MKQfHoIEhEa7KScdc9D8oiv33z2zuweoYh6IJsQsQ7uD9Jk5dAIWsaOZM7pLhAr
+SMgn/9ZRPmT0GsWC41Rm/czUmcEPuRM+wmBbC99+I791kZjCNW/jUMhiVY/kc30t
+9pj6k8ut1XZZXwKHFDDVCJYzkBRhvIHUDjScKIoDKQKBgQDTlEOGWUf99w9TLOZz
+wy7VK/fuM+MDnwxxllCT65XZYumRK3oBWlsGtz9zQa0ghyyEOt7/uldZKxPkCBCT
+D9VuJ8QviHATulb0wgnh23RfeglsqgITKcOZ3xEsrq0J7ntteJMYQ7j79OSnnAIg
+8NSsJsXkwXfh2e0W1eyLRGcwZwKBgQDItpA1ABjvzPDHlXuT+WQgxh+wksMWgAWS
+pDu588ZwJtKNTkCpi7lsl7jBBvPeNUUTcnMfZ9fsBG3WNeLjdCOotg0NRwhuQ/K5
+mwplA9epxgBgpfs1C7n2lv/4WrYI9jxsd7Fsxg2oM7p5M7tt9jCdlT1oh5Y3D3rf
+cw2RI4TtKQKBgQCT/VB92FUSvoxpkS5gKlXHpUl1w7U2Azmg+ZLC5DWcKCNW9yIP
+cDPbLrGi3H1ME0B//O2d0ajnJcpz5XAybRDMS+RdmwFJyzynVlGzONzdK28MIp3w
+6XV3Wx+YMVW3bKfIml3IyxXIpjKJSgXzJICdKUGiD/DaeKPtJRSfFOlYYQKBgQCO
+V9rhm5YTytYf1UV05fM6wweKZCry9Av/uXWaJOZgd0fvasW0+CCigN9IFbQxrLm2
+Vv6KEU0yVtnGhyTHhoAbkDClb/DsskeblfbDOPxjrMZrxIrpTc2usdwKt9pNdMPZ
+cPFlYPgih70BY8zS6TwFJegND8tadSIBAklm2zfUMQKBgQDK9eyx6ht9PlA7F+YR
+Tmq2sUNRmi97qPo5sJ5zi+rLTUYPAOU4CKZxdjuF0IHBPZGvYHFfiPksywaC2Vd+
+JY9E9/T0irOso67LFoGDc/F/nnPyF2ENtK78XsM1lbR+LfskaPhhLBdJpJRU6k5y
+g/fdj4MZhjlWIjxph2ZDha+VdQ==
+-----END PRIVATE KEY-----

+ 21 - 0
ssl/server.pem

@@ -0,0 +1,21 @@
+-----BEGIN CERTIFICATE-----
+MIIDdTCCAl2gAwIBAgIUFZ+AWdFtTILuBFLvWBENf+x3FlEwDQYJKoZIhvcNAQEL
+BQAwYjELMAkGA1UEBhMCQ04xETAPBgNVBAgMCExpYW9uaW5nMREwDwYDVQQHDAhT
+aGVueWFuZzELMAkGA1UECgwCSlkxDDAKBgNVBAsMA1ImRDESMBAGA1UEAwwJMTI3
+LjAuMC4xMCAXDTI0MDMyMjA1Mjg1MloYDzIxMjQwMjI3MDUyODUyWjBiMQswCQYD
+VQQGEwJDTjERMA8GA1UECAwITGlhb25pbmcxETAPBgNVBAcMCFNoZW55YW5nMQsw
+CQYDVQQKDAJKWTEMMAoGA1UECwwDUiZEMRIwEAYDVQQDDAkxMjcuMC4wLjEwggEi
+MA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCl4rdYKwXgt60o+z31Wvd3U+ED
+QIRqrNit+1O5HJwpFw1+tLgpa5YkNrMAhdGiOfjTRznVvqKLiyCb14PZMvnLIULn
+vL2QIADqwuKzZzYqo4Z9VCXS+sXV1l9CcH4pW9q2WYFeJUelasZjyKrevYXaHBne
+5SbcOzSobMLm9zuZ/XbTRVea3hDm66g/ca4i0s1SCeyl0ZEu1n/PLECYt6YdFvJh
+nLaBisFRP5K9cUF9Ok/vUanNWqgxvWLvcSIkJokv+/XKpwZ968wB0z+uwqo0Det7
+Vz+wa0kcuXqEFstjqfG+RnnEah4DVVU6Xd6HI8jtb+8K1sMvtymV1suGcht/AgMB
+AAGjITAfMB0GA1UdDgQWBBQB+8v4CnwPLFHF/GugB/x88zYCsDANBgkqhkiG9w0B
+AQsFAAOCAQEASSIrGNMu+8HiiArRTjlXRCtCtpcKtQERZyQpTE6Q94H8mJwMiXvK
+tpNVQm8AgfypwDJyJNofjsG6symkeNhd/POXNFboc4Vwe+hPQwy9i9hzm/3qJkh+
+CS4eyGOXNfoz/9LWDtSGZn2m1MqqkVuz/x6Xjz8BPHRh4CiIWkF1uxGXbGS80G28
+MNvBZMZt5CRBFeaDjYDcCZiypezwjRZpeYpEwgaigI05GsmBKM1otLlEz0gnr2Ra
+t7KSN4AQxvFDM9dfqHEn6b79gJODx0ETm0K6l8pulbQZxeLbo/dMQ4x2BIWUlztN
+3E8POiiS7D604EGP967y8fZdCevAN54eRw==
+-----END CERTIFICATE-----

+ 48 - 0
startup.py

@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/3/2 10:35
+# file: startup.py
+# author: David
+# company: shenyang JY
+import datetime
+import time
+from config import myargparse
+from logs import Log
+from data_process import DataProcess
+from data_features import DataFeatures
+from validate import Validation
+from request import requestHandler
+from cache.nn_bp import FMI
+from error import dqFix
+
+def start_up(graph, sess):
+    # instantiate the logger
+    log = Log().logger
+    log.info("日志类初始化")
+    # instantiate the configuration parser
+    args = myargparse(description="场站端配置", add_help=False)
+    log.info("配置类初始化")
+    # instantiate the validator
+    va = Validation(log=log, args=args)
+    log.info("验证类初始化")
+    # instantiate the request handler
+    req = requestHandler(log, args)
+    log.info("请求类初始化")
+    # instantiate the data preprocessor
+    process = DataProcess(log, args)
+    log.info("数据预处理类初始化")
+    # instantiate the feature builder
+    features = DataFeatures(log, args)
+    log.info("特征类初始化")
+    # initialize the model
+    fmi = FMI(log=log, args=args, graph=graph, sess=sess)
+    log.info("模型类初始化")
+    # instantiate the error-correction helper
+    fix = dqFix(log=log, args=args)
+    log.info("误差修正类初始化")
+    return log, va, args, req, process, features, fmi, fix
+
+
+
+if __name__ == '__main__':
+    # start_up needs an initialized TensorFlow graph and session (app.py shows the production wiring)
+    import tensorflow as tf
+    graph = tf.compat.v1.get_default_graph()
+    sess = tf.compat.v1.Session(graph=graph)
+    start_up(graph, sess)

+ 102 - 0
validate.py

@@ -0,0 +1,102 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/11/21 13:33
+# file: validate.py
+# author: David
+# company: shenyang JY
+import numpy as np
+import pandas as pd
+from cache.data_cleaning import key_field_row_cleaning
+from data_process import DataProcess
+from cache.limited_power import LimitPower
+
+
+class ValidationError(Exception):
+    def __init__(self, message):
+        self.msg = message
+
+
+class Validation(object):
+    def __init__(self, log, args):
+        self.logger = log
+        self.args = args
+        self.opt = args.parse_args_and_yaml()
+        self.status = 1
+        self.process = DataProcess(log, args)
+        self.lp = LimitPower(log, args, None)
+
+    def validate_nwp(self, nwp):
+        # 0. are any key fields missing?
+        nwp_cs = [x for x in nwp.columns.to_list() if x in self.opt.nwp_columns]
+        if 'C_TIME' in nwp_cs:
+            nwp_cs.pop(nwp_cs.index('C_TIME'))
+        nwp_filter = key_field_row_cleaning(nwp, nwp_cs, self.logger)
+        if len(nwp_filter) < 8:
+            self.status = 2
+            raise ValidationError("NWP数据缺失")
+        nwp_filter['C_TIME'] = pd.to_datetime(nwp_filter['C_TIME'])
+        return nwp_filter
+
+    def validate_his_data(self, history_rp, env, history_dq):
+        if len(history_rp) <= 1 and len(env) <= 1:
+            self.status = 3
+            raise ValidationError("鉴权失败-历史数据时刻不满足USTA_HISTORY个数")
+        if len(history_dq) < self.opt.Model["his_points"]:
+            self.status = 2
+            raise ValidationError("history_dq缺失")
+        his = pd.merge(history_rp, history_dq, on='C_TIME')
+        his = pd.merge(env, his, on='C_TIME')
+        if self.opt.usable_power['clean_power_which'] in range(1, 3):
+            his_clean = key_field_row_cleaning(his, [self.opt.usable_power['env']], self.logger)
+        else:
+            his_clean = his
+        his_clean = his_clean.replace(-99, np.nan)
+        points = self.opt.Model['his_points']
+        if len(his_clean) < points * 0.5:
+            self.status = 2
+            raise ValidationError("历史数据缺失,实际长度:{}".format(len(his_clean)))
+        elif len(his_clean) < self.opt.Model["his_points"] or his_clean.isnull().any().any():
+            his_clean.drop(columns=['C_FP_VALUE'], inplace=True)
+            his = pd.merge(history_dq, his_clean, on='C_TIME', how='left')
+            his.set_index('C_TIME', inplace=True)
+            his = his.interpolate(method='linear')
+            his = his.fillna(method='ffill')
+            his = his.fillna(method='bfill')
+            his.reset_index(drop=False, inplace=True)
+            his['LIMIT_STATUS'] = np.where((his['LIMIT_STATUS'] != 0) & (his['LIMIT_STATUS'] != 1), 1, his['LIMIT_STATUS'])
+            return his
+        else:
+            return his_clean
+
+    def validate_authentic(self, nwp, his):
+        # check that the current weather data passed authentication; the first point must be from today
+        predict_dt = nwp.iloc[0, :]["C_TIME"]
+        history_dt = his.iloc[-1, :]["C_TIME"]
+        if predict_dt - history_dt > pd.Timedelta(hours=4):
+            self.status = 3
+            raise ValidationError("鉴权失败-环境数据结束时间早于预测开始时间4小时")
+
+    def validate_power(self, his):
+        history_rp_env = his.copy()
+        new_rp = []
+        for index, row in history_rp_env.iterrows():
+            zfs = row[self.opt.usable_power["env"]]
+            rp = row['C_REAL_VALUE']
+            able_rp = row['C_ABLE_VALUE']
+            status = row['LIMIT_STATUS']
+            # The available power replaces the measured value in three cases:
+            # 1. the api_able_power switch is set to True
+            # 2. model fusion is disabled, so parsed interface data is used for filling
+            # 3. curtailment is cleaned by the reference-turbine or standalone-signal method
+            if self.opt.usable_power['api_able_power'] is True or self.opt.Model['fusion'] is False or self.opt.usable_power['clean_power_which'] not in range(1, 3):
+                new_rp.append(able_rp)
+            # otherwise combine the irradiance check with the limit signal; either one indicating curtailment triggers the swap
+            elif not self.lp.filter_unlimited_power(zfs, rp, self.opt.usable_power['k'], self.opt.usable_power['bias']) or status:
+                new_rp.append(able_rp)
+            else:
+                new_rp.append(rp)  # no curtailment at all
+        history_rp_env["NEW_RP"] = new_rp
+        return history_rp_env[['C_TIME', 'NEW_RP']]
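Condensing validate_power's decision rule: the measured value survives only when no substitution case applies and neither the irradiance check nor the limit flag signals curtailment. A hedged restatement (filter_unlimited_power is the project's own check; its internals are not in this commit):

    def choose_power(row, opt, lp):
        # condensed form of the three substitution cases commented above
        always_able = (opt.usable_power['api_able_power']
                       or not opt.Model['fusion']
                       or opt.usable_power['clean_power_which'] not in (1, 2))
        curtailed = (not lp.filter_unlimited_power(
                         row[opt.usable_power['env']], row['C_REAL_VALUE'],
                         opt.usable_power['k'], opt.usable_power['bias'])
                     or row['LIMIT_STATUS'])
        return row['C_ABLE_VALUE'] if (always_able or curtailed) else row['C_REAL_VALUE']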
+
+
+