David 2 weken geleden
bovenliggende
commit
b65d94dd98
32 gewijzigde bestanden met toevoegingen van 775 en 804 verwijderingen
  1. 15 0
      common/database.toml
  2. 570 91
      common/database_dml.py
  3. 0 612
      common/database_dml_koi.py
  4. 54 0
      data_processing/data_operation/hive_to_mongo.py
  5. 6 54
      evaluation_processing/evaluation_accuracy.py
  6. 6 21
      models_processing/model_predict/model_prediction_lightgbm.py
  7. 96 0
      models_processing/model_predict/model_prediction_photovoltaic_physical.py
  8. 1 1
      models_processing/model_tf/tf_bilstm.py
  9. 1 1
      models_processing/model_tf/tf_bilstm_2.py
  10. 1 1
      models_processing/model_tf/tf_bp.py
  11. 1 1
      models_processing/model_tf/tf_bp_pre.py
  12. 1 1
      models_processing/model_tf/tf_bp_train.py
  13. 1 1
      models_processing/model_tf/tf_cnn.py
  14. 1 1
      models_processing/model_tf/tf_cnn_pre.py
  15. 1 1
      models_processing/model_tf/tf_cnn_train.py
  16. 1 1
      models_processing/model_tf/tf_lstm.py
  17. 1 1
      models_processing/model_tf/tf_lstm2_pre.py
  18. 1 1
      models_processing/model_tf/tf_lstm2_train.py
  19. 1 1
      models_processing/model_tf/tf_lstm3_pre.py
  20. 1 1
      models_processing/model_tf/tf_lstm3_train.py
  21. 1 1
      models_processing/model_tf/tf_lstm_pre.py
  22. 1 1
      models_processing/model_tf/tf_lstm_train.py
  23. 1 1
      models_processing/model_tf/tf_lstm_zone.py
  24. 1 1
      models_processing/model_tf/tf_lstm_zone_pre.py
  25. 1 1
      models_processing/model_tf/tf_lstm_zone_train.py
  26. 1 1
      models_processing/model_tf/tf_tcn.py
  27. 1 1
      models_processing/model_tf/tf_test.py
  28. 1 1
      models_processing/model_tf/tf_test_pre.py
  29. 1 1
      models_processing/model_tf/tf_test_train.py
  30. 1 1
      models_processing/model_tf/tf_transformer.py
  31. 3 1
      requirements.txt
  32. 2 2
      run_all.py

+ 15 - 0
common/database.toml

@@ -0,0 +1,15 @@
+[mongodb]
+mongodb_connection = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/"
+[hive]
+jdbc_url = "jdbc:hive2://basicserver1:2181,basicserver2:2181,basicserver3:2181,basicserver4:2181,basicserver5:2181/;serviceDiscoveryMode=zooKeeper;zooKeeperNamespace=hiveserver2"
+driver_class = "org.apache.hive.jdbc.HiveDriver"
+user = ""
+password = ""
+jar_file = 'jar/hive-jdbc-standalone.jar'
+
+[xmo]
+features = ['stationCode', 'date_time', 'forecastDatatime', 'rh',  'wd80', 'ws10', 'ws80', 'dniCalcd','rain', 'dewPoint2m', 'snowfall', 'windDirection10m', 'precipitation',
+           'apparentTemperature', 'weatherCode', 'sunshineDuration','shortwaveRadiation', 'directRadiation', 'diffuseRadiation','globalTiltedIrradiance', 'terrestrialRadiation']
+
+numeric_features = ['rh',  'wd80', 'ws10', 'ws80', 'dniCalcd','rain', 'dewPoint2m', 'snowfall', 'windDirection10m', 'precipitation',
+           'apparentTemperature', 'weatherCode', 'sunshineDuration','shortwaveRadiation', 'directRadiation', 'diffuseRadiation','globalTiltedIrradiance', 'terrestrialRadiation']

+ 570 - 91
common/database_dml.py

@@ -6,13 +6,26 @@ from sqlalchemy import create_engine
 import pickle
 from io import BytesIO
 import joblib
-import h5py
+import json
 import tensorflow as tf
 import os
 import tempfile
+import jaydebeapi
+import toml
+from typing import Dict, Any, Optional, Union, Tuple
+from datetime import datetime, timedelta
+
+# Load database.toml (located next to this module) once at import time.
+current_dir = os.path.dirname(os.path.abspath(__file__))
+with open(os.path.join(current_dir, 'database.toml'), 'r', encoding='utf-8') as f:
+    config = toml.load(f)  # module-wide configuration; treat as read-only
+
+# Resolve the Hive JDBC driver jar relative to this module. The relative path is read from
+# [hive].jar_file in database.toml; the former hard-coded location is kept as the fallback.
+jar_file = os.path.join(current_dir, config.get('hive', {}).get('jar_file', 'jar/hive-jdbc-standalone.jar'))
+
 
 def get_data_from_mongo(args):
-    mongodb_connection = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/"
+    # Read the MongoDB connection string from the [mongodb] section of database.toml
+    mongodb_connection = config['mongodb']['mongodb_connection']
     mongodb_database = args['mongodb_database']
     mongodb_read_table = args['mongodb_read_table']
     query_dict = {}
@@ -42,7 +55,9 @@ def get_data_from_mongo(args):
 
 
 def get_df_list_from_mongo(args):
-    mongodb_connection,mongodb_database,mongodb_read_table = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/",args['mongodb_database'],args['mongodb_read_table'].split(',')
+    # Read the MongoDB connection string from the [mongodb] section of database.toml
+    mongodb_connection = config['mongodb']['mongodb_connection']
+    mongodb_database, mongodb_read_table = args['mongodb_database'], args['mongodb_read_table'].split(',')
     df_list = []
     client = MongoClient(mongodb_connection)
     # 选择数据库(如果数据库不存在,MongoDB 会自动创建)
@@ -58,6 +73,7 @@ def get_df_list_from_mongo(args):
     client.close()
     return df_list
 
+
 def insert_data_into_mongo(res_df, args):
     """
     插入数据到 MongoDB 集合中,可以选择覆盖、追加或按指定的 key 进行更新插入。
@@ -68,7 +84,8 @@ def insert_data_into_mongo(res_df, args):
     - overwrite: 布尔值,True 表示覆盖,False 表示追加
     - update_keys: 列表,指定用于匹配的 key 列,如果存在则更新,否则插入 'col1','col2'
     """
-    mongodb_connection = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/"
+    # Read the MongoDB connection string from the [mongodb] section of database.toml
+    mongodb_connection = config['mongodb']['mongodb_connection']
     mongodb_database = args['mongodb_database']
     mongodb_write_table = args['mongodb_write_table']
     overwrite = 1
@@ -119,7 +136,7 @@ def insert_data_into_mongo(res_df, args):
 def get_data_fromMysql(params):
     mysql_conn = params['mysql_conn']
     query_sql = params['query_sql']
-    #数据库读取实测气象
+    # 数据库读取实测气象
     engine = create_engine(f"mysql+pymysql://{mysql_conn}")
     # 定义SQL查询
     with engine.connect() as conn:
@@ -128,8 +145,10 @@ def get_data_fromMysql(params):
 
 
 def insert_pickle_model_into_mongo(model, args):
-    mongodb_connection, mongodb_database, mongodb_write_table, model_name = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/", \
-    args['mongodb_database'], args['mongodb_write_table'], args['model_name']
+    # Read the MongoDB connection string from the [mongodb] section of database.toml
+    mongodb_connection = config['mongodb']['mongodb_connection']
+    mongodb_database, mongodb_write_table, model_name = args['mongodb_database'], args['mongodb_write_table'], args[
+        'model_name']
     client = MongoClient(mongodb_connection)
     db = client[mongodb_database]
     # 序列化模型
@@ -149,9 +168,27 @@ def insert_pickle_model_into_mongo(model, args):
     print("model inserted successfully!")
 
 
-def insert_h5_model_into_mongo(model,feature_scaler_bytes,target_scaler_bytes ,args):
-    mongodb_connection,mongodb_database,scaler_table,model_table,model_name = ("mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/",
-                                args['mongodb_database'],args['scaler_table'],args['model_table'],args['model_name'])
+def get_pickle_model_from_mongo(args):
+    """
+    Load a pickled model from MongoDB by its model name.
+
+    Parameters:
+    args : dict - must contain the keys
+        mongodb_database: database name
+        mongodb_model_table: collection that stores the pickled models
+        model_name: value of the "model_name" field to look up
+
+    Returns:
+    The unpickled model object, or None when no matching document exists.
+    """
+    mongodb_connection = config['mongodb']['mongodb_connection']
+    mongodb_database, mongodb_model_table, model_name = args['mongodb_database'], args['mongodb_model_table'], args['model_name']
+    client = MongoClient(mongodb_connection)
+    try:
+        collection = client[mongodb_database][mongodb_model_table]
+        model_data = collection.find_one({"model_name": model_name})
+        if model_data is None:
+            return None
+        # NOTE(review): pickle.loads runs arbitrary code embedded in the payload; this is only
+        # safe while the collection is written exclusively by trusted code.
+        return pickle.loads(model_data['model'])  # the 'model' field holds the pickled bytes
+    finally:
+        # Always release the connection, including when the lookup or unpickling raises.
+        client.close()
+
+
+def insert_h5_model_into_mongo(model, feature_scaler_bytes, target_scaler_bytes, args):
+    # Read the MongoDB connection string from the [mongodb] section of database.toml
+    mongodb_connection = config['mongodb']['mongodb_connection']
+    mongodb_database, scaler_table, model_table, model_name = args['mongodb_database'], args['scaler_table'], args[
+        'model_table'], args['model_name']
     client = MongoClient(mongodb_connection)
     db = client[mongodb_database]
     if scaler_table in db.list_collection_names():
@@ -209,65 +246,341 @@ def insert_h5_model_into_mongo(model,feature_scaler_bytes,target_scaler_bytes ,a
                 print(f"⚠️ 临时文件清理失败: {temp_path}")
 
 
-# def insert_trained_model_into_mongo(model, args):
-#     mongodb_connection,mongodb_database,model_table,model_name = ("mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/",
-#                                 args['mongodb_database'],args['model_table'],args['model_name'])
-#
-#     gen_time, params_json, descr = args['gen_time'], args['params'], args['descr']
-#     client = MongoClient(mongodb_connection)
-#     db = client[mongodb_database]
-#     if model_table in db.list_collection_names():
-#         db[model_table].drop()
-#         print(f"Collection '{model_table} already exist, deleted successfully!")
-#     model_table = db[model_table]
-#
-#     # 创建 BytesIO 缓冲区
-#     model_buffer = BytesIO()
-#     # 将模型保存为 HDF5 格式到内存 (BytesIO)
-#     model.save(model_buffer, save_format='h5')
-#     # 将指针移到缓冲区的起始位置
-#     model_buffer.seek(0)
-#     # 获取模型的二进制数据
-#     model_data = model_buffer.read()
-#     # 将模型保存到 MongoDB
-#     model_table.insert_one({
-#         "model_name": model_name,
-#         "model_data": model_data,
-#         "gen_time": gen_time,
-#         "params": params_json,
-#         "descr": descr
-#     })
-#     print("模型成功保存到 MongoDB!")
-
-def insert_scaler_model_into_mongo(feature_scaler_bytes, scaled_target_bytes, args):
-    mongodb_connection,mongodb_database,scaler_table,model_table,model_name = ("mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/",
-                                args['mongodb_database'],args['scaler_table'],args['model_table'],args['model_name'])
-    client = MongoClient(mongodb_connection)
-    db = client[mongodb_database]
-    if scaler_table in db.list_collection_names():
-        db[scaler_table].drop()
-        print(f"Collection '{scaler_table} already exist, deleted successfully!")
-    collection = db[scaler_table]  # 集合名称
-    # Save the scalers in MongoDB as binary data
-    collection.insert_one({
-        "feature_scaler": feature_scaler_bytes.read(),
-        "target_scaler": scaled_target_bytes.read()
-    })
-    client.close()
-    print("scaler_model inserted successfully!")
+def insert_trained_model_into_mongo(model: tf.keras.Model, args: Dict[str, Any]) -> str:
+    """
+    Store a trained Keras model in MongoDB, evicting the oldest document once the
+    collection already holds 50 or more.
+
+    Parameters:
+    model : tf.keras.Model - the trained model to persist
+    args : dict - must contain the keys
+        mongodb_database: database name
+        model_table: collection name
+        model_name: model name
+        gen_time: model generation time (the oldest gen_time is evicted first)
+        params: model parameters stored alongside the model
+        descr: free-text description
+
+    Returns:
+    str - the _id of the inserted document
+
+    Raises:
+    ValueError - a required key is missing from args
+    RuntimeError - model.save() failed
+    """
+    # ------------------------- argument validation -------------------------
+    required_keys = {'mongodb_database', 'model_table', 'model_name',
+                     'gen_time', 'params', 'descr'}
+    if missing := required_keys - args.keys():
+        raise ValueError(f"缺少必要参数: {missing}")
 
+    # ------------------------- configuration -------------------------
+    # MONGO_URI, when set, overrides the connection string from database.toml.
+    mongodb_connection = os.getenv("MONGO_URI", config['mongodb']['mongodb_connection'])
 
-def get_h5_model_from_mongo(args, custom=None):
-    mongodb_connection,mongodb_database,model_table,model_name = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/",args['mongodb_database'],args['model_table'],args['model_name']
-    client = MongoClient(mongodb_connection)
-    # 选择数据库(如果数据库不存在,MongoDB 会自动创建)
-    db = client[mongodb_database]
-    collection = db[model_table]  # 集合名称
+    # ------------------------- resources -------------------------
+    temp_path = None
+    client = None
+
+    try:
+        # ------------------------- temporary file -------------------------
+        # model.save() is given a path, so the model is round-tripped through a temp file.
+        fd, temp_path = tempfile.mkstemp(suffix='.keras')
+        os.close(fd)  # release the handle right away so the path can be reopened
+
+        # ------------------------- save model -------------------------
+        try:
+            model.save(temp_path)  # no save_format given; Keras chooses the format for this path
+        except Exception as e:
+            raise RuntimeError(f"模型保存失败: {str(e)}") from e
+
+        # ------------------------- database connection -------------------------
+        client = MongoClient(mongodb_connection)
+        db = client[args['mongodb_database']]
+        collection = db[args['model_table']]
+
+        # ------------------------- capacity control -------------------------
+        # estimated_document_count() is an estimate, so the threshold of 50 is approximate.
+        if collection.estimated_document_count() >= 50:
+            # find_one_and_delete() requires a filter as its first argument; {} matches
+            # every document, so the ascending sort selects the smallest gen_time.
+            if deleted := collection.find_one_and_delete(
+                    {},
+                    sort=[("gen_time", ASCENDING)],
+                    projection={"_id": 1, "model_name": 1, "gen_time": 1}
+            ):
+                print(f"已淘汰模型 [{deleted['model_name']}] 生成时间: {deleted['gen_time']}")
 
-     # 查询 MongoDB 获取模型数据
-    model_doc = collection.find_one({"model_name": model_name})
-    if model_doc:
-        model_data = model_doc['model_data']  # 获取模型的二进制数据
+        # ------------------------- insert -------------------------
+        with open(temp_path, 'rb') as f:
+            result = collection.insert_one({
+                "model_name": args['model_name'],
+                "model_data": f.read(),
+                "gen_time": args['gen_time'],
+                "params": args['params'],
+                "descr": args['descr']
+            })
+
+        print(f"✅ 模型 {args['model_name']} 保存成功 | 文档ID: {result.inserted_id}")
+        return str(result.inserted_id)
+
+    except Exception as e:
+        # Label the failure for the log line, then re-raise the original exception unchanged.
+        error_type = "数据库操作" if isinstance(e, (pymongo.errors.PyMongoError, RuntimeError)) else "系统错误"
+        print(f"❌ {error_type} - 详细错误: {str(e)}")
+        raise
+
+    finally:
+        # ------------------------- cleanup -------------------------
+        if client:
+            client.close()
+        if temp_path and os.path.exists(temp_path):
+            try:
+                os.remove(temp_path)
+            except PermissionError:
+                # Best effort: a locked temp file is reported but does not fail the call.
+                print(f"⚠️ 临时文件清理失败: {temp_path}")
+
+
+def insert_scaler_model_into_mongo(feature_scaler_bytes: BytesIO, target_scaler_bytes: BytesIO,
+                                   args: Dict[str, Any]) -> str:
+    """
+    Store a feature/target scaler pair in MongoDB, evicting the oldest document once
+    the collection already holds 50 or more.
+
+    Parameters:
+    feature_scaler_bytes: BytesIO - serialized feature scaler
+    target_scaler_bytes: BytesIO - serialized target scaler
+    args : dict - must contain the keys
+        mongodb_database: database name
+        scaler_table: collection name
+        model_name: name of the model the scalers belong to
+        gen_time: generation time (the oldest gen_time is evicted first)
+
+    Returns:
+    str - the _id of the inserted document
+
+    Raises:
+    ValueError - a required key is missing from args, or a buffer is empty
+    TypeError - a scaler argument is not a BytesIO
+    """
+    # ------------------------- argument validation -------------------------
+    required_keys = {'mongodb_database', 'scaler_table', 'model_name', 'gen_time'}
+    if missing := required_keys - args.keys():
+        raise ValueError(f"缺少必要参数: {missing}")
+
+    # ------------------------- configuration -------------------------
+    # MONGO_URI, when set, overrides the connection string from database.toml.
+    mongodb_conn = os.getenv("MONGO_URI", config['mongodb']['mongodb_connection'])
+
+    # ------------------------- input validation -------------------------
+    for buf, name in [(feature_scaler_bytes, "特征缩放器"),
+                      (target_scaler_bytes, "目标缩放器")]:
+        if not isinstance(buf, BytesIO):
+            raise TypeError(f"{name} 必须为BytesIO类型")
+        if buf.getbuffer().nbytes == 0:
+            raise ValueError(f"{name} 字节流为空")
+
+    client = None
+    try:
+        # ------------------------- database connection -------------------------
+        client = MongoClient(mongodb_conn)
+        db = client[args['mongodb_database']]
+        collection = db[args['scaler_table']]
+
+        # ------------------------- capacity control -------------------------
+        # estimated_document_count() is an estimate, so the threshold of 50 is approximate.
+        if collection.estimated_document_count() >= 50:
+            # find_one_and_delete() requires a filter as its first argument; {} matches
+            # every document, so the ascending sort selects the smallest gen_time.
+            if deleted := collection.find_one_and_delete(
+                    {},
+                    sort=[("gen_time", ASCENDING)],
+                    projection={"_id": 1, "model_name": 1, "gen_time": 1}
+            ):
+                print(f"🗑️ 已淘汰最旧缩放器 [{deleted['model_name']}] 生成时间: {deleted['gen_time']}")
+
+        # ------------------------- insert -------------------------
+        # Rewind both buffers so read() returns their full content.
+        feature_scaler_bytes.seek(0)
+        target_scaler_bytes.seek(0)
+
+        result = collection.insert_one({
+            "model_name": args['model_name'],
+            "gen_time": args['gen_time'],
+            "feature_scaler": feature_scaler_bytes.read(),
+            "target_scaler": target_scaler_bytes.read()
+        })
+
+        print(f"✅ 缩放器 {args['model_name']} 保存成功 | 文档ID: {result.inserted_id}")
+        return str(result.inserted_id)
+
+    except Exception as e:
+        # Label the failure for the log line, then re-raise the original exception unchanged.
+        error_type = "数据库操作" if isinstance(e, (pymongo.errors.PyMongoError, ValueError)) else "系统错误"
+        print(f"❌ {error_type}异常 - 详细错误: {str(e)}")
+        raise
+
+    finally:
+        # ------------------------- cleanup -------------------------
+        if client:
+            client.close()
+        # Rewind the buffers so the caller can read them again afterwards.
+        feature_scaler_bytes.seek(0)
+        target_scaler_bytes.seek(0)
+
+
+def get_h5_model_from_mongo(args: Dict[str, Any], custom_objects: Optional[Dict[str, Any]] = None) -> Optional[
+    tf.keras.Model]:
+    """
+    Load the most recent version (largest gen_time) of a named model from MongoDB.
+
+    Parameters:
+    args : dict - must contain the keys
+        mongodb_database: database name
+        model_table: collection name
+        model_name: name of the model to load
+    custom_objects: dict - custom Keras objects forwarded to load_model
+
+    Returns:
+    tf.keras.Model - the loaded model, or None when no document matches model_name
+
+    Raises:
+    ValueError - a required key is missing from args
+    RuntimeError - load_model raised tf.errors.NotFoundError
+    """
+    # ------------------------- argument validation -------------------------
+    required_keys = {'mongodb_database', 'model_table', 'model_name'}
+    if missing := required_keys - args.keys():
+        raise ValueError(f"❌ 缺失必要参数: {missing}")
+
+    # ------------------------- configuration -------------------------
+    # MONGO_URI, when set, overrides the connection string from database.toml.
+    mongo_uri = os.getenv("MONGO_URI", config['mongodb']['mongodb_connection'])
+    client = None
+    tmp_file_path = None  # remembered so the finally block can delete the file
+    try:
+        # ------------------------- database connection -------------------------
+        client = MongoClient(
+            mongo_uri,
+            maxPoolSize=10,
+            socketTimeoutMS=5000
+        )
+        db = client[args['mongodb_database']]
+        collection = db[args['model_table']]
+
+        # ------------------------- index maintenance -------------------------
+        # Create the (model_name, gen_time desc) index on first use to back the query below.
+        index_name = "model_gen_time_idx"
+        if index_name not in collection.index_information():
+            collection.create_index(
+                [("model_name", 1), ("gen_time", DESCENDING)],
+                name=index_name
+            )
+            print("⏱️ 已创建复合索引")
+
+        # ------------------------- query -------------------------
+        model_doc = collection.find_one(
+            {"model_name": args['model_name']},
+            sort=[('gen_time', DESCENDING)],
+            projection={"model_data": 1, "gen_time": 1}  # fetch only the needed fields
+        )
+
+        if not model_doc:
+            print(f"⚠️ 未找到模型 '{args['model_name']}' 的有效记录")
+            return None
+
+        # ------------------------- load -------------------------
+        # load_model is given a path, so the stored bytes are written to a temp file first.
+        with tempfile.NamedTemporaryFile(suffix=".keras", delete=False) as tmp_file:
+            tmp_file.write(model_doc['model_data'])
+            tmp_file_path = tmp_file.name
+
+        model = tf.keras.models.load_model(tmp_file_path, custom_objects=custom_objects)
+
+        print(f"{args['model_name']}模型成功从 MongoDB 加载!")
+        return model
+    except tf.errors.NotFoundError as e:
+        print(f"❌ 模型结构缺失关键组件: {str(e)}")
+        raise RuntimeError("模型架构不完整") from e
+
+    except Exception as e:
+        print(f"❌ 系统异常: {str(e)}")
+        raise
+
+    finally:
+        # ------------------------- cleanup -------------------------
+        if client:
+            client.close()
+        # delete=False above means the temp file must be removed explicitly.
+        if tmp_file_path and os.path.exists(tmp_file_path):
+            try:
+                os.remove(tmp_file_path)
+                print(f"🧹 已清理临时文件: {tmp_file_path}")
+            except Exception as cleanup_err:
+                print(f"⚠️ 临时文件清理失败: {str(cleanup_err)}")
+
+
+def get_keras_model_from_mongo(
+        args: Dict[str, Any],
+        custom_objects: Optional[Dict[str, Any]] = None
+) -> Optional[tf.keras.Model]:
+    """
+    从MongoDB获取指定模型的最新版本(支持Keras格式)
+
+    参数:
+    args : dict - 包含以下键的字典:
+        mongodb_database: 数据库名称
+        model_table: 集合名称
+        model_name: 要获取的模型名称
+    custom_objects: dict - 自定义Keras对象字典
+
+    返回:
+    tf.keras.Model - 加载成功的Keras模型
+    """
+    # ------------------------- 参数校验 -------------------------
+    required_keys = {'mongodb_database', 'model_table', 'model_name'}
+    if missing := required_keys - args.keys():
+        raise ValueError(f"❌ 缺失必要参数: {missing}")
+
+    # ------------------------- 环境配置 -------------------------
+    mongo_uri = os.getenv("MONGO_URI", config['mongodb']['mongodb_connection'])
+    client = None
+    tmp_file_path = None  # 用于跟踪临时文件路径
+
+    try:
+        # ------------------------- 数据库连接 -------------------------
+        client = MongoClient(
+            mongo_uri,
+            maxPoolSize=10,
+            socketTimeoutMS=5000
+        )
+        db = client[args['mongodb_database']]
+        collection = db[args['model_table']]
+
+        # ------------------------- 索引维护 -------------------------
+        # index_name = "model_gen_time_idx"
+        # if index_name not in collection.index_information():
+        #     collection.create_index(
+        #         [("model_name", 1), ("gen_time", DESCENDING)],
+        #         name=index_name
+        #     )
+        #     print("⏱️ 已创建复合索引")
+
+        # ------------------------- 高效查询 -------------------------
+        model_doc = collection.find_one(
+            {"model_name": args['model_name']},
+            sort=[('gen_time', DESCENDING)],
+            projection={"model_data": 1, "gen_time": 1, 'params': 1}
+        )
+
+        if not model_doc:
+            print(f"⚠️ 未找到模型 '{args['model_name']}' 的有效记录")
+            return None
+
+        # ------------------------- 内存优化加载 -------------------------
+        model_data = model_doc['model_data']
+        model_params = model_doc['params']
         # 创建临时文件(自动删除)
         with tempfile.NamedTemporaryFile(suffix=".keras", delete=False) as tmp_file:
             tmp_file.write(model_data)
@@ -276,11 +589,25 @@ def get_h5_model_from_mongo(args, custom=None):
         # 从临时文件加载模型
         model = tf.keras.models.load_model(
             tmp_file_path,
-            custom_objects=custom
+            custom_objects=custom_objects
         )
 
         print(f"{args['model_name']} 模型成功从 MongoDB 加载!")
-        client.close()
+        return model, model_params
+
+    except tf.errors.NotFoundError as e:
+        print(f"❌ 模型结构缺失关键组件: {str(e)}")
+        raise RuntimeError("模型架构不完整") from e
+
+    except Exception as e:
+        print(f"❌ 系统异常: {str(e)}")
+        raise
+
+    finally:
+        # ------------------------- 资源清理 -------------------------
+        if client:
+            client.close()
+
         # 确保删除临时文件
         if tmp_file_path and os.path.exists(tmp_file_path):
             try:
@@ -288,28 +615,180 @@ def get_h5_model_from_mongo(args, custom=None):
                 print(f"🧹 已清理临时文件: {tmp_file_path}")
             except Exception as cleanup_err:
                 print(f"⚠️ 临时文件清理失败: {str(cleanup_err)}")
-        return model
-    else:
-        print(f"未找到model_name为 {model_name} 的模型。")
-        client.close()
-        return None
 
 
-def get_scaler_model_from_mongo(args, only_feature_scaler=False):
-    mongodb_connection, mongodb_database, scaler_table, = ("mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/", args['mongodb_database'], args['scaler_table'])
-    client = MongoClient(mongodb_connection)
-    # 选择数据库(如果数据库不存在,MongoDB 会自动创建)
-    db = client[mongodb_database]
-    collection = db[scaler_table]  # 集合名称
-    # Retrieve the scalers from MongoDB
-    scaler_doc = collection.find_one()
-    # Deserialize the scalers
-
-    feature_scaler_bytes = BytesIO(scaler_doc["feature_scaler"])
-    feature_scaler = joblib.load(feature_scaler_bytes)
-    if only_feature_scaler:
-        return feature_scaler
-    target_scaler_bytes = BytesIO(scaler_doc["target_scaler"])
-    target_scaler = joblib.load(target_scaler_bytes)
-    client.close()
-    return feature_scaler,target_scaler
+def get_scaler_model_from_mongo(args: Dict[str, Any], only_feature_scaler: bool = False) -> Union[
+    object, Tuple[object, object]]:
+    """
+    Load the most recent (largest gen_time) feature/target scalers stored for a model.
+
+    Parameters:
+    args : dict - must contain the keys
+        mongodb_database: database name
+        scaler_table: collection name
+        model_name: model whose scalers are requested
+    only_feature_scaler : return just the feature scaler when True
+
+    Returns:
+    The feature scaler alone, or a (feature_scaler, target_scaler) tuple.
+
+    Raises:
+    ValueError : a required key is missing from args
+    RuntimeError : any other failure (no matching document, payload that is not bytes,
+        database error, deserialization error) is re-raised as RuntimeError
+    """
+    # ------------------------- argument validation -------------------------
+    required_keys = {'mongodb_database', 'scaler_table', 'model_name'}
+    if missing := required_keys - args.keys():
+        raise ValueError(f"❌ 缺失必要参数: {missing}")
+
+    # ------------------------- configuration -------------------------
+    # MONGO_URI, when set, overrides the connection string from database.toml.
+    mongo_uri = os.getenv("MONGO_URI", config['mongodb']['mongodb_connection'])
+
+    client = None
+    try:
+        # ------------------------- database connection -------------------------
+        client = MongoClient(
+            mongo_uri,
+            maxPoolSize=20,
+            socketTimeoutMS=3000,  # 3 s socket timeout
+            serverSelectionTimeoutMS=5000  # 5 s server-selection timeout
+        )
+        db = client[args['mongodb_database']]
+        collection = db[args['scaler_table']]
+
+        # ------------------------- query -------------------------
+        scaler_doc = collection.find_one(
+            {"model_name": args['model_name']},
+            sort=[('gen_time', DESCENDING)],
+            projection={"feature_scaler": 1, "target_scaler": 1, "gen_time": 1}
+        )
+
+        if not scaler_doc:
+            raise RuntimeError(f"⚠️ 找不到模型 {args['model_name']} 的缩放器记录")
+
+        # ------------------------- deserialization -------------------------
+        def load_scaler(data: bytes) -> object:
+            """Deserialize one scaler from its stored bytes with joblib."""
+            with BytesIO(data) as buffer:
+                buffer.seek(0)
+                try:
+                    return joblib.load(buffer)
+                except pickle.UnpicklingError as e:
+                    # joblib has no UnpicklingError attribute; the exception class
+                    # lives in the pickle module.
+                    raise RuntimeError("反序列化失败 (可能版本不兼容)") from e
+
+        # Feature scaler
+        feature_data = scaler_doc["feature_scaler"]
+        if not isinstance(feature_data, bytes):
+            raise RuntimeError("特征缩放器数据格式异常")
+        feature_scaler = load_scaler(feature_data)
+
+        if only_feature_scaler:
+            return feature_scaler
+
+        # Target scaler
+        target_data = scaler_doc["target_scaler"]
+        if not isinstance(target_data, bytes):
+            raise RuntimeError("目标缩放器数据格式异常")
+        target_scaler = load_scaler(target_data)
+
+        print(f"✅ 成功加载 {args['model_name']} 的缩放器 (版本时间: {scaler_doc.get('gen_time', '未知')})")
+        return feature_scaler, target_scaler
+
+    except PyMongoError as e:
+        raise RuntimeError(f"🔌 数据库操作失败: {str(e)}") from e
+    except RuntimeError as e:
+        # RuntimeErrors raised above are wrapped once more so the message carries this prefix.
+        raise RuntimeError(f"🔌 mongo操作失败: {str(e)}") from e
+    except Exception as e:
+        raise RuntimeError(f"❌ 未知系统异常: {str(e)}") from e
+    finally:
+        # ------------------------- cleanup -------------------------
+        if client:
+            client.close()
+
+
+def normalize_key(s):
+    """Return s converted to lower case via s.lower()."""
+    return s.lower()
+
+
+def get_xmo_data_from_hive(args):
+    """
+    Query XMO forecast rows for one farm from Hive and return them as a DataFrame.
+
+    Parameters:
+    args : dict with the keys
+        moment: forecast run identifier; selects the JSON column nwp_xmo_<moment> (required)
+        farm_id: farm identifier used as the rowkey prefix (required)
+        current_date: first day to query, formatted %Y%m%d (optional, defaults to now)
+        days: number of additional days after current_date to query (optional, defaults to 0)
+
+    Returns:
+    pandas.DataFrame restricted to the columns listed in [xmo].features of database.toml,
+    or an error string when a required argument is missing or the result carries no
+    forecastDatatime column.
+    """
+    # Hive connection settings and feature lists come from database.toml.
+    hive_config = config['hive']
+    jdbc_url = hive_config['jdbc_url']
+    driver_class = hive_config['driver_class']
+    user = hive_config['user']
+    password = hive_config['password']
+    features = config['xmo']['features']
+    numeric_features = config['xmo']['numeric_features']
+    if 'moment' not in args or 'farm_id' not in args:
+        return 'One or more of the following parameters are missing: moment, farm_id!'
+
+    moment = args['moment']
+    farm_id = args['farm_id']
+    current_date = datetime.strptime(args['current_date'], "%Y%m%d") if 'current_date' in args else datetime.now()
+    days = int(args['days']) + 1 if 'days' in args else 1
+    json_feature = f"nwp_xmo_{moment}"
+
+    # One select per day offset i, each against table forecast_xmo_d<i>, combined with union.
+    # NOTE(review): farm_id and moment are interpolated into the SQL text unescaped;
+    # confirm that they never come from untrusted input.
+    selects = []
+    for i in range(days):
+        sysdate_pre = (current_date + timedelta(days=i)).strftime("%Y%m%d")
+        selects.append(
+            f"select rowkey,datatimestamp,{json_feature} from hbase_forecast.forecast_xmo_d{i} \n"
+            f"where rowkey>='{farm_id}-{sysdate_pre}0000' and rowkey<='{farm_id}-{sysdate_pre}2345' \n"
+        )
+    query_sql = "union \n".join(selects)
+    print("query_sql\n", query_sql)
+
+    conn = jaydebeapi.connect(driver_class, jdbc_url, [user, password], jar_file)
+    try:
+        cursor = conn.cursor()
+        try:
+            cursor.execute(query_sql)
+            columns = [desc[0] for desc in cursor.description]
+            rows = cursor.fetchall()
+        finally:
+            cursor.close()
+    finally:
+        # Close the connection even when the query fails.
+        conn.close()
+
+    df = pd.DataFrame(rows, columns=columns)
+    # The forecast payload is a JSON string per row; expand it into one column per key.
+    df[json_feature] = df[json_feature].apply(lambda x: json.loads(x) if isinstance(x, str) else x)
+    df_features = pd.json_normalize(df[json_feature])
+    if 'forecastDatatime' not in df_features.columns:
+        return "The returned data does not contain the forecastDatatime column — the data might be empty or null!"
+
+    # forecastDatatime is parsed as epoch milliseconds (UTC) and rendered as Asia/Shanghai local time.
+    df_features['date_time'] = pd.to_datetime(df_features['forecastDatatime'], unit='ms', utc=True).dt.tz_convert(
+        'Asia/Shanghai').dt.strftime('%Y-%m-%d %H:%M:%S')
+    # Values that cannot be parsed as numbers become NaN.
+    df_features[numeric_features] = df_features[numeric_features].apply(pd.to_numeric, errors='coerce')
+    return df_features[features]
+
+
+if __name__ == "__main__":
+    # Manual smoke test: fetch forecast data for one farm and print a preview.
+    print("Program starts execution!")
+    args = {
+        'moment': '06',
+        'current_date': '20250609',
+        'farm_id': 'J00883',
+        'days': '13'
+    }
+    df = get_xmo_data_from_hive(args)
+    # get_xmo_data_from_hive returns an error string instead of a DataFrame on failure.
+    if isinstance(df, str):
+        print(df)
+    else:
+        print(df.head(2),df.shape)
+    print("server start!")

+ 0 - 612
common/database_dml_koi.py

@@ -1,612 +0,0 @@
-import pymongo
-from pymongo import MongoClient, UpdateOne, DESCENDING, ASCENDING
-from pymongo.errors import PyMongoError
-import pandas as pd
-from sqlalchemy import create_engine
-import pickle
-from io import BytesIO
-import joblib
-import h5py, os, io
-import tensorflow as tf
-from typing import Dict, Any, Optional, Union, Tuple
-import tempfile
-
-def get_data_from_mongo(args):
-    mongodb_connection = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/"
-    mongodb_database = args['mongodb_database']
-    mongodb_read_table = args['mongodb_read_table']
-    query_dict = {}
-    if 'timeBegin' in args.keys():
-        timeBegin = args['timeBegin']
-        query_dict.update({"$gte": timeBegin})
-    if 'timeEnd' in args.keys():
-        timeEnd = args['timeEnd']
-        query_dict.update({"$lte": timeEnd})
-
-    client = MongoClient(mongodb_connection)
-    # 选择数据库(如果数据库不存在,MongoDB 会自动创建)
-    db = client[mongodb_database]
-    collection = db[mongodb_read_table]  # 集合名称
-    if len(query_dict) != 0:
-        query = {"dateTime": query_dict}
-        cursor = collection.find(query)
-    else:
-        cursor = collection.find()
-    data = list(cursor)
-    df = pd.DataFrame(data)
-    # 4. 删除 _id 字段(可选)
-    if '_id' in df.columns:
-        df = df.drop(columns=['_id'])
-    client.close()
-    return df
-
-
-def get_df_list_from_mongo(args):
-    mongodb_connection,mongodb_database,mongodb_read_table = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/",args['mongodb_database'],args['mongodb_read_table'].split(',')
-    df_list = []
-    client = MongoClient(mongodb_connection)
-    # 选择数据库(如果数据库不存在,MongoDB 会自动创建)
-    db = client[mongodb_database]
-    for table in mongodb_read_table:
-        collection = db[table]  # 集合名称
-        data_from_db = collection.find()  # 这会返回一个游标(cursor)
-        # 将游标转换为列表,并创建 pandas DataFrame
-        df = pd.DataFrame(list(data_from_db))
-        if '_id' in df.columns:
-            df = df.drop(columns=['_id'])
-        df_list.append(df)
-    client.close()
-    return df_list
-
-def insert_data_into_mongo(res_df, args):
-    """
-    插入数据到 MongoDB 集合中,可以选择覆盖、追加或按指定的 key 进行更新插入。
-
-    参数:
-    - res_df: 要插入的 DataFrame 数据
-    - args: 包含 MongoDB 数据库和集合名称的字典
-    - overwrite: 布尔值,True 表示覆盖,False 表示追加
-    - update_keys: 列表,指定用于匹配的 key 列,如果存在则更新,否则插入 'col1','col2'
-    """
-    mongodb_connection = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/"
-    mongodb_database = args['mongodb_database']
-    mongodb_write_table = args['mongodb_write_table']
-    overwrite = 1
-    update_keys = None
-    if 'overwrite' in args.keys():
-        overwrite = int(args['overwrite'])
-    if 'update_keys' in args.keys():
-        update_keys = args['update_keys'].split(',')
-
-    client = MongoClient(mongodb_connection)
-    db = client[mongodb_database]
-    collection = db[mongodb_write_table]
-
-    # 覆盖模式:删除现有集合
-    if overwrite:
-        if mongodb_write_table in db.list_collection_names():
-            collection.drop()
-            print(f"Collection '{mongodb_write_table}' already exists, deleted successfully!")
-
-    # 将 DataFrame 转为字典格式
-    data_dict = res_df.to_dict("records")  # 每一行作为一个字典
-
-    # 如果没有数据,直接返回
-    if not data_dict:
-        print("No data to insert.")
-        return
-
-    # 如果指定了 update_keys,则执行 upsert(更新或插入)
-    if update_keys and not overwrite:
-        operations = []
-        for record in data_dict:
-            # 构建查询条件,用于匹配要更新的文档
-            query = {key: record[key] for key in update_keys}
-            operations.append(UpdateOne(query, {'$set': record}, upsert=True))
-
-        # 批量执行更新/插入操作
-        if operations:
-            result = collection.bulk_write(operations)
-            print(f"Matched: {result.matched_count}, Upserts: {result.upserted_count}")
-    else:
-        # 追加模式:直接插入新数据
-        collection.insert_many(data_dict)
-        print("Data inserted successfully!")
-
-
-def get_data_fromMysql(params):
-    mysql_conn = params['mysql_conn']
-    query_sql = params['query_sql']
-    #数据库读取实测气象
-    engine = create_engine(f"mysql+pymysql://{mysql_conn}")
-    # 定义SQL查询
-    env_df = pd.read_sql_query(query_sql, engine)
-    return env_df
-
-
-def insert_pickle_model_into_mongo(model, args):
-    mongodb_connection, mongodb_database, mongodb_write_table, model_name = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/", \
-    args['mongodb_database'], args['mongodb_write_table'], args['model_name']
-    client = MongoClient(mongodb_connection)
-    db = client[mongodb_database]
-    # 序列化模型
-    model_bytes = pickle.dumps(model)
-    model_data = {
-        'model_name': model_name,
-        'model': model_bytes,  # 将模型字节流存入数据库
-    }
-    print('Training completed!')
-
-    if mongodb_write_table in db.list_collection_names():
-        db[mongodb_write_table].drop()
-        print(f"Collection '{mongodb_write_table} already exist, deleted successfully!")
-    collection = db[mongodb_write_table]  # 集合名称
-    collection.insert_one(model_data)
-    print("model inserted successfully!")
-
-
-def insert_trained_model_into_mongo(model: tf.keras.Model, args: Dict[str, Any]) -> str:
-    """
-    将训练好的H5模型插入MongoDB,自动维护集合容量不超过50个模型
-    参数:
-    model : keras模型 - 训练好的Keras模型
-    args : dict - 包含以下键的字典:
-        mongodb_database: 数据库名称
-        model_table: 集合名称
-        model_name: 模型名称
-        gen_time: 模型生成时间(datetime对象)
-        params: 模型参数(JSON可序列化对象)
-        descr: 模型描述文本
-    """
-    # ------------------------- 参数校验 -------------------------
-    required_keys = {'mongodb_database', 'model_table', 'model_name',
-                     'gen_time', 'params', 'descr'}
-    if missing := required_keys - args.keys():
-        raise ValueError(f"缺少必要参数: {missing}")
-
-    # ------------------------- 配置解耦 -------------------------
-    # 从环境变量获取连接信息(更安全)
-    mongodb_connection = os.getenv("MONGO_URI", "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/")
-
-    # ------------------------- 资源初始化 -------------------------
-    fd, temp_path = None, None
-    client = None
-
-    try:
-        # ------------------------- 临时文件处理 -------------------------
-        fd, temp_path = tempfile.mkstemp(suffix='.keras')
-        os.close(fd)  # 立即释放文件锁
-
-        # ------------------------- 模型保存 -------------------------
-        try:
-            model.save(temp_path) # 不指定save_format,默认使用keras新格式
-        except Exception as e:
-            raise RuntimeError(f"模型保存失败: {str(e)}") from e
-
-        # ------------------------- 数据库连接 -------------------------
-        client = MongoClient(mongodb_connection)
-        db = client[args['mongodb_database']]
-        collection = db[args['model_table']]
-
-        # ------------------------- 索引检查 -------------------------
-        # index_info = collection.index_information()
-        # if "gen_time_1" not in index_info:
-        #     print("开始创建索引...")
-        #     collection.create_index(
-        #         [("gen_time", ASCENDING)],
-        #         name="gen_time_1",
-        #         background=True
-        #     )
-        #     print("索引创建成功")
-        # else:
-        #     print("索引已存在,跳过创建")
-
-        # ------------------------- 容量控制 -------------------------
-        # 使用更高效的计数方式
-        if collection.estimated_document_count() >= 50:
-            # 原子性删除操作
-            if deleted := collection.find_one_and_delete(
-                    sort=[("gen_time", ASCENDING)],
-                    projection={"_id": 1, "model_name": 1, "gen_time": 1}
-            ):
-                print(f"已淘汰模型 [{deleted['model_name']}] 生成时间: {deleted['gen_time']}")
-
-        # ------------------------- 数据插入 -------------------------
-        with open(temp_path, 'rb') as f:
-            result = collection.insert_one({
-                "model_name": args['model_name'],
-                "model_data": f.read(),
-                "gen_time": args['gen_time'],
-                "params": args['params'],
-                "descr": args['descr']
-            })
-
-        print(f"✅ 模型 {args['model_name']} 保存成功 | 文档ID: {result.inserted_id}")
-        return str(result.inserted_id)
-
-    except Exception as e:
-        # ------------------------- 异常分类处理 -------------------------
-        error_type = "数据库操作" if isinstance(e, (pymongo.errors.PyMongoError, RuntimeError)) else "系统错误"
-        print(f"❌ {error_type} - 详细错误: {str(e)}")
-        raise  # 根据业务需求决定是否重新抛出
-
-    finally:
-        # ------------------------- 资源清理 -------------------------
-        if client:
-            client.close()
-        if temp_path and os.path.exists(temp_path):
-            try:
-                os.remove(temp_path)
-            except PermissionError:
-                print(f"⚠️ 临时文件清理失败: {temp_path}")
-
-
-def insert_scaler_model_into_mongo(feature_scaler_bytes: BytesIO, target_scaler_bytes: BytesIO, args: Dict[str, Any]) -> str:
-    """
-    将特征缩放器存储到MongoDB,自动维护集合容量不超过50个文档
-
-    参数:
-    feature_scaler_bytes: BytesIO - 特征缩放器字节流
-    scaled_target_bytes: BytesIO - 目标缩放器字节流
-    args : dict - 包含以下键的字典:
-        mongodb_database: 数据库名称
-        scaler_table: 集合名称
-        model_name: 关联模型名称
-        gen_time: 生成时间(datetime对象)
-    """
-    # ------------------------- 参数校验 -------------------------
-    required_keys = {'mongodb_database', 'scaler_table', 'model_name', 'gen_time'}
-    if missing := required_keys - args.keys():
-        raise ValueError(f"缺少必要参数: {missing}")
-
-    # ------------------------- 配置解耦 -------------------------
-    # 从环境变量获取连接信息(安全隔离凭证)
-    mongodb_conn = os.getenv("MONGO_URI", "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/")
-
-    # ------------------------- 输入验证 -------------------------
-    for buf, name in [(feature_scaler_bytes, "特征缩放器"),
-                      (target_scaler_bytes, "目标缩放器")]:
-        if not isinstance(buf, BytesIO):
-            raise TypeError(f"{name} 必须为BytesIO类型")
-        if buf.getbuffer().nbytes == 0:
-            raise ValueError(f"{name} 字节流为空")
-
-    client = None
-    try:
-        # ------------------------- 数据库连接 -------------------------
-        client = MongoClient(mongodb_conn)
-        db = client[args['mongodb_database']]
-        collection = db[args['scaler_table']]
-
-        # ------------------------- 索引维护 -------------------------
-        # if "gen_time_1" not in collection.index_information():
-        #     collection.create_index([("gen_time", ASCENDING)], name="gen_time_1")
-        #     print("⏱️ 已创建时间排序索引")
-
-        # ------------------------- 容量控制 -------------------------
-        # 使用近似计数提升性能(误差在几十条内可接受)
-        if collection.estimated_document_count() >= 50:
-            # 原子性删除操作(保证事务完整性)
-            if deleted := collection.find_one_and_delete(
-                    sort=[("gen_time", ASCENDING)],
-                    projection={"_id": 1, "model_name": 1, "gen_time": 1}
-            ):
-                print(f"🗑️ 已淘汰最旧缩放器 [{deleted['model_name']}] 生成时间: {deleted['gen_time']}")
-
-        # ------------------------- 数据插入 -------------------------
-        # 确保字节流指针位置正确
-        feature_scaler_bytes.seek(0)
-        target_scaler_bytes.seek(0)
-
-        result = collection.insert_one({
-            "model_name": args['model_name'],
-            "gen_time": args['gen_time'],
-            "feature_scaler": feature_scaler_bytes.read(),
-            "target_scaler": target_scaler_bytes.read()
-        })
-
-        print(f"✅ 缩放器 {args['model_name']} 保存成功 | 文档ID: {result.inserted_id}")
-        return str(result.inserted_id)
-
-    except Exception as e:
-        # ------------------------- 异常分类处理 -------------------------
-        error_type = "数据库操作" if isinstance(e, (pymongo.errors.PyMongoError, ValueError)) else "系统错误"
-        print(f"❌ {error_type}异常 - 详细错误: {str(e)}")
-        raise  # 根据业务需求决定是否重新抛出
-
-    finally:
-        # ------------------------- 资源清理 -------------------------
-        if client:
-            client.close()
-        # 重置字节流指针(确保后续可复用)
-        feature_scaler_bytes.seek(0)
-        target_scaler_bytes.seek(0)
-
-
-def get_h5_model_from_mongo( args: Dict[str, Any], custom_objects: Optional[Dict[str, Any]] = None) -> Optional[tf.keras.Model]:
-    """
-    从MongoDB获取指定模型的最新版本
-
-    参数:
-    args : dict - 包含以下键的字典:
-        mongodb_database: 数据库名称
-        model_table: 集合名称
-        model_name: 要获取的模型名称
-    custom_objects: dict - 自定义Keras对象字典
-
-    返回:
-    tf.keras.Model - 加载成功的Keras模型
-    """
-    # ------------------------- 参数校验 -------------------------
-    required_keys = {'mongodb_database', 'model_table', 'model_name'}
-    if missing := required_keys - args.keys():
-        raise ValueError(f"❌ 缺失必要参数: {missing}")
-
-    # ------------------------- 环境配置 -------------------------
-    mongo_uri = os.getenv("MONGO_URI", "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/")
-    client = None
-    tmp_file_path = None  # 用于跟踪临时文件路径
-    try:
-        # ------------------------- 数据库连接 -------------------------
-        client = MongoClient(
-            mongo_uri,
-            maxPoolSize=10,  # 连接池优化
-            socketTimeoutMS=5000
-        )
-        db = client[args['mongodb_database']]
-        collection = db[args['model_table']]
-
-        # ------------------------- 索引维护 -------------------------
-        index_name = "model_gen_time_idx"
-        if index_name not in collection.index_information():
-            collection.create_index(
-                [("model_name", 1), ("gen_time", DESCENDING)],
-                name=index_name
-            )
-            print("⏱️ 已创建复合索引")
-
-        # ------------------------- 高效查询 -------------------------
-        model_doc = collection.find_one(
-            {"model_name": args['model_name']},
-            sort=[('gen_time', DESCENDING)],
-            projection={"model_data": 1, "gen_time": 1}  # 获取必要字段
-        )
-
-        if not model_doc:
-            print(f"⚠️ 未找到模型 '{args['model_name']}' 的有效记录")
-            return None
-
-        # ------------------------- 内存优化加载 -------------------------
-        if model_doc:
-            model_data = model_doc['model_data']  # 获取模型的二进制数据
-            # # 将二进制数据加载到 BytesIO 缓冲区
-            # model_buffer = BytesIO(model_data)
-            # # 确保指针在起始位置
-            # model_buffer.seek(0)
-            # # 从缓冲区加载模型
-            # # 使用 h5py 和 BytesIO 从内存中加载模型
-            # with h5py.File(model_buffer, 'r', driver='fileobj') as f:
-            #     model = tf.keras.models.load_model(f, custom_objects=custom_objects)
-            # 创建临时文件
-            with tempfile.NamedTemporaryFile(suffix=".keras", delete=False) as tmp_file:
-                tmp_file.write(model_data)
-                tmp_file_path = tmp_file.name  # 获取临时文件路径
-
-            # 从临时文件加载模型
-            model = tf.keras.models.load_model(tmp_file_path, custom_objects=custom_objects)
-
-            print(f"{args['model_name']}模型成功从 MongoDB 加载!")
-            return model
-    except tf.errors.NotFoundError as e:
-        print(f"❌ 模型结构缺失关键组件: {str(e)}")
-        raise RuntimeError("模型架构不完整") from e
-
-    except Exception as e:
-        print(f"❌ 系统异常: {str(e)}")
-        raise
-
-    finally:
-        # ------------------------- 资源清理 -------------------------
-        if client:
-            client.close()
-        # 确保删除临时文件
-        if tmp_file_path and os.path.exists(tmp_file_path):
-            try:
-                os.remove(tmp_file_path)
-                print(f"🧹 已清理临时文件: {tmp_file_path}")
-            except Exception as cleanup_err:
-                print(f"⚠️ 临时文件清理失败: {str(cleanup_err)}")
-
-
-def get_keras_model_from_mongo(
-        args: Dict[str, Any],
-        custom_objects: Optional[Dict[str, Any]] = None
-) -> Optional[tf.keras.Model]:
-    """
-    从MongoDB获取指定模型的最新版本(支持Keras格式)
-
-    参数:
-    args : dict - 包含以下键的字典:
-        mongodb_database: 数据库名称
-        model_table: 集合名称
-        model_name: 要获取的模型名称
-    custom_objects: dict - 自定义Keras对象字典
-
-    返回:
-    tf.keras.Model - 加载成功的Keras模型
-    """
-    # ------------------------- 参数校验 -------------------------
-    required_keys = {'mongodb_database', 'model_table', 'model_name'}
-    if missing := required_keys - args.keys():
-        raise ValueError(f"❌ 缺失必要参数: {missing}")
-
-    # ------------------------- 环境配置 -------------------------
-    mongo_uri = os.getenv("MONGO_URI", "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/")
-    client = None
-    tmp_file_path = None  # 用于跟踪临时文件路径
-
-    try:
-        # ------------------------- 数据库连接 -------------------------
-        client = MongoClient(
-            mongo_uri,
-            maxPoolSize=10,
-            socketTimeoutMS=5000
-        )
-        db = client[args['mongodb_database']]
-        collection = db[args['model_table']]
-
-        # ------------------------- 索引维护 -------------------------
-        # index_name = "model_gen_time_idx"
-        # if index_name not in collection.index_information():
-        #     collection.create_index(
-        #         [("model_name", 1), ("gen_time", DESCENDING)],
-        #         name=index_name
-        #     )
-        #     print("⏱️ 已创建复合索引")
-
-        # ------------------------- 高效查询 -------------------------
-        model_doc = collection.find_one(
-            {"model_name": args['model_name']},
-            sort=[('gen_time', DESCENDING)],
-            projection={"model_data": 1, "gen_time": 1, 'params':1}
-        )
-
-        if not model_doc:
-            print(f"⚠️ 未找到模型 '{args['model_name']}' 的有效记录")
-            return None
-
-        # ------------------------- 内存优化加载 -------------------------
-        model_data = model_doc['model_data']
-        model_params = model_doc['params']
-        # 创建临时文件(自动删除)
-        with tempfile.NamedTemporaryFile(suffix=".keras", delete=False) as tmp_file:
-            tmp_file.write(model_data)
-            tmp_file_path = tmp_file.name  # 记录文件路径
-
-        # 从临时文件加载模型
-        model = tf.keras.models.load_model(
-            tmp_file_path,
-            custom_objects=custom_objects
-        )
-
-        print(f"{args['model_name']} 模型成功从 MongoDB 加载!")
-        return model, model_params
-
-    except tf.errors.NotFoundError as e:
-        print(f"❌ 模型结构缺失关键组件: {str(e)}")
-        raise RuntimeError("模型架构不完整") from e
-
-    except Exception as e:
-        print(f"❌ 系统异常: {str(e)}")
-        raise
-
-    finally:
-        # ------------------------- 资源清理 -------------------------
-        if client:
-            client.close()
-
-        # 确保删除临时文件
-        if tmp_file_path and os.path.exists(tmp_file_path):
-            try:
-                os.remove(tmp_file_path)
-                print(f"🧹 已清理临时文件: {tmp_file_path}")
-            except Exception as cleanup_err:
-                print(f"⚠️ 临时文件清理失败: {str(cleanup_err)}")
-
-def get_scaler_model_from_mongo(args: Dict[str, Any], only_feature_scaler: bool = False) -> Union[object, Tuple[object, object]]:
-    """
-    优化版特征缩放器加载函数 - 安全高效获取最新预处理模型
-
-    参数:
-    args : 必须包含键:
-        - mongodb_database: 数据库名称
-        - scaler_table: 集合名称
-        - model_name: 目标模型名称
-    only_feature_scaler : 是否仅返回特征缩放器
-
-    返回:
-    单个缩放器对象或(feature_scaler, target_scaler)元组
-
-    异常:
-    ValueError : 参数缺失或类型错误
-    RuntimeError : 数据操作异常
-    """
-    # ------------------------- 参数校验 -------------------------
-    required_keys = {'mongodb_database', 'scaler_table', 'model_name'}
-    if missing := required_keys - args.keys():
-        raise ValueError(f"❌ 缺失必要参数: {missing}")
-
-    # ------------------------- 环境配置 -------------------------
-    mongo_uri = os.getenv("MONGO_URI", "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/")
-
-    client = None
-    try:
-        # ------------------------- 数据库连接 -------------------------
-        client = MongoClient(
-            mongo_uri,
-            maxPoolSize=20,  # 连接池上限
-            socketTimeoutMS=3000,  # 3秒超时
-            serverSelectionTimeoutMS=5000  # 5秒服务器选择超时
-        )
-        db = client[args['mongodb_database']]
-        collection = db[args['scaler_table']]
-
-        # ------------------------- 索引维护 -------------------------
-        # index_name = "model_gen_time_idx"
-        # if index_name not in collection.index_information():
-        #     collection.create_index(
-        #         [("model_name", 1), ("gen_time", DESCENDING)],
-        #         name=index_name,
-        #         background=True  # 后台构建避免阻塞
-        #     )
-        #     print("⏱️ 已创建特征缩放器复合索引")
-
-        # ------------------------- 高效查询 -------------------------
-        scaler_doc = collection.find_one(
-            {"model_name": args['model_name']},
-            sort=[('gen_time', DESCENDING)],
-            projection={"feature_scaler": 1, "target_scaler": 1, "gen_time": 1}
-        )
-
-        if not scaler_doc:
-            raise RuntimeError(f"⚠️ 找不到模型 {args['model_name']} 的缩放器记录")
-
-        # ------------------------- 反序列化处理 -------------------------
-        def load_scaler(data: bytes) -> object:
-            """安全加载序列化对象"""
-            with BytesIO(data) as buffer:
-                buffer.seek(0)  # 确保指针复位
-                try:
-                    return joblib.load(buffer)
-                except joblib.UnpicklingError as e:
-                    raise RuntimeError("反序列化失败 (可能版本不兼容)") from e
-
-        # 特征缩放器加载
-        feature_data = scaler_doc["feature_scaler"]
-        if not isinstance(feature_data, bytes):
-            raise RuntimeError("特征缩放器数据格式异常")
-        feature_scaler = load_scaler(feature_data)
-
-        if only_feature_scaler:
-            return feature_scaler
-
-        # 目标缩放器加载
-        target_data = scaler_doc["target_scaler"]
-        if not isinstance(target_data, bytes):
-            raise RuntimeError("目标缩放器数据格式异常")
-        target_scaler = load_scaler(target_data)
-
-        print(f"✅ 成功加载 {args['model_name']} 的缩放器 (版本时间: {scaler_doc.get('gen_time', '未知')})")
-        return feature_scaler, target_scaler
-
-    except PyMongoError as e:
-        raise RuntimeError(f"🔌 数据库操作失败: {str(e)}") from e
-    except RuntimeError as e:
-        raise  RuntimeError(f"🔌 mongo操作失败: {str(e)}") from e# 直接传递已封装的异常
-    except Exception as e:
-        raise RuntimeError(f"❌ 未知系统异常: {str(e)}") from e
-    finally:
-        # ------------------------- 资源清理 -------------------------
-        if client:
-            client.close()
-

+ 54 - 0
data_processing/data_operation/hive_to_mongo.py

@@ -0,0 +1,54 @@
+from flask import Flask,request,jsonify
+import time
+import logging
+import traceback
+from common.database_dml import insert_data_into_mongo,get_xmo_data_from_hive
+app = Flask('hive_to_mongo——service')
+
+
+@app.route('/hello', methods=['POST'])
+def hello():
+    """Answer POST /hello with a fixed JSON greeting."""
+    payload = {'message': 'Hello, World!'}
+    return jsonify(**payload)
+
+
+@app.route('/hive_to_mongo', methods=['POST'])
+def data_join():
+    """Copy forecast data from Hive into MongoDB for one request.
+
+    Reads the request parameters, passes them to ``get_xmo_data_from_hive``
+    and, when a DataFrame comes back, writes it with
+    ``insert_data_into_mongo``. A string returned by the Hive helper is
+    treated as an error message and reported under ``msg``.
+
+    Returns:
+        dict: ``success`` (1 or 0), ``args`` (the request parameters),
+        ``start_time`` / ``end_time`` (local time, ``%Y-%m-%d %H:%M:%S``)
+        and, on failure, ``msg``.
+    """
+    # Record the program start time
+    start_time = time.time()
+    result = {}
+    success = 0
+    # Bound before the try block so the response can always echo it back,
+    # even when reading the request parameters itself fails.
+    args = {}
+    print("Program starts execution!")
+    try:
+        args = request.values.to_dict()
+        print('args', args)
+        # Look the logger up by name: the module-level ``logger`` is only
+        # created when this file is run as a script.
+        logging.getLogger("hive_to_mongo").info(args)
+        df_hive = get_xmo_data_from_hive(args)
+        if isinstance(df_hive, str):
+            # The Hive helper signals a problem by returning a message.
+            success = 0
+            result['msg'] = df_hive
+        else:
+            insert_data_into_mongo(df_hive, args)
+            success = 1
+    except Exception:
+        # str.replace returns a new string, so the flattened traceback has
+        # to be stored rather than discarded.
+        result['msg'] = traceback.format_exc().replace("\n", "\t")
+    end_time = time.time()
+    result['success'] = success
+    result['args'] = args
+    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
+    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
+    print("Program execution ends!")
+    return result
+
+
+if __name__ == "__main__":
+    # Script entry point: configure logging, then serve the Flask app with
+    # waitress on all interfaces, port 10127.
+    print("Program starts execution!")
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    # Module-level logger used by data_join(); it only exists when this file
+    # is executed as a script.
+    logger = logging.getLogger("hive_to_mongo")
+    from waitress import serve
+    serve(app, host="0.0.0.0", port=10127)
+    # NOTE(review): serve() presumably blocks until the server stops, so this
+    # message would appear at shutdown rather than at start-up -- confirm.
+    print("server start!")
+    
+   
+    

+ 6 - 54
evaluation_processing/evaluation_accuracy.py

@@ -8,6 +8,9 @@ from flask import Flask, request
 import time
 import logging
 import traceback
+
+from common.database_dml import get_data_from_mongo, insert_data_into_mongo
+
 app = Flask('evaluation_accuracy——service')
 url = 'http://49.4.78.194:17160/apiCalculate/calculate'
 '''
@@ -97,66 +100,15 @@ def datetime_to_timestamp(dt):
     return int(round(time.mktime(dt.timetuple()))*1000)
 
 
-
-def get_data_from_mongo(args):
-    mongodb_connection,mongodb_database,mongodb_read_table = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/",args['mongodb_database'],args['mongodb_read_table']
-    client = MongoClient(mongodb_connection)
-    # 选择数据库(如果数据库不存在,MongoDB 会自动创建)
-    db = client[mongodb_database]
-    collection = db[mongodb_read_table]  # 集合名称
-    data_from_db = collection.find()  # 这会返回一个游标(cursor)
-    # 将游标转换为列表,并创建 pandas DataFrame
-    df = pd.DataFrame(list(data_from_db))
-    client.close()
-    return df
-    
-
-def insert_data_into_mongo(res_df,args):
-    mongodb_connection,mongodb_database,mongodb_write_table = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/",args['mongodb_database'],args['mongodb_write_table']
-    client = MongoClient(mongodb_connection)
-    db = client[mongodb_database]
-    if mongodb_write_table in db.list_collection_names():
-        db[mongodb_write_table].drop()
-        print(f"Collection '{mongodb_write_table} already exist, deleted successfully!")
-    collection = db[mongodb_write_table]  # 集合名称
-    # 将 DataFrame 转为字典格式
-    data_dict = res_df.to_dict("records")  # 每一行作为一个字典
-    # 插入到 MongoDB
-    collection.insert_many(data_dict)
-    print("data inserted successfully!")
-    
-
-# def compute_accuracy(df,args):
-#     col_time,col_rp,col_pp,formulaType = args['col_time'],args['col_rp'],args['col_pp'],args['formulaType'].split('_')[0]
-#     dates = []
-#     accuracy = []
-#     df = df[(~np.isnan(df[col_rp]))&(~np.isnan(df[col_pp]))]
-#     df = df[[col_time,col_rp,col_pp]].rename(columns={col_time:'C_TIME',col_rp:'realValue',col_pp:'forecastAbleValue'})
-#     df['ableValue'] = df['realValue']
-#     df['C_TIME'] = df['C_TIME'].apply(lambda x: datetime.strptime(x, "%Y-%m-%d %H:%M:%S"))
-#     if formulaType=='DAY':
-#         df['C_DATE'] = df['C_TIME'].apply(lambda x: x.strftime("%Y-%m-%d"))
-#         days_list = df['C_DATE'].unique().tolist()
-#         for day in days_list:
-#             df_tmp = df[df['C_DATE'] == day]
-#             dates.append(day)
-#             accuracy.append(calculate_acc(df_tmp, args))
-#     else:
-#         points = df['C_TIME'].unique().tolist()
-#         for point in points:
-#             df_tmp = df[df['C_TIME'] == point]
-#             dates.append(point)
-#             accuracy.append(calculate_acc(df_tmp, args))
-#     print("accuray compute successfully!")
-#     return pd.DataFrame({'date':dates,'accuracy':accuracy})
-
 # 定义 RMSE 和 MAE 计算函数
 def rmse(y_true, y_pred):
     return np.sqrt(np.mean((y_true - y_pred) ** 2))
 
+
 def mae(y_true, y_pred):
     return np.mean(np.abs(y_true - y_pred))
-    
+
+
 def compute_accuracy(df,args):
     col_time,col_rp,col_pp = args['col_time'],args['col_rp'],args['col_pp']
     df[col_time] = df[col_time].apply(lambda x:pd.to_datetime(x).strftime("%Y-%m-%d")) 

+ 6 - 21
models_processing/model_predict/model_prediction_lightgbm.py

@@ -5,7 +5,7 @@ from flask import Flask,request
 import time
 import logging
 import traceback
-from common.database_dml import get_data_from_mongo,insert_data_into_mongo
+from common.database_dml import get_data_from_mongo, insert_data_into_mongo, get_pickle_model_from_mongo
 from common.alert import send_message
 from datetime import datetime, timedelta
 import pytz
@@ -44,16 +44,8 @@ def forecast_data_distribution(pre_data,args):
         result = get_xxl_dq(farm_id, dt)
     else:
         df = pre_data.sort_values(by=col_time).fillna(method='ffill').fillna(method='bfill')
-        mongodb_connection, mongodb_database, mongodb_model_table, model_name = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/", \
-        args['mongodb_database'], args['mongodb_model_table'], args['model_name']
-        client = MongoClient(mongodb_connection)
-        db = client[mongodb_database]
-        collection = db[mongodb_model_table]
-        model_data = collection.find_one({"model_name": model_name})
-        if model_data is not None:
-            model_binary = model_data['model']  # 确保这个字段是存储模型的二进制数据
-            # 反序列化模型
-            model = pickle.loads(model_binary)
+        model = get_pickle_model_from_mongo(args)
+        if model is not None:
             diff = set(model.feature_name()) - set(pre_data.columns)
             if len(diff) > 0:
                 send_message('lightgbm预测组件', farm_id, f'NWP特征列缺失,使用DQ代替!features:{diff}')
@@ -73,18 +65,11 @@ def forecast_data_distribution(pre_data,args):
 
 
 def model_prediction(df,args):
-    mongodb_connection,mongodb_database,mongodb_model_table,model_name,howLongAgo,farm_id,target = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/",args['mongodb_database'],args['mongodb_model_table'],args['model_name'],int(args['howLongAgo']),args['farm_id'],args['target']
-    client = MongoClient(mongodb_connection)
-    db = client[mongodb_database]
-    collection = db[mongodb_model_table]
-    model_data = collection.find_one({"model_name": model_name})
+    model_name, howLongAgo, farm_id, target = args['model_name'], int(args['howLongAgo']), args['farm_id'], args['target']
     if 'is_limit' in df.columns:
         df = df[df['is_limit'] == False]
-
-    if model_data is not None:
-        model_binary = model_data['model']  # 确保这个字段是存储模型的二进制数据
-        # 反序列化模型 
-        model = pickle.loads(model_binary)
+    model = get_pickle_model_from_mongo(args)
+    if model is not None:
         df['power_forecast'] = model.predict(df[model.feature_name()])
         df.loc[df['power_forecast'] < 0, 'power_forecast'] = 0
         df['model'] = model_name

+ 96 - 0
models_processing/model_predict/model_prediction_photovoltaic_physical.py

@@ -0,0 +1,96 @@
+import pandas as pd
+from pymongo import MongoClient
+import pickle
+from flask import Flask, request
+import time
+import logging
+import traceback
+from common.database_dml import get_data_from_mongo, insert_data_into_mongo
+
+
+app = Flask('model_prediction_photovoltaic_physical——service')
+
+
+def str_to_list(arg):
+    """Split a comma-separated string into a list; an empty string gives []."""
+    return arg.split(',') if arg != '' else []
+
+
+def forecast_data_distribution(pre_data, args):
+    """Build a physical photovoltaic power forecast from forecast radiance.
+
+    Power is estimated as ``radiance * cap / radiance_max`` rounded to two
+    decimals. When the optional ``sunrise_time`` / ``sunset_time`` arguments
+    are present, rows before sunrise or after sunset are forced to 0.
+
+    Args:
+        pre_data: DataFrame containing the ``col_time`` and ``col_radiance``
+            columns. It is modified in place (``farm_id`` and
+            ``power_forecast`` are added). The time column must be
+            datetime-like because ``.dt`` is used on it.
+        args: Request parameters. Required keys: ``col_time``, ``farm_id``,
+            ``col_radiance``, ``radiance_max``, ``cap``. Optional keys:
+            ``sunrise_time``, ``sunset_time``.
+
+    Returns:
+        DataFrame with the columns ``farm_id``, ``date_time`` and
+        ``power_forecast``.
+    """
+    col_time = args['col_time']
+    farm_id = args['farm_id']
+    col_radiance = args['col_radiance']
+    radiance_max = float(args['radiance_max'])
+    cap = float(args['cap'])
+    pre_data['farm_id'] = farm_id
+    pre_data['power_forecast'] = round(pre_data[col_radiance] * cap / radiance_max, 2)
+    # Compare on time-of-day only. The bounds are parsed into datetime.time
+    # because comparing datetime.time values with a plain string raises
+    # TypeError.
+    # NOTE(review): presumably the bounds look like 'HH:MM' or 'HH:MM:SS' -
+    # confirm against the caller.
+    time_of_day = pre_data[col_time].dt.time
+    if 'sunrise_time' in args:
+        sunrise_time = pd.to_datetime(str(args['sunrise_time'])).time()
+        pre_data.loc[time_of_day < sunrise_time, 'power_forecast'] = 0
+    if 'sunset_time' in args:
+        sunset_time = pd.to_datetime(str(args['sunset_time'])).time()
+        # .loc is required for a (row mask, column) assignment.
+        pre_data.loc[time_of_day > sunset_time, 'power_forecast'] = 0
+    # NOTE(review): the output selects the literal 'date_time' column rather
+    # than col_time - confirm both always match.
+    return pre_data[['farm_id', 'date_time', 'power_forecast']]
+
+
+def model_prediction(df, args):
+    """Compute the physical photovoltaic power prediction for historical data.
+
+    Power is estimated as ``radiance * cap / radiance_max`` rounded to two
+    decimals; negative values are clamped to 0. When the optional
+    ``sunrise_time`` / ``sunset_time`` arguments are present, rows before
+    sunrise or after sunset are forced to 0.
+
+    Args:
+        df: DataFrame containing ``dateTime``, the ``col_time`` column, the
+            ``col_radiance`` column and the ``target`` column. It is modified
+            in place. The time column must be datetime-like because ``.dt``
+            is used on it.
+        args: Request parameters. Required keys: ``howLongAgo``, ``farm_id``,
+            ``target``, ``cap``, ``col_radiance``, ``radiance_max``,
+            ``model_name``, ``col_time``. Optional keys: ``sunrise_time``,
+            ``sunset_time``.
+
+    Returns:
+        DataFrame with the columns ``dateTime``, ``howLongAgo``, ``model``,
+        ``farm_id``, ``power_forecast`` and the target column.
+    """
+    howLongAgo = int(args['howLongAgo'])
+    farm_id = args['farm_id']
+    target = args['target']
+    model_name = args['model_name']
+    col_radiance = args['col_radiance']
+    col_time = args['col_time']
+    # Request values arrive as strings, so they are converted before the
+    # arithmetic below.
+    cap = float(args['cap'])
+    radiance_max = float(args['radiance_max'])
+    df['power_forecast'] = round(df[col_radiance] * cap / radiance_max, 2)
+    df.loc[df['power_forecast'] < 0, 'power_forecast'] = 0
+    # Optional sunrise / sunset parameters: compare on time-of-day only. The
+    # bounds are parsed into datetime.time because comparing datetime.time
+    # values with a plain string raises TypeError.
+    # NOTE(review): presumably the bounds look like 'HH:MM' or 'HH:MM:SS' -
+    # confirm against the caller.
+    time_of_day = df[col_time].dt.time
+    if 'sunrise_time' in args:
+        sunrise_time = pd.to_datetime(str(args['sunrise_time'])).time()
+        df.loc[time_of_day < sunrise_time, 'power_forecast'] = 0
+    if 'sunset_time' in args:
+        sunset_time = pd.to_datetime(str(args['sunset_time'])).time()
+        # .loc is required for a (row mask, column) assignment.
+        df.loc[time_of_day > sunset_time, 'power_forecast'] = 0
+    df['model'] = model_name
+    df['howLongAgo'] = howLongAgo
+    df['farm_id'] = farm_id
+    print("model predict result  successfully!")
+    return df[['dateTime', 'howLongAgo', 'model', 'farm_id', 'power_forecast', target]]
+
+
+@app.route('/model_prediction_photovoltaic_physical', methods=['POST'])
+def model_prediction_photovoltaic_physical():
+    """Handle a POST request for a physical photovoltaic power prediction.
+
+    Reads the request parameters, loads the input data via
+    ``get_data_from_mongo``, runs ``forecast_data_distribution`` when
+    ``forecast_file`` is 1 and ``model_prediction`` otherwise, and stores the
+    result via ``insert_data_into_mongo``.
+
+    Returns:
+        dict with ``success`` (1 or 0), ``args``, ``start_time``,
+        ``end_time`` and, on failure, ``msg`` holding the traceback.
+    """
+    # Record the start time of the request.
+    start_time = time.time()
+    result = {}
+    success = 0
+    # Bound up front so the response can still be assembled when reading the
+    # request itself fails.
+    args = {}
+    # Looked up by name so the handler does not rely on the module-level
+    # `logger`, which is only created when the file runs as a script.
+    log = logging.getLogger("model_prediction_photovoltaic_physical log")
+    print("Program starts execution!")
+    try:
+        args = request.values.to_dict()
+        print('args', args)
+        log.info(args)
+        forecast_file = int(args['forecast_file'])
+        power_df = get_data_from_mongo(args)
+        if forecast_file == 1:
+            predict_data = forecast_data_distribution(power_df, args)
+        else:
+            predict_data = model_prediction(power_df, args)
+        insert_data_into_mongo(predict_data, args)
+        success = 1
+    except Exception:
+        # str.replace returns a new string, so its result must be the value
+        # that is stored.
+        result['msg'] = traceback.format_exc().replace("\n", "\t")
+    end_time = time.time()
+    result['success'] = success
+    result['args'] = args
+    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
+    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
+    print("Program execution ends!")
+    return result
+
+
+if __name__ == "__main__":
+    print("Program starts execution!")
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    logger = logging.getLogger("model_prediction_photovoltaic_physical log")
+    from waitress import serve
+
+    # serve() blocks while the server is running, so the start message is
+    # printed before the call rather than after it.
+    print("server start!")
+    serve(app, host="0.0.0.0", port=10126)

+ 1 - 1
models_processing/model_tf/tf_bilstm.py

@@ -11,7 +11,7 @@ from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoa
 from tensorflow.keras import optimizers, regularizers
 from models_processing.model_tf.losses import region_loss
 import numpy as np
-from common.database_dml_koi import *
+from common.database_dml import *
 from models_processing.model_tf.settings import set_deterministic
 from threading import Lock
 import argparse

+ 1 - 1
models_processing/model_tf/tf_bilstm_2.py

@@ -11,7 +11,7 @@ from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoa
 from tensorflow.keras import optimizers, regularizers
 from models_processing.model_tf.losses import region_loss
 import numpy as np
-from common.database_dml_koi import *
+from common.database_dml import *
 from models_processing.model_tf.settings import set_deterministic
 from threading import Lock
 import argparse

+ 1 - 1
models_processing/model_tf/tf_bp.py

@@ -13,7 +13,7 @@ from tensorflow.keras import optimizers, regularizers
 from models_processing.model_tf.losses import region_loss
 from models_processing.model_tf.settings import set_deterministic
 import numpy as np
-from common.database_dml_koi import *
+from common.database_dml import *
 from threading import Lock
 import argparse
 model_lock = Lock()

+ 1 - 1
models_processing/model_tf/tf_bp_pre.py

@@ -8,7 +8,7 @@ import json, copy
 import numpy as np
 from flask import Flask, request, g
 import logging, argparse, traceback
-from common.database_dml_koi import *
+from common.database_dml import *
 from common.processing_data_common import missing_features, str_to_list
 from data_processing.data_operation.data_handler import DataHandler
 from threading import Lock

+ 1 - 1
models_processing/model_tf/tf_bp_train.py

@@ -14,7 +14,7 @@ from data_processing.data_operation.data_handler import DataHandler
 import time, yaml
 from copy import deepcopy
 from models_processing.model_tf.tf_bp import BPHandler
-from common.database_dml_koi import *
+from common.database_dml import *
 import matplotlib.pyplot as plt
 from common.logs import Log
 from common.data_utils import deep_update

+ 1 - 1
models_processing/model_tf/tf_cnn.py

@@ -13,7 +13,7 @@ from tensorflow.keras import optimizers, regularizers
 from models_processing.model_tf.losses import region_loss
 from models_processing.model_tf.settings import set_deterministic
 import numpy as np
-from common.database_dml_koi import *
+from common.database_dml import *
 from threading import Lock
 import argparse
 model_lock = Lock()

+ 1 - 1
models_processing/model_tf/tf_cnn_pre.py

@@ -8,7 +8,7 @@ import json, copy
 import numpy as np
 from flask import Flask, request, g
 import logging, argparse, traceback
-from common.database_dml_koi import *
+from common.database_dml import *
 from common.processing_data_common import missing_features, str_to_list
 from data_processing.data_operation.data_handler import DataHandler
 from threading import Lock

+ 1 - 1
models_processing/model_tf/tf_cnn_train.py

@@ -13,7 +13,7 @@ from data_processing.data_operation.data_handler import DataHandler
 import time, yaml
 from copy import deepcopy
 from models_processing.model_tf.tf_cnn import CNNHandler
-from common.database_dml_koi import *
+from common.database_dml import *
 import matplotlib.pyplot as plt
 from common.logs import Log
 from common.data_utils import deep_update

+ 1 - 1
models_processing/model_tf/tf_lstm.py

@@ -11,7 +11,7 @@ from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoa
 from tensorflow.keras import optimizers, regularizers
 from models_processing.model_tf.losses import region_loss
 import numpy as np
-from common.database_dml_koi import *
+from common.database_dml import *
 from models_processing.model_tf.settings import set_deterministic
 from threading import Lock
 import argparse

+ 1 - 1
models_processing/model_tf/tf_lstm2_pre.py

@@ -8,7 +8,7 @@ import json, copy
 import numpy as np
 from flask import Flask, request, g
 import logging, argparse, traceback
-from common.database_dml_koi import *
+from common.database_dml import *
 from common.processing_data_common import missing_features, str_to_list
 from data_processing.data_operation.data_handler import DataHandler
 from threading import Lock

+ 1 - 1
models_processing/model_tf/tf_lstm2_train.py

@@ -13,7 +13,7 @@ from data_processing.data_operation.data_handler import DataHandler
 import time, yaml, threading
 from copy import deepcopy
 from models_processing.model_tf.tf_lstm import TSHandler
-from common.database_dml_koi import *
+from common.database_dml import *
 from common.logs import Log
 from common.data_utils import deep_update
 

+ 1 - 1
models_processing/model_tf/tf_lstm3_pre.py

@@ -8,7 +8,7 @@ import json, copy
 import numpy as np
 from flask import Flask, request, g
 import logging, argparse, traceback
-from common.database_dml_koi import *
+from common.database_dml import *
 from common.processing_data_common import missing_features, str_to_list
 from data_processing.data_operation.data_handler import DataHandler
 from threading import Lock

+ 1 - 1
models_processing/model_tf/tf_lstm3_train.py

@@ -12,7 +12,7 @@ import logging, argparse
 from data_processing.data_operation.data_handler import DataHandler
 import time, yaml, threading
 from copy import deepcopy
-from common.database_dml_koi import *
+from common.database_dml import *
 from common.logs import Log
 from common.data_utils import deep_update
 logger = Log('tf_ts3').logger

+ 1 - 1
models_processing/model_tf/tf_lstm_pre.py

@@ -8,7 +8,7 @@ import json, copy
 import numpy as np
 from flask import Flask, request, g
 import logging, argparse, traceback
-from common.database_dml_koi import *
+from common.database_dml import *
 from common.processing_data_common import missing_features, str_to_list
 from data_processing.data_operation.data_handler import DataHandler
 from threading import Lock

+ 1 - 1
models_processing/model_tf/tf_lstm_train.py

@@ -13,7 +13,7 @@ from data_processing.data_operation.data_handler import DataHandler
 import time, yaml, threading
 from copy import deepcopy
 from models_processing.model_tf.tf_lstm import TSHandler
-from common.database_dml_koi import *
+from common.database_dml import *
 from common.logs import Log
 from common.data_utils import deep_update
 logger = Log('tf_ts').logger

+ 1 - 1
models_processing/model_tf/tf_lstm_zone.py

@@ -11,7 +11,7 @@ from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoa
 from tensorflow.keras import optimizers, regularizers
 from models_processing.model_tf.losses import region_loss
 import numpy as np
-from common.database_dml_koi import *
+from common.database_dml import *
 from models_processing.model_tf.settings import set_deterministic
 from threading import Lock
 import argparse

+ 1 - 1
models_processing/model_tf/tf_lstm_zone_pre.py

@@ -8,7 +8,7 @@ import json, copy
 import numpy as np
 from flask import Flask, request, g
 import logging, argparse, traceback
-from common.database_dml_koi import *
+from common.database_dml import *
 from common.processing_data_common import missing_features, str_to_list
 from data_processing.data_operation.custom_data_handler import CustomDataHandler
 from models_processing.model_tf.tf_lstm_zone import TSHandler

+ 1 - 1
models_processing/model_tf/tf_lstm_zone_train.py

@@ -13,7 +13,7 @@ from data_processing.data_operation.custom_data_handler import CustomDataHandler
 import time, yaml, threading
 from copy import deepcopy
 from models_processing.model_tf.tf_lstm_zone import TSHandler
-from common.database_dml_koi import *
+from common.database_dml import *
 from common.logs import Log
 from common.data_utils import deep_update
 logger = Log('tf_ts').logger

+ 1 - 1
models_processing/model_tf/tf_tcn.py

@@ -11,7 +11,7 @@ from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoa
 from tensorflow.keras import optimizers, regularizers
 from models_processing.model_tf.losses import region_loss
 import numpy as np
-from common.database_dml_koi import *
+from common.database_dml import *
 from models_processing.model_tf.settings import set_deterministic
 from threading import Lock
 import argparse

+ 1 - 1
models_processing/model_tf/tf_test.py

@@ -12,7 +12,7 @@ from tensorflow.keras import optimizers, regularizers
 from tensorflow.keras.layers import BatchNormalization, GlobalAveragePooling1D, Dropout, Add, Concatenate, Multiply
 from models_processing.model_tf.losses import region_loss
 import numpy as np
-from common.database_dml_koi import *
+from common.database_dml import *
 from models_processing.model_tf.settings import set_deterministic
 from threading import Lock
 import argparse

+ 1 - 1
models_processing/model_tf/tf_test_pre.py

@@ -8,7 +8,7 @@ import json, copy
 import numpy as np
 from flask import Flask, request, g
 import logging, argparse, traceback
-from common.database_dml_koi import *
+from common.database_dml import *
 from common.processing_data_common import missing_features, str_to_list
 from data_processing.data_operation.data_handler import DataHandler
 from threading import Lock

+ 1 - 1
models_processing/model_tf/tf_test_train.py

@@ -13,7 +13,7 @@ from data_processing.data_operation.data_handler import DataHandler
 import time, yaml, threading
 from copy import deepcopy
 from models_processing.model_tf.tf_test import TSHandler
-from common.database_dml_koi import *
+from common.database_dml import *
 from common.logs import Log
 from common.data_utils import deep_update
 logger = Log('tf_test').logger

+ 1 - 1
models_processing/model_tf/tf_transformer.py

@@ -11,7 +11,7 @@ from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoa
 from tensorflow.keras import optimizers, regularizers
 from models_processing.model_tf.losses import region_loss
 import numpy as np
-from common.database_dml_koi import *
+from common.database_dml import *
 from models_processing.model_tf.settings import set_deterministic
 from threading import Lock
 import argparse

+ 3 - 1
requirements.txt

@@ -17,4 +17,6 @@ APScheduler==3.10.4
 paramiko==3.5.0
 PyYAML==6.0.1
 keras==3.8.0
-bayesian-optimization==2.0.4
+toml==0.10.2
+JayDeBeApi==1.2.3
+jpype1==1.5.2

+ 2 - 2
run_all.py

@@ -18,7 +18,6 @@ services = [
     ("models_processing/model_predict/model_prediction_lightgbm.py", 10090),
     ("models_processing/model_train/model_training_lstm.py", 10096),
     ("models_processing/model_predict/model_prediction_lstm.py", 10097),
-
     ("models_processing/model_tf/tf_bp_pre.py", 10110),
     ("models_processing/model_tf/tf_bp_train.py", 10111),
     ("models_processing/model_tf/tf_cnn_pre.py", 10112),
@@ -34,7 +33,6 @@ services = [
     ("models_processing/model_tf/tf_lstm_zone_pre.py", 10125),
     ("models_processing/model_tf/tf_lstm_zone_train.py", 10124),
 
-
     ("post_processing/post_processing.py", 10098),
     ("evaluation_processing/analysis.py", 10099),
     ("models_processing/model_predict/res_prediction.py", 10105),
@@ -45,6 +43,8 @@ services = [
     ("data_processing/data_operation/data_tj_nwp_ftp.py", 10106),
     ("post_processing/pre_post_processing.py", 10107),
     ("post_processing/cdq_coe_gen.py", 10123),
+    ("models_processing/model_predict/model_prediction_photovoltaic_physical.py", 10126),
+    ("data_processing/data_operation/hive_to_mongo.py", 10127),
 ]
 
 # 获取当前脚本所在的根目录