@@ -0,0 +1,179 @@
+import pandas as pd
+import numpy as np
+from pymongo import MongoClient
+from sklearn.model_selection import train_test_split
+from flask import Flask, request
+import time
+import traceback
+import logging
+from sklearn.preprocessing import MinMaxScaler
+from io import BytesIO
+import joblib
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import LSTM, Dense
+from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
+import matplotlib.pyplot as plt
+import tensorflow as tf
+
+
+app = Flask('model_training_lstm_service')
+
+def draw_loss(history):
+    # Plot training-set and validation-set loss
+    plt.figure(figsize=(20, 8))
+    plt.plot(history.history['loss'], label='Training Loss')
+    plt.plot(history.history['val_loss'], label='Validation Loss')
+    plt.title('Loss Curve')
+    plt.xlabel('Epochs')
+    plt.ylabel('Loss')
+    plt.legend()
+    plt.show()  # Note: plt.show() blocks or is a no-op in a headless service; plt.savefig may be preferable
+
+def get_data_from_mongo(args):
+    mongodb_connection = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/"
+    mongodb_database, mongodb_read_table = args['mongodb_database'], args['mongodb_read_table']
+    timeBegin, timeEnd = args['timeBegin'], args['timeEnd']
+    client = MongoClient(mongodb_connection)
+    # Select the database (MongoDB creates it automatically if it does not exist)
+    db = client[mongodb_database]
+    collection = db[mongodb_read_table]  # collection name
+    query = {"dateTime": {"$gte": timeBegin, "$lte": timeEnd}}
+    cursor = collection.find(query)
+    data = list(cursor)
+    df = pd.DataFrame(data)
+    # Drop the _id field (optional)
+    if '_id' in df.columns:
+        df = df.drop(columns=['_id'])
+    client.close()
+    return df
+
+
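+# Assumed source-document shape (field names are illustrative, not defined by this service):
+#   {"dateTime": "2024-01-01 00:00:00", "temp": 3.2, "wind_speed": 5.1, "power": 12.4}
+# If dateTime is stored as a string, the $gte/$lte range query above compares
+# lexicographically, so timeBegin/timeEnd must use the same string format.
+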
+def insert_model_into_mongo(model, feature_scaler_bytes, target_scaler_bytes, args):
+    mongodb_connection = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/"
+    mongodb_database, scaler_table, model_table, model_name = (
+        args['mongodb_database'], args['scaler_table'], args['model_table'], args['model_name'])
+    client = MongoClient(mongodb_connection)
+    db = client[mongodb_database]
+    collection = db[scaler_table]  # collection name
+    # Save the scalers in MongoDB as binary data
+    collection.insert_one({
+        "feature_scaler": feature_scaler_bytes.read(),
+        "target_scaler": target_scaler_bytes.read()
+    })
+    print("scalers inserted successfully!")
+    model_table = db[model_table]
+    # Create a BytesIO buffer
+    model_buffer = BytesIO()
+    # Save the model in HDF5 format into memory (BytesIO)
+    model.save(model_buffer, save_format='h5')
+    # Move the pointer back to the start of the buffer
+    model_buffer.seek(0)
+    # Read the model's binary data
+    model_data = model_buffer.read()
+    # Store the model in MongoDB
+    model_table.insert_one({
+        "model_name": model_name,
+        "model_data": model_data
+    })
+    print("Model saved to MongoDB successfully!")
+
+
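+# A minimal sketch (an assumption, not part of this service) of how the stored
+# model and scalers could be read back; h5py can open the in-memory HDF5 buffer:
+#   import h5py
+#   from tensorflow.keras.models import load_model
+#   doc = db[model_table_name].find_one({"model_name": model_name})   # hypothetical names
+#   model = load_model(h5py.File(BytesIO(doc['model_data']), 'r'))
+#   scaler_doc = db[scaler_table_name].find_one()
+#   feature_scaler = joblib.load(BytesIO(scaler_doc['feature_scaler']))
+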
+def rmse(y_true, y_pred):
+    return tf.math.sqrt(tf.reduce_mean(tf.square(y_true - y_pred)))
+
+
+# Build time-series sequences
+def create_sequences(data_features, data_target, time_steps):
+    X, y = [], []
+    if len(data_features) < time_steps:
+        print("Data length must not be shorter than the number of time steps!")
+        return np.array(X), np.array(y)
+    else:
+        for i in range(len(data_features) - time_steps + 1):
+            X.append(data_features[i:(i + time_steps)])
+            if len(data_target) > 0:
+                y.append(data_target[i + time_steps - 1])
+        return np.array(X), np.array(y)
+
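+# For example (illustrative): with 100 scaled rows and time_steps=24, this yields
+# X of shape (77, 24, n_features) and y of shape (77, 1), since
+# range(100 - 24 + 1) produces 77 sliding windows.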
+
+def build_model(data, args):
+    begin_time, end_time, col_time = args['begin_time'], args['end_time'], args['col_time']
+    # Request parameters arrive as strings, so cast/split them explicitly
+    time_steps = int(args['time_steps'])
+    features = str_to_list(args['features'])
+    target = args['target']
+    # Restrict to the configured training window
+    train_data = data[(data[col_time] >= begin_time) & (data[col_time] < end_time)]
+    # X_train, X_test, y_train, y_test = process_data(df_clean, params)
+    # Create scalers for the features and the target
+    feature_scaler = MinMaxScaler(feature_range=(0, 1))
+    target_scaler = MinMaxScaler(feature_range=(0, 1))
+    # Scale the features and the target, fitting on the training window
+    scaled_features = feature_scaler.fit_transform(train_data[features])
+    scaled_target = target_scaler.fit_transform(train_data[[target]])
+    # Serialize both scalers
+    feature_scaler_bytes = BytesIO()
+    joblib.dump(feature_scaler, feature_scaler_bytes)
+    feature_scaler_bytes.seek(0)  # Reset pointer to the beginning of the byte stream
+    target_scaler_bytes = BytesIO()
+    joblib.dump(target_scaler, target_scaler_bytes)
+    target_scaler_bytes.seek(0)
+    X, y = create_sequences(scaled_features, scaled_target, time_steps)
+    # Split into training and test sets
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=43)
+
+    # Build the LSTM model
+    model = Sequential()
+    model.add(LSTM(units=50, return_sequences=False, input_shape=(time_steps, X_train.shape[2])))
+    model.add(Dense(1))  # single output value
+    # Compile the model
+    model.compile(optimizer='adam', loss='mean_squared_error')
+    # Define the EarlyStopping and ReduceLROnPlateau callbacks
+    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1)
+    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=1)
+    # Train the model
+    history = model.fit(X_train, y_train,
+                        epochs=100,
+                        batch_size=32,
+                        validation_data=(X_test, y_test),
+                        verbose=2,
+                        callbacks=[early_stopping, reduce_lr])
+    draw_loss(history)
+    return model, feature_scaler_bytes, target_scaler_bytes
+
+
+def str_to_list(arg):
+    if arg == '':
+        return []
+    else:
+        return arg.split(',')
+
+
+@app.route('/model_training_lstm', methods=['POST'])
+def model_training_lstm():
+    # Record the start time
+    start_time = time.time()
+    result = {}
+    success = 0
+    args = {}
+    print("Program starts execution!")
+    try:
+        args = request.values.to_dict()
+        print('args', args)
+        logger.info(args)
+        power_df = get_data_from_mongo(args)
+        model, feature_scaler_bytes, target_scaler_bytes = build_model(power_df, args)
+        insert_model_into_mongo(model, feature_scaler_bytes, target_scaler_bytes, args)
+        success = 1
+    except Exception:
+        my_exception = traceback.format_exc()
+        my_exception = my_exception.replace("\n", "\t")  # assign the result; str.replace does not mutate in place
+        result['msg'] = my_exception
+    end_time = time.time()
+
+    result['success'] = success
+    result['args'] = args
+    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
+    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
+    print("Program execution ends!")
+    return result
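+
+# Illustrative invocation (all values are hypothetical, not defined by this service);
+# the parameter names match the args read by the handlers above:
+#   curl -X POST http://localhost:10096/model_training_lstm \
+#     -d 'mongodb_database=mydb' -d 'mongodb_read_table=power_data' \
+#     -d 'timeBegin=2024-01-01' -d 'timeEnd=2024-06-30' \
+#     -d 'begin_time=2024-01-01' -d 'end_time=2024-06-01' -d 'col_time=dateTime' \
+#     -d 'time_steps=24' -d 'features=temp,wind_speed' -d 'target=power' \
+#     -d 'scaler_table=scalers' -d 'model_table=models' -d 'model_name=lstm_v1'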
+
+
+if __name__ == "__main__":
+    print("Program starts execution!")
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    logger = logging.getLogger("model_training_lstm log")
+    from waitress import serve
+    print("server start!")
+    serve(app, host="0.0.0.0", port=10096)  # serve() blocks, so it runs last