Browse Source

awg commit algorithm components

anweiguo 4 months ago
Parent
Commit
63b4c6c067

+ 92 - 0
data_processing/data_operation/data_join.py

@@ -0,0 +1,92 @@
+import pandas as pd
+from pymongo import MongoClient
+from sqlalchemy import create_engine
+from flask import Flask,request,jsonify
+import time
+import logging
+import traceback
+from functools import reduce
+
+app = Flask('data_join——service')
+
+
+@app.route('/hello', methods=['POST'])
+def hello():
+    return jsonify(message='Hello, World!')
+
+def get_data_from_mongo(args):
+    mongodb_connection,mongodb_database,mongodb_read_table = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/",args['mongodb_database'],args['mongodb_read_table'].split(',')
+    df_list = []
+    client = MongoClient(mongodb_connection)
+    # Select the database (MongoDB creates it automatically if it does not exist)
+    db = client[mongodb_database]
+    for table in mongodb_read_table:
+        collection = db[table]  # collection name
+        data_from_db = collection.find()  # returns a cursor
+        # Materialize the cursor into a list and build a pandas DataFrame
+        df = pd.DataFrame(list(data_from_db))
+        df_list.append(df)
+    client.close()
+    return df_list
+
+
+def insert_data_into_mongo(res_df,args):
+    mongodb_connection,mongodb_database,mongodb_write_table = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/",args['mongodb_database'],args['mongodb_write_table']
+    client = MongoClient(mongodb_connection)
+    db = client[mongodb_database]
+    if mongodb_write_table in db.list_collection_names():
+        db[mongodb_write_table].drop()
+        print(f"Collection '{mongodb_write_table} already exist, deleted successfully!")
+    collection = db[mongodb_write_table]  # 集合名称
+    # 将 DataFrame 转为字典格式
+    data_dict = res_df.to_dict("records")  # 每一行作为一个字典
+    # 插入到 MongoDB
+    collection.insert_many(data_dict)
+    print("data inserted successfully!")
+
+
+# 1. Detecting curtailment from AGC/AVC signals (accurate at some stations, not others); one approach.
+#    The database data has problems, so it cannot be used for now.
+def data_join(df_list, args):
+    join_key, join_type = args['join_key'], args['join_type']
+    # Fold the DataFrame list into one via successive merges on the join key;
+    # the join type comes from the request, not the literal string 'join_type'
+    result = reduce(lambda left, right: pd.merge(left, right, how=join_type, on=join_key), df_list)
+    return result
+
+
+@app.route('/data_join', methods=['POST'])
+def data_join_service():  # distinct name so the data_join helper above is not shadowed
+    # record the start time
+    start_time = time.time()
+    result = {}
+    success = 0
+    args = {}  # ensure args exists in the response even if request parsing raises
+    print("Program starts execution!")
+    try:
+        args = request.values.to_dict()
+        print('args',args)
+        logger.info(args)
+        df_list = get_data_from_mongo(args)
+        res_df = data_join(df_list,args)
+        insert_data_into_mongo(res_df,args)
+        success = 1
+    except Exception as e:
+        my_exception = traceback.format_exc()
+        # str.replace returns a new string; assign the result instead of discarding it
+        my_exception = my_exception.replace("\n", "\t")
+        result['msg'] = my_exception
+    end_time = time.time()
+    result['success'] = success
+    result['args'] = args
+    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
+    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
+    print("Program execution ends!")
+    return result
+
+
+if __name__=="__main__":
+    print("Program starts execution!")
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    logger = logging.getLogger("data_join")
+    from waitress import serve
+    print("server start!")
+    serve(app, host="0.0.0.0", port=10094)  # serve() blocks, so log before it starts
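Every handler reads request.values, so the services take form-encoded POST parameters. A minimal client sketch for /data_join follows; the host, port, and all sample values are illustrative assumptions, not part of the commit.

# Hypothetical client for the /data_join endpoint above.
import requests

resp = requests.post(
    "http://127.0.0.1:10094/data_join",
    data={
        "mongodb_database": "db_power",           # placeholder database name
        "mongodb_read_table": "table_a,table_b",  # comma-separated collections to join
        "join_key": "dateTime",                   # column shared by all collections
        "join_type": "inner",                     # any pandas merge type
        "mongodb_write_table": "table_joined",    # destination collection (dropped if it exists)
    },
)
print(resp.json())  # {'success': 1, 'args': ..., 'start_time': ..., 'end_time': ...}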

+ 78 - 0
data_processing/data_operation/mysql_to_mongo.py

@@ -0,0 +1,78 @@
+import pandas as pd
+from pymongo import MongoClient
+from sqlalchemy import create_engine
+from flask import Flask,request,jsonify
+import time
+import logging
+import traceback
+from functools import reduce
+
+app = Flask('mysql_to_mongo——service')
+
+
+@app.route('/hello', methods=['POST'])
+def hello():
+    return jsonify(message='Hello, World!')
+
+def get_data_fromMysql(params):
+    mysql_conn = params['mysql_conn']
+    query_sql = params['query_sql']
+    # Read measured weather data from the database
+    engine = create_engine(f"mysql+pymysql://{mysql_conn}")
+    # Run the SQL query into a DataFrame
+    env_df = pd.read_sql_query(query_sql, engine)
+    return env_df
+
+
+def insert_data_into_mongo(res_df,args):
+    mongodb_connection,mongodb_database,mongodb_write_table = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/",args['mongodb_database'],args['mongodb_write_table']
+    client = MongoClient(mongodb_connection)
+    db = client[mongodb_database]
+    if mongodb_write_table in db.list_collection_names():
+        db[mongodb_write_table].drop()
+        print(f"Collection '{mongodb_write_table} already exist, deleted successfully!")
+    collection = db[mongodb_write_table]  # 集合名称
+    # 将 DataFrame 转为字典格式
+    data_dict = res_df.to_dict("records")  # 每一行作为一个字典
+    # 插入到 MongoDB
+    collection.insert_many(data_dict)
+    print("data inserted successfully!")
+
+
+@app.route('/mysql_to_mongo', methods=['POST'])
+def mysql_to_mongo():  # named to match the endpoint instead of the copy-pasted data_join
+    # record the start time
+    start_time = time.time()
+    result = {}
+    success = 0
+    args = {}  # ensure args exists in the response even if request parsing raises
+    print("Program starts execution!")
+    try:
+        args = request.values.to_dict()
+        print('args',args)
+        logger.info(args)
+        df_mysql = get_data_fromMysql(args)
+        insert_data_into_mongo(df_mysql, args)
+        success = 1
+    except Exception as e:
+        my_exception = traceback.format_exc()
+        my_exception = my_exception.replace("\n", "\t")
+        result['msg'] = my_exception
+    end_time = time.time()
+    result['success'] = success
+    result['args'] = args
+    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
+    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
+    print("Program execution ends!")
+    return result
+
+
+if __name__=="__main__":
+    print("Program starts execution!")
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    logger = logging.getLogger("mysql_to_mongo")
+    from waitress import serve
+    print("server start!")
+    serve(app, host="0.0.0.0", port=10095)
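A matching sketch for /mysql_to_mongo; the DSN fragment and SQL below are placeholders.

# Hypothetical client for the /mysql_to_mongo endpoint above.
import requests

resp = requests.post(
    "http://127.0.0.1:10095/mysql_to_mongo",
    data={
        "mysql_conn": "user:password@192.168.1.10:3306/source_db",  # body of the SQLAlchemy URL
        "query_sql": "SELECT * FROM weather WHERE dateTime >= '2024-01-01'",
        "mongodb_database": "db_power",
        "mongodb_write_table": "weather_raw",
    },
)
print(resp.json())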

+ 126 - 0
data_processing/processing_limit_power/processing_limit_power_by_statistics_wind.py

@@ -0,0 +1,126 @@
+import pandas as pd
+from pymongo import MongoClient
+from flask import Flask,request,jsonify
+import time
+import logging
+import traceback
+from sklearn.linear_model import LinearRegression
+import numpy as np
+from bson.decimal128 import Decimal128
+from sklearn.preprocessing import StandardScaler
+from sklearn.cluster import DBSCAN
+
+app = Flask('processing_limit_power_by_statistics_wind——service')
+
+
+@app.route('/hello', methods=['POST'])
+def hello():
+    return jsonify(message='Hello, World!')
+
+
+def get_data_from_mongo(args):
+    mongodb_connection,mongodb_database,mongodb_read_table = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/",args['mongodb_database'],args['mongodb_read_table']
+    client = MongoClient(mongodb_connection)
+    # Select the database (MongoDB creates it automatically if it does not exist)
+    db = client[mongodb_database]
+    collection = db[mongodb_read_table]  # collection name
+    data_from_db = collection.find()  # returns a cursor
+    # Materialize the cursor into a list and build a pandas DataFrame
+    df = pd.DataFrame(list(data_from_db))
+    client.close()
+    return df
+
+
+def insert_data_into_mongo(res_df,args):
+    mongodb_connection,mongodb_database,mongodb_write_table = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/",args['mongodb_database'],args['mongodb_write_table']
+    client = MongoClient(mongodb_connection)
+    db = client[mongodb_database]
+    if mongodb_write_table in db.list_collection_names():
+        db[mongodb_write_table].drop()
+        print(f"Collection '{mongodb_write_table} already exist, deleted successfully!")
+    collection = db[mongodb_write_table]  # 集合名称
+    # 将 DataFrame 转为字典格式
+    data_dict = res_df.to_dict("records")  # 每一行作为一个字典
+    # 插入到 MongoDB
+    collection.insert_many(data_dict)
+    print("data inserted successfully!")
+
+
+def wind_statistics_judgement(df_power, args):
+    """
+    Principle: assumes measured wind speed and actual power are strongly and
+    roughly linearly correlated, and that the residuals are approximately
+    normally distributed (mean + N*std).
+    """
+    # Request values arrive as strings, so cast the numeric parameters
+    col_ws, col_power = args['col_ws'], args['col_power']
+    eps, min_samples = float(args['eps']), int(args['min_samples'])
+    ws_in, ws_out, ws_rated, cap = (float(args['ws_in']), float(args['ws_out']),
+                                    float(args['ws_rated']), float(args['cap']))
+    print("min_samples", min_samples)
+    df_tmp = df_power[(~np.isnan(df_power[col_ws])) & (~np.isnan(df_power[col_power]))].copy()  # copy avoids SettingWithCopyWarning
+    # Standardize the data
+    data = df_tmp[[col_ws, col_power]].values
+    scaler = StandardScaler()
+    data_scaled = scaler.fit_transform(data)
+
+    # DBSCAN parameters: eps is the neighborhood radius, min_samples the minimum samples for a core point
+    dbscan = DBSCAN(eps=eps, min_samples=min_samples)
+    labels = dbscan.fit_predict(data_scaled)
+
+    # Attach the labels: DBSCAN marks noise points as -1, treat those as curtailed
+    df_tmp['is_limit'] = labels == -1
+
+    df_tmp.loc[(df_tmp[col_ws] <= 0) & (df_tmp[col_power] > 0), 'is_limit'] = True
+    # Cut-in / cut-out wind speed (-99 means the threshold is not provided)
+    if ws_in != -99 and ws_out != -99:
+        df_tmp.loc[(df_tmp[col_ws] >= ws_in * (1 + 0.5)) & (df_tmp[col_ws] < ws_out) & (
+                    df_tmp[col_power] == 0), 'is_limit'] = True
+    # Rated wind speed: the minimum wind speed at which the turbine runs at full, optimal output
+    if ws_rated != -99:
+        df_tmp.loc[(df_tmp[col_ws] >= ws_rated) & (df_tmp[col_power] >= cap * (1 - 0.03)), 'is_limit'] = False
+
+    df_tmp['c'] = df_tmp['is_limit'].apply(lambda x: 'red' if x == True else 'green')
+    df_tmp.plot.scatter(x=col_ws, y=col_power, c='c')
+    print(f"原始样本数:{df_power.shape[0]},异常点样本数:{df_tmp[df_tmp['is_limit'] == True].shape[0]},剩余样本数占比:"
+          f"{df_tmp[df_tmp['is_limit'] == False].shape[0] / df_power.shape[0]}")
+    return df_tmp[df_tmp['is_limit'] == False].drop(['is_limit','c'],axis=1)
+
+
+@app.route('/processing_limit_power_by_statistics_wind', methods=['POST'])
+def processing_limit_power_by_statistics_wind():
+    # record the start time
+    start_time = time.time()
+    result = {}
+    success = 0
+    args = {}  # ensure args exists in the response even if request parsing raises
+    print("Program starts execution!")
+    try:
+        args = request.values.to_dict()
+        print('args',args)
+        logger.info(args)
+        power_df = get_data_from_mongo(args)
+        res_df = wind_statistics_judgement(power_df,args)
+        insert_data_into_mongo(res_df,args)
+        success = 1
+    except Exception as e:
+        my_exception = traceback.format_exc()
+        my_exception = my_exception.replace("\n", "\t")
+        result['msg'] = my_exception
+    end_time = time.time()
+    result['success'] = success
+    result['args'] = args
+    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
+    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
+    print("Program execution ends!")
+    return result
+
+    
+if __name__=="__main__":
+    print("Program starts execution!")
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    logger = logging.getLogger("统计法清洗风电场站限电")
+    from waitress import serve
+    serve(app, host="0.0.0.0", port=10093)
+    print("server start!")
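A sketch of how the wind-cleaning endpoint might be invoked. All thresholds and column names are illustrative; as in the handler above, -99 disables the corresponding rule.

# Hypothetical client for /processing_limit_power_by_statistics_wind.
import requests

resp = requests.post(
    "http://127.0.0.1:10093/processing_limit_power_by_statistics_wind",
    data={
        "mongodb_database": "db_power",
        "mongodb_read_table": "wind_power_raw",
        "mongodb_write_table": "wind_power_clean",
        "col_ws": "windSpeed", "col_power": "realPower",  # placeholder column names
        "eps": 0.3, "min_samples": 10,   # DBSCAN neighborhood radius / core size
        "ws_in": 3, "ws_out": 25,        # cut-in / cut-out speeds (-99 to skip the rule)
        "ws_rated": 12, "cap": 50,       # rated wind speed and station capacity
    },
)
print(resp.json())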

+ 150 - 0
models_processing/model_predict/model_prediction_lstm.py

@@ -0,0 +1,150 @@
+import pandas as pd
+from pymongo import MongoClient
+from flask import Flask,request
+import time
+import logging
+import traceback
+from io import BytesIO
+import joblib
+import numpy as np
+import h5py
+import tensorflow as tf
+from itertools import chain
+
+
+app = Flask('model_prediction_lstm——service')
+
+
+def get_data_from_mongo(args):
+    mongodb_connection,mongodb_database,mongodb_read_table,timeBegin,timeEnd = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/",args['mongodb_database'],args['mongodb_read_table'],args['timeBegin'],args['timeEnd']
+    client = MongoClient(mongodb_connection)
+    # Select the database (MongoDB creates it automatically if it does not exist)
+    db = client[mongodb_database]
+    collection = db[mongodb_read_table]  # collection name
+    query = {"dateTime": {"$gte": timeBegin, "$lte": timeEnd}}
+    cursor = collection.find(query)
+    data = list(cursor)
+    df = pd.DataFrame(data)
+    # Drop the MongoDB _id column (optional)
+    if '_id' in df.columns:
+        df = df.drop(columns=['_id'])
+    client.close()
+    return df
+    
+
+def insert_data_into_mongo(res_df,args):
+    mongodb_connection,mongodb_database,mongodb_write_table = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/",args['mongodb_database'],args['mongodb_write_table']
+    client = MongoClient(mongodb_connection)
+    db = client[mongodb_database]
+    if mongodb_write_table in db.list_collection_names():
+        db[mongodb_write_table].drop()
+        print(f"Collection '{mongodb_write_table} already exist, deleted successfully!")
+    collection = db[mongodb_write_table]  # 集合名称
+    # 将 DataFrame 转为字典格式
+    data_dict = res_df.to_dict("records")  # 每一行作为一个字典
+    # 插入到 MongoDB
+    collection.insert_many(data_dict)
+    print("data inserted successfully!")
+
+
+def get_model_from_mongo(args):
+    mongodb_connection,mongodb_database,model_table,model_name = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/",args['mongodb_database'],args['model_table'],args['model_name']
+    client = MongoClient(mongodb_connection)
+    # Select the database (MongoDB creates it automatically if it does not exist)
+    db = client[mongodb_database]
+    collection = db[model_table]  # collection name
+
+    # Look up the model document in MongoDB
+    model_doc = collection.find_one({"model_name": model_name})
+    if model_doc:
+        model_data = model_doc['model_data']  # the model's binary blob
+        # Load the binary data into a BytesIO buffer and
+        # read the model back from memory via h5py
+        model_buffer = BytesIO(model_data)
+        with h5py.File(model_buffer, 'r') as f:
+            model = tf.keras.models.load_model(f)
+        print(f"Model {model_name} loaded from MongoDB successfully!")
+        client.close()
+        return model
+    else:
+        print(f"No model found with model_name {model_name}.")
+        client.close()
+        return None
+
+
+# Build sliding-window time-series samples
+def create_sequences(data_features, data_target, time_steps):
+    X, y = [], []
+    if len(data_features) < time_steps:
+        print("Data length must not be shorter than the number of time steps!")
+        return np.array(X), np.array(y)
+    else:
+        for i in range(len(data_features) - time_steps+1):
+            X.append(data_features[i:(i + time_steps)])
+            if len(data_target)>0:
+                y.append(data_target[i + time_steps -1])
+        return np.array(X), np.array(y)
+
+def model_prediction(df, args):
+    mongodb_connection, mongodb_database, scaler_table, features, time_steps = ("mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/",
+                                        args['mongodb_database'], args['scaler_table'], args['features'], int(args['time_steps']))
+    # Request values arrive as strings: time_steps is cast above, and a
+    # comma-separated feature list is split here
+    features = features.split(',') if isinstance(features, str) else features
+    client = MongoClient(mongodb_connection)
+    # Select the database (MongoDB creates it automatically if it does not exist)
+    db = client[mongodb_database]
+    collection = db[scaler_table]  # collection name
+    # Retrieve the scalers from MongoDB
+    scaler_doc = collection.find_one()
+    # Deserialize the scalers
+    feature_scaler_bytes = BytesIO(scaler_doc["feature_scaler"])
+    feature_scaler = joblib.load(feature_scaler_bytes)
+    target_scaler_bytes = BytesIO(scaler_doc["target_scaler"])
+    target_scaler = joblib.load(target_scaler_bytes)
+    scaled_features = feature_scaler.transform(df[features])
+    X_predict, _ = create_sequences(scaled_features, [], time_steps)
+    # If the model had custom losses they would be passed on load, e.g.
+    # model = load_model(f'{farmId}_model.h5', custom_objects={'rmse': rmse})
+    model = get_model_from_mongo(args)
+    # Predict, undo the target scaling, and flatten back to a 1-D list
+    y_predict = list(chain.from_iterable(target_scaler.inverse_transform([model.predict(X_predict).flatten()])))
+    # Keep the tail rows that have a complete window and attach the predictions
+    result = df[-len(y_predict):].copy()  # copy avoids pandas SettingWithCopyWarning
+    result['predict'] = y_predict
+    return result
+
+
+@app.route('/model_prediction_lstm', methods=['POST'])
+def model_prediction_lstm():
+    # record the start time
+    start_time = time.time()
+    result = {}
+    success = 0
+    args = {}  # ensure args exists in the response even if request parsing raises
+    print("Program starts execution!")
+    try:
+        args = request.values.to_dict()
+        print('args',args)
+        logger.info(args)
+        power_df = get_data_from_mongo(args)
+        res_df = model_prediction(power_df, args)
+        insert_data_into_mongo(res_df, args)
+        success = 1
+    except Exception as e:
+        my_exception = traceback.format_exc()
+        my_exception = my_exception.replace("\n", "\t")
+        result['msg'] = my_exception
+    end_time = time.time()
+    result['success'] = success
+    result['args'] = args
+    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
+    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
+    print("Program execution ends!")
+    return result
+
+
+if __name__=="__main__":  
+    print("Program starts execution!")
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    logger = logging.getLogger("model_prediction_lstm log")
+    from waitress import serve
+    print("server start!")
+    serve(app, host="0.0.0.0", port=10097)
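The sliding-window shaping in create_sequences drives every downstream shape. A standalone check, copying the helper above onto toy arrays (values illustrative):

# Shape check for the create_sequences sliding window.
import numpy as np

def create_sequences(data_features, data_target, time_steps):
    X, y = [], []
    if len(data_features) < time_steps:
        return np.array(X), np.array(y)
    for i in range(len(data_features) - time_steps + 1):
        X.append(data_features[i:(i + time_steps)])
        if len(data_target) > 0:
            y.append(data_target[i + time_steps - 1])
    return np.array(X), np.array(y)

features = np.arange(20).reshape(10, 2)  # 10 rows, 2 features
target = np.arange(10).reshape(10, 1)
X, y = create_sequences(features, target, time_steps=4)
print(X.shape, y.shape)  # (7, 4, 2) (7, 1): 10 - 4 + 1 windows of length 4

With an empty target, as in the prediction path above, y comes back empty and only X is used.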

+ 179 - 0
models_processing/model_train/model_training_lstm.py

@@ -0,0 +1,179 @@
+import pandas as pd
+import numpy as np
+from pymongo import MongoClient
+from sklearn.model_selection import train_test_split
+from flask import Flask,request
+import time
+import traceback
+import logging
+from sklearn.preprocessing import MinMaxScaler
+from io import BytesIO
+import joblib
+from tensorflow.keras.models import Sequential
+from tensorflow.keras.layers import LSTM, Dense
+from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
+import matplotlib.pyplot as plt
+import tensorflow as tf
+
+
+app = Flask('model_training_lstm——service')
+
+def draw_loss(history):
+    # Plot training and validation loss
+    plt.figure(figsize=(20, 8))
+    plt.plot(history.history['loss'], label='Training Loss')
+    plt.plot(history.history['val_loss'], label='Validation Loss')
+    plt.title('Loss Curve')
+    plt.xlabel('Epochs')
+    plt.ylabel('Loss')
+    plt.legend()
+    plt.show()
+
+def get_data_from_mongo(args):
+    mongodb_connection,mongodb_database,mongodb_read_table,timeBegin,timeEnd = "mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/",args['mongodb_database'],args['mongodb_read_table'],args['timeBegin'],args['timeEnd']
+    client = MongoClient(mongodb_connection)
+    # Select the database (MongoDB creates it automatically if it does not exist)
+    db = client[mongodb_database]
+    collection = db[mongodb_read_table]  # collection name
+    query = {"dateTime": {"$gte": timeBegin, "$lte": timeEnd}}
+    cursor = collection.find(query)
+    data = list(cursor)
+    df = pd.DataFrame(data)
+    # Drop the MongoDB _id column (optional)
+    if '_id' in df.columns:
+        df = df.drop(columns=['_id'])
+    client.close()
+    return df
+    
+
+def insert_model_into_mongo(model,feature_scaler_bytes,target_scaler_bytes ,args):
+    mongodb_connection,mongodb_database,scaler_table,model_table,model_name = ("mongodb://root:sdhjfREWFWEF23e@192.168.1.43:30000/",
+                                args['mongodb_database'],args['scaler_table'],args['model_table'],args['model_name'])
+    client = MongoClient(mongodb_connection)
+    db = client[mongodb_database]
+    collection = db[scaler_table]  # collection name
+    # Save the scalers in MongoDB as binary data
+    collection.insert_one({
+        "feature_scaler": feature_scaler_bytes.read(),
+        "target_scaler": target_scaler_bytes.read()
+    })
+    print("model inserted successfully!")
+    model_table = db[model_table]
+    # 创建 BytesIO 缓冲区
+    model_buffer = BytesIO()
+    # 将模型保存为 HDF5 格式到内存 (BytesIO)
+    model.save(model_buffer, save_format='h5')
+    # 将指针移到缓冲区的起始位置
+    model_buffer.seek(0)
+    # 获取模型的二进制数据
+    model_data = model_buffer.read()
+    # 将模型保存到 MongoDB
+    model_table.insert_one({
+        "model_name": model_name,
+        "model_data": model_data
+    })
+    print("模型成功保存到 MongoDB!")
+
+
+def rmse(y_true, y_pred):
+    return tf.math.sqrt(tf.reduce_mean(tf.square(y_true - y_pred)))
+
+
+# Build sliding-window time-series samples
+def create_sequences(data_features, data_target, time_steps):
+    X, y = [], []
+    if len(data_features) < time_steps:
+        print("Data length must not be shorter than the number of time steps!")
+        return np.array(X), np.array(y)
+    else:
+        for i in range(len(data_features) - time_steps+1):
+            X.append(data_features[i:(i + time_steps)])
+            if len(data_target)>0:
+                y.append(data_target[i + time_steps -1])
+        return np.array(X), np.array(y)
+
+
+def build_model(data, args):
+    begin_time, end_time, col_time, target = args['begin_time'], args['end_time'], args['col_time'], args['target']
+    # Request values arrive as strings: cast the step count and split the feature list
+    time_steps = int(args['time_steps'])
+    features = str_to_list(args['features'])
+    train_data = data[(data[col_time] >= begin_time) & (data[col_time] < end_time)]
+    # Create scalers for the features and the target
+    feature_scaler = MinMaxScaler(feature_range=(0, 1))
+    target_scaler = MinMaxScaler(feature_range=(0, 1))
+    # Fit and transform on the selected training window
+    scaled_features = feature_scaler.fit_transform(train_data[features])
+    scaled_target = target_scaler.fit_transform(train_data[[target]])
+    # Serialize both scalers to in-memory buffers
+    feature_scaler_bytes = BytesIO()
+    joblib.dump(feature_scaler, feature_scaler_bytes)
+    feature_scaler_bytes.seek(0)  # Reset pointer to the beginning of the byte stream
+    target_scaler_bytes = BytesIO()
+    joblib.dump(target_scaler, target_scaler_bytes)
+    target_scaler_bytes.seek(0)
+    X, y = create_sequences(scaled_features, scaled_target, time_steps)
+    # Split into training and test sets
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=43)
+
+    # Build the LSTM model
+    model = Sequential()
+    model.add(LSTM(units=50, return_sequences=False, input_shape=(time_steps, X_train.shape[2])))
+    model.add(Dense(1))  # single output value
+    # Compile the model
+    model.compile(optimizer='adam', loss='mean_squared_error')
+    # EarlyStopping and ReduceLROnPlateau callbacks
+    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1)
+    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=1)
+    # Train the model
+    history = model.fit(X_train, y_train,
+                        epochs=100,
+                        batch_size=32,
+                        validation_data=(X_test, y_test),
+                        verbose=2,
+                        callbacks=[early_stopping, reduce_lr])
+    draw_loss(history)
+    return model,feature_scaler_bytes,target_scaler_bytes
+
+
+def str_to_list(arg):
+    if arg == '':
+        return []
+    else:
+        return arg.split(',')
+
+
+@app.route('/model_training_lstm', methods=['POST'])
+def model_training_lstm():
+    # record the start time
+    start_time = time.time()
+    result = {}
+    success = 0
+    args = {}  # ensure args exists in the response even if request parsing raises
+    print("Program starts execution!")
+    try:
+        args = request.values.to_dict()
+        print('args',args)
+        logger.info(args)
+        power_df = get_data_from_mongo(args)
+        model,feature_scaler_bytes,target_scaler_bytes = build_model(power_df,args)
+        insert_model_into_mongo(model,feature_scaler_bytes,target_scaler_bytes ,args)
+        success = 1
+    except Exception as e:
+        my_exception = traceback.format_exc()
+        my_exception = my_exception.replace("\n", "\t")
+        result['msg'] = my_exception
+    end_time = time.time()
+    result['success'] = success
+    result['args'] = args
+    result['start_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time))
+    result['end_time'] = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time))
+    print("Program execution ends!")
+    return result
+
+
+if __name__=="__main__":  
+    print("Program starts execution!")
+    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+    logger = logging.getLogger("model_training_lightgbm log")
+    from waitress import serve
+    serve(app, host="0.0.0.0", port=10096)
+    print("server start!")

+ 5 - 0
run_all.py

@@ -2,14 +2,19 @@ import subprocess
 
# Services to launch, with script paths and ports
 services = [
+    ("data_processing/data_operation/data_join.py", 10094),
+    ("data_processing/data_operation/mysql_to_mongo.py", 10095),
     ("data_processing/processing_limit_power/processing_limit_power_by_agcavc.py", 10086),
     ("data_processing/processing_limit_power/processing_limit_power_by_machines.py", 10087),
     ("data_processing/processing_limit_power/processing_limit_power_by_records.py", 10088),
     ("data_processing/processing_limit_power/processing_limit_power_by_statistics_light.py", 10085),
+    ("data_processing/processing_limit_power/processing_limit_power_by_statistics_wind.py", 10093),
     ("evaluation_processing/analysis.py", 10092),
     ("evaluation_processing/evaluation_accuracy.py", 10091),
     ("models_processing/model_train/model_training_lightgbm.py", 10089),
     ("models_processing/model_predict/model_prediction_lightgbm.py", 10090),
+    ("models_processing/model_train/model_training_lstm.py", 10096),
+    ("models_processing/model_predict/model_prediction_lstm.py", 10097),
 ]
 
# Start all services
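The diff context ends before the launch loop. A hypothetical sketch of how such a launcher typically finishes (not the repository's actual code):

# Spawn each service as a child process and wait on them.
import sys

processes = []
for script, port in services:
    # port is informational here; each script binds its own port internally
    processes.append(subprocess.Popen([sys.executable, script]))

for p in processes:
    p.wait()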