|
@@ -1,6 +1,4 @@
|
|
-import pandas as pd
|
|
|
|
import numpy as np
|
|
import numpy as np
|
|
-from pymongo import MongoClient
|
|
|
|
from sklearn.model_selection import train_test_split
|
|
from sklearn.model_selection import train_test_split
|
|
from flask import Flask,request
|
|
from flask import Flask,request
|
|
import time
|
|
import time
|
|
@@ -12,23 +10,12 @@ import joblib
|
|
from tensorflow.keras.models import Sequential
|
|
from tensorflow.keras.models import Sequential
|
|
from tensorflow.keras.layers import LSTM, Dense
|
|
from tensorflow.keras.layers import LSTM, Dense
|
|
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
|
|
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
|
|
-# import matplotlib.pyplot as plt
|
|
|
|
import tensorflow as tf
|
|
import tensorflow as tf
|
|
from common.database_dml import get_data_from_mongo,insert_h5_model_into_mongo
|
|
from common.database_dml import get_data_from_mongo,insert_h5_model_into_mongo
|
|
|
|
+from common.processing_data_common import missing_features,str_to_list
|
|
|
|
|
|
app = Flask('model_training_lightgbm——service')
|
|
app = Flask('model_training_lightgbm——service')
|
|
|
|
|
|
-# def draw_loss(history):
|
|
|
|
-# #绘制训练集和验证集损失
|
|
|
|
-# plt.figure(figsize=(20, 8))
|
|
|
|
-# plt.plot(history.history['loss'], label='Training Loss')
|
|
|
|
-# plt.plot(history.history['val_loss'], label='Validation Loss')
|
|
|
|
-# plt.title('Loss Curve')
|
|
|
|
-# plt.xlabel('Epochs')
|
|
|
|
-# plt.ylabel('Loss')
|
|
|
|
-# plt.legend()
|
|
|
|
-# plt.show()
|
|
|
|
-
|
|
|
|
def rmse(y_true, y_pred):
|
|
def rmse(y_true, y_pred):
|
|
return tf.math.sqrt(tf.reduce_mean(tf.square(y_true - y_pred)))
|
|
return tf.math.sqrt(tf.reduce_mean(tf.square(y_true - y_pred)))
|
|
|
|
|
|
@@ -51,6 +38,8 @@ def build_model(data, args):
|
|
col_time, time_steps,features,target = args['col_time'], int(args['time_steps']), str_to_list(args['features']),args['target']
|
|
col_time, time_steps,features,target = args['col_time'], int(args['time_steps']), str_to_list(args['features']),args['target']
|
|
if 'is_limit' in data.columns:
|
|
if 'is_limit' in data.columns:
|
|
data = data[data['is_limit']==False]
|
|
data = data[data['is_limit']==False]
|
|
|
|
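+ # Drop days whose average feature missing rate exceeds 20%.
+ # NOTE(review): the result below is bound to `df`, but the following statement still builds train_data from `data` — confirm the cleaned frame is actually used.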
|
|
|
|
+ df = missing_features(data, features, col_time)
|
|
train_data = data.fillna(method='ffill').fillna(method='bfill').sort_values(by=col_time)
|
|
train_data = data.fillna(method='ffill').fillna(method='bfill').sort_values(by=col_time)
|
|
# X_train, X_test, y_train, y_test = process_data(df_clean, params)
|
|
# X_train, X_test, y_train, y_test = process_data(df_clean, params)
|
|
# 创建特征和目标的标准化器
|
|
# 创建特征和目标的标准化器
|