Browse Source

Merge branch 'dev_awg' of anweiguo/algorithm_platform into dev_david

liudawei 4 months ago
parent
commit
a673af6acb

+ 0 - 2
common/processing_data_common.py

@@ -30,5 +30,3 @@ def missing_features(df, features, col_time, threshold=0.2):
     df = df[~df['day'].isin(days_with_high_missing)]
     print("**********删除后维度", df.shape)
     return df.drop('day',axis=1)
-
-

+ 4 - 1
data_processing/data_operation/data_join.py

@@ -18,7 +18,10 @@ def hello():
 def  data_merge(df_list, args):
     join_key,join_type,features = args['join_key'], args['join_type'], str_to_list(args['col_reserve'])
     result = reduce(lambda left, right: pd.merge(left, right, how=join_type, on=join_key), df_list)
-    return result[features]
+    if len(features)==0:
+        return result
+    else:
+        return result[features]
 
 
 @app.route('/data_join', methods=['POST'])

+ 40 - 34
evaluation_processing/analysis_report.py

@@ -25,6 +25,7 @@ def put_analysis_report_to_html(args, df_clean, df_predict, df_accuracy):
     label = args['label']
     label_pre = args['label_pre']
     farmId = args['farmId']
+    acc_flag = df_accuracy.shape[0]
     df_clean = df_clean.applymap(
         lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x,
                                                                                                  numbers.Number) else x).sort_values(
@@ -33,7 +34,8 @@ def put_analysis_report_to_html(args, df_clean, df_predict, df_accuracy):
         lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x,
                                                                                                  numbers.Number) else x).sort_values(
         by=col_time)
-    df_accuracy = df_accuracy.applymap(
+    if acc_flag>0:
+        df_accuracy = df_accuracy.applymap(
         lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x,
                                                                                                  numbers.Number) else x).sort_values(
         by=col_time)
@@ -83,7 +85,7 @@ def put_analysis_report_to_html(args, df_clean, df_predict, df_accuracy):
             font=dict(size=12)  # 图例文字大小
         ),
         title=dict(
-            text='实际功率与辐照度的散点图',  # 图表标题
+            # text='实际功率与辐照度的散点图',  # 图表标题
             x=0.5,  # 标题居中
             font=dict(size=16)  # 标题字体大小
         ),
@@ -282,40 +284,44 @@ def put_analysis_report_to_html(args, df_clean, df_predict, df_accuracy):
     power_html = pio.to_html(fig, full_html=False)
 
     # -------------------- 准确率表展示--------------------
-    acc_html = df_accuracy.sort_values(by=col_time).to_html(classes='table table-bordered table-striped', index=False)
+    acc_html=''
+    if acc_flag>0:
+        acc_html = df_accuracy.sort_values(by=col_time).to_html(classes='table table-bordered table-striped', index=False)
     # -------------------- 准确率汇总展示--------------------
-    # 指定需要转换的列
-    cols_to_convert = ['MAE', 'accuracy', 'RMSE', 'deviationElectricity', 'deviationAssessment']
-    for col in cols_to_convert:
-        if col in df_accuracy.columns:
-            df_accuracy[col] = df_accuracy[col].apply(
-                lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x,
-                                                                                                         numbers.Number) else np.nan)
+    summary_html = ''
+    if  acc_flag>0:
+        # 指定需要转换的列
+        cols_to_convert = ['MAE', 'accuracy', 'RMSE', 'deviationElectricity', 'deviationAssessment']
+        for col in cols_to_convert:
+            if col in df_accuracy.columns:
+                df_accuracy[col] = df_accuracy[col].apply(
+                    lambda x: float(x.to_decimal()) if isinstance(x, Decimal128) else float(x) if isinstance(x,
+                                                                                                             numbers.Number) else np.nan)
 
-    # 确定存在的列
-    agg_dict = {}
-    rename_cols = ['model']
-    if 'MAE' in df_accuracy.columns:
-        agg_dict['MAE'] = np.nanmean
-        rename_cols.append('MAE平均值')
-    if 'accuracy' in df_accuracy.columns:
-        agg_dict['accuracy'] = np.nanmean
-        rename_cols.append('准确率平均值')
-    if 'RMSE' in df_accuracy.columns:
-        agg_dict['RMSE'] = np.nanmean
-        rename_cols.append('RMSE平均值')
-    if 'deviationElectricity' in df_accuracy.columns:
-        agg_dict['deviationElectricity'] = [np.nanmean, np.nansum]
-        rename_cols.append('考核电量平均值')
-        rename_cols.append('考核总电量')
-    if 'deviationAssessment' in df_accuracy.columns:
-        agg_dict['deviationAssessment'] = [np.nanmean, np.nansum]
-        rename_cols.append('考核分数平均值')
-        rename_cols.append('考核总分数')
-    # 进行分组聚合,如果有需要聚合的列
-    summary_df = df_accuracy.groupby('model').agg(agg_dict).reset_index()
-    summary_df.columns = rename_cols
-    summary_html = summary_df.to_html(classes='table table-bordered table-striped', index=False)
+        # 确定存在的列
+        agg_dict = {}
+        rename_cols = ['model']
+        if 'MAE' in df_accuracy.columns:
+            agg_dict['MAE'] = np.nanmean
+            rename_cols.append('MAE平均值')
+        if 'accuracy' in df_accuracy.columns:
+            agg_dict['accuracy'] = np.nanmean
+            rename_cols.append('准确率平均值')
+        if 'RMSE' in df_accuracy.columns:
+            agg_dict['RMSE'] = np.nanmean
+            rename_cols.append('RMSE平均值')
+        if 'deviationElectricity' in df_accuracy.columns:
+            agg_dict['deviationElectricity'] = [np.nanmean, np.nansum]
+            rename_cols.append('考核电量平均值')
+            rename_cols.append('考核总电量')
+        if 'deviationAssessment' in df_accuracy.columns:
+            agg_dict['deviationAssessment'] = [np.nanmean, np.nansum]
+            rename_cols.append('考核分数平均值')
+            rename_cols.append('考核总分数')
+        # 进行分组聚合,如果有需要聚合的列
+        summary_df = df_accuracy.groupby('model').agg(agg_dict).reset_index()
+        summary_df.columns = rename_cols
+        summary_html = summary_df.to_html(classes='table table-bordered table-striped', index=False)
     # -------------------- 生成完整 HTML 页面 --------------------
 
     html_content = f"""

+ 6 - 3
models_processing/model_train/model_training_lstm.py

@@ -8,7 +8,7 @@ from sklearn.preprocessing import MinMaxScaler
 from io import BytesIO
 import joblib
 from tensorflow.keras.models import Sequential
-from tensorflow.keras.layers import LSTM, Dense
+from tensorflow.keras.layers import LSTM, Dense, Dropout
 from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
 import tensorflow as tf
 from common.database_dml import get_data_from_mongo,insert_h5_model_into_mongo
@@ -75,7 +75,7 @@ def build_model(data, args):
 
     # 构建 LSTM 模型
     model = Sequential()
-    model.add(LSTM(units=50, return_sequences=False, input_shape=(time_steps, X_train.shape[2])))
+    model.add(LSTM(units=64, return_sequences=False, input_shape=(time_steps, X_train.shape[2])))
     model.add(Dense(1))  # 输出单一值
     # 编译模型
     model.compile(optimizer='adam', loss='mean_squared_error')
@@ -83,11 +83,14 @@ def build_model(data, args):
     early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1)
     reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=1)
     # 训练模型
-    history = model.fit(X_train, y_train,
+    # 使用GPU进行训练
+    with tf.device('/GPU:1'):
+        history = model.fit(X_train, y_train,
                         epochs=100,
                         batch_size=32,
                         validation_data=(X_test, y_test),
                         verbose=2,
+                        shuffle=False,
                         callbacks=[early_stopping, reduce_lr])
     # draw_loss(history)
     return model,feature_scaler_bytes,target_scaler_bytes