Bläddra i källkod

预处理同时生成归一化文件

liudawei 1 år sedan
förälder
incheckning
964699904f
4 ändrade filer med 48 tillägg och 4 borttagningar
  1. 1 0
      .gitignore
  2. 6 2
      db-light/getdata/inputData.py
  3. 37 0
      db-light/norm.py
  4. 4 2
      db-light/utils/Arg.py

+ 1 - 0
.gitignore

@@ -17,6 +17,7 @@
 /data
 /guyuan
 /guyuan1
+/guyuan2
 /db-light/.idea/
 /db-wind/.idea/
 

+ 6 - 2
db-light/getdata/inputData.py

@@ -7,8 +7,9 @@ import pytz
 plt.rcParams['font.sans-serif'] = ['SimHei']
 import utils.savedata
 from utils import Arg
+from norm import Normalize
 arg = Arg.Arg()
-
+norm = Normalize()
 def clear_data():
     """
     删除所有csv
@@ -74,6 +75,7 @@ def get_process_NWP(database):
     NWP = NWP.rename(columns={'C_PRE_TIME': 'C_TIME'})
 
     utils.savedata.saveData("NWP.csv",NWP)
+    norm.normalize(NWP)
     return NWP
 
 
@@ -102,6 +104,7 @@ def get_process_weather(database):
         weather_sql = "select " + all_columns_str + " from t_weather_station_status_data where C_EQUIPMENT_NO="+str(i)
         weather = exec_sql(weather_sql, engine)
         utils.savedata.saveData("weather/weather-{}.csv".format(i), weather)
+        norm.normalize(weather)
 
 def get_process_power(database):
     """
@@ -128,6 +131,7 @@ def get_process_power(database):
             ps = 0
     power5 = pd.DataFrame(power5, columns=['C_TIME', 'C_REAL_VALUE'])
     utils.savedata.saveData("power5.csv", power5)
+    norm.normalize(power5)
 
 
 def get_process_dq(database):
@@ -323,7 +327,7 @@ def data_process(database):
     get_process_power(database)
     indep_process()
     NWP_indep_process()
-    # Data_split()
+    norm.save_yml({'mean': norm.mean, 'std': norm.std}, arg.normloc)
 
 if __name__ == '__main__':
 

+ 37 - 0
db-light/norm.py

@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+# time: 2023/6/25 10:28
+# file: norm.py
+# author: David
+# company: shenyang JY
+import numpy as np
+import yaml
+
+
+class Normalize():  # collects per-column mean/std across normalize() calls
+    def __init__(self):
+        self.mean = {}  # column name -> mean
+        self.std = {}  # column name -> standard deviation
+
+    def normalize(self, df):
+        """
+        Record the mean/std of each column of df; no values are rescaled here.
+        :param df: DataFrame to summarize; C_TIME is excluded for now
+        :return: None; the statistics are stored in self.mean and self.std
+        """
+        if 'C_TIME' in df.columns:
+            df.drop('C_TIME', axis=1, inplace=True)  # in place: the caller's frame loses C_TIME too
+        mean_dict = np.mean(df, axis=0)  # per-column mean
+        std_dict = np.std(df, axis=0)  # per-column standard deviation
+        for k, v in mean_dict.to_dict().items():  # same-named columns from earlier calls are overwritten
+            self.mean[k] = v
+        for k, v in std_dict.to_dict().items():
+            self.std[k] = v
+        print("归一化参数,均值为:{},方差为:{}".format(self.mean, self.std))  # NOTE(review): message labels the second value as variance, but it is std
+
+    def save_yml(self, yml_dict, path):  # write yml_dict to path as block-style YAML
+        cfg = {}
+        for k, v in yml_dict.items():  # shallow copy of the top-level keys
+            cfg[k] = v
+        with open(path, 'w') as f:
+            yaml.safe_dump(cfg, f, default_flow_style=False)  # NOTE(review): presumably all values are plain Python types - confirm

+ 4 - 2
db-light/utils/Arg.py

@@ -3,8 +3,10 @@ class Arg:
         # 数据库地址
         self.database = "mysql+pymysql://root:123@localhost:3306/ipfcst-guyuan"
         # 数据存放位置
-        self.dataloc = "../guyuan1/"
+        self.dataloc = "../guyuan/"
         # 变量存放位置
-        self.varloc = "../guyuan1/var/"
+        self.varloc = "../guyuan/var/"
         # 环境监测仪个数
         self.weatherloc = [1]
+        # 归一化文件存放位置
+        self.normloc = '../guyuan/norm.yaml'