2 years ago · a856c9d0e0
--- a/.gitignore
+++ b/.gitignore
@@ -4,10 +4,19 @@
 
				 /checkpoint
			
 
				 /log
			
 
				 /data
			
 
				+/data_laodian
			
 
				+/data_mts
			
 
				+/mts
			
 
				 /figure
			
 
				+./db-light/.idea
			
 
				+./db-light/getdata/__pycache__
			
 
				+./db-light/utils/__pycache__
			
 
				 *.log
			
 
				 *.swp
			
 
				 /log
			
 
				 /data
			
 
				-
			
 
				+/guyuan
			
 
				+/guyuan1
			
 
				+/db-light/.idea/
			
 
				+/db-wind/.idea/
			
 
				 
			
--- a/db-light/.idea/.gitignore
+++ b/db-light/.idea/.gitignore
--- a/db-light/.idea/db-light.iml
+++ b/db-light/.idea/db-light.iml
@@ -1,8 +0,0 @@
 
				-<?xml version="1.0" encoding="UTF-8"?>
			
 
				-<module type="PYTHON_MODULE" version="4">
			
 
				-  <component name="NewModuleRootManager">
			
 
				-    <content url="file://$MODULE_DIR$" />
			
 
				-    <orderEntry type="inheritedJdk" />
			
 
				-    <orderEntry type="sourceFolder" forTests="false" />
			
 
				-  </component>
			
 
				-</module>
			
--- a/db-light/.idea/inspectionProfiles/Project_Default.xml
+++ b/db-light/.idea/inspectionProfiles/Project_Default.xml
@@ -1,12 +0,0 @@
 
				-<component name="InspectionProjectProfileManager">
			
 
				-  <profile version="1.0">
			
 
				-    <option name="myName" value="Project Default" />
			
 
				-    <inspection_tool class="PyPep8NamingInspection" enabled="true" level="WEAK WARNING" enabled_by_default="true">
			
 
				-      <option name="ignoredErrors">
			
 
				-        <list>
			
 
				-          <option value="N801" />
			
 
				-        </list>
			
 
				-      </option>
			
 
				-    </inspection_tool>
			
 
				-  </profile>
			
 
				-</component>
			
--- a/db-light/.idea/inspectionProfiles/profiles_settings.xml
+++ b/db-light/.idea/inspectionProfiles/profiles_settings.xml
@@ -1,6 +0,0 @@
 
				-<component name="InspectionProjectProfileManager">
			
 
				-  <settings>
			
 
				-    <option name="USE_PROJECT_PROFILE" value="false" />
			
 
				-    <version value="1.0" />
			
 
				-  </settings>
			
 
				-</component>
			
--- a/db-light/.idea/misc.xml
+++ b/db-light/.idea/misc.xml
@@ -1,4 +0,0 @@
 
				-<?xml version="1.0" encoding="UTF-8"?>
			
 
				-<project version="4">
			
 
				-  <component name="ProjectRootManager" version="2" project-jdk-name="shortest" project-jdk-type="Python SDK" />
			
 
				-</project>
			
--- a/db-light/.idea/modules.xml
+++ b/db-light/.idea/modules.xml
@@ -1,8 +0,0 @@
 
				-<?xml version="1.0" encoding="UTF-8"?>
			
 
				-<project version="4">
			
 
				-  <component name="ProjectModuleManager">
			
 
				-    <modules>
			
 
				-      <module fileurl="file://$PROJECT_DIR$/.idea/db-light.iml" filepath="$PROJECT_DIR$/.idea/db-light.iml" />
			
 
				-    </modules>
			
 
				-  </component>
			
 
				-</project>
			
--- a/db-light/.idea/vcs.xml
+++ b/db-light/.idea/vcs.xml
@@ -1,6 +0,0 @@
 
				-<?xml version="1.0" encoding="UTF-8"?>
			
 
				-<project version="4">
			
 
				-  <component name="VcsDirectoryMappings">
			
 
				-    <mapping directory="$PROJECT_DIR$/.." vcs="Git" />
			
 
				-  </component>
			
 
				-</project>
			
--- a/db-light/.idea/workspace.xml
+++ b/db-light/.idea/workspace.xml
@@ -1,80 +0,0 @@
 
				-<?xml version="1.0" encoding="UTF-8"?>
			
 
				-<project version="4">
			
 
				-  <component name="AutoImportSettings">
			
 
				-    <option name="autoReloadType" value="SELECTIVE" />
			
 
				-  </component>
			
 
				-  <component name="ChangeListManager">
			
 
				-    <list default="true" id="c3718d76-2473-4764-9a49-7a3d2369b500" name="变更" comment="">
			
 
				-      <change beforePath="$PROJECT_DIR$/../.gitignore" beforeDir="false" afterPath="$PROJECT_DIR$/../.gitignore" afterDir="false" />
			
 
				-      <change beforePath="$PROJECT_DIR$/../db-wind/getdata/inputData.py" beforeDir="false" afterPath="$PROJECT_DIR$/../db-wind/getdata/inputData.py" afterDir="false" />
			
 
				-      <change beforePath="$PROJECT_DIR$/../db-wind/utils/Arg.py" beforeDir="false" afterPath="$PROJECT_DIR$/../db-wind/utils/Arg.py" afterDir="false" />
			
 
				-    </list>
			
 
				-    <option name="SHOW_DIALOG" value="false" />
			
 
				-    <option name="HIGHLIGHT_CONFLICTS" value="true" />
			
 
				-    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
			
 
				-    <option name="LAST_RESOLUTION" value="IGNORE" />
			
 
				-  </component>
			
 
				-  <component name="Git.Settings">
			
 
				-    <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$/.." />
			
 
				-  </component>
			
 
				-  <component name="MarkdownSettingsMigration">
			
 
				-    <option name="stateVersion" value="1" />
			
 
				-  </component>
			
 
				-  <component name="ProjectId" id="2R6GDNKM9XtFUJj4RUrGUcgmf7x" />
			
 
				-  <component name="ProjectViewState">
			
 
				-    <option name="hideEmptyMiddlePackages" value="true" />
			
 
				-    <option name="showLibraryContents" value="true" />
			
 
				-  </component>
			
 
				-  <component name="PropertiesComponent"><![CDATA[{
			
 
				-  "keyToString": {
			
 
				-    "RunOnceActivity.OpenProjectViewOnStart": "true",
			
 
				-    "RunOnceActivity.ShowReadmeOnStart": "true",
			
 
				-    "last_opened_file_path": "D:/data-process/db-wind"
			
 
				-  }
			
 
				-}]]></component>
			
 
				-  <component name="RunManager">
			
 
				-    <configuration name="main" type="PythonConfigurationType" factoryName="Python" nameIsGenerated="true">
			
 
				-      <module name="db-light" />
			
 
				-      <option name="INTERPRETER_OPTIONS" value="" />
			
 
				-      <option name="PARENT_ENVS" value="true" />
			
 
				-      <envs>
			
 
				-        <env name="PYTHONUNBUFFERED" value="1" />
			
 
				-      </envs>
			
 
				-      <option name="SDK_HOME" value="" />
			
 
				-      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
			
 
				-      <option name="IS_MODULE_SDK" value="true" />
			
 
				-      <option name="ADD_CONTENT_ROOTS" value="true" />
			
 
				-      <option name="ADD_SOURCE_ROOTS" value="true" />
			
 
				-      <option name="SCRIPT_NAME" value="$PROJECT_DIR$/main.py" />
			
 
				-      <option name="PARAMETERS" value="" />
			
 
				-      <option name="SHOW_COMMAND_LINE" value="false" />
			
 
				-      <option name="EMULATE_TERMINAL" value="false" />
			
 
				-      <option name="MODULE_MODE" value="false" />
			
 
				-      <option name="REDIRECT_INPUT" value="false" />
			
 
				-      <option name="INPUT_FILE" value="" />
			
 
				-      <method v="2" />
			
 
				-    </configuration>
			
 
				-  </component>
			
 
				-  <component name="SpellCheckerSettings" RuntimeDictionaries="0" Folders="0" CustomDictionaries="0" DefaultDictionary="应用程序级" UseSingleDictionary="true" transferred="true" />
			
 
				-  <component name="TaskManager">
			
 
				-    <task active="true" id="Default" summary="默认任务">
			
 
				-      <changelist id="c3718d76-2473-4764-9a49-7a3d2369b500" name="变更" comment="" />
			
 
				-      <created>1686562996440</created>
			
 
				-      <option name="number" value="Default" />
			
 
				-      <option name="presentableId" value="Default" />
			
 
				-      <updated>1686562996440</updated>
			
 
				-    </task>
			
 
				-    <servers />
			
 
				-  </component>
			
 
				-  <component name="XDebuggerManager">
			
 
				-    <breakpoint-manager>
			
 
				-      <breakpoints>
			
 
				-        <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
			
 
				-          <url>file://$PROJECT_DIR$/main.py</url>
			
 
				-          <line>8</line>
			
 
				-          <option name="timeStamp" value="1" />
			
 
				-        </line-breakpoint>
			
 
				-      </breakpoints>
			
 
				-    </breakpoint-manager>
			
 
				-  </component>
			
 
				-</project>
			
--- a/db-light/getdata/__pycache__/inputData.cpython-37.pyc
+++ b/db-light/getdata/__pycache__/inputData.cpython-37.pyc
--- a/db-light/getdata/inputData.py
+++ b/db-light/getdata/inputData.py
@@ -54,7 +54,7 @@ def get_process_NWP(database):
 
				     """
			
 
				     # NPW数据
			
 
				     engine = create_database(database)
			
 
				-    sql_NWP = "select C_PRE_TIME,C_T,C_RH,C_PRESSURE,C_WD10,C_WD30,C_WD50,C_WD70,C_WD80,C_WD90,C_WD100,C_WD170,C_WS10,C_WS30,C_WS50,C_WS70,C_WS80,C_WS90,C_WS100,C_WS170 from t_nwp"
			
 
				+    sql_NWP = "select C_PRE_TIME,C_T,C_RH,C_PRESSURE, C_SWR, C_WD10,C_WD30,C_WD50,C_WD70,C_WD80,C_WD90,C_WD100,C_WD170,C_WS10,C_WS30,C_WS50,C_WS70,C_WS80,C_WS90,C_WS100,C_WS170 from t_nwp"  # 光的NWP字段
			
 
				     NWP = exec_sql(sql_NWP, engine)
			
 
				 
			
 
				     #删除后三位
			
@@ -76,57 +76,32 @@ def get_process_NWP(database):
 
				     utils.savedata.saveData("NWP.csv",NWP)
			
 
				     return NWP
			
 
				 
			
 
				-def get_process_turbine(database):
			
 
				-    """
			
 
				-    从数据库中获取风头数据，并进行简单处理
			
 
				-    :param database:
			
 
				-    :return:
			
 
				-    """
			
 
				-
			
 
				-    # 获取NWP数据
			
 
				-    NWP = utils.savedata.readData("NWP.csv")
			
 
				-    NWP_date = NWP.iloc[:,0]
			
 
				-    print(NWP_date)
			
 
				-
			
 
				-    # 机头数据
			
 
				-    engine = create_database(database)
			
 
				-    for i in arg.turbineloc:
			
 
				-        print("导出风机{}的数据".format(i))
			
 
				-        sql_turbine = "select C_TIME,C_DATA1 as C_WS, C_DATA2 as C_WD, C_DATA3 as C_ACTIVE_POWER from t_wind_turbine_status_data WHERE C_EQUIPMENT_NO=" + str(i) + " and C_DATA1 != -99 AND C_DATA1 != 0" #+ " and C_WS>0 and C_ACTIVE_POWER>0"
			
 
				-        turbine = exec_sql(sql_turbine, engine)
			
 
				-
			
 
				-        #直接导出所有数据
			
 
				-        utils.savedata.saveData("turbine-all/turbine-{}.csv".format(i), turbine)
			
 
				-
			
 
				-        #每15分钟导出一个数据
			
 
				-        filtered_df = turbine[turbine['C_TIME'].isin(NWP_date)]
			
 
				-        utils.savedata.saveData("turbine-15/turbine-{}.csv".format(i), filtered_df)
			
 
				 
			
 
				-def get_process_tower(database):
			
 
				+def get_process_weather(database):
			
 
				     """
			
 
				-    获取测风塔数据
			
 
				+    获取环境检测仪数据
			
 
				     :param database:
			
 
				     :return:
			
 
				     """
			
 
				     engine = create_database(database)
			
 
				-    print("现有测风塔：{}".format(arg.towerloc))
			
 
				-    for i in arg.towerloc:
			
 
				-        print("测风塔{}导出数据".format(i))
			
 
				+    print("现有环境监测仪：{}".format(arg.weatherloc))
			
 
				+    for i in arg.weatherloc:
			
 
				+        print("环境监测仪{}导出数据".format(i))
			
 
				         # 删除没用的列
			
 
				-        drop_colmns = ["C_DATA1","C_DATA2","C_DATA3","C_DATA4","C_DATA5","C_DATA6","C_DATA7","C_DATA8","C_DATA9","C_DATA10","C_IS_GENERATED","C_ABNORMAL_CODE"]
			
 
				+        drop_colmns = ["C_ID", "C_EQUIPMENT_NO", "C_DATA1","C_DATA2","C_DATA3","C_DATA4","C_DATA5","C_DATA6","C_DATA7","C_DATA8","C_DATA9","C_DATA10", "C_STATUS", "C_IS_GENERATED","C_ABNORMAL_CODE"]
			
 
				 
			
 
				         get_colmns = []
			
 
				         # 查询表的所有列名
			
 
				-        result_set = exec_sql("SHOW COLUMNS FROM t_wind_tower_status_data", engine)
			
 
				+        result_set = exec_sql("SHOW COLUMNS FROM t_weather_station_status_data", engine)
			
 
				         for name in result_set.iloc[:,0]:
			
 
				             if name not in drop_colmns:
			
 
				                 get_colmns.append(name)
			
 
				 
			
 
				         all_columns_str = ", ".join([f'{col}' for col in get_colmns])
			
 
				 
			
 
				-        tower_sql = "select " + all_columns_str + " from t_wind_tower_status_data where C_EQUIPMENT_NO="+str(i)
			
 
				-        tower = exec_sql(tower_sql, engine)
			
 
				-        utils.savedata.saveData("tower/tower-{}.csv".format(i), tower)
			
 
				+        weather_sql = "select " + all_columns_str + " from t_weather_station_status_data where C_EQUIPMENT_NO="+str(i)
			
 
				+        weather = exec_sql(weather_sql, engine)
			
 
				+        utils.savedata.saveData("weather/weather-{}.csv".format(i), weather)
			
 
				 
			
 
				 def get_process_power(database):
			
 
				     """
			
@@ -136,8 +111,23 @@ def get_process_power(database):
 
				     """
			
 
				     engine = create_database(database)
			
 
				     sql_power = "select C_TIME,C_REAL_VALUE from t_power_station_status_data"
			
 
				-    power = exec_sql(sql_power, engine)
			
 
				-    utils.savedata.saveData("power.csv", power)
			
 
				+    powers = exec_sql(sql_power, engine)
			
 
				+    utils.savedata.saveData("power.csv", powers)
			
 
				+    power5, power_index = [], [0]  # 功率表，索引表
			
 
				+    ps = 0
			
 
				+    # 获取5分钟一个间隔的功率数据
			
 
				+    for i, power in powers.iterrows():
			
 
				+        real_value = power['C_REAL_VALUE']
			
 
				+        ps += real_value
			
 
				+        if str(power['C_TIME'].minute)[-1] in ('0', '5'):
			
 
				+            power_index.append(i)
			
 
				+            num = power_index[-1] - power_index[-2]
			
 
				+            num = num if num != 0 else 1
			
 
				+            psa = round(ps / num, 2)
			
 
				+            power5.append([power['C_TIME'], psa])
			
 
				+            ps = 0
			
 
				+    power5 = pd.DataFrame(power5, columns=['C_TIME', 'C_REAL_VALUE'])
			
 
				+    utils.savedata.saveData("power5.csv", power5)
			
 
				 
			
 
				 
			
 
				 def get_process_dq(database):
			
@@ -161,57 +151,49 @@ def get_process_cdq(database):
 
				     engine = create_database(database)
			
 
				     sql_cdq = "select C_FORECAST_TIME AS C_TIME, C_ABLE_VALUE from t_forecast_power_ultra_short_term_his"
			
 
				     cdq = exec_sql(sql_cdq, engine)
			
 
				-    cdq['C_TIME'] = cdq['C_TIME'].dt.strftime('%Y-%m-%d %H:%M:%S')
			
 
				+    cdq['C_TIME'] = pd.to_datetime(cdq['C_TIME'], unit='ms')
			
 
				     utils.savedata.saveData("cdq.csv", cdq)
			
 
				 
			
 
				 
			
 
				-def get_turbine_info(database):
			
 
				-    """
			
 
				-    获取风机信息
			
 
				-    :param database:
			
 
				-    :return:
			
 
				-    """
			
 
				-    engine = create_engine(database)
			
 
				-    sql_turbine = "select C_ID, C_LATITUDE as '纬度', C_LONGITUDE as '经度', C_HUB_HEIGHT as '轮毂高度' from t_wind_turbine_info"
			
 
				-    turbine_info = exec_sql(sql_turbine, engine)
			
 
				-    utils.savedata.saveData("风机信息.csv", turbine_info)
			
 
				-
			
 
				 def indep_process():
			
 
				     """
			
 
				     进一步数据处理：时间统一处理等
			
 
				     :return:
			
 
				     """
			
 
				-    # 测风塔数据处理
			
 
				-    for i in arg.towerloc:
			
 
				-        tower = utils.savedata.readData("/tower/tower-{}.csv".format(i))
			
 
				+    # 环境监测仪数据处理
			
 
				+    for i in arg.weatherloc:
			
 
				+        weather = utils.savedata.readData("/weather/weather-{}.csv".format(i))
			
 
				         # 判断每一列是否全是 -99
			
 
				-        all_minus_99 = (tower == -99).all()
			
 
				+        all_minus_99 = (weather == -99).all()
			
 
				 
			
 
				         # 获取全是 -99 的列的列名
			
 
				         cols_to_drop = all_minus_99[all_minus_99 == True].index.tolist()
			
 
				 
			
 
				         # 使用 drop() 方法删除列
			
 
				-        tower = tower.drop(cols_to_drop, axis=1)
			
 
				+        weather = weather.drop(cols_to_drop, axis=1)
			
 
				+
			
 
				         # MBD: 将一部分是-99的列删除，把-99替换为nan
			
 
				-        tower_nan = tower.replace(-99, np.nan, inplace=False)
			
 
				+        weather_nan = weather.replace(-99, np.nan, inplace=False)
			
 
				         # nan 超过80% 删除
			
 
				-        tower = tower.dropna(axis=1, thresh=len(tower_nan) * 0.8)
			
 
				-        utils.savedata.saveData("/tower/tower-{}-process.csv".format(i), tower)
			
 
				-
			
 
				-    # 测风塔时间统一
			
 
				-    tower1 = utils.savedata.readData("/tower/tower-{}-process.csv".format(1))
			
 
				+        weather = weather.dropna(axis=1, thresh=len(weather_nan) * 0.8)
			
 
				+        weather = weather.replace(np.nan, -99, inplace=False)
			
 
				+        # 删除取值全部相同的列
			
 
				+        weather = weather.loc[:, (weather != weather.iloc[0]).any()]
			
 
				+        utils.savedata.saveData("/weather/weather-{}-process.csv".format(i), weather)
			
 
				+
			
 
				+    # 时间统一
			
 
				+    weather1 = utils.savedata.readData("/weather/weather-{}-process.csv".format(1))
			
 
				     # tower2 = utils.savedata.readData("/tower/tower-{}-process.csv".format(2))
			
 
				     # tower1 = tower1[tower1['C_TIME'].isin(tower2['C_TIME'])]
			
 
				     # tower2 = tower2[tower2['C_TIME'].isin(tower1['C_TIME'])]
			
 
				 
			
 
				-    utils.savedata.saveData("/tower/tower-{}-process.csv".format(1), tower1)
			
 
				+    utils.savedata.saveData("/weather/weather-{}-process.csv".format(1), weather1)
			
 
				     # utils.savedata.saveData("/tower/tower-{}-process.csv".format(2), tower2)
			
 
				 
			
 
				     # 所有表时间统一
			
 
				-    filenames = ["/NWP.csv","/power.csv", "/dq.csv", '/tower/tower-1-process.csv']
			
 
				+    filenames = ["/NWP.csv","/power.csv", "power5.csv", "/dq.csv", "/cdq.csv", '/weather/weather-1-process.csv']
			
 
				     dataframes = []
			
 
				-    for i in arg.turbineloc:
			
 
				-        filenames.append("/turbine-15/turbine-{}.csv".format(i))
			
 
				+
			
 
				     for name in filenames:
			
 
				         dataframes.append(utils.savedata.readData(name))
			
 
				 
			
@@ -335,11 +317,9 @@ def data_process(database):
 
				     """
			
 
				     clear_data()
			
 
				     get_process_dq(database)
			
 
				-    # get_process_cdq(database)
			
 
				+    get_process_cdq(database)
			
 
				     get_process_NWP(database)
			
 
				-    get_process_turbine(database)
			
 
				-    get_turbine_info(database)
			
 
				-    get_process_tower(database)
			
 
				+    get_process_weather(database)
			
 
				     get_process_power(database)
			
 
				     indep_process()
			
 
				     NWP_indep_process()
			
--- a/db-light/main.py
+++ b/db-light/main.py
@@ -3,221 +3,7 @@ import os
 
				 from datetime import timedelta
			
 
				 from getdata import inputData
			
 
				 from utils import Arg
			
 
				-#——————————————————————————机头风速-99和连续异常值清洗代码——————————————————————————————
			
 
				-def mark_abnormal_streaks(df, columns, min_streak):
			
 
				-    abnormal_mask = pd.Series(False, index=df.index)
			
 
				-    streak_start = None
			
 
				-    for i in range(len(df)):
			
 
				-        if i == 0 or any(df.at[i - 1, col] != df.at[i, col] for col in columns):
			
 
				-            streak_start = i
			
 
				-
			
 
				-        if i - streak_start >= min_streak - 1:
			
 
				-            abnormal_mask[i - min_streak + 1:i + 1] = True
			
 
				-
			
 
				-    return abnormal_mask
			
 
				-
			
 
				-
			
 
				-def remove_abnormal_values(df, N):
			
 
				-    # 标记C_ACTIVE_POWER为-99的行为异常值
			
 
				-    abnormal_mask1 = df['C_ACTIVE_POWER'] == -99
			
 
				-    count_abnormal1 = abnormal_mask1.sum()
			
 
				-
			
 
				-    # 标记C_WS, A, B连续5行不变的行为异常值
			
 
				-    columns = ['C_WS', 'C_WD', 'C_ACTIVE_POWER']
			
 
				-    abnormal_mask2 = mark_abnormal_streaks(df, columns, N)
			
 
				-    count_abnormal2 = abnormal_mask2.sum()
			
 
				-    # 获得所有异常值的布尔掩码
			
 
				-    abnormal_mask = abnormal_mask1 | abnormal_mask2
			
 
				-    # 获取连续异常值具体数值
			
 
				-    removed_continuous_values = {column: df.loc[abnormal_mask2, column].unique() for column in columns}
			
 
				-    # 剔除异常值
			
 
				-    df_clean = df[~abnormal_mask]
			
 
				-    total_removed = abnormal_mask.sum()
			
 
				-    return df_clean, count_abnormal1, count_abnormal2, total_removed, removed_continuous_values
			
 
				-
			
 
				-
			
 
				-def process_csv_files(input_dir, output_dir,M,N):  # MBD:没有考虑时间重复
			
 
				-    if not os.path.exists(output_dir):
			
 
				-        os.makedirs(output_dir)
			
 
				-
			
 
				-    for i in arg.turbineloc:
			
 
				-        input_file = os.path.join(input_dir, f"turbine-{i}.csv")
			
 
				-        output_file = os.path.join(output_dir, f"turbine-{i}.csv")
			
 
				-
			
 
				-        # 读取csv文件
			
 
				-        df = pd.read_csv(input_file)
			
 
				-
			
 
				-        # 剔除异常值，并获取异常值统计信息
			
 
				-        df_clean, count_abnormal1, count_abnormal2, total_removed, removed_continuous_values = remove_abnormal_values(df,N)
			
 
				-
			
 
				-        # 输出异常值统计信息
			
 
				-        print(f"处理文件：{input_file}")
			
 
				-        print(f"剔除 -99 点异常值数量：{count_abnormal1}")
			
 
				-        print(f"剔除连续异常值数量：{count_abnormal2}")
			
 
				-        print(f"总共剔除数据量：{total_removed}")
			
 
				-        print(f"剔除的连续异常值具体数值：{removed_continuous_values}\n")
			
 
				-
			
 
				-        # 保存处理过的CSV文件
			
 
				-        df_clean.to_csv(output_file, index=False)
			
 
				-#——————————————————————————风机单机时间对齐——————————————————————————————
			
 
				-def TimeMerge(input_dir, output_dir,M):
			
 
				-    # 读取所有CSV文件
			
 
				-    files = [os.path.join(input_dir, f"turbine-{i}.csv") for i in arg.turbineloc]
			
 
				-    dataframes = [pd.read_csv(f) for f in files]
			
 
				-
			
 
				-    # 获取C_TIME列的交集
			
 
				-    c_time_intersection = set(dataframes[0]["C_TIME"])
			
 
				-    for df in dataframes[1:]:
			
 
				-        c_time_intersection.intersection_update(df["C_TIME"])
			
 
				-
			
 
				-    # 只保留C_TIME交集中的数据
			
 
				-    filtered_dataframes = [df[df["C_TIME"].isin(c_time_intersection)] for df in dataframes]
			
 
				-
			
 
				-    # 将每个过滤后的DataFrame写入新的CSV文件
			
 
				-    os.makedirs(output_dir, exist_ok=True)
			
 
				-    for (filtered_df, i) in zip(filtered_dataframes, arg.turbineloc):
			
 
				-        if i == 144:
			
 
				-            filtered_df['C_ACTIVE_POWER'] /= 1000
			
 
				-        filtered_df.to_csv(os.path.join(output_dir, f"turbine-{i}.csv"), index=False)
			
 
				-
			
 
				-#——————————————————————————风机缺失点处理——————————————————————————————
			
 
				-def MissingPointProcessing(input_dir,output_dir,M,N):
			
 
				-
			
 
				-    # 存储数据的列表
			
 
				-
			
 
				-    # 读取M个文件
			
 
				-    for k in arg.turbineloc:
			
 
				-        file_name = input_dir + '/' + f"turbine-{k}.csv"
			
 
				-        # file_name = os.path.join(input_dir, f"turbine-{k}.csv")
			
 
				-    # 读取CSV文件
			
 
				-        data = pd.read_csv(file_name, parse_dates=['C_TIME'])
			
 
				-
			
 
				-    # 计算时间差
			
 
				-        data['time_diff'] = data['C_TIME'].diff().dt.total_seconds()
			
 
				-
			
 
				-    # 找出缺失的时间点
			
 
				-        missing_data_points = data[data['time_diff'] > 900]
			
 
				-
			
 
				-    # 存储填充的时间和值
			
 
				-        filled_data = []
			
 
				-
			
 
				-    # 输出缺失的开始时刻和数量
			
 
				-        print("缺失的开始时刻:")
			
 
				-        for index, row in missing_data_points.iterrows():
			
 
				-            missing_start = row['C_TIME'] - timedelta(seconds=row['time_diff'])
			
 
				-            missing_count = int(row['time_diff'] // 900) - 1
			
 
				-
			
 
				-            # 如果缺失的点数小于N个，则进行填充 MBD:填充代码比较啰嗦
			
 
				-            if missing_count <= N:
			
 
				-                prev_values = data.iloc[index - 1][['C_WS', 'C_WD', 'C_ACTIVE_POWER']]
			
 
				-                next_values = row[['C_WS', 'C_WD', 'C_ACTIVE_POWER']]
			
 
				-
			
 
				-                for i in range(1, missing_count + 1):
			
 
				-                    t = i / (missing_count + 1)
			
 
				-                    filled_time = missing_start + timedelta(minutes=15 * i)
			
 
				-
			
 
				-                    filled_values = {
			
 
				-                        'C_TIME': filled_time,
			
 
				-                        'C_WS': prev_values['C_WS'] + (next_values['C_WS'] - prev_values['C_WS']) * t,
			
 
				-                        'C_WD': prev_values['C_WD']+(next_values['C_WD']-prev_values['C_WD'])*t,
			
 
				-                        'C_ACTIVE_POWER': prev_values['C_ACTIVE_POWER'] + (
			
 
				-                                    next_values['C_ACTIVE_POWER'] - prev_values['C_ACTIVE_POWER']) * t,
			
 
				-                    }
			
 
				-
			
 
				-                    # 将角度值限制在-180到180的范围内
			
 
				-                    filled_values['C_WD'] = (filled_values['C_WD'] + 180) % 360 - 180
			
 
				-
			
 
				-                    filled_data.append(filled_values)
			
 
				-                    print(f"填充的时间: {filled_time}, 填充的值: {filled_values}")
			
 
				-
			
 
				-            print(f"{missing_start} - 缺失的点的数量: {missing_count}")
			
 
				-
			
 
				-        # 将填充的数据插入原始数据中
			
 
				-        filled_df = pd.DataFrame(filled_data)
			
 
				-        data = pd.concat([data, filled_df], ignore_index=True)
			
 
				-        # 对数据按时间排序并重置索引
			
 
				-        data = data.sort_values(by='C_TIME').reset_index(drop=True)
			
 
				-
			
 
				-        # 输出总缺失点数
			
 
				-        missing_data_points = data[data['time_diff'] > 900]
			
 
				-        print(f"总缺失点数: {int(missing_data_points['time_diff'].sum() // 900) - len(missing_data_points)}")
			
 
				-        data.drop(columns=['time_diff'], inplace=True)
			
 
				-        os.makedirs(output_dir, exist_ok=True)
			
 
				-        output_path_name = os.path.join(output_dir, f"turbine-{k}.csv")
			
 
				-        print(output_path_name)
			
 
				-    # 保存插值后的文件
			
 
				-        data.to_csv(output_path_name, index=False)
			
 
				-#——————————————————————————风机单机连续时间段分割——————————————————————————————
			
 
				-def top_n_continuous_periods(data, n):
			
 
				-    continuous_periods = []
			
 
				-    continuous_start = data['C_TIME'].iloc[0]
			
 
				-    continuous_count = 1
			
 
				-
			
 
				-    for i in range(1, len(data)):
			
 
				-        if data['time_diff'].iloc[i] == 900:
			
 
				-            continuous_count += 1
			
 
				-        else:
			
 
				-            continuous_periods.append({
			
 
				-                'start': continuous_start,
			
 
				-                'end': data['C_TIME'].iloc[i - 1],
			
 
				-                'count': continuous_count
			
 
				-            })
			
 
				-            continuous_start = data['C_TIME'].iloc[i]
			
 
				-            continuous_count = 1
			
 
				-
			
 
				-    continuous_periods.append({
			
 
				-        'start': continuous_start,
			
 
				-        'end': data['C_TIME'].iloc[-1],
			
 
				-        'count': continuous_count
			
 
				-    })
			
 
				-    continuous_periods.sort(key=lambda x: x['count'], reverse=True)
			
 
				-    return continuous_periods[:n]
			
 
				-def Continuous_Data(input_dir,output_dir,M,TopN):
			
 
				-    # 读取CSV文件
			
 
				-    for k in arg.turbineloc:
			
 
				-        path_dir = f"turbine-{k}.csv"
			
 
				-        input_path = os.path.join(input_dir, path_dir)
			
 
				-        data = pd.read_csv(input_path, parse_dates=['C_TIME'])
			
 
				-        data = data.sort_values(by='C_TIME').reset_index(drop=True)
			
 
				-    # 计算时间差
			
 
				-        data['time_diff'] = data['C_TIME'].diff().dt.total_seconds()
			
 
				-    # 获取Top N连续的数据段
			
 
				-        top_n = TopN
			
 
				-        top_n_periods = top_n_continuous_periods(data, top_n)
			
 
				-        data.drop(columns=['time_diff'], inplace=True)
			
 
				-    # 输出Top N连续的数据的数量、开始时间和最后的时间
			
 
				-        print(f"Top {top_n} 连续的数据段:")
			
 
				-        for i, period in enumerate(top_n_periods):
			
 
				-            print(f"{i + 1}. 开始时间: {period['start']} - 结束时间: {period['end']} - 数据量: {period['count']}")
			
 
				-            output_file = f"turbine-{k}_{period['count']}.csv"
			
 
				-            mask = (data['C_TIME'] >= period['start']) & (data['C_TIME'] <= period['end'])
			
 
				-            filtered_df = data.loc[mask]
			
 
				-            # 更新文件名中的period['count']为数据集大小
			
 
				-            output_file = output_file.replace(str(period['count']), str(filtered_df.shape[0]))
			
 
				-            output_folder = f"Continuous_Turbine_Data_{period['count']}_{period['start'].strftime('%y-%m-%d-%H-%M')}_{period['end'].strftime('%y-%m-%d-%H-%M')}"
			
 
				-            output_folder = os.path.join(output_dir, output_folder)
			
 
				-            if not os.path.exists(output_folder):
			
 
				-                os.makedirs(output_folder)
			
 
				-            # 保存截取的数据到新的csv文件
			
 
				-            # filtered_df.to_csv(output_dir, index=False)
			
 
				-            filtered_df.to_csv(os.path.join(output_folder, output_file), index=False)
			
 
				-            print(f"Processed {input_path}")
			
 
				-
			
 
				 
			
 
				 if __name__ == "__main__":
			
 
				     arg = Arg.Arg()
			
 
				     inputData.data_process(arg.database)
			
 
				-    input_dir = "../data_mts/turbine-15"  # 输入文件夹路径
			
 
				-    output_dir = "../data_mts/output_clean_csv_files"  # 输出文件夹路径
			
 
				-    # 对机头风速连续异常值和-99进行清洗,第三个参数是连续5个值不变以后就认为异常
			
 
				-    # 这步会生成一个"output_clean_csv_files"文件夹，里面包含全部单机的数据，存储的机头风速只清理了-99，参数50是风机数量+1，风机参数5就是连续5个点的认为是异常值，全部剔除。
			
 
				-    process_csv_files(input_dir, output_dir, 50, 5)
			
 
				-    output_dir_time_Merge = "../data_mts/output_filtered_csv_files"
			
 
				-    # 这步会生成一个"output_filtered_csv_files"文件夹，在上一步的基础上，对齐了全部风机的时间，只各自保留了交集。
			
 
				-    TimeMerge(output_dir,output_dir_time_Merge,50)
			
 
				-    output_complete_data = "../data_mts/complete_data"
			
 
				-    # 这步会生成一个"complete_data"文件夹，在上一步的基础上，填充了10个时间点之内的缺失。
			
 
				-    MissingPointProcessing(output_dir_time_Merge,output_complete_data,50,10)
			
 
				-    continuous_time = "../data_mts/continuous_data"
			
 
				-    # 这步会生成一个"Continuous_data"文件夹，在上一步的基础上，取Top10个连续时间段最长的单机数据。
			
 
				-    Continuous_Data(output_complete_data, continuous_time, 50, 10)
			
--- a/db-light/utils/Arg.py
+++ b/db-light/utils/Arg.py
@@ -1,12 +1,10 @@
 
				 class Arg:
			
 
				     def __init__(self):
			
 
				         # 数据库地址
			
 
				-        self.database = "mysql+pymysql://root:123@localhost:3306/ipfcst-mts"
			
 
				+        self.database = "mysql+pymysql://root:123@localhost:3306/ipfcst-guyuan"
			
 
				         # 数据存放位置
			
 
				-        self.dataloc = "../mts/"
			
 
				+        self.dataloc = "../guyuan1/"
			
 
				         # 变量存放位置
			
 
				-        self.varloc = "../mts/var/"
			
 
				-        # 测风塔个数
			
 
				-        self.towerloc = [1]
			
 
				-        # 机头编号
			
 
				-        self.turbineloc = [i for i in range(102, 162)]
			
 
				+        self.varloc = "../guyuan1/var/"
			
 
				+        # 环境监测仪个数
			
 
				+        self.weatherloc = [1]
			
--- a/db-light/utils/__pycache__/Arg.cpython-37.pyc
+++ b/db-light/utils/__pycache__/Arg.cpython-37.pyc
--- a/db-light/utils/__pycache__/savedata.cpython-37.pyc
+++ b/db-light/utils/__pycache__/savedata.cpython-37.pyc
--- a/db-wind/getdata/inputData.py
+++ b/db-wind/getdata/inputData.py
@@ -149,6 +149,7 @@ def get_process_dq(database):
 
				     engine = create_database(database)
			
 
				     sql_dq = "select C_FORECAST_TIME AS C_TIME, C_ABLE_VALUE from t_forecast_power_short_term_his"
			
 
				     dq = exec_sql(sql_dq, engine)
			
 
				+    dq['C_TIME'] = pd.to_datetime(dq['C_TIME'], unit='ms')
			
 
				     utils.savedata.saveData("dq.csv", dq)
			
 
				 
			
 
				 def get_process_cdq(database):
			
@@ -160,6 +161,7 @@ def get_process_cdq(database):
 
				     engine = create_database(database)
			
 
				     sql_cdq = "select C_FORECAST_TIME AS C_TIME, C_ABLE_VALUE from t_forecast_power_ultra_short_term_his"
			
 
				     cdq = exec_sql(sql_cdq, engine)
			
 
				+    cdq['C_TIME'] = cdq['C_TIME'].dt.strftime('%Y-%m-%d %H:%M:%S')
			
 
				     utils.savedata.saveData("cdq.csv", cdq)
			
 
				 
			
 
				 
			
--- a/db-wind/utils/Arg.py
+++ b/db-wind/utils/Arg.py
@@ -1,11 +1,11 @@
 
				 class Arg:
			
 
				     def __init__(self):
			
 
				         # 数据库地址
			
 
				-        self.database = "mysql+pymysql://root:!QAZ2root@192.168.1.205:3306/ipfcst-laodian"
			
 
				+        self.database = "mysql+pymysql://root:123@localhost:3306/ipfcst-mts"
			
 
				         # 数据存放位置
			
 
				-        self.dataloc = "../data/"
			
 
				+        self.dataloc = "../mts/"
			
 
				         # 变量存放位置
			
 
				-        self.varloc = "../data/var/"
			
 
				+        self.varloc = "../mts/var/"
			
 
				         # 测风塔个数
			
 
				         self.towerloc = [1]
			
 
				         # 机头编号