material.py 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153
  1. #!/usr/bin/env python
  2. # -*- coding:utf-8 -*-
  3. # @FileName :material.py
  4. # @Time :2025/4/29 11:07
  5. # @Author :David
  6. # @Company: shenyang JY
  7. import os.path
  8. import types
  9. import pandas as pd
  10. from pathlib import Path
  11. from app.common.config import logger, parser
  12. from concurrent.futures import ThreadPoolExecutor
  13. from functools import partial
  class MaterialLoader:
      """Load per-station material tables and build capacity-weighted regional NWP.

      Station tables are read from ``<base_path>/<station_id>/<spec>.txt`` as
      whitespace-separated text with a header row. Each station passed to
      :meth:`add_weights` adds its NWP tables, multiplied by the station
      capacity, to running sums; :meth:`get_material_region` divides those sums
      by the accumulated capacity.
      """

      # Identifier columns that are carried through unchanged (never scaled).
      _ID_COLUMNS = ('PlantID', 'PlantName', 'Datetime')

      # ``doc_mapping`` keys that depend on the plant type:
      # 0 is the wind branch, 1 is the photovoltaic branch.
      _TYPE_KEYS = {
          0: {
              'station_info': 'station_info_w',
              'station_info_d': 'station_info_d_w',
              'nwp': 'nwp_w',
              'nwp_h': 'nwp_w_h',
              'env': 'env_wf',
          },
          1: {
              'station_info': 'station_info_s',
              'station_info_d': 'station_info_d_s',
              'nwp': 'nwp_s',
              'nwp_h': 'nwp_s_h',
              'env': 'env_sf',
          },
      }

      def __init__(self, base_path, lazy_load=True):
          """Create a loader rooted at *base_path*.

          Args:
              base_path: Directory that contains one sub-directory per station.
              lazy_load: When true, :meth:`get_material` caches each station and
                  adds it to the regional weighted sums on first access.
          """
          self.base_path = Path(base_path)
          self.lazy_load = lazy_load
          self._data_cache = {}
          self.opt = parser.parse_args_and_yaml()
          # Running total of station capacities, used as the normalisation divisor.
          self.sum_cap = 0
          # Running capacity-weighted sums, one per NWP table.
          self.weighted_nwp = pd.DataFrame()
          self.weighted_nwp_h = pd.DataFrame()
          self.weighted_nwp_v = pd.DataFrame()
          self.weighted_nwp_v_h = pd.DataFrame()

      def wrapper_path(self, station_id, spec):
          """Return ``<base_path>/<station_id>/<spec>.txt`` as a string."""
          # str() lets callers pass non-string ids (e.g. int) without a TypeError.
          return f"{self.base_path / str(station_id) / spec}.txt"

      def _read_table(self, station_id, spec):
          """Read one whitespace-separated station table with a header row."""
          return pd.read_csv(self.wrapper_path(station_id, spec), sep=r'\s+', header=0)

      @staticmethod
      def _property_value(basic, name):
          """Return the first ``Value`` whose ``PropertyID`` equals *name*.

          Raises:
              KeyError: If no row carries that property.
          """
          matches = basic.loc[basic['PropertyID'] == name, 'Value']
          if matches.empty:
              raise KeyError(f"property {name!r} not found")
          return matches.iloc[0]

      @classmethod
      def _value_columns(cls, frame):
          """Return the columns of *frame* that are not identifier columns."""
          return [col for col in frame.columns if col not in cls._ID_COLUMNS]

      def _load_material(self, station_id):
          """Load every table of one station.

          Returns:
              A ``SimpleNamespace`` with ``station_info``, ``station_info_d``,
              ``nwp``, ``nwp_h``, ``power``, ``nwp_v``, ``nwp_v_h``, ``env``
              (``None`` when the optional file is absent) and ``cap``; or
              ``None`` if anything fails while loading (the error is printed).
          """
          try:
              mapping = self.opt.doc_mapping
              basic = self._read_table(station_id, mapping['basic'])
              power = self._read_table(station_id, mapping['power'])

              plant_type = int(self._property_value(basic, 'PlantType'))
              # Explicit raise instead of ``assert`` so the check survives ``-O``.
              if plant_type not in self._TYPE_KEYS:
                  raise ValueError(f"unsupported PlantType {plant_type}")

              nwp_v = self._read_table(station_id, f"0/{mapping['nwp_v']}")
              nwp_v_h = self._read_table(station_id, f"0/{mapping['nwp_v_h']}")
              nwp_own = self._read_table(station_id, f"1/{mapping['nwp_own']}")
              nwp_own_h = self._read_table(station_id, f"1/{mapping['nwp_own_h']}")
              if self.opt.switch_nwp_owner:
                  nwp_v, nwp_v_h = nwp_own, nwp_own_h

              # Both plant types load the same set of tables under different keys.
              keys = self._TYPE_KEYS[plant_type]
              station_info = self._read_table(station_id, mapping[keys['station_info']])
              station_info_d = self._read_table(station_id, mapping[keys['station_info_d']])
              nwp = self._read_table(station_id, mapping[keys['nwp']])
              nwp_h = self._read_table(station_id, mapping[keys['nwp_h']])
              cap = float(station_info.loc[0, 'PlantCap'])

              # The environment table is optional.
              env = None
              if Path(self.wrapper_path(station_id, mapping[keys['env']])).exists():
                  env = self._read_table(station_id, mapping[keys['env']])

              return types.SimpleNamespace(
                  station_info=station_info,
                  station_info_d=station_info_d,
                  nwp=nwp,
                  nwp_h=nwp_h,
                  power=power,
                  nwp_v=nwp_v,
                  nwp_v_h=nwp_v_h,
                  env=env,
                  cap=cap,
              )
          except Exception as e:
              print(f"Error loading {station_id}: {str(e)}")
              return None

      def get_material(self, station_id):
          """Return the material of one station, or ``None`` if it failed to load.

          In lazy mode the result (including a failed ``None``) is cached, and a
          successfully loaded station is added to the regional weighted sums
          exactly once. In non-lazy mode the station is re-read on every call and
          the weighted sums are left untouched.
          """
          if not self.lazy_load:
              return self._load_material(station_id)

          if station_id not in self._data_cache:
              material = self._load_material(station_id)
              self._data_cache[station_id] = material
              # A failed load yields None; weighting it would raise AttributeError.
              if material is not None:
                  self.add_weights(material)
          return self._data_cache[station_id]

      @classmethod
      def _accumulate(cls, total, frame, weight):
          """Return *total* plus ``frame * weight`` over the non-identifier columns.

          An empty *total* is initialised from a copy of *frame*.

          Raises:
              AssertionError: If *total* is non-empty and its columns differ
                  from those of *frame*.
          """
          columns = cls._value_columns(frame)
          if total.empty:
              total = frame.copy()
              if columns:
                  total[columns] = frame[columns] * weight
              return total

          # Raised explicitly (not via ``assert``) so it is not stripped by ``-O``.
          if set(total.columns) != set(frame.columns):
              raise AssertionError("DataFrame列不匹配")
          if columns:
              total[columns] += frame[columns] * weight
          return total

      def add_weights(self, data_objects):
          """Add one station's NWP tables, weighted by its capacity, to the sums.

          Args:
              data_objects: Object exposing ``nwp``, ``nwp_h``, ``nwp_v``,
                  ``nwp_v_h`` (DataFrames) and ``cap`` (number).
          """
          cap = data_objects.cap
          # Accumulate the total capacity used later as the normalisation divisor.
          self.sum_cap += cap
          self.weighted_nwp = self._accumulate(self.weighted_nwp, data_objects.nwp, cap)
          self.weighted_nwp_h = self._accumulate(self.weighted_nwp_h, data_objects.nwp_h, cap)
          self.weighted_nwp_v = self._accumulate(self.weighted_nwp_v, data_objects.nwp_v, cap)
          self.weighted_nwp_v_h = self._accumulate(self.weighted_nwp_v_h, data_objects.nwp_v_h, cap)

      def _normalized(self, total):
          """Return a copy of *total* with value columns divided by ``sum_cap``.

          Working on a copy keeps the running sums intact, so repeated region
          queries and later ``add_weights`` calls stay consistent.
          """
          result = total.copy()
          columns = self._value_columns(result)
          if columns:
              result[columns] = result[columns] / self.sum_cap
          return result

      def get_material_region(self):
          """Return the regional material built from the weighted station sums.

          Returns:
              A ``SimpleNamespace`` with the capacity-normalised ``nwp``,
              ``nwp_h``, ``nwp_v`` and ``nwp_v_h`` tables, the regional ``power``
              table and ``area_cap``; or ``None`` if loading fails (the error is
              printed).
          """
          try:
              root = Path(self.base_path)
              mapping = self.opt.doc_area_mapping
              basic = pd.read_csv(root / mapping['basic'], sep=r'\s+', header=0)
              power = pd.read_csv(root / mapping['power'], sep=r'\s+', header=0)

              plant_type = int(self._property_value(basic, 'PlantType'))
              # The AreaId value is not used below; the lookup is kept so that a
              # file without a valid AreaId still fails, as it did before.
              int(self._property_value(basic, 'AreaId'))
              if plant_type not in (0, 1):
                  raise ValueError(f"unsupported PlantType {plant_type}")
              area_cap = float(self._property_value(basic, 'AreaCap'))

              # All four sums were capacity-weighted, so all four are normalised.
              return types.SimpleNamespace(
                  nwp=self._normalized(self.weighted_nwp),
                  nwp_h=self._normalized(self.weighted_nwp_h),
                  power=power,
                  nwp_v=self._normalized(self.weighted_nwp_v),
                  nwp_v_h=self._normalized(self.weighted_nwp_v_h),
                  area_cap=area_cap,
              )
          except Exception as e:
              print(f"Region Error loading: {str(e)}")
              return None
  if __name__ == "__main__":
      # Placeholder executed only when the module is run as a script.
      run_code = 0