material.py 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. #!/usr/bin/env python
  2. # -*- coding:utf-8 -*-
  3. # @FileName :material.py
  4. # @Time :2025/4/29 11:07
  5. # @Author :David
  6. # @Company: shenyang JY
  7. import os.path
  8. import types
  9. import pandas as pd
  10. from pathlib import Path
  11. from app.common.config import logger, parser
  12. from concurrent.futures import ThreadPoolExecutor
  13. from functools import partial
  14. class MaterialLoader:
  15. def __init__(self, input_file, lazy_load=True):
  16. self.lazy_load = lazy_load
  17. self._data_cache = {}
  18. self.opt = parser.parse_args_and_yaml()
  19. self.base_path = Path(self.opt.dqyc_base_path)
  20. self.base_path_cdq = Path(self.opt.cdqyc_base_path)
  21. def wrapper_path(self, station_id, spec):
  22. return f"{self.base_path/station_id/spec}.txt"
  23. def wrapper_path_cdq(self, area_id, spec):
  24. return f"{self.base_path_cdq/area_id/spec}.txt"
  25. def _load_material(self, station_id):
  26. """核心数据加载方法"""
  27. # 根据您的原始代码逻辑简化的加载流程
  28. try:
  29. basic = pd.read_csv(self.wrapper_path(station_id, self.opt.doc_mapping['basic']), sep=r'\s+', header=0)
  30. power = pd.read_csv(self.wrapper_path(station_id, self.opt.doc_mapping['power']), sep=r'\s+', header=0)
  31. plant_type = int(basic.loc[basic['PropertyID'].tolist().index('PlantType'), 'Value'])
  32. assert plant_type == 0 or plant_type == 1
  33. # 根据电站类型加载数据
  34. nwp_v = pd.read_csv(self.wrapper_path(station_id, f"0/{self.opt.doc_mapping['nwp_v']}"), sep=r'\s+', header=0)
  35. nwp_v_h = pd.read_csv(self.wrapper_path(station_id, f"0/{self.opt.doc_mapping['nwp_v_h']}"), sep=r'\s+', header=0)
  36. nwp_own = pd.read_csv(self.wrapper_path(station_id, f"1/{self.opt.doc_mapping['nwp_own']}"), sep=r'\s+', header=0)
  37. nwp_own_h = pd.read_csv(self.wrapper_path(station_id, f"1/{self.opt.doc_mapping['nwp_own_h']}"), sep=r'\s+', header=0)
  38. if self.opt.switch_nwp_owner:
  39. nwp_v, nwp_v_h = nwp_own, nwp_own_h
  40. # 如果是风电
  41. if plant_type == 0:
  42. station_info = pd.read_csv(self.wrapper_path(station_id, self.opt.doc_mapping['station_info_w']), sep=r'\s+', header=0)
  43. station_info_d = pd.read_csv(self.wrapper_path(station_id, self.opt.doc_mapping['station_info_d_w']), sep=r'\s+', header=0)
  44. nwp = pd.read_csv(self.wrapper_path(station_id, self.opt.doc_mapping['nwp_w']), sep=r'\s+', header=0)
  45. nwp_h = pd.read_csv(self.wrapper_path(station_id, self.opt.doc_mapping['nwp_w_h']), sep=r'\s+', header=0)
  46. cap = float(station_info.loc[0, 'PlantCap'])
  47. if Path(self.wrapper_path(station_id, self.opt.doc_mapping['env_wf'])).exists():
  48. env = pd.read_csv(self.wrapper_path(station_id, self.opt.doc_mapping['env_wf']), sep=r'\s+', header=0)
  49. else:
  50. env = None
  51. # 如果是光伏
  52. else:
  53. station_info = pd.read_csv(self.wrapper_path(station_id, self.opt.doc_mapping['station_info_s']), sep=r'\s+', header=0)
  54. station_info_d = pd.read_csv(self.wrapper_path(station_id, self.opt.doc_mapping['station_info_d_s']), sep=r'\s+', header=0)
  55. nwp = pd.read_csv(self.wrapper_path(station_id, self.opt.doc_mapping['nwp_s']), sep=r'\s+', header=0)
  56. nwp_h = pd.read_csv(self.wrapper_path(station_id, self.opt.doc_mapping['nwp_s_h']), sep=r'\s+', header=0)
  57. cap = float(station_info.loc[0, 'PlantCap'])
  58. if Path(self.wrapper_path(station_id, self.opt.doc_mapping['env_sf'])).exists():
  59. env = pd.read_csv(self.wrapper_path(station_id, self.opt.doc_mapping['env_sf']), sep=r'\s+', header=0)
  60. else:
  61. env = None
  62. return types.SimpleNamespace(**{
  63. 'station_info': station_info,
  64. 'station_info_d': station_info_d,
  65. 'nwp': nwp,
  66. 'nwp_h': nwp_h,
  67. 'power': power,
  68. 'nwp_v': nwp_v,
  69. 'nwp_v_h': nwp_v_h,
  70. 'env': env,
  71. 'cap': cap
  72. })
  73. except Exception as e:
  74. print(f"Error loading {station_id}: {str(e)}")
  75. return None
  76. def _load_material_cdq(self, area_id, moment):
  77. """核心数据加载方法"""
  78. # 根据您的原始代码逻辑简化的加载流程
  79. try:
  80. basic = pd.read_csv(self.wrapper_path_cdq(area_id, self.opt.doc_cdq_mapping['basic']), sep=r'\s+', header=0)
  81. basic_area = pd.read_csv(self.wrapper_path_cdq(area_id, self.opt.doc_cdq_mapping['basic_area']), sep=r'\s+', header=0)
  82. plant_type = int(basic.loc[basic['PropertyID'].tolist().index('PlantType'), 'Value'])
  83. assert plant_type == 0 or plant_type == 1
  84. # 根据电站类型加载数据
  85. if self.opt.switch_nwp_owner:
  86. nwp_v, nwp_v_h = nwp_own, nwp_own_h
  87. # 如果是风电
  88. if plant_type == 0:
  89. station_info = pd.read_csv(self.wrapper_path(station_id, self.opt.doc_mapping['station_info_w']), sep=r'\s+', header=0)
  90. station_info_d = pd.read_csv(self.wrapper_path(station_id, self.opt.doc_mapping['station_info_d_w']), sep=r'\s+', header=0)
  91. nwp = pd.read_csv(self.wrapper_path(station_id, self.opt.doc_mapping['nwp_w']), sep=r'\s+', header=0)
  92. nwp_h = pd.read_csv(self.wrapper_path(station_id, self.opt.doc_mapping['nwp_w_h']), sep=r'\s+', header=0)
  93. cap = float(station_info.loc[0, 'PlantCap'])
  94. if Path(self.wrapper_path(station_id, self.opt.doc_mapping['env_wf'])).exists():
  95. env = pd.read_csv(self.wrapper_path(station_id, self.opt.doc_mapping['env_wf']), sep=r'\s+', header=0)
  96. else:
  97. env = None
  98. # 如果是光伏
  99. else:
  100. station_info = pd.read_csv(self.wrapper_path(station_id, self.opt.doc_mapping['station_info_s']), sep=r'\s+', header=0)
  101. station_info_d = pd.read_csv(self.wrapper_path(station_id, self.opt.doc_mapping['station_info_d_s']), sep=r'\s+', header=0)
  102. nwp = pd.read_csv(self.wrapper_path(station_id, self.opt.doc_mapping['nwp_s']), sep=r'\s+', header=0)
  103. nwp_h = pd.read_csv(self.wrapper_path(station_id, self.opt.doc_mapping['nwp_s_h']), sep=r'\s+', header=0)
  104. cap = float(station_info.loc[0, 'PlantCap'])
  105. if Path(self.wrapper_path(station_id, self.opt.doc_mapping['env_sf'])).exists():
  106. env = pd.read_csv(self.wrapper_path(station_id, self.opt.doc_mapping['env_sf']), sep=r'\s+', header=0)
  107. else:
  108. env = None
  109. return types.SimpleNamespace(**{
  110. 'station_info': station_info,
  111. 'station_info_d': station_info_d,
  112. 'nwp': nwp,
  113. 'nwp_h': nwp_h,
  114. 'power': power,
  115. 'nwp_v': nwp_v,
  116. 'nwp_v_h': nwp_v_h,
  117. 'env': env,
  118. 'cap': cap
  119. })
  120. except Exception as e:
  121. print(f"Error loading {station_id}: {str(e)}")
  122. return None
  123. def get_material(self, station_id):
  124. if self.lazy_load:
  125. if station_id not in self._data_cache:
  126. self._data_cache[station_id] = self._load_material(station_id)
  127. return self._data_cache[station_id]
  128. else:
  129. return self._load_material(station_id)
  130. def add_weights(self, data_objects):
  131. """对nwp数据进行cap加权(nwp, nwp_h, nwp_v_, nwp_v_h)"""
  132. def local_sum(df, weight):
  133. """内部函数:对DataFrame进行加权求和"""
  134. weighted_df = df.copy()
  135. columns_to_scale = [col for col in df.columns if col not in ['PlantID', 'PlantName', 'PlantType', 'Qbsj', 'Datetime']]
  136. weighted_df[columns_to_scale] = weighted_df[columns_to_scale] * weight
  137. return weighted_df, weight
  138. # 从data_objects解构对象
  139. nwp, nwp_h, nwp_v, nwp_v_h, power, cap = (
  140. data_objects.nwp,
  141. data_objects.nwp_h,
  142. data_objects.nwp_v,
  143. data_objects.nwp_v_h,
  144. data_objects.power,
  145. data_objects.cap
  146. )
  147. # 对每个NWP数据集进行容量加权
  148. weighted_nwp, cap = local_sum(nwp, cap)
  149. weighted_nwp_h, _ = local_sum(nwp_h, cap)
  150. weighted_nwp_v, _ = local_sum(nwp_v, cap)
  151. weighted_nwp_v_h, _ = local_sum(nwp_v_h, cap)
  152. return {
  153. 'nwp': weighted_nwp,
  154. 'nwp_h': weighted_nwp_h,
  155. 'nwp_v': weighted_nwp_v,
  156. 'nwp_v_h': weighted_nwp_v_h,
  157. 'cap': cap
  158. }
  159. def get_material_region(self):
  160. try:
  161. basic = pd.read_csv(os.path.join(self.base_path, self.opt.doc_mapping['basic_area']+'.txt'), sep=r'\s+', header=0)
  162. power = pd.read_csv(os.path.join(self.base_path, self.opt.doc_mapping['power_area']+'.txt'), sep=r'\s+', header=0)
  163. plant_type = int(basic.loc[basic['PropertyID'].tolist().index('PlantType'), 'Value'])
  164. area_id = int(basic.loc[basic['PropertyID'].tolist().index('AreaId'), 'Value'])
  165. assert plant_type == 0 or plant_type == 1
  166. area_cap = float(basic.loc[basic['PropertyID'].tolist().index('AreaCap'), 'Value'])
  167. return types.SimpleNamespace(**{
  168. 'power': power,
  169. 'area_cap': area_cap,
  170. 'area_id': area_id
  171. })
  172. except Exception as e:
  173. print(f"Region Error loading: {str(e)}")
  174. return None
  175. def get_material_cdq(self):
  176. pass
  177. if __name__ == "__main__":
  178. run_code = 0