diff --git a/HSP2/PLANK_Class.py b/HSP2/PLANK_Class.py index eefccc16..c2c1a56f 100644 --- a/HSP2/PLANK_Class.py +++ b/HSP2/PLANK_Class.py @@ -1122,7 +1122,7 @@ def baldth(nsfg,no3,tam,po4,paldh,naldh,aldl,aldh,mbal,dox,anaer,oxald,bal,depco # use unit death rate to compute death rate; dthbal is expressed # as umoles of phosphorus per liter per interval - return (ald * bal) + slof # dthbal + return (ald * bal) + slof, bal # dthbal def balrx(self, ballit,tw,talgrl,talgrh,talgrm,malgr,cmmp, cmmnp,tamfg,amrfg,nsfg,cmmn,cmmlt,delt60, @@ -1162,7 +1162,7 @@ def balrx(self, ballit,tw,talgrl,talgrh,talgrm,malgr,cmmp, cmmnp,tamfg,amrfg,nsf grobal = self.grochk (po4,no3,tam,phfg,decfg,baco2,cvbpc,cvbpn,nsfg,nmingr,pmingr,cmingr,i0,grtotn,grobal) # calculate benthic algae death, baldth only called here - dthbal = self.baldth(nsfg,no3,tam,po4,paldh,naldh,aldl,aldh,mbal,dox,anaer,oxald,bal,depcor) + (dthbal, bal) = self.baldth(nsfg,no3,tam,po4,paldh,naldh,aldl,aldh,mbal,dox,anaer,oxald,bal,depcor) bal += grobal # determine the new benthic algae population diff --git a/HSP2tools/HDF5.py b/HSP2tools/HDF5.py index e4ab929d..6b70e6cb 100644 --- a/HSP2tools/HDF5.py +++ b/HSP2tools/HDF5.py @@ -10,6 +10,9 @@ from threading import Lock class HDF5: + + REQUIRES_MAPPING = ['GQUAL','CONS'] + def __init__(self, file_name:str) -> None: self.file_name = file_name self.aliases = self._read_aliases_csv() @@ -17,26 +20,37 @@ def __init__(self, file_name:str) -> None: self.lock = Lock() self.gqual_prefixes = self._read_gqual_mapping() + self.cons_prefixes = self._read_cons_mapping() + self.iqual_prefixes = self._read_iqual_mapping() - def _read_gqual_mapping(self) -> Dict[str,str]: - """"GQUAL is based on number which corresponds to the parameter - however which number is assoicated with which parameter changes - based on the UCI file. 
Need to read from GQUAL tables + """Some modules, like GQUAL, allow for number which corresponds to the constituent + being modeled. However which number is associated with which parameter changes + based on the UCI file. Need to read from specification tables + """ - - gqual_prefixes = {} - for i in range(1,7): + dict_mappings = {} + for i in range(1,nquals): try: with pd.HDFStore(self.file_name,'r') as store: - df = pd.read_hdf(store,f'RCHRES/GQUAL/GQUAL{i}') + df = pd.read_hdf(store,f'{key}{i}') row = df.iloc[0] - gqid = row['GQID'] - gqual_prefixes[gqid] = str(i) + gqid = row[target_col] + dict_mappings[gqid] = str(i) except KeyError: - #Mean no gqual number (e.g. QUAL3) for this run + #Means no nqual number (e.g. GQUAL3) for this run pass - return gqual_prefixes + return dict_mappings + def _read_gqual_mapping(self) -> Dict[str,str]: + return self._read_nqual_mapping(R'RCHRES/GQUAL/GQUAL', 'GQID', 7) + + def _read_cons_mapping(self) -> Dict[str,str]: + return self._read_nqual_mapping(R'RCHRES/CONS/CONS','CONID', 7) + + def _read_iqual_mapping(self) -> Dict[str,str]: + """placeholder - for implementation similar to gqual - but for current test just assume 1""" + return {'':'1'} + def _read_aliases_csv(self) -> Dict[Tuple[str,str,str],str]: datapath = os.path.join(HSP2tools.__path__[0], 'data', 'HBNAliases.csv') df = pd.read_csv(datapath) @@ -52,11 +66,12 @@ def get_time_series(self, operation:str, id:str, constituent:str, activity:str) #We still need a special case for IMPLAND/IQUAL and PERLAND/PQUAL constituent_prefix = '' - if activity == 'GQUAL': + if activity in self.REQUIRES_MAPPING: constituent_prefix = '' - for key, value in self.gqual_prefixes.items(): + prefix_dict = getattr(self, f'{activity.lower()}_prefixes') + for key, value in prefix_dict.items(): if constituent.startswith(key): - constituent_prefix = f'GQUAL{value}_' + constituent_prefix = 
f'{activity}{value}_' constituent = constituent.replace(key,'') key = (operation,id,activity) diff --git a/HSP2tools/data/HBNAliases.csv b/HSP2tools/data/HBNAliases.csv index 0896d00c..e136566c 100644 --- a/HSP2tools/data/HBNAliases.csv +++ b/HSP2tools/data/HBNAliases.csv @@ -127,3 +127,14 @@ RCHRES,PLANK,TP,PTOTCONC RCHRES,PLANK,ROTORN,NTOTORGOUT RCHRES,PLANK,ROTORP,PTOTORGOUT RCHRES,PLANK,ROTORC,CTOTORGOUT +IMPLND,IQUAL,IQADDR,IQADDRCOD +IMPLND,IQUAL,IQADEP,IQADEPCOD +IMPLND,IQUAL,IQADWT,IQADWTCOD +IMPLND,IQUAL,SOQC,SOQCCOD +IMPLND,IQUAL,SOQO,SOQOCOD +IMPLND,IQUAL,SOQOC,SOQOCCOD +IMPLND,IQUAL,SOQS,SOQSCOD +IMPLND,IQUAL,SOQSP,SOQSPCOD +IMPLND,IQUAL,SOQUAL,SOQUALCOD +IMPLND,IQUAL,SQO,SQOCOD +RCHRES,CONS,CON,CONC diff --git a/tests/convert/regression_base.py b/tests/convert/regression_base.py index 8b4a8978..7396f4a7 100644 --- a/tests/convert/regression_base.py +++ b/tests/convert/regression_base.py @@ -1,3 +1,4 @@ +from datetime import time import os import inspect import webbrowser @@ -6,10 +7,14 @@ import pandas as pd import numpy as np -from typing import Dict, List, Tuple +from typing import Dict, List, Tuple, Union from concurrent.futures import ThreadPoolExecutor, as_completed, thread + +OperationsTuple = Tuple[str,str,str,str,str] +ResultsTuple = Tuple[bool,bool,bool,float] + class RegressTest(object): def __init__(self, compare_case:str, operations:List[str]=[], activities:List[str]=[], tcodes:List[str] = ['2'], ids:List[str] = [], threads:int=os.cpu_count() - 1) -> None: @@ -38,13 +43,22 @@ def _init_files(self): def _get_hbn_data(self, test_dir: str) -> None: sub_dir = os.path.join(test_dir, 'HSPFresults') + self.hspf_data_collection = {} for file in os.listdir(sub_dir): if file.lower().endswith('.hbn'): - self.hspf_data = HBNOutput(os.path.join(test_dir, sub_dir, file)) - break - self.hspf_data.read_data() - - def _get_hdf5_data(self, test_dir: str) -> List[HDF5]: + hspf_data = HBNOutput(os.path.join(test_dir, sub_dir, file)) + hspf_data.read_data() + 
for key in hspf_data.output_dictionary.keys(): + self.hspf_data_collection[key] = hspf_data + + def get_hspf_time_series(self, ops:OperationsTuple) -> Union[pd.Series,None]: + operation, activity, id, constituent, tcode = ops + key = f'{operation}_{activity}_{id}_{tcode}' + hspf_data = self.hspf_data_collection[key] + series = hspf_data.get_time_series(operation, int(id), constituent, activity, 'hourly') + return series + + def _get_hdf5_data(self, test_dir: str) -> None: sub_dir = os.path.join(test_dir, 'HSP2results') for file in os.listdir(sub_dir): if file.lower().endswith('.h5') or file.lower().endswith('.hdf'): @@ -62,12 +76,12 @@ def should_compare(self, operation:str, activity:str, id:str, tcode:str) -> bool return False return True - def generate_report(self, file:str, results: Dict[Tuple[str,str,str,str,str],Tuple[bool,bool,bool,float]]) -> None: + def generate_report(self, file:str, results: Dict[OperationsTuple,ResultsTuple]) -> None: html = self.make_html_report(results) self.write_html(file,html) webbrowser.open_new_tab('file://' + file) - def make_html_report(self, results_dict:Dict[Tuple[str,str,str,str,str],Tuple[bool,bool,bool,float]]) -> str: + def make_html_report(self, results_dict:Dict[OperationsTuple,ResultsTuple]) -> str: """populates html table""" style_th = 'style="text-align:left"' style_header = 'style="border:1px solid; background-color:#EEEEEE"' @@ -75,13 +89,14 @@ def make_html_report(self, results_dict:Dict[Tuple[str,str,str,str,str],Tuple[bo html = f'

CONVERSION TEST REPORT

\n' html += f'\n' - for key in self.hspf_data.output_dictionary.keys(): + for key in self.hspf_data_collection.keys(): operation, activity, opn_id, tcode = key.split('_') if not self.should_compare(operation, activity, opn_id, tcode): continue html += f'\n' html += f'\n' - for cons in self.hspf_data.output_dictionary[key]: + hspf_data = self.hspf_data_collection[key] + for cons in hspf_data.output_dictionary[key]: result = results_dict[(operation,activity,opn_id, cons, tcode)] no_data_hsp2, no_data_hspf, match, diff = result html += self.make_html_comp_row(cons, no_data_hsp2, no_data_hspf, match, diff) @@ -110,16 +125,17 @@ def write_html(self, file:str, html:str) -> None: with open(file, 'w') as f: f.write(html) - def run_test(self) -> Dict[Tuple[str,str,str,str,str],Tuple[bool,bool,bool,float]]: + def run_test(self) -> Dict[OperationsTuple,ResultsTuple]: futures = {} results_dict = {} with ThreadPoolExecutor(max_workers=self.threads) as executor: - for key in self.hspf_data.output_dictionary.keys(): + for key in self.hspf_data_collection.keys(): (operation, activity, opn_id, tcode) = key.split('_') if not self.should_compare(operation, activity, opn_id, tcode): continue - for cons in self.hspf_data.output_dictionary[key]: + hspf_data = self.hspf_data_collection[key] + for cons in hspf_data.output_dictionary[key]: params = (operation,activity,opn_id,cons,tcode) futures[executor.submit(self.check_con, params)] = params @@ -129,13 +145,13 @@ def run_test(self) -> Dict[Tuple[str,str,str,str,str],Tuple[bool,bool,bool,float return results_dict - def check_con(self, params:Tuple[str,str,str,str,str]) -> Tuple[bool,bool,bool,float]: + def check_con(self, params:OperationsTuple) -> ResultsTuple: """Performs comparision of single constituent""" operation, activity, id, constituent, tcode = params print(f' {operation}_{id} {activity} {constituent}\n') ts_hsp2 = self.hsp2_data.get_time_series(operation, id, constituent, activity) - ts_hspf = 
self.hspf_data.get_time_series(operation, int(id), constituent, activity, 'hourly') + ts_hspf = self.get_hspf_time_series(params) no_data_hsp2 = ts_hsp2 is None no_data_hspf = ts_hspf is None @@ -157,22 +173,20 @@ def check_con(self, params:Tuple[str,str,str,str,str]) -> Tuple[bool,bool,bool,f elif constituent == 'QTOTAL' or constituent == 'HTEXCH' : tolerance = max(abs(ts_hsp2.values.min()), abs(ts_hsp2.values.max())) * 1e-3 - ts_hsp2, ts_hspf = self.validate_time_series(ts_hsp2, ts_hspf, - self.hsp2_data, self.hspf_data, operation, activity, id, constituent) + ts_hsp2, ts_hspf = self.validate_time_series(ts_hsp2, ts_hspf, operation, activity, id, constituent) match, diff = self.compare_time_series(ts_hsp2, ts_hspf, tolerance) return (no_data_hsp2, no_data_hspf, match, diff) def fill_nan_and_null(self, timeseries:pd.Series, replacement_value:float = 0.0) -> pd.Series: - """Replaces any nan or HSPF nulls -1.0e26 with provided replacement_value""" + """Replaces any nan or HSPF nulls -1.0e30 with provided replacement_value""" timeseries = timeseries.fillna(replacement_value) - timeseries = timeseries.replace(-1.0e26, replacement_value) + timeseries = timeseries.where(timeseries > -1.0e30, replacement_value) return timeseries - def validate_time_series(self, ts_hsp2:pd.Series, ts_hspf:pd.Series, - hsp2_data:HDF5, hspf_data:HBNOutput, operation:str, activity:str, - id:str, cons:str) -> Tuple[pd.Series, pd.Series]: + def validate_time_series(self, ts_hsp2:pd.Series, ts_hspf:pd.Series, operation:str, + activity:str, id:str, cons:str) -> Tuple[pd.Series, pd.Series]: """ validates a corrects time series to avoid false differences """ # In some test cases it looked like HSP2 was executing for a single extra time step @@ -187,8 +201,11 @@ def validate_time_series(self, ts_hsp2:pd.Series, ts_hspf:pd.Series, ### special cases # if tiny suro in one and no suro in the other, don't trigger on suro-dependent numbers if activity == 'PWTGAS' and cons in ['SOTMP', 'SODOX', 'SOCO2']: 
- ts_suro_hsp2 = hsp2_data.get_time_series(operation, id, "SURO", "PWATER") - ts_suro_hspf = hspf_data.get_time_series(operation, int(id), "SURO", "PWATER", 'hourly') + ts_suro_hsp2 = self.hsp2_data.get_time_series(operation, id, 'SURO', 'PWATER') + ts_suro_hsp2 = self.fill_nan_and_null(ts_suro_hsp2) + ts_suro_hspf = self.get_hspf_time_series((operation, 'PWATER', id, 'SURO', 2)) + ts_suro_hspf = self.fill_nan_and_null(ts_suro_hspf) + idx_zero_suro_hsp2 = ts_suro_hsp2 == 0 idx_low_suro_hsp2 = ts_suro_hsp2 < 1.0e-8 @@ -200,7 +217,8 @@ def validate_time_series(self, ts_hsp2:pd.Series, ts_hspf:pd.Series, # if volume in reach is going to zero, small concentration differences are not signficant if activity == 'SEDTRN' and cons in ['SSEDCLAY', 'SSEDTOT']: - ts_vol_hsp2 = hsp2_data.get_time_series(operation, id, "VOL", "HYDR") + ts_vol_hsp2 = self.hsp2_data.get_time_series(operation, id, "VOL", "HYDR") + ts_vol_hsp2 = self.fill_nan_and_null(ts_vol_hsp2) idx_low_vol = ts_vol_hsp2 < 1.0e-4 ts_hsp2.loc[idx_low_vol] = ts_hsp2.loc[idx_low_vol] = 0
{key}
ConstituentMax DiffMatchNote