Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions HSP2/PLANK_Class.py
Original file line number Diff line number Diff line change
Expand Up @@ -1122,7 +1122,7 @@ def baldth(nsfg,no3,tam,po4,paldh,naldh,aldl,aldh,mbal,dox,anaer,oxald,bal,depco

# use unit death rate to compute death rate; dthbal is expressed
# as umoles of phosphorus per liter per interval
return (ald * bal) + slof # dthbal
return (ald * bal) + slof, bal # dthbal


def balrx(self, ballit,tw,talgrl,talgrh,talgrm,malgr,cmmp, cmmnp,tamfg,amrfg,nsfg,cmmn,cmmlt,delt60,
Expand Down Expand Up @@ -1162,7 +1162,7 @@ def balrx(self, ballit,tw,talgrl,talgrh,talgrm,malgr,cmmp, cmmnp,tamfg,amrfg,nsf
grobal = self.grochk (po4,no3,tam,phfg,decfg,baco2,cvbpc,cvbpn,nsfg,nmingr,pmingr,cmingr,i0,grtotn,grobal)

# calculate benthic algae death, baldth only called here
dthbal = self.baldth(nsfg,no3,tam,po4,paldh,naldh,aldl,aldh,mbal,dox,anaer,oxald,bal,depcor)
(dthbal, bal) = self.baldth(nsfg,no3,tam,po4,paldh,naldh,aldl,aldh,mbal,dox,anaer,oxald,bal,depcor)

bal += grobal # determine the new benthic algae population

Expand Down
45 changes: 30 additions & 15 deletions HSP2tools/HDF5.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,33 +10,47 @@
from threading import Lock

class HDF5:

REQUIRES_MAPPING = ['GQUAL','CONS']

def __init__(self, file_name:str) -> None:
self.file_name = file_name
self.aliases = self._read_aliases_csv()
self.data = {}
self.lock = Lock()

self.gqual_prefixes = self._read_gqual_mapping()
self.cons_prefixes = self._read_cons_mapping()
self.iqual_prefixes = self._read_iqual_mapping()

def _read_gqual_mapping(self) -> Dict[str,str]:
""""GQUAL is based on number which corresponds to the parameter
however which number is assoicated with which parameter changes
based on the UCI file. Need to read from GQUAL tables
def _read_nqual_mapping(self, key:str, target_col:str, nquals:int = 10) -> Dict[str,str]:
"""Some modules, like GQUAL, allow for number which corresponds to the consistent
being modeled. However which number is assoicated with which parameter changes
based on the UCI file. Need to read from specification tables
"""

gqual_prefixes = {}
for i in range(1,7):
dict_mappings = {}
for i in range(1,nquals):
try:
with pd.HDFStore(self.file_name,'r') as store:
df = pd.read_hdf(store,f'RCHRES/GQUAL/GQUAL{i}')
df = pd.read_hdf(store,f'{key}{i}')
row = df.iloc[0]
gqid = row['GQID']
gqual_prefixes[gqid] = str(i)
gqid = row[target_col]
dict_mappings[gqid] = str(i)
except KeyError:
#Mean no gqual number (e.g. QUAL3) for this run
#Means no nqual number (e.g. GQUAL3) for this run
pass
return gqual_prefixes
return dict_mappings

def _read_gqual_mapping(self) -> Dict[str,str]:
return self._read_nqual_mapping(R'RCHRES/GQUAL/GQUAL', 'GQID', 7)

def _read_cons_mapping(self) -> Dict[str,str]:
return self._read_nqual_mapping(R'RCHRES/CONS/CONS','CONID', 7)

def _read_iqual_mapping(self) -> Dict[str,str]:
"""placeholder - for implementation similar to gqual - but for current test just assume 1"""
return {'':'1'}

def _read_aliases_csv(self) -> Dict[Tuple[str,str,str],str]:
datapath = os.path.join(HSP2tools.__path__[0], 'data', 'HBNAliases.csv')
df = pd.read_csv(datapath)
Expand All @@ -52,11 +66,12 @@ def get_time_series(self, operation:str, id:str, constituent:str, activity:str)

#We still need a special case for IMPLAND/IQUAL and PERLAND/PQUAL
constituent_prefix = ''
if activity == 'GQUAL':
if activity in self.REQUIRES_MAPPING:
constituent_prefix = ''
for key, value in self.gqual_prefixes.items():
prefix_dict = getattr(self, f'{activity.lower()}_prefixes')
for key, value in prefix_dict.items():
if constituent.startswith(key):
constituent_prefix = f'GQUAL{value}_'
constituent_prefix = f'{activity}{value}_'
constituent = constituent.replace(key,'')

key = (operation,id,activity)
Expand Down
11 changes: 11 additions & 0 deletions HSP2tools/data/HBNAliases.csv
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,14 @@ RCHRES,PLANK,TP,PTOTCONC
RCHRES,PLANK,ROTORN,NTOTORGOUT
RCHRES,PLANK,ROTORP,PTOTORGOUT
RCHRES,PLANK,ROTORC,CTOTORGOUT
IMPLND,IQUAL,IQADDR,IQADDRCOD
IMPLND,IQUAL,IQADEP,IQADEPCOD
IMPLND,IQUAL,IQADWT,IQADWTCOD
IMPLND,IQUAL,SOQC,SOQCCOD
IMPLND,IQUAL,SOQO,SOQOCOD
IMPLND,IQUAL,SOQOC,SOQOCCOD
IMPLND,IQUAL,SOQS,SOQSCOD
IMPLND,IQUAL,SOQSP,SOQSPCOD
IMPLND,IQUAL,SOQUAL,SOQUALCOD
IMPLND,IQUAL,SQO,SQOCOD
RCHRES,CONS,CON,CONC
68 changes: 43 additions & 25 deletions tests/convert/regression_base.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from datetime import time
import os
import inspect
import webbrowser
Expand All @@ -6,10 +7,14 @@
import pandas as pd
import numpy as np

from typing import Dict, List, Tuple
from typing import Dict, List, Tuple, Union

from concurrent.futures import ThreadPoolExecutor, as_completed, thread


OperationsTuple = Tuple[str,str,str,str,str]
ResultsTuple = Tuple[bool,bool,bool,float]

class RegressTest(object):
def __init__(self, compare_case:str, operations:List[str]=[], activities:List[str]=[],
tcodes:List[str] = ['2'], ids:List[str] = [], threads:int=os.cpu_count() - 1) -> None:
Expand Down Expand Up @@ -38,13 +43,22 @@ def _init_files(self):

def _get_hbn_data(self, test_dir: str) -> None:
sub_dir = os.path.join(test_dir, 'HSPFresults')
self.hspf_data_collection = {}
for file in os.listdir(sub_dir):
if file.lower().endswith('.hbn'):
self.hspf_data = HBNOutput(os.path.join(test_dir, sub_dir, file))
break
self.hspf_data.read_data()

def _get_hdf5_data(self, test_dir: str) -> List[HDF5]:
hspf_data = HBNOutput(os.path.join(test_dir, sub_dir, file))
hspf_data.read_data()
for key in hspf_data.output_dictionary.keys():
self.hspf_data_collection[key] = hspf_data

def get_hspf_time_series(self, ops:OperationsTuple) -> Union[pd.Series,None]:
operation, activity, id, constituent, tcode = ops
key = f'{operation}_{activity}_{id}_{tcode}'
hspf_data = self.hspf_data_collection[key]
series = hspf_data.get_time_series(operation, int(id), constituent, activity, 'hourly')
return series

def _get_hdf5_data(self, test_dir: str) -> None:
sub_dir = os.path.join(test_dir, 'HSP2results')
for file in os.listdir(sub_dir):
if file.lower().endswith('.h5') or file.lower().endswith('.hdf'):
Expand All @@ -62,26 +76,27 @@ def should_compare(self, operation:str, activity:str, id:str, tcode:str) -> bool
return False
return True

def generate_report(self, file:str, results: Dict[Tuple[str,str,str,str,str],Tuple[bool,bool,bool,float]]) -> None:
def generate_report(self, file:str, results: Dict[OperationsTuple,ResultsTuple]) -> None:
html = self.make_html_report(results)
self.write_html(file,html)
webbrowser.open_new_tab('file://' + file)

def make_html_report(self, results_dict:Dict[Tuple[str,str,str,str,str],Tuple[bool,bool,bool,float]]) -> str:
def make_html_report(self, results_dict:Dict[OperationsTuple,ResultsTuple]) -> str:
"""populates html table"""
style_th = 'style="text-align:left"'
style_header = 'style="border:1px solid; background-color:#EEEEEE"'

html = f'<html><header><h1>CONVERSION TEST REPORT</h1></header><body>\n'
html += f'<table style="border:1px solid">\n'

for key in self.hspf_data.output_dictionary.keys():
for key in self.hspf_data_collection.keys():
operation, activity, opn_id, tcode = key.split('_')
if not self.should_compare(operation, activity, opn_id, tcode):
continue
html += f'<tr><th colspan=5 {style_header}>{key}</th></tr>\n'
html += f'<tr><th></th><th {style_th}>Constituent</th><th {style_th}>Max Diff</th><th>Match</th><th>Note</th></tr>\n'
for cons in self.hspf_data.output_dictionary[key]:
hspf_data = self.hspf_data_collection[key]
for cons in hspf_data.output_dictionary[key]:
result = results_dict[(operation,activity,opn_id, cons, tcode)]
no_data_hsp2, no_data_hspf, match, diff = result
html += self.make_html_comp_row(cons, no_data_hsp2, no_data_hspf, match, diff)
Expand Down Expand Up @@ -110,16 +125,17 @@ def write_html(self, file:str, html:str) -> None:
with open(file, 'w') as f:
f.write(html)

def run_test(self) -> Dict[Tuple[str,str,str,str,str],Tuple[bool,bool,bool,float]]:
def run_test(self) -> Dict[OperationsTuple,ResultsTuple]:
futures = {}
results_dict = {}

with ThreadPoolExecutor(max_workers=self.threads) as executor:
for key in self.hspf_data.output_dictionary.keys():
for key in self.hspf_data_collection.keys():
(operation, activity, opn_id, tcode) = key.split('_')
if not self.should_compare(operation, activity, opn_id, tcode):
continue
for cons in self.hspf_data.output_dictionary[key]:
hspf_data = self.hspf_data_collection[key]
for cons in hspf_data.output_dictionary[key]:
params = (operation,activity,opn_id,cons,tcode)
futures[executor.submit(self.check_con, params)] = params

Expand All @@ -129,13 +145,13 @@ def run_test(self) -> Dict[Tuple[str,str,str,str,str],Tuple[bool,bool,bool,float

return results_dict

def check_con(self, params:Tuple[str,str,str,str,str]) -> Tuple[bool,bool,bool,float]:
def check_con(self, params:OperationsTuple) -> ResultsTuple:
"""Performs comparision of single constituent"""
operation, activity, id, constituent, tcode = params
print(f' {operation}_{id} {activity} {constituent}\n')

ts_hsp2 = self.hsp2_data.get_time_series(operation, id, constituent, activity)
ts_hspf = self.hspf_data.get_time_series(operation, int(id), constituent, activity, 'hourly')
ts_hspf = self.get_hspf_time_series(params)

no_data_hsp2 = ts_hsp2 is None
no_data_hspf = ts_hspf is None
Expand All @@ -157,22 +173,20 @@ def check_con(self, params:Tuple[str,str,str,str,str]) -> Tuple[bool,bool,bool,f
elif constituent == 'QTOTAL' or constituent == 'HTEXCH' :
tolerance = max(abs(ts_hsp2.values.min()), abs(ts_hsp2.values.max())) * 1e-3

ts_hsp2, ts_hspf = self.validate_time_series(ts_hsp2, ts_hspf,
self.hsp2_data, self.hspf_data, operation, activity, id, constituent)
ts_hsp2, ts_hspf = self.validate_time_series(ts_hsp2, ts_hspf, operation, activity, id, constituent)

match, diff = self.compare_time_series(ts_hsp2, ts_hspf, tolerance)

return (no_data_hsp2, no_data_hspf, match, diff)

def fill_nan_and_null(self, timeseries:pd.Series, replacement_value:float = 0.0) -> pd.Series:
"""Replaces any nan or HSPF nulls -1.0e26 with provided replacement_value"""
"""Replaces any nan or HSPF nulls -1.0e30 with provided replacement_value"""
timeseries = timeseries.fillna(replacement_value)
timeseries = timeseries.replace(-1.0e26, replacement_value)
timeseries = timeseries.where(timeseries > -1.0e30, replacement_value)
return timeseries

def validate_time_series(self, ts_hsp2:pd.Series, ts_hspf:pd.Series,
hsp2_data:HDF5, hspf_data:HBNOutput, operation:str, activity:str,
id:str, cons:str) -> Tuple[pd.Series, pd.Series]:
def validate_time_series(self, ts_hsp2:pd.Series, ts_hspf:pd.Series, operation:str,
activity:str, id:str, cons:str) -> Tuple[pd.Series, pd.Series]:
""" validates a corrects time series to avoid false differences """

# In some test cases it looked like HSP2 was executing for a single extra time step
Expand All @@ -187,8 +201,11 @@ def validate_time_series(self, ts_hsp2:pd.Series, ts_hspf:pd.Series,
### special cases
# if tiny suro in one and no suro in the other, don't trigger on suro-dependent numbers
if activity == 'PWTGAS' and cons in ['SOTMP', 'SODOX', 'SOCO2']:
ts_suro_hsp2 = hsp2_data.get_time_series(operation, id, "SURO", "PWATER")
ts_suro_hspf = hspf_data.get_time_series(operation, int(id), "SURO", "PWATER", 'hourly')
ts_suro_hsp2 = self.hsp2_data.get_time_series(operation, id, 'SURO', 'PWATER')
ts_suro_hsp2 = self.fill_nan_and_null(ts_suro_hsp2)
ts_suro_hspf = self.get_hspf_time_series((operation, 'PWATER', id, 'SURO', 2))
ts_suro_hspf = self.fill_nan_and_null(ts_suro_hspf)


idx_zero_suro_hsp2 = ts_suro_hsp2 == 0
idx_low_suro_hsp2 = ts_suro_hsp2 < 1.0e-8
Expand All @@ -200,7 +217,8 @@ def validate_time_series(self, ts_hsp2:pd.Series, ts_hspf:pd.Series,

# if volume in reach is going to zero, small concentration differences are not significant
if activity == 'SEDTRN' and cons in ['SSEDCLAY', 'SSEDTOT']:
ts_vol_hsp2 = hsp2_data.get_time_series(operation, id, "VOL", "HYDR")
ts_vol_hsp2 = self.hsp2_data.get_time_series(operation, id, "VOL", "HYDR")
ts_vol_hsp2 = self.fill_nan_and_null(ts_vol_hsp2)

idx_low_vol = ts_vol_hsp2 < 1.0e-4
ts_hsp2.loc[idx_low_vol] = ts_hspf.loc[idx_low_vol] = 0
Expand Down