diff --git a/.bumpversion.cfg b/.bumpversion.cfg
index 4470bc7..d97a799 100644
--- a/.bumpversion.cfg
+++ b/.bumpversion.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 1.2.9
+current_version = 1.3.0
 commit = False
 tag = False
 tag_name = {new_version}
diff --git a/aodntools/__init__.py b/aodntools/__init__.py
index 5089ced..19b4f1d 100644
--- a/aodntools/__init__.py
+++ b/aodntools/__init__.py
@@ -1 +1 @@
-__version__ = '1.2.9'
+__version__ = '1.3.0'
diff --git a/aodntools/timeseries_products/Documentation/velocity_aggregated_timeseries.md b/aodntools/timeseries_products/Documentation/velocity_aggregated_timeseries.md
index 1e0144c..9ff6fb8 100644
--- a/aodntools/timeseries_products/Documentation/velocity_aggregated_timeseries.md
+++ b/aodntools/timeseries_products/Documentation/velocity_aggregated_timeseries.md
@@ -90,6 +90,8 @@ In order to keep track of the provenance of the aggregated file, accessory varia
 - `LONGITUDE(INSTRUMENT)`: LONGITUDE per instrument.
 - `NOMINAL_DEPTH(INSTRUMENT)`: nominal depth per instrument, from the input file’s variable `NOMINAL_DEPTH` or global attribute instrument_nominal_depth.
 - `SECONDS_TO_MIDDLE(INSTRUMENT)`: offset from the timestamp to the middle of the measurement window for each deployment
+- `CELL_INDEX(OBSERVATION)`: index of the corresponding measuring cell
+
 ### Attributes
diff --git a/aodntools/timeseries_products/Documentation/velocity_hourly_timeseries.md b/aodntools/timeseries_products/Documentation/velocity_hourly_timeseries.md
new file mode 100644
index 0000000..ddb7386
--- /dev/null
+++ b/aodntools/timeseries_products/Documentation/velocity_hourly_timeseries.md
@@ -0,0 +1,127 @@
+# Velocity Hourly Time Series Product
+
+- [Objective](#objective)
+- [Input](#input)
+- [Method](#method)
+- [Output](#output)
+
+
+## Objective
+
+This product provides aggregated, quality-controlled U, V and W velocity time-series files for each mooring site, binned into 1-hour intervals and including only in-water data flagged as "good" or "probably good" in the input files. QC flags are not included. Statistics related to the averaging process (standard deviation, minimum and maximum values, number of records binned) are stored as additional variables. For profiling (ADCP) instruments, the absolute depth of each measuring cell is calculated from the `DEPTH` measured at the instrument and the `HEIGHT_ABOVE_SENSOR` coordinate.
+
+The output from a single run of the code will be an aggregated file of all available measurements of the velocity components UCUR, VCUR and (where available) WCUR at one mooring site, binned into 1-hour intervals.
+
+## Input
+
+The aggregation function will accept a list of input files and the code of the mooring site (`site_code`), in addition to arguments that identify the path of input and output files.
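+
+For illustration, the generating function can also be called directly from Python. This is a minimal sketch: the signature matches the `velocity_hourly_aggregated` function in this product, but the file names and directories below are placeholders.
+
+```
+from aodntools.timeseries_products.velocity_hourly_timeseries import velocity_hourly_aggregated
+
+# placeholder list of FV01 velocity files (one per deployment at the site)
+files_to_agg = ['IMOS_ANMN-NRS_AETVZ_FV01_NRMMAI-deployment-1.nc',
+                'IMOS_ANMN-NRS_AETVZ_FV01_NRMMAI-deployment-2.nc']
+
+output_file, bad_files = velocity_hourly_aggregated(files_to_agg, site_code='NRMMAI',
+                                                    input_dir='/path/to/input/files',
+                                                    output_dir='./')
+```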
+
+The code aggregates variables and files that meet the following requirements:
+
+- File contains data from only one deployment of one instrument;
+- File is a delayed-mode, quality-controlled product (file version label “FV01”);
+- File is compliant with CF-1.6 and IMOS-1.4 conventions;
+- File contains, at the minimum, the components of current velocity (`UCUR`, `VCUR`), and variables `TIME`, `DEPTH`, `LATITUDE`, `LONGITUDE`, and `HEIGHT_ABOVE_SENSOR` in the case of ADCPs;
+- All files to be aggregated are from the same site, and have the same `site_code` attribute;
+- Variables to be aggregated have `TIME` and (optionally) `HEIGHT_ABOVE_SENSOR` as their only dimensions (or if `LATITUDE` and `LONGITUDE` are included as dimensions, they have size 1);
+- The in-water data are bounded by the global attributes `time_deployment_start` and `time_deployment_end`.
+
+The code is able to access the input files either locally or remotely via the OPeNDAP protocol.
+
+## Method
+
+Generating function:
+
+```
+usage: velocity_hourly_timeseries.py [-h] -site SITE_CODE -files FILENAMES
+                                     [-indir INPUT_DIR]
+                                     [-outdir OUTPUT_DIR]
+                                     [-download_url DOWNLOAD_URL]
+                                     [-opendap_url OPENDAP_URL]
+
+Concatenate X,Y,Z velocity variables from ALL instruments from ALL deployments
+from ONE site
+
+optional arguments:
+  -h, --help                  show this help message and exit
+  -site SITE_CODE             site code, like NRMMAI
+  -files FILENAMES            name of the file that contains the source URLs
+  -indir INPUT_DIR            base path of input files
+  -outdir OUTPUT_DIR          path where the result file will be written. Default ./
+  -download_url DOWNLOAD_URL  path to the download_url_prefix
+  -opendap_url OPENDAP_URL    path to the opendap_url_prefix
+```
+
+### Input file validation
+
+Before proceeding to the aggregation, each input file will be checked to ensure it meets the requirements (as specified above under Input). Any input files that fail to meet the requirements will be excluded from the aggregation, and their URLs listed in a global attribute `rejected_files`.
+
+### Dimensions
+
+The dimensions of the resulting file are determined as follows:
+
+- `OBSERVATION`: the total number of hourly-binned records (per instrument and, for ADCPs, per depth cell), excluding out-of-water data, from all input files;
+- `INSTRUMENT`: the number of instruments (i.e. number of files);
+- `strlen`: a fixed dimension of length 256 for character array variables.
+
+### Variables
+
+Only in-water velocity measurements flagged as “good” or “probably good” in the input files are included. These values are averaged into one-hour time bins (independently within each depth cell for ADCPs). Timestamps in the input files indicate the start of each measurement interval, and these _have not been shifted to the centre of the interval before binning_. This could lead to an artificial shift of up to half an hour in the output data. The size of this shift, where known, is recorded in the `SECONDS_TO_MIDDLE` variable.
+
+After this averaging, the velocity variables are flattened into one-dimensional arrays, and the arrays from each input file are concatenated into the output file. The resulting variables have dimension `OBSERVATION`.
+
+The binning intervals are one hour long, centred on the hour (i.e. HH:00:00). Each timestamp is repeated once for each ADCP depth cell, in order to match the shape of the velocity variables. The `TIME` coordinate variable in the output file also has dimension `OBSERVATION`.
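+
+As an illustrative sketch of this binning scheme (synthetic data; the product code applies the same shift-and-resample to each instrument and, for ADCPs, to each depth cell separately):
+
+```
+import numpy as np
+import pandas as pd
+
+# synthetic 10-minute velocity series for a single cell
+times = pd.date_range('2019-01-01 00:00', periods=18, freq='10min')
+df = pd.DataFrame({'UCUR': np.random.randn(18) * 0.1,
+                   'VCUR': np.random.randn(18) * 0.1}, index=times)
+
+# shift timestamps forward 30 minutes so each 1-hour bin is centred on the hour
+df.index = df.index + pd.Timedelta(minutes=30)
+binned = df.resample('1H')
+
+hourly_mean = binned.mean()    # the aggregated UCUR/VCUR values
+hourly_std = binned.std()      # stored as UCUR_std, VCUR_std
+hourly_count = binned.count()  # stored as UCUR_count, VCUR_count
+```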
+The `DEPTH` variables from input files are averaged into the same one-hour bins, and concatenated into a variable `DEPTH(OBSERVATION)`. In the case of ADCP instruments, `HEIGHT_ABOVE_SENSOR` is converted to absolute depth by subtracting each of the height values from the depth measurements at the instrument.
+
+All output variables with the `INSTRUMENT` dimension are sorted in chronological order, and the input files aggregated chronologically, according to the global attribute `time_deployment_start`.
+
+In order to keep track of the provenance of the aggregated file, accessory variables are created:
+
+- `instrument_index(OBSERVATION)`: index [0:number of files] of the instrument used, referencing the `INSTRUMENT` dimension.
+- `source_file(INSTRUMENT, strlen)`: URLs of the files used.
+- `instrument_id(INSTRUMENT, strlen)`: concatenated deployment_code, instrument and instrument_serial_number from the global attributes of each file.
+- `LATITUDE(INSTRUMENT)`: LATITUDE per instrument.
+- `LONGITUDE(INSTRUMENT)`: LONGITUDE per instrument.
+- `NOMINAL_DEPTH(INSTRUMENT)`: nominal depth per instrument, from the input file’s variable `NOMINAL_DEPTH` or global attribute instrument_nominal_depth.
+- `SECONDS_TO_MIDDLE(INSTRUMENT)`: offset from the timestamp to the middle of the measurement window for each deployment.
+- `CELL_INDEX(OBSERVATION)`: index of the corresponding measuring cell.
+
+### Attributes
+
+The variable attributes will comply with the IMOS metadata standards.
+
+The global metadata will be a set of IMOS standard attributes. Fixed attributes are read from a [JSON file](../velocity_hourly_timeseries_template.json) that contains the {key:value} pairs for each of them.
+
+Attributes specific to each aggregated product are added as follows:
+
+- `site_code`: obtained from the input files (should be the same in all of them);
+- `time_coverage_start`, `time_coverage_end`: set to the full range of TIME values in the aggregated file;
+- `geospatial_vertical_min`, `geospatial_vertical_max`: set to the full range of DEPTH values in the aggregated file;
+- `geospatial_lat_min`, `geospatial_lat_max`: set to the full range of LATITUDE values in the aggregated file;
+- `geospatial_lon_min`, `geospatial_lon_max`: set to the full range of LONGITUDE values in the aggregated file;
+- `date_created`: set to the date/time the product file is created;
+- `history`: set to “<date_created>: Aggregated file created.”;
+- `keywords`: set to a comma-separated list of the main variable names (“UCUR, VCUR, WCUR, DEPTH, AGGREGATED”);
+- `lineage`: a statement about how the file was created, including a link to the code used;
+- `title`: "Long Timeseries Velocity Hourly Aggregated product: UCUR, VCUR, WCUR, DEPTH at <site_code> between <time_coverage_start> and <time_coverage_end>";
+- `rejected_files`: a list of URLs for files that were in the input files list, but did not meet the input requirements.
+
+## Output
+
+The output from a single run of the code will be an aggregated file of all available current velocity measurements at one mooring site.
+
+The product will be delivered in netCDF4 classic format, compliant with the CF-1.6 and IMOS-1.4 conventions, and structured according to the [indexed ragged array representation](http://cfconventions.org/cf-conventions/v1.6.0/cf-conventions.html#_indexed_ragged_array_representation).
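+
+For example, the hourly series from a single instrument can be recovered from the indexed ragged array via the `instrument_index` variable. A minimal sketch (the file name is a placeholder):
+
+```
+import xarray as xr
+
+ds = xr.open_dataset('IMOS_..._velocity-hourly-timeseries_....nc')  # placeholder name
+
+# hourly observations contributed by the first instrument (instrument_index == 0)
+mask = ds['instrument_index'] == 0
+time0 = ds['TIME'][mask]
+ucur0 = ds['UCUR'][mask]
+```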
+
diff --git a/aodntools/timeseries_products/README.md b/aodntools/timeseries_products/README.md
index b8407b9..257a878 100644
--- a/aodntools/timeseries_products/README.md
+++ b/aodntools/timeseries_products/README.md
@@ -7,6 +7,7 @@ Documentation:
 - [Hourly time series (non-velocity)](Documentation/Hourly_timeseries.md)
 - [Gridded time series (Temperature)](Documentation/Gridded_timeseries.md)
 - [Velocity aggregated time series](Documentation/Velocity_agrregated_timeseries.md)
+- [Velocity hourly time series](Documentation/velocity_hourly_timeseries.md)
 
 Please use the [issue tracker](https://github.com/aodn/python-aodntools/issues) for feedback and suggestions related to these products.
diff --git a/aodntools/timeseries_products/velocity_hourly_timeseries.py b/aodntools/timeseries_products/velocity_hourly_timeseries.py
new file mode 100644
index 0000000..d3cd104
--- /dev/null
+++ b/aodntools/timeseries_products/velocity_hourly_timeseries.py
@@ -0,0 +1,340 @@
+import argparse
+import json
+import os
+import shutil
+import tempfile
+from datetime import datetime
+
+import numpy as np
+import pandas as pd
+import xarray as xr
+from netCDF4 import Dataset, num2date, stringtochar
+from pkg_resources import resource_filename
+
+import aodntools.timeseries_products.aggregated_timeseries as utils
+from aodntools import __version__
+from aodntools.timeseries_products.velocity_aggregated_timeseries import check_file
+
+TEMPLATE_JSON = resource_filename(__name__, 'velocity_hourly_timeseries_template.json')
+QC_FLAG_MAX = 2    ## highest accepted QC flag value ("probably good")
+TIME_UNITS = "days since 1950-01-01 00:00:00 UTC"
+TIME_CALENDAR = "gregorian"
+TIME_EPOCH = np.datetime64("1950-01-01T00:00:00")
+ONE_DAY = np.timedelta64(1, 'D')
+
+
+def cell_velocity_resample(df, binning_function):
+    """
+    Apply the given binning function to a dataframe already grouped into time bins.
+    If WCUR is not present, an array of NaNs is returned for it.
+    :param df: grouped dataframe
+    :param binning_function: name of standard numpy function used for binning
+    :return: binned UCUR, VCUR, WCUR and DEPTH values according to the binning function
+    """
+    df_binned = df.apply(binning_function)
+    UCUR = np.array(df_binned['UCUR'])
+    VCUR = np.array(df_binned['VCUR'])
+    if 'WCUR' in df_binned:
+        WCUR = np.array(df_binned['WCUR'])
+    else:
+        WCUR = np.full(len(df), np.nan)
+    DEPTH = np.array(df_binned['DEPTH'])
+
+    return UCUR, VCUR, WCUR, DEPTH
+
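+
+## Illustrative use of cell_velocity_resample (a sketch, not executed here): given
+## `df_1H = df_cell.resample('1H')` for one cell's QC'd data, the hourly means are
+## obtained with cell_velocity_resample(df_1H, 'mean'), and the per-bin record
+## counts with cell_velocity_resample(df_1H, 'count').
+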
+
+def append_resampled_values(nc_cell, ds, slice_start, binning_functions):
+    """
+    Resample U, V, W current and depth values from a single ADCP cell into hourly bins, and
+    append the mean values to the corresponding variables in the output dataset (starting at
+    index slice_start), along with additional statistical variables specified by binning_functions.
+    :param nc_cell: input xarray Dataset representing a single ADCP cell (or point time series)
+    :param ds: output netcdf4 Dataset to update with resampled values
+    :param slice_start: start index of the slice
+    :param binning_functions: list of numpy function names for binning
+    :return: end index of the slice
+    """
+    df_cell = nc_cell.squeeze().to_dataframe()
+    # shift the index forward 30min to centre the bins on the hour
+    df_cell.index = df_cell.index + pd.Timedelta(minutes=30)
+    # TODO: shift timestamps to centre of sampling interval
+
+    df_cell_1H = df_cell.resample('1H')
+    slice_end = len(df_cell_1H) + slice_start
+
+    # set binned timestamps
+    time_slice = (np.fromiter(df_cell_1H.groups.keys(), dtype='M8[ns]') - TIME_EPOCH) / ONE_DAY
+    ds['TIME'][slice_start:slice_end] = time_slice
+
+    # take the mean of the variables
+    ds['UCUR'][slice_start:slice_end], \
+    ds['VCUR'][slice_start:slice_end], \
+    ds['WCUR'][slice_start:slice_end], \
+    ds['DEPTH'][slice_start:slice_end] = cell_velocity_resample(df_cell_1H, 'mean')
+
+    for method in binning_functions:
+        ds['UCUR_' + method][slice_start:slice_end], \
+        ds['VCUR_' + method][slice_start:slice_end], \
+        ds['WCUR_' + method][slice_start:slice_end], \
+        ds['DEPTH_' + method][slice_start:slice_end] = cell_velocity_resample(df_cell_1H, method)
+
+    return slice_end
+
+
+## MAIN FUNCTION
+def velocity_hourly_aggregated(files_to_agg, site_code, input_dir='', output_dir='./',
+                               download_url_prefix=None, opendap_url_prefix=None):
+    """
+    Aggregate UCUR, VCUR and WCUR variables from the given files (from the same site) and average into hourly bins.
+    The vertical cells are flattened and the actual depth of each is calculated.
+    Additional metadata variables are stored to track the origin of the data.
+    :param files_to_agg: list of files to aggregate
+    :param site_code: site code
+    :param input_dir: base path where source files are stored
+    :param output_dir: path where the result file will be written
+    :param download_url_prefix: URL prefix for file download (to be prepended to paths in files_to_agg)
+    :param opendap_url_prefix: URL prefix for OPeNDAP access (to be prepended to paths in files_to_agg)
+    :return: file path of the hourly aggregated product, dict of rejected files: errors
+    """
+
+    varlist = ['UCUR', 'VCUR', 'WCUR', 'DEPTH']
+    binning_fun = ['max', 'min', 'std', 'count']
+
+    bad_files = {}
+
+    chunk_size = 90    ## size in days
+
+    ## default name for temporary file. It will be renamed at the end
+    _, temp_outfile = tempfile.mkstemp(suffix='.nc', dir=output_dir)
+
+    ## check that each input file meets the product requirements
+    print("CHECKING FILES...")
+    for index, file in enumerate(files_to_agg):
+        print(index, end=',', flush=True)
+        with xr.open_dataset(os.path.join(input_dir, file)) as nc:
+            error_list = check_file(nc, site_code)
+            if error_list:
+                bad_files.update({file: error_list})
+    print(" ")
+
+    ## remove bad files from the list
+    for file in bad_files.keys():
+        files_to_agg.remove(file)
+
+    ## sort the files in chronological order
+    files_to_agg = utils.sort_files(files_to_agg, input_dir=input_dir)
+
+    ## create netCDF file, dimensions (unlimited) and variables
+    ds = Dataset(temp_outfile, 'w', format='NETCDF4_CLASSIC')
+    OBSERVATION = ds.createDimension('OBSERVATION', size=None)
+    INSTRUMENT = ds.createDimension('INSTRUMENT', size=len(files_to_agg))
+    STRING256 = ds.createDimension("strlen", size=256)
+
+    obs_double_template = {'datatype': np.float64, 'zlib': True, 'dimensions': ('OBSERVATION'), "fill_value": 99999.0}
+    obs_float_template = {'datatype': np.float32, 'zlib': True, 'dimensions': ('OBSERVATION'), "fill_value": 99999.0}
+    obs_int_template = {'datatype': np.int16, 'zlib': True, 'dimensions': ('OBSERVATION')}
+    inst_S256_template = {'datatype': 'S1', 'dimensions': ('INSTRUMENT', "strlen")}
+    inst_float_template = {'datatype': np.float32, 'dimensions': ('INSTRUMENT')}
+    inst_double_template = {'datatype': np.float64, 'dimensions': ('INSTRUMENT')}
+
+    UCUR = ds.createVariable(varname='UCUR', **obs_float_template)
+    UCUR_max = ds.createVariable(varname='UCUR_max', **obs_float_template)
+    UCUR_min = ds.createVariable(varname='UCUR_min', **obs_float_template)
+    UCUR_std = ds.createVariable(varname='UCUR_std', **obs_float_template)
+    UCUR_count = ds.createVariable(varname='UCUR_count', **obs_int_template)
+    VCUR = ds.createVariable(varname='VCUR', **obs_float_template)
+    VCUR_max = ds.createVariable(varname='VCUR_max', **obs_float_template)
+    VCUR_min = ds.createVariable(varname='VCUR_min', **obs_float_template)
+    VCUR_std = ds.createVariable(varname='VCUR_std', **obs_float_template)
+    VCUR_count = ds.createVariable(varname='VCUR_count', **obs_int_template)
+    WCUR = ds.createVariable(varname='WCUR', **obs_float_template)
+    WCUR_max = ds.createVariable(varname='WCUR_max', **obs_float_template)
+    WCUR_min = ds.createVariable(varname='WCUR_min', **obs_float_template)
+    WCUR_std = ds.createVariable(varname='WCUR_std', **obs_float_template)
+    WCUR_count = ds.createVariable(varname='WCUR_count', **obs_int_template)
+
+    DEPTH = ds.createVariable(varname='DEPTH', **obs_float_template)
+    DEPTH_max = ds.createVariable(varname='DEPTH_max', **obs_float_template)
+    DEPTH_min = ds.createVariable(varname='DEPTH_min', **obs_float_template)
+    DEPTH_std = ds.createVariable(varname='DEPTH_std', **obs_float_template)
+    DEPTH_count = ds.createVariable(varname='DEPTH_count', **obs_int_template)
+
+    TIME = ds.createVariable(varname='TIME', **obs_double_template)
+    instrument_index = ds.createVariable(varname='instrument_index', **obs_int_template)
+
+    source_file = ds.createVariable(varname='source_file', **inst_S256_template)
+    instrument_id = ds.createVariable(varname='instrument_id', **inst_S256_template)
+    LATITUDE = ds.createVariable(varname='LATITUDE', **inst_double_template)
+    LONGITUDE = ds.createVariable(varname='LONGITUDE', **inst_double_template)
+    NOMINAL_DEPTH = ds.createVariable(varname='NOMINAL_DEPTH', **inst_float_template)
+    SECONDS_TO_MIDDLE = ds.createVariable(varname='SECONDS_TO_MIDDLE', **inst_float_template)
+    CELL_INDEX = ds.createVariable(varname='CELL_INDEX', **obs_int_template)
+
+
+    ## main loop
+    print('PROCESSING...')
+    slice_start = 0
+    for index, file in enumerate(files_to_agg):
+        print(index, end=",", flush=True)
+
+        ## this is for filling the slice of variables with INSTRUMENT dim
+        slice_instrument_start = slice_start
+
+        with xr.open_dataset(os.path.join(input_dir, file)) as nc:
+
+            is_2D = 'HEIGHT_ABOVE_SENSOR' in list(nc.variables)
+
+            ## mask values with QC flag>2
+            for var in varlist:
+                nc[var] = nc[var].where(nc[var+'_quality_control'] <= QC_FLAG_MAX)
+
+            ## process in chunks, in-water data only
+            chunk_start = np.datetime64(nc.attrs['time_deployment_start'])
+            chunk_end = np.datetime64(nc.attrs['time_deployment_end'])
+
+            time_increment = 60*60*24*chunk_size    ## chunk length in seconds
+            chunk_increment = np.timedelta64(time_increment, 's')
+            chunk_partial = chunk_start + chunk_increment
+            chunk_index = 0
+            while chunk_start < chunk_partial and chunk_start <= chunk_end:
+                nc_chunk = nc.where((nc.TIME >= chunk_start) & (nc.TIME < chunk_partial), drop=True)
+                if is_2D:
+                    ## process all cells, one by one
+                    heights = nc_chunk.HEIGHT_ABOVE_SENSOR.values
+                    for cell_idx, cell_height in enumerate(heights):
+                        ## get cell data, drop HEIGHT_ABOVE_SENSOR dim
+                        nc_cell = nc_chunk.sel(HEIGHT_ABOVE_SENSOR=cell_height)
+                        ## convert to absolute DEPTH
+                        nc_cell['DEPTH'] = nc_cell['DEPTH'] - cell_height
+                        slice_end = append_resampled_values(nc_cell[varlist], ds, slice_start, binning_fun)
+                        CELL_INDEX[slice_start:slice_end] = np.full(slice_end - slice_start, cell_idx, dtype=np.uint32)
+                        slice_start = slice_end
+                else:
+                    slice_end = append_resampled_values(nc_chunk[varlist], ds, slice_start, binning_fun)
+                    CELL_INDEX[slice_start:slice_end] = np.full(slice_end - slice_start, 0, dtype=np.uint32)
+                    slice_start = slice_end
+
+                chunk_start = chunk_partial
+                chunk_partial += chunk_increment
+                chunk_index += 1
+
+            ## metadata variables
+            instrument_index[slice_instrument_start:slice_end] = np.repeat(index, slice_end - slice_instrument_start)
+            LATITUDE[index] = nc.LATITUDE.values
+            LONGITUDE[index] = nc.LONGITUDE.values
+            NOMINAL_DEPTH[index] = np.array(utils.get_nominal_depth(nc))
+            source_file[index] = stringtochar(np.array(file, dtype='S256'))
+            instrument_id[index] = stringtochar(np.array(utils.get_instrument_id(nc), dtype='S256'))
+            ## add time offset to the middle of the measuring window, if it exists
+            if 'seconds_to_middle_of_measurement' in nc.TIME.attrs:
+                SECONDS_TO_MIDDLE[index] = nc.TIME.seconds_to_middle_of_measurement
+            else:
+                SECONDS_TO_MIDDLE[index] = np.nan
+
+    print(" ")
+    ## add attributes
+    with open(TEMPLATE_JSON) as json_file:
+        attribute_dictionary = json.load(json_file)
+    variable_attribute_dictionary = attribute_dictionary['_variables']
+    global_attribute_dictionary = attribute_dictionary['_global']
+
+    ## set variable attrs
+    for var in list(ds.variables):
+        ds[var].setncatts(variable_attribute_dictionary[var])
+
+    if download_url_prefix or opendap_url_prefix:
+        ds['source_file'].setncatts(utils.source_file_attributes(download_url_prefix, opendap_url_prefix))
+
+    ## set global attrs
+    timeformat = '%Y-%m-%dT%H:%M:%SZ'
+    file_timeformat = '%Y%m%d'
+
+    time_start = num2date(np.min(TIME[:]), TIME_UNITS, TIME_CALENDAR).strftime(timeformat)
+    time_end = num2date(np.max(TIME[:]), TIME_UNITS, TIME_CALENDAR).strftime(timeformat)
+    time_start_filename = num2date(np.min(TIME[:]), TIME_UNITS, TIME_CALENDAR).strftime(file_timeformat)
+    time_end_filename = num2date(np.max(TIME[:]), TIME_UNITS, TIME_CALENDAR).strftime(file_timeformat)
+
+    contributor_name, contributor_email, contributor_role = utils.get_contributors(files_to_agg=files_to_agg, input_dir=input_dir)
+    add_attribute = {
+        'title': ("Long Timeseries Velocity Hourly Aggregated product: " + ', '.join(varlist) + " at " +
+                  site_code + " between " + time_start + " and " + time_end),
+        'site_code': site_code,
+        'time_coverage_start': time_start,
+        'time_coverage_end': time_end,
+        'geospatial_vertical_min': np.float32(np.nanmin(ds['DEPTH'])),
+        'geospatial_vertical_max': np.float32(np.nanmax(ds['DEPTH'])),
+        'geospatial_lat_min': np.float64(np.min(ds['LATITUDE'])),
+        'geospatial_lat_max': np.float64(np.max(ds['LATITUDE'])),
+        'geospatial_lon_min': np.float64(np.min(ds['LONGITUDE'])),
+        'geospatial_lon_max': np.float64(np.max(ds['LONGITUDE'])),
+        'date_created': datetime.utcnow().strftime(timeformat),
+        'history': datetime.utcnow().strftime(timeformat) + ': Aggregated file created.',
+        'keywords': ', '.join(varlist + ['AGGREGATED']),
+        'rejected_files': "\n".join(bad_files.keys()),
+        'contributor_name': "; ".join(contributor_name),
+        'contributor_email': "; ".join(contributor_email),
+        'contributor_role': "; ".join(contributor_role),
+        'generating_code_version': __version__
+    }
+
+    ## add version
+    github_comment = ('\nThis file was created using https://github.com/aodn/python-aodntools/blob/'
+                      '{v}/aodntools/timeseries_products/{f}'.format(v=__version__, f=os.path.basename(__file__))
+                      )
+    global_attribute_dictionary['lineage'] += github_comment
+
+    global_attribute_dictionary.update(add_attribute)
+    ds.setncatts(dict(sorted(global_attribute_dictionary.items())))
+
+    ## NOTE: there is a possibility of having NaNs in DEPTH after the binning;
+    ## this triggers a warning when calculating the min/max DEPTH.
+    ## The dataset could be cleaned of NaNs before closing it.
+
+    ds.close()
+
+    ## create the output file name and rename the tmp file
+    facility_code = utils.get_facility_code(os.path.join(input_dir, files_to_agg[0]))
+    data_code = 'VZ'
+    product_type = 'hourly-timeseries'
+    file_version = 2
+    output_name = '_'.join(['IMOS', facility_code, data_code, time_start_filename, site_code, ('FV0' + str(file_version)),
+                            ("velocity-" + product_type),
+                            ('END-' + time_end_filename), 'C-' + datetime.utcnow().strftime(file_timeformat)]) + '.nc'
+    ncout_path = os.path.join(output_dir, output_name)
+    shutil.move(temp_outfile, ncout_path)
+
+    return ncout_path, bad_files
+
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser(description="Concatenate X,Y,Z velocity variables from ALL instruments from ALL deployments from ONE site")
+    parser.add_argument('-site', dest='site_code', help='site code, like NRMMAI', required=True)
+    parser.add_argument('-files', dest='filenames', help='name of the file that contains the source URLs', required=True)
+    parser.add_argument('-indir', dest='input_dir', help='base path of input files', default='', required=False)
+    parser.add_argument('-outdir', dest='output_dir', help='path where the result file will be written. Default ./',
+                        default='./', required=False)
+    parser.add_argument('-download_url', dest='download_url', help='path to the download_url_prefix',
+                        default='', required=False)
+    parser.add_argument('-opendap_url', dest='opendap_url', help='path to the opendap_url_prefix',
+                        default='', required=False)
+
+    args = parser.parse_args()
+
+    with open(args.filenames) as ff:
+        files_to_agg = [line.rstrip() for line in ff]
+
+    print(velocity_hourly_aggregated(files_to_agg=files_to_agg, site_code=args.site_code,
+                                     input_dir=args.input_dir, output_dir=args.output_dir,
+                                     download_url_prefix=args.download_url, opendap_url_prefix=args.opendap_url))
diff --git a/aodntools/timeseries_products/velocity_hourly_timeseries_template.json b/aodntools/timeseries_products/velocity_hourly_timeseries_template.json
new file mode 100644
index 0000000..9f9db2b
--- /dev/null
+++ b/aodntools/timeseries_products/velocity_hourly_timeseries_template.json
@@ -0,0 +1,219 @@
+{
+    "_variables": {
+        "TIME": {
+            "axis": "T",
+            "comment": "time stamp corresponds to the hour and represents binned data [-30, +30) minutes around the hour",
+            "long_name": "time",
+            "standard_name": "time",
+            "valid_max": 90000.0,
+            "valid_min": 0.0,
+            "units": "days since 1950-01-01 00:00:00 UTC",
+            "calendar": "gregorian"
+        },
+        "LATITUDE": {
+            "axis": "Y",
+            "long_name": "latitude",
+            "reference_datum": "WGS84 geographic coordinate system",
+            "standard_name": "latitude",
+            "units": "degrees_north",
+            "valid_max": 90.0,
+            "valid_min": -90.0
+        },
+        "LONGITUDE": {
+            "axis": "X",
+            "long_name": "longitude",
+            "reference_datum": "WGS84 geographic coordinate system",
+            "standard_name": "longitude",
+            "units": "degrees_east",
+            "valid_max": 180.0,
+            "valid_min": -180.0
+        },
+        "DEPTH": {
+            "coordinates": "TIME LATITUDE LONGITUDE NOMINAL_DEPTH",
+            "ancillary_variables": "DEPTH_min DEPTH_max DEPTH_std DEPTH_count",
+            "long_name": "mean actual depth",
+            "positive": "down",
+            "reference_datum": "sea surface",
+            "standard_name": "depth",
+            "units": "m",
+            "valid_max": 12000.0,
+            "valid_min": -5.0,
+            "cell_methods": "TIME: mean"
+        },
+        "DEPTH_max": {
+            "units": "m",
+            "standard_name": "depth",
+            "long_name": "maximum depth value in the bin, after rejection of flagged data",
+            "cell_methods": "TIME: maximum"
+        },
+        "DEPTH_min": {
+            "units": "m",
+            "standard_name": "depth",
+            "long_name": "minimum depth value in the bin, after rejection of flagged data",
+            "cell_methods": "TIME: minimum"
+        },
+        "DEPTH_std": {
+            "units": "m",
+            "standard_name": "depth",
+            "long_name": "standard deviation of depth values in the bin, after rejection of flagged data",
+            "cell_methods": "TIME: standard_deviation"
+        },
+        "DEPTH_count": {
+            "standard_name": "depth number_of_observations",
+            "units": "1",
+            "long_name": "number of depth observations in the bin, after rejection of flagged data"
+        },
+        "NOMINAL_DEPTH": {
+            "axis": "Z",
+            "long_name": "nominal depth",
+            "positive": "down",
+            "reference_datum": "sea surface",
+            "standard_name": "depth",
+            "units": "m",
+            "valid_max": 12000.0,
+            "valid_min": -5.0
+        },
+        "instrument_index": {
+            "long_name": "which instrument this obs is for",
+            "instance_dimension": "INSTRUMENT"
+        },
+        "instrument_id": {
+            "long_name": "source deployment code, instrument make, model, serial_number"
+        },
+        "source_file": {
+            "long_name": "source file for this instrument"
+        },
+        "UCUR": {
+            "coordinates": "TIME DEPTH LATITUDE LONGITUDE",
+            "ancillary_variables": "UCUR_max UCUR_min UCUR_std UCUR_count",
+            "long_name": "eastward_sea_water_velocity",
"eastward_sea_water_velocity", + "standard_name": "eastward_sea_water_velocity", + "units": "m s-1", + "cell_methods": "TIME: mean", + "valid_max": 10.0, + "valid_min": -10.0 + }, + "UCUR_max": { + "units": "m s-1", + "standard_name": "eastward_sea_water_velocity", + "long_name": "maximum eastward_sea_water_velocity value in the bin, after rejection of flagged data", + "cell_methods": "TIME: maximum" + }, + "UCUR_min": { + "units": "m s-1", + "standard_name": "eastward_sea_water_velocity", + "long_name": "minimum eastward_sea_water_velocity value in the bin, after rejection of flagged data", + "cell_methods": "TIME: minimum" + }, + "UCUR_std": { + "units": "m s-1", + "standard_name": "eastward_sea_water_velocity", + "long_name": "standard deviation of eastward_sea_water_velocity values in the bin, after rejection of flagged data", + "cell_methods": "TIME: standard_deviation" + }, + "UCUR_count": { + "standard_name": "eastward_sea_water_velocity number_of_observations", + "units": "1", + "long_name": "number of eastward_sea_water_velocity observations in the bin, after rejection of flagged data" + }, + "VCUR": { + "coordinates": "TIME DEPTH LATITUDE LONGITUDE", + "ancillary_variables": "VCUR_max, VCUR_min, VCUR_std, VCUR_count", + "long_name": "northward_sea_water_velocity", + "standard_name": "northward_sea_water_velocity", + "units": "m s-1", + "cell_methods": "TIME: mean", + "valid_max": 10.0, + "valid_min": -10.0 + }, + "VCUR_max": { + "units": "m s-1", + "standard_name": "northward_sea_water_velocity", + "long_name": "maximum northward_sea_water_velocity value in the bin, after rejection of flagged data", + "cell_methods": "TIME: maximum" + }, + "VCUR_min": { + "units": "m s-1", + "standard_name": "northward_sea_water_velocity", + "long_name": "minimum northward_sea_water_velocity value in the bin, after rejection of flagged data", + "cell_methods": "TIME: minimum" + }, + "VCUR_std": { + "units": "m s-1", + "standard_name": "northward_sea_water_velocity", + "long_name": "standard deviation of northward_sea_water_velocity values in the bin, after rejection of flagged data", + "cell_methods": "TIME: standard_deviation" + }, + "VCUR_count": { + "standard_name": "northward_sea_water_velocity number_of_observations", + "units": "1", + "long_name": "number of northward_sea_water_velocity observations in the bin, after rejection of flagged data" + }, + + "WCUR": { + "coordinates": "TIME DEPTH LATITUDE LONGITUDE", + "ancillary_variables": "WCUR_max, WCUR_min, WCUR_std, WCUR_count", + "long_name": "upward_sea_water_velocity", + "standard_name": "upward_sea_water_velocity", + "units": "m s-1", + "cell_methods": "TIME: mean", + "valid_max": 5.0, + "valid_min": -5.0 + }, + "WCUR_max": { + "units": "m s-1", + "standard_name": "upward_sea_water_velocity", + "long_name": "maximum upward_sea_water_velocity value in the bin, after rejection of flagged data", + "cell_methods": "TIME: maximum" + }, + "WCUR_min": { + "units": "m s-1", + "standard_name": "upward_sea_water_velocity", + "long_name": "minimum upward_sea_water_velocity value in the bin, after rejection of flagged data", + "cell_methods": "TIME: minimum" + }, + "WCUR_std": { + "units": "m s-1", + "standard_name": "upward_sea_water_velocity", + "long_name": "standard deviation of upward_sea_water_velocity values in the bin, after rejection of flagged data", + "cell_methods": "TIME: standard_deviation" + }, + "WCUR_count": { + "standard_name": "upward_sea_water_velocity number_of_observations", + "units": "1", + "long_name": "number of 
+        "CELL_INDEX": {
+            "long_name": "index of the corresponding measuring cell",
+            "comment": "Cell index is included for reference only and cannot be used to extract values at constant depth. The number and vertical spacing of cells can vary by instrument and deployment. The actual depth of any given cell can change between deployments, and also varies with time during a deployment. The closest cell to the sensor has index 0."
+        },
+        "SECONDS_TO_MIDDLE": {
+            "long_name": "offset from recorded timestamp to middle of the measurement window in the input file",
+            "units": "s"
+        }
+    },
+    "_global": {
+        "abstract": "Hourly Time-series Product: This file contains all measurements of quality-controlled U, V and W sea water velocity variables from all instruments deployed at the selected site, binned into 1-hour time intervals. Out-of-water measurements, and those flagged as bad by IMOS standard automated quality-control procedures, have been excluded. Timestamps in the input files indicate the start of each measurement interval (up to an hour in duration), and these have not been shifted to the centre of the interval before binning. Instrument details are stored as variables in order to keep a record of the origin of each measurement.",
+        "acknowledgement": "Any users of IMOS data are required to clearly acknowledge the source of the material derived from IMOS in the format: \"Data was sourced from the Integrated Marine Observing System (IMOS) - IMOS is a national collaborative research infrastructure, supported by the Australian Government.\" If relevant, also credit other organisations involved in collection of this particular datastream (as listed in 'credit' in the metadata record).",
+        "author": "Klein, Eduardo",
+        "author_email": "info@aodn.org.au",
+        "citation": "The citation in a list of references is: \"IMOS [year-of-data-download], [Title], [data-access-URL], accessed [date-of-access].\".",
+        "comment": "Timestamps in the input files indicate the start of each measurement interval (instrument-dependent; up to an hour in duration), and these have not been shifted to the centre of the interval before binning. This could lead to an artificial shift of up to half an hour in the output data. The size of this shift, where known, has been recorded in the SECONDS_TO_MIDDLE variable.",
+        "Conventions": "CF-1.6,IMOS-1.4",
+        "data_centre": "Australian Ocean Data Network (AODN)",
+        "data_centre_email": "info@aodn.org.au",
+        "disclaimer": "Data, products and services from IMOS are provided \"as is\" without any warranty as to fitness for a particular purpose.",
+        "featureType": "timeSeries",
+        "file_version": "Level 2 - Quality Controlled Data",
+        "file_version_quality_control": "Quality controlled data have been through quality assurance procedures such as automated routines and sensor calibration or visual inspection and flag of obvious errors. The data are in physical units using standard SI metric units with calibration and other pre-processing routines applied, all time and location values are in absolute coordinates to comply with standards and datum. Data includes flags for each measurement to indicate the estimated quality of the measurement. Metadata exists for the data or for the higher level dataset that the data belongs to. This is the standard IMOS data level and is what should be made available to AODN and to the IMOS community.",
+        "institution_references": "http://imos.org.au/facilities/aodn/",
+        "keywords_vocabulary": "IMOS parameter names. See https://github.com/aodn/imos-toolbox/blob/master/IMOS/imosParameters.txt",
+        "license": "http://creativecommons.org/licenses/by/4.0/",
+        "naming_authority": "IMOS",
+        "project": "Integrated Marine Observing System (IMOS)",
+        "references": "http://www.imos.org.au",
+        "source": "Mooring",
+        "standard_name_vocabulary": "NetCDF Climate and Forecast (CF) Metadata Convention Standard Name Table 45",
+        "lineage": "The aggregated UCUR, VCUR and WCUR are produced by sequentially concatenating the individual values in each of the input files after being binned into fixed 1-hour intervals. In the case of ADCPs, the current values at each measuring cell are referenced to its absolute DEPTH. The resulting variable has dimension OBSERVATION. The DEPTH variable is calculated from the DEPTH measurements at the instrument and the HEIGHT_ABOVE_SENSOR distance corresponding to each measurement cell. The values are summarised using the arithmetic mean. Additional variables derived from the binning process are also stored: minimum, maximum, standard deviation and number of observations in each time bin. The resulting variables have dimension OBSERVATION. The TIME variable from the input files, centred on the hour, is concatenated into a variable TIME(OBSERVATION). The DEPTH variable from the input files is averaged into the same 1-hour bins and concatenated into a variable DEPTH(OBSERVATION). If not present, fill values are stored. All output variables with the INSTRUMENT dimension are sorted in chronological order. In order to keep track of the provenance of the variables of interest (VoI) in the aggregated file, accessory variables are created."
+    }
+}
diff --git a/setup.py b/setup.py
index aed5148..2517dd6 100644
--- a/setup.py
+++ b/setup.py
@@ -4,8 +4,8 @@
     'jsonschema>=2.6.0,<3.0.0',
     'numpy>=1.13.0',
     'netCDF4>=1.5.3',
-    'pandas>=0.24.2',
-    'xarray>=0.11.3'
+    'pandas>=0.24.2,<0.25.0',
+    'xarray>=0.11.3,<0.14.0'
 ]
 
 TESTS_REQUIRE = [
@@ -26,7 +26,7 @@ setup(
     name=PACKAGE_NAME,
-    version='1.2.9',
+    version='1.3.0',
     packages=find_packages(exclude=PACKAGE_EXCLUDES),
     package_data=PACKAGE_DATA,
     url='https://github.com/aodn',