diff --git a/aodntools/timeseries_products/hourly_timeseries.py b/aodntools/timeseries_products/hourly_timeseries.py index 2e5be4e..66e5292 100644 --- a/aodntools/timeseries_products/hourly_timeseries.py +++ b/aodntools/timeseries_products/hourly_timeseries.py @@ -305,22 +305,22 @@ def PDresample_by_hour(df, function_dict, function_stats): :param df: pandas dataframe with ancillary variables and coords removed but with TIME as index :return: pandas dataframe """ - ## back the index 30min - df.index = df.index - pd.Timedelta(30, units='m') varnames = df.columns - df_data = pd.DataFrame() + df_data = pd.DataFrame(index=pd.DatetimeIndex([])) for variable in varnames: ds_var = df[variable] - ds_var_mean = ds_var.resample('1H').apply(function_dict[variable]).astype(np.float32) + ds_var_resample = ds_var.resample('1H', base=0.5) # shift by half hour to centre bin on the hour + ds_var_mean = ds_var_resample.apply(function_dict[variable]).astype(np.float32) df_data = pd.concat([df_data, ds_var_mean], axis=1, sort=False) for stat_method in function_stats: - ds_var_stat = ds_var.resample('1H').apply(stat_method).astype(np.float32) + ds_var_stat = ds_var_resample.apply(stat_method).astype(np.float32) ds_var_stat = ds_var_stat.rename("_".join([variable, stat_method])) df_data = pd.concat([df_data, ds_var_stat], axis=1, sort=False) - ##forward the index 30min - df_data.index = df_data.index + pd.Timedelta(30, units='m') + ##forward the index 30min so the timestamps are on the hour + df_data.index += pd.to_timedelta('30min') + return df_data diff --git a/test_aodntools/timeseries_products/IMOS_ANMN-NRS_STZ_20181213_NRSROT_FV02_hourly-timeseries_END-20190523_C-20220404.nc b/test_aodntools/timeseries_products/IMOS_ANMN-NRS_STZ_20181213_NRSROT_FV02_hourly-timeseries_END-20190523_C-20220428.nc similarity index 58% rename from test_aodntools/timeseries_products/IMOS_ANMN-NRS_STZ_20181213_NRSROT_FV02_hourly-timeseries_END-20190523_C-20220404.nc rename to test_aodntools/timeseries_products/IMOS_ANMN-NRS_STZ_20181213_NRSROT_FV02_hourly-timeseries_END-20190523_C-20220428.nc index d02738a..07d6952 100644 Binary files a/test_aodntools/timeseries_products/IMOS_ANMN-NRS_STZ_20181213_NRSROT_FV02_hourly-timeseries_END-20190523_C-20220404.nc and b/test_aodntools/timeseries_products/IMOS_ANMN-NRS_STZ_20181213_NRSROT_FV02_hourly-timeseries_END-20190523_C-20220428.nc differ diff --git a/test_aodntools/timeseries_products/test_hourly_timeseries.py b/test_aodntools/timeseries_products/test_hourly_timeseries.py index 3534494..07ad35b 100644 --- a/test_aodntools/timeseries_products/test_hourly_timeseries.py +++ b/test_aodntools/timeseries_products/test_hourly_timeseries.py @@ -21,7 +21,7 @@ ] INPUT_PATHS = [os.path.join(TEST_ROOT, f) for f in INPUT_FILES] EXPECTED_OUTPUT_FILE = os.path.join( - TEST_ROOT, 'IMOS_ANMN-NRS_STZ_20181213_NRSROT_FV02_hourly-timeseries_END-20190523_C-20220404.nc' + TEST_ROOT, 'IMOS_ANMN-NRS_STZ_20181213_NRSROT_FV02_hourly-timeseries_END-20190523_C-20220428.nc' ) INST_VARIABLES = {'instrument_id', 'source_file', 'LONGITUDE', 'LATITUDE', 'NOMINAL_DEPTH'} @@ -89,6 +89,7 @@ def test_hourly_aggregator(self): # check variable values expected = Dataset(EXPECTED_OUTPUT_FILE) + self.assertEqual(len(expected['TIME']), len(dataset['TIME'])) compare_vars = ('TIME', 'NOMINAL_DEPTH', 'instrument_index', 'TEMP', 'TEMP_count', 'TEMP_min', 'TEMP_max') non_match_vars = [var for var in compare_vars