From c75f7470658eda0d79a28f043ed3356eef02dfb8 Mon Sep 17 00:00:00 2001 From: Jonah Gilbert Date: Thu, 11 May 2023 08:25:12 -0700 Subject: [PATCH 01/20] Fix chunking issues in sum_AMEL and reduce_damages --- src/dscim/preprocessing/preprocessing.py | 42 +++++++++++++++++++++--- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/src/dscim/preprocessing/preprocessing.py b/src/dscim/preprocessing/preprocessing.py index 67fb2eec..bb39a096 100644 --- a/src/dscim/preprocessing/preprocessing.py +++ b/src/dscim/preprocessing/preprocessing.py @@ -102,6 +102,24 @@ def reduce_damages( xr.open_zarr(damages).chunks["batch"][0] == 15 ), "'batch' dim on damages does not have chunksize of 15. Please rechunk." + if "coastal" not in sector: + chunkies = { + "rcp": 1, + "region": -1, + "gcm": 1, + "year": 10, + "model": 1, + "ssp": 1, + } + else: + chunkies = { + "region": -1, + "slr": 1, + "year": 10, + "model": 1, + "ssp": 1, + } + ce_batch_dims = [i for i in gdppc.dims] + [ i for i in ds.dims if i not in gdppc.dims and i != "batch" ] @@ -110,15 +128,15 @@ def reduce_damages( i for i in gdppc.region.values if i in ce_batch_coords["region"] ] ce_shapes = [len(ce_batch_coords[c]) for c in ce_batch_dims] - ce_chunks = [xr.open_zarr(damages).chunks[c][0] for c in ce_batch_dims] + ce_chunks = chunkies template = xr.DataArray( - da.empty(ce_shapes, chunks=ce_chunks), + da.empty(ce_shapes), dims=ce_batch_dims, coords=ce_batch_coords, - ) + ).chunk(chunkies) - other = xr.open_zarr(damages) + other = xr.open_zarr(damages).chunk(chunkies) out = other.map_blocks( ce_from_chunk, @@ -205,7 +223,21 @@ def sum_AMEL( for sector in sectors: print(f"Opening {sector},{params[sector]['sector_path']}") ds = xr.open_zarr(params[sector]["sector_path"], consolidated=True) - ds = ds[params[sector][var]].rename(var) + ds = ( + ds[params[sector][var]] + .rename(var) + .chunk( + { + "batch": 15, + "ssp": 1, + "model": 1, + "rcp": 1, + "gcm": 1, + "year": 10, + "region": -1, + } + ) + ) ds = xr.where(np.isinf(ds), np.nan, ds) datasets.append(ds) From d0cc0375c823725f73c0585351e60132705ac912 Mon Sep 17 00:00:00 2001 From: Jonah Gilbert Date: Thu, 11 May 2023 15:16:05 -0400 Subject: [PATCH 02/20] Remove unused variable --- src/dscim/preprocessing/preprocessing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/dscim/preprocessing/preprocessing.py b/src/dscim/preprocessing/preprocessing.py index bb39a096..a253b389 100644 --- a/src/dscim/preprocessing/preprocessing.py +++ b/src/dscim/preprocessing/preprocessing.py @@ -128,7 +128,6 @@ def reduce_damages( i for i in gdppc.region.values if i in ce_batch_coords["region"] ] ce_shapes = [len(ce_batch_coords[c]) for c in ce_batch_dims] - ce_chunks = chunkies template = xr.DataArray( da.empty(ce_shapes), From 031e01c023f2701e319262f1ead1cb336e163617 Mon Sep 17 00:00:00 2001 From: davidrzhdu Date: Thu, 11 May 2023 12:43:29 -0700 Subject: [PATCH 03/20] Sort batches in the right order --- tests/test_input_damages.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tests/test_input_damages.py b/tests/test_input_damages.py index 76b28a1b..ce45edce 100644 --- a/tests/test_input_damages.py +++ b/tests/test_input_damages.py @@ -31,7 +31,7 @@ def test_parse_projection_filesys(tmp_path): """ Test that parse_projection_filesys correctly retrieves projection system output structure """ - rcp = ["rcp85", "rcp45"] + rcp = ["rcp45", "rcp85"] gcm = ["ACCESS1-0", "GFDL-CM3"] model = ["high", "low"] ssp = [f"SSP{n}" for n in range(2, 4)] @@ -45,14 +45,14 @@ def test_parse_projection_filesys(tmp_path): os.makedirs(os.path.join(tmp_path, b, r, g, m, s)) out_expected = { - "batch": list(chain(repeat("batch9", 16), repeat("batch6", 16))), - "rcp": list(chain(repeat("rcp85", 8), repeat("rcp45", 8))) * 2, + "batch": list(chain(repeat("batch6", 16), repeat("batch9", 16))), + "rcp": list(chain(repeat("rcp45", 8), repeat("rcp85", 8))) * 2, "gcm": list(chain(repeat("ACCESS1-0", 4), repeat("GFDL-CM3", 4))) * 4, "model": list(chain(repeat("high", 2), repeat("low", 2))) * 8, "ssp": ["SSP2", "SSP3"] * 16, "path": [ os.path.join(tmp_path, b, r, g, m, s) - for b in ["batch9", "batch6"] + for b in ["batch6", "batch9"] for r in rcp for g in gcm for m in model @@ -65,6 +65,9 @@ def test_parse_projection_filesys(tmp_path): df_out_actual = _parse_projection_filesys(input_path=tmp_path) df_out_actual.reset_index(drop=True, inplace=True) + df_out_actual = df_out_actual.sort_values( + by=["batch", "rcp", "gcm", "model", "ssp"] + ) pd.testing.assert_frame_equal(df_out_expected, df_out_actual) From a1e6d9508d04c1cd6debf8f14d60816c15ed2460 Mon Sep 17 00:00:00 2001 From: davidrzhdu Date: Thu, 11 May 2023 13:02:30 -0700 Subject: [PATCH 04/20] Update test_parse_projection_filesys() --- tests/test_input_damages.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_input_damages.py b/tests/test_input_damages.py index ce45edce..c19d8723 100644 --- a/tests/test_input_damages.py +++ b/tests/test_input_damages.py @@ -64,10 +64,10 @@ def test_parse_projection_filesys(tmp_path): df_out_expected = pd.DataFrame(out_expected) df_out_actual = _parse_projection_filesys(input_path=tmp_path) - df_out_actual.reset_index(drop=True, inplace=True) df_out_actual = df_out_actual.sort_values( by=["batch", "rcp", "gcm", "model", "ssp"] ) + df_out_actual.reset_index(drop=True, inplace=True) pd.testing.assert_frame_equal(df_out_expected, df_out_actual) From 9e66ab9d8489d54852312a66029e9ce575e4192b Mon Sep 17 00:00:00 2001 From: Jonah Gilbert Date: Tue, 16 May 2023 14:04:18 -0500 Subject: [PATCH 05/20] Add region to damages chunk sizes --- src/dscim/preprocessing/input_damages.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/dscim/preprocessing/input_damages.py b/src/dscim/preprocessing/input_damages.py index 005513e5..9f772eca 100644 --- a/src/dscim/preprocessing/input_damages.py +++ b/src/dscim/preprocessing/input_damages.py @@ -738,7 +738,15 @@ def prep( ).expand_dims({"gcm": [gcm]}) damages = damages.chunk( - {"batch": 15, "ssp": 1, "model": 1, "rcp": 1, "gcm": 1, "year": 10} + { + "batch": 15, + "ssp": 1, + "model": 1, + "rcp": 1, + "gcm": 1, + "year": 10, + "region": -1, + } ) damages.coords.update({"batch": [f"batch{i}" for i in damages.batch.values]}) From fab22865647414f6f611e2adf9f6c15b3e16648f Mon Sep 17 00:00:00 2001 From: davidrzhdu Date: Mon, 22 May 2023 14:25:09 -0700 Subject: [PATCH 06/20] Add a function for concatenating labor/energy damage output --- src/dscim/preprocessing/input_damages.py | 25 ++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/dscim/preprocessing/input_damages.py b/src/dscim/preprocessing/input_damages.py index 9f772eca..ee9058aa 100644 --- a/src/dscim/preprocessing/input_damages.py +++ b/src/dscim/preprocessing/input_damages.py @@ -95,6 +95,31 @@ def _parse_projection_filesys(input_path, query="exists==True"): return df.query(query) +def concatenate_damage_output(damage_dir, basename, save_path): + """Concatenate labor/energy damage output across batches. + + Parameters + ---------- + damage_dir str + Directory containing separate labor/energy damage output files by batches. + basename str + Prefix of the damage output filenames (ex. {basename}_batch0.zarr) + save_path str + Path to save concatenated file in .zarr format + """ + paths = glob.glob(f"{damage_dir}/{basename}*") + data = xr.open_mfdataset(paths=paths, engine="zarr") + + for v in list(data.coords.keys()): + if data.coords[v].dtype == object: + data.coords[v] = data.coords[v].astype("unicode") + for v in list(data.variables.keys()): + if data[v].dtype == object: + data[v] = data[v].astype("unicode") + + data.to_zarr(save_path, mode="w") + + def calculate_labor_impacts(input_path, file_prefix, variable, val_type): """Calculate impacts for labor results. From c6d13498331961378ebfc136dc2dfdc5f24acf23 Mon Sep 17 00:00:00 2001 From: Jonah Gilbert Date: Tue, 23 May 2023 10:01:12 -0500 Subject: [PATCH 07/20] Chunk coastal --- src/dscim/preprocessing/input_damages.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/dscim/preprocessing/input_damages.py b/src/dscim/preprocessing/input_damages.py index ee9058aa..18c3920f 100644 --- a/src/dscim/preprocessing/input_damages.py +++ b/src/dscim/preprocessing/input_damages.py @@ -396,7 +396,7 @@ def process_batch(g): batches = [ds for ds in batches if ds is not None] chunkies = { "rcp": 1, - "region": 24378, + "region": -1, "gcm": 1, "year": 10, "model": 1, @@ -823,6 +823,15 @@ def coastal_inputs( ) else: d = d.sel(adapt_type=adapt_type, vsl_valuation=vsl_valuation, drop=True) + chunkies = { + "batch": 15, + "ssp": 1, + "model": 1, + "slr": 1, + "year": 10, + "region": -1, + } + d = d.chunk(chunkies) d.to_zarr( f"{path}/coastal_damages_{version}-{adapt_type}-{vsl_valuation}.zarr", consolidated=True, From e187c1dca2ef385756ffbfdda022ee55c17dc019 Mon Sep 17 00:00:00 2001 From: davidrzhdu Date: Wed, 24 May 2023 12:21:15 -0700 Subject: [PATCH 08/20] Add unit test for concatenate_damage_output --- tests/test_input_damages.py | 67 +++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/tests/test_input_damages.py b/tests/test_input_damages.py index c19d8723..447b364e 100644 --- a/tests/test_input_damages.py +++ b/tests/test_input_damages.py @@ -72,6 +72,73 @@ def test_parse_projection_filesys(tmp_path): pd.testing.assert_frame_equal(df_out_expected, df_out_actual) +def test_concatenate_damage_output(tmp_path): + """ + Test that concatenate_damage_output correctly concatenates damages across batches and saves to a single zarr file + """ + d = os.path.join(tmp_path, "concatenate_in") + if not os.path.exists(d): + os.makedirs(d) + + for b in ["batch6", "batch9"]: + ds_in = xr.Dataset( + { + "delta_rebased": ( + ["ssp", "rcp", "model", "gcm", "batch", "year", "region"], + np.full((2, 2, 2, 2, 1, 2, 2), 1), + ), + "histclim_rebased": ( + ["ssp", "rcp", "model", "gcm", "batch", "year", "region"], + np.full((2, 2, 2, 2, 1, 2, 2), 2), + ), + }, + coords={ + "batch": (["batch"], [b]), + "gcm": (["gcm"], ["ACCESS1-0", "BNU-ESM"]), + "model": (["model"], ["IIASA GDP", "OECD Env-Growth"]), + "rcp": (["rcp"], ["rcp45", "rcp85"]), + "region": (["region"], ["ZWE.test_region", "USA.test_region"]), + "ssp": (["ssp"], ["SSP2", "SSP3"]), + "year": (["year"], [2020, 2099]), + }, + ) + + infile = os.path.join(d, f"test_insuffix_{b}.zarr") + + ds_in.to_zarr(infile) + + ds_out_expected = xr.Dataset( + { + "delta_rebased": ( + ["ssp", "rcp", "model", "gcm", "batch", "year", "region"], + np.full((2, 2, 2, 2, 2, 2, 2), 1), + ), + "histclim_rebased": ( + ["ssp", "rcp", "model", "gcm", "batch", "year", "region"], + np.full((2, 2, 2, 2, 2, 2, 2), 2), + ), + }, + coords={ + "batch": (["batch"], ["batch6", "batch9"]), + "gcm": (["gcm"], ["ACCESS1-0", "BNU-ESM"]), + "model": (["model"], ["IIASA GDP", "OECD Env-Growth"]), + "rcp": (["rcp"], ["rcp45", "rcp85"]), + "region": (["region"], ["ZWE.test_region", "USA.test_region"]), + "ssp": (["ssp"], ["SSP2", "SSP3"]), + "year": (["year"], [2020, 2099]), + }, + ) + + concatenate_damage_output( + damage_dir=d, + basename="test_insuffix", + save_path=os.path.join(d, "concatenate.zarr"), + ) + ds_out_actual = xr.open_zarr(os.path.join(d, "concatenate.zarr")) + + xr.testing.assert_equal(ds_out_expected, ds_out_actual) + + @pytest.fixture def labor_in_val_fixture(tmp_path): """ From 7302ed45dee4d510a7b94d41a9076e470e20dacf Mon Sep 17 00:00:00 2001 From: davidrzhdu Date: Wed, 24 May 2023 12:28:31 -0700 Subject: [PATCH 09/20] Import function --- tests/test_input_damages.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_input_damages.py b/tests/test_input_damages.py index 447b364e..9e7882ba 100644 --- a/tests/test_input_damages.py +++ b/tests/test_input_damages.py @@ -9,6 +9,7 @@ from dscim.menu.simple_storage import EconVars from dscim.preprocessing.input_damages import ( _parse_projection_filesys, + concatenate_damage_output, calculate_labor_impacts, concatenate_labor_damages, calculate_labor_batch_damages, From e7915c4cebff0edccddd3b8d8127ee2dbf0fc8d3 Mon Sep 17 00:00:00 2001 From: davidrzhdu Date: Tue, 30 May 2023 08:14:39 -0700 Subject: [PATCH 10/20] chunk concatenated energy/labor and save mortality to float32 --- src/dscim/preprocessing/input_damages.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/dscim/preprocessing/input_damages.py b/src/dscim/preprocessing/input_damages.py index 18c3920f..40743696 100644 --- a/src/dscim/preprocessing/input_damages.py +++ b/src/dscim/preprocessing/input_damages.py @@ -110,6 +110,21 @@ def concatenate_damage_output(damage_dir, basename, save_path): paths = glob.glob(f"{damage_dir}/{basename}*") data = xr.open_mfdataset(paths=paths, engine="zarr") + for v in data: + del data[v].encoding["chunks"] + + chunkies = { + "batch": 15, + "rcp": 1, + "gcm": 1, + "model": 1, + "ssp": 1, + "region": -1, + "year": 10, + } + + data = data.chunk(chunkies) + for v in list(data.coords.keys()): if data.coords[v].dtype == object: data.coords[v] = data.coords[v].astype("unicode") @@ -777,6 +792,7 @@ def prep( # convert to EPA VSL damages = damages * 0.90681089 + damages.astype(np.float32) for v in list(damages.coords.keys()): if damages.coords[v].dtype == object: From 66bd2702190298a87f577fc8429d6e0918d67ab1 Mon Sep 17 00:00:00 2001 From: davidrzhdu <111376495+davidrzhdu@users.noreply.github.com> Date: Tue, 30 May 2023 15:34:23 -0500 Subject: [PATCH 11/20] fix a small issue --- src/dscim/preprocessing/input_damages.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dscim/preprocessing/input_damages.py b/src/dscim/preprocessing/input_damages.py index 40743696..80f7b2f5 100644 --- a/src/dscim/preprocessing/input_damages.py +++ b/src/dscim/preprocessing/input_damages.py @@ -792,7 +792,7 @@ def prep( # convert to EPA VSL damages = damages * 0.90681089 - damages.astype(np.float32) + damages = damages.astype(np.float32) for v in list(damages.coords.keys()): if damages.coords[v].dtype == object: From 58682ccbd8a920c087fbe600bf73231d7834c68d Mon Sep 17 00:00:00 2001 From: davidrzhdu <111376495+davidrzhdu@users.noreply.github.com> Date: Tue, 30 May 2023 15:50:47 -0500 Subject: [PATCH 12/20] Update test_input_damages.py because mortality has saved in float32 --- tests/test_input_damages.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_input_damages.py b/tests/test_input_damages.py index 9e7882ba..e1278339 100644 --- a/tests/test_input_damages.py +++ b/tests/test_input_damages.py @@ -1101,11 +1101,11 @@ def test_prep_mortality_damages( { "delta": ( ["gcm", "batch", "ssp", "rcp", "model", "year", "region"], - np.full((2, 2, 2, 2, 2, 2, 2), -0.90681089), + np.float32(np.full((2, 2, 2, 2, 2, 2, 2), -0.90681089)), ), "histclim": ( ["gcm", "batch", "ssp", "rcp", "model", "year", "region"], - np.full((2, 2, 2, 2, 2, 2, 2), 2 * 0.90681089), + np.float32(np.full((2, 2, 2, 2, 2, 2, 2), 2 * 0.90681089)), ), }, coords={ From 970c6233f45eceecfe861633b72d561b5ad9fcbd Mon Sep 17 00:00:00 2001 From: Jonah Gilbert Date: Tue, 13 Jun 2023 13:21:02 -0500 Subject: [PATCH 13/20] Update CHANGELOG.md --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2b1826e0..18dc91ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [0.4.0] - Unreleased ### Added +- Functions to concatenate input damages across batches. ([PR #83](https://github.com/ClimateImpactLab/dscim/pull/83), [@davidrzhdu](https://github.com/davidrzhdu)) - New unit tests for [dscim/utils/input_damages.py](https://github.com/ClimateImpactLab/dscim/blob/main/src/dscim/preprocessing/input_damages.py). ([PR #68](https://github.com/ClimateImpactLab/dscim/pull/68), [@davidrzhdu](https://github.com/davidrzhdu)) - New unit tests for [dscim/utils/rff.py](https://github.com/ClimateImpactLab/dscim/blob/main/src/dscim/utils/rff.py). ([PR #73](https://github.com/ClimateImpactLab/dscim/pull/73), [@JMGilbert](https://github.com/JMGilbert)) - New unit tests for [dscim/dscim/preprocessing.py](https://github.com/ClimateImpactLab/dscim/blob/main/src/dscim/preprocessing/preprocessing.py). ([PR #67](https://github.com/ClimateImpactLab/dscim/pull/67), [@JMGilbert](https://github.com/JMGilbert)) @@ -23,6 +24,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Remove old/unnecessary files. ([PR #57](https://github.com/ClimateImpactLab/dscim/pull/57), [@JMGilbert](https://github.com/JMGilbert)) - Remove unused “save_path” and “ec_cls” from `read_energy_files_parallel()`. ([PR #56](https://github.com/ClimateImpactLab/dscim/pull/56), [@davidrzhdu](https://github.com/davidrzhdu)) ### Fixed +- Make all input damages output files with correct chunksizes. ([PR #83](https://github.com/ClimateImpactLab/dscim/pull/83), [@JMGilbert](https://github.com/JMGilbert)) - Add `.load()` to every loading of population data from EconVars. ([PR #82](https://github.com/ClimateImpactLab/dscim/pull/82), [@davidrzhdu](https://github.com/davidrzhdu)) - Make `compute_ag_damages` function correctly save outputs in float32. ([PR #72](https://github.com/ClimateImpactLab/dscim/pull/72) and [PR #82](https://github.com/ClimateImpactLab/dscim/pull/82), [@davidrzhdu](https://github.com/davidrzhdu)) - Make rff damage functions read in and save out in the proper filepath structure. ([PR #79](https://github.com/ClimateImpactLab/dscim/pull/79), [@JMGilbert](https://github.com/JMGilbert)) From 7fc0f03ebe94803288d0edd594894351ac6f6dd5 Mon Sep 17 00:00:00 2001 From: Jonah Gilbert Date: Thu, 29 Jun 2023 13:49:36 -0500 Subject: [PATCH 14/20] Update test_input_damages.py --- tests/test_input_damages.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_input_damages.py b/tests/test_input_damages.py index e1278339..14fdd91b 100644 --- a/tests/test_input_damages.py +++ b/tests/test_input_damages.py @@ -768,7 +768,7 @@ def energy_in_netcdf_fixture(tmp_path): "region", "year", ], - np.full((1, 1, 1, 1, 1, 2, 2), 2), + np.full((1, 1, 1, 1, 1, 2, 2), 2).astype(object), ), }, coords={ From edc191e14332008ecba78ee5a9244dd34dc1af65 Mon Sep 17 00:00:00 2001 From: davidrzhdu Date: Thu, 29 Jun 2023 13:18:23 -0700 Subject: [PATCH 15/20] create a list of filenames to open in 'concatenate_damage_output' --- src/dscim/preprocessing/input_damages.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/dscim/preprocessing/input_damages.py b/src/dscim/preprocessing/input_damages.py index 80f7b2f5..5c762388 100644 --- a/src/dscim/preprocessing/input_damages.py +++ b/src/dscim/preprocessing/input_damages.py @@ -107,7 +107,10 @@ def concatenate_damage_output(damage_dir, basename, save_path): save_path str Path to save concatenated file in .zarr format """ - paths = glob.glob(f"{damage_dir}/{basename}*") + paths = [ + f"{damage_dir}/{basename}_{b}.zarr" + for b in ["batch" + str(i) for i in range(0, 15)] + ] data = xr.open_mfdataset(paths=paths, engine="zarr") for v in data: From 7574b6ee1bbd996daefb48920b39299b22376687 Mon Sep 17 00:00:00 2001 From: davidrzhdu Date: Thu, 29 Jun 2023 13:36:40 -0700 Subject: [PATCH 16/20] update test_concatenate_damage_output --- tests/test_input_damages.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/test_input_damages.py b/tests/test_input_damages.py index 14fdd91b..edbf4a0e 100644 --- a/tests/test_input_damages.py +++ b/tests/test_input_damages.py @@ -81,7 +81,7 @@ def test_concatenate_damage_output(tmp_path): if not os.path.exists(d): os.makedirs(d) - for b in ["batch6", "batch9"]: + for b in ["batch" + str(i) for i in range(0, 15)]: ds_in = xr.Dataset( { "delta_rebased": ( @@ -120,7 +120,7 @@ def test_concatenate_damage_output(tmp_path): ), }, coords={ - "batch": (["batch"], ["batch6", "batch9"]), + "batch": (["batch"], ["batch" + str(i) for i in range(0, 15)]), "gcm": (["gcm"], ["ACCESS1-0", "BNU-ESM"]), "model": (["model"], ["IIASA GDP", "OECD Env-Growth"]), "rcp": (["rcp"], ["rcp45", "rcp85"]), @@ -768,7 +768,9 @@ def energy_in_netcdf_fixture(tmp_path): "region", "year", ], - np.full((1, 1, 1, 1, 1, 2, 2), 2).astype(object), + np.full((1, 1, 1, 1, 1, 2, 2), 2).astype( + object + ), ), }, coords={ From c730d2638b9790d2b8d2375dc779a9e8d14bf2bb Mon Sep 17 00:00:00 2001 From: Jonah Gilbert Date: Thu, 29 Jun 2023 15:39:43 -0500 Subject: [PATCH 17/20] Ensure that dtype = object is tested --- tests/test_input_damages.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_input_damages.py b/tests/test_input_damages.py index edbf4a0e..4ad605d6 100644 --- a/tests/test_input_damages.py +++ b/tests/test_input_damages.py @@ -86,7 +86,7 @@ def test_concatenate_damage_output(tmp_path): { "delta_rebased": ( ["ssp", "rcp", "model", "gcm", "batch", "year", "region"], - np.full((2, 2, 2, 2, 1, 2, 2), 1), + np.full((2, 2, 2, 2, 1, 2, 2), 1).astype(object), ), "histclim_rebased": ( ["ssp", "rcp", "model", "gcm", "batch", "year", "region"], @@ -94,7 +94,7 @@ def test_concatenate_damage_output(tmp_path): ), }, coords={ - "batch": (["batch"], [b]), + "batch": (["batch"], np.array([b], dtype=object)), "gcm": (["gcm"], ["ACCESS1-0", "BNU-ESM"]), "model": (["model"], ["IIASA GDP", "OECD Env-Growth"]), "rcp": (["rcp"], ["rcp45", "rcp85"]), From f0e6ede82179e8f2caaa81b2738b39140c39cf36 Mon Sep 17 00:00:00 2001 From: Jonah Gilbert Date: Thu, 29 Jun 2023 15:50:30 -0500 Subject: [PATCH 18/20] Change object coordinate --- tests/test_input_damages.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_input_damages.py b/tests/test_input_damages.py index 4ad605d6..d35d1fb3 100644 --- a/tests/test_input_damages.py +++ b/tests/test_input_damages.py @@ -94,8 +94,8 @@ def test_concatenate_damage_output(tmp_path): ), }, coords={ - "batch": (["batch"], np.array([b], dtype=object)), - "gcm": (["gcm"], ["ACCESS1-0", "BNU-ESM"]), + "batch": (["batch"], [b]), + "gcm": (["gcm"], np.array(["ACCESS1-0", "BNU-ESM"], dtype=object)), "model": (["model"], ["IIASA GDP", "OECD Env-Growth"]), "rcp": (["rcp"], ["rcp45", "rcp85"]), "region": (["region"], ["ZWE.test_region", "USA.test_region"]), From ae69955917f045136bcec1cbe82bc545762b758f Mon Sep 17 00:00:00 2001 From: davidrzhdu Date: Thu, 29 Jun 2023 14:00:34 -0700 Subject: [PATCH 19/20] update test_concatenate_damage_output --- tests/test_input_damages.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/test_input_damages.py b/tests/test_input_damages.py index d35d1fb3..d6d8e0b6 100644 --- a/tests/test_input_damages.py +++ b/tests/test_input_damages.py @@ -112,11 +112,11 @@ def test_concatenate_damage_output(tmp_path): { "delta_rebased": ( ["ssp", "rcp", "model", "gcm", "batch", "year", "region"], - np.full((2, 2, 2, 2, 2, 2, 2), 1), + np.full((2, 2, 2, 2, 15, 2, 2), 1), ), "histclim_rebased": ( ["ssp", "rcp", "model", "gcm", "batch", "year", "region"], - np.full((2, 2, 2, 2, 2, 2, 2), 2), + np.full((2, 2, 2, 2, 15, 2, 2), 2), ), }, coords={ @@ -135,7 +135,9 @@ def test_concatenate_damage_output(tmp_path): basename="test_insuffix", save_path=os.path.join(d, "concatenate.zarr"), ) - ds_out_actual = xr.open_zarr(os.path.join(d, "concatenate.zarr")) + ds_out_actual = xr.open_zarr(os.path.join(d, "concatenate.zarr")).sel( + batch=["batch" + str(i) for i in range(0, 15)] + ) xr.testing.assert_equal(ds_out_expected, ds_out_actual) From 43b78433237a7588bb5a624f396475326a9997a1 Mon Sep 17 00:00:00 2001 From: davidrzhdu Date: Thu, 29 Jun 2023 15:30:05 -0700 Subject: [PATCH 20/20] Change dtype of batch --- src/dscim/preprocessing/input_damages.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dscim/preprocessing/input_damages.py b/src/dscim/preprocessing/input_damages.py index 5c762388..ab368ff0 100644 --- a/src/dscim/preprocessing/input_damages.py +++ b/src/dscim/preprocessing/input_damages.py @@ -3,7 +3,6 @@ """ import os -import glob import re import logging import warnings @@ -131,6 +130,7 @@ def concatenate_damage_output(damage_dir, basename, save_path): for v in list(data.coords.keys()): if data.coords[v].dtype == object: data.coords[v] = data.coords[v].astype("unicode") + data.coords["batch"] = data.coords["batch"].astype("unicode") for v in list(data.variables.keys()): if data[v].dtype == object: data[v] = data[v].astype("unicode")