From c75f7470658eda0d79a28f043ed3356eef02dfb8 Mon Sep 17 00:00:00 2001
From: Jonah Gilbert <jonahmgilbert1@gmail.com>
Date: Thu, 11 May 2023 08:25:12 -0700
Subject: [PATCH 01/20] Fix chunking issues in sum_AMEL and reduce_damages

---
 src/dscim/preprocessing/preprocessing.py | 42 +++++++++++++++++++++---
 1 file changed, 37 insertions(+), 5 deletions(-)

diff --git a/src/dscim/preprocessing/preprocessing.py b/src/dscim/preprocessing/preprocessing.py
index 67fb2eec..bb39a096 100644
--- a/src/dscim/preprocessing/preprocessing.py
+++ b/src/dscim/preprocessing/preprocessing.py
@@ -102,6 +102,24 @@ def reduce_damages(
                 xr.open_zarr(damages).chunks["batch"][0] == 15
             ), "'batch' dim on damages does not have chunksize of 15. Please rechunk."
 
+            if "coastal" not in sector:
+                chunkies = {
+                    "rcp": 1,
+                    "region": -1,
+                    "gcm": 1,
+                    "year": 10,
+                    "model": 1,
+                    "ssp": 1,
+                }
+            else:
+                chunkies = {
+                    "region": -1,
+                    "slr": 1,
+                    "year": 10,
+                    "model": 1,
+                    "ssp": 1,
+                }
+
             ce_batch_dims = [i for i in gdppc.dims] + [
                 i for i in ds.dims if i not in gdppc.dims and i != "batch"
             ]
@@ -110,15 +128,15 @@ def reduce_damages(
                 i for i in gdppc.region.values if i in ce_batch_coords["region"]
             ]
             ce_shapes = [len(ce_batch_coords[c]) for c in ce_batch_dims]
-            ce_chunks = [xr.open_zarr(damages).chunks[c][0] for c in ce_batch_dims]
+            ce_chunks = chunkies
 
     template = xr.DataArray(
-        da.empty(ce_shapes, chunks=ce_chunks),
+        da.empty(ce_shapes),
         dims=ce_batch_dims,
         coords=ce_batch_coords,
-    )
+    ).chunk(chunkies)
 
-    other = xr.open_zarr(damages)
+    other = xr.open_zarr(damages).chunk(chunkies)
 
     out = other.map_blocks(
         ce_from_chunk,
@@ -205,7 +223,21 @@ def sum_AMEL(
         for sector in sectors:
             print(f"Opening {sector},{params[sector]['sector_path']}")
             ds = xr.open_zarr(params[sector]["sector_path"], consolidated=True)
-            ds = ds[params[sector][var]].rename(var)
+            ds = (
+                ds[params[sector][var]]
+                .rename(var)
+                .chunk(
+                    {
+                        "batch": 15,
+                        "ssp": 1,
+                        "model": 1,
+                        "rcp": 1,
+                        "gcm": 1,
+                        "year": 10,
+                        "region": -1,
+                    }
+                )
+            )
             ds = xr.where(np.isinf(ds), np.nan, ds)
             datasets.append(ds)
 

From d0cc0375c823725f73c0585351e60132705ac912 Mon Sep 17 00:00:00 2001
From: Jonah Gilbert <jonahmgilbert1@gmail.com>
Date: Thu, 11 May 2023 15:16:05 -0400
Subject: [PATCH 02/20] Remove unused variable

---
 src/dscim/preprocessing/preprocessing.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/dscim/preprocessing/preprocessing.py b/src/dscim/preprocessing/preprocessing.py
index bb39a096..a253b389 100644
--- a/src/dscim/preprocessing/preprocessing.py
+++ b/src/dscim/preprocessing/preprocessing.py
@@ -128,7 +128,6 @@ def reduce_damages(
                 i for i in gdppc.region.values if i in ce_batch_coords["region"]
             ]
             ce_shapes = [len(ce_batch_coords[c]) for c in ce_batch_dims]
-            ce_chunks = chunkies
 
     template = xr.DataArray(
         da.empty(ce_shapes),

From 031e01c023f2701e319262f1ead1cb336e163617 Mon Sep 17 00:00:00 2001
From: davidrzhdu <davidrzhdu@uchicago.edu>
Date: Thu, 11 May 2023 12:43:29 -0700
Subject: [PATCH 03/20] Sort batches in the right order

---
 tests/test_input_damages.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/tests/test_input_damages.py b/tests/test_input_damages.py
index 76b28a1b..ce45edce 100644
--- a/tests/test_input_damages.py
+++ b/tests/test_input_damages.py
@@ -31,7 +31,7 @@ def test_parse_projection_filesys(tmp_path):
     """
     Test that parse_projection_filesys correctly retrieves projection system output structure
     """
-    rcp = ["rcp85", "rcp45"]
+    rcp = ["rcp45", "rcp85"]
     gcm = ["ACCESS1-0", "GFDL-CM3"]
     model = ["high", "low"]
     ssp = [f"SSP{n}" for n in range(2, 4)]
@@ -45,14 +45,14 @@ def test_parse_projection_filesys(tmp_path):
                         os.makedirs(os.path.join(tmp_path, b, r, g, m, s))
 
     out_expected = {
-        "batch": list(chain(repeat("batch9", 16), repeat("batch6", 16))),
-        "rcp": list(chain(repeat("rcp85", 8), repeat("rcp45", 8))) * 2,
+        "batch": list(chain(repeat("batch6", 16), repeat("batch9", 16))),
+        "rcp": list(chain(repeat("rcp45", 8), repeat("rcp85", 8))) * 2,
         "gcm": list(chain(repeat("ACCESS1-0", 4), repeat("GFDL-CM3", 4))) * 4,
         "model": list(chain(repeat("high", 2), repeat("low", 2))) * 8,
         "ssp": ["SSP2", "SSP3"] * 16,
         "path": [
             os.path.join(tmp_path, b, r, g, m, s)
-            for b in ["batch9", "batch6"]
+            for b in ["batch6", "batch9"]
             for r in rcp
             for g in gcm
             for m in model
@@ -65,6 +65,9 @@ def test_parse_projection_filesys(tmp_path):
 
     df_out_actual = _parse_projection_filesys(input_path=tmp_path)
     df_out_actual.reset_index(drop=True, inplace=True)
+    df_out_actual = df_out_actual.sort_values(
+        by=["batch", "rcp", "gcm", "model", "ssp"]
+    )
 
     pd.testing.assert_frame_equal(df_out_expected, df_out_actual)
 

From a1e6d9508d04c1cd6debf8f14d60816c15ed2460 Mon Sep 17 00:00:00 2001
From: davidrzhdu <davidrzhdu@uchicago.edu>
Date: Thu, 11 May 2023 13:02:30 -0700
Subject: [PATCH 04/20] Update test_parse_projection_filesys()

---
 tests/test_input_damages.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_input_damages.py b/tests/test_input_damages.py
index ce45edce..c19d8723 100644
--- a/tests/test_input_damages.py
+++ b/tests/test_input_damages.py
@@ -64,10 +64,10 @@ def test_parse_projection_filesys(tmp_path):
     df_out_expected = pd.DataFrame(out_expected)
 
     df_out_actual = _parse_projection_filesys(input_path=tmp_path)
-    df_out_actual.reset_index(drop=True, inplace=True)
     df_out_actual = df_out_actual.sort_values(
         by=["batch", "rcp", "gcm", "model", "ssp"]
     )
+    df_out_actual.reset_index(drop=True, inplace=True)
 
     pd.testing.assert_frame_equal(df_out_expected, df_out_actual)
 

From 9e66ab9d8489d54852312a66029e9ce575e4192b Mon Sep 17 00:00:00 2001
From: Jonah Gilbert <jonahmgilbert1@gmail.com>
Date: Tue, 16 May 2023 14:04:18 -0500
Subject: [PATCH 05/20] Add region to damages chunk sizes

---
 src/dscim/preprocessing/input_damages.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/dscim/preprocessing/input_damages.py b/src/dscim/preprocessing/input_damages.py
index 005513e5..9f772eca 100644
--- a/src/dscim/preprocessing/input_damages.py
+++ b/src/dscim/preprocessing/input_damages.py
@@ -738,7 +738,15 @@ def prep(
         ).expand_dims({"gcm": [gcm]})
 
         damages = damages.chunk(
-            {"batch": 15, "ssp": 1, "model": 1, "rcp": 1, "gcm": 1, "year": 10}
+            {
+                "batch": 15,
+                "ssp": 1,
+                "model": 1,
+                "rcp": 1,
+                "gcm": 1,
+                "year": 10,
+                "region": -1,
+            }
         )
         damages.coords.update({"batch": [f"batch{i}" for i in damages.batch.values]})
 

From fab22865647414f6f611e2adf9f6c15b3e16648f Mon Sep 17 00:00:00 2001
From: davidrzhdu <davidrzhdu@uchicago.edu>
Date: Mon, 22 May 2023 14:25:09 -0700
Subject: [PATCH 06/20] Add a function for concatenating labor/energy damage
 output

---
 src/dscim/preprocessing/input_damages.py | 25 ++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/src/dscim/preprocessing/input_damages.py b/src/dscim/preprocessing/input_damages.py
index 9f772eca..ee9058aa 100644
--- a/src/dscim/preprocessing/input_damages.py
+++ b/src/dscim/preprocessing/input_damages.py
@@ -95,6 +95,31 @@ def _parse_projection_filesys(input_path, query="exists==True"):
     return df.query(query)
 
 
+def concatenate_damage_output(damage_dir, basename, save_path):
+    """Concatenate labor/energy damage output across batches.
+
+    Parameters
+    ----------
+    damage_dir str
+        Directory containing separate labor/energy damage output files by batches.
+    basename str
+        Prefix of the damage output filenames (ex. {basename}_batch0.zarr)
+    save_path str
+        Path to save concatenated file in .zarr format
+    """
+    paths = glob.glob(f"{damage_dir}/{basename}*")
+    data = xr.open_mfdataset(paths=paths, engine="zarr")
+
+    for v in list(data.coords.keys()):
+        if data.coords[v].dtype == object:
+            data.coords[v] = data.coords[v].astype("unicode")
+    for v in list(data.variables.keys()):
+        if data[v].dtype == object:
+            data[v] = data[v].astype("unicode")
+
+    data.to_zarr(save_path, mode="w")
+
+
 def calculate_labor_impacts(input_path, file_prefix, variable, val_type):
     """Calculate impacts for labor results.
 

From c6d13498331961378ebfc136dc2dfdc5f24acf23 Mon Sep 17 00:00:00 2001
From: Jonah Gilbert <jonahmgilbert1@gmail.com>
Date: Tue, 23 May 2023 10:01:12 -0500
Subject: [PATCH 07/20] Chunk coastal

---
 src/dscim/preprocessing/input_damages.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/dscim/preprocessing/input_damages.py b/src/dscim/preprocessing/input_damages.py
index ee9058aa..18c3920f 100644
--- a/src/dscim/preprocessing/input_damages.py
+++ b/src/dscim/preprocessing/input_damages.py
@@ -396,7 +396,7 @@ def process_batch(g):
     batches = [ds for ds in batches if ds is not None]
     chunkies = {
         "rcp": 1,
-        "region": 24378,
+        "region": -1,
         "gcm": 1,
         "year": 10,
         "model": 1,
@@ -823,6 +823,15 @@ def coastal_inputs(
             )
         else:
             d = d.sel(adapt_type=adapt_type, vsl_valuation=vsl_valuation, drop=True)
+            chunkies = {
+                "batch": 15,
+                "ssp": 1,
+                "model": 1,
+                "slr": 1,
+                "year": 10,
+                "region": -1,
+            }
+            d = d.chunk(chunkies)
             d.to_zarr(
                 f"{path}/coastal_damages_{version}-{adapt_type}-{vsl_valuation}.zarr",
                 consolidated=True,

From e187c1dca2ef385756ffbfdda022ee55c17dc019 Mon Sep 17 00:00:00 2001
From: davidrzhdu <davidrzhdu@uchicago.edu>
Date: Wed, 24 May 2023 12:21:15 -0700
Subject: [PATCH 08/20] Add unit test for concatenate_damage_output

---
 tests/test_input_damages.py | 67 +++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)

diff --git a/tests/test_input_damages.py b/tests/test_input_damages.py
index c19d8723..447b364e 100644
--- a/tests/test_input_damages.py
+++ b/tests/test_input_damages.py
@@ -72,6 +72,73 @@ def test_parse_projection_filesys(tmp_path):
     pd.testing.assert_frame_equal(df_out_expected, df_out_actual)
 
 
+def test_concatenate_damage_output(tmp_path):
+    """
+    Test that concatenate_damage_output correctly concatenates damages across batches and saves to a single zarr file
+    """
+    d = os.path.join(tmp_path, "concatenate_in")
+    if not os.path.exists(d):
+        os.makedirs(d)
+
+    for b in ["batch6", "batch9"]:
+        ds_in = xr.Dataset(
+            {
+                "delta_rebased": (
+                    ["ssp", "rcp", "model", "gcm", "batch", "year", "region"],
+                    np.full((2, 2, 2, 2, 1, 2, 2), 1),
+                ),
+                "histclim_rebased": (
+                    ["ssp", "rcp", "model", "gcm", "batch", "year", "region"],
+                    np.full((2, 2, 2, 2, 1, 2, 2), 2),
+                ),
+            },
+            coords={
+                "batch": (["batch"], [b]),
+                "gcm": (["gcm"], ["ACCESS1-0", "BNU-ESM"]),
+                "model": (["model"], ["IIASA GDP", "OECD Env-Growth"]),
+                "rcp": (["rcp"], ["rcp45", "rcp85"]),
+                "region": (["region"], ["ZWE.test_region", "USA.test_region"]),
+                "ssp": (["ssp"], ["SSP2", "SSP3"]),
+                "year": (["year"], [2020, 2099]),
+            },
+        )
+
+        infile = os.path.join(d, f"test_insuffix_{b}.zarr")
+
+        ds_in.to_zarr(infile)
+
+    ds_out_expected = xr.Dataset(
+        {
+            "delta_rebased": (
+                ["ssp", "rcp", "model", "gcm", "batch", "year", "region"],
+                np.full((2, 2, 2, 2, 2, 2, 2), 1),
+            ),
+            "histclim_rebased": (
+                ["ssp", "rcp", "model", "gcm", "batch", "year", "region"],
+                np.full((2, 2, 2, 2, 2, 2, 2), 2),
+            ),
+        },
+        coords={
+            "batch": (["batch"], ["batch6", "batch9"]),
+            "gcm": (["gcm"], ["ACCESS1-0", "BNU-ESM"]),
+            "model": (["model"], ["IIASA GDP", "OECD Env-Growth"]),
+            "rcp": (["rcp"], ["rcp45", "rcp85"]),
+            "region": (["region"], ["ZWE.test_region", "USA.test_region"]),
+            "ssp": (["ssp"], ["SSP2", "SSP3"]),
+            "year": (["year"], [2020, 2099]),
+        },
+    )
+
+    concatenate_damage_output(
+        damage_dir=d,
+        basename="test_insuffix",
+        save_path=os.path.join(d, "concatenate.zarr"),
+    )
+    ds_out_actual = xr.open_zarr(os.path.join(d, "concatenate.zarr"))
+
+    xr.testing.assert_equal(ds_out_expected, ds_out_actual)
+
+
 @pytest.fixture
 def labor_in_val_fixture(tmp_path):
     """

From 7302ed45dee4d510a7b94d41a9076e470e20dacf Mon Sep 17 00:00:00 2001
From: davidrzhdu <davidrzhdu@uchicago.edu>
Date: Wed, 24 May 2023 12:28:31 -0700
Subject: [PATCH 09/20] Import function

---
 tests/test_input_damages.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_input_damages.py b/tests/test_input_damages.py
index 447b364e..9e7882ba 100644
--- a/tests/test_input_damages.py
+++ b/tests/test_input_damages.py
@@ -9,6 +9,7 @@
 from dscim.menu.simple_storage import EconVars
 from dscim.preprocessing.input_damages import (
     _parse_projection_filesys,
+    concatenate_damage_output,
     calculate_labor_impacts,
     concatenate_labor_damages,
     calculate_labor_batch_damages,

From e7915c4cebff0edccddd3b8d8127ee2dbf0fc8d3 Mon Sep 17 00:00:00 2001
From: davidrzhdu <davidrzhdu@uchicago.edu>
Date: Tue, 30 May 2023 08:14:39 -0700
Subject: [PATCH 10/20] chunk concatenated energy/labor and save mortality to
 float32

---
 src/dscim/preprocessing/input_damages.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/src/dscim/preprocessing/input_damages.py b/src/dscim/preprocessing/input_damages.py
index 18c3920f..40743696 100644
--- a/src/dscim/preprocessing/input_damages.py
+++ b/src/dscim/preprocessing/input_damages.py
@@ -110,6 +110,21 @@ def concatenate_damage_output(damage_dir, basename, save_path):
     paths = glob.glob(f"{damage_dir}/{basename}*")
     data = xr.open_mfdataset(paths=paths, engine="zarr")
 
+    for v in data:
+        del data[v].encoding["chunks"]
+
+    chunkies = {
+        "batch": 15,
+        "rcp": 1,
+        "gcm": 1,
+        "model": 1,
+        "ssp": 1,
+        "region": -1,
+        "year": 10,
+    }
+
+    data = data.chunk(chunkies)
+
     for v in list(data.coords.keys()):
         if data.coords[v].dtype == object:
             data.coords[v] = data.coords[v].astype("unicode")
@@ -777,6 +792,7 @@ def prep(
 
         # convert to EPA VSL
         damages = damages * 0.90681089
+        damages.astype(np.float32)
 
         for v in list(damages.coords.keys()):
             if damages.coords[v].dtype == object:

From 66bd2702190298a87f577fc8429d6e0918d67ab1 Mon Sep 17 00:00:00 2001
From: davidrzhdu <111376495+davidrzhdu@users.noreply.github.com>
Date: Tue, 30 May 2023 15:34:23 -0500
Subject: [PATCH 11/20] Assign astype(np.float32) result back to damages

---
 src/dscim/preprocessing/input_damages.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/dscim/preprocessing/input_damages.py b/src/dscim/preprocessing/input_damages.py
index 40743696..80f7b2f5 100644
--- a/src/dscim/preprocessing/input_damages.py
+++ b/src/dscim/preprocessing/input_damages.py
@@ -792,7 +792,7 @@ def prep(
 
         # convert to EPA VSL
         damages = damages * 0.90681089
-        damages.astype(np.float32)
+        damages = damages.astype(np.float32)
 
         for v in list(damages.coords.keys()):
             if damages.coords[v].dtype == object:

From 58682ccbd8a920c087fbe600bf73231d7834c68d Mon Sep 17 00:00:00 2001
From: davidrzhdu <111376495+davidrzhdu@users.noreply.github.com>
Date: Tue, 30 May 2023 15:50:47 -0500
Subject: [PATCH 12/20] Update test_input_damages.py because mortality is
 now saved in float32

---
 tests/test_input_damages.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_input_damages.py b/tests/test_input_damages.py
index 9e7882ba..e1278339 100644
--- a/tests/test_input_damages.py
+++ b/tests/test_input_damages.py
@@ -1101,11 +1101,11 @@ def test_prep_mortality_damages(
         {
             "delta": (
                 ["gcm", "batch", "ssp", "rcp", "model", "year", "region"],
-                np.full((2, 2, 2, 2, 2, 2, 2), -0.90681089),
+                np.float32(np.full((2, 2, 2, 2, 2, 2, 2), -0.90681089)),
             ),
             "histclim": (
                 ["gcm", "batch", "ssp", "rcp", "model", "year", "region"],
-                np.full((2, 2, 2, 2, 2, 2, 2), 2 * 0.90681089),
+                np.float32(np.full((2, 2, 2, 2, 2, 2, 2), 2 * 0.90681089)),
             ),
         },
         coords={

From 970c6233f45eceecfe861633b72d561b5ad9fcbd Mon Sep 17 00:00:00 2001
From: Jonah Gilbert <jonahmgilbert1@gmail.com>
Date: Tue, 13 Jun 2023 13:21:02 -0500
Subject: [PATCH 13/20] Update CHANGELOG.md

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2b1826e0..18dc91ac 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [0.4.0] - Unreleased
 ### Added
+- Functions to concatenate input damages across batches. ([PR #83](https://github.com/ClimateImpactLab/dscim/pull/83), [@davidrzhdu](https://github.com/davidrzhdu))
 - New unit tests for [dscim/utils/input_damages.py](https://github.com/ClimateImpactLab/dscim/blob/main/src/dscim/preprocessing/input_damages.py). ([PR #68](https://github.com/ClimateImpactLab/dscim/pull/68), [@davidrzhdu](https://github.com/davidrzhdu))
 - New unit tests for [dscim/utils/rff.py](https://github.com/ClimateImpactLab/dscim/blob/main/src/dscim/utils/rff.py). ([PR #73](https://github.com/ClimateImpactLab/dscim/pull/73), [@JMGilbert](https://github.com/JMGilbert))
 - New unit tests for [dscim/dscim/preprocessing.py](https://github.com/ClimateImpactLab/dscim/blob/main/src/dscim/preprocessing/preprocessing.py). ([PR #67](https://github.com/ClimateImpactLab/dscim/pull/67), [@JMGilbert](https://github.com/JMGilbert))
@@ -23,6 +24,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Remove old/unnecessary files. ([PR #57](https://github.com/ClimateImpactLab/dscim/pull/57), [@JMGilbert](https://github.com/JMGilbert))
 - Remove unused “save_path” and “ec_cls” from `read_energy_files_parallel()`. ([PR #56](https://github.com/ClimateImpactLab/dscim/pull/56), [@davidrzhdu](https://github.com/davidrzhdu))
 ### Fixed
+- Write all input damages output files with the correct chunksizes. ([PR #83](https://github.com/ClimateImpactLab/dscim/pull/83), [@JMGilbert](https://github.com/JMGilbert))
 - Add `.load()` to every loading of population data from EconVars. ([PR #82](https://github.com/ClimateImpactLab/dscim/pull/82), [@davidrzhdu](https://github.com/davidrzhdu))
 - Make `compute_ag_damages` function correctly save outputs in float32. ([PR #72](https://github.com/ClimateImpactLab/dscim/pull/72) and [PR #82](https://github.com/ClimateImpactLab/dscim/pull/82), [@davidrzhdu](https://github.com/davidrzhdu))
 - Make rff damage functions read in and save out in the proper filepath structure. ([PR #79](https://github.com/ClimateImpactLab/dscim/pull/79), [@JMGilbert](https://github.com/JMGilbert))

From 7fc0f03ebe94803288d0edd594894351ac6f6dd5 Mon Sep 17 00:00:00 2001
From: Jonah Gilbert <jonahmgilbert1@gmail.com>
Date: Thu, 29 Jun 2023 13:49:36 -0500
Subject: [PATCH 14/20] Update test_input_damages.py

---
 tests/test_input_damages.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_input_damages.py b/tests/test_input_damages.py
index e1278339..14fdd91b 100644
--- a/tests/test_input_damages.py
+++ b/tests/test_input_damages.py
@@ -768,7 +768,7 @@ def energy_in_netcdf_fixture(tmp_path):
                                             "region",
                                             "year",
                                         ],
-                                        np.full((1, 1, 1, 1, 1, 2, 2), 2),
+                                        np.full((1, 1, 1, 1, 1, 2, 2), 2).astype(object),
                                     ),
                                 },
                                 coords={

From edc191e14332008ecba78ee5a9244dd34dc1af65 Mon Sep 17 00:00:00 2001
From: davidrzhdu <davidrzhdu@uchicago.edu>
Date: Thu, 29 Jun 2023 13:18:23 -0700
Subject: [PATCH 15/20] create a list of filenames to open in
 'concatenate_damage_output'

---
 src/dscim/preprocessing/input_damages.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/dscim/preprocessing/input_damages.py b/src/dscim/preprocessing/input_damages.py
index 80f7b2f5..5c762388 100644
--- a/src/dscim/preprocessing/input_damages.py
+++ b/src/dscim/preprocessing/input_damages.py
@@ -107,7 +107,10 @@ def concatenate_damage_output(damage_dir, basename, save_path):
     save_path str
         Path to save concatenated file in .zarr format
     """
-    paths = glob.glob(f"{damage_dir}/{basename}*")
+    paths = [
+        f"{damage_dir}/{basename}_{b}.zarr"
+        for b in ["batch" + str(i) for i in range(0, 15)]
+    ]
     data = xr.open_mfdataset(paths=paths, engine="zarr")
 
     for v in data:

From 7574b6ee1bbd996daefb48920b39299b22376687 Mon Sep 17 00:00:00 2001
From: davidrzhdu <davidrzhdu@uchicago.edu>
Date: Thu, 29 Jun 2023 13:36:40 -0700
Subject: [PATCH 16/20] update test_concatenate_damage_output

---
 tests/test_input_damages.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/test_input_damages.py b/tests/test_input_damages.py
index 14fdd91b..edbf4a0e 100644
--- a/tests/test_input_damages.py
+++ b/tests/test_input_damages.py
@@ -81,7 +81,7 @@ def test_concatenate_damage_output(tmp_path):
     if not os.path.exists(d):
         os.makedirs(d)
 
-    for b in ["batch6", "batch9"]:
+    for b in ["batch" + str(i) for i in range(0, 15)]:
         ds_in = xr.Dataset(
             {
                 "delta_rebased": (
@@ -120,7 +120,7 @@ def test_concatenate_damage_output(tmp_path):
             ),
         },
         coords={
-            "batch": (["batch"], ["batch6", "batch9"]),
+            "batch": (["batch"], ["batch" + str(i) for i in range(0, 15)]),
             "gcm": (["gcm"], ["ACCESS1-0", "BNU-ESM"]),
             "model": (["model"], ["IIASA GDP", "OECD Env-Growth"]),
             "rcp": (["rcp"], ["rcp45", "rcp85"]),
@@ -768,7 +768,9 @@ def energy_in_netcdf_fixture(tmp_path):
                                             "region",
                                             "year",
                                         ],
-                                        np.full((1, 1, 1, 1, 1, 2, 2), 2).astype(object),
+                                        np.full((1, 1, 1, 1, 1, 2, 2), 2).astype(
+                                            object
+                                        ),
                                     ),
                                 },
                                 coords={

From c730d2638b9790d2b8d2375dc779a9e8d14bf2bb Mon Sep 17 00:00:00 2001
From: Jonah Gilbert <jonahmgilbert1@gmail.com>
Date: Thu, 29 Jun 2023 15:39:43 -0500
Subject: [PATCH 17/20] Ensure that dtype = object is tested

---
 tests/test_input_damages.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_input_damages.py b/tests/test_input_damages.py
index edbf4a0e..4ad605d6 100644
--- a/tests/test_input_damages.py
+++ b/tests/test_input_damages.py
@@ -86,7 +86,7 @@ def test_concatenate_damage_output(tmp_path):
             {
                 "delta_rebased": (
                     ["ssp", "rcp", "model", "gcm", "batch", "year", "region"],
-                    np.full((2, 2, 2, 2, 1, 2, 2), 1),
+                    np.full((2, 2, 2, 2, 1, 2, 2), 1).astype(object),
                 ),
                 "histclim_rebased": (
                     ["ssp", "rcp", "model", "gcm", "batch", "year", "region"],
@@ -94,7 +94,7 @@ def test_concatenate_damage_output(tmp_path):
                 ),
             },
             coords={
-                "batch": (["batch"], [b]),
+                "batch": (["batch"], np.array([b], dtype=object)),
                 "gcm": (["gcm"], ["ACCESS1-0", "BNU-ESM"]),
                 "model": (["model"], ["IIASA GDP", "OECD Env-Growth"]),
                 "rcp": (["rcp"], ["rcp45", "rcp85"]),

From f0e6ede82179e8f2caaa81b2738b39140c39cf36 Mon Sep 17 00:00:00 2001
From: Jonah Gilbert <jonahmgilbert1@gmail.com>
Date: Thu, 29 Jun 2023 15:50:30 -0500
Subject: [PATCH 18/20] Change object coordinate

---
 tests/test_input_damages.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_input_damages.py b/tests/test_input_damages.py
index 4ad605d6..d35d1fb3 100644
--- a/tests/test_input_damages.py
+++ b/tests/test_input_damages.py
@@ -94,8 +94,8 @@ def test_concatenate_damage_output(tmp_path):
                 ),
             },
             coords={
-                "batch": (["batch"], np.array([b], dtype=object)),
-                "gcm": (["gcm"], ["ACCESS1-0", "BNU-ESM"]),
+                "batch": (["batch"], [b]),
+                "gcm": (["gcm"], np.array(["ACCESS1-0", "BNU-ESM"], dtype=object)),
                 "model": (["model"], ["IIASA GDP", "OECD Env-Growth"]),
                 "rcp": (["rcp"], ["rcp45", "rcp85"]),
                 "region": (["region"], ["ZWE.test_region", "USA.test_region"]),

From ae69955917f045136bcec1cbe82bc545762b758f Mon Sep 17 00:00:00 2001
From: davidrzhdu <davidrzhdu@uchicago.edu>
Date: Thu, 29 Jun 2023 14:00:34 -0700
Subject: [PATCH 19/20] update test_concatenate_damage_output

---
 tests/test_input_damages.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/test_input_damages.py b/tests/test_input_damages.py
index d35d1fb3..d6d8e0b6 100644
--- a/tests/test_input_damages.py
+++ b/tests/test_input_damages.py
@@ -112,11 +112,11 @@ def test_concatenate_damage_output(tmp_path):
         {
             "delta_rebased": (
                 ["ssp", "rcp", "model", "gcm", "batch", "year", "region"],
-                np.full((2, 2, 2, 2, 2, 2, 2), 1),
+                np.full((2, 2, 2, 2, 15, 2, 2), 1),
             ),
             "histclim_rebased": (
                 ["ssp", "rcp", "model", "gcm", "batch", "year", "region"],
-                np.full((2, 2, 2, 2, 2, 2, 2), 2),
+                np.full((2, 2, 2, 2, 15, 2, 2), 2),
             ),
         },
         coords={
@@ -135,7 +135,9 @@ def test_concatenate_damage_output(tmp_path):
         basename="test_insuffix",
         save_path=os.path.join(d, "concatenate.zarr"),
     )
-    ds_out_actual = xr.open_zarr(os.path.join(d, "concatenate.zarr"))
+    ds_out_actual = xr.open_zarr(os.path.join(d, "concatenate.zarr")).sel(
+        batch=["batch" + str(i) for i in range(0, 15)]
+    )
 
     xr.testing.assert_equal(ds_out_expected, ds_out_actual)
 

From 43b78433237a7588bb5a624f396475326a9997a1 Mon Sep 17 00:00:00 2001
From: davidrzhdu <davidrzhdu@uchicago.edu>
Date: Thu, 29 Jun 2023 15:30:05 -0700
Subject: [PATCH 20/20] Change dtype of batch

---
 src/dscim/preprocessing/input_damages.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/dscim/preprocessing/input_damages.py b/src/dscim/preprocessing/input_damages.py
index 5c762388..ab368ff0 100644
--- a/src/dscim/preprocessing/input_damages.py
+++ b/src/dscim/preprocessing/input_damages.py
@@ -3,7 +3,6 @@
 """
 
 import os
-import glob
 import re
 import logging
 import warnings
@@ -131,6 +130,7 @@ def concatenate_damage_output(damage_dir, basename, save_path):
     for v in list(data.coords.keys()):
         if data.coords[v].dtype == object:
             data.coords[v] = data.coords[v].astype("unicode")
+    data.coords["batch"] = data.coords["batch"].astype("unicode")
     for v in list(data.variables.keys()):
         if data[v].dtype == object:
             data[v] = data[v].astype("unicode")