From ea59ef2e0264d3ebdb1908d9c6e4231249a5f529 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Fri, 8 May 2026 00:40:57 +0200 Subject: [PATCH 01/46] feat: add /maps viewer with STAC-backed display metadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a display block to each built-in dataset template (colormap, value range, nodata) and surface it through the STAC Render extension on every published collection. Add a /maps endpoint that serves a single-page map viewer: it reads the STAC catalog to list available datasets, loads the Render and Datacube metadata to configure a ZarrLayer (MapLibre + @carbonplan/zarr-layer), and builds a time slider from cube:dimensions. No tile server or build step required — the browser reads the Zarr store directly via the existing /zarr HTTP range endpoint. Closes #66 --- src/climate_api/data/datasets/chirps3.yaml | 4 + src/climate_api/data/datasets/era5_land.yaml | 7 + src/climate_api/data/datasets/worldpop.yaml | 4 + src/climate_api/stac/services.py | 34 +- src/climate_api/system/routes.py | 9 +- src/climate_api/system/templates.py | 5 + src/climate_api/templates/landing_page.html | 8 + src/climate_api/templates/maps.html | 496 +++++++++++++++++++ 8 files changed, 564 insertions(+), 3 deletions(-) create mode 100644 src/climate_api/templates/maps.html diff --git a/src/climate_api/data/datasets/chirps3.yaml b/src/climate_api/data/datasets/chirps3.yaml index b977bb2d..0ff48b3c 100644 --- a/src/climate_api/data/datasets/chirps3.yaml +++ b/src/climate_api/data/datasets/chirps3.yaml @@ -13,3 +13,7 @@ resolution: 5 km x 5 km source: CHIRPS v3 source_url: https://www.chc.ucsb.edu/data/chirps3 + display: + colormap: blues + range: [0.0, 20.0] + nodata: 0.0 diff --git a/src/climate_api/data/datasets/era5_land.yaml b/src/climate_api/data/datasets/era5_land.yaml index 91716520..adc354f9 100644 --- a/src/climate_api/data/datasets/era5_land.yaml +++ b/src/climate_api/data/datasets/era5_land.yaml @@ -17,6 +17,9 @@ resolution: 9 km x 9 km source: ERA5-Land Reanalysis source_url: https://earthdatahub.destine.eu/collections/era5/datasets/reanalysis-era5-land + display: + colormap: rdbu_r + range: [15.0, 40.0] - id: era5land_precipitation_hourly name: Total precipitation (ERA5-Land) @@ -38,3 +41,7 @@ resolution: 9 km x 9 km source: ERA5-Land Reanalysis source_url: https://earthdatahub.destine.eu/collections/era5/datasets/reanalysis-era5-land + display: + colormap: blues + range: [0.0, 5.0] + nodata: 0.0 diff --git a/src/climate_api/data/datasets/worldpop.yaml b/src/climate_api/data/datasets/worldpop.yaml index ead982b9..22d85960 100644 --- a/src/climate_api/data/datasets/worldpop.yaml +++ b/src/climate_api/data/datasets/worldpop.yaml @@ -18,3 +18,7 @@ resolution: 100m x 100m source: WorldPop Global2 source_url: https://hub.worldpop.org/project/categories?id=3 + display: + colormap: viridis + range: [0.0, 1000.0] + nodata: 0.0 diff --git a/src/climate_api/stac/services.py b/src/climate_api/stac/services.py index 4a0c730e..7f53347a 100644 --- a/src/climate_api/stac/services.py +++ b/src/climate_api/stac/services.py @@ -25,6 +25,7 @@ CATALOG_DESCRIPTION = "Published Climate API GeoZarr datasets" STAC_VERSION = "1.1.0" DATACUBE_EXTENSION = "https://stac-extensions.github.io/datacube/v2.3.0/schema.json" +RENDER_EXTENSION = "https://stac-extensions.github.io/render/v2.0.0/schema.json" ZARR_EXTENSION = "https://stac-extensions.github.io/zarr/v1.1.0/schema.json" DEFAULT_STAC_LICENSE = "various" SPATIAL_STEP_DECIMALS = 8 @@ -89,11 +90,16 @@ def build_collection(dataset_id: str, request: Request) -> dict[str, object]: collection_payload["stac_version"] = STAC_VERSION collection_payload["description"] = template.description collection_payload["title"] = template.title + renders = _build_renders(artifact, source_dataset) + extensions = {DATACUBE_EXTENSION, ZARR_EXTENSION} + if renders is not None: + collection_payload["renders"] = renders + extensions.add(RENDER_EXTENSION) existing_extensions = collection_payload.get("stac_extensions", []) if isinstance(existing_extensions, list): - collection_payload["stac_extensions"] = sorted({*existing_extensions, DATACUBE_EXTENSION, ZARR_EXTENSION}) + collection_payload["stac_extensions"] = sorted({*existing_extensions, *extensions}) else: - collection_payload["stac_extensions"] = sorted([DATACUBE_EXTENSION, ZARR_EXTENSION]) + collection_payload["stac_extensions"] = sorted(extensions) collection_payload["links"] = template_links assets = collection_payload.setdefault("assets", {}) zarr_from_xstac = assets.get("zarr", {}) if isinstance(assets, dict) else {} @@ -427,6 +433,30 @@ def _zarr_open_kwargs(artifact: ArtifactRecord) -> dict[str, bool | None]: return {"consolidated": _zarr_consolidated_flag(_artifact_store_path(artifact))} +def _build_renders(artifact: ArtifactRecord, source_dataset: dict[str, Any]) -> dict[str, Any] | None: + display = source_dataset.get("display") + if not isinstance(display, dict): + return None + colormap_name = display.get("colormap") + value_range = display.get("range") + if not isinstance(colormap_name, str) or not isinstance(value_range, list) or len(value_range) != 2: + return None + render: dict[str, Any] = { + "title": artifact.dataset_name, + "assets": ["zarr"], + "rescale": [[float(value_range[0]), float(value_range[1])]], + "colormap_name": colormap_name, + "climate_api:variable": artifact.variable, + } + nodata = display.get("nodata") + if nodata is not None: + render["nodata"] = float(nodata) + units = source_dataset.get("convert_units") or source_dataset.get("units") + if isinstance(units, str): + render["climate_api:units"] = units + return {"default": render} + + def _zarr_consolidated_flag(artifact_path: str) -> bool | None: if "://" in artifact_path: return None diff --git a/src/climate_api/system/routes.py b/src/climate_api/system/routes.py index e52a5911..4b80f267 100644 --- a/src/climate_api/system/routes.py +++ b/src/climate_api/system/routes.py @@ -7,7 +7,7 @@ from fastapi.responses import HTMLResponse, JSONResponse, Response from .schemas import AppInfo, HealthStatus, Status -from .templates import ROOT_RESPONSES, app_version, render_landing, root_json, wants_json +from .templates import ROOT_RESPONSES, app_version, render_landing, render_maps, root_json, wants_json router = APIRouter() @@ -21,6 +21,13 @@ def read_index(request: Request) -> Response: return HTMLResponse(render_landing(app_version, base)) +@router.get("/maps", response_class=HTMLResponse, include_in_schema=False) +def maps(request: Request) -> HTMLResponse: + """Return the interactive map viewer.""" + base = str(request.base_url).rstrip("/") + return HTMLResponse(render_maps(base)) + + @router.get("/health") def health() -> HealthStatus: """Return health status for container health checks.""" diff --git a/src/climate_api/system/templates.py b/src/climate_api/system/templates.py index 6caa992a..2aba0070 100644 --- a/src/climate_api/system/templates.py +++ b/src/climate_api/system/templates.py @@ -94,6 +94,11 @@ def wants_json(request: Request) -> bool: return json_q >= 0 and (html_q < 0 or json_q >= html_q) +def render_maps(base: str) -> str: + """Render the map viewer page.""" + return get_template("maps.html").render(base=base) + + def render_landing(version: str, base: str) -> str: """Render the root landing page with live instance status.""" try: diff --git a/src/climate_api/templates/landing_page.html b/src/climate_api/templates/landing_page.html index 49cd5999..66dae0ba 100644 --- a/src/climate_api/templates/landing_page.html +++ b/src/climate_api/templates/landing_page.html @@ -276,6 +276,14 @@

Explore

>Interactive Swagger UI for all endpoints +
  • + Map viewer + Browse published datasets on an interactive map +
  • STAC Catalog + + + + + Map viewer — DHIS2 Climate API + + + + + +
    +
    +

    Climate API

    + Map viewer + +
    + +
    +
    + +
    +
    + + +
    + + + + + +
    Loading datasets...
    +
    +
    +
    + + + + From e3b6ccfd07c65607121370cd9c21f3feada3d4f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Fri, 8 May 2026 00:47:26 +0200 Subject: [PATCH 02/46] fix: fit map to configured instance extent on load --- src/climate_api/templates/maps.html | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/climate_api/templates/maps.html b/src/climate_api/templates/maps.html index f36b0c01..382f2f45 100644 --- a/src/climate_api/templates/maps.html +++ b/src/climate_api/templates/maps.html @@ -333,6 +333,7 @@

    Climate API

    }, center: [20, 5], zoom: 2, + fitBoundsOptions: { padding: 40 }, attributionControl: { compact: true }, }); @@ -355,7 +356,18 @@

    Climate API

    statusEl.className = isError ? "status error" : "status"; } + async function fitToExtent() { + try { + const res = await fetch("{{ base }}/extent"); + if (!res.ok) return; + const extent = await res.json(); + const [xmin, ymin, xmax, ymax] = extent.bbox; + map.fitBounds([[xmin, ymin], [xmax, ymax]], { padding: 40, maxZoom: 10 }); + } catch {} + } + async function loadCatalog() { + await fitToExtent(); try { const res = await fetch("{{ base }}/stac/catalog.json"); if (!res.ok) throw new Error(`HTTP ${res.status}`); From 93ebb2693c384b2e56b90d39c66874738d823911 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Fri, 8 May 2026 00:54:18 +0200 Subject: [PATCH 03/46] fix: use hex colormap format expected by zarr-layer --- src/climate_api/templates/maps.html | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/src/climate_api/templates/maps.html b/src/climate_api/templates/maps.html index 382f2f45..c23b2abd 100644 --- a/src/climate_api/templates/maps.html +++ b/src/climate_api/templates/maps.html @@ -236,29 +236,10 @@

    Climate API

    function buildColormap(name) { const key = COLORMAP_ALIASES[name?.toLowerCase()] ?? "viridis"; - const safe = Object.keys(COLORMAP_ALIASES).includes(name?.toLowerCase()) - ? key - : "viridis"; try { - const colors = colormap({ - colormap: safe, - nshades: 256, - format: "rba", - alpha: 1.0, - }); - return new Uint8Array( - colors.flatMap(([r, g, b, a]) => [r, g, b, Math.round(a * 255)]) - ); + return colormap({ colormap: key, nshades: 256, format: "hex", alpha: 1.0 }); } catch { - const fallback = colormap({ - colormap: "viridis", - nshades: 256, - format: "rba", - alpha: 1.0, - }); - return new Uint8Array( - fallback.flatMap(([r, g, b, a]) => [r, g, b, Math.round(a * 255)]) - ); + return colormap({ colormap: "viridis", nshades: 256, format: "hex", alpha: 1.0 }); } } From cbd1f73bda45b7e99a1f79ca6655c711721d749f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Fri, 8 May 2026 00:57:46 +0200 Subject: [PATCH 04/46] fix: replace colormap CDN import with inline interpolated stops --- src/climate_api/templates/maps.html | 71 ++++++++++++++++++++--------- 1 file changed, 50 insertions(+), 21 deletions(-) diff --git a/src/climate_api/templates/maps.html b/src/climate_api/templates/maps.html index c23b2abd..50088df1 100644 --- a/src/climate_api/templates/maps.html +++ b/src/climate_api/templates/maps.html @@ -217,30 +217,59 @@

    Climate API

    + + + +
    +
    +

    Climate API

    + Map viewer + +
    + +
    +
    + +
    +
    + + +
    + + + + + + + +
    Loading datasets...
    +
    +
    +
    + + + + From a981f387a9d914710570d7993cba6bc7904b4607 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Fri, 8 May 2026 02:29:55 +0200 Subject: [PATCH 15/46] feat: switch to OpenFreeMap vector tiles basemap Replaces the OSM raster style with OpenFreeMap's positron vector style. The data layer is inserted before the first symbol layer so country borders, road labels, and place names always render on top of the climate data. --- src/climate_api/templates/map-viewer.html | 29 ++++++----------------- 1 file changed, 7 insertions(+), 22 deletions(-) diff --git a/src/climate_api/templates/map-viewer.html b/src/climate_api/templates/map-viewer.html index af66880a..e67e72bf 100644 --- a/src/climate_api/templates/map-viewer.html +++ b/src/climate_api/templates/map-viewer.html @@ -310,26 +310,7 @@

    Climate API

    const map = new maplibregl.Map({ container: "map", - style: { - version: 8, - sources: { - osm: { - type: "raster", - tiles: ["https://tile.openstreetmap.org/{z}/{x}/{y}.png"], - tileSize: 256, - attribution: - '© OpenStreetMap', - }, - }, - layers: [ - { - id: "osm", - type: "raster", - source: "osm", - paint: { "raster-opacity": 0.5 }, - }, - ], - }, + style: "https://tiles.openfreemap.org/styles/positron", center: [20, 5], zoom: 2, fitBoundsOptions: { padding: 40 }, @@ -339,6 +320,7 @@

    Climate API

    let activeLayer = null; let timeSteps = []; let timeDimKey = "time"; + let labelLayerId; const selectEl = document.getElementById("dataset-select"); const timeSection = document.getElementById("time-section"); @@ -466,7 +448,7 @@

    Climate API

    ...(zarrVersion !== null && { zarrVersion }), ...(fillValue !== null && { fillValue }), }); - map.addLayer(activeLayer); + map.addLayer(activeLayer, labelLayerId); } catch (err) { setStatus(`Failed to create layer: ${err.message}`, true); return; @@ -525,7 +507,10 @@

    Climate API

    activeLayer?.setSelector({ [timeDimKey]: i }); }); - map.on("load", loadCatalog); + map.on("load", () => { + labelLayerId = map.getStyle().layers.find((l) => l.type === "symbol")?.id; + loadCatalog(); + }); From 066294cfb670d6838088f706f60a9b796bf0d3cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Fri, 8 May 2026 02:34:55 +0200 Subject: [PATCH 16/46] feat: render data layer below country boundaries and labels --- src/climate_api/templates/map-viewer.html | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/climate_api/templates/map-viewer.html b/src/climate_api/templates/map-viewer.html index e67e72bf..96fab6cb 100644 --- a/src/climate_api/templates/map-viewer.html +++ b/src/climate_api/templates/map-viewer.html @@ -508,7 +508,10 @@

    Climate API

    }); map.on("load", () => { - labelLayerId = map.getStyle().layers.find((l) => l.type === "symbol")?.id; + const layers = map.getStyle().layers; + labelLayerId = + layers.find((l) => l.type === "line" && l.id.startsWith("boundary"))?.id ?? + layers.find((l) => l.type === "symbol")?.id; loadCatalog(); }); From d06a8eed062f2273f4a76ed6e88373bdf451203a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Fri, 8 May 2026 02:36:58 +0200 Subject: [PATCH 17/46] feat: remove layer opacity --- src/climate_api/templates/map-viewer.html | 1 - 1 file changed, 1 deletion(-) diff --git a/src/climate_api/templates/map-viewer.html b/src/climate_api/templates/map-viewer.html index 96fab6cb..bf819381 100644 --- a/src/climate_api/templates/map-viewer.html +++ b/src/climate_api/templates/map-viewer.html @@ -443,7 +443,6 @@

    Climate API

    variable, clim, colormap: cm, - opacity: 0.6, selector, ...(zarrVersion !== null && { zarrVersion }), ...(fillValue !== null && { fillValue }), From 2d59bc0ec5d3d397e28a83500822c98a66460328 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Fri, 8 May 2026 02:44:30 +0200 Subject: [PATCH 18/46] fix: use kelvin range for ERA5 temperature display --- src/climate_api/data/datasets/era5_land.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/climate_api/data/datasets/era5_land.yaml b/src/climate_api/data/datasets/era5_land.yaml index adc354f9..c3c20fa8 100644 --- a/src/climate_api/data/datasets/era5_land.yaml +++ b/src/climate_api/data/datasets/era5_land.yaml @@ -19,7 +19,7 @@ source_url: https://earthdatahub.destine.eu/collections/era5/datasets/reanalysis-era5-land display: colormap: rdbu_r - range: [15.0, 40.0] + range: [288.0, 313.0] - id: era5land_precipitation_hourly name: Total precipitation (ERA5-Land) From d2b1ad20460bcc045c608f715d757c2491b17546 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Fri, 8 May 2026 02:50:26 +0200 Subject: [PATCH 19/46] fix: set explicit opacity 1 on zarr layer --- src/climate_api/templates/map-viewer.html | 1 + 1 file changed, 1 insertion(+) diff --git a/src/climate_api/templates/map-viewer.html b/src/climate_api/templates/map-viewer.html index bf819381..fa9bee8c 100644 --- a/src/climate_api/templates/map-viewer.html +++ b/src/climate_api/templates/map-viewer.html @@ -443,6 +443,7 @@

    Climate API

    variable, clim, colormap: cm, + opacity: 1, selector, ...(zarrVersion !== null && { zarrVersion }), ...(fillValue !== null && { fillValue }), From 71493b793f3da8507c0b44ee073fb5006792d752 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Fri, 8 May 2026 02:52:47 +0200 Subject: [PATCH 20/46] =?UTF-8?q?fix:=20center=20ERA5=20temperature=20scal?= =?UTF-8?q?e=20on=200=C2=B0C=20(273=20K)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/climate_api/data/datasets/era5_land.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/climate_api/data/datasets/era5_land.yaml b/src/climate_api/data/datasets/era5_land.yaml index c3c20fa8..7a106d5b 100644 --- a/src/climate_api/data/datasets/era5_land.yaml +++ b/src/climate_api/data/datasets/era5_land.yaml @@ -19,7 +19,7 @@ source_url: https://earthdatahub.destine.eu/collections/era5/datasets/reanalysis-era5-land display: colormap: rdbu_r - range: [288.0, 313.0] + range: [243.0, 303.0] - id: era5land_precipitation_hourly name: Total precipitation (ERA5-Land) From 01cbd3873bb2ef979e7c98c731f77d5389eda873 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Fri, 8 May 2026 02:54:31 +0200 Subject: [PATCH 21/46] =?UTF-8?q?fix:=20widen=20ERA5=20temperature=20scale?= =?UTF-8?q?=20to=20=C2=B140=20K=20around=200=C2=B0C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/climate_api/data/datasets/era5_land.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/climate_api/data/datasets/era5_land.yaml b/src/climate_api/data/datasets/era5_land.yaml index 7a106d5b..161f6832 100644 --- a/src/climate_api/data/datasets/era5_land.yaml +++ b/src/climate_api/data/datasets/era5_land.yaml @@ -19,7 +19,7 @@ source_url: https://earthdatahub.destine.eu/collections/era5/datasets/reanalysis-era5-land display: colormap: rdbu_r - range: [243.0, 303.0] + range: [233.0, 313.0] - id: era5land_precipitation_hourly name: Total precipitation (ERA5-Land) From 17ea77ad3095d29d500e9fb5047fadb09d066b7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Fri, 8 May 2026 02:57:35 +0200 Subject: [PATCH 22/46] fix: raise boundary and label layers above zarr layer after add --- src/climate_api/templates/map-viewer.html | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/climate_api/templates/map-viewer.html b/src/climate_api/templates/map-viewer.html index fa9bee8c..07a2eda8 100644 --- a/src/climate_api/templates/map-viewer.html +++ b/src/climate_api/templates/map-viewer.html @@ -320,7 +320,6 @@

    Climate API

    let activeLayer = null; let timeSteps = []; let timeDimKey = "time"; - let labelLayerId; const selectEl = document.getElementById("dataset-select"); const timeSection = document.getElementById("time-section"); @@ -448,7 +447,12 @@

    Climate API

    ...(zarrVersion !== null && { zarrVersion }), ...(fillValue !== null && { fillValue }), }); - map.addLayer(activeLayer, labelLayerId); + map.addLayer(activeLayer); + for (const layer of map.getStyle().layers) { + if ((layer.type === "line" && layer.id.startsWith("boundary")) || layer.type === "symbol") { + map.moveLayer(layer.id); + } + } } catch (err) { setStatus(`Failed to create layer: ${err.message}`, true); return; @@ -508,10 +512,6 @@

    Climate API

    }); map.on("load", () => { - const layers = map.getStyle().layers; - labelLayerId = - layers.find((l) => l.type === "line" && l.id.startsWith("boundary"))?.id ?? - layers.find((l) => l.type === "symbol")?.id; loadCatalog(); }); From a99114cdcc7b003b16ba1d9fc8824fff72594f3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Fri, 8 May 2026 03:00:14 +0200 Subject: [PATCH 23/46] fix: only zoom to extent on first dataset load --- src/climate_api/templates/map-viewer.html | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/climate_api/templates/map-viewer.html b/src/climate_api/templates/map-viewer.html index 07a2eda8..bab57302 100644 --- a/src/climate_api/templates/map-viewer.html +++ b/src/climate_api/templates/map-viewer.html @@ -320,6 +320,7 @@

    Climate API

    let activeLayer = null; let timeSteps = []; let timeDimKey = "time"; + let initialZoomDone = false; const selectEl = document.getElementById("dataset-select"); const timeSection = document.getElementById("time-section"); @@ -458,9 +459,10 @@

    Climate API

    return; } - // Zoom to dataset spatial extent. + // Zoom to dataset spatial extent on first load only. const bbox = collection.extent?.spatial?.bbox?.[0]; - if (bbox) { + if (bbox && !initialZoomDone) { + initialZoomDone = true; map.fitBounds( [ [bbox[0], bbox[1]], From 04ecfd4704801307a85c5dd0a540933c37519119 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Fri, 8 May 2026 03:03:13 +0200 Subject: [PATCH 24/46] fix: remove per-dataset fitBounds, extent zoom handled on page load --- src/climate_api/templates/map-viewer.html | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/src/climate_api/templates/map-viewer.html b/src/climate_api/templates/map-viewer.html index bab57302..b3a3d2d4 100644 --- a/src/climate_api/templates/map-viewer.html +++ b/src/climate_api/templates/map-viewer.html @@ -320,7 +320,6 @@

    Climate API

    let activeLayer = null; let timeSteps = []; let timeDimKey = "time"; - let initialZoomDone = false; const selectEl = document.getElementById("dataset-select"); const timeSection = document.getElementById("time-section"); @@ -459,18 +458,6 @@

    Climate API

    return; } - // Zoom to dataset spatial extent on first load only. - const bbox = collection.extent?.spatial?.bbox?.[0]; - if (bbox && !initialZoomDone) { - initialZoomDone = true; - map.fitBounds( - [ - [bbox[0], bbox[1]], - [bbox[2], bbox[3]], - ], - { padding: 60, maxZoom: 8 } - ); - } // Time slider. if (timeSteps.length > 1) { From 8a235db707f6360729f14c89b12bb2476ec0d229 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Fri, 8 May 2026 13:51:58 +0200 Subject: [PATCH 25/46] fix: update CHIRPS3 nodata to -9999, WorldPop colormap to reds with adjusted range --- src/climate_api/data/datasets/chirps3.yaml | 4 ++-- src/climate_api/data/datasets/worldpop.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/climate_api/data/datasets/chirps3.yaml b/src/climate_api/data/datasets/chirps3.yaml index 0ff48b3c..7fecad80 100644 --- a/src/climate_api/data/datasets/chirps3.yaml +++ b/src/climate_api/data/datasets/chirps3.yaml @@ -7,7 +7,7 @@ sync_execution: append sync_availability: latest_available_function: climate_api.providers.availability.chirps3_daily_latest_available - ingestion: + ingestion: function: dhis2eo.data.chc.chirps3.daily.download units: mm resolution: 5 km x 5 km @@ -16,4 +16,4 @@ display: colormap: blues range: [0.0, 20.0] - nodata: 0.0 + nodata: -9999.0 diff --git a/src/climate_api/data/datasets/worldpop.yaml b/src/climate_api/data/datasets/worldpop.yaml index 22d85960..b7d9f64c 100644 --- a/src/climate_api/data/datasets/worldpop.yaml +++ b/src/climate_api/data/datasets/worldpop.yaml @@ -19,6 +19,6 @@ source: WorldPop Global2 source_url: https://hub.worldpop.org/project/categories?id=3 display: - colormap: viridis - range: [0.0, 1000.0] + colormap: reds + range: [0.0, 25.0] nodata: 0.0 From c4e780c00956a313f9b322ac2ea7fd2673e2d41f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Fri, 8 May 2026 13:55:39 +0200 Subject: [PATCH 26/46] feat: rename maps.html to map-viewer.html, fix display ranges and nodata values --- src/climate_api/data/datasets/chirps3.yaml | 4 +- src/climate_api/data/datasets/era5_land.yaml | 2 +- src/climate_api/data/datasets/worldpop.yaml | 4 +- src/climate_api/system/templates.py | 2 +- src/climate_api/templates/map-viewer.html | 22 +- src/climate_api/templates/maps.html | 480 ------------------- 6 files changed, 13 insertions(+), 501 deletions(-) delete mode 100644 src/climate_api/templates/maps.html diff --git a/src/climate_api/data/datasets/chirps3.yaml b/src/climate_api/data/datasets/chirps3.yaml index 0ff48b3c..7fecad80 100644 --- a/src/climate_api/data/datasets/chirps3.yaml +++ b/src/climate_api/data/datasets/chirps3.yaml @@ -7,7 +7,7 @@ sync_execution: append sync_availability: latest_available_function: climate_api.providers.availability.chirps3_daily_latest_available - ingestion: + ingestion: function: dhis2eo.data.chc.chirps3.daily.download units: mm resolution: 5 km x 5 km @@ -16,4 +16,4 @@ display: colormap: blues range: [0.0, 20.0] - nodata: 0.0 + nodata: -9999.0 diff --git a/src/climate_api/data/datasets/era5_land.yaml b/src/climate_api/data/datasets/era5_land.yaml index adc354f9..161f6832 100644 --- a/src/climate_api/data/datasets/era5_land.yaml +++ b/src/climate_api/data/datasets/era5_land.yaml @@ -19,7 +19,7 @@ source_url: https://earthdatahub.destine.eu/collections/era5/datasets/reanalysis-era5-land display: colormap: rdbu_r - range: [15.0, 40.0] + range: [233.0, 313.0] - id: era5land_precipitation_hourly name: Total precipitation (ERA5-Land) diff --git a/src/climate_api/data/datasets/worldpop.yaml b/src/climate_api/data/datasets/worldpop.yaml index 22d85960..b7d9f64c 100644 --- a/src/climate_api/data/datasets/worldpop.yaml +++ b/src/climate_api/data/datasets/worldpop.yaml @@ -19,6 +19,6 @@ source: WorldPop Global2 source_url: https://hub.worldpop.org/project/categories?id=3 display: - colormap: viridis - range: [0.0, 1000.0] + colormap: reds + range: [0.0, 25.0] nodata: 0.0 diff --git a/src/climate_api/system/templates.py b/src/climate_api/system/templates.py index 5e09d854..6cfeca77 100644 --- a/src/climate_api/system/templates.py +++ b/src/climate_api/system/templates.py @@ -98,7 +98,7 @@ def wants_json(request: Request) -> bool: def render_maps(base: str) -> str: """Render the map viewer page.""" - return get_template("maps.html").render(base=base) + return get_template("map-viewer.html").render(base=base) def _load_extent() -> dict[str, Any] | None: diff --git a/src/climate_api/templates/map-viewer.html b/src/climate_api/templates/map-viewer.html index e67e72bf..b3a3d2d4 100644 --- a/src/climate_api/templates/map-viewer.html +++ b/src/climate_api/templates/map-viewer.html @@ -320,7 +320,6 @@

    Climate API

    let activeLayer = null; let timeSteps = []; let timeDimKey = "time"; - let labelLayerId; const selectEl = document.getElementById("dataset-select"); const timeSection = document.getElementById("time-section"); @@ -443,28 +442,22 @@

    Climate API

    variable, clim, colormap: cm, - opacity: 0.6, + opacity: 1, selector, ...(zarrVersion !== null && { zarrVersion }), ...(fillValue !== null && { fillValue }), }); - map.addLayer(activeLayer, labelLayerId); + map.addLayer(activeLayer); + for (const layer of map.getStyle().layers) { + if ((layer.type === "line" && layer.id.startsWith("boundary")) || layer.type === "symbol") { + map.moveLayer(layer.id); + } + } } catch (err) { setStatus(`Failed to create layer: ${err.message}`, true); return; } - // Zoom to dataset spatial extent. - const bbox = collection.extent?.spatial?.bbox?.[0]; - if (bbox) { - map.fitBounds( - [ - [bbox[0], bbox[1]], - [bbox[2], bbox[3]], - ], - { padding: 60, maxZoom: 8 } - ); - } // Time slider. if (timeSteps.length > 1) { @@ -508,7 +501,6 @@

    Climate API

    }); map.on("load", () => { - labelLayerId = map.getStyle().layers.find((l) => l.type === "symbol")?.id; loadCatalog(); }); diff --git a/src/climate_api/templates/maps.html b/src/climate_api/templates/maps.html deleted file mode 100644 index 53019d86..00000000 --- a/src/climate_api/templates/maps.html +++ /dev/null @@ -1,480 +0,0 @@ - - - - - - Map viewer — DHIS2 Climate API - - - - - -
    -
    -

    Climate API

    - Map viewer - -
    - -
    -
    - -
    -
    - - -
    - - - - - -
    Loading datasets...
    -
    -
    -
    - - - - From 0e57332ff96855c4a8b4a7dc6090871f2f02de74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Sat, 9 May 2026 14:58:35 +0200 Subject: [PATCH 27/46] Reapply "feat: extensible transforms pipeline for zarr build" This reverts commit a21e7ca293c9a807530606333c67bc4bd66660ef. --- climate_api/data/datasets/era5_land.yaml | 15 ++- .../data_manager/services/downloader.py | 11 ++ src/climate_api/transforms/__init__.py | 13 +++ src/climate_api/transforms/deaccumulate.py | 22 ++++ src/climate_api/transforms/unit_conversion.py | 39 +++++++ tests/test_transforms.py | 101 ++++++++++++++++++ 6 files changed, 199 insertions(+), 2 deletions(-) create mode 100644 src/climate_api/transforms/__init__.py create mode 100644 src/climate_api/transforms/deaccumulate.py create mode 100644 src/climate_api/transforms/unit_conversion.py create mode 100644 tests/test_transforms.py diff --git a/climate_api/data/datasets/era5_land.yaml b/climate_api/data/datasets/era5_land.yaml index 91716520..150c75f5 100644 --- a/climate_api/data/datasets/era5_land.yaml +++ b/climate_api/data/datasets/era5_land.yaml @@ -8,15 +8,20 @@ sync_availability: latest_available_function: climate_api.providers.availability.lagged_latest_available lag_hours: 120 - ingestion: + ingestion: function: dhis2eo.data.destine.era5_land.hourly.download default_params: variables: ['t2m'] + transforms: + - climate_api.transforms.convert_units units: kelvin convert_units: degC resolution: 9 km x 9 km source: ERA5-Land Reanalysis source_url: https://earthdatahub.destine.eu/collections/era5/datasets/reanalysis-era5-land + display: + colormap: rdbu_r + range: [15.0, 40.0] - id: era5land_precipitation_hourly name: Total precipitation (ERA5-Land) @@ -32,9 +37,15 @@ function: dhis2eo.data.destine.era5_land.hourly.download default_params: variables: ['tp'] - pre_process: ['deaccumulate_era5'] + transforms: + - climate_api.transforms.deaccumulate_era5 + - climate_api.transforms.convert_units units: m convert_units: mm resolution: 9 km x 9 km source: ERA5-Land Reanalysis source_url: https://earthdatahub.destine.eu/collections/era5/datasets/reanalysis-era5-land + display: + colormap: blues + range: [0.0, 5.0] + nodata: 0.0 diff --git a/climate_api/data_manager/services/downloader.py b/climate_api/data_manager/services/downloader.py index 3c93772e..e294b144 100644 --- a/climate_api/data_manager/services/downloader.py +++ b/climate_api/data_manager/services/downloader.py @@ -143,6 +143,7 @@ def build_dataset_zarr(dataset: dict[str, Any], *, start: str | None = None, end dims = [lon_dim, lat_dim] ds = _select_time_range(ds, dataset=dataset, start=start, end=end) + ds = _run_transforms(ds, dataset) xmin = ds[lon_dim].min().item() xmax = ds[lon_dim].max().item() @@ -243,6 +244,16 @@ def _select_time_range( return selected +def _run_transforms(ds: xr.Dataset, dataset: dict[str, Any]) -> xr.Dataset: + for entry in dataset.get("transforms", []): + func_path = entry if isinstance(entry, str) else entry["function"] + params = {} if isinstance(entry, str) else entry.get("params", {}) + func = _get_dynamic_function(func_path) + logger.info("Applying transform %s to dataset %s", func_path, dataset.get("id", "?")) + ds = func(ds, dataset, **params) + return ds + + def _compute_time_space_chunks( ds: xr.Dataset, dataset: dict[str, Any], diff --git a/src/climate_api/transforms/__init__.py b/src/climate_api/transforms/__init__.py new file mode 100644 index 00000000..e5988537 --- /dev/null +++ b/src/climate_api/transforms/__init__.py @@ -0,0 +1,13 @@ +"""Built-in dataset transform functions for the transforms pipeline. + +Each function has the signature: + (ds: xr.Dataset, dataset: dict[str, Any]) -> xr.Dataset + +Functions can be referenced by their dotted module path in the dataset YAML +``transforms`` list, the same way ``ingestion.function`` works. +""" + +from .deaccumulate import deaccumulate_era5 +from .unit_conversion import convert_units + +__all__ = ["convert_units", "deaccumulate_era5"] diff --git a/src/climate_api/transforms/deaccumulate.py b/src/climate_api/transforms/deaccumulate.py new file mode 100644 index 00000000..fd46dec8 --- /dev/null +++ b/src/climate_api/transforms/deaccumulate.py @@ -0,0 +1,22 @@ +"""Deaccumulation transforms for ERA5 accumulated fields.""" + +from typing import Any + +import xarray as xr + + +def deaccumulate_era5(ds: xr.Dataset, dataset: dict[str, Any]) -> xr.Dataset: + """Convert ERA5 accumulated fields to per-step values by forward differencing. + + ERA5 stores precipitation and other flux variables as accumulations from the + start of the forecast step. This subtracts consecutive steps so each value + represents the amount in that step alone, then clips negative artefacts. + """ + varname = dataset["variable"] + da = ds[varname] + time_dim = next(d for d in da.dims if "time" in d) + diff = da.diff(dim=time_dim) + diff = diff.clip(min=0) + # Drop the first time step (no previous step to diff against) and reassign. + ds = ds.sel({time_dim: ds[time_dim][1:]}) + return ds.assign({varname: diff.assign_attrs(da.attrs)}) diff --git a/src/climate_api/transforms/unit_conversion.py b/src/climate_api/transforms/unit_conversion.py new file mode 100644 index 00000000..65045ff8 --- /dev/null +++ b/src/climate_api/transforms/unit_conversion.py @@ -0,0 +1,39 @@ +"""Unit conversion transform: scale + offset applied to the dataset variable.""" + +import logging +from typing import Any + +import xarray as xr + +logger = logging.getLogger(__name__) + +# (from_units, to_units) -> (display_label, scale, offset) +# Applied as: converted = original * scale + offset +_CONVERSIONS: dict[tuple[str, str], tuple[str, float, float]] = { + ("kelvin", "degc"): ("degC", 1.0, -273.15), + ("m", "mm"): ("mm", 1000.0, 0.0), +} + + +def convert_units(ds: xr.Dataset, dataset: dict[str, Any]) -> xr.Dataset: + """Convert the dataset variable from ``units`` to ``convert_units``. + + Reads ``units`` and ``convert_units`` from the dataset template dict. + Returns the dataset unchanged if either field is absent or the conversion + is not registered in ``_CONVERSIONS``. + """ + convert_to = dataset.get("convert_units") + if not convert_to: + return ds + units = dataset.get("units", "") + key = (units.lower(), convert_to.lower()) + conversion = _CONVERSIONS.get(key) + if conversion is None: + logger.warning("No unit conversion registered for %s -> %s; skipping", units, convert_to) + return ds + label, scale, offset = conversion + varname = dataset["variable"] + logger.info("Converting %s from %s to %s", varname, units, label) + da = ds[varname] + converted = da * scale + offset if scale != 1.0 else da + offset + return ds.assign({varname: converted.assign_attrs({**da.attrs, "units": label})}) diff --git a/tests/test_transforms.py b/tests/test_transforms.py new file mode 100644 index 00000000..d1ab1dd6 --- /dev/null +++ b/tests/test_transforms.py @@ -0,0 +1,101 @@ +import numpy as np +import xarray as xr + +from climate_api.transforms import convert_units, deaccumulate_era5 + + +def _ds(varname: str, values: list[float], time_steps: int = 1) -> xr.Dataset: + if time_steps > 1: + data = np.array(values, dtype=float).reshape(time_steps, -1) + return xr.Dataset({varname: xr.DataArray(data, dims=["time", "x"])}) + return xr.Dataset({varname: xr.DataArray(np.array(values, dtype=float))}) + + +class TestConvertUnits: + def test_kelvin_to_celsius(self): + ds = _ds("t2m", [273.15, 293.15, 313.15]) + result = convert_units(ds, {"variable": "t2m", "units": "kelvin", "convert_units": "degC"}) + np.testing.assert_allclose(result["t2m"].values, [0.0, 20.0, 40.0]) + assert result["t2m"].attrs["units"] == "degC" + + def test_metres_to_mm(self): + ds = _ds("tp", [0.001, 0.005]) + result = convert_units(ds, {"variable": "tp", "units": "m", "convert_units": "mm"}) + np.testing.assert_allclose(result["tp"].values, [1.0, 5.0]) + assert result["tp"].attrs["units"] == "mm" + + def test_no_convert_units_field_is_noop(self): + ds = _ds("t2m", [300.0]) + result = convert_units(ds, {"variable": "t2m", "units": "kelvin"}) + np.testing.assert_array_equal(result["t2m"].values, ds["t2m"].values) + + def test_unknown_conversion_is_noop(self): + ds = _ds("x", [1.0]) + result = convert_units(ds, {"variable": "x", "units": "foo", "convert_units": "bar"}) + np.testing.assert_array_equal(result["x"].values, ds["x"].values) + + def test_preserves_existing_attrs(self): + ds = xr.Dataset({"t2m": xr.DataArray([300.0], attrs={"long_name": "temperature", "units": "K"})}) + result = convert_units(ds, {"variable": "t2m", "units": "kelvin", "convert_units": "degC"}) + assert result["t2m"].attrs["long_name"] == "temperature" + + +class TestDeaccumulateEra5: + def test_differences_along_time(self): + ds = _ds("tp", [0.0, 1.0, 3.0, 6.0], time_steps=4) + result = deaccumulate_era5(ds, {"variable": "tp"}) + assert result.sizes["time"] == 3 + np.testing.assert_array_equal(result["tp"].values.flatten(), [1.0, 2.0, 3.0]) + + def test_clips_negative_values(self): + ds = _ds("tp", [3.0, 1.0, 4.0], time_steps=3) + result = deaccumulate_era5(ds, {"variable": "tp"}) + assert (result["tp"].values >= 0).all() + + def test_preserves_attrs(self): + data = np.array([[0.0], [1.0]]) + ds = xr.Dataset({"tp": xr.DataArray(data, dims=["time", "x"], attrs={"units": "m"})}) + result = deaccumulate_era5(ds, {"variable": "tp"}) + assert result["tp"].attrs["units"] == "m" + + +class TestRunTransformsPipeline: + def test_pipeline_via_dotted_path(self): + ds = _ds("t2m", [273.15]) + dataset = { + "variable": "t2m", + "units": "kelvin", + "convert_units": "degC", + "transforms": ["climate_api.transforms.convert_units"], + } + from climate_api.data_manager.services.downloader import _run_transforms + + result = _run_transforms(ds, dataset) + np.testing.assert_allclose(result["t2m"].values, [0.0]) + + def test_empty_transforms_is_noop(self): + ds = _ds("x", [1.0, 2.0]) + from climate_api.data_manager.services.downloader import _run_transforms + + result = _run_transforms(ds, {"variable": "x", "transforms": []}) + np.testing.assert_array_equal(result["x"].values, ds["x"].values) + + def test_no_transforms_key_is_noop(self): + ds = _ds("x", [1.0]) + from climate_api.data_manager.services.downloader import _run_transforms + + result = _run_transforms(ds, {"variable": "x"}) + np.testing.assert_array_equal(result["x"].values, ds["x"].values) + + def test_dict_entry_with_params(self): + ds = _ds("t2m", [273.15]) + dataset = { + "variable": "t2m", + "units": "kelvin", + "convert_units": "degC", + "transforms": [{"function": "climate_api.transforms.convert_units"}], + } + from climate_api.data_manager.services.downloader import _run_transforms + + result = _run_transforms(ds, dataset) + np.testing.assert_allclose(result["t2m"].values, [0.0]) From 4110792e108fdfebec756a39cb7a8e0a306382b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Sat, 9 May 2026 14:59:14 +0200 Subject: [PATCH 28/46] fix: move transforms package to flat layout (climate_api/transforms/) --- {src/climate_api => climate_api}/transforms/__init__.py | 0 {src/climate_api => climate_api}/transforms/deaccumulate.py | 0 {src/climate_api => climate_api}/transforms/unit_conversion.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename {src/climate_api => climate_api}/transforms/__init__.py (100%) rename {src/climate_api => climate_api}/transforms/deaccumulate.py (100%) rename {src/climate_api => climate_api}/transforms/unit_conversion.py (100%) diff --git a/src/climate_api/transforms/__init__.py b/climate_api/transforms/__init__.py similarity index 100% rename from src/climate_api/transforms/__init__.py rename to climate_api/transforms/__init__.py diff --git a/src/climate_api/transforms/deaccumulate.py b/climate_api/transforms/deaccumulate.py similarity index 100% rename from src/climate_api/transforms/deaccumulate.py rename to climate_api/transforms/deaccumulate.py diff --git a/src/climate_api/transforms/unit_conversion.py b/climate_api/transforms/unit_conversion.py similarity index 100% rename from src/climate_api/transforms/unit_conversion.py rename to climate_api/transforms/unit_conversion.py From 2b276a00923453ec17cabda058bf1e1e4d239aca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Sat, 9 May 2026 15:35:50 +0200 Subject: [PATCH 29/46] feat: use MapLibre globe projection in map viewer --- src/climate_api/templates/map-viewer.html | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/climate_api/templates/map-viewer.html b/src/climate_api/templates/map-viewer.html index b3a3d2d4..5c8ce7df 100644 --- a/src/climate_api/templates/map-viewer.html +++ b/src/climate_api/templates/map-viewer.html @@ -311,8 +311,9 @@

    Climate API

    const map = new maplibregl.Map({ container: "map", style: "https://tiles.openfreemap.org/styles/positron", - center: [20, 5], - zoom: 2, + center: [20, 20], + zoom: 1.5, + projection: { type: "globe" }, fitBoundsOptions: { padding: 40 }, attributionControl: { compact: true }, }); @@ -501,6 +502,13 @@

    Climate API

    }); map.on("load", () => { + map.setFog({ + "range": [0.5, 10], + "color": "#f8f8f8", + "horizon-blend": 0.08, + "space-color": "#d0e4f7", + "star-intensity": 0.0, + }); loadCatalog(); }); From 89ba3c011a72f780a9a21b01f5972625a0dbf038 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Sat, 9 May 2026 15:47:10 +0200 Subject: [PATCH 30/46] fix: remove setFog call not supported in MapLibre v5 --- src/climate_api/templates/map-viewer.html | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/climate_api/templates/map-viewer.html b/src/climate_api/templates/map-viewer.html index 5c8ce7df..ca377d74 100644 --- a/src/climate_api/templates/map-viewer.html +++ b/src/climate_api/templates/map-viewer.html @@ -502,13 +502,6 @@

    Climate API

    }); map.on("load", () => { - map.setFog({ - "range": [0.5, 10], - "color": "#f8f8f8", - "horizon-blend": 0.08, - "space-color": "#d0e4f7", - "star-intensity": 0.0, - }); loadCatalog(); }); From 735679b840adcd124d8c40023b4dc201362677eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Sat, 9 May 2026 15:49:53 +0200 Subject: [PATCH 31/46] fix: set globe projection in load handler so style cannot override it --- src/climate_api/templates/map-viewer.html | 1 + 1 file changed, 1 insertion(+) diff --git a/src/climate_api/templates/map-viewer.html b/src/climate_api/templates/map-viewer.html index ca377d74..8bb424f8 100644 --- a/src/climate_api/templates/map-viewer.html +++ b/src/climate_api/templates/map-viewer.html @@ -502,6 +502,7 @@

    Climate API

    }); map.on("load", () => { + map.setProjection({ type: "globe" }); loadCatalog(); }); From 2814112d1ed0ca8a70332584307901e94d0d68a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Sat, 9 May 2026 16:04:06 +0200 Subject: [PATCH 32/46] fix: scope cache files by extent_id and validate spatial coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Separate deployment instances (e.g. Norway vs Sierra Leone) sharing the same DOWNLOAD_DIR would silently reuse each other's NetCDF/Zarr cache files because the prefix was keyed only on dataset id. Add an optional extent_id suffix so each extent gets its own cache namespace. Validate bbox against a dataset's declared coverage field before downloading, returning HTTP 400 early instead of a confusing provider-level error. Add coverage: {lat: [-50, 50]} to chirps3.yaml since CHIRPS3 does not cover latitudes above 50°N (e.g. Norway). --- src/climate_api/data/datasets/chirps3.yaml | 2 + .../data_manager/services/downloader.py | 61 +++++++++--- src/climate_api/ingestions/services.py | 9 +- tests/test_datasets.py | 28 +++--- tests/test_downloader.py | 97 +++++++++++++++++-- 5 files changed, 161 insertions(+), 36 deletions(-) diff --git a/src/climate_api/data/datasets/chirps3.yaml b/src/climate_api/data/datasets/chirps3.yaml index 7fecad80..fe3907d0 100644 --- a/src/climate_api/data/datasets/chirps3.yaml +++ b/src/climate_api/data/datasets/chirps3.yaml @@ -7,6 +7,8 @@ sync_execution: append sync_availability: latest_available_function: climate_api.providers.availability.chirps3_daily_latest_available + coverage: + lat: [-50, 50] ingestion: function: dhis2eo.data.chc.chirps3.daily.download units: mm diff --git a/src/climate_api/data_manager/services/downloader.py b/src/climate_api/data_manager/services/downloader.py index 3c93772e..27c9b351 100644 --- a/src/climate_api/data_manager/services/downloader.py +++ b/src/climate_api/data_manager/services/downloader.py @@ -46,6 +46,7 @@ def download_dataset( country_code: str | None, overwrite: bool, background_tasks: BackgroundTasks | None, + extent_id: str | None = None, ) -> list[Path]: """Download dataset files and return the NetCDF paths created or modified by this run. @@ -54,10 +55,11 @@ def download_dataset( When running in the background-task path, the download is deferred and this function returns an empty list because no files have been created yet. """ + _validate_spatial_coverage(dataset, bbox) ingestion = dataset["ingestion"] eo_download_func_path = ingestion["function"] eo_download_func = _get_dynamic_function(eo_download_func_path) - before_files = {path.resolve(): path.stat().st_mtime_ns for path in get_cache_files(dataset)} + before_files = {path.resolve(): path.stat().st_mtime_ns for path in get_cache_files(dataset, extent_id=extent_id)} params = dict(ingestion.get("default_params", {})) params.update( @@ -65,7 +67,7 @@ def download_dataset( "start": start, "end": end or datetime.date.today().isoformat(), "dirname": DOWNLOAD_DIR, - "prefix": _get_cache_prefix(dataset), + "prefix": _get_cache_prefix(dataset, extent_id=extent_id), "overwrite": overwrite, } ) @@ -105,19 +107,21 @@ def download_dataset( message = str(exc).strip() or "Unexpected error from upstream data provider" raise HTTPException(status_code=502, detail=f"Upstream dataset download failed: {message}") from exc - after_files = [path.resolve() for path in get_cache_files(dataset)] + after_files = [path.resolve() for path in get_cache_files(dataset, extent_id=extent_id)] changed_files = [ path for path in after_files if path not in before_files or path.stat().st_mtime_ns != before_files[path] ] return changed_files -def build_dataset_zarr(dataset: dict[str, Any], *, start: str | None = None, end: str | None = None) -> None: +def build_dataset_zarr( + dataset: dict[str, Any], *, start: str | None = None, end: str | None = None, extent_id: str | None = None +) -> None: """Collect dataset cache files into one optimised Zarr archive, clipped to request scope.""" logger.info(f"Optimizing cache for dataset {dataset['id']}") ingestion = dataset["ingestion"] - files = get_cache_files(dataset) + files = get_cache_files(dataset, extent_id=extent_id) logger.info(f"Opening {len(files)} files from cache") ds = xr.open_mfdataset(files) @@ -161,7 +165,7 @@ def build_dataset_zarr(dataset: dict[str, Any], *, start: str | None = None, end # save as zarr logger.info("Saving to optimized zarr file") - zarr_path = DOWNLOAD_DIR / f"{_get_cache_prefix(dataset)}.zarr" + zarr_path = DOWNLOAD_DIR / f"{_get_cache_prefix(dataset, extent_id=extent_id)}.zarr" multiscales = dict(ingestion.get("multiscales", {})) @@ -269,26 +273,59 @@ def _compute_time_space_chunks( return chunks -def _get_cache_prefix(dataset: dict[str, Any]) -> str: - return str(dataset["id"]) +def _get_cache_prefix(dataset: dict[str, Any], extent_id: str | None = None) -> str: + base = str(dataset["id"]) + return f"{base}_{extent_id}" if extent_id else base -def get_cache_files(dataset: dict[str, Any]) -> list[Path]: +def get_cache_files(dataset: dict[str, Any], extent_id: str | None = None) -> list[Path]: """Return all NetCDF cache files matching this dataset's prefix.""" # TODO: not bulletproof -- e.g. 2m_temperature matches 2m_temperature_modified - prefix = _get_cache_prefix(dataset) + prefix = _get_cache_prefix(dataset, extent_id=extent_id) return list(DOWNLOAD_DIR.glob(f"{prefix}*.nc")) -def get_zarr_path(dataset: dict[str, Any]) -> Path | None: +def get_zarr_path(dataset: dict[str, Any], extent_id: str | None = None) -> Path | None: """Return the optimised zarr archive path if it exists.""" - prefix = _get_cache_prefix(dataset) + prefix = _get_cache_prefix(dataset, extent_id=extent_id) optimized = DOWNLOAD_DIR / f"{prefix}.zarr" if optimized.exists(): return optimized return None +def _validate_spatial_coverage(dataset: dict[str, Any], bbox: list[float] | None) -> None: + """Raise HTTP 400 if the request bbox falls outside the dataset's declared coverage.""" + coverage = dataset.get("coverage") + if not coverage or bbox is None: + return + xmin, ymin, xmax, ymax = bbox + lat_bounds = coverage.get("lat") + if lat_bounds is not None: + cov_lat_min, cov_lat_max = lat_bounds + if ymin > cov_lat_max or ymax < cov_lat_min: + raise HTTPException( + status_code=400, + detail=( + f"Dataset '{dataset['id']}' does not cover this extent. " + f"Latitude coverage: {cov_lat_min}°–{cov_lat_max}°, " + f"requested: {ymin}°–{ymax}°." + ), + ) + lon_bounds = coverage.get("lon") + if lon_bounds is not None: + cov_lon_min, cov_lon_max = lon_bounds + if xmin > cov_lon_max or xmax < cov_lon_min: + raise HTTPException( + status_code=400, + detail=( + f"Dataset '{dataset['id']}' does not cover this extent. " + f"Longitude coverage: {cov_lon_min}°–{cov_lon_max}°, " + f"requested: {xmin}°–{xmax}°." + ), + ) + + def _get_dynamic_function(full_path: str) -> Callable[..., Any]: """Import and return a function given its dotted module path.""" parts = full_path.split(".") diff --git a/src/climate_api/ingestions/services.py b/src/climate_api/ingestions/services.py index dfc5efd4..889d461e 100644 --- a/src/climate_api/ingestions/services.py +++ b/src/climate_api/ingestions/services.py @@ -215,13 +215,14 @@ def create_artifact( country_code=country_code, overwrite=overwrite, background_tasks=None, + extent_id=extent_id, ) logger.info("Download finished for dataset '%s': changed_files=%d", dataset["id"], len(downloaded_files)) if prefer_zarr or requires_canonical_zarr: try: logger.info("Building canonical Zarr artifact for dataset '%s'", dataset["id"]) - downloader.build_dataset_zarr(dataset, start=start, end=end) + downloader.build_dataset_zarr(dataset, start=start, end=end, extent_id=extent_id) logger.info("Canonical Zarr artifact built for dataset '%s'", dataset["id"]) except Exception as exc: if requires_canonical_zarr: @@ -241,16 +242,16 @@ def create_artifact( exc_info=True, ) - zarr_path = downloader.get_zarr_path(dataset) + zarr_path = downloader.get_zarr_path(dataset, extent_id=extent_id) if requires_canonical_zarr and zarr_path is None: raise HTTPException( status_code=500, detail="Append sync requires a canonical Zarr artifact, but no Zarr store was produced.", ) cache_files = ( - downloader.get_cache_files(dataset) + downloader.get_cache_files(dataset, extent_id=extent_id) if requires_canonical_zarr - else downloaded_files or downloader.get_cache_files(dataset) + else downloaded_files or downloader.get_cache_files(dataset, extent_id=extent_id) ) primary_path: str | None diff --git a/tests/test_datasets.py b/tests/test_datasets.py index 5dd31e71..ebe2e6a2 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -279,7 +279,7 @@ def test_create_artifact_computes_coverage_from_created_artifact_paths( created_file.write_text("dummy", encoding="utf-8") monkeypatch.setattr(services.downloader, "download_dataset", lambda *_, **__: [created_file]) - monkeypatch.setattr(services.downloader, "get_zarr_path", lambda _: None) + monkeypatch.setattr(services.downloader, "get_zarr_path", lambda dataset, extent_id=None: None) monkeypatch.setattr(services, "_find_existing_artifact", lambda **_: None) captured: dict[str, object] = {} @@ -350,7 +350,7 @@ def fake_download_dataset( return [created_file] monkeypatch.setattr(services.downloader, "download_dataset", fake_download_dataset) - monkeypatch.setattr(services.downloader, "get_zarr_path", lambda _: None) + monkeypatch.setattr(services.downloader, "get_zarr_path", lambda dataset, extent_id=None: None) monkeypatch.setattr(services, "_find_existing_artifact", lambda **_: None) monkeypatch.setattr( services, @@ -419,7 +419,7 @@ def fake_download_dataset( monkeypatch.setattr(services, "utc_now", lambda: FixedDateTime(2026, 4, 21, 13, 47, 31, tzinfo=UTC)) monkeypatch.setattr(services.downloader, "download_dataset", fake_download_dataset) - monkeypatch.setattr(services.downloader, "get_zarr_path", lambda _: None) + monkeypatch.setattr(services.downloader, "get_zarr_path", lambda dataset, extent_id=None: None) monkeypatch.setattr(services, "_find_existing_artifact", lambda **_: None) monkeypatch.setattr( services, @@ -466,7 +466,7 @@ def test_create_artifact_returns_409_when_downloaded_artifact_has_no_data( created_file.write_text("dummy", encoding="utf-8") monkeypatch.setattr(services.downloader, "download_dataset", lambda *_, **__: [created_file]) - monkeypatch.setattr(services.downloader, "get_zarr_path", lambda _: None) + monkeypatch.setattr(services.downloader, "get_zarr_path", lambda dataset, extent_id=None: None) monkeypatch.setattr(services, "_find_existing_artifact", lambda **_: None) monkeypatch.setattr( services, @@ -526,7 +526,7 @@ def fake_download_dataset( monkeypatch.setattr(services.downloader, "download_dataset", fake_download_dataset) monkeypatch.setattr(services.downloader, "build_dataset_zarr", lambda *_, **__: None) - monkeypatch.setattr(services.downloader, "get_zarr_path", lambda _: tmp_path / "chirps3_precipitation_daily.zarr") + monkeypatch.setattr(services.downloader, "get_zarr_path", lambda dataset, extent_id=None: tmp_path / "chirps3_precipitation_daily.zarr") monkeypatch.setattr(services, "_find_existing_artifact", lambda **_: None) monkeypatch.setattr( services, @@ -654,8 +654,8 @@ def fake_find_existing_artifact(**kwargs: object) -> ArtifactRecord | None: monkeypatch.setattr(services, "_find_existing_artifact", fake_find_existing_artifact) monkeypatch.setattr(services.downloader, "download_dataset", lambda *_, **__: [created_file]) monkeypatch.setattr(services.downloader, "build_dataset_zarr", lambda *_, **__: None) - monkeypatch.setattr(services.downloader, "get_zarr_path", lambda _: zarr_path) - monkeypatch.setattr(services.downloader, "get_cache_files", lambda _: [created_file]) + monkeypatch.setattr(services.downloader, "get_zarr_path", lambda dataset, extent_id=None: zarr_path) + monkeypatch.setattr(services.downloader, "get_cache_files", lambda dataset, extent_id=None: [created_file]) monkeypatch.setattr( services, "get_data_coverage_for_paths", @@ -702,15 +702,17 @@ def test_create_artifact_delta_requires_canonical_zarr_when_prefer_zarr_is_false captured_build: dict[str, object] = {} - def fake_build_dataset_zarr(dataset_arg: dict[str, object], *, start: str | None, end: str | None) -> None: + def fake_build_dataset_zarr( + dataset_arg: dict[str, object], *, start: str | None, end: str | None, extent_id: str | None = None + ) -> None: captured_build["dataset_id"] = dataset_arg["id"] captured_build["start"] = start captured_build["end"] = end monkeypatch.setattr(services.downloader, "download_dataset", lambda *_, **__: [created_file]) monkeypatch.setattr(services.downloader, "build_dataset_zarr", fake_build_dataset_zarr) - monkeypatch.setattr(services.downloader, "get_zarr_path", lambda _: zarr_path) - monkeypatch.setattr(services.downloader, "get_cache_files", lambda _: [created_file]) + monkeypatch.setattr(services.downloader, "get_zarr_path", lambda dataset, extent_id=None: zarr_path) + monkeypatch.setattr(services.downloader, "get_cache_files", lambda dataset, extent_id=None: [created_file]) monkeypatch.setattr(services, "_find_existing_artifact", lambda **_: None) monkeypatch.setattr( services, @@ -764,7 +766,7 @@ def fail_build_dataset_zarr(*_: object, **__: object) -> None: monkeypatch.setattr(services.downloader, "download_dataset", lambda *_, **__: [created_file]) monkeypatch.setattr(services.downloader, "build_dataset_zarr", fail_build_dataset_zarr) - monkeypatch.setattr(services.downloader, "get_zarr_path", lambda _: None) + monkeypatch.setattr(services.downloader, "get_zarr_path", lambda dataset, extent_id=None: None) monkeypatch.setattr(services, "_find_existing_artifact", lambda **_: None) with pytest.raises(services.HTTPException) as exc_info: @@ -802,8 +804,8 @@ def test_create_artifact_delta_rejects_short_rebuilt_coverage( monkeypatch.setattr(services.downloader, "download_dataset", lambda *_, **__: [created_file]) monkeypatch.setattr(services.downloader, "build_dataset_zarr", lambda *_, **__: None) - monkeypatch.setattr(services.downloader, "get_zarr_path", lambda _: zarr_path) - monkeypatch.setattr(services.downloader, "get_cache_files", lambda _: [created_file]) + monkeypatch.setattr(services.downloader, "get_zarr_path", lambda dataset, extent_id=None: zarr_path) + monkeypatch.setattr(services.downloader, "get_cache_files", lambda dataset, extent_id=None: [created_file]) monkeypatch.setattr(services, "_find_existing_artifact", lambda **_: None) monkeypatch.setattr( services, diff --git a/tests/test_downloader.py b/tests/test_downloader.py index aef87a8a..2b1f98aa 100644 --- a/tests/test_downloader.py +++ b/tests/test_downloader.py @@ -147,6 +147,89 @@ def fake_download( assert "Upstream dataset download failed: provider timeout" == str(exc_info.value.detail) +# --------------------------------------------------------------------------- +# _get_cache_prefix — extent_id isolation +# --------------------------------------------------------------------------- + + +def test_get_cache_prefix_without_extent_id() -> None: + dataset: dict[str, Any] = {"id": "chirps3_precipitation_daily", "ingestion": {}} + assert downloader._get_cache_prefix(dataset) == "chirps3_precipitation_daily" + + +def test_get_cache_prefix_with_extent_id() -> None: + dataset: dict[str, Any] = {"id": "chirps3_precipitation_daily", "ingestion": {}} + assert downloader._get_cache_prefix(dataset, extent_id="nor") == "chirps3_precipitation_daily_nor" + + +# --------------------------------------------------------------------------- +# _validate_spatial_coverage +# --------------------------------------------------------------------------- + + +def test_validate_spatial_coverage_passes_when_no_coverage_declared() -> None: + dataset: dict[str, Any] = {"id": "worldpop_population_yearly", "ingestion": {}} + downloader._validate_spatial_coverage(dataset, bbox=[4.5, 57.9, 31.1, 71.2]) + + +def test_validate_spatial_coverage_passes_when_no_bbox() -> None: + dataset: dict[str, Any] = {"id": "chirps3_precipitation_daily", "ingestion": {}, "coverage": {"lat": [-50, 50]}} + downloader._validate_spatial_coverage(dataset, bbox=None) + + +def test_validate_spatial_coverage_passes_when_bbox_inside_coverage() -> None: + dataset: dict[str, Any] = {"id": "chirps3_precipitation_daily", "ingestion": {}, "coverage": {"lat": [-50, 50]}} + downloader._validate_spatial_coverage(dataset, bbox=[-10.0, -10.0, 10.0, 10.0]) + + +def test_validate_spatial_coverage_raises_when_bbox_outside_lat_coverage() -> None: + dataset: dict[str, Any] = { + "id": "chirps3_precipitation_daily", + "ingestion": {}, + "coverage": {"lat": [-50, 50]}, + } + with pytest.raises(HTTPException) as exc_info: + downloader._validate_spatial_coverage(dataset, bbox=[4.5, 57.9, 31.1, 71.2]) + assert exc_info.value.status_code == 400 + assert "does not cover this extent" in str(exc_info.value.detail) + assert "Latitude" in str(exc_info.value.detail) + + +def test_validate_spatial_coverage_raises_when_bbox_outside_lon_coverage() -> None: + dataset: dict[str, Any] = { + "id": "some_dataset", + "ingestion": {}, + "coverage": {"lon": [-180, 60]}, + } + with pytest.raises(HTTPException) as exc_info: + downloader._validate_spatial_coverage(dataset, bbox=[70.0, -10.0, 90.0, 10.0]) + assert exc_info.value.status_code == 400 + assert "Longitude" in str(exc_info.value.detail) + + +def test_download_dataset_returns_400_when_bbox_outside_dataset_coverage( + monkeypatch: pytest.MonkeyPatch, +) -> None: + dataset: dict[str, Any] = { + "id": "chirps3_precipitation_daily", + "ingestion": {"function": "ignored.path"}, + "coverage": {"lat": [-50, 50]}, + } + + with pytest.raises(HTTPException) as exc_info: + downloader.download_dataset( + dataset=dataset, + start="2020-01-01", + end="2020-01-31", + bbox=[4.5, 57.9, 31.1, 71.2], + country_code=None, + overwrite=False, + background_tasks=None, + ) + assert exc_info.value.status_code == 400 + assert "does not cover this extent" in str(exc_info.value.detail) + + # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- @@ -273,7 +356,7 @@ def test_build_dataset_zarr_flat_creates_zarr(tmp_path: Path, monkeypatch: pytes """Flat zarr is written with the correct variable and no pyramid level dirs.""" nc_files = _write_nc_files(tmp_path) monkeypatch.setattr(downloader, "DOWNLOAD_DIR", tmp_path) - monkeypatch.setattr(downloader, "get_cache_files", lambda _: nc_files) + monkeypatch.setattr(downloader, "get_cache_files", lambda dataset, extent_id=None: nc_files) downloader.build_dataset_zarr(_FLAT_DATASET) @@ -310,7 +393,7 @@ def test_build_dataset_zarr_normalises_coordinate_names(tmp_path: Path, monkeypa "ingestion": {}, } monkeypatch.setattr(downloader, "DOWNLOAD_DIR", tmp_path) - monkeypatch.setattr(downloader, "get_cache_files", lambda _: [path]) + monkeypatch.setattr(downloader, "get_cache_files", lambda dataset, extent_id=None: [path]) downloader.build_dataset_zarr(dataset) @@ -346,7 +429,7 @@ def test_build_dataset_zarr_normalises_xy_coordinate_names(tmp_path: Path, monke "ingestion": {}, } monkeypatch.setattr(downloader, "DOWNLOAD_DIR", tmp_path) - monkeypatch.setattr(downloader, "get_cache_files", lambda _: [path]) + monkeypatch.setattr(downloader, "get_cache_files", lambda dataset, extent_id=None: [path]) downloader.build_dataset_zarr(dataset) @@ -374,7 +457,7 @@ def test_build_dataset_zarr_clips_to_requested_daily_range( "ingestion": {}, } monkeypatch.setattr(downloader, "DOWNLOAD_DIR", tmp_path) - monkeypatch.setattr(downloader, "get_cache_files", lambda _: nc_files) + monkeypatch.setattr(downloader, "get_cache_files", lambda dataset, extent_id=None: nc_files) downloader.build_dataset_zarr(dataset, start="2024-02-01", end="2024-02-10") @@ -404,7 +487,7 @@ def test_build_dataset_zarr_pyramid_copies_time_to_root(tmp_path: Path, monkeypa """Pyramid zarr build copies the time coordinate to the store root for zarr-layer.""" nc_files = _write_nc_files(tmp_path) monkeypatch.setattr(downloader, "DOWNLOAD_DIR", tmp_path) - monkeypatch.setattr(downloader, "get_cache_files", lambda _: nc_files) + monkeypatch.setattr(downloader, "get_cache_files", lambda dataset, extent_id=None: nc_files) def fake_create_pyramid(ds: xr.Dataset, levels: int, x_dim: str, y_dim: str, method: str) -> Pyramid: return _make_fake_pyramid(ds, tmp_path / "my_dataset.zarr") @@ -422,7 +505,7 @@ def test_build_dataset_zarr_pyramid_is_openable_via_level_0(tmp_path: Path, monk """open_zarr_dataset returns the dataset from level 0 of the pyramid store.""" nc_files = _write_nc_files(tmp_path) monkeypatch.setattr(downloader, "DOWNLOAD_DIR", tmp_path) - monkeypatch.setattr(downloader, "get_cache_files", lambda _: nc_files) + monkeypatch.setattr(downloader, "get_cache_files", lambda dataset, extent_id=None: nc_files) def fake_create_pyramid(ds: xr.Dataset, levels: int, x_dim: str, y_dim: str, method: str) -> Pyramid: return _make_fake_pyramid(ds, tmp_path / "my_dataset.zarr") @@ -446,7 +529,7 @@ def test_build_dataset_zarr_pyramid_normalises_coordinate_names( # Source files use lat/lon (WorldPop-style); canonical names must appear in the written store. nc_files = _write_nc_files(tmp_path) monkeypatch.setattr(downloader, "DOWNLOAD_DIR", tmp_path) - monkeypatch.setattr(downloader, "get_cache_files", lambda _: nc_files) + monkeypatch.setattr(downloader, "get_cache_files", lambda dataset, extent_id=None: nc_files) received: list[xr.Dataset] = [] From 00be6d8feda47495a58554091d11e95457039a65 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Sat, 9 May 2026 16:10:49 +0200 Subject: [PATCH 33/46] refactor: replace coverage field with OGC extents in dataset templates Aligns dataset YAML schema with OGC API Collections by replacing the custom coverage.lat/lon block with extents.spatial.bbox (OGC [xmin, ymin, xmax, ymax] format) and adding extents.temporal with begin, end, trs, and resolution fields. _validate_spatial_coverage now reads extents.spatial.bbox directly, which covers both axes in one check without separate lat/lon keys. All three dataset templates receive extents blocks. --- src/climate_api/data/datasets/chirps3.yaml | 11 +++- src/climate_api/data/datasets/era5_land.yaml | 22 +++++++- src/climate_api/data/datasets/worldpop.yaml | 9 +++ .../data_manager/services/downloader.py | 55 ++++++++++--------- tests/test_downloader.py | 28 ++++++---- 5 files changed, 84 insertions(+), 41 deletions(-) diff --git a/src/climate_api/data/datasets/chirps3.yaml b/src/climate_api/data/datasets/chirps3.yaml index fe3907d0..20b7d13d 100644 --- a/src/climate_api/data/datasets/chirps3.yaml +++ b/src/climate_api/data/datasets/chirps3.yaml @@ -7,8 +7,15 @@ sync_execution: append sync_availability: latest_available_function: climate_api.providers.availability.chirps3_daily_latest_available - coverage: - lat: [-50, 50] + extents: + spatial: + bbox: [-180, -50, 180, 50] + crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 + temporal: + begin: "1981-01-01" + end: + trs: http://www.opengis.net/def/uom/ISO-8601/0/Gregorian + resolution: P1D ingestion: function: dhis2eo.data.chc.chirps3.daily.download units: mm diff --git a/src/climate_api/data/datasets/era5_land.yaml b/src/climate_api/data/datasets/era5_land.yaml index 161f6832..2804a6b1 100644 --- a/src/climate_api/data/datasets/era5_land.yaml +++ b/src/climate_api/data/datasets/era5_land.yaml @@ -8,7 +8,16 @@ sync_availability: latest_available_function: climate_api.providers.availability.lagged_latest_available lag_hours: 120 - ingestion: + extents: + spatial: + bbox: [-180, -90, 180, 90] + crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 + temporal: + begin: "1950-01-01" + end: + trs: http://www.opengis.net/def/uom/ISO-8601/0/Gregorian + resolution: PT1H + ingestion: function: dhis2eo.data.destine.era5_land.hourly.download default_params: variables: ['t2m'] @@ -31,7 +40,16 @@ sync_availability: latest_available_function: climate_api.providers.availability.lagged_latest_available lag_hours: 120 - ingestion: + extents: + spatial: + bbox: [-180, -90, 180, 90] + crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 + temporal: + begin: "1950-01-01" + end: + trs: http://www.opengis.net/def/uom/ISO-8601/0/Gregorian + resolution: PT1H + ingestion: function: dhis2eo.data.destine.era5_land.hourly.download default_params: variables: ['tp'] diff --git a/src/climate_api/data/datasets/worldpop.yaml b/src/climate_api/data/datasets/worldpop.yaml index b7d9f64c..67ba3bb4 100644 --- a/src/climate_api/data/datasets/worldpop.yaml +++ b/src/climate_api/data/datasets/worldpop.yaml @@ -8,6 +8,15 @@ latest_available_function: climate_api.providers.availability.worldpop_release_latest_available # WorldPop projections are intentionally request-driven for future years. allow_future: true + extents: + spatial: + bbox: [-180, -90, 180, 90] + crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 + temporal: + begin: "2000" + end: + trs: http://www.opengis.net/def/uom/ISO-8601/0/Gregorian + resolution: P1Y ingestion: function: dhis2eo.data.worldpop.pop_total.yearly.download multiscales: diff --git a/src/climate_api/data_manager/services/downloader.py b/src/climate_api/data_manager/services/downloader.py index 27c9b351..25be6b23 100644 --- a/src/climate_api/data_manager/services/downloader.py +++ b/src/climate_api/data_manager/services/downloader.py @@ -295,35 +295,36 @@ def get_zarr_path(dataset: dict[str, Any], extent_id: str | None = None) -> Path def _validate_spatial_coverage(dataset: dict[str, Any], bbox: list[float] | None) -> None: - """Raise HTTP 400 if the request bbox falls outside the dataset's declared coverage.""" - coverage = dataset.get("coverage") - if not coverage or bbox is None: + """Raise HTTP 400 if the request bbox falls outside the dataset's declared extents.""" + extents = dataset.get("extents") + if not extents or bbox is None: return + spatial = extents.get("spatial") + if not spatial: + return + cov_bbox = spatial.get("bbox") + if not cov_bbox: + return + cov_xmin, cov_ymin, cov_xmax, cov_ymax = cov_bbox xmin, ymin, xmax, ymax = bbox - lat_bounds = coverage.get("lat") - if lat_bounds is not None: - cov_lat_min, cov_lat_max = lat_bounds - if ymin > cov_lat_max or ymax < cov_lat_min: - raise HTTPException( - status_code=400, - detail=( - f"Dataset '{dataset['id']}' does not cover this extent. " - f"Latitude coverage: {cov_lat_min}°–{cov_lat_max}°, " - f"requested: {ymin}°–{ymax}°." - ), - ) - lon_bounds = coverage.get("lon") - if lon_bounds is not None: - cov_lon_min, cov_lon_max = lon_bounds - if xmin > cov_lon_max or xmax < cov_lon_min: - raise HTTPException( - status_code=400, - detail=( - f"Dataset '{dataset['id']}' does not cover this extent. " - f"Longitude coverage: {cov_lon_min}°–{cov_lon_max}°, " - f"requested: {xmin}°–{xmax}°." - ), - ) + if ymin > cov_ymax or ymax < cov_ymin: + raise HTTPException( + status_code=400, + detail=( + f"Dataset '{dataset['id']}' does not cover this extent. " + f"Latitude coverage: {cov_ymin}°–{cov_ymax}°, " + f"requested: {ymin}°–{ymax}°." + ), + ) + if xmin > cov_xmax or xmax < cov_xmin: + raise HTTPException( + status_code=400, + detail=( + f"Dataset '{dataset['id']}' does not cover this extent. " + f"Longitude coverage: {cov_xmin}°–{cov_xmax}°, " + f"requested: {xmin}°–{xmax}°." + ), + ) def _get_dynamic_function(full_path: str) -> Callable[..., Any]: diff --git a/tests/test_downloader.py b/tests/test_downloader.py index 2b1f98aa..d87ed10a 100644 --- a/tests/test_downloader.py +++ b/tests/test_downloader.py @@ -167,26 +167,34 @@ def test_get_cache_prefix_with_extent_id() -> None: # --------------------------------------------------------------------------- -def test_validate_spatial_coverage_passes_when_no_coverage_declared() -> None: +_CHIRPS3_EXTENTS: dict[str, Any] = { + "spatial": {"bbox": [-180, -50, 180, 50], "crs": "http://www.opengis.net/def/crs/OGC/1.3/CRS84"} +} +_LIMITED_LON_EXTENTS: dict[str, Any] = { + "spatial": {"bbox": [-180, -90, 60, 90], "crs": "http://www.opengis.net/def/crs/OGC/1.3/CRS84"} +} + + +def test_validate_spatial_coverage_passes_when_no_extents_declared() -> None: dataset: dict[str, Any] = {"id": "worldpop_population_yearly", "ingestion": {}} downloader._validate_spatial_coverage(dataset, bbox=[4.5, 57.9, 31.1, 71.2]) def test_validate_spatial_coverage_passes_when_no_bbox() -> None: - dataset: dict[str, Any] = {"id": "chirps3_precipitation_daily", "ingestion": {}, "coverage": {"lat": [-50, 50]}} + dataset: dict[str, Any] = {"id": "chirps3_precipitation_daily", "ingestion": {}, "extents": _CHIRPS3_EXTENTS} downloader._validate_spatial_coverage(dataset, bbox=None) -def test_validate_spatial_coverage_passes_when_bbox_inside_coverage() -> None: - dataset: dict[str, Any] = {"id": "chirps3_precipitation_daily", "ingestion": {}, "coverage": {"lat": [-50, 50]}} +def test_validate_spatial_coverage_passes_when_bbox_inside_extents() -> None: + dataset: dict[str, Any] = {"id": "chirps3_precipitation_daily", "ingestion": {}, "extents": _CHIRPS3_EXTENTS} downloader._validate_spatial_coverage(dataset, bbox=[-10.0, -10.0, 10.0, 10.0]) -def test_validate_spatial_coverage_raises_when_bbox_outside_lat_coverage() -> None: +def test_validate_spatial_coverage_raises_when_bbox_outside_lat_extents() -> None: dataset: dict[str, Any] = { "id": "chirps3_precipitation_daily", "ingestion": {}, - "coverage": {"lat": [-50, 50]}, + "extents": _CHIRPS3_EXTENTS, } with pytest.raises(HTTPException) as exc_info: downloader._validate_spatial_coverage(dataset, bbox=[4.5, 57.9, 31.1, 71.2]) @@ -195,11 +203,11 @@ def test_validate_spatial_coverage_raises_when_bbox_outside_lat_coverage() -> No assert "Latitude" in str(exc_info.value.detail) -def test_validate_spatial_coverage_raises_when_bbox_outside_lon_coverage() -> None: +def test_validate_spatial_coverage_raises_when_bbox_outside_lon_extents() -> None: dataset: dict[str, Any] = { "id": "some_dataset", "ingestion": {}, - "coverage": {"lon": [-180, 60]}, + "extents": _LIMITED_LON_EXTENTS, } with pytest.raises(HTTPException) as exc_info: downloader._validate_spatial_coverage(dataset, bbox=[70.0, -10.0, 90.0, 10.0]) @@ -207,13 +215,13 @@ def test_validate_spatial_coverage_raises_when_bbox_outside_lon_coverage() -> No assert "Longitude" in str(exc_info.value.detail) -def test_download_dataset_returns_400_when_bbox_outside_dataset_coverage( +def test_download_dataset_returns_400_when_bbox_outside_dataset_extents( monkeypatch: pytest.MonkeyPatch, ) -> None: dataset: dict[str, Any] = { "id": "chirps3_precipitation_daily", "ingestion": {"function": "ignored.path"}, - "coverage": {"lat": [-50, 50]}, + "extents": _CHIRPS3_EXTENTS, } with pytest.raises(HTTPException) as exc_info: From 2f005d3c1bbdae03ad583f4cf486d94a683058ba Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Sat, 9 May 2026 16:12:43 +0200 Subject: [PATCH 34/46] chore: remove empty yaml entries from dataset extents blocks --- src/climate_api/data/datasets/chirps3.yaml | 2 -- src/climate_api/data/datasets/era5_land.yaml | 4 ---- src/climate_api/data/datasets/worldpop.yaml | 2 -- 3 files changed, 8 deletions(-) diff --git a/src/climate_api/data/datasets/chirps3.yaml b/src/climate_api/data/datasets/chirps3.yaml index 20b7d13d..2ff99c5c 100644 --- a/src/climate_api/data/datasets/chirps3.yaml +++ b/src/climate_api/data/datasets/chirps3.yaml @@ -13,8 +13,6 @@ crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 temporal: begin: "1981-01-01" - end: - trs: http://www.opengis.net/def/uom/ISO-8601/0/Gregorian resolution: P1D ingestion: function: dhis2eo.data.chc.chirps3.daily.download diff --git a/src/climate_api/data/datasets/era5_land.yaml b/src/climate_api/data/datasets/era5_land.yaml index 2804a6b1..5e977fa1 100644 --- a/src/climate_api/data/datasets/era5_land.yaml +++ b/src/climate_api/data/datasets/era5_land.yaml @@ -14,8 +14,6 @@ crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 temporal: begin: "1950-01-01" - end: - trs: http://www.opengis.net/def/uom/ISO-8601/0/Gregorian resolution: PT1H ingestion: function: dhis2eo.data.destine.era5_land.hourly.download @@ -46,8 +44,6 @@ crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 temporal: begin: "1950-01-01" - end: - trs: http://www.opengis.net/def/uom/ISO-8601/0/Gregorian resolution: PT1H ingestion: function: dhis2eo.data.destine.era5_land.hourly.download diff --git a/src/climate_api/data/datasets/worldpop.yaml b/src/climate_api/data/datasets/worldpop.yaml index 67ba3bb4..13e75281 100644 --- a/src/climate_api/data/datasets/worldpop.yaml +++ b/src/climate_api/data/datasets/worldpop.yaml @@ -14,8 +14,6 @@ crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 temporal: begin: "2000" - end: - trs: http://www.opengis.net/def/uom/ISO-8601/0/Gregorian resolution: P1Y ingestion: function: dhis2eo.data.worldpop.pop_total.yearly.download From 038a34465fcc900cd53a0fdebfa641088272d985 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Sat, 9 May 2026 16:14:39 +0200 Subject: [PATCH 35/46] fix: restore trs field in dataset extents temporal blocks --- src/climate_api/data/datasets/chirps3.yaml | 1 + src/climate_api/data/datasets/era5_land.yaml | 2 ++ src/climate_api/data/datasets/worldpop.yaml | 1 + 3 files changed, 4 insertions(+) diff --git a/src/climate_api/data/datasets/chirps3.yaml b/src/climate_api/data/datasets/chirps3.yaml index 2ff99c5c..998fbb89 100644 --- a/src/climate_api/data/datasets/chirps3.yaml +++ b/src/climate_api/data/datasets/chirps3.yaml @@ -13,6 +13,7 @@ crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 temporal: begin: "1981-01-01" + trs: http://www.opengis.net/def/uom/ISO-8601/0/Gregorian resolution: P1D ingestion: function: dhis2eo.data.chc.chirps3.daily.download diff --git a/src/climate_api/data/datasets/era5_land.yaml b/src/climate_api/data/datasets/era5_land.yaml index 5e977fa1..3a64134e 100644 --- a/src/climate_api/data/datasets/era5_land.yaml +++ b/src/climate_api/data/datasets/era5_land.yaml @@ -14,6 +14,7 @@ crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 temporal: begin: "1950-01-01" + trs: http://www.opengis.net/def/uom/ISO-8601/0/Gregorian resolution: PT1H ingestion: function: dhis2eo.data.destine.era5_land.hourly.download @@ -44,6 +45,7 @@ crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 temporal: begin: "1950-01-01" + trs: http://www.opengis.net/def/uom/ISO-8601/0/Gregorian resolution: PT1H ingestion: function: dhis2eo.data.destine.era5_land.hourly.download diff --git a/src/climate_api/data/datasets/worldpop.yaml b/src/climate_api/data/datasets/worldpop.yaml index 13e75281..63868831 100644 --- a/src/climate_api/data/datasets/worldpop.yaml +++ b/src/climate_api/data/datasets/worldpop.yaml @@ -14,6 +14,7 @@ crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 temporal: begin: "2000" + trs: http://www.opengis.net/def/uom/ISO-8601/0/Gregorian resolution: P1Y ingestion: function: dhis2eo.data.worldpop.pop_total.yearly.download From 4b992bf5bc9a53b2f396b58dd9252e8eb6996224 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Sat, 9 May 2026 16:17:04 +0200 Subject: [PATCH 36/46] fix: break long lambda line in test to satisfy E501 --- tests/test_datasets.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_datasets.py b/tests/test_datasets.py index ebe2e6a2..e2bdafe4 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -526,7 +526,8 @@ def fake_download_dataset( monkeypatch.setattr(services.downloader, "download_dataset", fake_download_dataset) monkeypatch.setattr(services.downloader, "build_dataset_zarr", lambda *_, **__: None) - monkeypatch.setattr(services.downloader, "get_zarr_path", lambda dataset, extent_id=None: tmp_path / "chirps3_precipitation_daily.zarr") + zarr_path_chirps = tmp_path / "chirps3_precipitation_daily.zarr" + monkeypatch.setattr(services.downloader, "get_zarr_path", lambda dataset, extent_id=None: zarr_path_chirps) monkeypatch.setattr(services, "_find_existing_artifact", lambda **_: None) monkeypatch.setattr( services, From 0a8c2d8568848ba6681f046407dff37c0275f355 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Sat, 9 May 2026 16:28:48 +0200 Subject: [PATCH 37/46] fix: require data_dir in config to prevent cross-instance cache sharing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each configured instance must now declare data_dir in climate-api.yaml. The API raises a clear error at startup if a config file is present but data_dir is not set, rather than silently falling back to a shared XDG directory that another instance might also use. Resolution order for the data directory: 1. CACHE_OVERRIDE env var — preserved for Docker/CI backward compat 2. data_dir from CLIMATE_API_CONFIG — required when config is present 3. XDG default — only used when no config file is configured Extent_id remains in cache filenames to support future multi-extent configurations within a single instance. --- src/climate_api/config.py | 29 +++++++++++++++++++ .../data_manager/services/downloader.py | 9 +++--- src/climate_api/ingestions/services.py | 6 +++- tests/conftest.py | 1 + tests/test_config.py | 24 ++++++++++++++- tests/test_downloader.py | 24 +++++++++++++-- 6 files changed, 85 insertions(+), 8 deletions(-) diff --git a/src/climate_api/config.py b/src/climate_api/config.py index 134976f7..1213f3de 100644 --- a/src/climate_api/config.py +++ b/src/climate_api/config.py @@ -7,6 +7,8 @@ import yaml +_MISSING = object() + def _substitute_env_vars(text: str) -> str: """Replace ${VAR:-default} patterns with values from the environment.""" @@ -54,3 +56,30 @@ def _load_config() -> dict[str, Any]: raise ValueError(f"CLIMATE_API_CONFIG must be a YAML mapping at the top level: {path}") _cache = dict(loaded or {}) return _cache + + +def get_data_dir() -> Path | None: + """Return the data directory declared in CLIMATE_API_CONFIG, or None if no config is present. + + Raises ValueError if a config file is present but data_dir is not set, so + misconfigured instances fail fast at startup rather than silently sharing + a default directory with other instances. + + Callers should check CACHE_OVERRIDE themselves before calling this function; + CACHE_OVERRIDE is a legacy escape hatch that bypasses config-level validation. + """ + config_path = get_config_path() + if config_path is None: + return None + + config = get_config() + raw = config.get("data_dir", _MISSING) + if raw is _MISSING: + raise ValueError( + "data_dir is required in CLIMATE_API_CONFIG when a config file is present. " + "Set it to the directory where downloaded data should be stored, " + "e.g. data_dir: ./data" + ) + if not isinstance(raw, (str, Path)): + raise ValueError(f"data_dir in CLIMATE_API_CONFIG must be a path string, got {type(raw).__name__}") + return (config_path.parent / raw).resolve() diff --git a/src/climate_api/data_manager/services/downloader.py b/src/climate_api/data_manager/services/downloader.py index 25be6b23..39f4ba3b 100644 --- a/src/climate_api/data_manager/services/downloader.py +++ b/src/climate_api/data_manager/services/downloader.py @@ -16,19 +16,20 @@ from geozarr_toolkit import MultiscalesConventionMetadata, create_geozarr_attrs from topozarr.coarsen import create_pyramid +from climate_api import config as api_config + from .utils import get_lon_lat_dims, get_time_dim logger = logging.getLogger(__name__) def _resolve_download_dir() -> Path: - # CACHE_OVERRIDE keeps existing Docker/dev deployments working unchanged. override = os.getenv("CACHE_OVERRIDE") if override: return Path(override) - # Default to an XDG-compliant user-writable location so the package works - # when installed with pip (where a package-relative path would land inside - # site-packages and typically be non-writable). + data_dir = api_config.get_data_dir() + if data_dir is not None: + return data_dir / "downloads" xdg_data = Path(os.getenv("XDG_DATA_HOME", Path.home() / ".local" / "share")) return xdg_data / "climate-api" / "downloads" diff --git a/src/climate_api/ingestions/services.py b/src/climate_api/ingestions/services.py index 889d461e..1f0826b7 100644 --- a/src/climate_api/ingestions/services.py +++ b/src/climate_api/ingestions/services.py @@ -49,10 +49,14 @@ def _resolve_artifacts_dir() -> Path: - # CACHE_OVERRIDE keeps existing Docker/dev deployments working unchanged. + from climate_api import config as api_config + override = os.getenv("CACHE_OVERRIDE") if override: return Path(override) / "artifacts" + data_dir = api_config.get_data_dir() + if data_dir is not None: + return data_dir / "artifacts" xdg_data = Path(os.getenv("XDG_DATA_HOME", Path.home() / ".local" / "share")) return xdg_data / "climate-api" / "artifacts" diff --git a/tests/conftest.py b/tests/conftest.py index 1f66391f..0fd7c56d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -13,6 +13,7 @@ name: Sierra Leone bbox: [-13.5, 6.9, -10.1, 10.0] country_code: SLE +data_dir: ./data """ diff --git a/tests/test_config.py b/tests/test_config.py index 34f452e3..f14924a7 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -2,11 +2,33 @@ import pytest -from climate_api.config import get_config +from climate_api.config import get_config, get_data_dir from climate_api.data_registry.services import datasets as dataset_registry from climate_api.extents import services as extent_services +def test_get_data_dir_returns_none_when_no_config(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("CLIMATE_API_CONFIG", raising=False) + assert get_data_dir() is None + + +def test_get_data_dir_raises_when_config_present_but_no_data_dir( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + config_file = tmp_path / "climate-api.yaml" + config_file.write_text("extent:\n id: nor\n", encoding="utf-8") + monkeypatch.setenv("CLIMATE_API_CONFIG", str(config_file)) + with pytest.raises(ValueError, match="data_dir is required"): + get_data_dir() + + +def test_get_data_dir_resolves_relative_to_config_file(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + config_file = tmp_path / "climate-api.yaml" + config_file.write_text("data_dir: ./data\n", encoding="utf-8") + monkeypatch.setenv("CLIMATE_API_CONFIG", str(config_file)) + assert get_data_dir() == tmp_path / "data" + + def test_get_config_returns_empty_when_unset(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.delenv("CLIMATE_API_CONFIG", raising=False) assert get_config() == {} diff --git a/tests/test_downloader.py b/tests/test_downloader.py index d87ed10a..d9dcaa05 100644 --- a/tests/test_downloader.py +++ b/tests/test_downloader.py @@ -23,9 +23,19 @@ def test_resolve_download_dir_uses_cache_override(monkeypatch: pytest.MonkeyPatc assert downloader._resolve_download_dir() == Path(override) -def test_resolve_download_dir_uses_xdg_data_home(monkeypatch: pytest.MonkeyPatch) -> None: +def test_resolve_download_dir_uses_data_dir_from_config(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + config_file = tmp_path / "climate-api.yaml" + config_file.write_text("data_dir: ./data\nextent:\n id: test\n", encoding="utf-8") + monkeypatch.setenv("CLIMATE_API_CONFIG", str(config_file)) + monkeypatch.delenv("CACHE_OVERRIDE", raising=False) + monkeypatch.delenv("XDG_DATA_HOME", raising=False) + assert downloader._resolve_download_dir() == tmp_path / "data" / "downloads" + + +def test_resolve_download_dir_uses_xdg_when_no_config(monkeypatch: pytest.MonkeyPatch) -> None: with tempfile.TemporaryDirectory() as xdg: monkeypatch.delenv("CACHE_OVERRIDE", raising=False) + monkeypatch.delenv("CLIMATE_API_CONFIG", raising=False) monkeypatch.setenv("XDG_DATA_HOME", xdg) assert downloader._resolve_download_dir() == Path(xdg) / "climate-api" / "downloads" @@ -37,9 +47,19 @@ def test_resolve_artifacts_dir_uses_cache_override(monkeypatch: pytest.MonkeyPat assert ingestion_services._resolve_artifacts_dir() == Path(override) / "artifacts" -def test_resolve_artifacts_dir_uses_xdg_data_home(monkeypatch: pytest.MonkeyPatch) -> None: +def test_resolve_artifacts_dir_uses_data_dir_from_config(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + config_file = tmp_path / "climate-api.yaml" + config_file.write_text("data_dir: ./data\nextent:\n id: test\n", encoding="utf-8") + monkeypatch.setenv("CLIMATE_API_CONFIG", str(config_file)) + monkeypatch.delenv("CACHE_OVERRIDE", raising=False) + monkeypatch.delenv("XDG_DATA_HOME", raising=False) + assert ingestion_services._resolve_artifacts_dir() == tmp_path / "data" / "artifacts" + + +def test_resolve_artifacts_dir_uses_xdg_when_no_config(monkeypatch: pytest.MonkeyPatch) -> None: with tempfile.TemporaryDirectory() as xdg: monkeypatch.delenv("CACHE_OVERRIDE", raising=False) + monkeypatch.delenv("CLIMATE_API_CONFIG", raising=False) monkeypatch.setenv("XDG_DATA_HOME", xdg) assert ingestion_services._resolve_artifacts_dir() == Path(xdg) / "climate-api" / "artifacts" From 2ddfc650d50d7ecd9c7e88ee07a765676c3a293c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Sat, 9 May 2026 16:31:06 +0200 Subject: [PATCH 38/46] =?UTF-8?q?fix:=20correct=20worldpop=20temporal=20ex?= =?UTF-8?q?tent=20to=202015=E2=80=932030?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/climate_api/data/datasets/worldpop.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/climate_api/data/datasets/worldpop.yaml b/src/climate_api/data/datasets/worldpop.yaml index 63868831..78fbb99f 100644 --- a/src/climate_api/data/datasets/worldpop.yaml +++ b/src/climate_api/data/datasets/worldpop.yaml @@ -13,7 +13,8 @@ bbox: [-180, -90, 180, 90] crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 temporal: - begin: "2000" + begin: "2015" + end: "2030" trs: http://www.opengis.net/def/uom/ISO-8601/0/Gregorian resolution: P1Y ingestion: From 38e9aa2ac7837c2b377eaf672e74c23067278920 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Sat, 9 May 2026 16:37:40 +0200 Subject: [PATCH 39/46] =?UTF-8?q?refactor:=20remove=20extent=5Fid=20from?= =?UTF-8?q?=20cache=20functions=20=E2=80=94=20one=20extent=20per=20instanc?= =?UTF-8?q?e?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../data_manager/services/downloader.py | 28 ++++++++----------- src/climate_api/ingestions/services.py | 9 +++--- tests/test_datasets.py | 28 +++++++++---------- tests/test_downloader.py | 23 ++++++--------- 4 files changed, 38 insertions(+), 50 deletions(-) diff --git a/src/climate_api/data_manager/services/downloader.py b/src/climate_api/data_manager/services/downloader.py index 39f4ba3b..f9c5ad13 100644 --- a/src/climate_api/data_manager/services/downloader.py +++ b/src/climate_api/data_manager/services/downloader.py @@ -47,7 +47,6 @@ def download_dataset( country_code: str | None, overwrite: bool, background_tasks: BackgroundTasks | None, - extent_id: str | None = None, ) -> list[Path]: """Download dataset files and return the NetCDF paths created or modified by this run. @@ -60,7 +59,7 @@ def download_dataset( ingestion = dataset["ingestion"] eo_download_func_path = ingestion["function"] eo_download_func = _get_dynamic_function(eo_download_func_path) - before_files = {path.resolve(): path.stat().st_mtime_ns for path in get_cache_files(dataset, extent_id=extent_id)} + before_files = {path.resolve(): path.stat().st_mtime_ns for path in get_cache_files(dataset)} params = dict(ingestion.get("default_params", {})) params.update( @@ -68,7 +67,7 @@ def download_dataset( "start": start, "end": end or datetime.date.today().isoformat(), "dirname": DOWNLOAD_DIR, - "prefix": _get_cache_prefix(dataset, extent_id=extent_id), + "prefix": _get_cache_prefix(dataset), "overwrite": overwrite, } ) @@ -108,21 +107,19 @@ def download_dataset( message = str(exc).strip() or "Unexpected error from upstream data provider" raise HTTPException(status_code=502, detail=f"Upstream dataset download failed: {message}") from exc - after_files = [path.resolve() for path in get_cache_files(dataset, extent_id=extent_id)] + after_files = [path.resolve() for path in get_cache_files(dataset)] changed_files = [ path for path in after_files if path not in before_files or path.stat().st_mtime_ns != before_files[path] ] return changed_files -def build_dataset_zarr( - dataset: dict[str, Any], *, start: str | None = None, end: str | None = None, extent_id: str | None = None -) -> None: +def build_dataset_zarr(dataset: dict[str, Any], *, start: str | None = None, end: str | None = None) -> None: """Collect dataset cache files into one optimised Zarr archive, clipped to request scope.""" logger.info(f"Optimizing cache for dataset {dataset['id']}") ingestion = dataset["ingestion"] - files = get_cache_files(dataset, extent_id=extent_id) + files = get_cache_files(dataset) logger.info(f"Opening {len(files)} files from cache") ds = xr.open_mfdataset(files) @@ -166,7 +163,7 @@ def build_dataset_zarr( # save as zarr logger.info("Saving to optimized zarr file") - zarr_path = DOWNLOAD_DIR / f"{_get_cache_prefix(dataset, extent_id=extent_id)}.zarr" + zarr_path = DOWNLOAD_DIR / f"{_get_cache_prefix(dataset)}.zarr" multiscales = dict(ingestion.get("multiscales", {})) @@ -274,21 +271,20 @@ def _compute_time_space_chunks( return chunks -def _get_cache_prefix(dataset: dict[str, Any], extent_id: str | None = None) -> str: - base = str(dataset["id"]) - return f"{base}_{extent_id}" if extent_id else base +def _get_cache_prefix(dataset: dict[str, Any]) -> str: + return str(dataset["id"]) -def get_cache_files(dataset: dict[str, Any], extent_id: str | None = None) -> list[Path]: +def get_cache_files(dataset: dict[str, Any]) -> list[Path]: """Return all NetCDF cache files matching this dataset's prefix.""" # TODO: not bulletproof -- e.g. 2m_temperature matches 2m_temperature_modified - prefix = _get_cache_prefix(dataset, extent_id=extent_id) + prefix = _get_cache_prefix(dataset) return list(DOWNLOAD_DIR.glob(f"{prefix}*.nc")) -def get_zarr_path(dataset: dict[str, Any], extent_id: str | None = None) -> Path | None: +def get_zarr_path(dataset: dict[str, Any]) -> Path | None: """Return the optimised zarr archive path if it exists.""" - prefix = _get_cache_prefix(dataset, extent_id=extent_id) + prefix = _get_cache_prefix(dataset) optimized = DOWNLOAD_DIR / f"{prefix}.zarr" if optimized.exists(): return optimized diff --git a/src/climate_api/ingestions/services.py b/src/climate_api/ingestions/services.py index 1f0826b7..cb34bd08 100644 --- a/src/climate_api/ingestions/services.py +++ b/src/climate_api/ingestions/services.py @@ -219,14 +219,13 @@ def create_artifact( country_code=country_code, overwrite=overwrite, background_tasks=None, - extent_id=extent_id, ) logger.info("Download finished for dataset '%s': changed_files=%d", dataset["id"], len(downloaded_files)) if prefer_zarr or requires_canonical_zarr: try: logger.info("Building canonical Zarr artifact for dataset '%s'", dataset["id"]) - downloader.build_dataset_zarr(dataset, start=start, end=end, extent_id=extent_id) + downloader.build_dataset_zarr(dataset, start=start, end=end) logger.info("Canonical Zarr artifact built for dataset '%s'", dataset["id"]) except Exception as exc: if requires_canonical_zarr: @@ -246,16 +245,16 @@ def create_artifact( exc_info=True, ) - zarr_path = downloader.get_zarr_path(dataset, extent_id=extent_id) + zarr_path = downloader.get_zarr_path(dataset) if requires_canonical_zarr and zarr_path is None: raise HTTPException( status_code=500, detail="Append sync requires a canonical Zarr artifact, but no Zarr store was produced.", ) cache_files = ( - downloader.get_cache_files(dataset, extent_id=extent_id) + downloader.get_cache_files(dataset) if requires_canonical_zarr - else downloaded_files or downloader.get_cache_files(dataset, extent_id=extent_id) + else downloaded_files or downloader.get_cache_files(dataset) ) primary_path: str | None diff --git a/tests/test_datasets.py b/tests/test_datasets.py index e2bdafe4..c7d0fedc 100644 --- a/tests/test_datasets.py +++ b/tests/test_datasets.py @@ -279,7 +279,7 @@ def test_create_artifact_computes_coverage_from_created_artifact_paths( created_file.write_text("dummy", encoding="utf-8") monkeypatch.setattr(services.downloader, "download_dataset", lambda *_, **__: [created_file]) - monkeypatch.setattr(services.downloader, "get_zarr_path", lambda dataset, extent_id=None: None) + monkeypatch.setattr(services.downloader, "get_zarr_path", lambda _: None) monkeypatch.setattr(services, "_find_existing_artifact", lambda **_: None) captured: dict[str, object] = {} @@ -350,7 +350,7 @@ def fake_download_dataset( return [created_file] monkeypatch.setattr(services.downloader, "download_dataset", fake_download_dataset) - monkeypatch.setattr(services.downloader, "get_zarr_path", lambda dataset, extent_id=None: None) + monkeypatch.setattr(services.downloader, "get_zarr_path", lambda _: None) monkeypatch.setattr(services, "_find_existing_artifact", lambda **_: None) monkeypatch.setattr( services, @@ -419,7 +419,7 @@ def fake_download_dataset( monkeypatch.setattr(services, "utc_now", lambda: FixedDateTime(2026, 4, 21, 13, 47, 31, tzinfo=UTC)) monkeypatch.setattr(services.downloader, "download_dataset", fake_download_dataset) - monkeypatch.setattr(services.downloader, "get_zarr_path", lambda dataset, extent_id=None: None) + monkeypatch.setattr(services.downloader, "get_zarr_path", lambda _: None) monkeypatch.setattr(services, "_find_existing_artifact", lambda **_: None) monkeypatch.setattr( services, @@ -466,7 +466,7 @@ def test_create_artifact_returns_409_when_downloaded_artifact_has_no_data( created_file.write_text("dummy", encoding="utf-8") monkeypatch.setattr(services.downloader, "download_dataset", lambda *_, **__: [created_file]) - monkeypatch.setattr(services.downloader, "get_zarr_path", lambda dataset, extent_id=None: None) + monkeypatch.setattr(services.downloader, "get_zarr_path", lambda _: None) monkeypatch.setattr(services, "_find_existing_artifact", lambda **_: None) monkeypatch.setattr( services, @@ -527,7 +527,7 @@ def fake_download_dataset( monkeypatch.setattr(services.downloader, "download_dataset", fake_download_dataset) monkeypatch.setattr(services.downloader, "build_dataset_zarr", lambda *_, **__: None) zarr_path_chirps = tmp_path / "chirps3_precipitation_daily.zarr" - monkeypatch.setattr(services.downloader, "get_zarr_path", lambda dataset, extent_id=None: zarr_path_chirps) + monkeypatch.setattr(services.downloader, "get_zarr_path", lambda _: zarr_path_chirps) monkeypatch.setattr(services, "_find_existing_artifact", lambda **_: None) monkeypatch.setattr( services, @@ -655,8 +655,8 @@ def fake_find_existing_artifact(**kwargs: object) -> ArtifactRecord | None: monkeypatch.setattr(services, "_find_existing_artifact", fake_find_existing_artifact) monkeypatch.setattr(services.downloader, "download_dataset", lambda *_, **__: [created_file]) monkeypatch.setattr(services.downloader, "build_dataset_zarr", lambda *_, **__: None) - monkeypatch.setattr(services.downloader, "get_zarr_path", lambda dataset, extent_id=None: zarr_path) - monkeypatch.setattr(services.downloader, "get_cache_files", lambda dataset, extent_id=None: [created_file]) + monkeypatch.setattr(services.downloader, "get_zarr_path", lambda _: zarr_path) + monkeypatch.setattr(services.downloader, "get_cache_files", lambda _: [created_file]) monkeypatch.setattr( services, "get_data_coverage_for_paths", @@ -703,17 +703,15 @@ def test_create_artifact_delta_requires_canonical_zarr_when_prefer_zarr_is_false captured_build: dict[str, object] = {} - def fake_build_dataset_zarr( - dataset_arg: dict[str, object], *, start: str | None, end: str | None, extent_id: str | None = None - ) -> None: + def fake_build_dataset_zarr(dataset_arg: dict[str, object], *, start: str | None, end: str | None) -> None: captured_build["dataset_id"] = dataset_arg["id"] captured_build["start"] = start captured_build["end"] = end monkeypatch.setattr(services.downloader, "download_dataset", lambda *_, **__: [created_file]) monkeypatch.setattr(services.downloader, "build_dataset_zarr", fake_build_dataset_zarr) - monkeypatch.setattr(services.downloader, "get_zarr_path", lambda dataset, extent_id=None: zarr_path) - monkeypatch.setattr(services.downloader, "get_cache_files", lambda dataset, extent_id=None: [created_file]) + monkeypatch.setattr(services.downloader, "get_zarr_path", lambda _: zarr_path) + monkeypatch.setattr(services.downloader, "get_cache_files", lambda _: [created_file]) monkeypatch.setattr(services, "_find_existing_artifact", lambda **_: None) monkeypatch.setattr( services, @@ -767,7 +765,7 @@ def fail_build_dataset_zarr(*_: object, **__: object) -> None: monkeypatch.setattr(services.downloader, "download_dataset", lambda *_, **__: [created_file]) monkeypatch.setattr(services.downloader, "build_dataset_zarr", fail_build_dataset_zarr) - monkeypatch.setattr(services.downloader, "get_zarr_path", lambda dataset, extent_id=None: None) + monkeypatch.setattr(services.downloader, "get_zarr_path", lambda _: None) monkeypatch.setattr(services, "_find_existing_artifact", lambda **_: None) with pytest.raises(services.HTTPException) as exc_info: @@ -805,8 +803,8 @@ def test_create_artifact_delta_rejects_short_rebuilt_coverage( monkeypatch.setattr(services.downloader, "download_dataset", lambda *_, **__: [created_file]) monkeypatch.setattr(services.downloader, "build_dataset_zarr", lambda *_, **__: None) - monkeypatch.setattr(services.downloader, "get_zarr_path", lambda dataset, extent_id=None: zarr_path) - monkeypatch.setattr(services.downloader, "get_cache_files", lambda dataset, extent_id=None: [created_file]) + monkeypatch.setattr(services.downloader, "get_zarr_path", lambda _: zarr_path) + monkeypatch.setattr(services.downloader, "get_cache_files", lambda _: [created_file]) monkeypatch.setattr(services, "_find_existing_artifact", lambda **_: None) monkeypatch.setattr( services, diff --git a/tests/test_downloader.py b/tests/test_downloader.py index d9dcaa05..5cb9c22f 100644 --- a/tests/test_downloader.py +++ b/tests/test_downloader.py @@ -168,20 +168,15 @@ def fake_download( # --------------------------------------------------------------------------- -# _get_cache_prefix — extent_id isolation +# _get_cache_prefix # --------------------------------------------------------------------------- -def test_get_cache_prefix_without_extent_id() -> None: +def test_get_cache_prefix_uses_dataset_id() -> None: dataset: dict[str, Any] = {"id": "chirps3_precipitation_daily", "ingestion": {}} assert downloader._get_cache_prefix(dataset) == "chirps3_precipitation_daily" -def test_get_cache_prefix_with_extent_id() -> None: - dataset: dict[str, Any] = {"id": "chirps3_precipitation_daily", "ingestion": {}} - assert downloader._get_cache_prefix(dataset, extent_id="nor") == "chirps3_precipitation_daily_nor" - - # --------------------------------------------------------------------------- # _validate_spatial_coverage # --------------------------------------------------------------------------- @@ -384,7 +379,7 @@ def test_build_dataset_zarr_flat_creates_zarr(tmp_path: Path, monkeypatch: pytes """Flat zarr is written with the correct variable and no pyramid level dirs.""" nc_files = _write_nc_files(tmp_path) monkeypatch.setattr(downloader, "DOWNLOAD_DIR", tmp_path) - monkeypatch.setattr(downloader, "get_cache_files", lambda dataset, extent_id=None: nc_files) + monkeypatch.setattr(downloader, "get_cache_files", lambda _: nc_files) downloader.build_dataset_zarr(_FLAT_DATASET) @@ -421,7 +416,7 @@ def test_build_dataset_zarr_normalises_coordinate_names(tmp_path: Path, monkeypa "ingestion": {}, } monkeypatch.setattr(downloader, "DOWNLOAD_DIR", tmp_path) - monkeypatch.setattr(downloader, "get_cache_files", lambda dataset, extent_id=None: [path]) + monkeypatch.setattr(downloader, "get_cache_files", lambda _: [path]) downloader.build_dataset_zarr(dataset) @@ -457,7 +452,7 @@ def test_build_dataset_zarr_normalises_xy_coordinate_names(tmp_path: Path, monke "ingestion": {}, } monkeypatch.setattr(downloader, "DOWNLOAD_DIR", tmp_path) - monkeypatch.setattr(downloader, "get_cache_files", lambda dataset, extent_id=None: [path]) + monkeypatch.setattr(downloader, "get_cache_files", lambda _: [path]) downloader.build_dataset_zarr(dataset) @@ -485,7 +480,7 @@ def test_build_dataset_zarr_clips_to_requested_daily_range( "ingestion": {}, } monkeypatch.setattr(downloader, "DOWNLOAD_DIR", tmp_path) - monkeypatch.setattr(downloader, "get_cache_files", lambda dataset, extent_id=None: nc_files) + monkeypatch.setattr(downloader, "get_cache_files", lambda _: nc_files) downloader.build_dataset_zarr(dataset, start="2024-02-01", end="2024-02-10") @@ -515,7 +510,7 @@ def test_build_dataset_zarr_pyramid_copies_time_to_root(tmp_path: Path, monkeypa """Pyramid zarr build copies the time coordinate to the store root for zarr-layer.""" nc_files = _write_nc_files(tmp_path) monkeypatch.setattr(downloader, "DOWNLOAD_DIR", tmp_path) - monkeypatch.setattr(downloader, "get_cache_files", lambda dataset, extent_id=None: nc_files) + monkeypatch.setattr(downloader, "get_cache_files", lambda _: nc_files) def fake_create_pyramid(ds: xr.Dataset, levels: int, x_dim: str, y_dim: str, method: str) -> Pyramid: return _make_fake_pyramid(ds, tmp_path / "my_dataset.zarr") @@ -533,7 +528,7 @@ def test_build_dataset_zarr_pyramid_is_openable_via_level_0(tmp_path: Path, monk """open_zarr_dataset returns the dataset from level 0 of the pyramid store.""" nc_files = _write_nc_files(tmp_path) monkeypatch.setattr(downloader, "DOWNLOAD_DIR", tmp_path) - monkeypatch.setattr(downloader, "get_cache_files", lambda dataset, extent_id=None: nc_files) + monkeypatch.setattr(downloader, "get_cache_files", lambda _: nc_files) def fake_create_pyramid(ds: xr.Dataset, levels: int, x_dim: str, y_dim: str, method: str) -> Pyramid: return _make_fake_pyramid(ds, tmp_path / "my_dataset.zarr") @@ -557,7 +552,7 @@ def test_build_dataset_zarr_pyramid_normalises_coordinate_names( # Source files use lat/lon (WorldPop-style); canonical names must appear in the written store. nc_files = _write_nc_files(tmp_path) monkeypatch.setattr(downloader, "DOWNLOAD_DIR", tmp_path) - monkeypatch.setattr(downloader, "get_cache_files", lambda dataset, extent_id=None: nc_files) + monkeypatch.setattr(downloader, "get_cache_files", lambda _: nc_files) received: list[xr.Dataset] = [] From 0cebfd894cc5c85df06daa11d51f7743d67a9c68 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Sat, 9 May 2026 16:41:13 +0200 Subject: [PATCH 40/46] docs: document data_dir requirement and extents field in dataset templates --- docs/adding_custom_datasets.md | 17 +++++++++++++++++ docs/setup_guide.md | 4 ++++ 2 files changed, 21 insertions(+) diff --git a/docs/adding_custom_datasets.md b/docs/adding_custom_datasets.md index 52f96274..7d105fc1 100644 --- a/docs/adding_custom_datasets.md +++ b/docs/adding_custom_datasets.md @@ -124,6 +124,22 @@ sync_availability: `latest_available_function` must accept a `dataset` dict and return a `datetime`. Omit `sync_availability` entirely for `static` datasets or when you always want to sync up to the requested end date. +**Spatial and temporal extents** — declares what the source dataset covers. Used to validate ingest requests before hitting the provider: + +```yaml +extents: + spatial: + bbox: [-180, -50, 180, 50] # [xmin, ymin, xmax, ymax] in WGS84 + crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84 + temporal: + begin: "1981-01-01" + end: "2030-12-31" # omit if ongoing + trs: http://www.opengis.net/def/uom/ISO-8601/0/Gregorian + resolution: P1D # ISO 8601 duration: PT1H, P1D, P1M, P1Y +``` + +If an ingest request's bounding box has no overlap with `extents.spatial.bbox`, the API returns HTTP 400 immediately. Partial overlap is allowed — the provider will return data for the intersecting area. + **Units** | Field | Required | Description | @@ -151,6 +167,7 @@ extent: name: Rwanda bbox: [28.8, -2.9, 30.9, -1.0] +data_dir: ./data datasets_dir: ./datasets/ ``` diff --git a/docs/setup_guide.md b/docs/setup_guide.md index 68527a1a..faa5bb1a 100644 --- a/docs/setup_guide.md +++ b/docs/setup_guide.md @@ -32,6 +32,8 @@ extent: name: Rwanda bbox: [28.8, -2.9, 30.9, -1.0] country_code: RWA + +data_dir: ./data ``` Field reference: @@ -43,6 +45,8 @@ Field reference: | `bbox` | Yes | Bounding box as `[xmin, ymin, xmax, ymax]` in WGS84 decimal degrees | | `country_code` | No | ISO 3166-1 alpha-3 code — required for WorldPop downloads | +`data_dir` sets the directory where downloaded NetCDF files and Zarr stores are kept. It is required when a config file is present and is resolved relative to the config file. Each instance must have its own `data_dir` to avoid mixing data between deployments. + To find the bounding box for a country, [bboxfinder.com](http://bboxfinder.com) is a useful tool. Values can reference environment variables using `${VAR:-default}` syntax: From 25c4eb5f4aaf2a1814747f2b33d06b341c3ce7e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Sat, 9 May 2026 16:45:40 +0200 Subject: [PATCH 41/46] fix: skip data_dir validation when CLIMATE_API_CONFIG file does not exist --- src/climate_api/config.py | 2 +- tests/test_config.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/climate_api/config.py b/src/climate_api/config.py index 1213f3de..87fb85a4 100644 --- a/src/climate_api/config.py +++ b/src/climate_api/config.py @@ -69,7 +69,7 @@ def get_data_dir() -> Path | None: CACHE_OVERRIDE is a legacy escape hatch that bypasses config-level validation. """ config_path = get_config_path() - if config_path is None: + if config_path is None or not config_path.exists(): return None config = get_config() diff --git a/tests/test_config.py b/tests/test_config.py index f14924a7..67e623c4 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -12,6 +12,13 @@ def test_get_data_dir_returns_none_when_no_config(monkeypatch: pytest.MonkeyPatc assert get_data_dir() is None +def test_get_data_dir_returns_none_when_config_path_set_but_file_missing( + monkeypatch: pytest.MonkeyPatch, tmp_path: Path +) -> None: + monkeypatch.setenv("CLIMATE_API_CONFIG", str(tmp_path / "nonexistent.yaml")) + assert get_data_dir() is None + + def test_get_data_dir_raises_when_config_present_but_no_data_dir( monkeypatch: pytest.MonkeyPatch, tmp_path: Path ) -> None: From add8a169550ff10ff11c42baf9008ee87e7986f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Sat, 9 May 2026 16:46:37 +0200 Subject: [PATCH 42/46] docs: add data_dir to example config and clarify CACHE_OVERRIDE as legacy --- .env.example | 3 ++- climate-api.yaml.example | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.env.example b/.env.example index 70b4b505..6ef48d90 100644 --- a/.env.example +++ b/.env.example @@ -17,7 +17,8 @@ CLIMATE_API_CONFIG=./climate-api.yaml # See docs/setup_guide.md for registration and .netrc setup instructions. # ── Download and ingestion ──────────────────────────────────────────────────── -# Override the download cache directory (default: data/downloads). +# Legacy override for the download cache directory. Prefer setting data_dir in +# climate-api.yaml instead. CACHE_OVERRIDE is kept for Docker/CI compatibility. # CACHE_OVERRIDE=/path/to/cache # Fallback bounding box used when a request does not include an explicit bbox. diff --git a/climate-api.yaml.example b/climate-api.yaml.example index a041fd84..cf66504b 100644 --- a/climate-api.yaml.example +++ b/climate-api.yaml.example @@ -8,4 +8,6 @@ extent: bbox: [-13.5, 6.9, -10.1, 10.0] country_code: SLE +data_dir: ./data # required — directory for downloaded NetCDF files and Zarr stores + # datasets_dir: ./datasets/ # optional — custom templates merged with built-ins From cd6e76dc6115ed2cfd3e58cbda4ec650b61f32e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Sat, 9 May 2026 16:50:59 +0200 Subject: [PATCH 43/46] =?UTF-8?q?refactor:=20remove=20CACHE=5FOVERRIDE=20?= =?UTF-8?q?=E2=80=94=20use=20data=5Fdir=20from=20config=20or=20XDG=20fallb?= =?UTF-8?q?ack?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Data directory resolution now uses data_dir from climate-api.yaml (required when a config file is present) with a clean XDG fallback. The legacy CACHE_OVERRIDE environment variable is gone from all resolver functions, tests, and .env.example. --- .env.example | 4 ---- src/climate_api/config.py | 2 -- .../data_manager/services/downloader.py | 3 --- src/climate_api/ingestions/services.py | 3 --- src/climate_api/publications/services.py | 8 +++++--- tests/test_downloader.py | 18 ----------------- tests/test_publications.py | 20 ++++++++++--------- 7 files changed, 16 insertions(+), 42 deletions(-) diff --git a/.env.example b/.env.example index 6ef48d90..a849c7cc 100644 --- a/.env.example +++ b/.env.example @@ -17,10 +17,6 @@ CLIMATE_API_CONFIG=./climate-api.yaml # See docs/setup_guide.md for registration and .netrc setup instructions. # ── Download and ingestion ──────────────────────────────────────────────────── -# Legacy override for the download cache directory. Prefer setting data_dir in -# climate-api.yaml instead. CACHE_OVERRIDE is kept for Docker/CI compatibility. -# CACHE_OVERRIDE=/path/to/cache - # Fallback bounding box used when a request does not include an explicit bbox. # Format: xmin,ymin,xmax,ymax # DOWNLOAD_BBOX=-13.5,6.9,-10.1,10.0 diff --git a/src/climate_api/config.py b/src/climate_api/config.py index 87fb85a4..f91c8155 100644 --- a/src/climate_api/config.py +++ b/src/climate_api/config.py @@ -65,8 +65,6 @@ def get_data_dir() -> Path | None: misconfigured instances fail fast at startup rather than silently sharing a default directory with other instances. - Callers should check CACHE_OVERRIDE themselves before calling this function; - CACHE_OVERRIDE is a legacy escape hatch that bypasses config-level validation. """ config_path = get_config_path() if config_path is None or not config_path.exists(): diff --git a/src/climate_api/data_manager/services/downloader.py b/src/climate_api/data_manager/services/downloader.py index f9c5ad13..4b23ffb5 100644 --- a/src/climate_api/data_manager/services/downloader.py +++ b/src/climate_api/data_manager/services/downloader.py @@ -24,9 +24,6 @@ def _resolve_download_dir() -> Path: - override = os.getenv("CACHE_OVERRIDE") - if override: - return Path(override) data_dir = api_config.get_data_dir() if data_dir is not None: return data_dir / "downloads" diff --git a/src/climate_api/ingestions/services.py b/src/climate_api/ingestions/services.py index cb34bd08..9f5e26e4 100644 --- a/src/climate_api/ingestions/services.py +++ b/src/climate_api/ingestions/services.py @@ -51,9 +51,6 @@ def _resolve_artifacts_dir() -> Path: from climate_api import config as api_config - override = os.getenv("CACHE_OVERRIDE") - if override: - return Path(override) / "artifacts" data_dir = api_config.get_data_dir() if data_dir is not None: return data_dir / "artifacts" diff --git a/src/climate_api/publications/services.py b/src/climate_api/publications/services.py index cc531dd8..0d2d4c04 100644 --- a/src/climate_api/publications/services.py +++ b/src/climate_api/publications/services.py @@ -19,9 +19,11 @@ def _resolve_pygeoapi_dir() -> Path: - override = os.getenv("CACHE_OVERRIDE") - if override: - return Path(override) / "pygeoapi" + from climate_api import config as api_config + + data_dir = api_config.get_data_dir() + if data_dir is not None: + return data_dir / "pygeoapi" xdg_data = Path(os.getenv("XDG_DATA_HOME", Path.home() / ".local" / "share")) return xdg_data / "climate-api" / "pygeoapi" diff --git a/tests/test_downloader.py b/tests/test_downloader.py index 5cb9c22f..6c314cab 100644 --- a/tests/test_downloader.py +++ b/tests/test_downloader.py @@ -16,49 +16,31 @@ from climate_api.ingestions import services as ingestion_services -def test_resolve_download_dir_uses_cache_override(monkeypatch: pytest.MonkeyPatch) -> None: - with tempfile.TemporaryDirectory() as override: - monkeypatch.setenv("CACHE_OVERRIDE", override) - monkeypatch.delenv("XDG_DATA_HOME", raising=False) - assert downloader._resolve_download_dir() == Path(override) - - def test_resolve_download_dir_uses_data_dir_from_config(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: config_file = tmp_path / "climate-api.yaml" config_file.write_text("data_dir: ./data\nextent:\n id: test\n", encoding="utf-8") monkeypatch.setenv("CLIMATE_API_CONFIG", str(config_file)) - monkeypatch.delenv("CACHE_OVERRIDE", raising=False) monkeypatch.delenv("XDG_DATA_HOME", raising=False) assert downloader._resolve_download_dir() == tmp_path / "data" / "downloads" def test_resolve_download_dir_uses_xdg_when_no_config(monkeypatch: pytest.MonkeyPatch) -> None: with tempfile.TemporaryDirectory() as xdg: - monkeypatch.delenv("CACHE_OVERRIDE", raising=False) monkeypatch.delenv("CLIMATE_API_CONFIG", raising=False) monkeypatch.setenv("XDG_DATA_HOME", xdg) assert downloader._resolve_download_dir() == Path(xdg) / "climate-api" / "downloads" -def test_resolve_artifacts_dir_uses_cache_override(monkeypatch: pytest.MonkeyPatch) -> None: - with tempfile.TemporaryDirectory() as override: - monkeypatch.setenv("CACHE_OVERRIDE", override) - monkeypatch.delenv("XDG_DATA_HOME", raising=False) - assert ingestion_services._resolve_artifacts_dir() == Path(override) / "artifacts" - - def test_resolve_artifacts_dir_uses_data_dir_from_config(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: config_file = tmp_path / "climate-api.yaml" config_file.write_text("data_dir: ./data\nextent:\n id: test\n", encoding="utf-8") monkeypatch.setenv("CLIMATE_API_CONFIG", str(config_file)) - monkeypatch.delenv("CACHE_OVERRIDE", raising=False) monkeypatch.delenv("XDG_DATA_HOME", raising=False) assert ingestion_services._resolve_artifacts_dir() == tmp_path / "data" / "artifacts" def test_resolve_artifacts_dir_uses_xdg_when_no_config(monkeypatch: pytest.MonkeyPatch) -> None: with tempfile.TemporaryDirectory() as xdg: - monkeypatch.delenv("CACHE_OVERRIDE", raising=False) monkeypatch.delenv("CLIMATE_API_CONFIG", raising=False) monkeypatch.setenv("XDG_DATA_HOME", xdg) assert ingestion_services._resolve_artifacts_dir() == Path(xdg) / "climate-api" / "artifacts" diff --git a/tests/test_publications.py b/tests/test_publications.py index 403d3508..93dbd44b 100644 --- a/tests/test_publications.py +++ b/tests/test_publications.py @@ -1,3 +1,5 @@ +from pathlib import Path + import pytest from climate_api.publications import services @@ -9,21 +11,21 @@ def test_load_base_config_returns_mapping() -> None: assert "server" in config -def test_resolve_pygeoapi_dir_uses_cache_override(monkeypatch: pytest.MonkeyPatch, tmp_path: object) -> None: - import tempfile +def test_resolve_pygeoapi_dir_uses_data_dir_from_config(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + from climate_api import config as api_config - with tempfile.TemporaryDirectory() as override: - monkeypatch.setenv("CACHE_OVERRIDE", override) - monkeypatch.delenv("XDG_DATA_HOME", raising=False) - result = services._resolve_pygeoapi_dir() - assert str(result) == f"{override}/pygeoapi" + monkeypatch.setattr(api_config, "get_data_dir", lambda: tmp_path / "data") + result = services._resolve_pygeoapi_dir() + assert result == tmp_path / "data" / "pygeoapi" -def test_resolve_pygeoapi_dir_uses_xdg_data_home(monkeypatch: pytest.MonkeyPatch, tmp_path: object) -> None: +def test_resolve_pygeoapi_dir_uses_xdg_data_home(monkeypatch: pytest.MonkeyPatch) -> None: import tempfile + from climate_api import config as api_config + + monkeypatch.setattr(api_config, "get_data_dir", lambda: None) with tempfile.TemporaryDirectory() as xdg: - monkeypatch.delenv("CACHE_OVERRIDE", raising=False) monkeypatch.setenv("XDG_DATA_HOME", xdg) result = services._resolve_pygeoapi_dir() assert str(result) == f"{xdg}/climate-api/pygeoapi" From 59771887407599c16a753fc0d34f65258ca11603 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Sat, 9 May 2026 16:57:30 +0200 Subject: [PATCH 44/46] refactor: rename datasets_dir to templates_dir in config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clarifies the distinction from data_dir (runtime storage) — templates_dir points at user-supplied YAML templates and will cover both dataset and processing templates going forward. --- .env.example | 2 +- climate-api.yaml.example | 2 +- docs/adding_custom_datasets.md | 8 ++--- docs/setup_guide.md | 2 +- .../data_registry/services/datasets.py | 12 +++---- tests/test_config.py | 34 +++++++++---------- 6 files changed, 30 insertions(+), 30 deletions(-) diff --git a/.env.example b/.env.example index a849c7cc..f04aa118 100644 --- a/.env.example +++ b/.env.example @@ -1,5 +1,5 @@ # ── Instance configuration ──────────────────────────────────────────────────── -# Path to the instance config file (extent, optional datasets_dir). +# Path to the instance config file (extent, optional templates_dir). # Copy the example before editing: cp climate-api.yaml.example climate-api.yaml # climate-api.yaml is gitignored so your local extent stays out of version control. # When running via `make run` from the repo root, the relative path below works. diff --git a/climate-api.yaml.example b/climate-api.yaml.example index cf66504b..944145c3 100644 --- a/climate-api.yaml.example +++ b/climate-api.yaml.example @@ -10,4 +10,4 @@ extent: data_dir: ./data # required — directory for downloaded NetCDF files and Zarr stores -# datasets_dir: ./datasets/ # optional — custom templates merged with built-ins +# templates_dir: ./templates/ # optional — custom dataset/processing templates merged with built-ins diff --git a/docs/adding_custom_datasets.md b/docs/adding_custom_datasets.md index 7d105fc1..af0690db 100644 --- a/docs/adding_custom_datasets.md +++ b/docs/adding_custom_datasets.md @@ -2,7 +2,7 @@ This guide explains how to add a new dataset source to your Climate API instance — for example a national meteorological service, a regional satellite product, or a custom model output. -The built-in dataset templates (CHIRPS3, ERA5-Land, WorldPop) ship as package data. Custom datasets are layered on top by pointing `datasets_dir` in your `climate-api.yaml` at a directory containing your own YAML template files. +The built-in dataset templates (CHIRPS3, ERA5-Land, WorldPop) ship as package data. Custom datasets are layered on top by pointing `templates_dir` in your `climate-api.yaml` at a directory containing your own YAML template files. ## Overview @@ -159,7 +159,7 @@ ingestion: ## Step 3: Point the instance at your templates directory -Add `datasets_dir` to your `climate-api.yaml`: +Add `templates_dir` to your `climate-api.yaml`: ```yaml extent: @@ -168,10 +168,10 @@ extent: bbox: [28.8, -2.9, 30.9, -1.0] data_dir: ./data -datasets_dir: ./datasets/ +templates_dir: ./datasets/ ``` -All `*.yaml` and `*.yml` files in `datasets_dir` are loaded and merged with the built-in templates (CHIRPS3, ERA5-Land, WorldPop). Custom templates are additive — the built-ins remain available unless you deliberately override one by using the same `id`. +All `*.yaml` and `*.yml` files in `templates_dir` are loaded and merged with the built-in templates (CHIRPS3, ERA5-Land, WorldPop). Custom templates are additive — the built-ins remain available unless you deliberately override one by using the same `id`. ## Step 4: Ingest and publish diff --git a/docs/setup_guide.md b/docs/setup_guide.md index faa5bb1a..0077870c 100644 --- a/docs/setup_guide.md +++ b/docs/setup_guide.md @@ -200,7 +200,7 @@ curl -s -X POST http://127.0.0.1:8000/ingestions \ }' | jq ``` -ERA5-Land data has a configured lag of 120 hours (5 days) — the sync planner will not request data from the last 120 hours. This can be adjusted by supplying a custom `era5_land.yaml` via `datasets_dir` in your `climate-api.yaml`. +ERA5-Land data has a configured lag of 120 hours (5 days) — the sync planner will not request data from the last 120 hours. This can be adjusted by supplying a custom `era5_land.yaml` via `templates_dir` in your `climate-api.yaml`. --- diff --git a/src/climate_api/data_registry/services/datasets.py b/src/climate_api/data_registry/services/datasets.py index c61e501d..327cfd84 100644 --- a/src/climate_api/data_registry/services/datasets.py +++ b/src/climate_api/data_registry/services/datasets.py @@ -23,7 +23,7 @@ def list_datasets() -> list[dict[str, Any]]: """Load all dataset templates and return a flat list. Built-in templates from climate_api/data/datasets/ are always loaded. When - datasets_dir is set in CLIMATE_API_CONFIG, templates from that directory are + templates_dir is set in CLIMATE_API_CONFIG, templates from that directory are merged on top — a custom template with the same id overrides the built-in one. CONFIGS_DIR (test override via monkeypatch) bypasses this and loads only @@ -34,14 +34,14 @@ def list_datasets() -> list[dict[str, Any]]: merged: dict[str, dict[str, Any]] = {d["id"]: d for d in _load_builtin_datasets()} - config_datasets_dir = api_config.get_config().get("datasets_dir") - if config_datasets_dir: - if not isinstance(config_datasets_dir, (str, Path)): + config_templates_dir = api_config.get_config().get("templates_dir") + if config_templates_dir: + if not isinstance(config_templates_dir, (str, Path)): raise ValueError( - f"datasets_dir in CLIMATE_API_CONFIG must be a path string, got {type(config_datasets_dir).__name__}" + f"templates_dir in CLIMATE_API_CONFIG must be a path string, got {type(config_templates_dir).__name__}" ) config_path = api_config.get_config_path() - resolved = (config_path.parent / config_datasets_dir).resolve() if config_path else Path(config_datasets_dir) + resolved = (config_path.parent / config_templates_dir).resolve() if config_path else Path(config_templates_dir) for dataset in _load_from_dir(resolved): merged[dataset["id"]] = dataset diff --git a/tests/test_config.py b/tests/test_config.py index 67e623c4..a3eb9c9e 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -120,10 +120,10 @@ def test_builtin_datasets_include_chirps_era5_worldpop(monkeypatch: pytest.Monke assert "worldpop_population_yearly" in ids -def test_datasets_dir_in_config_adds_to_bundled(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: - datasets_dir = tmp_path / "datasets" - datasets_dir.mkdir() - (datasets_dir / "custom.yaml").write_text( +def test_templates_dir_in_config_adds_to_bundled(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + templates_dir = tmp_path / "datasets" + templates_dir.mkdir() + (templates_dir / "custom.yaml").write_text( """ - id: custom_dataset name: Custom dataset @@ -136,7 +136,7 @@ def test_datasets_dir_in_config_adds_to_bundled(monkeypatch: pytest.MonkeyPatch, encoding="utf-8", ) config_file = tmp_path / "climate-api.yaml" - config_file.write_text(f"datasets_dir: {datasets_dir}\n", encoding="utf-8") + config_file.write_text(f"templates_dir: {templates_dir}\n", encoding="utf-8") monkeypatch.setattr(dataset_registry, "CONFIGS_DIR", None) monkeypatch.setenv("CLIMATE_API_CONFIG", str(config_file)) @@ -146,18 +146,18 @@ def test_datasets_dir_in_config_adds_to_bundled(monkeypatch: pytest.MonkeyPatch, assert "chirps3_precipitation_daily" in ids -def test_datasets_dir_resolved_relative_to_config_file(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: - """datasets_dir is resolved relative to the config file, not CWD. +def test_templates_dir_resolved_relative_to_config_file(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + """templates_dir is resolved relative to the config file, not CWD. This matters when running the installed `climate-api` CLI from a directory - other than the repo root, where a relative datasets_dir in the config must + other than the repo root, where a relative templates_dir in the config must still point at the correct sibling directory. """ deployment_dir = tmp_path / "deployment" deployment_dir.mkdir() - datasets_dir = deployment_dir / "datasets" - datasets_dir.mkdir() - (datasets_dir / "custom.yaml").write_text( + templates_dir = deployment_dir / "datasets" + templates_dir.mkdir() + (templates_dir / "custom.yaml").write_text( """ - id: deployed_dataset variable: val @@ -169,7 +169,7 @@ def test_datasets_dir_resolved_relative_to_config_file(monkeypatch: pytest.Monke encoding="utf-8", ) config_file = deployment_dir / "climate-api.yaml" - config_file.write_text("datasets_dir: ./datasets\n", encoding="utf-8") + config_file.write_text("templates_dir: ./datasets\n", encoding="utf-8") monkeypatch.setattr(dataset_registry, "CONFIGS_DIR", None) monkeypatch.setenv("CLIMATE_API_CONFIG", str(config_file)) @@ -178,10 +178,10 @@ def test_datasets_dir_resolved_relative_to_config_file(monkeypatch: pytest.Monke assert "deployed_dataset" in ids -def test_datasets_dir_in_config_overrides_bundled_by_id(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: - datasets_dir = tmp_path / "datasets" - datasets_dir.mkdir() - (datasets_dir / "chirps3.yaml").write_text( +def test_templates_dir_in_config_overrides_bundled_by_id(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: + templates_dir = tmp_path / "datasets" + templates_dir.mkdir() + (templates_dir / "chirps3.yaml").write_text( """ - id: chirps3_precipitation_daily name: Custom CHIRPS override @@ -194,7 +194,7 @@ def test_datasets_dir_in_config_overrides_bundled_by_id(monkeypatch: pytest.Monk encoding="utf-8", ) config_file = tmp_path / "climate-api.yaml" - config_file.write_text(f"datasets_dir: {datasets_dir}\n", encoding="utf-8") + config_file.write_text(f"templates_dir: {templates_dir}\n", encoding="utf-8") monkeypatch.setattr(dataset_registry, "CONFIGS_DIR", None) monkeypatch.setenv("CLIMATE_API_CONFIG", str(config_file)) From d32c4408106aed3b5c730bd2329f20f4f1214240 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Sat, 9 May 2026 17:01:52 +0200 Subject: [PATCH 45/46] refactor: templates_dir uses datasets/ subfolder for dataset templates templates_dir now acts as a root directory. Dataset templates go in templates_dir/datasets/, leaving room for processing/ and other template types alongside it without structural changes. --- climate-api.yaml.example | 2 +- docs/adding_custom_datasets.md | 12 ++++++--- docs/setup_guide.md | 2 +- .../data_registry/services/datasets.py | 8 +++--- tests/test_config.py | 25 +++++++++---------- 5 files changed, 28 insertions(+), 21 deletions(-) diff --git a/climate-api.yaml.example b/climate-api.yaml.example index 944145c3..94ad2963 100644 --- a/climate-api.yaml.example +++ b/climate-api.yaml.example @@ -10,4 +10,4 @@ extent: data_dir: ./data # required — directory for downloaded NetCDF files and Zarr stores -# templates_dir: ./templates/ # optional — custom dataset/processing templates merged with built-ins +# templates_dir: ./templates/ # optional — root for custom templates; datasets go in templates/datasets/ diff --git a/docs/adding_custom_datasets.md b/docs/adding_custom_datasets.md index af0690db..1dd4c71c 100644 --- a/docs/adding_custom_datasets.md +++ b/docs/adding_custom_datasets.md @@ -159,7 +159,13 @@ ingestion: ## Step 3: Point the instance at your templates directory -Add `templates_dir` to your `climate-api.yaml`: +Add `templates_dir` to your `climate-api.yaml` and place your YAML file in the `datasets/` subfolder: + +``` +templates/ +└── datasets/ + └── enacts_rainfall.yaml +``` ```yaml extent: @@ -168,10 +174,10 @@ extent: bbox: [28.8, -2.9, 30.9, -1.0] data_dir: ./data -templates_dir: ./datasets/ +templates_dir: ./templates/ ``` -All `*.yaml` and `*.yml` files in `templates_dir` are loaded and merged with the built-in templates (CHIRPS3, ERA5-Land, WorldPop). Custom templates are additive — the built-ins remain available unless you deliberately override one by using the same `id`. +All `*.yaml` and `*.yml` files in `templates_dir/datasets/` are loaded and merged with the built-in templates (CHIRPS3, ERA5-Land, WorldPop). Custom templates are additive — the built-ins remain available unless you deliberately override one by using the same `id`. ## Step 4: Ingest and publish diff --git a/docs/setup_guide.md b/docs/setup_guide.md index 0077870c..0f1527a9 100644 --- a/docs/setup_guide.md +++ b/docs/setup_guide.md @@ -200,7 +200,7 @@ curl -s -X POST http://127.0.0.1:8000/ingestions \ }' | jq ``` -ERA5-Land data has a configured lag of 120 hours (5 days) — the sync planner will not request data from the last 120 hours. This can be adjusted by supplying a custom `era5_land.yaml` via `templates_dir` in your `climate-api.yaml`. +ERA5-Land data has a configured lag of 120 hours (5 days) — the sync planner will not request data from the last 120 hours. This can be adjusted by placing a custom `era5_land.yaml` in `templates_dir/datasets/` — see `adding_custom_datasets.md`. --- diff --git a/src/climate_api/data_registry/services/datasets.py b/src/climate_api/data_registry/services/datasets.py index 327cfd84..0182aec6 100644 --- a/src/climate_api/data_registry/services/datasets.py +++ b/src/climate_api/data_registry/services/datasets.py @@ -41,9 +41,11 @@ def list_datasets() -> list[dict[str, Any]]: f"templates_dir in CLIMATE_API_CONFIG must be a path string, got {type(config_templates_dir).__name__}" ) config_path = api_config.get_config_path() - resolved = (config_path.parent / config_templates_dir).resolve() if config_path else Path(config_templates_dir) - for dataset in _load_from_dir(resolved): - merged[dataset["id"]] = dataset + root = (config_path.parent / config_templates_dir).resolve() if config_path else Path(config_templates_dir) + datasets_subdir = root / "datasets" + if datasets_subdir.is_dir(): + for dataset in _load_from_dir(datasets_subdir): + merged[dataset["id"]] = dataset return list(merged.values()) diff --git a/tests/test_config.py b/tests/test_config.py index a3eb9c9e..2aca7aeb 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -121,9 +121,9 @@ def test_builtin_datasets_include_chirps_era5_worldpop(monkeypatch: pytest.Monke def test_templates_dir_in_config_adds_to_bundled(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: - templates_dir = tmp_path / "datasets" - templates_dir.mkdir() - (templates_dir / "custom.yaml").write_text( + datasets_subdir = tmp_path / "templates" / "datasets" + datasets_subdir.mkdir(parents=True) + (datasets_subdir / "custom.yaml").write_text( """ - id: custom_dataset name: Custom dataset @@ -136,7 +136,7 @@ def test_templates_dir_in_config_adds_to_bundled(monkeypatch: pytest.MonkeyPatch encoding="utf-8", ) config_file = tmp_path / "climate-api.yaml" - config_file.write_text(f"templates_dir: {templates_dir}\n", encoding="utf-8") + config_file.write_text(f"templates_dir: {tmp_path / 'templates'}\n", encoding="utf-8") monkeypatch.setattr(dataset_registry, "CONFIGS_DIR", None) monkeypatch.setenv("CLIMATE_API_CONFIG", str(config_file)) @@ -154,10 +154,9 @@ def test_templates_dir_resolved_relative_to_config_file(monkeypatch: pytest.Monk still point at the correct sibling directory. """ deployment_dir = tmp_path / "deployment" - deployment_dir.mkdir() - templates_dir = deployment_dir / "datasets" - templates_dir.mkdir() - (templates_dir / "custom.yaml").write_text( + datasets_subdir = deployment_dir / "templates" / "datasets" + datasets_subdir.mkdir(parents=True) + (datasets_subdir / "custom.yaml").write_text( """ - id: deployed_dataset variable: val @@ -169,7 +168,7 @@ def test_templates_dir_resolved_relative_to_config_file(monkeypatch: pytest.Monk encoding="utf-8", ) config_file = deployment_dir / "climate-api.yaml" - config_file.write_text("templates_dir: ./datasets\n", encoding="utf-8") + config_file.write_text("templates_dir: ./templates\n", encoding="utf-8") monkeypatch.setattr(dataset_registry, "CONFIGS_DIR", None) monkeypatch.setenv("CLIMATE_API_CONFIG", str(config_file)) @@ -179,9 +178,9 @@ def test_templates_dir_resolved_relative_to_config_file(monkeypatch: pytest.Monk def test_templates_dir_in_config_overrides_bundled_by_id(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None: - templates_dir = tmp_path / "datasets" - templates_dir.mkdir() - (templates_dir / "chirps3.yaml").write_text( + datasets_subdir = tmp_path / "templates" / "datasets" + datasets_subdir.mkdir(parents=True) + (datasets_subdir / "chirps3.yaml").write_text( """ - id: chirps3_precipitation_daily name: Custom CHIRPS override @@ -194,7 +193,7 @@ def test_templates_dir_in_config_overrides_bundled_by_id(monkeypatch: pytest.Mon encoding="utf-8", ) config_file = tmp_path / "climate-api.yaml" - config_file.write_text(f"templates_dir: {templates_dir}\n", encoding="utf-8") + config_file.write_text(f"templates_dir: {tmp_path / 'templates'}\n", encoding="utf-8") monkeypatch.setattr(dataset_registry, "CONFIGS_DIR", None) monkeypatch.setenv("CLIMATE_API_CONFIG", str(config_file)) From 9e8c70a6b4f51898ec68c0ca7bbb584cef09d6f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Sandvik?= Date: Sat, 9 May 2026 17:13:09 +0200 Subject: [PATCH 46/46] =?UTF-8?q?fix:=20address=20Copilot=20review=20?= =?UTF-8?q?=E2=80=94=20bbox=20validation=20and=20config=20docstring?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Validate request bbox against extents using the env fallback (DOWNLOAD_BBOX) when no explicit bbox is provided, so coverage checks apply to all request paths - Guard against malformed template extents.spatial.bbox (non-list or wrong length) to avoid a 500 on user-supplied templates - Update get_data_dir() docstring to accurately describe the None-on-missing-file behaviour introduced for CI safety --- src/climate_api/config.py | 7 +++-- .../data_manager/services/downloader.py | 4 +-- tests/test_downloader.py | 31 +++++++++++++++++++ 3 files changed, 38 insertions(+), 4 deletions(-) diff --git a/src/climate_api/config.py b/src/climate_api/config.py index f91c8155..56844101 100644 --- a/src/climate_api/config.py +++ b/src/climate_api/config.py @@ -59,9 +59,12 @@ def _load_config() -> dict[str, Any]: def get_data_dir() -> Path | None: - """Return the data directory declared in CLIMATE_API_CONFIG, or None if no config is present. + """Return the data directory declared in CLIMATE_API_CONFIG. - Raises ValueError if a config file is present but data_dir is not set, so + Returns None when CLIMATE_API_CONFIG is unset or points to a file that does + not exist (e.g. CI environments where the config is gitignored). + + Raises ValueError if the config file exists but data_dir is not set, so misconfigured instances fail fast at startup rather than silently sharing a default directory with other instances. diff --git a/src/climate_api/data_manager/services/downloader.py b/src/climate_api/data_manager/services/downloader.py index 4b23ffb5..98874566 100644 --- a/src/climate_api/data_manager/services/downloader.py +++ b/src/climate_api/data_manager/services/downloader.py @@ -52,7 +52,7 @@ def download_dataset( When running in the background-task path, the download is deferred and this function returns an empty list because no files have been created yet. """ - _validate_spatial_coverage(dataset, bbox) + _validate_spatial_coverage(dataset, bbox if bbox is not None else _bbox_from_env()) ingestion = dataset["ingestion"] eo_download_func_path = ingestion["function"] eo_download_func = _get_dynamic_function(eo_download_func_path) @@ -297,7 +297,7 @@ def _validate_spatial_coverage(dataset: dict[str, Any], bbox: list[float] | None if not spatial: return cov_bbox = spatial.get("bbox") - if not cov_bbox: + if not isinstance(cov_bbox, (list, tuple)) or len(cov_bbox) != 4: return cov_xmin, cov_ymin, cov_xmax, cov_ymax = cov_bbox xmin, ymin, xmax, ymax = bbox diff --git a/tests/test_downloader.py b/tests/test_downloader.py index 6c314cab..e927d699 100644 --- a/tests/test_downloader.py +++ b/tests/test_downloader.py @@ -182,6 +182,12 @@ def test_validate_spatial_coverage_passes_when_no_bbox() -> None: downloader._validate_spatial_coverage(dataset, bbox=None) +def test_validate_spatial_coverage_passes_when_template_bbox_malformed() -> None: + extents: dict[str, Any] = {"spatial": {"bbox": "not-a-list"}} + dataset: dict[str, Any] = {"id": "bad_template", "ingestion": {}, "extents": extents} + downloader._validate_spatial_coverage(dataset, bbox=[-10.0, -10.0, 10.0, 10.0]) + + def test_validate_spatial_coverage_passes_when_bbox_inside_extents() -> None: dataset: dict[str, Any] = {"id": "chirps3_precipitation_daily", "ingestion": {}, "extents": _CHIRPS3_EXTENTS} downloader._validate_spatial_coverage(dataset, bbox=[-10.0, -10.0, 10.0, 10.0]) @@ -212,6 +218,31 @@ def test_validate_spatial_coverage_raises_when_bbox_outside_lon_extents() -> Non assert "Longitude" in str(exc_info.value.detail) +def test_download_dataset_validates_env_bbox_against_extents( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Coverage validation uses the env fallback bbox when no bbox is passed in the request.""" + dataset: dict[str, Any] = { + "id": "chirps3_precipitation_daily", + "ingestion": {"function": "ignored.path"}, + "extents": _CHIRPS3_EXTENTS, + } + monkeypatch.setenv("DOWNLOAD_BBOX", "4.5,57.9,31.1,71.2") + + with pytest.raises(HTTPException) as exc_info: + downloader.download_dataset( + dataset=dataset, + start="2020-01-01", + end="2020-01-31", + bbox=None, + country_code=None, + overwrite=False, + background_tasks=None, + ) + assert exc_info.value.status_code == 400 + assert "does not cover this extent" in str(exc_info.value.detail) + + def test_download_dataset_returns_400_when_bbox_outside_dataset_extents( monkeypatch: pytest.MonkeyPatch, ) -> None: