diff --git a/.env.example b/.env.example
index 70b4b505..f04aa118 100644
--- a/.env.example
+++ b/.env.example
@@ -1,5 +1,5 @@
# ── Instance configuration ────────────────────────────────────────────────────
-# Path to the instance config file (extent, optional datasets_dir).
+# Path to the instance config file (extent, data_dir, optional templates_dir).
# Copy the example before editing: cp climate-api.yaml.example climate-api.yaml
# climate-api.yaml is gitignored so your local extent stays out of version control.
# When running via `make run` from the repo root, the relative path below works.
@@ -17,9 +17,6 @@ CLIMATE_API_CONFIG=./climate-api.yaml
# See docs/setup_guide.md for registration and .netrc setup instructions.
# ── Download and ingestion ────────────────────────────────────────────────────
-# Override the download cache directory (default: data/downloads).
-# CACHE_OVERRIDE=/path/to/cache
-
# Fallback bounding box used when a request does not include an explicit bbox.
# Format: xmin,ymin,xmax,ymax
# DOWNLOAD_BBOX=-13.5,6.9,-10.1,10.0
diff --git a/Makefile b/Makefile
index 63b8bc00..ad78f459 100644
--- a/Makefile
+++ b/Makefile
@@ -7,7 +7,7 @@ sync: ## Install dependencies with uv
uv sync
run: openapi ## Start the app with uvicorn
- uv run uvicorn climate_api.main:app --reload
+ uv run uvicorn climate_api.main:app --reload --reload-include "*.html" --reload-include "*.yaml" --reload-include "*.yml"
lint: ## Check linting, formatting, and types (no autofix)
uv run ruff check .
diff --git a/climate-api.yaml.example b/climate-api.yaml.example
index a041fd84..94ad2963 100644
--- a/climate-api.yaml.example
+++ b/climate-api.yaml.example
@@ -8,4 +8,6 @@ extent:
bbox: [-13.5, 6.9, -10.1, 10.0]
country_code: SLE
-# datasets_dir: ./datasets/ # optional — custom templates merged with built-ins
+data_dir: ./data # required — directory for downloaded NetCDF files and Zarr stores
+
+# templates_dir: ./templates/ # optional — root for custom templates; datasets go in templates/datasets/
diff --git a/climate_api/config.py b/climate_api/config.py
index 134976f7..56844101 100644
--- a/climate_api/config.py
+++ b/climate_api/config.py
@@ -7,6 +7,8 @@
import yaml
+_MISSING = object()
+
def _substitute_env_vars(text: str) -> str:
"""Replace ${VAR:-default} patterns with values from the environment."""
@@ -54,3 +56,31 @@ def _load_config() -> dict[str, Any]:
raise ValueError(f"CLIMATE_API_CONFIG must be a YAML mapping at the top level: {path}")
_cache = dict(loaded or {})
return _cache
+
+
+def get_data_dir() -> Path | None:
+    """Return the data directory declared in CLIMATE_API_CONFIG.
+
+    Returns None when CLIMATE_API_CONFIG is unset or points to a file that does
+    not exist (e.g. CI environments where the config is gitignored).
+
+    A relative ``data_dir`` is resolved against the directory that contains the
+    config file; a leading ``~`` is expanded to the user's home directory first.
+
+    Raises ValueError if the config file exists but data_dir is missing, empty,
+    or not a path string, so misconfigured instances fail fast at startup rather
+    than silently sharing a default directory with other instances.
+
+    """
+    config_path = get_config_path()
+    if config_path is None or not config_path.exists():
+        return None
+
+    raw = get_config().get("data_dir", _MISSING)
+    if raw is _MISSING:
+        raise ValueError(
+            "data_dir is required in CLIMATE_API_CONFIG when a config file is present. "
+            "Set it to the directory where downloaded data should be stored, "
+            "e.g. data_dir: ./data"
+        )
+    if not isinstance(raw, (str, Path)):
+        raise ValueError(f"data_dir in CLIMATE_API_CONFIG must be a path string, got {type(raw).__name__}")
+    # An empty string would otherwise resolve to the config file's own directory.
+    if not str(raw).strip():
+        raise ValueError("data_dir in CLIMATE_API_CONFIG must not be empty")
+    # Without expanduser(), "~/data" would be joined literally and create a "~" folder
+    # next to the config file. Joining an absolute path keeps it unchanged.
+    return (config_path.parent / Path(raw).expanduser()).resolve()
diff --git a/climate_api/data/datasets/chirps3.yaml b/climate_api/data/datasets/chirps3.yaml
index b977bb2d..998fbb89 100644
--- a/climate_api/data/datasets/chirps3.yaml
+++ b/climate_api/data/datasets/chirps3.yaml
@@ -7,9 +7,21 @@
sync_execution: append
sync_availability:
latest_available_function: climate_api.providers.availability.chirps3_daily_latest_available
- ingestion:
+ extents:
+ spatial:
+ bbox: [-180, -50, 180, 50]
+ crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84
+ temporal:
+ begin: "1981-01-01"
+ trs: http://www.opengis.net/def/uom/ISO-8601/0/Gregorian
+ resolution: P1D
+ ingestion:
function: dhis2eo.data.chc.chirps3.daily.download
units: mm
resolution: 5 km x 5 km
source: CHIRPS v3
source_url: https://www.chc.ucsb.edu/data/chirps3
+ display:
+ colormap: blues
+ range: [0.0, 20.0]
+ nodata: -9999.0
diff --git a/climate_api/data/datasets/era5_land.yaml b/climate_api/data/datasets/era5_land.yaml
index 91716520..146419c4 100644
--- a/climate_api/data/datasets/era5_land.yaml
+++ b/climate_api/data/datasets/era5_land.yaml
@@ -8,15 +8,28 @@
sync_availability:
latest_available_function: climate_api.providers.availability.lagged_latest_available
lag_hours: 120
- ingestion:
+ extents:
+ spatial:
+ bbox: [-180, -90, 180, 90]
+ crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84
+ temporal:
+ begin: "1950-01-01"
+ trs: http://www.opengis.net/def/uom/ISO-8601/0/Gregorian
+ resolution: PT1H
+ ingestion:
function: dhis2eo.data.destine.era5_land.hourly.download
default_params:
variables: ['t2m']
+ transforms:
+ - climate_api.transforms.convert_units
units: kelvin
convert_units: degC
resolution: 9 km x 9 km
source: ERA5-Land Reanalysis
source_url: https://earthdatahub.destine.eu/collections/era5/datasets/reanalysis-era5-land
+ display:
+ colormap: rdbu_r
+ range: [15.0, 40.0]
- id: era5land_precipitation_hourly
name: Total precipitation (ERA5-Land)
@@ -28,13 +41,27 @@
sync_availability:
latest_available_function: climate_api.providers.availability.lagged_latest_available
lag_hours: 120
- ingestion:
+ extents:
+ spatial:
+ bbox: [-180, -90, 180, 90]
+ crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84
+ temporal:
+ begin: "1950-01-01"
+ trs: http://www.opengis.net/def/uom/ISO-8601/0/Gregorian
+ resolution: PT1H
+ ingestion:
function: dhis2eo.data.destine.era5_land.hourly.download
default_params:
variables: ['tp']
- pre_process: ['deaccumulate_era5']
+ transforms:
+ - climate_api.transforms.deaccumulate_era5
+ - climate_api.transforms.convert_units
units: m
convert_units: mm
resolution: 9 km x 9 km
source: ERA5-Land Reanalysis
source_url: https://earthdatahub.destine.eu/collections/era5/datasets/reanalysis-era5-land
+ display:
+ colormap: blues
+ range: [0.0, 5.0]
+ nodata: 0.0
diff --git a/climate_api/data/datasets/worldpop.yaml b/climate_api/data/datasets/worldpop.yaml
index ead982b9..78fbb99f 100644
--- a/climate_api/data/datasets/worldpop.yaml
+++ b/climate_api/data/datasets/worldpop.yaml
@@ -8,6 +8,15 @@
latest_available_function: climate_api.providers.availability.worldpop_release_latest_available
# WorldPop projections are intentionally request-driven for future years.
allow_future: true
+ extents:
+ spatial:
+ bbox: [-180, -90, 180, 90]
+ crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84
+ temporal:
+ begin: "2015"
+ end: "2030"
+ trs: http://www.opengis.net/def/uom/ISO-8601/0/Gregorian
+ resolution: P1Y
ingestion:
function: dhis2eo.data.worldpop.pop_total.yearly.download
multiscales:
@@ -18,3 +27,7 @@
resolution: 100m x 100m
source: WorldPop Global2
source_url: https://hub.worldpop.org/project/categories?id=3
+ display:
+ colormap: reds
+ range: [0.0, 25.0]
+ nodata: 0.0
diff --git a/climate_api/data_manager/services/downloader.py b/climate_api/data_manager/services/downloader.py
index 3c93772e..291f08e5 100644
--- a/climate_api/data_manager/services/downloader.py
+++ b/climate_api/data_manager/services/downloader.py
@@ -16,19 +16,17 @@
from geozarr_toolkit import MultiscalesConventionMetadata, create_geozarr_attrs
from topozarr.coarsen import create_pyramid
+from climate_api import config as api_config
+
from .utils import get_lon_lat_dims, get_time_dim
logger = logging.getLogger(__name__)
def _resolve_download_dir() -> Path:
- # CACHE_OVERRIDE keeps existing Docker/dev deployments working unchanged.
- override = os.getenv("CACHE_OVERRIDE")
- if override:
- return Path(override)
- # Default to an XDG-compliant user-writable location so the package works
- # when installed with pip (where a package-relative path would land inside
- # site-packages and typically be non-writable).
+ data_dir = api_config.get_data_dir()
+ if data_dir is not None:
+ return data_dir / "downloads"
xdg_data = Path(os.getenv("XDG_DATA_HOME", Path.home() / ".local" / "share"))
return xdg_data / "climate-api" / "downloads"
@@ -54,6 +52,7 @@ def download_dataset(
When running in the background-task path, the download is deferred and this function
returns an empty list because no files have been created yet.
"""
+ _validate_spatial_coverage(dataset, bbox if bbox is not None else _bbox_from_env())
ingestion = dataset["ingestion"]
eo_download_func_path = ingestion["function"]
eo_download_func = _get_dynamic_function(eo_download_func_path)
@@ -143,6 +142,7 @@ def build_dataset_zarr(dataset: dict[str, Any], *, start: str | None = None, end
dims = [lon_dim, lat_dim]
ds = _select_time_range(ds, dataset=dataset, start=start, end=end)
+ ds = _run_transforms(ds, dataset)
xmin = ds[lon_dim].min().item()
xmax = ds[lon_dim].max().item()
@@ -243,6 +243,16 @@ def _select_time_range(
return selected
+def _run_transforms(ds: xr.Dataset, dataset: dict[str, Any]) -> xr.Dataset:
+    """Apply the template's ``transforms`` pipeline to ``ds`` in declaration order.
+
+    Each entry is either a dotted function path (string) or a mapping with a
+    ``function`` key and an optional ``params`` mapping. Every transform is
+    called as ``func(ds, dataset, **params)`` and its return value is fed to
+    the next one. Returns ``ds`` unchanged when no transforms are declared.
+    """
+    # A bare ``transforms:`` key parses to None in YAML; treat it like an absent key.
+    for entry in dataset.get("transforms") or []:
+        if isinstance(entry, str):
+            func_path, params = entry, {}
+        else:
+            func_path = entry["function"]
+            # Likewise a bare ``params:`` key must not end up as ``**None``.
+            params = entry.get("params") or {}
+        func = _get_dynamic_function(func_path)
+        logger.info("Applying transform %s to dataset %s", func_path, dataset.get("id", "?"))
+        ds = func(ds, dataset, **params)
+    return ds
+
+
def _compute_time_space_chunks(
ds: xr.Dataset,
dataset: dict[str, Any],
@@ -289,6 +299,39 @@ def get_zarr_path(dataset: dict[str, Any]) -> Path | None:
return None
+def _validate_spatial_coverage(dataset: dict[str, Any], bbox: list[float] | None) -> None:
+    """Raise HTTP 400 if the request bbox has no overlap with the dataset's declared extents.
+
+    Partial overlap is accepted. The check is skipped (returns None) when no
+    bbox is given or the template declares no usable ``extents.spatial.bbox``
+    (missing, not a list/tuple, or not exactly four values).
+    """
+    extents = dataset.get("extents")
+    if not extents or bbox is None:
+        return
+    spatial = extents.get("spatial")
+    if not spatial:
+        return
+    cov_bbox = spatial.get("bbox")
+    if not isinstance(cov_bbox, (list, tuple)) or len(cov_bbox) != 4:
+        return
+    cov_xmin, cov_ymin, cov_xmax, cov_ymax = cov_bbox
+    xmin, ymin, xmax, ymax = bbox
+    # Use .get() so a template without an id still yields the intended 400, not a KeyError.
+    dataset_id = dataset.get("id", "?")
+    # Latitude is checked first, so it is the axis reported when both are disjoint.
+    axes = (
+        ("Latitude", cov_ymin, cov_ymax, ymin, ymax),
+        ("Longitude", cov_xmin, cov_xmax, xmin, xmax),
+    )
+    for label, cov_lo, cov_hi, req_lo, req_hi in axes:
+        if req_lo > cov_hi or req_hi < cov_lo:
+            raise HTTPException(
+                status_code=400,
+                detail=(
+                    f"Dataset '{dataset_id}' does not cover this extent. "
+                    f"{label} coverage: {cov_lo}°–{cov_hi}°, "
+                    f"requested: {req_lo}°–{req_hi}°."
+                ),
+            )
+
+
def _get_dynamic_function(full_path: str) -> Callable[..., Any]:
"""Import and return a function given its dotted module path."""
parts = full_path.split(".")
diff --git a/climate_api/data_registry/services/datasets.py b/climate_api/data_registry/services/datasets.py
index c61e501d..0182aec6 100644
--- a/climate_api/data_registry/services/datasets.py
+++ b/climate_api/data_registry/services/datasets.py
@@ -23,7 +23,7 @@ def list_datasets() -> list[dict[str, Any]]:
"""Load all dataset templates and return a flat list.
Built-in templates from climate_api/data/datasets/ are always loaded. When
- datasets_dir is set in CLIMATE_API_CONFIG, templates from that directory are
+ templates_dir is set in CLIMATE_API_CONFIG, templates from that directory are
merged on top — a custom template with the same id overrides the built-in one.
CONFIGS_DIR (test override via monkeypatch) bypasses this and loads only
@@ -34,16 +34,18 @@ def list_datasets() -> list[dict[str, Any]]:
merged: dict[str, dict[str, Any]] = {d["id"]: d for d in _load_builtin_datasets()}
- config_datasets_dir = api_config.get_config().get("datasets_dir")
- if config_datasets_dir:
- if not isinstance(config_datasets_dir, (str, Path)):
+ config_templates_dir = api_config.get_config().get("templates_dir")
+ if config_templates_dir:
+ if not isinstance(config_templates_dir, (str, Path)):
raise ValueError(
- f"datasets_dir in CLIMATE_API_CONFIG must be a path string, got {type(config_datasets_dir).__name__}"
+ f"templates_dir in CLIMATE_API_CONFIG must be a path string, got {type(config_templates_dir).__name__}"
)
config_path = api_config.get_config_path()
- resolved = (config_path.parent / config_datasets_dir).resolve() if config_path else Path(config_datasets_dir)
- for dataset in _load_from_dir(resolved):
- merged[dataset["id"]] = dataset
+ root = (config_path.parent / config_templates_dir).resolve() if config_path else Path(config_templates_dir)
+ datasets_subdir = root / "datasets"
+ if datasets_subdir.is_dir():
+ for dataset in _load_from_dir(datasets_subdir):
+ merged[dataset["id"]] = dataset
return list(merged.values())
diff --git a/climate_api/ingestions/services.py b/climate_api/ingestions/services.py
index dfc5efd4..9f5e26e4 100644
--- a/climate_api/ingestions/services.py
+++ b/climate_api/ingestions/services.py
@@ -49,10 +49,11 @@
def _resolve_artifacts_dir() -> Path:
- # CACHE_OVERRIDE keeps existing Docker/dev deployments working unchanged.
- override = os.getenv("CACHE_OVERRIDE")
- if override:
- return Path(override) / "artifacts"
+ from climate_api import config as api_config
+
+ data_dir = api_config.get_data_dir()
+ if data_dir is not None:
+ return data_dir / "artifacts"
xdg_data = Path(os.getenv("XDG_DATA_HOME", Path.home() / ".local" / "share"))
return xdg_data / "climate-api" / "artifacts"
diff --git a/climate_api/publications/services.py b/climate_api/publications/services.py
index cc531dd8..0d2d4c04 100644
--- a/climate_api/publications/services.py
+++ b/climate_api/publications/services.py
@@ -19,9 +19,11 @@
def _resolve_pygeoapi_dir() -> Path:
- override = os.getenv("CACHE_OVERRIDE")
- if override:
- return Path(override) / "pygeoapi"
+ from climate_api import config as api_config
+
+ data_dir = api_config.get_data_dir()
+ if data_dir is not None:
+ return data_dir / "pygeoapi"
xdg_data = Path(os.getenv("XDG_DATA_HOME", Path.home() / ".local" / "share"))
return xdg_data / "climate-api" / "pygeoapi"
diff --git a/climate_api/stac/services.py b/climate_api/stac/services.py
index 4a0c730e..7f53347a 100644
--- a/climate_api/stac/services.py
+++ b/climate_api/stac/services.py
@@ -25,6 +25,7 @@
CATALOG_DESCRIPTION = "Published Climate API GeoZarr datasets"
STAC_VERSION = "1.1.0"
DATACUBE_EXTENSION = "https://stac-extensions.github.io/datacube/v2.3.0/schema.json"
+RENDER_EXTENSION = "https://stac-extensions.github.io/render/v2.0.0/schema.json"
ZARR_EXTENSION = "https://stac-extensions.github.io/zarr/v1.1.0/schema.json"
DEFAULT_STAC_LICENSE = "various"
SPATIAL_STEP_DECIMALS = 8
@@ -89,11 +90,16 @@ def build_collection(dataset_id: str, request: Request) -> dict[str, object]:
collection_payload["stac_version"] = STAC_VERSION
collection_payload["description"] = template.description
collection_payload["title"] = template.title
+ renders = _build_renders(artifact, source_dataset)
+ extensions = {DATACUBE_EXTENSION, ZARR_EXTENSION}
+ if renders is not None:
+ collection_payload["renders"] = renders
+ extensions.add(RENDER_EXTENSION)
existing_extensions = collection_payload.get("stac_extensions", [])
if isinstance(existing_extensions, list):
- collection_payload["stac_extensions"] = sorted({*existing_extensions, DATACUBE_EXTENSION, ZARR_EXTENSION})
+ collection_payload["stac_extensions"] = sorted({*existing_extensions, *extensions})
else:
- collection_payload["stac_extensions"] = sorted([DATACUBE_EXTENSION, ZARR_EXTENSION])
+ collection_payload["stac_extensions"] = sorted(extensions)
collection_payload["links"] = template_links
assets = collection_payload.setdefault("assets", {})
zarr_from_xstac = assets.get("zarr", {}) if isinstance(assets, dict) else {}
@@ -427,6 +433,30 @@ def _zarr_open_kwargs(artifact: ArtifactRecord) -> dict[str, bool | None]:
return {"consolidated": _zarr_consolidated_flag(_artifact_store_path(artifact))}
+def _build_renders(artifact: ArtifactRecord, source_dataset: dict[str, Any]) -> dict[str, Any] | None:
+    """Build the ``renders`` mapping for a collection from the template's ``display`` block.
+
+    Returns ``{"default": {...}}`` with title, asset list, rescale range and
+    colormap name, plus ``nodata`` and units when available. Returns None when
+    ``display`` is absent or unusable (no string ``colormap``, or ``range`` is
+    not a pair of numbers), so callers can omit the render extension.
+    """
+    display = source_dataset.get("display")
+    if not isinstance(display, dict):
+        return None
+    colormap_name = display.get("colormap")
+    value_range = display.get("range")
+    if not isinstance(colormap_name, str) or not isinstance(value_range, (list, tuple)) or len(value_range) != 2:
+        return None
+    try:
+        rescale = [float(value_range[0]), float(value_range[1])]
+    except (TypeError, ValueError):
+        # A non-numeric range is a malformed display block like any other: skip rendering
+        # hints rather than failing the whole collection response.
+        return None
+    render: dict[str, Any] = {
+        "title": artifact.dataset_name,
+        "assets": ["zarr"],
+        "rescale": [rescale],
+        "colormap_name": colormap_name,
+        "climate_api:variable": artifact.variable,
+    }
+    nodata = display.get("nodata")
+    if nodata is not None:
+        try:
+            render["nodata"] = float(nodata)
+        except (TypeError, ValueError):
+            # nodata is optional; drop an unparseable value and keep the rest.
+            pass
+    units = source_dataset.get("convert_units") or source_dataset.get("units")
+    if isinstance(units, str):
+        render["climate_api:units"] = units
+    return {"default": render}
+
+
def _zarr_consolidated_flag(artifact_path: str) -> bool | None:
if "://" in artifact_path:
return None
diff --git a/climate_api/system/routes.py b/climate_api/system/routes.py
index e52a5911..4b9e7fe8 100644
--- a/climate_api/system/routes.py
+++ b/climate_api/system/routes.py
@@ -1,13 +1,15 @@
"""Root API endpoints."""
import sys
+import urllib.parse
from importlib.metadata import version as _pkg_version
from fastapi import APIRouter, Request
from fastapi.responses import HTMLResponse, JSONResponse, Response
+from starlette.responses import RedirectResponse
from .schemas import AppInfo, HealthStatus, Status
-from .templates import ROOT_RESPONSES, app_version, render_landing, root_json, wants_json
+from .templates import ROOT_RESPONSES, app_version, render_landing, render_manage, render_maps, root_json, wants_json
router = APIRouter()
@@ -21,6 +23,96 @@ def read_index(request: Request) -> Response:
return HTMLResponse(render_landing(app_version, base))
+@router.get("/map", response_class=HTMLResponse, include_in_schema=False)
+def maps(request: Request) -> HTMLResponse:
+    """Serve the interactive map viewer page."""
+    # Strip the trailing slash so the value can be used as a URL prefix.
+    site_root = str(request.base_url).rstrip("/")
+    page = render_maps(site_root)
+    return HTMLResponse(page)
+
+
+@router.get("/manage", response_class=HTMLResponse, include_in_schema=False)
+def manage(
+    request: Request,
+    message: str | None = None,
+    error: str | None = None,
+) -> HTMLResponse:
+    """Serve the management page for ingestion and sync operations.
+
+    ``message`` and ``error`` are optional and are handed to the page renderer
+    unchanged.
+    """
+    # NOTE(review): message/error arrive from the request — confirm the template
+    # environment autoescapes them.
+    site_root = str(request.base_url).rstrip("/")
+    page = render_manage(app_version, site_root, message=message, error=error)
+    return HTMLResponse(page)
+
+
+@router.post("/manage/ingest", include_in_schema=False)
+async def manage_ingest(request: Request) -> RedirectResponse:
+    """Handle ingest form submission and redirect to the management page.
+
+    Reads ``dataset_id``, ``start``, ``end`` and the ``publish`` / ``overwrite``
+    checkboxes from the posted form. Always answers with a 303 redirect to
+    ``/manage``: ``?message=...`` on success, ``?error=...`` when the template
+    is unknown or ingestion raises.
+    """
+    import logging
+
+    from fastapi import HTTPException
+    from starlette.concurrency import run_in_threadpool
+
+    from climate_api.data_registry.services.datasets import get_dataset
+    from climate_api.extents.services import get_extent
+    from climate_api.ingestions.services import create_artifact
+
+    base = str(request.base_url).rstrip("/")
+    try:
+        form = await request.form()
+        dataset_id = str(form.get("dataset_id", ""))
+        start = str(form.get("start", ""))
+        end = str(form.get("end", "")) or None
+        # Checkboxes are only present in the form data when ticked.
+        publish = "publish" in form
+        overwrite = "overwrite" in form
+
+        template = get_dataset(dataset_id)
+        if template is None:
+            msg = urllib.parse.quote(f"Dataset template '{dataset_id}' not found")
+            return RedirectResponse(f"{base}/manage?error={msg}", status_code=303)
+
+        extent = get_extent()
+        resolved_bbox = list(extent["bbox"]) if extent else None
+        extent_id = extent["id"] if extent else None
+        country_code = extent.get("country_code") if extent else None
+
+        # create_artifact is a plain synchronous call; running it directly inside this
+        # coroutine would stall the event loop (and every other request) until it returns.
+        await run_in_threadpool(
+            create_artifact,
+            dataset=template,
+            start=start,
+            end=end,
+            extent_id=extent_id,
+            bbox=resolved_bbox,
+            country_code=country_code,
+            overwrite=overwrite,
+            prefer_zarr=True,
+            publish=publish,
+        )
+        name = urllib.parse.quote(template.get("name", dataset_id))
+        return RedirectResponse(f"{base}/manage?message=Ingested+{name}", status_code=303)
+    except HTTPException as exc:
+        msg = urllib.parse.quote(str(exc.detail))
+        return RedirectResponse(f"{base}/manage?error={msg}", status_code=303)
+    except Exception as exc:
+        # The UI only shows the short message; keep the traceback in the server log.
+        logging.getLogger(__name__).exception("Ingest from the management page failed")
+        msg = urllib.parse.quote(str(exc))
+        return RedirectResponse(f"{base}/manage?error={msg}", status_code=303)
+
+
+@router.post("/manage/sync", include_in_schema=False)
+async def manage_sync(request: Request) -> RedirectResponse:
+    """Handle sync form submission and redirect to the management page.
+
+    Reads ``dataset_id`` and the ``publish`` checkbox from the posted form.
+    Always answers with a 303 redirect to ``/manage``: ``?message=...`` on
+    success, ``?error=...`` when the sync raises.
+    """
+    import logging
+
+    from fastapi import HTTPException
+    from starlette.concurrency import run_in_threadpool
+
+    from climate_api.ingestions.services import sync_dataset
+
+    base = str(request.base_url).rstrip("/")
+    try:
+        form = await request.form()
+        dataset_id = str(form.get("dataset_id", ""))
+        # A checkbox is only present in the form data when ticked.
+        publish = "publish" in form
+
+        # sync_dataset is a plain synchronous call; keep it off the event loop so other
+        # requests are still served while it runs.
+        await run_in_threadpool(
+            sync_dataset,
+            dataset_id=dataset_id,
+            end=None,
+            prefer_zarr=True,
+            publish=publish,
+        )
+        return RedirectResponse(f"{base}/manage?message=Sync+completed", status_code=303)
+    except HTTPException as exc:
+        msg = urllib.parse.quote(str(exc.detail))
+        return RedirectResponse(f"{base}/manage?error={msg}", status_code=303)
+    except Exception as exc:
+        # The UI only shows the short message; keep the traceback in the server log.
+        logging.getLogger(__name__).exception("Sync from the management page failed")
+        msg = urllib.parse.quote(str(exc))
+        return RedirectResponse(f"{base}/manage?error={msg}", status_code=303)
+
+
@router.get("/health")
def health() -> HealthStatus:
"""Return health status for container health checks."""
diff --git a/climate_api/system/templates.py b/climate_api/system/templates.py
index 6caa992a..6cfeca77 100644
--- a/climate_api/system/templates.py
+++ b/climate_api/system/templates.py
@@ -2,6 +2,7 @@
import importlib.resources
import logging
+from datetime import date
from importlib.metadata import PackageNotFoundError
from importlib.metadata import version as _pkg_version
from typing import Any
@@ -9,6 +10,7 @@
import jinja2
from fastapi import Request
+from climate_api.data_registry.services import datasets as registry_datasets
from climate_api.extents.services import get_extent
from climate_api.ingestions.services import list_datasets
@@ -94,23 +96,60 @@ def wants_json(request: Request) -> bool:
return json_q >= 0 and (html_q < 0 or json_q >= html_q)
-def render_landing(version: str, base: str) -> str:
- """Render the root landing page with live instance status."""
+def render_maps(base: str) -> str:
+ """Render the map viewer page."""
+ return get_template("map-viewer.html").render(base=base)
+
+
+def _load_extent() -> dict[str, Any] | None:
try:
- extent: dict[str, Any] | None = get_extent()
+ return get_extent()
except ValueError:
- extent = None
+ return None
+ except Exception:
+ _log.exception("Unexpected error loading extent")
+ return None
+
+
+def _load_templates() -> list[dict[str, Any]]:
+ try:
+ return registry_datasets.list_datasets()
except Exception:
- _log.exception("Unexpected error loading extent for landing page")
- extent = None
+ _log.exception("Unexpected error loading dataset templates")
+ return []
+
+
+def _load_datasets() -> list[Any]:
try:
- datasets = list_datasets().items
+ return list_datasets().items
except Exception:
- _log.exception("Unexpected error loading datasets for landing page")
- datasets = []
+ _log.exception("Unexpected error loading datasets")
+ return []
+
+
+def render_landing(version: str, base: str) -> str:
+ """Render the root landing page with live instance status."""
return get_template("landing_page.html").render(
version=version,
base=base,
- extent=extent,
- datasets=datasets,
+ extent=_load_extent(),
+ datasets=_load_datasets(),
+ templates=_load_templates(),
+ )
+
+
+def render_manage(version: str, base: str, message: str | None = None, error: str | None = None) -> str:
+    """Render the management page.
+
+    Passes the current extent, dataset templates and ingested datasets to
+    ``manage.html`` together with ``today`` and ``year_ago`` as ISO dates and
+    the optional ``message`` / ``error`` banners.
+    """
+    # Read the clock once so both dates are derived from the same day.
+    today = date.today()
+    try:
+        year_ago = today.replace(year=today.year - 1)
+    except ValueError:
+        # 29 February has no counterpart in the previous year; fall back to 28 February
+        # instead of failing the whole page on leap day.
+        year_ago = today.replace(year=today.year - 1, day=28)
+    return get_template("manage.html").render(
+        version=version,
+        base=base,
+        extent=_load_extent(),
+        templates=_load_templates(),
+        datasets=_load_datasets(),
+        today=today.isoformat(),
+        year_ago=year_ago.isoformat(),
+        message=message,
+        error=error,
 )
diff --git a/climate_api/templates/landing_page.html b/climate_api/templates/landing_page.html
index 49cd5999..6bd7703b 100644
--- a/climate_api/templates/landing_page.html
+++ b/climate_api/templates/landing_page.html
@@ -264,10 +264,53 @@
Datasets {{ datasets | length }}
{% endif %}
+
+
+
Available dataset templates {{ templates | length }}
+ {% if templates %}
+
+
+
+ | Name |
+ Variable |
+ Period |
+ Source |
+
+
+
+ {% for t in templates %}
+
+ | {{ t.name }} |
+ {{ t.variable }} |
+ {{ t.period_type }} |
+
+ {% if t.source_url %}
+ {{ t.source or t.id }}
+ {% else %}
+ {{ t.source or '—' }}
+ {% endif %}
+ |
+
+ {% endfor %}
+
+
+ {% else %}
+
No dataset templates found.
+ {% endif %}
+
+
Explore
+ -
+ Manage
+ Ingest and sync datasets without using the API directly
+
-
API documentationExplore
>Interactive Swagger UI for all endpoints
+ -
+ Map viewer
+ Browse published datasets on an interactive map
+
-
STAC Catalog
+
+
+
+
+ Manage — DHIS2 Climate API
+
+
+
+
+
+
+ {% if message %}
+
{{ message }}
+ {% endif %}
+ {% if error %}
+ {{ error }}
+ {% endif %}
+
+
+
+
Ingest dataset
+ {% if not extent %}
+
+ No extent configured. Set extent: in
+ CLIMATE_API_CONFIG before ingesting data.
+
+ {% elif not templates %}
+
No dataset templates found.
+ {% else %}
+
+ {% endif %}
+
+
+
+
+
Ingested datasets {{ datasets | length }}
+ {% if datasets %}
+
+
+
+ | Name |
+ Period |
+ Temporal coverage |
+ Status |
+ |
+
+
+
+ {% for ds in datasets %}
+
+ |
+ {{ ds.dataset_name }}
+ |
+ {{ ds.period_type }} |
+ {{ ds.extent.temporal.start }} – {{ ds.extent.temporal.end }} |
+
+ {% if ds.publication.status == "published" %}
+ published
+ {% else %}
+ unpublished
+ {% endif %}
+ |
+
+
+ |
+
+ {% endfor %}
+
+
+ {% else %}
+
+ No datasets ingested yet. Use the form above to ingest your first dataset.
+
+ {% endif %}
+
+
+
+
+
+
diff --git a/climate_api/templates/map-viewer.html b/climate_api/templates/map-viewer.html
new file mode 100644
index 00000000..8bb424f8
--- /dev/null
+++ b/climate_api/templates/map-viewer.html
@@ -0,0 +1,510 @@
+
+
+
+
+
+ Map viewer — DHIS2 Climate API
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ - Source
+ - —
+ - Units
+ - —
+
+
+
+
+
Loading datasets...
+
+
+
+
+
+
+
diff --git a/climate_api/transforms/__init__.py b/climate_api/transforms/__init__.py
new file mode 100644
index 00000000..e5988537
--- /dev/null
+++ b/climate_api/transforms/__init__.py
@@ -0,0 +1,13 @@
+"""Built-in dataset transform functions for the transforms pipeline.
+
+Each function has the signature:
+ (ds: xr.Dataset, dataset: dict[str, Any]) -> xr.Dataset
+
+Functions can be referenced by their dotted module path in the dataset YAML
+``transforms`` list, the same way ``ingestion.function`` works.
+"""
+
+from .deaccumulate import deaccumulate_era5
+from .unit_conversion import convert_units
+
+__all__ = ["convert_units", "deaccumulate_era5"]
diff --git a/climate_api/transforms/deaccumulate.py b/climate_api/transforms/deaccumulate.py
new file mode 100644
index 00000000..fd46dec8
--- /dev/null
+++ b/climate_api/transforms/deaccumulate.py
@@ -0,0 +1,22 @@
+"""Deaccumulation transforms for ERA5 accumulated fields."""
+
+from typing import Any
+
+import xarray as xr
+
+
+def deaccumulate_era5(ds: xr.Dataset, dataset: dict[str, Any]) -> xr.Dataset:
+    """Convert ERA5 accumulated fields to per-step values by differencing consecutive steps.
+
+    The variable named by ``dataset["variable"]`` is replaced by the difference
+    between each time step and the one before it, with negative results clipped
+    to zero. The first time step has no predecessor and is dropped from the
+    returned dataset. The variable's attributes are preserved.
+
+    Raises ValueError if the variable has no dimension whose name contains "time".
+    """
+    varname = dataset["variable"]
+    da = ds[varname]
+    time_dim = next((d for d in da.dims if "time" in d), None)
+    if time_dim is None:
+        raise ValueError(f"Cannot deaccumulate '{varname}': no time dimension among {tuple(da.dims)}")
+    # diff() labels each difference with the later of the two steps by default, so the
+    # result lines up with every time step except the first.
+    # NOTE(review): clipping turns every negative difference into 0. If the source
+    # accumulation resets periodically, the step right after a reset would be reported
+    # as 0 rather than its own value — confirm against the provider's convention.
+    diff = da.diff(dim=time_dim).clip(min=0)
+    # Drop the first time step positionally; unlike a label-based selection this does
+    # not depend on the time coordinate being unique.
+    ds = ds.isel({time_dim: slice(1, None)})
+    return ds.assign({varname: diff.assign_attrs(da.attrs)})
diff --git a/climate_api/transforms/unit_conversion.py b/climate_api/transforms/unit_conversion.py
new file mode 100644
index 00000000..65045ff8
--- /dev/null
+++ b/climate_api/transforms/unit_conversion.py
@@ -0,0 +1,39 @@
+"""Unit conversion transform: scale + offset applied to the dataset variable."""
+
+import logging
+from typing import Any
+
+import xarray as xr
+
+logger = logging.getLogger(__name__)
+
+# (from_units, to_units) -> (display_label, scale, offset)
+# Applied as: converted = original * scale + offset
+_CONVERSIONS: dict[tuple[str, str], tuple[str, float, float]] = {
+ ("kelvin", "degc"): ("degC", 1.0, -273.15),
+ ("m", "mm"): ("mm", 1000.0, 0.0),
+}
+
+
+def convert_units(ds: xr.Dataset, dataset: dict[str, Any]) -> xr.Dataset:
+    """Convert the dataset variable from ``units`` to ``convert_units``.
+
+    Reads ``units`` and ``convert_units`` from the dataset template dict and
+    looks the pair up case-insensitively in ``_CONVERSIONS``. On a match the
+    variable named by ``dataset["variable"]`` is replaced by
+    ``original * scale + offset`` and its ``units`` attribute is set to the
+    registered label.
+
+    Returns the dataset unchanged if either field is absent or empty, or the
+    conversion is not registered in ``_CONVERSIONS`` (a warning is logged in
+    the latter case).
+    """
+    convert_to = dataset.get("convert_units")
+    if not convert_to:
+        return ds
+    # ``units:`` with no value parses to None in YAML; treat it like a missing field
+    # instead of failing on ``None.lower()``.
+    units = dataset.get("units") or ""
+    key = (str(units).lower(), str(convert_to).lower())
+    conversion = _CONVERSIONS.get(key)
+    if conversion is None:
+        logger.warning("No unit conversion registered for %s -> %s; skipping", units, convert_to)
+        return ds
+    label, scale, offset = conversion
+    varname = dataset["variable"]
+    logger.info("Converting %s from %s to %s", varname, units, label)
+    da = ds[varname]
+    # Skip the multiplication for pure offsets (scale == 1.0).
+    converted = (da * scale + offset) if scale != 1.0 else (da + offset)
+    return ds.assign({varname: converted.assign_attrs({**da.attrs, "units": label})})
diff --git a/docs/adding_custom_datasets.md b/docs/adding_custom_datasets.md
index 52f96274..1dd4c71c 100644
--- a/docs/adding_custom_datasets.md
+++ b/docs/adding_custom_datasets.md
@@ -2,7 +2,7 @@
This guide explains how to add a new dataset source to your Climate API instance — for example a national meteorological service, a regional satellite product, or a custom model output.
-The built-in dataset templates (CHIRPS3, ERA5-Land, WorldPop) ship as package data. Custom datasets are layered on top by pointing `datasets_dir` in your `climate-api.yaml` at a directory containing your own YAML template files.
+The built-in dataset templates (CHIRPS3, ERA5-Land, WorldPop) ship as package data. Custom datasets are layered on top by pointing `templates_dir` in your `climate-api.yaml` at a directory whose `datasets/` subfolder contains your own YAML template files.
## Overview
@@ -124,6 +124,22 @@ sync_availability:
`latest_available_function` must accept a `dataset` dict and return a `datetime`. Omit `sync_availability` entirely for `static` datasets or when you always want to sync up to the requested end date.
+**Spatial and temporal extents** — declare what the source dataset covers. The spatial bounding box is used to validate ingest requests before hitting the provider:
+
+```yaml
+extents:
+ spatial:
+ bbox: [-180, -50, 180, 50] # [xmin, ymin, xmax, ymax] in WGS84
+ crs: http://www.opengis.net/def/crs/OGC/1.3/CRS84
+ temporal:
+ begin: "1981-01-01"
+ end: "2030-12-31" # omit if ongoing
+ trs: http://www.opengis.net/def/uom/ISO-8601/0/Gregorian
+ resolution: P1D # ISO 8601 duration: PT1H, P1D, P1M, P1Y
+```
+
+If an ingest request's bounding box has no overlap with `extents.spatial.bbox`, the API returns HTTP 400 immediately. Partial overlap is allowed — the provider will return data for the intersecting area.
+
**Units**
| Field | Required | Description |
@@ -143,7 +159,13 @@ ingestion:
## Step 3: Point the instance at your templates directory
-Add `datasets_dir` to your `climate-api.yaml`:
+Add `templates_dir` to your `climate-api.yaml` and place your YAML file in the `datasets/` subfolder:
+
+```
+templates/
+└── datasets/
+ └── enacts_rainfall.yaml
+```
```yaml
extent:
@@ -151,10 +173,11 @@ extent:
name: Rwanda
bbox: [28.8, -2.9, 30.9, -1.0]
-datasets_dir: ./datasets/
+data_dir: ./data
+templates_dir: ./templates/
```
-All `*.yaml` and `*.yml` files in `datasets_dir` are loaded and merged with the built-in templates (CHIRPS3, ERA5-Land, WorldPop). Custom templates are additive — the built-ins remain available unless you deliberately override one by using the same `id`.
+All `*.yaml` and `*.yml` files in `templates_dir/datasets/` are loaded and merged with the built-in templates (CHIRPS3, ERA5-Land, WorldPop). Custom templates are additive — the built-ins remain available unless you deliberately override one by using the same `id`.
## Step 4: Ingest and publish
diff --git a/docs/setup_guide.md b/docs/setup_guide.md
index 68527a1a..0f1527a9 100644
--- a/docs/setup_guide.md
+++ b/docs/setup_guide.md
@@ -32,6 +32,8 @@ extent:
name: Rwanda
bbox: [28.8, -2.9, 30.9, -1.0]
country_code: RWA
+
+data_dir: ./data
```
Field reference:
@@ -43,6 +45,8 @@ Field reference:
| `bbox` | Yes | Bounding box as `[xmin, ymin, xmax, ymax]` in WGS84 decimal degrees |
| `country_code` | No | ISO 3166-1 alpha-3 code — required for WorldPop downloads |
+`data_dir` sets the directory where downloaded NetCDF files and Zarr stores are kept. It is required whenever a config file is present, and a relative path is resolved against the directory containing the config file (not the current working directory). Each instance must have its own `data_dir` to avoid mixing data between deployments.
+
To find the bounding box for a country, [bboxfinder.com](http://bboxfinder.com) is a useful tool.
Values can reference environment variables using `${VAR:-default}` syntax:
@@ -196,7 +200,7 @@ curl -s -X POST http://127.0.0.1:8000/ingestions \
}' | jq
```
-ERA5-Land data has a configured lag of 120 hours (5 days) — the sync planner will not request data from the last 120 hours. This can be adjusted by supplying a custom `era5_land.yaml` via `datasets_dir` in your `climate-api.yaml`.
+ERA5-Land data has a configured lag of 120 hours (5 days) — the sync planner will not request data from the last 120 hours. This can be adjusted by placing a custom `era5_land.yaml` in `templates_dir/datasets/` — see `adding_custom_datasets.md`.
---
diff --git a/tests/conftest.py b/tests/conftest.py
index 1f66391f..0fd7c56d 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -13,6 +13,7 @@
name: Sierra Leone
bbox: [-13.5, 6.9, -10.1, 10.0]
country_code: SLE
+data_dir: ./data
"""
diff --git a/tests/test_config.py b/tests/test_config.py
index 34f452e3..2aca7aeb 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -2,11 +2,40 @@
import pytest
-from climate_api.config import get_config
+from climate_api.config import get_config, get_data_dir
from climate_api.data_registry.services import datasets as dataset_registry
from climate_api.extents import services as extent_services
+def test_get_data_dir_returns_none_when_no_config(monkeypatch: pytest.MonkeyPatch) -> None:
+ monkeypatch.delenv("CLIMATE_API_CONFIG", raising=False)
+ assert get_data_dir() is None
+
+
+def test_get_data_dir_returns_none_when_config_path_set_but_file_missing(
+ monkeypatch: pytest.MonkeyPatch, tmp_path: Path
+) -> None:
+ monkeypatch.setenv("CLIMATE_API_CONFIG", str(tmp_path / "nonexistent.yaml"))
+ assert get_data_dir() is None
+
+
+def test_get_data_dir_raises_when_config_present_but_no_data_dir(
+ monkeypatch: pytest.MonkeyPatch, tmp_path: Path
+) -> None:
+ config_file = tmp_path / "climate-api.yaml"
+ config_file.write_text("extent:\n id: nor\n", encoding="utf-8")
+ monkeypatch.setenv("CLIMATE_API_CONFIG", str(config_file))
+ with pytest.raises(ValueError, match="data_dir is required"):
+ get_data_dir()
+
+
+def test_get_data_dir_resolves_relative_to_config_file(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+ config_file = tmp_path / "climate-api.yaml"
+ config_file.write_text("data_dir: ./data\n", encoding="utf-8")
+ monkeypatch.setenv("CLIMATE_API_CONFIG", str(config_file))
+ assert get_data_dir() == tmp_path / "data"
+
+
def test_get_config_returns_empty_when_unset(monkeypatch: pytest.MonkeyPatch) -> None:
monkeypatch.delenv("CLIMATE_API_CONFIG", raising=False)
assert get_config() == {}
@@ -91,10 +120,10 @@ def test_builtin_datasets_include_chirps_era5_worldpop(monkeypatch: pytest.Monke
assert "worldpop_population_yearly" in ids
-def test_datasets_dir_in_config_adds_to_bundled(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
- datasets_dir = tmp_path / "datasets"
- datasets_dir.mkdir()
- (datasets_dir / "custom.yaml").write_text(
+def test_templates_dir_in_config_adds_to_bundled(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+ datasets_subdir = tmp_path / "templates" / "datasets"
+ datasets_subdir.mkdir(parents=True)
+ (datasets_subdir / "custom.yaml").write_text(
"""
- id: custom_dataset
name: Custom dataset
@@ -107,7 +136,7 @@ def test_datasets_dir_in_config_adds_to_bundled(monkeypatch: pytest.MonkeyPatch,
encoding="utf-8",
)
config_file = tmp_path / "climate-api.yaml"
- config_file.write_text(f"datasets_dir: {datasets_dir}\n", encoding="utf-8")
+ config_file.write_text(f"templates_dir: {tmp_path / 'templates'}\n", encoding="utf-8")
monkeypatch.setattr(dataset_registry, "CONFIGS_DIR", None)
monkeypatch.setenv("CLIMATE_API_CONFIG", str(config_file))
@@ -117,18 +146,17 @@ def test_datasets_dir_in_config_adds_to_bundled(monkeypatch: pytest.MonkeyPatch,
assert "chirps3_precipitation_daily" in ids
-def test_datasets_dir_resolved_relative_to_config_file(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
- """datasets_dir is resolved relative to the config file, not CWD.
+def test_templates_dir_resolved_relative_to_config_file(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+ """templates_dir is resolved relative to the config file, not CWD.
This matters when running the installed `climate-api` CLI from a directory
- other than the repo root, where a relative datasets_dir in the config must
+ other than the repo root, where a relative templates_dir in the config must
still point at the correct sibling directory.
"""
deployment_dir = tmp_path / "deployment"
- deployment_dir.mkdir()
- datasets_dir = deployment_dir / "datasets"
- datasets_dir.mkdir()
- (datasets_dir / "custom.yaml").write_text(
+ datasets_subdir = deployment_dir / "templates" / "datasets"
+ datasets_subdir.mkdir(parents=True)
+ (datasets_subdir / "custom.yaml").write_text(
"""
- id: deployed_dataset
variable: val
@@ -140,7 +168,7 @@ def test_datasets_dir_resolved_relative_to_config_file(monkeypatch: pytest.Monke
encoding="utf-8",
)
config_file = deployment_dir / "climate-api.yaml"
- config_file.write_text("datasets_dir: ./datasets\n", encoding="utf-8")
+ config_file.write_text("templates_dir: ./templates\n", encoding="utf-8")
monkeypatch.setattr(dataset_registry, "CONFIGS_DIR", None)
monkeypatch.setenv("CLIMATE_API_CONFIG", str(config_file))
@@ -149,10 +177,10 @@ def test_datasets_dir_resolved_relative_to_config_file(monkeypatch: pytest.Monke
assert "deployed_dataset" in ids
-def test_datasets_dir_in_config_overrides_bundled_by_id(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
- datasets_dir = tmp_path / "datasets"
- datasets_dir.mkdir()
- (datasets_dir / "chirps3.yaml").write_text(
+def test_templates_dir_in_config_overrides_bundled_by_id(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+ datasets_subdir = tmp_path / "templates" / "datasets"
+ datasets_subdir.mkdir(parents=True)
+ (datasets_subdir / "chirps3.yaml").write_text(
"""
- id: chirps3_precipitation_daily
name: Custom CHIRPS override
@@ -165,7 +193,7 @@ def test_datasets_dir_in_config_overrides_bundled_by_id(monkeypatch: pytest.Monk
encoding="utf-8",
)
config_file = tmp_path / "climate-api.yaml"
- config_file.write_text(f"datasets_dir: {datasets_dir}\n", encoding="utf-8")
+ config_file.write_text(f"templates_dir: {tmp_path / 'templates'}\n", encoding="utf-8")
monkeypatch.setattr(dataset_registry, "CONFIGS_DIR", None)
monkeypatch.setenv("CLIMATE_API_CONFIG", str(config_file))
diff --git a/tests/test_datasets.py b/tests/test_datasets.py
index 5dd31e71..c7d0fedc 100644
--- a/tests/test_datasets.py
+++ b/tests/test_datasets.py
@@ -526,7 +526,8 @@ def fake_download_dataset(
monkeypatch.setattr(services.downloader, "download_dataset", fake_download_dataset)
monkeypatch.setattr(services.downloader, "build_dataset_zarr", lambda *_, **__: None)
- monkeypatch.setattr(services.downloader, "get_zarr_path", lambda _: tmp_path / "chirps3_precipitation_daily.zarr")
+ zarr_path_chirps = tmp_path / "chirps3_precipitation_daily.zarr"
+ monkeypatch.setattr(services.downloader, "get_zarr_path", lambda _: zarr_path_chirps)
monkeypatch.setattr(services, "_find_existing_artifact", lambda **_: None)
monkeypatch.setattr(
services,
diff --git a/tests/test_downloader.py b/tests/test_downloader.py
index aef87a8a..e927d699 100644
--- a/tests/test_downloader.py
+++ b/tests/test_downloader.py
@@ -16,30 +16,32 @@
from climate_api.ingestions import services as ingestion_services
-def test_resolve_download_dir_uses_cache_override(monkeypatch: pytest.MonkeyPatch) -> None:
- with tempfile.TemporaryDirectory() as override:
- monkeypatch.setenv("CACHE_OVERRIDE", override)
- monkeypatch.delenv("XDG_DATA_HOME", raising=False)
- assert downloader._resolve_download_dir() == Path(override)
+def test_resolve_download_dir_uses_data_dir_from_config(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+ config_file = tmp_path / "climate-api.yaml"
+ config_file.write_text("data_dir: ./data\nextent:\n id: test\n", encoding="utf-8")
+ monkeypatch.setenv("CLIMATE_API_CONFIG", str(config_file))
+ monkeypatch.delenv("XDG_DATA_HOME", raising=False)
+ assert downloader._resolve_download_dir() == tmp_path / "data" / "downloads"
-def test_resolve_download_dir_uses_xdg_data_home(monkeypatch: pytest.MonkeyPatch) -> None:
+def test_resolve_download_dir_uses_xdg_when_no_config(monkeypatch: pytest.MonkeyPatch) -> None:
with tempfile.TemporaryDirectory() as xdg:
- monkeypatch.delenv("CACHE_OVERRIDE", raising=False)
+ monkeypatch.delenv("CLIMATE_API_CONFIG", raising=False)
monkeypatch.setenv("XDG_DATA_HOME", xdg)
assert downloader._resolve_download_dir() == Path(xdg) / "climate-api" / "downloads"
-def test_resolve_artifacts_dir_uses_cache_override(monkeypatch: pytest.MonkeyPatch) -> None:
- with tempfile.TemporaryDirectory() as override:
- monkeypatch.setenv("CACHE_OVERRIDE", override)
- monkeypatch.delenv("XDG_DATA_HOME", raising=False)
- assert ingestion_services._resolve_artifacts_dir() == Path(override) / "artifacts"
+def test_resolve_artifacts_dir_uses_data_dir_from_config(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+ config_file = tmp_path / "climate-api.yaml"
+ config_file.write_text("data_dir: ./data\nextent:\n id: test\n", encoding="utf-8")
+ monkeypatch.setenv("CLIMATE_API_CONFIG", str(config_file))
+ monkeypatch.delenv("XDG_DATA_HOME", raising=False)
+ assert ingestion_services._resolve_artifacts_dir() == tmp_path / "data" / "artifacts"
-def test_resolve_artifacts_dir_uses_xdg_data_home(monkeypatch: pytest.MonkeyPatch) -> None:
+def test_resolve_artifacts_dir_uses_xdg_when_no_config(monkeypatch: pytest.MonkeyPatch) -> None:
with tempfile.TemporaryDirectory() as xdg:
- monkeypatch.delenv("CACHE_OVERRIDE", raising=False)
+ monkeypatch.delenv("CLIMATE_API_CONFIG", raising=False)
monkeypatch.setenv("XDG_DATA_HOME", xdg)
assert ingestion_services._resolve_artifacts_dir() == Path(xdg) / "climate-api" / "artifacts"
@@ -147,6 +149,123 @@ def fake_download(
assert "Upstream dataset download failed: provider timeout" == str(exc_info.value.detail)
+# ---------------------------------------------------------------------------
+# _get_cache_prefix
+# ---------------------------------------------------------------------------
+
+
+def test_get_cache_prefix_uses_dataset_id() -> None:
+ dataset: dict[str, Any] = {"id": "chirps3_precipitation_daily", "ingestion": {}}
+ assert downloader._get_cache_prefix(dataset) == "chirps3_precipitation_daily"
+
+
+# ---------------------------------------------------------------------------
+# _validate_spatial_coverage
+# ---------------------------------------------------------------------------
+
+
+_CHIRPS3_EXTENTS: dict[str, Any] = {
+ "spatial": {"bbox": [-180, -50, 180, 50], "crs": "http://www.opengis.net/def/crs/OGC/1.3/CRS84"}
+}
+_LIMITED_LON_EXTENTS: dict[str, Any] = {
+ "spatial": {"bbox": [-180, -90, 60, 90], "crs": "http://www.opengis.net/def/crs/OGC/1.3/CRS84"}
+}
+
+
+def test_validate_spatial_coverage_passes_when_no_extents_declared() -> None:
+ dataset: dict[str, Any] = {"id": "worldpop_population_yearly", "ingestion": {}}
+ downloader._validate_spatial_coverage(dataset, bbox=[4.5, 57.9, 31.1, 71.2])
+
+
+def test_validate_spatial_coverage_passes_when_no_bbox() -> None:
+ dataset: dict[str, Any] = {"id": "chirps3_precipitation_daily", "ingestion": {}, "extents": _CHIRPS3_EXTENTS}
+ downloader._validate_spatial_coverage(dataset, bbox=None)
+
+
+def test_validate_spatial_coverage_passes_when_template_bbox_malformed() -> None:
+ extents: dict[str, Any] = {"spatial": {"bbox": "not-a-list"}}
+ dataset: dict[str, Any] = {"id": "bad_template", "ingestion": {}, "extents": extents}
+ downloader._validate_spatial_coverage(dataset, bbox=[-10.0, -10.0, 10.0, 10.0])
+
+
+def test_validate_spatial_coverage_passes_when_bbox_inside_extents() -> None:
+ dataset: dict[str, Any] = {"id": "chirps3_precipitation_daily", "ingestion": {}, "extents": _CHIRPS3_EXTENTS}
+ downloader._validate_spatial_coverage(dataset, bbox=[-10.0, -10.0, 10.0, 10.0])
+
+
+def test_validate_spatial_coverage_raises_when_bbox_outside_lat_extents() -> None:
+ dataset: dict[str, Any] = {
+ "id": "chirps3_precipitation_daily",
+ "ingestion": {},
+ "extents": _CHIRPS3_EXTENTS,
+ }
+ with pytest.raises(HTTPException) as exc_info:
+ downloader._validate_spatial_coverage(dataset, bbox=[4.5, 57.9, 31.1, 71.2])
+ assert exc_info.value.status_code == 400
+ assert "does not cover this extent" in str(exc_info.value.detail)
+ assert "Latitude" in str(exc_info.value.detail)
+
+
+def test_validate_spatial_coverage_raises_when_bbox_outside_lon_extents() -> None:
+ dataset: dict[str, Any] = {
+ "id": "some_dataset",
+ "ingestion": {},
+ "extents": _LIMITED_LON_EXTENTS,
+ }
+ with pytest.raises(HTTPException) as exc_info:
+ downloader._validate_spatial_coverage(dataset, bbox=[70.0, -10.0, 90.0, 10.0])
+ assert exc_info.value.status_code == 400
+ assert "Longitude" in str(exc_info.value.detail)
+
+
+def test_download_dataset_validates_env_bbox_against_extents(
+ monkeypatch: pytest.MonkeyPatch,
+) -> None:
+ """Coverage validation uses the env fallback bbox when no bbox is passed in the request."""
+ dataset: dict[str, Any] = {
+ "id": "chirps3_precipitation_daily",
+ "ingestion": {"function": "ignored.path"},
+ "extents": _CHIRPS3_EXTENTS,
+ }
+ monkeypatch.setenv("DOWNLOAD_BBOX", "4.5,57.9,31.1,71.2")
+
+ with pytest.raises(HTTPException) as exc_info:
+ downloader.download_dataset(
+ dataset=dataset,
+ start="2020-01-01",
+ end="2020-01-31",
+ bbox=None,
+ country_code=None,
+ overwrite=False,
+ background_tasks=None,
+ )
+ assert exc_info.value.status_code == 400
+ assert "does not cover this extent" in str(exc_info.value.detail)
+
+
+def test_download_dataset_returns_400_when_bbox_outside_dataset_extents(
+ monkeypatch: pytest.MonkeyPatch,
+) -> None:
+ dataset: dict[str, Any] = {
+ "id": "chirps3_precipitation_daily",
+ "ingestion": {"function": "ignored.path"},
+ "extents": _CHIRPS3_EXTENTS,
+ }
+
+ with pytest.raises(HTTPException) as exc_info:
+ downloader.download_dataset(
+ dataset=dataset,
+ start="2020-01-01",
+ end="2020-01-31",
+ bbox=[4.5, 57.9, 31.1, 71.2],
+ country_code=None,
+ overwrite=False,
+ background_tasks=None,
+ )
+ assert exc_info.value.status_code == 400
+ assert "does not cover this extent" in str(exc_info.value.detail)
+
+
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
diff --git a/tests/test_publications.py b/tests/test_publications.py
index 403d3508..93dbd44b 100644
--- a/tests/test_publications.py
+++ b/tests/test_publications.py
@@ -1,3 +1,5 @@
+from pathlib import Path
+
import pytest
from climate_api.publications import services
@@ -9,21 +11,21 @@ def test_load_base_config_returns_mapping() -> None:
assert "server" in config
-def test_resolve_pygeoapi_dir_uses_cache_override(monkeypatch: pytest.MonkeyPatch, tmp_path: object) -> None:
- import tempfile
+def test_resolve_pygeoapi_dir_uses_data_dir_from_config(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
+ from climate_api import config as api_config
- with tempfile.TemporaryDirectory() as override:
- monkeypatch.setenv("CACHE_OVERRIDE", override)
- monkeypatch.delenv("XDG_DATA_HOME", raising=False)
- result = services._resolve_pygeoapi_dir()
- assert str(result) == f"{override}/pygeoapi"
+ monkeypatch.setattr(api_config, "get_data_dir", lambda: tmp_path / "data")
+ result = services._resolve_pygeoapi_dir()
+ assert result == tmp_path / "data" / "pygeoapi"
-def test_resolve_pygeoapi_dir_uses_xdg_data_home(monkeypatch: pytest.MonkeyPatch, tmp_path: object) -> None:
+def test_resolve_pygeoapi_dir_uses_xdg_data_home(monkeypatch: pytest.MonkeyPatch) -> None:
import tempfile
+ from climate_api import config as api_config
+
+ monkeypatch.setattr(api_config, "get_data_dir", lambda: None)
with tempfile.TemporaryDirectory() as xdg:
- monkeypatch.delenv("CACHE_OVERRIDE", raising=False)
monkeypatch.setenv("XDG_DATA_HOME", xdg)
result = services._resolve_pygeoapi_dir()
assert str(result) == f"{xdg}/climate-api/pygeoapi"
diff --git a/tests/test_transforms.py b/tests/test_transforms.py
new file mode 100644
index 00000000..d1ab1dd6
--- /dev/null
+++ b/tests/test_transforms.py
@@ -0,0 +1,101 @@
+import numpy as np
+import xarray as xr
+
+from climate_api.transforms import convert_units, deaccumulate_era5
+
+
+def _ds(varname: str, values: list[float], time_steps: int = 1) -> xr.Dataset:
+ if time_steps > 1:
+ data = np.array(values, dtype=float).reshape(time_steps, -1)
+ return xr.Dataset({varname: xr.DataArray(data, dims=["time", "x"])})
+ return xr.Dataset({varname: xr.DataArray(np.array(values, dtype=float))})
+
+
+class TestConvertUnits:
+ def test_kelvin_to_celsius(self):
+ ds = _ds("t2m", [273.15, 293.15, 313.15])
+ result = convert_units(ds, {"variable": "t2m", "units": "kelvin", "convert_units": "degC"})
+ np.testing.assert_allclose(result["t2m"].values, [0.0, 20.0, 40.0])
+ assert result["t2m"].attrs["units"] == "degC"
+
+ def test_metres_to_mm(self):
+ ds = _ds("tp", [0.001, 0.005])
+ result = convert_units(ds, {"variable": "tp", "units": "m", "convert_units": "mm"})
+ np.testing.assert_allclose(result["tp"].values, [1.0, 5.0])
+ assert result["tp"].attrs["units"] == "mm"
+
+ def test_no_convert_units_field_is_noop(self):
+ ds = _ds("t2m", [300.0])
+ result = convert_units(ds, {"variable": "t2m", "units": "kelvin"})
+ np.testing.assert_array_equal(result["t2m"].values, ds["t2m"].values)
+
+ def test_unknown_conversion_is_noop(self):
+ ds = _ds("x", [1.0])
+ result = convert_units(ds, {"variable": "x", "units": "foo", "convert_units": "bar"})
+ np.testing.assert_array_equal(result["x"].values, ds["x"].values)
+
+ def test_preserves_existing_attrs(self):
+ ds = xr.Dataset({"t2m": xr.DataArray([300.0], attrs={"long_name": "temperature", "units": "K"})})
+ result = convert_units(ds, {"variable": "t2m", "units": "kelvin", "convert_units": "degC"})
+ assert result["t2m"].attrs["long_name"] == "temperature"
+
+
+class TestDeaccumulateEra5:
+ def test_differences_along_time(self):
+ ds = _ds("tp", [0.0, 1.0, 3.0, 6.0], time_steps=4)
+ result = deaccumulate_era5(ds, {"variable": "tp"})
+ assert result.sizes["time"] == 3
+ np.testing.assert_array_equal(result["tp"].values.flatten(), [1.0, 2.0, 3.0])
+
+ def test_clips_negative_values(self):
+ ds = _ds("tp", [3.0, 1.0, 4.0], time_steps=3)
+ result = deaccumulate_era5(ds, {"variable": "tp"})
+ assert (result["tp"].values >= 0).all()
+
+ def test_preserves_attrs(self):
+ data = np.array([[0.0], [1.0]])
+ ds = xr.Dataset({"tp": xr.DataArray(data, dims=["time", "x"], attrs={"units": "m"})})
+ result = deaccumulate_era5(ds, {"variable": "tp"})
+ assert result["tp"].attrs["units"] == "m"
+
+
+class TestRunTransformsPipeline:
+ def test_pipeline_via_dotted_path(self):
+ ds = _ds("t2m", [273.15])
+ dataset = {
+ "variable": "t2m",
+ "units": "kelvin",
+ "convert_units": "degC",
+ "transforms": ["climate_api.transforms.convert_units"],
+ }
+ from climate_api.data_manager.services.downloader import _run_transforms
+
+ result = _run_transforms(ds, dataset)
+ np.testing.assert_allclose(result["t2m"].values, [0.0])
+
+ def test_empty_transforms_is_noop(self):
+ ds = _ds("x", [1.0, 2.0])
+ from climate_api.data_manager.services.downloader import _run_transforms
+
+ result = _run_transforms(ds, {"variable": "x", "transforms": []})
+ np.testing.assert_array_equal(result["x"].values, ds["x"].values)
+
+ def test_no_transforms_key_is_noop(self):
+ ds = _ds("x", [1.0])
+ from climate_api.data_manager.services.downloader import _run_transforms
+
+ result = _run_transforms(ds, {"variable": "x"})
+ np.testing.assert_array_equal(result["x"].values, ds["x"].values)
+
+ def test_dict_entry_with_params(self):
+ ds = _ds("t2m", [273.15])
+ dataset = {
+ "variable": "t2m",
+ "units": "kelvin",
+ "convert_units": "degC",
+ "transforms": [{"function": "climate_api.transforms.convert_units"}],
+ }
+ from climate_api.data_manager.services.downloader import _run_transforms
+
+ result = _run_transforms(ds, dataset)
+ np.testing.assert_allclose(result["t2m"].values, [0.0])