Source code for xrspatial.templates

"""Empty study-area DataArrays for common regions.

:func:`from_template` turns a region name into an empty (NaN-filled) raster you
can start an analysis from, instead of looking up a bounding box, choosing a
CRS, and assembling coordinates by hand. The result follows the xarray-spatial
array contract (2-D ``['y', 'x']`` grid with pixel-center coordinates and
``res``/``crs`` attributes), so it feeds straight into ``slope``, ``hillshade``,
``rasterize``, and the rest of the library.
"""
import numpy as np
import xarray as xr

from xrspatial._template_data import (_CITIES, _CITY_DEFAULT_RESOLUTION, _COUNTRY_BBOXES,
                                      _COUNTRY_DEFAULT_RESOLUTION, _EQUAL_AREA_FALLBACK_EPSG,
                                      _REGIONS, _UPS_NORTH_EPSG, _UPS_SOUTH_EPSG)
from xrspatial.reproject._crs_utils import _require_pyproj, _resolve_crs
from xrspatial.reproject._grid import _edge_samples, _make_output_coords, _transform_boundary

_PRESERVE_OPTIONS = ("area", "shape")

# Guard against a stray fine resolution allocating an enormous array. Applied to
# every backend, including dask: a lazy grid this large is almost always a typo,
# and the uniform cap keeps the error identical across backends.
_MAX_CELLS = 500_000_000


def _resolve(name):
    """Resolve a name to a spec dict.

    Keys: ``bounds`` (projected, native CRS), ``crs`` (native EPSG int),
    ``default_resolution``, ``key``, ``lonlat`` (lon/lat bbox), and the optional
    ``area_epsg`` / ``shape_epsg`` overrides used by the ``preserve`` path.
    """
    if not isinstance(name, str):
        raise TypeError(
            f"name must be a string region name or country code, "
            f"got {type(name).__name__}"
        )

    region = _REGIONS.get(name.lower())
    if region is not None:
        return dict(
            bounds=region["bounds"], crs=region["crs"],
            default_resolution=region["default_resolution"], key=name.lower(),
            lonlat=region["lonlat"], area_epsg=region.get("area_epsg"),
            shape_epsg=region.get("shape_epsg"),
        )

    city = _CITIES.get(name.lower())
    if city is not None:
        return dict(
            bounds=city["bounds"], crs=city["crs"],
            default_resolution=_CITY_DEFAULT_RESOLUTION, key=name.lower(),
            lonlat=city["lonlat"], area_epsg=None, shape_epsg=None,
        )

    code = name.upper()
    bbox = _COUNTRY_BBOXES.get(code)
    if bbox is not None:
        return dict(
            bounds=bbox, crs=4326,
            default_resolution=_COUNTRY_DEFAULT_RESOLUTION, key=code,
            lonlat=bbox, area_epsg=None, shape_epsg=None,
        )

    regions = ", ".join(sorted(_REGIONS))
    raise ValueError(
        f"Unknown template {name!r}. Available named regions: {regions}. "
        f"{len(_CITIES)} world cities are also supported (lowercase name, e.g. "
        f"'london', 'tokyo'). Countries must be an ISO-3166 / GADM alpha-3 code "
        f"(e.g. 'USA', 'FRA', 'JPN'); {len(_COUNTRY_BBOXES)} are supported. "
        f"Call xrspatial.templates.list_templates() to list every accepted name."
    )


def _preserve_epsg(lonlat, preserve, area_epsg, shape_epsg):
    """Pick an EPSG code that preserves ``preserve`` for the lon/lat bbox."""
    lon_min, lat_min, lon_max, lat_max = lonlat
    lat_c = (lat_min + lat_max) / 2.0
    lon_c = (lon_min + lon_max) / 2.0
    # Normalize to [-180, 180] so antimeridian-wrapped country boxes (lon > 180)
    # still query a sensible UTM zone.
    lon_c = ((lon_c + 180.0) % 360.0) - 180.0

    if preserve == "area":
        return int(area_epsg) if area_epsg is not None \
            else _EQUAL_AREA_FALLBACK_EPSG

    # preserve == "shape": curated override, else UTM zone (UPS at the poles).
    if shape_epsg is not None:
        return int(shape_epsg)
    if abs(lat_c) > 84.0:
        return _UPS_NORTH_EPSG if lat_c > 0 else _UPS_SOUTH_EPSG

    _require_pyproj()
    from pyproj.aoi import AreaOfInterest
    from pyproj.database import query_utm_crs_info
    aoi = AreaOfInterest(lon_c - 0.01, lat_c - 0.01, lon_c + 0.01, lat_c + 0.01)
    infos = query_utm_crs_info("WGS 84", aoi)
    if not infos:
        return _UPS_NORTH_EPSG if lat_c >= 0 else _UPS_SOUTH_EPSG
    return int(infos[0].code)


def _project_bbox_bounds(lonlat, epsg):
    """Project a lon/lat bbox into ``epsg`` and return (left, bottom, right, top)."""
    lon_min, lat_min, lon_max, lat_max = lonlat
    xs, ys = _edge_samples(lon_min, lat_min, lon_max, lat_max, 101)
    tx, ty = _transform_boundary(_resolve_crs(4326), _resolve_crs(epsg), xs, ys)
    tx, ty = np.asarray(tx), np.asarray(ty)
    valid = np.isfinite(tx) & np.isfinite(ty)
    if not valid.any():
        raise ValueError(
            f"projecting the template into EPSG:{epsg} produced no finite "
            f"coordinates"
        )
    tx, ty = tx[valid], ty[valid]
    return (float(tx.min()), float(ty.min()), float(tx.max()), float(ty.max()))


def _default_preserve_resolution(bounds):
    """Default metre resolution: ~1000 cells on the long axis, snapped 1/2/5."""
    import math
    left, bottom, right, top = bounds
    raw = max(right - left, top - bottom) / 1000.0
    if raw <= 0:
        return 1.0
    base = 10 ** math.floor(math.log10(raw))
    for m in (1, 2, 5):
        if raw <= m * base:
            return float(m * base)
    return float(10 * base)


def _normalize_resolution(resolution, default):
    """Return a positive (res_x, res_y) tuple from a scalar/tuple/None."""
    if resolution is None:
        resolution = default
    if isinstance(resolution, (tuple, list)):
        if len(resolution) != 2:
            raise ValueError(
                "resolution tuple must be (res_x, res_y), "
                f"got {len(resolution)} values"
            )
        res_x, res_y = resolution
    else:
        res_x = res_y = resolution
    res_x, res_y = float(res_x), float(res_y)
    if res_x <= 0 or res_y <= 0:
        raise ValueError(f"resolution must be positive, got {(res_x, res_y)}")
    return res_x, res_y


def _make_data(shape, fill, backend, chunks):
    """Build a NaN-filled array of ``shape`` for the requested backend."""
    backend = backend.lower()
    if backend in ("dask", "dask+numpy"):
        import dask.array as da
        return da.full(shape, fill, dtype="float32", chunks=chunks)
    if backend == "numpy":
        return np.full(shape, fill, dtype="float32")
    if backend == "cupy":
        import cupy
        return cupy.full(shape, fill, dtype="float32")
    if backend == "dask+cupy":
        import cupy
        import dask.array as da
        meta = cupy.empty((0, 0), dtype="float32")
        return da.full(shape, fill, dtype="float32",
                       chunks=chunks).map_blocks(cupy.asarray, meta=meta)
    raise ValueError(
        f"Unknown backend {backend!r}; choose from 'numpy', 'dask+numpy', "
        f"'cupy', 'dask+cupy'."
    )


def _cf_crs_attrs(crs):
    """CF Conventions grid-mapping attributes for an EPSG code.

    Emits the two canonical CF-1.7 identifiers via :meth:`pyproj.CRS.to_cf`:
    ``grid_mapping_name`` (the controlled-vocabulary projection token, e.g.
    ``'albers_conical_equal_area'``) and ``crs_wkt`` (the full WKT, which
    carries the human-readable CRS name). This mirrors the rest of the
    library, which identifies a CRS by ``crs`` / ``crs_wkt`` and derives
    anything else with pyproj on demand.

    Best-effort: without pyproj installed the mapping is empty, so the
    default (non-reproject) path stays dependency-free, same as the old
    ``crs_name`` behaviour. ``grid_mapping_name`` is also left out for
    projections CF does not define (e.g. Equal Earth), where ``to_cf``
    returns ``crs_wkt`` alone.
    """
    try:
        import pyproj
    except ImportError:
        return {}
    cf = pyproj.CRS.from_epsg(int(crs)).to_cf()
    return {k: cf[k] for k in ("grid_mapping_name", "crs_wkt")
            if cf.get(k) is not None}


[docs] def list_templates(kind=None): """List the template names ``from_template`` accepts. Every name in the result is a valid ``from_template`` argument: region and city names are lowercase, country codes are uppercase ISO-3166 / GADM alpha-3. Parameters ---------- kind : {'regions', 'cities', 'countries'}, optional Return just one group as a sorted list. When omitted (the default), return a dict mapping each group to its sorted list of names. Returns ------- dict of str to list of str, or list of str With ``kind=None``, ``{'regions': [...], 'cities': [...], 'countries': [...]}``. With ``kind`` set, the sorted list for that group. Examples -------- .. sourcecode:: python >>> from xrspatial.templates import list_templates >>> names = list_templates() >>> sorted(names) ['cities', 'countries', 'regions'] >>> 'conus' in names['regions'] True >>> 'london' in list_templates('cities') True """ groups = { "regions": sorted(_REGIONS), "cities": sorted(_CITIES), "countries": sorted(_COUNTRY_BBOXES), } if kind is None: return groups if kind not in groups: raise ValueError( f"kind must be one of {tuple(groups)} or None, got {kind!r}." ) return groups[kind]
[docs] def from_template(name, resolution=None, *, preserve=None, backend="numpy", fill=np.nan, chunks="auto"): """Create an empty DataArray for a common study area. The returned raster is NaN-filled and obeys the xarray-spatial array contract: a 2-D ``['y', 'x']`` grid with pixel-center 1-D coordinates (north-up, descending ``y``) and ``res``/``crs`` attributes. It covers the study area's rectangular bounding box and is meant as a starting canvas. The requested ``resolution`` is honored exactly. The study-area box is rarely a whole number of cells wide, so the far edges (right, top) are nudged out by up to half a cell to land on an exact multiple of the cell size, anchoring the lower-left corner. ``attrs['res']`` therefore matches the ``resolution`` you pass. Parameters ---------- name : str A curated region name (case-insensitive), e.g. ``'conus'``, ``'nyc'``, ``'europe'``, ``'world'``; a world-city name (case-insensitive), e.g. ``'london'``, ``'tokyo'``, ``'sao_paulo'``; or an ISO-3166 / GADM alpha-3 country code, e.g. ``'USA'``, ``'FRA'``, ``'JPN'``. Curated regions and cities come back in a projected CRS (cities in their UTM zone); country codes come back in EPSG:4326. Where two cities share a name the larger keeps the bare name and the others take a ``_<iso2>`` suffix (e.g. ``'hyderabad'`` vs ``'hyderabad_pk'``). resolution : float or tuple of float, optional Cell size in the template's CRS units (metres for projected regions, degrees for country codes). A scalar gives square cells; a ``(res_x, res_y)`` tuple sets each axis. Defaults to a per-template value so a bare ``from_template('conus')`` works. preserve : {'area', 'shape'}, optional Reproject the template into an EPSG-coded projection chosen for the property it preserves, instead of the template's default CRS. ``'area'`` gives an equal-area projection (a curated national/continental code such as EPSG:5070, or EPSG:8857 Equal Earth where none is curated). ``'shape'`` gives a conformal projection: the centroid's UTM zone (UPS at the poles). When set, ``resolution`` is in metres and ``attrs['crs']`` is the chosen EPSG int. Requires pyproj. backend : str, default='numpy' Array backend: ``'numpy'``, ``'dask+numpy'`` (alias ``'dask'``), ``'cupy'``, or ``'dask+cupy'``. fill : float, default=numpy.nan Value the grid is filled with. The dtype is always ``float32``. chunks : int, str, or tuple, default='auto' Dask chunk specification; only used by the dask backends. Returns ------- template : xarray.DataArray Empty 2-D raster with ``dims=('y', 'x')`` and pixel-center coordinates. ``attrs`` carries ``res`` and ``crs`` plus the CF Conventions grid-mapping keys ``grid_mapping_name`` (the projection token, e.g. ``'albers_conical_equal_area'``) and ``crs_wkt`` (full WKT, which carries the human-readable CRS name). The ``x`` / ``y`` coordinates follow CF axis conventions: ``units`` ``'m'`` with ``standard_name`` ``'projection_x_coordinate'`` / ``'projection_y_coordinate'`` for projected templates, or ``'degrees_east'`` / ``'degrees_north'`` with ``standard_name`` ``'longitude'`` / ``'latitude'`` for EPSG:4326. The grid-mapping keys require pyproj; without it they are omitted (the default, dependency-free path), and ``grid_mapping_name`` is also omitted for projections CF does not define (e.g. Equal Earth), leaving ``crs_wkt`` alone. These keys sit directly on ``attrs`` (the library's flat CRS convention), not on a separate CF grid-mapping variable, so a strict CF reader will not auto-detect them as a grid mapping. Examples -------- .. sourcecode:: python >>> from xrspatial import from_template >>> agg = from_template("conus") # Albers, default 5 km cells >>> agg.attrs["crs"] 5070 >>> agg = from_template("conus", resolution=1000) # 1 km cells >>> agg = from_template("FRA") # France bbox in EPSG:4326 >>> agg.attrs["crs"] 4326 >>> from_template("london").attrs["crs"] # greater London, UTM 30N 32630 >>> from_template("FRA", preserve="shape").attrs["crs"] # UTM 31N 32631 >>> from_template("FRA", preserve="area").attrs["crs"] # Equal Earth 8857 """ spec = _resolve(name) key = spec["key"] if preserve is not None: if not isinstance(preserve, str) or preserve.lower() \ not in _PRESERVE_OPTIONS: raise ValueError( f"preserve must be one of {_PRESERVE_OPTIONS} or None, " f"got {preserve!r}. (No EPSG-coded projection exists for " f"distance/direction, so those are not offered.)" ) preserve = preserve.lower() crs = _preserve_epsg(spec["lonlat"], preserve, spec["area_epsg"], spec["shape_epsg"]) bounds = _project_bbox_bounds(spec["lonlat"], crs) default_res = _default_preserve_resolution(bounds) else: bounds = spec["bounds"] crs = spec["crs"] default_res = spec["default_resolution"] left, bottom, right, top = bounds res_x, res_y = _normalize_resolution(resolution, default_res) width = max(1, int(round((right - left) / res_x))) height = max(1, int(round((top - bottom) / res_y))) n_cells = width * height if n_cells > _MAX_CELLS: raise ValueError( f"resolution {(res_x, res_y)} produces a {height} x {width} grid " f"({n_cells:,} cells), exceeding the {_MAX_CELLS:,}-cell limit. " f"Use a coarser resolution." ) # Honor the requested resolution exactly: anchor the lower-left corner and # nudge the far edges to an exact multiple of the cell size, so res comes # back as (res_x, res_y) instead of drifting when the bbox extent isn't a # whole number of cells. Mirrors the reproject grid path # (_compute_output_grid in reproject/_grid.py). right = left + width * res_x top = bottom + height * res_y ys, xs = _make_output_coords((left, bottom, right, top), (height, width)) data = _make_data((height, width), fill, backend, chunks) attrs = {"res": (res_x, res_y), "crs": crs} attrs.update(_cf_crs_attrs(crs)) template = xr.DataArray( data, name=key, coords={"y": ys, "x": xs}, dims=["y", "x"], attrs=attrs, ) # CF coordinate metadata (CF Conventions sec. 4). Every template emits # either EPSG:4326 (country codes) or a metre-based projected CRS # (curated regions and all preserve paths). if crs == 4326: template["x"].attrs.update(units="degrees_east", standard_name="longitude") template["y"].attrs.update(units="degrees_north", standard_name="latitude") else: template["x"].attrs.update(units="m", standard_name="projection_x_coordinate") template["y"].attrs.update(units="m", standard_name="projection_y_coordinate") return template