Source code for xrspatial.geotiff._writers.gpu

"""GPU writer entry point.

Holds ``write_geotiff_gpu``, which compresses tiles on the device via
nvCOMP (when available) and falls back to the
eager CPU writer when nvCOMP is missing or the device path raises
under ``on_gpu_failure='auto'``.
"""
from __future__ import annotations

import numbers
import warnings
from typing import TYPE_CHECKING

import numpy as np
import xarray as xr

if TYPE_CHECKING:
    from typing import BinaryIO

    import cupy

from .._attrs import (_EXPERIMENTAL_CODECS, _extract_rich_tags, _resolve_nodata_attr,
                      _should_restore_nan_sentinel)
from .._coords import _BAND_DIM_NAMES, _has_no_georef_marker
from .._coords import require_transform_for_georeferenced as _require_transform_for_georeferenced
from .._coords import resolve_georef as _resolve_georef
from .._crs import _validate_crs_arg, _validate_crs_fallback, _wkt_to_epsg
from .._nodata import NodataLifecycle as _NL
from .._runtime import GeoTIFFFallbackWarning, _resolve_spatial_coords
from .._validation import (_validate_3d_writer_dims, _validate_no_rotated_affine,
                           _validate_nodata_arg, _validate_tile_size_arg,
                           _validate_writer_spatial_shape, validate_write_metadata)


def _compute_gpu_samples_hint(data) -> int:
    """Return the band count using the same convention the GPU writer's
    band-first -> band-last remap uses.

    The remap below in ``write_geotiff_gpu`` moves bands from
    ``shape[0]`` to ``shape[2]`` for band-first DataArrays. The
    MinIsWhite single-band guard runs *before* that remap, so reading
    ``data.shape[2]`` blindly would treat a band-first ``(1, H, W)``
    array as having ``W`` samples and miss the single-band rejection.
    Picking the band axis the same way the remap does keeps the guard
    and the actual on-device band placement in sync.

    Returns 1 for non-3D inputs. For 3D inputs without ``.dims`` (raw
    arrays), defaults to band-last (``shape[2]``); the writer's other
    entry-point validators reject ambiguous-dim DataArrays separately.
    """
    if getattr(data, 'ndim', None) != 3:
        return 1
    dims = getattr(data, 'dims', None)
    if (dims is not None
            and len(dims) == 3
            and dims[0] in _BAND_DIM_NAMES):
        return int(data.shape[0])
    return int(data.shape[2])


[docs] def write_geotiff_gpu(data: xr.DataArray | cupy.ndarray | np.ndarray, path: str | BinaryIO, *, crs: int | str | None = None, nodata: float | int | None = None, compression: str = 'zstd', compression_level: int | None = None, tiled: bool = True, tile_size: int = 256, predictor: bool | int = False, cog: bool = False, overview_levels: list[int] | None = None, overview_resampling: str = 'mean', bigtiff: bool | None = None, streaming_buffer_bytes: int = 256 * 1024 * 1024, max_z_error: float = 0.0, photometric: str | int = 'auto', allow_internal_only_jpeg: bool = False, allow_experimental_codecs: bool = False, allow_unparseable_crs: bool = False, drop_rotation: bool = False, ) -> str | BinaryIO: """Write a CuPy-backed DataArray as a GeoTIFF with GPU compression. Release-contract tier (see ``docs/source/reference/release_gate_geotiff.rst`` and ``docs/source/reference/geotiff_release_contract.rst``): the entire entry point is [experimental]. The surface may shift without a deprecation window and ``to_geotiff`` is the canonical writer. Requires cupy + numba CUDA plus optional nvCOMP / nvJPEG / nvJPEG2K libraries for codec-specific acceleration; cross-backend numerical parity with ``to_geotiff`` is tested for the Tier 1 codec set only. Tier 3 codecs (``'lerc'``, ``'jpeg2000'`` / ``'j2k'``, ``'lz4'``) require the explicit ``allow_experimental_codecs=True`` opt-in; the [internal-only] ``'jpeg'`` codec keeps its own dedicated ``allow_internal_only_jpeg`` flag. See :data:`xrspatial.geotiff.SUPPORTED_FEATURES` for the full tier map. Tiles are extracted and compressed on the GPU via nvCOMP, then assembled into a TIFF file on CPU. The CuPy array stays on device throughout compression -- only the compressed bytes transfer to CPU for file writing. When ``cog=True``, generates overview pyramids on GPU and writes a Cloud Optimized GeoTIFF with all IFDs at the file start for efficient range-request access. Falls back to CPU compression if nvCOMP is not available. Parameters ---------- data : xr.DataArray (CuPy- or NumPy-backed), cupy.ndarray, or np.ndarray [experimental] 2D or 3D raster. CuPy-backed inputs stay on device; NumPy/Dask inputs are uploaded via ``cupy.asarray(np.asarray(data))`` before compression (matches ``to_geotiff`` parity). path : str or binary file-like [experimental] Output file path or any object with a ``write`` method (e.g. ``io.BytesIO``). ``cog=True`` requires a string path: the auto-dispatch path through ``to_geotiff(gpu=True, cog=True)`` rejects file-like destinations, and the explicit GPU writer mirrors that rule. crs : int, numpy.integer, str, or None [experimental] EPSG code (int or numpy integer scalar) or WKT string. EPSG codes are strongly preferred for interop; the WKT-only path emits a user-defined CRS (32767) with the WKT stored in ``GTCitationGeoKey``, which many non-libgeotiff readers ignore. A ``UserWarning`` is emitted when the WKT-only path is taken. nodata : float, int, or None [experimental] NoData value. compression : str [experimental for Tier 1 codecs on this path; experimental gated by ``allow_experimental_codecs=True`` for Tier 3 codecs; internal-only gated by ``allow_internal_only_jpeg=True`` for ``'jpeg'``] Codec name. Accepts the same set ``to_geotiff`` lists in its own signature: ``'none'``, ``'deflate'``, ``'lzw'``, ``'jpeg'``, ``'packbits'``, ``'zstd'``, ``'lz4'``, ``'jpeg2000'`` (alias ``'j2k'``), or ``'lerc'``. Routing per codec: - ``'zstd'`` (default) and ``'deflate'``: nvCOMP batch compression on the GPU -- the fastest paths and the reason to use this entry point. - ``'jpeg'``: rejected by default for parity with ``to_geotiff``. Both writers emit self-contained JFIF tiles and skip the required TIFF JPEGTables tag (347), so the resulting files are unreadable by libtiff, GDAL, and rasterio. Pass ``allow_internal_only_jpeg=True`` to opt in to the experimental encode path; the writer then routes to nvJPEG when libnvjpeg is loadable and falls back to Pillow otherwise, and emits a ``GeoTIFFFallbackWarning`` so the caller knows the output will not round-trip through external readers. - ``'jpeg2000'`` and ``'j2k'``: nvJPEG2K GPU encode when available, glymur CPU encode otherwise. The two paths are not byte-for-byte identical (different libraries, different default parameters); use ``to_geotiff`` if you need exact CPU-writer parity. - ``'lerc'``, ``'lzw'``, ``'packbits'``, and ``'lz4'``: no nvCOMP/CUDA accelerator, so these fall through to the CPU encoder for byte-stable parity with ``to_geotiff``. compression_level : int or None [experimental] Compression effort level. Accepted for API compatibility but currently ignored -- nvCOMP does not expose level control. tiled : bool [experimental] Must be True (default). The GPU writer is tiled-only because nvCOMP batch compression operates on per-tile streams; passing ``tiled=False`` raises ``ValueError`` rather than silently producing a tiled file. Accepted for API parity with ``to_geotiff``. tile_size : int [experimental] Tile size in pixels (default 256). Must be a positive multiple of 16; this is a TIFF 6 spec requirement on TileWidth and TileLength for broad reader compatibility. ``write_geotiff_gpu`` is always tiled, so the check fires for every call. predictor : bool or int [experimental] TIFF predictor. ``False``/``0``/``1`` -> none, ``True``/``2`` -> horizontal differencing, ``3`` -> floating-point predictor (float dtypes only). cog : bool [experimental] Write as Cloud Optimized GeoTIFF with overviews. overview_levels : list[int] or None [experimental] Overview decimation factors relative to full resolution. Each entry must be a power-of-two integer >= 2, and the list must be strictly increasing (e.g. ``[2, 4, 8]`` writes overviews at 1/2, 1/4 and 1/8 of the full resolution). Invalid values raise ``ValueError``. Only used when ``cog=True``. If None and ``cog=True``, auto-generates ``[2, 4, 8, ...]`` by halving until the smallest overview fits in a single tile. overview_resampling : str [experimental] Resampling method for overviews: 'mean' (default), 'nearest', 'min', 'max', 'median', 'mode', or 'cubic'. ``mode`` and ``cubic`` fall back to the CPU implementation in ``xrspatial.geotiff._writer`` so the GPU writer produces the same overview bytes as the CPU writer. bigtiff : bool or None [experimental] Force BigTIFF (64-bit offsets). None auto-promotes when the estimated file size would exceed the classic-TIFF 4 GB limit. streaming_buffer_bytes : int [internal-only] Accepted for API parity with ``to_geotiff``. The GPU writer materialises the entire array on device and has no streaming concept, so this kwarg is a no-op. Default matches ``to_geotiff`` (256 MB) so callers passing the same kwargs to either entry point see the same default and the same type. max_z_error : float [internal-only] Per-pixel error budget for LERC compression. The GPU writer does not implement LERC (nvCOMP has no LERC backend), so any non-zero value raises ``ValueError``. Accepted at the signature level for API parity with ``to_geotiff``. photometric : str or int [experimental] Photometric interpretation for the TIFF Photometric tag (262). See :func:`to_geotiff` for the full set of accepted values; the GPU writer forwards this kwarg unchanged. Default ``'auto'`` writes MinIsBlack for any band count, so a 4-band raster is not silently tagged as RGB+alpha. allow_experimental_codecs : bool [experimental] Opt in to the Tier 3 experimental codecs ``'lerc'``, ``'jpeg2000'`` / ``'j2k'``, and ``'lz4'`` (default ``False``). Mirrors the same kwarg on ``to_geotiff`` so the two writers expose a consistent surface; the GPU dispatch path through ``to_geotiff`` forwards the kwarg unchanged. Setting ``compression=`` to one of those codecs without this flag raises ``ValueError`` whose message names the flag. With the flag set, the write proceeds and a ``GeoTIFFFallbackWarning`` is emitted once per call. Does NOT cover ``compression='jpeg'``: the internal-only JPEG path keeps its own dedicated ``allow_internal_only_jpeg`` flag. allow_internal_only_jpeg : bool [internal-only] Opt in to the ``compression='jpeg'`` encode path (default ``False``). The encoder emits self-contained JFIF tiles without the TIFF JPEGTables tag (347); the file decodes through this library's reader but not through libtiff, GDAL, or rasterio. With the flag set, the write proceeds and a ``GeoTIFFFallbackWarning`` is emitted at call time. Without the flag, ``compression='jpeg'`` raises ``ValueError`` for parity with ``to_geotiff``. allow_unparseable_crs : bool [experimental] Opt in to writing an unvalidatable CRS string into ``GTCitationGeoKey`` (default ``False``). See :func:`to_geotiff` for the full description; the GPU writer applies the same fail-closed default. drop_rotation : bool, default False [experimental] Opt in to writing a DataArray that carries ``attrs['rotated_affine']``. Mirrors the same kwarg on ``to_geotiff`` so the two writers share one gate. Default ``False`` refuses the write with ``ValueError``; the GPU writer does not emit a ``ModelTransformationTag`` either, so the silent-loss surface is identical on both backends. Returns ------- str or binary file-like The ``path`` argument (a string for filesystem paths, the file-like object for BytesIO destinations). Returning the path mirrors ``to_geotiff`` and ``write_vrt`` so callers can handle the three writers uniformly. Raises ------ ValueError If ``data`` is a 3D DataArray whose ``dims`` is not ``(band, y, x)`` or ``(y, x, band)`` (accepting band-name aliases ``bands`` / ``channel`` and spatial-name aliases ``lat`` / ``lon`` / ``row`` / ``col``). A leading non-band dim such as ``time`` would otherwise silently round-trip with the leading axis treated as ``y``. """ if not tiled: raise ValueError( "write_geotiff_gpu requires tiled=True. nvCOMP batch " "compression is tile-based; the strip layout is not " "implemented on the GPU path. Use to_geotiff(..., gpu=False, " "tiled=False) for strip output on CPU.") # JPEG-in-TIFF parity with to_geotiff. The GPU encode path writes # self-contained JFIF tiles without the TIFF JPEGTables # tag (347), matching the broken CPU encoder. ``to_geotiff`` refuses # the codec outright; this writer offered no rejection at all, so # callers could produce GeoTIFFs that decoded through xrspatial but # broke in libtiff/GDAL/rasterio. Reject by default with the same # wording so both entry points agree, and surface an opt-in flag for # users who explicitly want the internal-only path. if (isinstance(compression, str) and compression.lower() == 'jpeg' and not allow_internal_only_jpeg): raise ValueError( "compression='jpeg' is not supported: the encoder writes " "self-contained JFIF streams without the required " "JPEGTables tag (347), so other readers (libtiff, GDAL, " "rasterio) reject the file. Use 'deflate', 'zstd', or " "'lzw' instead. Pass allow_internal_only_jpeg=True to opt " "in to the experimental internal-reader-only path (issue " "#1845).") if (isinstance(compression, str) and compression.lower() == 'jpeg' and allow_internal_only_jpeg): warnings.warn( "write_geotiff_gpu(compression='jpeg', " "allow_internal_only_jpeg=True) writes JFIF tiles without " "the TIFF JPEGTables tag (347); the file decodes through " "xrspatial but may fail in libtiff, GDAL, or rasterio. " "See issue #1845.", GeoTIFFFallbackWarning, stacklevel=2, ) # Tier 3 experimental-codec gate. Lerc, jpeg2000 / j2k, and lz4 # require ``allow_experimental_codecs=True``; the GPU # writer mirrors the same gate ``to_geotiff`` enforces so the two # entry points agree. The GPU dispatch path through # ``to_geotiff(gpu=True, compression='lerc', ...)`` forwards the # kwarg, so the warning is emitted once at the CPU dispatcher and a # second time here when the GPU writer re-runs the same check; that # mirrors ``allow_internal_only_jpeg`` (which already double-warns # under that codepath) and keeps the explicit GPU entry point usable # standalone. if isinstance(compression, str): _gpu_codec = compression.lower() if (_gpu_codec in _EXPERIMENTAL_CODECS and not allow_experimental_codecs): raise ValueError( f"compression={compression!r} is experimental: cross-backend " "numerical parity is not claimed and reader support across " "GDAL versions is uneven. Pass allow_experimental_codecs=True " "to opt in, or use 'deflate', 'zstd', or 'lzw' for a " "stable lossless codec (issue #2137).") if (_gpu_codec in _EXPERIMENTAL_CODECS and allow_experimental_codecs): warnings.warn( f"write_geotiff_gpu(compression={compression!r}, " "allow_experimental_codecs=True): experimental codec, " "GPU encode path is not byte-identical to the CPU writer " "(different backend libraries). See issue #2137.", GeoTIFFFallbackWarning, stacklevel=2, ) # MinIsWhite pre-inversion runs in the eager CPU writer. # The GPU writer assembles tile bytes directly on device; threading # the pixel + nodata-sentinel transform through that pipeline is out # of scope for the round-trip fix. Refuse the combination so callers # do not silently get inverted on-disk values. Move the array to the # CPU and call the eager ``write`` path for MinIsWhite output. from .._writer import _resolve_photometric as _resolve_photo_gpu _gpu_samples_hint = _compute_gpu_samples_hint(data) _gpu_resolved_photo, _ = _resolve_photo_gpu( photometric, _gpu_samples_hint) if _gpu_resolved_photo == 0 and _gpu_samples_hint == 1: raise NotImplementedError( "photometric='miniswhite' on the GPU writer is not " "supported: the writer-side pixel inversion that mirrors " "the reader's unconditional MinIsWhite inversion (issue " "#1836) is only wired into the eager CPU ``write`` path. " "Move the array to host memory and call to_geotiff with " "gpu=False, or write with photometric='minisblack' / " "'auto'.") # write_geotiff_gpu is always tiled, so validate tile_size here and # keep parity with the public to_geotiff entry point. _validate_tile_size_arg(tile_size) _validate_nodata_arg(nodata) # Refuse to silently drop ``attrs['rotated_affine']``. Mirror the # gate ``to_geotiff`` runs upstream so direct callers of # ``write_geotiff_gpu`` get the same rejection. _drop_rotation_attrs = getattr(data, 'attrs', None) or {} _validate_no_rotated_affine( _drop_rotation_attrs, drop_rotation=drop_rotation, entry_point="write_geotiff_gpu", ) # Reject empty spatial shapes. ``write_geotiff_gpu`` is a public # entry point and direct callers (with cupy.ndarray or raw # numpy) do not flow through ``to_geotiff``'s guard, so check here # before any GPU work starts. _validate_writer_spatial_shape( getattr(data, 'shape', None), getattr(data, 'dims', None), entry_point="write_geotiff_gpu", ) # Reject ``gdal_metadata_xml`` / ``extra_tags`` pass-through writes # unless the caller opted in via ``allow_experimental_codecs=True``. # Mirrors ``to_geotiff`` so the two writers expose the same surface. from .._attrs import _validate_write_rich_tag_optin _attrs_for_optin = getattr(data, 'attrs', None) or {} _validate_write_rich_tag_optin( _attrs_for_optin, allow_experimental_codecs=allow_experimental_codecs, entry_point="write_geotiff_gpu", ) # Ambiguous-metadata checks; mirrors ``to_geotiff`` so the GPU # writer enforces the same crs/crs_wkt consistency rule. Alias # resolution keeps the validator consistent across alias-named # coord arrays (lat/lon, latitude/longitude, row/col). _attrs = getattr(data, 'attrs', None) or {} _coord_y, _coord_x = _resolve_spatial_coords(data) validate_write_metadata({ 'crs_kwarg': crs, 'attrs_crs': _attrs.get('crs'), 'attrs_crs_wkt': _attrs.get('crs_wkt'), 'nodata_kwarg': nodata, 'attrs_nodata': _attrs.get('nodata'), 'attrs_nodatavals': _attrs.get('nodatavals'), 'coord_y': _coord_y, 'coord_x': _coord_x, 'no_georef_marker': _has_no_georef_marker(data), }) if max_z_error < 0: raise ValueError( f"max_z_error must be >= 0, got {max_z_error}") if max_z_error != 0: raise ValueError( "max_z_error is not supported on the GPU writer " "(nvCOMP has no LERC backend). Use to_geotiff(..., gpu=False) " "or omit max_z_error.") # Mirror to_geotiff's path-type + cog=True gating verbatim so callers # see identical errors from the two entry points. The auto-dispatch # path through ``to_geotiff(gpu=True, cog=True, path=BytesIO)`` raises # before reaching here; the explicit GPU writer mirrors the same gate # so callers cannot bypass it. Non-cog file-like writes remain # supported on this entry point. _path_is_file_like = ( not isinstance(path, str)) and hasattr(path, 'write') if _path_is_file_like: if cog: raise ValueError( "cog=True is not supported for file-like destinations. " "Pass a string path or write to BytesIO without cog=True.") elif not isinstance(path, str): raise TypeError( f"path must be a str or a binary file-like with a write() " f"method, got {type(path).__name__}") # streaming_buffer_bytes is intentionally a no-op on the GPU path; # the kwarg exists for API parity with to_geotiff so callers can pass # the same kwargs to both entry points without filtering. del streaming_buffer_bytes try: import cupy except ImportError: raise ImportError("cupy is required for GPU writes") from .._gpu_decode import gpu_compress_tiles, make_overview_gpu from .._writer import _assemble_tiff, _compression_tag, _write_bytes, normalize_predictor # Extract array and metadata geo_transform = None epsg = None wkt_fallback = None # WKT string when EPSG is not available raster_type = 1 gdal_meta_xml = None extra_tags_list = None x_res = None y_res = None res_unit = None # ``numbers.Integral`` covers numpy integer scalars (``np.int32``, # ``np.int64``) so they hit the EPSG branch instead of falling # through to ``epsg=None``. Validator already rejects bool. _validate_crs_arg(crs) if isinstance(crs, numbers.Integral): epsg = int(crs) elif isinstance(crs, str): epsg = _wkt_to_epsg(crs) if epsg is None: wkt_fallback = crs # ``attrs['masked_nodata']`` records whether the read side promoted # the sentinel to NaN. Default True preserves the NaN->sentinel # rewrite for external DataArrays and bare cupy / numpy positional # arrays that have no attrs to read from. restore_sentinel = True if isinstance(data, xr.DataArray): restore_sentinel = _should_restore_nan_sentinel(data.attrs) arr = data.data # Handle Dask arrays: compute to materialize if hasattr(arr, 'compute'): arr = arr.compute() # Now arr should be CuPy or numpy if hasattr(arr, 'get'): pass # CuPy array, already on GPU else: arr = cupy.asarray(np.asarray(arr)) # numpy -> GPU # Reject ambiguous 3D layouts. Mirrors the gate in # ``to_geotiff``: a leading non-band dim like ``time`` would # otherwise round-trip with the leading axis silently treated # as ``y``. if arr.ndim == 3: _validate_3d_writer_dims(data.dims) # Handle band-first dimension order (band, y, x) -> (y, x, band). # rioxarray and CF-style multi-band rasters land here; without # this remap the writer treats arr.shape[2] as the band axis and # produces a transposed file. The CPU writer does the same remap # at the matching step in to_geotiff(). if arr.ndim == 3 and data.dims[0] in _BAND_DIM_NAMES: arr = cupy.ascontiguousarray(cupy.moveaxis(arr, 0, -1)) # Resolve via the centralised resolver. Same precedence as the # CPU writer: prefer attrs['transform'] (bit-stable on # round-trip) over the coord-derived transform, which drifts on # fractional pixel sizes. geo_transform = _resolve_georef(data).transform # Match the CPU writer's fail-closed guard: an array with spatial # coords but no derivable transform (e.g. 1x1 without # ``attrs['transform']``) must not silently round-trip as a # non-georeferenced TIFF. _require_transform_for_georeferenced(data, geo_transform) # Resolve CRS the same way the CPU writer does. attrs['crs'] may # be an int EPSG or a WKT string; attrs['crs_wkt'] only carries # WKT. Without the WKT branch the GPU writer silently drops CRS # on files whose original CRS only resolves to WKT (no recognized # EPSG). if epsg is None and crs is None: crs_attr = data.attrs.get('crs') if isinstance(crs_attr, str): epsg = _wkt_to_epsg(crs_attr) if epsg is None and wkt_fallback is None: wkt_fallback = crs_attr elif crs_attr is not None: # Same gate as the kwarg path: reject bool / non-int # types and confirm the EPSG resolves before writing it # to disk. Without this, ``attrs={'crs': True}`` round- # trips as EPSG=1. _validate_crs_arg(crs_attr) epsg = int(crs_attr) if epsg is None: wkt = data.attrs.get('crs_wkt') if isinstance(wkt, str): epsg = _wkt_to_epsg(wkt) if epsg is None and wkt_fallback is None: wkt_fallback = wkt if nodata is None: nodata = _resolve_nodata_attr(data.attrs) # Mirror the CPU writer's pass-through of GDAL metadata, the # extra_tags list, the friendly image_description / extra_samples # / colormap synthesis, and the resolution tags. Without these, # a GPU write -> CPU read round-trip silently drops every rich # tag. _rich = _extract_rich_tags(data.attrs) raster_type = _rich['raster_type'] gdal_meta_xml = _rich['gdal_metadata_xml'] extra_tags_list = _rich['extra_tags'] x_res = _rich['x_resolution'] y_res = _rich['y_resolution'] res_unit = _rich['resolution_unit'] else: if hasattr(data, 'compute'): data = data.compute() # Dask -> CuPy or numpy if hasattr(data, 'device'): arr = data # already CuPy elif hasattr(data, 'get'): arr = data # CuPy else: arr = cupy.asarray(np.asarray(data)) # numpy/list -> GPU if arr.ndim not in (2, 3): raise ValueError(f"Expected 2D or 3D array, got {arr.ndim}D") height, width = arr.shape[:2] samples = arr.shape[2] if arr.ndim == 3 else 1 np_dtype = np.dtype(str(arr.dtype)) # cupy dtype -> numpy dtype # Mirror the CPU writer's NaN-to-sentinel substitution. # Without this step the GPU writer emits raw NaN bytes interleaved # with valid data even when ``nodata=<finite>`` is supplied; the # GDAL_NODATA tag still advertises the sentinel but external readers # (rasterio / GDAL / QGIS) mask only on the sentinel value and # therefore see the NaN pixels as valid data. The CPU writer does # the equivalent rewrite at ``to_geotiff`` (lines around # ``arr.copy(); arr[nan_mask] = arr.dtype.type(nodata)``); both # paths must produce byte-equivalent files for the same input. # We always copy before the in-place sentinel write. Some upstream # branches above already produce a fresh buffer (``cupy.asarray`` # from numpy/dask, ``ascontiguousarray`` from the band-first # moveaxis); others (a CuPy-backed DataArray taking the no-moveaxis # path, or a plain CuPy positional ``data``) hand ``arr`` back as # the caller's buffer. Rather than tracking provenance across that # branch tree, copy unconditionally when we are about to mutate -- # the cost is one GPU array allocation, only on the NaN-present # path, and it guarantees the CPU writer's defensive-copy semantics # in every case. # Shared NodataLifecycle gate for NaN->sentinel restore. if _NL(declared=nodata, dtype_in=np_dtype).writer_restore_sentinel( buffer_dtype=np_dtype, restore_sentinel=restore_sentinel, ): nan_mask = cupy.isnan(arr) if bool(nan_mask.any()): arr = arr.copy() arr[nan_mask] = np_dtype.type(nodata) comp_tag = _compression_tag(compression) pred_val = normalize_predictor(predictor, np_dtype, comp_tag) def _gpu_compress_to_part(gpu_arr, w, h, spp): """Compress a GPU array into a (stub, w, h, offsets, counts, tiles) tuple.""" compressed = gpu_compress_tiles( gpu_arr, tile_size, tile_size, w, h, comp_tag, pred_val, np_dtype, spp) rel_off = [] bc = [] off = 0 for tile in compressed: rel_off.append(off) bc.append(len(tile)) off += len(tile) stub = np.empty((1, 1, spp) if spp > 1 else (1, 1), dtype=np_dtype) return (stub, w, h, rel_off, bc, compressed) # Full resolution parts = [_gpu_compress_to_part(arr, width, height, samples)] # Overview generation mirrors the CPU writer's 8-level cap. if cog: if overview_levels is None: from .._overview import _MAX_OVERVIEW_LEVELS # Auto-generated lists hold actual decimation factors (2, # 4, 8, ...) so the loop below treats auto-generated and # user-supplied lists identically. overview_levels = [] oh, ow = height, width factor = 2 while (oh > tile_size and ow > tile_size and len(overview_levels) < _MAX_OVERVIEW_LEVELS): oh //= 2 ow //= 2 if oh > 0 and ow > 0: overview_levels.append(factor) factor *= 2 else: # Validate explicit lists: power-of-two factors >= 2, # strictly increasing, feasible for the input shape. # Previously the values were ignored and only the list # length mattered. from .._overview import _validate_overview_levels overview_levels = _validate_overview_levels( overview_levels, height=height, width=width) # Pass ``nodata`` so the GPU reducer masks the sentinel back to # NaN before averaging. Without this, the NaN->sentinel rewrite # done above on ``arr`` leaks the sentinel into the overview # reduction and poisons the pyramid. Rewrite any all-sentinel # cell NaN back to the sentinel after each level # so the on-disk overview tiles still carry the sentinel value # external readers expect. # # The sentinel rewrite uses ``cupy.putmask`` rather than # ``current.copy(); current[nan_mask] = sentinel`` because # ``make_overview_gpu`` always returns a freshly allocated # buffer (``_block_reduce_2d_gpu`` ends in ``cupy.nan*`` / # ``cupy.around(...).astype(...)`` / ``cropped[::2, ::2].copy()``, # and the 3-D path ends in ``cupy.stack``) so nothing else # aliases the buffer between the return and the rewrite. # Dropping the redundant ``copy()`` skips one chunk-sized GPU # allocation per overview level. Mirrors the in-place sentinel # rewrite ``_apply_nodata_mask_gpu`` uses. current = arr cumulative_factor = 1 # ``make_overview_gpu`` preserves dtype, so the sentinel cast is # loop-invariant. Hoist it (and the float/finite gate) out of the # inner ``while`` to skip redundant per-level scalar work. # The lifecycle's writer_restore_sentinel mirrors the gate used # for the full-resolution rewrite above so the overview loop # stays in sync with the base rewrite. rewrite_nodata = _NL( declared=nodata, dtype_in=np_dtype, ).writer_restore_sentinel( buffer_dtype=np_dtype, restore_sentinel=restore_sentinel, ) sentinel_scalar = ( np_dtype.type(nodata) if rewrite_nodata else None ) for target_factor in overview_levels: # Halve repeatedly until the cumulative decimation matches # the requested factor. Validation has already established # that ``target_factor`` is a power of two and strictly # greater than ``cumulative_factor``. while cumulative_factor < target_factor: current = make_overview_gpu(current, method=overview_resampling, nodata=nodata) cumulative_factor *= 2 if rewrite_nodata: nan_mask = cupy.isnan(current) if bool(nan_mask.any().item()): cupy.putmask(current, nan_mask, sentinel_scalar) oh, ow = current.shape[:2] parts.append(_gpu_compress_to_part(current, ow, oh, samples)) # Refuse to write an unvalidatable CRS string into GTCitationGeoKey # unless the caller opts in. if epsg is None: _validate_crs_fallback(wkt_fallback, allow_unparseable_crs) file_bytes = _assemble_tiff( width, height, np_dtype, comp_tag, pred_val, True, tile_size, parts, geo_transform, epsg, nodata, is_cog=(cog and len(parts) > 1), raster_type=raster_type, crs_wkt=wkt_fallback if epsg is None else None, gdal_metadata_xml=gdal_meta_xml, extra_tags=extra_tags_list, x_resolution=x_res, y_resolution=y_res, resolution_unit=res_unit, force_bigtiff=bigtiff, photometric=photometric, ) _write_bytes(file_bytes, path) return path