diff --git a/xrspatial/geotiff/_vrt.py b/xrspatial/geotiff/_vrt.py index e3bb67bb..e54ac99f 100644 --- a/xrspatial/geotiff/_vrt.py +++ b/xrspatial/geotiff/_vrt.py @@ -1881,6 +1881,40 @@ def _check_no_mixed_raster_type(sources_meta: list[dict]) -> None: ) +def _check_no_mixed_georef(sources_meta: list[dict]) -> None: + """Reject mixing georeferenced and non-georeferenced VRT sources. + + A source TIFF with no georeferencing (``has_georef`` False) carries a + default identity transform, not a real mapping. The mosaic-placement + math reads ``origin_x`` / ``pixel_width`` off each source transform to + position its tile, so a non-georeferenced source dropped into an + otherwise georeferenced mosaic would be placed at pixel-space origin + ``(0, 0)`` as if those identity values were real CRS coordinates. + Refuse rather than emit a mosaic that mislocates the tile. + + The all-georeferenced and all-non-georeferenced cases both pass: + build_vrt emits a ```` only when every source is + georeferenced, so the all-non-georeferenced VRT preserves + ``georef_status='none'`` on read. + """ + if not sources_meta: + return + georef_flags = [bool(m.get('has_georef')) for m in sources_meta] + if any(georef_flags) and not all(georef_flags): + georeffed = [m['path'] for m, g in zip(sources_meta, georef_flags) + if g] + plain = [m['path'] for m, g in zip(sources_meta, georef_flags) + if not g] + raise ValueError( + f"VRT sources mix georeferenced and non-georeferenced TIFFs. " + f"Georeferenced: {georeffed!r}; non-georeferenced: {plain!r}. " + f"A non-georeferenced source carries only a default identity " + f"transform, so it cannot be placed on a georeferenced " + f"mosaic. Either georeference every source or none of them " + f"before building the VRT." + ) + + def _nodata_values_agree(a, b) -> bool: """Return True iff two nodata sentinels are the same value. @@ -2039,6 +2073,7 @@ def write_vrt(vrt_path: str, source_files: list[str], *, 'bps': bps, 'sample_format': ifd.sample_format, 'transform': geo.transform, + 'has_georef': geo.has_georef, 'crs_wkt': geo.crs_wkt, 'nodata': geo.nodata, 'raster_type': geo.raster_type, @@ -2093,6 +2128,7 @@ def _pixel_size_mismatch(a: float, b: float) -> bool: # the read-side ``MixedBandMetadataError`` opt-out pattern. _check_no_rotated_source_transforms(sources_meta) _check_no_mixed_raster_type(sources_meta) + _check_no_mixed_georef(sources_meta) _check_no_mixed_nodata(sources_meta, caller_nodata=nodata) # Pixel size, sample format, band count, and CRS share the @@ -2184,8 +2220,17 @@ def _pixel_size_mismatch(a: float, b: float) -> bool: lines = [f''] if srs: lines.append(f' {_xml_text(srs)}') - lines.append(f' {mosaic_x0}, {res_x}, 0.0, ' - f'{mosaic_y_top}, 0.0, {res_y}') + # Only emit a when the sources actually carry + # georeferencing. A non-georeferenced source has a default identity + # transform (origin 0,0; pixel 1,-1), and emitting that as a real + # GeoTransform fabricates georeferencing: the VRT would read back as + # ``georef_status='transform_only'`` instead of ``'none'``. The + # mixed-georef gate above guarantees every source agrees, so checking + # ``first`` is enough. Omitting the element makes the reader fall back + # to integer pixel coordinates, preserving ``georef_status='none'``. + if first.get('has_georef'): + lines.append(f' {mosaic_x0}, {res_x}, 0.0, ' + f'{mosaic_y_top}, 0.0, {res_y}') for band_num in range(1, n_bands + 1): lines.append( diff --git a/xrspatial/geotiff/tests/unit/test_signatures.py b/xrspatial/geotiff/tests/unit/test_signatures.py index 814af5b8..a230f866 100644 --- a/xrspatial/geotiff/tests/unit/test_signatures.py +++ b/xrspatial/geotiff/tests/unit/test_signatures.py @@ -2612,6 +2612,47 @@ def _make_single_tile_vrt(tmp_path, arr: np.ndarray) -> str: return vrt_path +def _make_georef_single_tile_vrt(tmp_path, arr: np.ndarray) -> str: + """Single-source VRT with an explicit ``origin (0,0); pixel (1,-1)`` + GeoTransform. + + ``_make_single_tile_vrt`` writes a bare numpy array, which produces a + non-georeferenced tile; build_vrt then omits the ```` + (see issue #2966) and the VRT reads back with integer pixel coords. + Tests that assert the windowed *transform* / float coordinate shift + need a real GeoTransform pinned to origin ``(0, 0)`` so the + pixel-is-area center math (``coord = (idx + 0.5) * res``) lands on the + documented values. Hand-build the XML so the GeoTransform is explicit + rather than derived from a tile's own (origin-shifted) tags. + """ + h, w = arr.shape[:2] + dtype_map = {np.dtype('float32'): 'Float32', + np.dtype('float64'): 'Float64', + np.dtype('uint8'): 'Byte', + np.dtype('int32'): 'Int32', + np.dtype('uint16'): 'UInt16'} + data_type = dtype_map[arr.dtype] + tile_path = _write_tile_to_vrt(tmp_path, 'georef_tile.tif', arr) + lines = [ + f'', + ' 0.0, 1.0, 0.0, 0.0, 0.0, -1.0', + f' ', + ' ', + f' ' + f'{os.path.basename(tile_path)}', + ' 1', + f' ', + f' ', + ' ', + ' ', + '', + ] + vrt_path = str(tmp_path / 'georef_single.vrt') + with open(vrt_path, 'w') as f: + f.write('\n'.join(lines)) + return vrt_path + + def _make_2x1_mosaic_vrt(tmp_path, left: np.ndarray, right: np.ndarray) -> str: """Create a 2x1 horizontal mosaic VRT for cross-source window tests. @@ -2763,7 +2804,7 @@ def test_window_transform_origin_shift(self, tmp_path): agree. """ arr = np.arange(8 * 16, dtype=np.float32).reshape(8, 16) - vrt = _make_single_tile_vrt(tmp_path, arr) + vrt = _make_georef_single_tile_vrt(tmp_path, arr) result = _read_vrt(vrt, window=(2, 3, 6, 10)) @@ -2787,7 +2828,7 @@ def test_window_coords_match_transform_shift(self, tmp_path): first y coord must be ``0 + (2 + 0.5) * -1.0 = -2.5``. """ arr = np.arange(8 * 16, dtype=np.float32).reshape(8, 16) - vrt = _make_single_tile_vrt(tmp_path, arr) + vrt = _make_georef_single_tile_vrt(tmp_path, arr) result = _read_vrt(vrt, window=(2, 3, 6, 10)) diff --git a/xrspatial/geotiff/tests/vrt/test_metadata.py b/xrspatial/geotiff/tests/vrt/test_metadata.py index e9e61265..9eb197ed 100644 --- a/xrspatial/geotiff/tests/vrt/test_metadata.py +++ b/xrspatial/geotiff/tests/vrt/test_metadata.py @@ -33,7 +33,8 @@ from xrspatial.geotiff import (GeoTIFFFallbackWarning, MixedBandMetadataError, open_geotiff, _read_geotiff_dask, _read_vrt, to_geotiff, build_vrt) -from xrspatial.geotiff._attrs import GEOREF_STATUS_FULL, GEOREF_STATUS_TRANSFORM_ONLY +from xrspatial.geotiff._attrs import (GEOREF_STATUS_FULL, GEOREF_STATUS_NONE, + GEOREF_STATUS_TRANSFORM_ONLY) from xrspatial.geotiff._errors import VRTUnsupportedError from xrspatial.geotiff._geotags import GeoTransform from xrspatial.geotiff._vrt import parse_vrt @@ -1795,3 +1796,88 @@ def test_missing_sources_warn_records_holes(tmp_path): assert isinstance(holes[0], dict), f'vrt_holes entry type drifted: {type(holes[0]).__name__}; #1734 documents a dict shape' # noqa: E501 hole_source = holes[0]['source'] assert 'tmp_2321_missing_src.tif' in hole_source, f'hole source path drifted: {hole_source!r}' + + +# --------------------------------------------------------------------------- +# Non-georeferenced sources: build_vrt must not fabricate a GeoTransform +# (issue #2966) +# --------------------------------------------------------------------------- + + +def _write_plain_tif_2966(tmp_path, name, shape=(3, 4)): + """Write a non-georeferenced TIFF (bare array, no CRS/transform). + + The low-level ``write`` helper bypasses ``to_geotiff`` so the source + carries no GeoTIFF transform tags and reads back as + ``georef_status='none'``. + """ + arr = np.arange(int(np.prod(shape)), dtype=np.float32).reshape(shape) + path = str(tmp_path / name) + write(arr, path, compression='none', tiled=False) + return path, arr + + +def _write_georef_tif_2966(tmp_path, name, shape=(3, 4)): + """Write a georeferenced TIFF via ``to_geotiff`` (real CRS + transform).""" + arr = np.arange(int(np.prod(shape)), dtype=np.float32).reshape(shape) + y = np.linspace(50.0, 20.0, shape[0]) + x = np.linspace(100.0, 130.0, shape[1]) + da = xr.DataArray(arr, dims=['y', 'x'], coords={'y': y, 'x': x}, + attrs={'crs': 'EPSG:4326'}) + path = str(tmp_path / name) + to_geotiff(da, path, compression='none') + return path, arr + + +def test_build_vrt_non_georef_source_omits_geotransform(tmp_path): + """A VRT over a non-georeferenced source must not carry a + ```` element (issue #2966).""" + tif, _ = _write_plain_tif_2966(tmp_path, 'plain_2966.tif') + assert open_geotiff(tif).attrs.get('georef_status') == GEOREF_STATUS_NONE + + vrt_path = str(tmp_path / 'plain_2966.vrt') + build_vrt(vrt_path, [tif]) + + xml = pathlib.Path(vrt_path).read_text() + assert '' not in xml, ( + 'build_vrt fabricated a GeoTransform for a non-georeferenced ' + 'source: ' + xml + ) + + +def test_build_vrt_non_georef_source_preserves_status_none(tmp_path): + """The VRT over a non-georeferenced source reads back as + ``georef_status='none'`` with no fabricated transform (issue #2966).""" + tif, arr = _write_plain_tif_2966(tmp_path, 'plain_status_2966.tif') + + vrt_path = str(tmp_path / 'plain_status_2966.vrt') + build_vrt(vrt_path, [tif]) + out = open_geotiff(vrt_path) + + assert out.attrs.get('georef_status') == GEOREF_STATUS_NONE + assert out.attrs.get('transform') is None + np.testing.assert_array_equal(out.values, arr) + + +def test_build_vrt_georef_source_still_emits_geotransform(tmp_path): + """Regression guard: a georeferenced source still gets a + ```` and a non-``none`` status (issue #2966).""" + tif, _ = _write_georef_tif_2966(tmp_path, 'geo_2966.tif') + + vrt_path = str(tmp_path / 'geo_2966.vrt') + build_vrt(vrt_path, [tif]) + + xml = pathlib.Path(vrt_path).read_text() + assert '' in xml + assert open_geotiff(vrt_path).attrs.get('georef_status') != GEOREF_STATUS_NONE + + +def test_build_vrt_mixed_georef_sources_rejected(tmp_path): + """Mixing a georeferenced and a non-georeferenced source is rejected + rather than silently mislocating the plain tile (issue #2966).""" + geo_tif, _ = _write_georef_tif_2966(tmp_path, 'mix_geo_2966.tif') + plain_tif, _ = _write_plain_tif_2966(tmp_path, 'mix_plain_2966.tif') + + vrt_path = str(tmp_path / 'mixed_2966.vrt') + with pytest.raises(ValueError, match='mix georeferenced and non-georeferenced'): + build_vrt(vrt_path, [geo_tif, plain_tif])