Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 47 additions & 2 deletions xrspatial/geotiff/_vrt.py
Original file line number Diff line number Diff line change
Expand Up @@ -1881,6 +1881,40 @@ def _check_no_mixed_raster_type(sources_meta: list[dict]) -> None:
)


def _check_no_mixed_georef(sources_meta: list[dict]) -> None:
"""Reject mixing georeferenced and non-georeferenced VRT sources.

A source TIFF with no georeferencing (``has_georef`` False) carries a
default identity transform, not a real mapping. The mosaic-placement
math reads ``origin_x`` / ``pixel_width`` off each source transform to
position its tile, so a non-georeferenced source dropped into an
otherwise georeferenced mosaic would be placed at pixel-space origin
``(0, 0)`` as if those identity values were real CRS coordinates.
Refuse rather than emit a mosaic that mislocates the tile.

The all-georeferenced and all-non-georeferenced cases both pass:
build_vrt emits a ``<GeoTransform>`` only when every source is
georeferenced, so the all-non-georeferenced VRT preserves
``georef_status='none'`` on read.
"""
if not sources_meta:
return
georef_flags = [bool(m.get('has_georef')) for m in sources_meta]
if any(georef_flags) and not all(georef_flags):
georeffed = [m['path'] for m, g in zip(sources_meta, georef_flags)
if g]
plain = [m['path'] for m, g in zip(sources_meta, georef_flags)
if not g]
raise ValueError(
f"VRT sources mix georeferenced and non-georeferenced TIFFs. "
f"Georeferenced: {georeffed!r}; non-georeferenced: {plain!r}. "
f"A non-georeferenced source carries only a default identity "
f"transform, so it cannot be placed on a georeferenced "
f"mosaic. Either georeference every source or none of them "
f"before building the VRT."
)


def _nodata_values_agree(a, b) -> bool:
"""Return True iff two nodata sentinels are the same value.

Expand Down Expand Up @@ -2039,6 +2073,7 @@ def write_vrt(vrt_path: str, source_files: list[str], *,
'bps': bps,
'sample_format': ifd.sample_format,
'transform': geo.transform,
'has_georef': geo.has_georef,
'crs_wkt': geo.crs_wkt,
'nodata': geo.nodata,
'raster_type': geo.raster_type,
Expand Down Expand Up @@ -2093,6 +2128,7 @@ def _pixel_size_mismatch(a: float, b: float) -> bool:
# the read-side ``MixedBandMetadataError`` opt-out pattern.
_check_no_rotated_source_transforms(sources_meta)
_check_no_mixed_raster_type(sources_meta)
_check_no_mixed_georef(sources_meta)
_check_no_mixed_nodata(sources_meta, caller_nodata=nodata)

# Pixel size, sample format, band count, and CRS share the
Expand Down Expand Up @@ -2184,8 +2220,17 @@ def _pixel_size_mismatch(a: float, b: float) -> bool:
lines = [f'<VRTDataset rasterXSize="{int(total_w)}" rasterYSize="{int(total_h)}">']
if srs:
lines.append(f' <SRS>{_xml_text(srs)}</SRS>')
lines.append(f' <GeoTransform>{mosaic_x0}, {res_x}, 0.0, '
f'{mosaic_y_top}, 0.0, {res_y}</GeoTransform>')
# Only emit a <GeoTransform> when the sources actually carry
# georeferencing. A non-georeferenced source has a default identity
# transform (origin 0,0; pixel 1,-1), and emitting that as a real
# GeoTransform fabricates georeferencing: the VRT would read back as
# ``georef_status='transform_only'`` instead of ``'none'``. The
# mixed-georef gate above guarantees every source agrees, so checking
# ``first`` is enough. Omitting the element makes the reader fall back
# to integer pixel coordinates, preserving ``georef_status='none'``.
if first.get('has_georef'):
lines.append(f' <GeoTransform>{mosaic_x0}, {res_x}, 0.0, '
f'{mosaic_y_top}, 0.0, {res_y}</GeoTransform>')

for band_num in range(1, n_bands + 1):
lines.append(
Expand Down
45 changes: 43 additions & 2 deletions xrspatial/geotiff/tests/unit/test_signatures.py
Original file line number Diff line number Diff line change
Expand Up @@ -2612,6 +2612,47 @@ def _make_single_tile_vrt(tmp_path, arr: np.ndarray) -> str:
return vrt_path


def _make_georef_single_tile_vrt(tmp_path, arr: np.ndarray) -> str:
"""Single-source VRT with an explicit ``origin (0,0); pixel (1,-1)``
GeoTransform.

``_make_single_tile_vrt`` writes a bare numpy array, which produces a
non-georeferenced tile; build_vrt then omits the ``<GeoTransform>``
(see issue #2966) and the VRT reads back with integer pixel coords.
Tests that assert the windowed *transform* / float coordinate shift
need a real GeoTransform pinned to origin ``(0, 0)`` so the
pixel-is-area center math (``coord = (idx + 0.5) * res``) lands on the
documented values. Hand-build the XML so the GeoTransform is explicit
rather than derived from a tile's own (origin-shifted) tags.
"""
h, w = arr.shape[:2]
dtype_map = {np.dtype('float32'): 'Float32',
np.dtype('float64'): 'Float64',
np.dtype('uint8'): 'Byte',
np.dtype('int32'): 'Int32',
np.dtype('uint16'): 'UInt16'}
data_type = dtype_map[arr.dtype]
tile_path = _write_tile_to_vrt(tmp_path, 'georef_tile.tif', arr)
lines = [
f'<VRTDataset rasterXSize="{w}" rasterYSize="{h}">',
' <GeoTransform>0.0, 1.0, 0.0, 0.0, 0.0, -1.0</GeoTransform>',
f' <VRTRasterBand dataType="{data_type}" band="1">',
' <SimpleSource>',
f' <SourceFilename relativeToVRT="1">'
f'{os.path.basename(tile_path)}</SourceFilename>',
' <SourceBand>1</SourceBand>',
f' <SrcRect xOff="0" yOff="0" xSize="{w}" ySize="{h}"/>',
f' <DstRect xOff="0" yOff="0" xSize="{w}" ySize="{h}"/>',
' </SimpleSource>',
' </VRTRasterBand>',
'</VRTDataset>',
]
vrt_path = str(tmp_path / 'georef_single.vrt')
with open(vrt_path, 'w') as f:
f.write('\n'.join(lines))
return vrt_path


def _make_2x1_mosaic_vrt(tmp_path, left: np.ndarray,
right: np.ndarray) -> str:
"""Create a 2x1 horizontal mosaic VRT for cross-source window tests.
Expand Down Expand Up @@ -2763,7 +2804,7 @@ def test_window_transform_origin_shift(self, tmp_path):
agree.
"""
arr = np.arange(8 * 16, dtype=np.float32).reshape(8, 16)
vrt = _make_single_tile_vrt(tmp_path, arr)
vrt = _make_georef_single_tile_vrt(tmp_path, arr)

result = _read_vrt(vrt, window=(2, 3, 6, 10))

Expand All @@ -2787,7 +2828,7 @@ def test_window_coords_match_transform_shift(self, tmp_path):
first y coord must be ``0 + (2 + 0.5) * -1.0 = -2.5``.
"""
arr = np.arange(8 * 16, dtype=np.float32).reshape(8, 16)
vrt = _make_single_tile_vrt(tmp_path, arr)
vrt = _make_georef_single_tile_vrt(tmp_path, arr)

result = _read_vrt(vrt, window=(2, 3, 6, 10))

Expand Down
88 changes: 87 additions & 1 deletion xrspatial/geotiff/tests/vrt/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@

from xrspatial.geotiff import (GeoTIFFFallbackWarning, MixedBandMetadataError, open_geotiff,
_read_geotiff_dask, _read_vrt, to_geotiff, build_vrt)
from xrspatial.geotiff._attrs import GEOREF_STATUS_FULL, GEOREF_STATUS_TRANSFORM_ONLY
from xrspatial.geotiff._attrs import (GEOREF_STATUS_FULL, GEOREF_STATUS_NONE,
GEOREF_STATUS_TRANSFORM_ONLY)
from xrspatial.geotiff._errors import VRTUnsupportedError
from xrspatial.geotiff._geotags import GeoTransform
from xrspatial.geotiff._vrt import parse_vrt
Expand Down Expand Up @@ -1795,3 +1796,88 @@ def test_missing_sources_warn_records_holes(tmp_path):
assert isinstance(holes[0], dict), f'vrt_holes entry type drifted: {type(holes[0]).__name__}; #1734 documents a dict shape' # noqa: E501
hole_source = holes[0]['source']
assert 'tmp_2321_missing_src.tif' in hole_source, f'hole source path drifted: {hole_source!r}'


# ---------------------------------------------------------------------------
# Non-georeferenced sources: build_vrt must not fabricate a GeoTransform
# (issue #2966)
# ---------------------------------------------------------------------------


def _write_plain_tif_2966(tmp_path, name, shape=(3, 4)):
"""Write a non-georeferenced TIFF (bare array, no CRS/transform).

The low-level ``write`` helper bypasses ``to_geotiff`` so the source
carries no GeoTIFF transform tags and reads back as
``georef_status='none'``.
"""
arr = np.arange(int(np.prod(shape)), dtype=np.float32).reshape(shape)
path = str(tmp_path / name)
write(arr, path, compression='none', tiled=False)
return path, arr


def _write_georef_tif_2966(tmp_path, name, shape=(3, 4)):
"""Write a georeferenced TIFF via ``to_geotiff`` (real CRS + transform)."""
arr = np.arange(int(np.prod(shape)), dtype=np.float32).reshape(shape)
y = np.linspace(50.0, 20.0, shape[0])
x = np.linspace(100.0, 130.0, shape[1])
da = xr.DataArray(arr, dims=['y', 'x'], coords={'y': y, 'x': x},
attrs={'crs': 'EPSG:4326'})
path = str(tmp_path / name)
to_geotiff(da, path, compression='none')
return path, arr


def test_build_vrt_non_georef_source_omits_geotransform(tmp_path):
"""A VRT over a non-georeferenced source must not carry a
``<GeoTransform>`` element (issue #2966)."""
tif, _ = _write_plain_tif_2966(tmp_path, 'plain_2966.tif')
assert open_geotiff(tif).attrs.get('georef_status') == GEOREF_STATUS_NONE

vrt_path = str(tmp_path / 'plain_2966.vrt')
build_vrt(vrt_path, [tif])

xml = pathlib.Path(vrt_path).read_text()
assert '<GeoTransform>' not in xml, (
'build_vrt fabricated a GeoTransform for a non-georeferenced '
'source: ' + xml
)


def test_build_vrt_non_georef_source_preserves_status_none(tmp_path):
"""The VRT over a non-georeferenced source reads back as
``georef_status='none'`` with no fabricated transform (issue #2966)."""
tif, arr = _write_plain_tif_2966(tmp_path, 'plain_status_2966.tif')

vrt_path = str(tmp_path / 'plain_status_2966.vrt')
build_vrt(vrt_path, [tif])
out = open_geotiff(vrt_path)

assert out.attrs.get('georef_status') == GEOREF_STATUS_NONE
assert out.attrs.get('transform') is None
np.testing.assert_array_equal(out.values, arr)


def test_build_vrt_georef_source_still_emits_geotransform(tmp_path):
"""Regression guard: a georeferenced source still gets a
``<GeoTransform>`` and a non-``none`` status (issue #2966)."""
tif, _ = _write_georef_tif_2966(tmp_path, 'geo_2966.tif')

vrt_path = str(tmp_path / 'geo_2966.vrt')
build_vrt(vrt_path, [tif])

xml = pathlib.Path(vrt_path).read_text()
assert '<GeoTransform>' in xml
assert open_geotiff(vrt_path).attrs.get('georef_status') != GEOREF_STATUS_NONE


def test_build_vrt_mixed_georef_sources_rejected(tmp_path):
"""Mixing a georeferenced and a non-georeferenced source is rejected
rather than silently mislocating the plain tile (issue #2966)."""
geo_tif, _ = _write_georef_tif_2966(tmp_path, 'mix_geo_2966.tif')
plain_tif, _ = _write_plain_tif_2966(tmp_path, 'mix_plain_2966.tif')

vrt_path = str(tmp_path / 'mixed_2966.vrt')
with pytest.raises(ValueError, match='mix georeferenced and non-georeferenced'):
build_vrt(vrt_path, [geo_tif, plain_tif])
Loading