Skip to content

Commit 03b0d55

Browse files
authored
vrt: stop build_vrt fabricating a GeoTransform for non-georeferenced sources (#2966) (#2971)
A non-georeferenced source TIFF carries a default identity transform (origin 0,0; pixel 1,-1) with has_georef=False. build_vrt stored that transform unconditionally and always emitted a <GeoTransform>, so a VRT over a plain TIFF read back as georef_status='transform_only' with a fabricated (1, 0, 0, 0, -1, 0) transform instead of 'none'. Track has_georef per source and emit <GeoTransform> only when the sources are georeferenced. Add a fail-loud gate that rejects mixing georeferenced and non-georeferenced sources, since a plain tile cannot be placed on a georeferenced mosaic. Two window-transform-shift tests relied on the fabricated identity GeoTransform; point them at a hand-built VRT with an explicit origin (0,0) GeoTransform so they still exercise the shift contract.
1 parent 42d47d8 commit 03b0d55

3 files changed

Lines changed: 177 additions & 5 deletions

File tree

xrspatial/geotiff/_vrt.py

Lines changed: 47 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1881,6 +1881,40 @@ def _check_no_mixed_raster_type(sources_meta: list[dict]) -> None:
18811881
)
18821882

18831883

1884+
def _check_no_mixed_georef(sources_meta: list[dict]) -> None:
1885+
"""Reject mixing georeferenced and non-georeferenced VRT sources.
1886+
1887+
A source TIFF with no georeferencing (``has_georef`` False) carries a
1888+
default identity transform, not a real mapping. The mosaic-placement
1889+
math reads ``origin_x`` / ``pixel_width`` off each source transform to
1890+
position its tile, so a non-georeferenced source dropped into an
1891+
otherwise georeferenced mosaic would be placed at pixel-space origin
1892+
``(0, 0)`` as if those identity values were real CRS coordinates.
1893+
Refuse rather than emit a mosaic that mislocates the tile.
1894+
1895+
The all-georeferenced and all-non-georeferenced cases both pass:
1896+
build_vrt emits a ``<GeoTransform>`` only when every source is
1897+
georeferenced, so the all-non-georeferenced VRT preserves
1898+
``georef_status='none'`` on read.
1899+
"""
1900+
if not sources_meta:
1901+
return
1902+
georef_flags = [bool(m.get('has_georef')) for m in sources_meta]
1903+
if any(georef_flags) and not all(georef_flags):
1904+
georeffed = [m['path'] for m, g in zip(sources_meta, georef_flags)
1905+
if g]
1906+
plain = [m['path'] for m, g in zip(sources_meta, georef_flags)
1907+
if not g]
1908+
raise ValueError(
1909+
f"VRT sources mix georeferenced and non-georeferenced TIFFs. "
1910+
f"Georeferenced: {georeffed!r}; non-georeferenced: {plain!r}. "
1911+
f"A non-georeferenced source carries only a default identity "
1912+
f"transform, so it cannot be placed on a georeferenced "
1913+
f"mosaic. Either georeference every source or none of them "
1914+
f"before building the VRT."
1915+
)
1916+
1917+
18841918
def _nodata_values_agree(a, b) -> bool:
18851919
"""Return True iff two nodata sentinels are the same value.
18861920
@@ -2039,6 +2073,7 @@ def write_vrt(vrt_path: str, source_files: list[str], *,
20392073
'bps': bps,
20402074
'sample_format': ifd.sample_format,
20412075
'transform': geo.transform,
2076+
'has_georef': geo.has_georef,
20422077
'crs_wkt': geo.crs_wkt,
20432078
'nodata': geo.nodata,
20442079
'raster_type': geo.raster_type,
@@ -2093,6 +2128,7 @@ def _pixel_size_mismatch(a: float, b: float) -> bool:
20932128
# the read-side ``MixedBandMetadataError`` opt-out pattern.
20942129
_check_no_rotated_source_transforms(sources_meta)
20952130
_check_no_mixed_raster_type(sources_meta)
2131+
_check_no_mixed_georef(sources_meta)
20962132
_check_no_mixed_nodata(sources_meta, caller_nodata=nodata)
20972133

20982134
# Pixel size, sample format, band count, and CRS share the
@@ -2184,8 +2220,17 @@ def _pixel_size_mismatch(a: float, b: float) -> bool:
21842220
lines = [f'<VRTDataset rasterXSize="{int(total_w)}" rasterYSize="{int(total_h)}">']
21852221
if srs:
21862222
lines.append(f' <SRS>{_xml_text(srs)}</SRS>')
2187-
lines.append(f' <GeoTransform>{mosaic_x0}, {res_x}, 0.0, '
2188-
f'{mosaic_y_top}, 0.0, {res_y}</GeoTransform>')
2223+
# Only emit a <GeoTransform> when the sources actually carry
2224+
# georeferencing. A non-georeferenced source has a default identity
2225+
# transform (origin 0,0; pixel 1,-1), and emitting that as a real
2226+
# GeoTransform fabricates georeferencing: the VRT would read back as
2227+
# ``georef_status='transform_only'`` instead of ``'none'``. The
2228+
# mixed-georef gate above guarantees every source agrees, so checking
2229+
# ``first`` is enough. Omitting the element makes the reader fall back
2230+
# to integer pixel coordinates, preserving ``georef_status='none'``.
2231+
if first.get('has_georef'):
2232+
lines.append(f' <GeoTransform>{mosaic_x0}, {res_x}, 0.0, '
2233+
f'{mosaic_y_top}, 0.0, {res_y}</GeoTransform>')
21892234

21902235
for band_num in range(1, n_bands + 1):
21912236
lines.append(

xrspatial/geotiff/tests/unit/test_signatures.py

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2612,6 +2612,47 @@ def _make_single_tile_vrt(tmp_path, arr: np.ndarray) -> str:
26122612
return vrt_path
26132613

26142614

2615+
def _make_georef_single_tile_vrt(tmp_path, arr: np.ndarray) -> str:
2616+
"""Single-source VRT with an explicit ``origin (0,0); pixel (1,-1)``
2617+
GeoTransform.
2618+
2619+
``_make_single_tile_vrt`` writes a bare numpy array, which produces a
2620+
non-georeferenced tile; build_vrt then omits the ``<GeoTransform>``
2621+
(see issue #2966) and the VRT reads back with integer pixel coords.
2622+
Tests that assert the windowed *transform* / float coordinate shift
2623+
need a real GeoTransform pinned to origin ``(0, 0)`` so the
2624+
pixel-is-area center math (``coord = (idx + 0.5) * res``) lands on the
2625+
documented values. Hand-build the XML so the GeoTransform is explicit
2626+
rather than derived from a tile's own (origin-shifted) tags.
2627+
"""
2628+
h, w = arr.shape[:2]
2629+
dtype_map = {np.dtype('float32'): 'Float32',
2630+
np.dtype('float64'): 'Float64',
2631+
np.dtype('uint8'): 'Byte',
2632+
np.dtype('int32'): 'Int32',
2633+
np.dtype('uint16'): 'UInt16'}
2634+
data_type = dtype_map[arr.dtype]
2635+
tile_path = _write_tile_to_vrt(tmp_path, 'georef_tile.tif', arr)
2636+
lines = [
2637+
f'<VRTDataset rasterXSize="{w}" rasterYSize="{h}">',
2638+
' <GeoTransform>0.0, 1.0, 0.0, 0.0, 0.0, -1.0</GeoTransform>',
2639+
f' <VRTRasterBand dataType="{data_type}" band="1">',
2640+
' <SimpleSource>',
2641+
f' <SourceFilename relativeToVRT="1">'
2642+
f'{os.path.basename(tile_path)}</SourceFilename>',
2643+
' <SourceBand>1</SourceBand>',
2644+
f' <SrcRect xOff="0" yOff="0" xSize="{w}" ySize="{h}"/>',
2645+
f' <DstRect xOff="0" yOff="0" xSize="{w}" ySize="{h}"/>',
2646+
' </SimpleSource>',
2647+
' </VRTRasterBand>',
2648+
'</VRTDataset>',
2649+
]
2650+
vrt_path = str(tmp_path / 'georef_single.vrt')
2651+
with open(vrt_path, 'w') as f:
2652+
f.write('\n'.join(lines))
2653+
return vrt_path
2654+
2655+
26152656
def _make_2x1_mosaic_vrt(tmp_path, left: np.ndarray,
26162657
right: np.ndarray) -> str:
26172658
"""Create a 2x1 horizontal mosaic VRT for cross-source window tests.
@@ -2763,7 +2804,7 @@ def test_window_transform_origin_shift(self, tmp_path):
27632804
agree.
27642805
"""
27652806
arr = np.arange(8 * 16, dtype=np.float32).reshape(8, 16)
2766-
vrt = _make_single_tile_vrt(tmp_path, arr)
2807+
vrt = _make_georef_single_tile_vrt(tmp_path, arr)
27672808

27682809
result = _read_vrt(vrt, window=(2, 3, 6, 10))
27692810

@@ -2787,7 +2828,7 @@ def test_window_coords_match_transform_shift(self, tmp_path):
27872828
first y coord must be ``0 + (2 + 0.5) * -1.0 = -2.5``.
27882829
"""
27892830
arr = np.arange(8 * 16, dtype=np.float32).reshape(8, 16)
2790-
vrt = _make_single_tile_vrt(tmp_path, arr)
2831+
vrt = _make_georef_single_tile_vrt(tmp_path, arr)
27912832

27922833
result = _read_vrt(vrt, window=(2, 3, 6, 10))
27932834

xrspatial/geotiff/tests/vrt/test_metadata.py

Lines changed: 87 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@
3333

3434
from xrspatial.geotiff import (GeoTIFFFallbackWarning, MixedBandMetadataError, open_geotiff,
3535
_read_geotiff_dask, _read_vrt, to_geotiff, build_vrt)
36-
from xrspatial.geotiff._attrs import GEOREF_STATUS_FULL, GEOREF_STATUS_TRANSFORM_ONLY
36+
from xrspatial.geotiff._attrs import (GEOREF_STATUS_FULL, GEOREF_STATUS_NONE,
37+
GEOREF_STATUS_TRANSFORM_ONLY)
3738
from xrspatial.geotiff._errors import VRTUnsupportedError
3839
from xrspatial.geotiff._geotags import GeoTransform
3940
from xrspatial.geotiff._vrt import parse_vrt
@@ -1795,3 +1796,88 @@ def test_missing_sources_warn_records_holes(tmp_path):
17951796
assert isinstance(holes[0], dict), f'vrt_holes entry type drifted: {type(holes[0]).__name__}; #1734 documents a dict shape' # noqa: E501
17961797
hole_source = holes[0]['source']
17971798
assert 'tmp_2321_missing_src.tif' in hole_source, f'hole source path drifted: {hole_source!r}'
1799+
1800+
1801+
# ---------------------------------------------------------------------------
1802+
# Non-georeferenced sources: build_vrt must not fabricate a GeoTransform
1803+
# (issue #2966)
1804+
# ---------------------------------------------------------------------------
1805+
1806+
1807+
def _write_plain_tif_2966(tmp_path, name, shape=(3, 4)):
1808+
"""Write a non-georeferenced TIFF (bare array, no CRS/transform).
1809+
1810+
The low-level ``write`` helper bypasses ``to_geotiff`` so the source
1811+
carries no GeoTIFF transform tags and reads back as
1812+
``georef_status='none'``.
1813+
"""
1814+
arr = np.arange(int(np.prod(shape)), dtype=np.float32).reshape(shape)
1815+
path = str(tmp_path / name)
1816+
write(arr, path, compression='none', tiled=False)
1817+
return path, arr
1818+
1819+
1820+
def _write_georef_tif_2966(tmp_path, name, shape=(3, 4)):
1821+
"""Write a georeferenced TIFF via ``to_geotiff`` (real CRS + transform)."""
1822+
arr = np.arange(int(np.prod(shape)), dtype=np.float32).reshape(shape)
1823+
y = np.linspace(50.0, 20.0, shape[0])
1824+
x = np.linspace(100.0, 130.0, shape[1])
1825+
da = xr.DataArray(arr, dims=['y', 'x'], coords={'y': y, 'x': x},
1826+
attrs={'crs': 'EPSG:4326'})
1827+
path = str(tmp_path / name)
1828+
to_geotiff(da, path, compression='none')
1829+
return path, arr
1830+
1831+
1832+
def test_build_vrt_non_georef_source_omits_geotransform(tmp_path):
1833+
"""A VRT over a non-georeferenced source must not carry a
1834+
``<GeoTransform>`` element (issue #2966)."""
1835+
tif, _ = _write_plain_tif_2966(tmp_path, 'plain_2966.tif')
1836+
assert open_geotiff(tif).attrs.get('georef_status') == GEOREF_STATUS_NONE
1837+
1838+
vrt_path = str(tmp_path / 'plain_2966.vrt')
1839+
build_vrt(vrt_path, [tif])
1840+
1841+
xml = pathlib.Path(vrt_path).read_text()
1842+
assert '<GeoTransform>' not in xml, (
1843+
'build_vrt fabricated a GeoTransform for a non-georeferenced '
1844+
'source: ' + xml
1845+
)
1846+
1847+
1848+
def test_build_vrt_non_georef_source_preserves_status_none(tmp_path):
1849+
"""The VRT over a non-georeferenced source reads back as
1850+
``georef_status='none'`` with no fabricated transform (issue #2966)."""
1851+
tif, arr = _write_plain_tif_2966(tmp_path, 'plain_status_2966.tif')
1852+
1853+
vrt_path = str(tmp_path / 'plain_status_2966.vrt')
1854+
build_vrt(vrt_path, [tif])
1855+
out = open_geotiff(vrt_path)
1856+
1857+
assert out.attrs.get('georef_status') == GEOREF_STATUS_NONE
1858+
assert out.attrs.get('transform') is None
1859+
np.testing.assert_array_equal(out.values, arr)
1860+
1861+
1862+
def test_build_vrt_georef_source_still_emits_geotransform(tmp_path):
1863+
"""Regression guard: a georeferenced source still gets a
1864+
``<GeoTransform>`` and a non-``none`` status (issue #2966)."""
1865+
tif, _ = _write_georef_tif_2966(tmp_path, 'geo_2966.tif')
1866+
1867+
vrt_path = str(tmp_path / 'geo_2966.vrt')
1868+
build_vrt(vrt_path, [tif])
1869+
1870+
xml = pathlib.Path(vrt_path).read_text()
1871+
assert '<GeoTransform>' in xml
1872+
assert open_geotiff(vrt_path).attrs.get('georef_status') != GEOREF_STATUS_NONE
1873+
1874+
1875+
def test_build_vrt_mixed_georef_sources_rejected(tmp_path):
1876+
"""Mixing a georeferenced and a non-georeferenced source is rejected
1877+
rather than silently mislocating the plain tile (issue #2966)."""
1878+
geo_tif, _ = _write_georef_tif_2966(tmp_path, 'mix_geo_2966.tif')
1879+
plain_tif, _ = _write_plain_tif_2966(tmp_path, 'mix_plain_2966.tif')
1880+
1881+
vrt_path = str(tmp_path / 'mixed_2966.vrt')
1882+
with pytest.raises(ValueError, match='mix georeferenced and non-georeferenced'):
1883+
build_vrt(vrt_path, [geo_tif, plain_tif])

0 commit comments

Comments
 (0)