From 37b1ef2dc7d31288f4288133fbd06ae59514325b Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 2 Jun 2026 14:49:31 -0700 Subject: [PATCH 1/2] Fix hotspots() silent all-zeros on degenerate dask inputs (#2843) The numpy and cupy hotspots paths validate the global Gi* terms through _gistar_global_stats, which raises for fewer than 2 valid cells or a zero global std. The dask paths kept those terms as lazy 0-d arrays and skipped the check, so a constant, all-NaN, or single-valid-cell raster classified to a silent all-zeros result instead of raising. Add a lazy validation block (_gistar_validate_lazy) that re-applies the numpy-path check at compute time and returns the z-score unchanged. The dask graph stays lazy on call (issue #2772), and the dask backends now raise at compute() the same errors that the numpy and cupy backends raise eagerly. Cover the three degenerate cases across numpy, cupy, dask+numpy, and dask+cupy. --- xrspatial/focal.py | 24 +++++++++++++ xrspatial/tests/test_focal.py | 68 +++++++++++++++++++++++++++++++++++ 2 files changed, 92 insertions(+) diff --git a/xrspatial/focal.py b/xrspatial/focal.py index 2c4e24bc5..34d9bf04f 100644 --- a/xrspatial/focal.py +++ b/xrspatial/focal.py @@ -1371,6 +1371,20 @@ def _gistar_global_stats(global_mean, global_std, n): return global_mean, global_std, n +def _gistar_validate_lazy(z_block, global_std, n): + """Validate the global Gi* terms inside the dask graph. + + The dask backends keep ``global_std`` and ``n`` as lazy 0-d arrays, so + the eager ``_gistar_global_stats`` check never runs on them. This block + function re-applies that check at compute time and returns ``z_block`` + unchanged, so degenerate inputs (constant raster, all-NaN raster, or a + single valid cell) raise the same errors as the numpy and cupy paths + instead of silently classifying to all zeros. 
+ """ + _gistar_global_stats(0.0, float(global_std), int(n)) + return z_block + + def _gistar_zscore(weighted_sum, weight_sum, sq_weight_sum, global_mean, global_std, n): """Getis-Ord Gi* z-score from the per-cell convolution terms. @@ -1462,6 +1476,11 @@ def _hotspots_dask_numpy(raster, kernel, boundary='nan'): # per block. z_array = _gistar_zscore(weighted_sum, weight_sum, sq_weight_sum, global_mean, global_std, n) + # Re-apply the numpy-path degenerate-input check lazily so constant / + # all-NaN / single-valid-cell rasters raise at compute time instead of + # classifying to a silent all-zeros raster (issue #2843). + z_array = da.map_blocks(_gistar_validate_lazy, z_array, global_std, n, + dtype=z_array.dtype, meta=z_array._meta) out = z_array.map_blocks(_calc_hotspots_numpy, meta=np.array((), dtype=np.int8)) return out @@ -1497,6 +1516,11 @@ def _hotspots_dask_cupy(raster, kernel, boundary='nan'): z_array = _gistar_zscore(weighted_sum, weight_sum, sq_weight_sum, global_mean, global_std, n) + # Re-apply the numpy-path degenerate-input check lazily so constant / + # all-NaN / single-valid-cell rasters raise at compute time instead of + # classifying to a silent all-zeros raster (issue #2843). + z_array = da.map_blocks(_gistar_validate_lazy, z_array, global_std, n, + dtype=z_array.dtype, meta=z_array._meta) out = z_array.map_blocks(_calc_hotspots_cupy, meta=cupy.array((), dtype=cupy.int8)) return out diff --git a/xrspatial/tests/test_focal.py b/xrspatial/tests/test_focal.py index 822592aa6..66d69f10e 100644 --- a/xrspatial/tests/test_focal.py +++ b/xrspatial/tests/test_focal.py @@ -944,6 +944,74 @@ def test_hotspots_zero_global_std(): hotspots(agg, kernel) +# Degenerate inputs: constant raster (std == 0), all-NaN raster (n == 0), +# and a single valid cell (n == 1). The numpy/cupy paths raise eagerly via +# _gistar_global_stats; the dask paths must raise the same error at compute +# time instead of silently classifying to all zeros (issue #2843). 
+def _hotspots_degenerate_cases(): + constant = np.zeros((10, 10), dtype=np.float32) + + all_nan = np.full((10, 10), np.nan, dtype=np.float32) + + single_valid = np.full((10, 10), np.nan, dtype=np.float32) + single_valid[0, 0] = 5.0 + + std_msg = "Standard deviation of the input raster values is 0." + n_msg = "needs at least 2 valid" + return [ + ('constant', constant, ZeroDivisionError, std_msg), + ('all_nan', all_nan, ValueError, n_msg), + ('single_valid', single_valid, ValueError, n_msg), + ] + + +_HOTSPOTS_DEGENERATE = _hotspots_degenerate_cases() + + +@pytest.mark.parametrize('case,data,exc,msg', _HOTSPOTS_DEGENERATE, + ids=[c[0] for c in _HOTSPOTS_DEGENERATE]) +def test_hotspots_degenerate_numpy_2843(case, data, exc, msg): + agg = create_test_raster(data) + kernel = np.ones((3, 3)) + with pytest.raises(exc, match=msg): + hotspots(agg, kernel) + + +@dask_array_available +@pytest.mark.parametrize('case,data,exc,msg', _HOTSPOTS_DEGENERATE, + ids=[c[0] for c in _HOTSPOTS_DEGENERATE]) +def test_hotspots_degenerate_dask_numpy_2843(case, data, exc, msg): + # The dask backend must reject degenerate inputs the same way numpy does, + # but lazily: the error fires at compute(), not at graph-build time. 
+ agg = create_test_raster(data, backend='dask') + kernel = np.ones((3, 3)) + result = hotspots(agg, kernel) + with pytest.raises(exc, match=msg): + result.data.compute() + + +@cuda_and_cupy_available +@pytest.mark.parametrize('case,data,exc,msg', _HOTSPOTS_DEGENERATE, + ids=[c[0] for c in _HOTSPOTS_DEGENERATE]) +def test_hotspots_degenerate_cupy_2843(case, data, exc, msg): + agg = create_test_raster(data, backend='cupy') + kernel = np.ones((3, 3)) + with pytest.raises(exc, match=msg): + hotspots(agg, kernel) + + +@cuda_and_cupy_available +@dask_array_available +@pytest.mark.parametrize('case,data,exc,msg', _HOTSPOTS_DEGENERATE, + ids=[c[0] for c in _HOTSPOTS_DEGENERATE]) +def test_hotspots_degenerate_dask_cupy_2843(case, data, exc, msg): + agg = create_test_raster(data, backend='dask+cupy') + kernel = np.ones((3, 3)) + result = hotspots(agg, kernel) + with pytest.raises(exc, match=msg): + result.data.compute() + + def test_hotspots_kernel_none_2771(): # Regression for #2771: hotspots skipped custom_kernel validation, so a # None kernel raised AttributeError on kernel.shape instead of ValueError. From 58db7196239b8e47b67ae137528d267b1793c61d Mon Sep 17 00:00:00 2001 From: Brendan Collins Date: Tue, 2 Jun 2026 14:52:13 -0700 Subject: [PATCH 2/2] Update dask laziness doc: hotspots is fully lazy after #2772 (#2843) --- docs/source/reference/dask_laziness.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/reference/dask_laziness.rst b/docs/source/reference/dask_laziness.rst index 42b8dcf9b..f9651a30f 100644 --- a/docs/source/reference/dask_laziness.rst +++ b/docs/source/reference/dask_laziness.rst @@ -78,8 +78,8 @@ Focal operations - Fully lazy - Multiple stats via ``map_overlap``, 3D output * - ``hotspots`` - - Partially lazy - - Computes global mean and std, result is dask + - Fully lazy + - Global mean/std/count stay lazy; degenerate-input check fires at compute Classification