Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/source/reference/dask_laziness.rst
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,8 @@ Focal operations
- Fully lazy
- Multiple stats via ``map_overlap``, 3D output
* - ``hotspots``
- Partially lazy
- Computes global mean and std, result is dask
- Fully lazy
- Global mean/std/count stay lazy; degenerate-input check fires at compute


Classification
Expand Down
24 changes: 24 additions & 0 deletions xrspatial/focal.py
Original file line number Diff line number Diff line change
Expand Up @@ -1371,6 +1371,20 @@ def _gistar_global_stats(global_mean, global_std, n):
return global_mean, global_std, n


def _gistar_validate_lazy(z_block, global_std, n):
    """Run the degenerate-input check on the global Gi* terms per block.

    On the dask backends ``global_std`` and ``n`` are lazy 0-d arrays, so
    the eager check in ``_gistar_global_stats`` never sees their values.
    This block function is meant to be mapped over the z-score array: it
    coerces both terms to Python scalars, hands them to
    ``_gistar_global_stats`` so that degenerate inputs (constant raster,
    all-NaN raster, or a single valid cell) raise the same errors as the
    numpy and cupy paths instead of silently classifying to all zeros,
    and then passes ``z_block`` through untouched.
    """
    std_value = float(global_std)
    valid_count = int(n)
    # The mean plays no part in the validation, so a 0.0 placeholder is
    # passed in its slot.
    _gistar_global_stats(0.0, std_value, valid_count)
    return z_block


def _gistar_zscore(weighted_sum, weight_sum, sq_weight_sum,
global_mean, global_std, n):
"""Getis-Ord Gi* z-score from the per-cell convolution terms.
Expand Down Expand Up @@ -1462,6 +1476,11 @@ def _hotspots_dask_numpy(raster, kernel, boundary='nan'):
# per block.
z_array = _gistar_zscore(weighted_sum, weight_sum, sq_weight_sum,
global_mean, global_std, n)
# Re-apply the numpy-path degenerate-input check lazily so constant /
# all-NaN / single-valid-cell rasters raise at compute time instead of
# classifying to a silent all-zeros raster (issue #2843).
z_array = da.map_blocks(_gistar_validate_lazy, z_array, global_std, n,
dtype=z_array.dtype, meta=z_array._meta)
out = z_array.map_blocks(_calc_hotspots_numpy,
meta=np.array((), dtype=np.int8))
return out
Expand Down Expand Up @@ -1497,6 +1516,11 @@ def _hotspots_dask_cupy(raster, kernel, boundary='nan'):

z_array = _gistar_zscore(weighted_sum, weight_sum, sq_weight_sum,
global_mean, global_std, n)
# Re-apply the numpy-path degenerate-input check lazily so constant /
# all-NaN / single-valid-cell rasters raise at compute time instead of
# classifying to a silent all-zeros raster (issue #2843).
z_array = da.map_blocks(_gistar_validate_lazy, z_array, global_std, n,
dtype=z_array.dtype, meta=z_array._meta)
out = z_array.map_blocks(_calc_hotspots_cupy,
meta=cupy.array((), dtype=cupy.int8))
return out
Expand Down
68 changes: 68 additions & 0 deletions xrspatial/tests/test_focal.py
Original file line number Diff line number Diff line change
Expand Up @@ -944,6 +944,74 @@ def test_hotspots_zero_global_std():
hotspots(agg, kernel)


# Degenerate inputs: constant raster (std == 0), all-NaN raster (n == 0),
# and a single valid cell (n == 1). The numpy/cupy paths raise eagerly via
# _gistar_global_stats; the dask paths must raise the same error at compute
# time instead of silently classifying to all zeros (issue #2843).
def _hotspots_degenerate_cases():
constant = np.zeros((10, 10), dtype=np.float32)

all_nan = np.full((10, 10), np.nan, dtype=np.float32)

single_valid = np.full((10, 10), np.nan, dtype=np.float32)
single_valid[0, 0] = 5.0

std_msg = "Standard deviation of the input raster values is 0."
n_msg = "needs at least 2 valid"
return [
('constant', constant, ZeroDivisionError, std_msg),
('all_nan', all_nan, ValueError, n_msg),
('single_valid', single_valid, ValueError, n_msg),
]


# Built once at import time; the parametrized degenerate-input tests read
# both their arguments and their test ids from this list.
_HOTSPOTS_DEGENERATE = _hotspots_degenerate_cases()


@pytest.mark.parametrize(
    'case,data,exc,msg',
    _HOTSPOTS_DEGENERATE,
    ids=[name for name, *_ in _HOTSPOTS_DEGENERATE],
)
def test_hotspots_degenerate_numpy_2843(case, data, exc, msg):
    """Degenerate rasters raise the expected error on the default backend."""
    raster = create_test_raster(data)
    ones_kernel = np.ones((3, 3))
    with pytest.raises(exc, match=msg):
        hotspots(raster, ones_kernel)


@dask_array_available
@pytest.mark.parametrize(
    'case,data,exc,msg',
    _HOTSPOTS_DEGENERATE,
    ids=[name for name, *_ in _HOTSPOTS_DEGENERATE],
)
def test_hotspots_degenerate_dask_numpy_2843(case, data, exc, msg):
    """Degenerate rasters raise at compute() on the dask backend.

    The dask backend has to reject degenerate inputs the same way numpy
    does, only lazily: building the graph with ``hotspots`` happens outside
    the ``pytest.raises`` block, so it must succeed, and the error is
    expected from ``compute()``.
    """
    lazy_raster = create_test_raster(data, backend='dask')
    ones_kernel = np.ones((3, 3))
    lazy_result = hotspots(lazy_raster, ones_kernel)
    with pytest.raises(exc, match=msg):
        lazy_result.data.compute()


@cuda_and_cupy_available
@pytest.mark.parametrize(
    'case,data,exc,msg',
    _HOTSPOTS_DEGENERATE,
    ids=[name for name, *_ in _HOTSPOTS_DEGENERATE],
)
def test_hotspots_degenerate_cupy_2843(case, data, exc, msg):
    """Degenerate rasters raise the expected error on the cupy backend."""
    gpu_raster = create_test_raster(data, backend='cupy')
    ones_kernel = np.ones((3, 3))
    with pytest.raises(exc, match=msg):
        hotspots(gpu_raster, ones_kernel)


@cuda_and_cupy_available
@dask_array_available
@pytest.mark.parametrize(
    'case,data,exc,msg',
    _HOTSPOTS_DEGENERATE,
    ids=[name for name, *_ in _HOTSPOTS_DEGENERATE],
)
def test_hotspots_degenerate_dask_cupy_2843(case, data, exc, msg):
    """Degenerate rasters raise at compute() on the dask+cupy backend.

    ``hotspots`` is called outside the ``pytest.raises`` block, so building
    the graph must succeed; the error is expected from ``compute()``.
    """
    lazy_gpu_raster = create_test_raster(data, backend='dask+cupy')
    ones_kernel = np.ones((3, 3))
    lazy_result = hotspots(lazy_gpu_raster, ones_kernel)
    with pytest.raises(exc, match=msg):
        lazy_result.data.compute()


def test_hotspots_kernel_none_2771():
# Regression for #2771: hotspots skipped custom_kernel validation, so a
# None kernel raised AttributeError on kernel.shape instead of ValueError.
Expand Down
Loading