diff --git a/xrspatial/gpu_rtx/hillshade.py b/xrspatial/gpu_rtx/hillshade.py index 557c83e0..901ad6e3 100644 --- a/xrspatial/gpu_rtx/hillshade.py +++ b/xrspatial/gpu_rtx/hillshade.py @@ -15,24 +15,28 @@ @nb.cuda.jit -def _generate_primary_rays_kernel(data, H, W): +def _generate_primary_rays_kernel(data, H, W, ew_res, ns_res): """ A GPU kernel that given a set of x and y discrete coordinates on a raster terrain generates in @data a list of parallel rays that represent camera rays generated from an orthographic camera that is looking straight down at the surface from an origin height 10000. + + Ray origins are placed at the real-world cell centres (column * ew_res, + row * ns_res) so they land on the resolution-aware mesh built by + ``create_triangulation`` (issue #2861). """ i, j = nb.cuda.grid(2) if i >= 0 and i < H and j >= 0 and j < W: if (j == W-1): - data[i, j, 0] = j - 1e-3 + data[i, j, 0] = j * ew_res - 1e-3 * ew_res else: - data[i, j, 0] = j + 1e-3 + data[i, j, 0] = j * ew_res + 1e-3 * ew_res if (i == H-1): - data[i, j, 1] = i - 1e-3 + data[i, j, 1] = i * ns_res - 1e-3 * ns_res else: - data[i, j, 1] = i + 1e-3 + data[i, j, 1] = i * ns_res + 1e-3 * ns_res data[i, j, 2] = 10000 # Location of the camera (height) data[i, j, 3] = 1e-3 @@ -42,9 +46,10 @@ def _generate_primary_rays_kernel(data, H, W): data[i, j, 7] = np.inf -def _generate_primary_rays(rays, H, W): +def _generate_primary_rays(rays, H, W, ew_res, ns_res): griddim, blockdim = calc_cuda_dims((H, W)) - _generate_primary_rays_kernel[griddim, blockdim](rays, H, W) + _generate_primary_rays_kernel[griddim, blockdim]( + rays, H, W, ew_res, ns_res) return 0 @@ -148,7 +153,9 @@ def _hillshade_rt(raster: xr.DataArray, optix: RTX, azimuth: int, angle_altitude: int, - shadows: bool) -> xr.DataArray: + shadows: bool, + ew_res: float, + ns_res: float) -> xr.DataArray: H, W = raster.shape sun_dir = cupy.array(_get_sun_dir(angle_altitude, azimuth)) @@ -158,7 +165,7 @@ def _hillshade_rt(raster: xr.DataArray, d_aux = cupy.empty((H, W, 3), np.float32) d_output = cupy.empty((H, W), np.float32) - _generate_primary_rays(d_rays, H, W) + _generate_primary_rays(d_rays, H, W, ew_res, ns_res) device = cupy.cuda.Device(0) device.synchronize() res = optix.trace(d_rays, d_hits, W*H) @@ -193,8 +200,10 @@ def hillshade_rtx(raster: xr.DataArray, _check_gpu_memory("hillshade_rtx", H, W) optix = RTX() - create_triangulation(raster, optix) + # hillshade does not use the z-scale; only viewshed scales the observer + # and target elevations by it. + _, ew_res, ns_res = create_triangulation(raster, optix) return _hillshade_rt( raster, optix, azimuth=azimuth, angle_altitude=angle_altitude, - shadows=shadows) + shadows=shadows, ew_res=ew_res, ns_res=ns_res) diff --git a/xrspatial/gpu_rtx/mesh_utils.py b/xrspatial/gpu_rtx/mesh_utils.py index 2c3c0ed9..90e2b2b5 100644 --- a/xrspatial/gpu_rtx/mesh_utils.py +++ b/xrspatial/gpu_rtx/mesh_utils.py @@ -3,14 +3,56 @@ import numpy as np +def _axis_resolution(index, n): + """Cell size along one axis, or 1.0 when it cannot be determined. + + Falls back to unit spacing (the old integer-grid behaviour) when the + raster has no coordinate index on that axis or the axis is a single + cell, so a coordinate-less raster still builds a sensible mesh. + """ + if index is None or n <= 1: + return 1.0 + coords = index.values + return abs(float((coords[-1] - coords[0]) / (n - 1))) + + +def _cell_resolution(raster): + """Return the (ew_res, ns_res) cell sizes from the raster x/y coords.""" + H, W = raster.shape + ew_res = _axis_resolution(raster.indexes.get('x'), W) + ns_res = _axis_resolution(raster.indexes.get('y'), H) + return ew_res, ns_res + + def create_triangulation(raster, optix): """Build a triangulated mesh on the GPU from a 2D elevation raster. - The mesh z-coordinate is scaled by ``max(H, W) / maxH`` so the terrain - ratio is suitable for ray tracing. This requires a positive, finite - maximum elevation: an all-zero or all-NaN raster has no elevation - variance and yields ``inf`` or ``NaN`` mesh vertices that the OptiX - raytracer cannot use sensibly. + Mesh vertex x/y coordinates use the real cell resolution + (``ew_res``, ``ns_res``) read from the raster's x/y coordinates instead + of integer grid indices, so the mesh has the same shape the CPU sweep + works on (issue #2861). The z-coordinate is scaled by the + resolution-independent factor ``max(H, W) / maxH`` so the terrain ratio + stays suitable for ray tracing; this factor must NOT depend on + ``ew_res``/``ns_res`` (a resolution-dependent z-scale would cancel the + x/y stretch). + + Note on viewshed parity: ray-triangle occlusion is invariant under a + per-axis (linear) scaling of the mesh, and the viewshed output angle is + computed from the real ``ew_res``/``ns_res`` downstream, so this change + does not by itself alter the viewshed result -- it keeps the GPU mesh + geometry consistent with the CPU coordinate convention rather than + fixing a result divergence. See the PR for issue #2861. + + A positive, finite maximum elevation is required: an all-zero or + all-NaN raster has no elevation variance and yields ``inf`` or ``NaN`` + mesh vertices that the OptiX raytracer cannot use sensibly. + + Returns + ------- + (scale, ew_res, ns_res) + The z scale factor and the cell resolution used to place the mesh + vertices. Callers need the resolution to cast camera rays at the + matching real-world x/y positions. Raises ------ @@ -22,9 +64,13 @@ def create_triangulation(raster, optix): # width/height H, W = raster.shape + ew_res, ns_res = _cell_resolution(raster) + # Scale the terrain so that the width is proportional to the height # Thus the terrain would be neither too flat nor too steep and - # raytracing will give best accuracy + # raytracing will give best accuracy. maxDim stays in grid cells so the + # z-scale is a single constant, independent of the anisotropic x/y + # resolution applied to the mesh vertices below (see docstring). maxH = float(cupy.amax(raster.data)) maxDim = max(H, W) @@ -50,7 +96,8 @@ def create_triangulation(raster, optix): triangles = cupy.empty(num_tris * 3, np.int32) # Generate a mesh from the terrain (buffers are on the GPU, so # generation happens also on GPU) - res = _triangulate_terrain(verts, triangles, raster, scale) + res = _triangulate_terrain(verts, triangles, raster, scale, + ew_res, ns_res) if res: raise RuntimeError( f"Failed to generate mesh from terrain, error code: {res}") @@ -67,11 +114,12 @@ def create_triangulation(raster, optix): verts = None triangles = None cupy.get_default_memory_pool().free_all_blocks() - return scale + return scale, ew_res, ns_res @nb.cuda.jit -def _triangulate_terrain_kernel(verts, triangles, data, H, W, scale, stride): +def _triangulate_terrain_kernel(verts, triangles, data, H, W, scale, + ew_res, ns_res, stride): global_id = stride + nb.cuda.grid(1) if global_id < W*H: h = global_id // W @@ -81,8 +129,8 @@ def _triangulate_terrain_kernel(verts, triangles, data, H, W, scale, stride): val = data[h, w] offset = 3*mesh_map_index - verts[offset] = w - verts[offset+1] = h + verts[offset] = w * ew_res + verts[offset+1] = h * ns_res verts[offset+2] = val * scale if w != W - 1 and h != H - 1: @@ -96,7 +144,7 @@ def _triangulate_terrain_kernel(verts, triangles, data, H, W, scale, stride): @nb.njit(parallel=True) -def _triangulate_cpu(verts, triangles, data, H, W, scale): +def _triangulate_cpu(verts, triangles, data, H, W, scale, ew_res, ns_res): for h in nb.prange(H): for w in range(W): mesh_map_index = h * W + w @@ -104,8 +152,8 @@ def _triangulate_cpu(verts, triangles, data, H, W, scale): val = data[h, w] offset = 3*mesh_map_index - verts[offset] = w - verts[offset+1] = h + verts[offset] = w * ew_res + verts[offset+1] = h * ns_res verts[offset+2] = val * scale if w != W - 1 and h != H - 1: @@ -118,10 +166,12 @@ def _triangulate_cpu(verts, triangles, data, H, W, scale): triangles[offset+5] = np.int32(mesh_map_index) -def _triangulate_terrain(verts, triangles, terrain, scale=1): +def _triangulate_terrain(verts, triangles, terrain, scale=1, + ew_res=1.0, ns_res=1.0): H, W = terrain.shape if isinstance(terrain.data, np.ndarray): - _triangulate_cpu(verts, triangles, terrain.data, H, W, scale) + _triangulate_cpu(verts, triangles, terrain.data, H, W, scale, + ew_res, ns_res) if isinstance(terrain.data, cupy.ndarray): job_size = H*W blockdim = 1024 @@ -131,7 +181,8 @@ def _triangulate_terrain(verts, triangles, terrain, scale=1): while job_size > 0: batch = min(d, griddim) _triangulate_terrain_kernel[batch, blockdim]( - verts, triangles, terrain.data, H, W, scale, offset) + verts, triangles, terrain.data, H, W, scale, + ew_res, ns_res, offset) offset += batch*blockdim job_size -= batch*blockdim return 0 diff --git a/xrspatial/gpu_rtx/viewshed.py b/xrspatial/gpu_rtx/viewshed.py index dc109359..a8852ecd 100644 --- a/xrspatial/gpu_rtx/viewshed.py +++ b/xrspatial/gpu_rtx/viewshed.py @@ -22,24 +22,29 @@ @nb.cuda.jit -def _generate_primary_rays_kernel(data, H, W): +def _generate_primary_rays_kernel(data, H, W, ew_res, ns_res): """ A GPU kernel that given a set of x and y discrete coordinates on a raster terrain generates in @data a list of parallel rays that represent camera rays generated from an orthographic camera that is looking straight down at the surface from an origin height CAMERA_HEIGHT. + + Ray origins are placed at the real-world cell centres (column * ew_res, + row * ns_res) so they land on the resolution-aware mesh built by + ``create_triangulation`` (issue #2861). The small offset that nudges + the ray off a vertex scales with the cell size so it stays sub-cell. """ i, j = nb.cuda.grid(2) if i >= 0 and i < H and j >= 0 and j < W: if (j == W-1): - data[i, j, 0] = j - 1e-3 + data[i, j, 0] = j * ew_res - 1e-3 * ew_res else: - data[i, j, 0] = j + 1e-3 + data[i, j, 0] = j * ew_res + 1e-3 * ew_res if (i == H-1): - data[i, j, 1] = i - 1e-3 + data[i, j, 1] = i * ns_res - 1e-3 * ns_res else: - data[i, j, 1] = i + 1e-3 + data[i, j, 1] = i * ns_res + 1e-3 * ns_res data[i, j, 2] = CAMERA_HEIGHT # Location of the camera (height) data[i, j, 3] = 1e-3 @@ -49,9 +54,10 @@ def _generate_primary_rays_kernel(data, H, W): data[i, j, 7] = np.inf -def _generate_primary_rays(rays, H, W): +def _generate_primary_rays(rays, H, W, ew_res, ns_res): griddim, blockdim = calc_cuda_dims((H, W)) - _generate_primary_rays_kernel[griddim, blockdim](rays, H, W) + _generate_primary_rays_kernel[griddim, blockdim]( + rays, H, W, ew_res, ns_res) return 0 @@ -187,6 +193,8 @@ def _viewshed_rt( observer_elev: float, target_elev: float, scale: float, + ew_res: float, + ns_res: float, name: Optional[str] = 'viewshed', ) -> xr.DataArray: @@ -214,11 +222,10 @@ def _viewshed_rt( y_view = np.where(y_coords == y)[0][0] x_view = np.where(x_coords == x)[0][0] - y_range = (y_coords[0], y_coords[-1]) - x_range = (x_coords[0], x_coords[-1]) - - ew_res = (x_range[1] - x_range[0]) / (W - 1) - ns_res = (y_range[1] - y_range[0]) / (H - 1) + # ew_res / ns_res are the same resolution used to build the mesh in + # create_triangulation, so the camera rays land on the resolution-aware + # geometry and the output angle calculation uses consistent units + # (issue #2861). # Device buffers d_rays = cupy.empty((H, W, 8), np.float32) @@ -226,7 +233,7 @@ def _viewshed_rt( d_visgrid = cupy.empty((H, W), np.float32) d_vsrays = cupy.empty((H, W, 8), np.float32) - _generate_primary_rays(d_rays, H, W) + _generate_primary_rays(d_rays, H, W, ew_res, ns_res) device = cupy.cuda.Device(0) device.synchronize() res = optix.trace(d_rays, d_hits, W*H) @@ -286,7 +293,7 @@ def viewshed_gpu( _check_gpu_memory("viewshed_gpu", H, W) optix = RTX() - scale = create_triangulation(raster, optix) + scale, ew_res, ns_res = create_triangulation(raster, optix) return _viewshed_rt(raster, optix, x, y, observer_elev, target_elev, - scale, name) + scale, ew_res, ns_res, name) diff --git a/xrspatial/tests/test_gpu_rtx_mesh.py b/xrspatial/tests/test_gpu_rtx_mesh.py index 57301a0c..15242099 100644 --- a/xrspatial/tests/test_gpu_rtx_mesh.py +++ b/xrspatial/tests/test_gpu_rtx_mesh.py @@ -27,11 +27,16 @@ ) -def _cupy_raster(data): +def _cupy_raster(data, xs=None, ys=None): """Wrap a numpy array as a cupy-backed DataArray.""" import cupy - return xr.DataArray(cupy.asarray(data), dims=["y", "x"]) + coords = {} + if xs is not None: + coords["x"] = xs + if ys is not None: + coords["y"] = ys + return xr.DataArray(cupy.asarray(data), dims=["y", "x"], coords=coords) def test_create_triangulation_rejects_all_zero_raster(): @@ -99,10 +104,12 @@ def test_create_triangulation_accepts_single_nonzero_pixel(): optix = MagicMock() optix.getHash.return_value = int(expected_hash) - scale = create_triangulation(raster, optix) + scale, ew_res, ns_res = create_triangulation(raster, optix) - # max(H, W) / maxH = 4 / 5.0 + # max(H, W) / maxH = 4 / 5.0; no x/y coords -> unit resolution. assert scale == pytest.approx(4.0 / 5.0) + assert ew_res == pytest.approx(1.0) + assert ns_res == pytest.approx(1.0) optix.build.assert_not_called() # hashes matched, no rebuild @@ -118,3 +125,88 @@ def test_create_triangulation_error_message_includes_max_value(): msg = str(excinfo.value) assert "max=" in msg assert "0.0" in msg + + +# --------------------------------------------------------------------------- +# Resolution-aware mesh geometry (issue #2861) +# --------------------------------------------------------------------------- + + +def test_cell_resolution_reads_anisotropic_coords(): + """_cell_resolution returns the real ew_res / ns_res from the coords.""" + from xrspatial.gpu_rtx.mesh_utils import _cell_resolution + + ny, nx = 6, 4 + xs = np.arange(nx, dtype=float) * 2.0 # ew_res = 2 + ys = np.arange(ny, dtype=float) * 5.0 # ns_res = 5 + raster = _cupy_raster(np.ones((ny, nx), dtype=np.float32), xs=xs, ys=ys) + + ew_res, ns_res = _cell_resolution(raster) + assert ew_res == pytest.approx(2.0) + assert ns_res == pytest.approx(5.0) + + +def test_cell_resolution_falls_back_to_unit_without_coords(): + """A coordinate-less raster keeps the old unit-spacing behaviour.""" + from xrspatial.gpu_rtx.mesh_utils import _cell_resolution + + raster = _cupy_raster(np.ones((5, 5), dtype=np.float32)) + ew_res, ns_res = _cell_resolution(raster) + assert ew_res == pytest.approx(1.0) + assert ns_res == pytest.approx(1.0) + + +def test_triangulate_places_vertices_at_real_resolution(): + """Mesh vertex x/y use the real cell resolution, not integer indices. + + This is the core of issue #2861: the GPU mesh was built on integer + grid coordinates and ignored ew_res / ns_res. The vertices must now + sit at (col * ew_res, row * ns_res). + """ + import cupy + + from xrspatial.gpu_rtx.mesh_utils import (_cell_resolution, + _triangulate_terrain) + + ny, nx = 4, 3 + ew_res, ns_res = 2.0, 5.0 + xs = np.arange(nx, dtype=float) * ew_res + ys = np.arange(ny, dtype=float) * ns_res + data = np.zeros((ny, nx), dtype=np.float32) + raster = _cupy_raster(data, xs=xs, ys=ys) + + ew, ns = _cell_resolution(raster) + verts = cupy.empty(ny * nx * 3, np.float32) + triangles = cupy.empty((ny - 1) * (nx - 1) * 2 * 3, np.int32) + _triangulate_terrain(verts, triangles, raster, 1.0, ew, ns) + cupy.cuda.Device(0).synchronize() + + v = verts.get().reshape(ny * nx, 3) + for row in range(ny): + for col in range(nx): + idx = row * nx + col + assert v[idx, 0] == pytest.approx(col * ew_res) + assert v[idx, 1] == pytest.approx(row * ns_res) + + +def test_create_triangulation_returns_resolution(): + """create_triangulation reports the resolution it built the mesh with.""" + from xrspatial.gpu_rtx.mesh_utils import create_triangulation + + ny, nx = 4, 4 + xs = np.arange(nx, dtype=float) * 3.0 + ys = np.arange(ny, dtype=float) * 7.0 + data = np.zeros((ny, nx), dtype=np.float32) + data[1, 1] = 5.0 + raster = _cupy_raster(data, xs=xs, ys=ys) + + # Skip the build path by matching the data hash. + expected_hash = np.uint64(hash(str(raster.data.get())) % (1 << 64)) + optix = MagicMock() + optix.getHash.return_value = int(expected_hash) + + scale, ew_res, ns_res = create_triangulation(raster, optix) + assert ew_res == pytest.approx(3.0) + assert ns_res == pytest.approx(7.0) + # z-scale stays resolution-independent: max(H, W) / maxH = 4 / 5.0. + assert scale == pytest.approx(4.0 / 5.0) diff --git a/xrspatial/tests/test_viewshed.py b/xrspatial/tests/test_viewshed.py index 357a7dd1..0108e966 100644 --- a/xrspatial/tests/test_viewshed.py +++ b/xrspatial/tests/test_viewshed.py @@ -821,6 +821,50 @@ def test_viewshed_nan_input_across_backends(backend): assert vals[1, 1] == INVISIBLE +@pytest.mark.skipif(not has_rtx(), reason="rtxpy not available") +@pytest.mark.parametrize("fine_axis", ["x", "y"]) +def test_viewshed_gpu_anisotropic_matches_cpu(fine_axis): + """GPU viewshed must agree with the CPU sweep on an anisotropic raster. + + Regression test for issue #2861: the GPU mesh was built on integer grid + coordinates and ignored ew_res / ns_res, so the ray tracer worked on a + different terrain shape than the CPU sweep. The mesh now uses the real + cell resolution. On flat terrain the visibility angles are driven + purely by the horizontal geometry, so a resolution mix-up shows up + directly as an angle mismatch against the CPU reference. Both + anisotropy orientations are checked so an ew_res / ns_res swap is + caught. + """ + import cupy as cp + + ny, nx = 9, 9 + terrain = np.full((ny, nx), 1.3) # flat, positive (RTX needs maxH > 0) + if fine_axis == "x": + xs = np.arange(nx, dtype=float) * 1.0 # fine + ys = np.arange(ny, dtype=float) * 7.0 # coarse + else: + xs = np.arange(nx, dtype=float) * 7.0 # coarse + ys = np.arange(ny, dtype=float) * 1.0 # fine + + obs_x, obs_y, obs_elev = xs[4], ys[4], 5 + + cpu = viewshed( + xa.DataArray(terrain.copy(), coords=dict(x=xs, y=ys), dims=["y", "x"]), + x=obs_x, y=obs_y, observer_elev=obs_elev).values + gpu = viewshed( + xa.DataArray(cp.asarray(terrain.copy()), + coords=dict(x=xs, y=ys), dims=["y", "x"]), + x=obs_x, y=obs_y, observer_elev=obs_elev).data.get() + + # Same set of visible cells. + assert ((cpu > INVISIBLE) == (gpu > INVISIBLE)).all() + + # Same visibility angles (skip the observer cell, fixed at 180). + mask = (cpu > INVISIBLE) & (gpu > INVISIBLE) + mask[4, 4] = False + np.testing.assert_allclose(gpu[mask], cpu[mask], atol=0.05) + + @pytest.mark.parametrize("backend", ["numpy", "cupy", "dask"]) @pytest.mark.parametrize("fine_axis", ["x", "y"]) def test_viewshed_max_distance_anisotropic(backend, fine_axis):