xarray-contrib · brendancol · Jun 3, 2026 · Jun 2, 2026 · Jun 2, 2026 · Jun 3, 2026
diff --git a/xrspatial/gpu_rtx/hillshade.py b/xrspatial/gpu_rtx/hillshade.py
@@ -15,24 +15,28 @@
 
 
 @nb.cuda.jit
-def _generate_primary_rays_kernel(data, H, W):
+def _generate_primary_rays_kernel(data, H, W, ew_res, ns_res):
     """
     A GPU kernel that given a set of x and y discrete coordinates on a raster
     terrain generates in @data a list of parallel rays that represent camera
     rays generated from an orthographic camera that is looking straight down
     at the surface from an origin height 10000.
+
+    Ray origins are placed at the real-world cell centres (column * ew_res,
+    row * ns_res) so they land on the resolution-aware mesh built by
+    ``create_triangulation`` (issue #2861).
     """
     i, j = nb.cuda.grid(2)
     if i >= 0 and i < H and j >= 0 and j < W:
         if (j == W-1):
-            data[i, j, 0] = j - 1e-3
+            data[i, j, 0] = j * ew_res - 1e-3 * ew_res
         else:
-            data[i, j, 0] = j + 1e-3
+            data[i, j, 0] = j * ew_res + 1e-3 * ew_res
 
         if (i == H-1):
-            data[i, j, 1] = i - 1e-3
+            data[i, j, 1] = i * ns_res - 1e-3 * ns_res
         else:
-            data[i, j, 1] = i + 1e-3
+            data[i, j, 1] = i * ns_res + 1e-3 * ns_res
 
         data[i, j, 2] = 10000  # Location of the camera (height)
         data[i, j, 3] = 1e-3
@@ -42,9 +46,10 @@ def _generate_primary_rays_kernel(data, H, W):
         data[i, j, 7] = np.inf
 
 
-def _generate_primary_rays(rays, H, W):
+def _generate_primary_rays(rays, H, W, ew_res, ns_res):
     griddim, blockdim = calc_cuda_dims((H, W))
-    _generate_primary_rays_kernel[griddim, blockdim](rays, H, W)
+    _generate_primary_rays_kernel[griddim, blockdim](
+        rays, H, W, ew_res, ns_res)
     return 0
 
 
@@ -148,7 +153,9 @@ def _hillshade_rt(raster: xr.DataArray,
                   optix: RTX,
                   azimuth: int,
                   angle_altitude: int,
-                  shadows: bool) -> xr.DataArray:
+                  shadows: bool,
+                  ew_res: float,
+                  ns_res: float) -> xr.DataArray:
     H, W = raster.shape
     sun_dir = cupy.array(_get_sun_dir(angle_altitude, azimuth))
 
@@ -158,7 +165,7 @@ def _hillshade_rt(raster: xr.DataArray,
     d_aux = cupy.empty((H, W, 3), np.float32)
     d_output = cupy.empty((H, W), np.float32)
 
-    _generate_primary_rays(d_rays, H, W)
+    _generate_primary_rays(d_rays, H, W, ew_res, ns_res)
     device = cupy.cuda.Device(0)
     device.synchronize()
     res = optix.trace(d_rays, d_hits, W*H)
@@ -193,8 +200,10 @@ def hillshade_rtx(raster: xr.DataArray,
     _check_gpu_memory("hillshade_rtx", H, W)
 
     optix = RTX()
-    create_triangulation(raster, optix)
+    # hillshade does not use the z-scale; only viewshed scales the observer
+    # and target elevations by it.
+    _, ew_res, ns_res = create_triangulation(raster, optix)
 
     return _hillshade_rt(
         raster, optix, azimuth=azimuth, angle_altitude=angle_altitude,
-        shadows=shadows)
+        shadows=shadows, ew_res=ew_res, ns_res=ns_res)
diff --git a/xrspatial/gpu_rtx/mesh_utils.py b/xrspatial/gpu_rtx/mesh_utils.py
@@ -3,14 +3,56 @@
 import numpy as np
 
 
+def _axis_resolution(index, n):
+    """Cell size along one axis, or 1.0 when it cannot be determined.
+
+    Falls back to unit spacing (the old integer-grid behaviour) when the
+    raster has no coordinate index on that axis or the axis is a single
+    cell, so a coordinate-less raster still builds a sensible mesh.
+    """
+    if index is None or n <= 1:
+        return 1.0
+    coords = index.values
+    return abs(float((coords[-1] - coords[0]) / (n - 1)))
+
+
+def _cell_resolution(raster):
+    """Return the (ew_res, ns_res) cell sizes from the raster x/y coords."""
+    H, W = raster.shape
+    ew_res = _axis_resolution(raster.indexes.get('x'), W)
+    ns_res = _axis_resolution(raster.indexes.get('y'), H)
+    return ew_res, ns_res
+
+
 def create_triangulation(raster, optix):
     """Build a triangulated mesh on the GPU from a 2D elevation raster.
 
-    The mesh z-coordinate is scaled by ``max(H, W) / maxH`` so the terrain
-    ratio is suitable for ray tracing.  This requires a positive, finite
-    maximum elevation: an all-zero or all-NaN raster has no elevation
-    variance and yields ``inf`` or ``NaN`` mesh vertices that the OptiX
-    raytracer cannot use sensibly.
+    Mesh vertex x/y coordinates use the real cell resolution
+    (``ew_res``, ``ns_res``) read from the raster's x/y coordinates instead
+    of integer grid indices, so the mesh has the same shape the CPU sweep
+    works on (issue #2861).  The z-coordinate is scaled by the
+    resolution-independent factor ``max(H, W) / maxH`` so the terrain ratio
+    stays suitable for ray tracing; this factor must NOT depend on
+    ``ew_res``/``ns_res`` (a resolution-dependent z-scale would cancel the
+    x/y stretch).
+
+    Note on viewshed parity: ray-triangle occlusion is invariant under a
+    per-axis (linear) scaling of the mesh, and the viewshed output angle is
+    computed from the real ``ew_res``/``ns_res`` downstream, so this change
+    does not by itself alter the viewshed result -- it keeps the GPU mesh
+    geometry consistent with the CPU coordinate convention rather than
+    fixing a result divergence.  See the PR for issue #2861.
+
+    A positive, finite maximum elevation is required: an all-zero or
+    all-NaN raster has no elevation variance and yields ``inf`` or ``NaN``
+    mesh vertices that the OptiX raytracer cannot use sensibly.
+
+    Returns
+    -------
+    (scale, ew_res, ns_res)
+        The z scale factor and the cell resolution used to place the mesh
+        vertices.  Callers need the resolution to cast camera rays at the
+        matching real-world x/y positions.
 
     Raises
     ------
@@ -22,9 +64,13 @@ def create_triangulation(raster, optix):
     # width/height
     H, W = raster.shape
 
+    ew_res, ns_res = _cell_resolution(raster)
+
     # Scale the terrain so that the width is proportional to the height
     # Thus the terrain would be neither too flat nor too steep and
-    # raytracing will give best accuracy
+    # raytracing will give best accuracy.  maxDim stays in grid cells so the
+    # z-scale is a single constant, independent of the anisotropic x/y
+    # resolution applied to the mesh vertices below (see docstring).
     maxH = float(cupy.amax(raster.data))
     maxDim = max(H, W)
 
@@ -50,7 +96,8 @@ def create_triangulation(raster, optix):
         triangles = cupy.empty(num_tris * 3, np.int32)
         # Generate a mesh from the terrain (buffers are on the GPU, so
         # generation happens also on GPU)
-        res = _triangulate_terrain(verts, triangles, raster, scale)
+        res = _triangulate_terrain(verts, triangles, raster, scale,
+                                   ew_res, ns_res)
         if res:
             raise RuntimeError(
                 f"Failed to generate mesh from terrain, error code: {res}")
@@ -67,11 +114,12 @@ def create_triangulation(raster, optix):
         verts = None
         triangles = None
         cupy.get_default_memory_pool().free_all_blocks()
-    return scale
+    return scale, ew_res, ns_res
 
 
 @nb.cuda.jit
-def _triangulate_terrain_kernel(verts, triangles, data, H, W, scale, stride):
+def _triangulate_terrain_kernel(verts, triangles, data, H, W, scale,
+                                ew_res, ns_res, stride):
     global_id = stride + nb.cuda.grid(1)
     if global_id < W*H:
         h = global_id // W
@@ -81,8 +129,8 @@ def _triangulate_terrain_kernel(verts, triangles, data, H, W, scale, stride):
         val = data[h, w]
 
         offset = 3*mesh_map_index
-        verts[offset] = w
-        verts[offset+1] = h
+        verts[offset] = w * ew_res
+        verts[offset+1] = h * ns_res
         verts[offset+2] = val * scale
 
         if w != W - 1 and h != H - 1:
@@ -96,16 +144,16 @@ def _triangulate_terrain_kernel(verts, triangles, data, H, W, scale, stride):
 
 
 @nb.njit(parallel=True)
-def _triangulate_cpu(verts, triangles, data, H, W, scale):
+def _triangulate_cpu(verts, triangles, data, H, W, scale, ew_res, ns_res):
     for h in nb.prange(H):
         for w in range(W):
             mesh_map_index = h * W + w
 
             val = data[h, w]
 
             offset = 3*mesh_map_index
-            verts[offset] = w
-            verts[offset+1] = h
+            verts[offset] = w * ew_res
+            verts[offset+1] = h * ns_res
             verts[offset+2] = val * scale
 
             if w != W - 1 and h != H - 1:
@@ -118,10 +166,12 @@ def _triangulate_cpu(verts, triangles, data, H, W, scale):
                 triangles[offset+5] = np.int32(mesh_map_index)
 
 
-def _triangulate_terrain(verts, triangles, terrain, scale=1):
+def _triangulate_terrain(verts, triangles, terrain, scale=1,
+                         ew_res=1.0, ns_res=1.0):
     H, W = terrain.shape
     if isinstance(terrain.data, np.ndarray):
-        _triangulate_cpu(verts, triangles, terrain.data, H, W, scale)
+        _triangulate_cpu(verts, triangles, terrain.data, H, W, scale,
+                         ew_res, ns_res)
     if isinstance(terrain.data, cupy.ndarray):
         job_size = H*W
         blockdim = 1024
@@ -131,7 +181,8 @@ def _triangulate_terrain(verts, triangles, terrain, scale=1):
         while job_size > 0:
             batch = min(d, griddim)
             _triangulate_terrain_kernel[batch, blockdim](
-                verts, triangles, terrain.data, H, W, scale, offset)
+                verts, triangles, terrain.data, H, W, scale,
+                ew_res, ns_res, offset)
             offset += batch*blockdim
             job_size -= batch*blockdim
     return 0

diff --git a/xrspatial/gpu_rtx/viewshed.py b/xrspatial/gpu_rtx/viewshed.py
@@ -22,24 +22,29 @@
 
 
 @nb.cuda.jit
-def _generate_primary_rays_kernel(data, H, W):
+def _generate_primary_rays_kernel(data, H, W, ew_res, ns_res):
     """
     A GPU kernel that given a set of x and y discrete coordinates on a raster
     terrain generates in @data a list of parallel rays that represent camera
     rays generated from an orthographic camera that is looking straight down
     at the surface from an origin height CAMERA_HEIGHT.
+
+    Ray origins are placed at the real-world cell centres (column * ew_res,
+    row * ns_res) so they land on the resolution-aware mesh built by
+    ``create_triangulation`` (issue #2861).  The small offset that nudges
+    the ray off a vertex scales with the cell size so it stays sub-cell.
     """
     i, j = nb.cuda.grid(2)
     if i >= 0 and i < H and j >= 0 and j < W:
         if (j == W-1):
-            data[i, j, 0] = j - 1e-3
+            data[i, j, 0] = j * ew_res - 1e-3 * ew_res
         else:
-            data[i, j, 0] = j + 1e-3
+            data[i, j, 0] = j * ew_res + 1e-3 * ew_res
 
         if (i == H-1):
-            data[i, j, 1] = i - 1e-3
+            data[i, j, 1] = i * ns_res - 1e-3 * ns_res
         else:
-            data[i, j, 1] = i + 1e-3
+            data[i, j, 1] = i * ns_res + 1e-3 * ns_res
 
         data[i, j, 2] = CAMERA_HEIGHT  # Location of the camera (height)
         data[i, j, 3] = 1e-3
@@ -49,9 +54,10 @@ def _generate_primary_rays_kernel(data, H, W):
         data[i, j, 7] = np.inf
 
 
-def _generate_primary_rays(rays, H, W):
+def _generate_primary_rays(rays, H, W, ew_res, ns_res):
     griddim, blockdim = calc_cuda_dims((H, W))
-    _generate_primary_rays_kernel[griddim, blockdim](rays, H, W)
+    _generate_primary_rays_kernel[griddim, blockdim](
+        rays, H, W, ew_res, ns_res)
     return 0
 
 
@@ -187,6 +193,8 @@ def _viewshed_rt(
     observer_elev: float,
     target_elev: float,
     scale: float,
+    ew_res: float,
+    ns_res: float,
     name: Optional[str] = 'viewshed',
 ) -> xr.DataArray:
 
@@ -214,19 +222,18 @@ def _viewshed_rt(
     y_view = np.where(y_coords == y)[0][0]
     x_view = np.where(x_coords == x)[0][0]
 
-    y_range = (y_coords[0], y_coords[-1])
-    x_range = (x_coords[0], x_coords[-1])
-
-    ew_res = (x_range[1] - x_range[0]) / (W - 1)
-    ns_res = (y_range[1] - y_range[0]) / (H - 1)
+    # ew_res / ns_res are the same resolution used to build the mesh in
+    # create_triangulation, so the camera rays land on the resolution-aware
+    # geometry and the output angle calculation uses consistent units
+    # (issue #2861).
 
     # Device buffers
     d_rays = cupy.empty((H, W, 8), np.float32)
     d_hits = cupy.empty((H, W, 4), np.float32)
     d_visgrid = cupy.empty((H, W), np.float32)
     d_vsrays = cupy.empty((H, W, 8), np.float32)
 
-    _generate_primary_rays(d_rays, H, W)
+    _generate_primary_rays(d_rays, H, W, ew_res, ns_res)
     device = cupy.cuda.Device(0)
     device.synchronize()
     res = optix.trace(d_rays, d_hits, W*H)
@@ -286,7 +293,7 @@ def viewshed_gpu(
     _check_gpu_memory("viewshed_gpu", H, W)
 
     optix = RTX()
-    scale = create_triangulation(raster, optix)
+    scale, ew_res, ns_res = create_triangulation(raster, optix)
 
     return _viewshed_rt(raster, optix, x, y, observer_elev, target_elev,
-                        scale, name)
+                        scale, ew_res, ns_res, name)