Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 20 additions & 11 deletions xrspatial/gpu_rtx/hillshade.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,24 +15,28 @@


@nb.cuda.jit
def _generate_primary_rays_kernel(data, H, W):
def _generate_primary_rays_kernel(data, H, W, ew_res, ns_res):
"""
A GPU kernel that given a set of x and y discrete coordinates on a raster
terrain generates in @data a list of parallel rays that represent camera
rays generated from an orthographic camera that is looking straight down
at the surface from an origin height 10000.

Ray origins are placed at the real-world cell centres (column * ew_res,
row * ns_res) so they land on the resolution-aware mesh built by
``create_triangulation`` (issue #2861).
"""
i, j = nb.cuda.grid(2)
if i >= 0 and i < H and j >= 0 and j < W:
if (j == W-1):
data[i, j, 0] = j - 1e-3
data[i, j, 0] = j * ew_res - 1e-3 * ew_res
else:
data[i, j, 0] = j + 1e-3
data[i, j, 0] = j * ew_res + 1e-3 * ew_res

if (i == H-1):
data[i, j, 1] = i - 1e-3
data[i, j, 1] = i * ns_res - 1e-3 * ns_res
else:
data[i, j, 1] = i + 1e-3
data[i, j, 1] = i * ns_res + 1e-3 * ns_res

data[i, j, 2] = 10000 # Location of the camera (height)
data[i, j, 3] = 1e-3
Expand All @@ -42,9 +46,10 @@ def _generate_primary_rays_kernel(data, H, W):
data[i, j, 7] = np.inf


def _generate_primary_rays(rays, H, W):
def _generate_primary_rays(rays, H, W, ew_res, ns_res):
griddim, blockdim = calc_cuda_dims((H, W))
_generate_primary_rays_kernel[griddim, blockdim](rays, H, W)
_generate_primary_rays_kernel[griddim, blockdim](
rays, H, W, ew_res, ns_res)
return 0


Expand Down Expand Up @@ -148,7 +153,9 @@ def _hillshade_rt(raster: xr.DataArray,
optix: RTX,
azimuth: int,
angle_altitude: int,
shadows: bool) -> xr.DataArray:
shadows: bool,
ew_res: float,
ns_res: float) -> xr.DataArray:
H, W = raster.shape
sun_dir = cupy.array(_get_sun_dir(angle_altitude, azimuth))

Expand All @@ -158,7 +165,7 @@ def _hillshade_rt(raster: xr.DataArray,
d_aux = cupy.empty((H, W, 3), np.float32)
d_output = cupy.empty((H, W), np.float32)

_generate_primary_rays(d_rays, H, W)
_generate_primary_rays(d_rays, H, W, ew_res, ns_res)
device = cupy.cuda.Device(0)
device.synchronize()
res = optix.trace(d_rays, d_hits, W*H)
Expand Down Expand Up @@ -193,8 +200,10 @@ def hillshade_rtx(raster: xr.DataArray,
_check_gpu_memory("hillshade_rtx", H, W)

optix = RTX()
create_triangulation(raster, optix)
# hillshade does not use the z-scale; only viewshed scales the observer
# and target elevations by it.
_, ew_res, ns_res = create_triangulation(raster, optix)

return _hillshade_rt(
raster, optix, azimuth=azimuth, angle_altitude=angle_altitude,
shadows=shadows)
shadows=shadows, ew_res=ew_res, ns_res=ns_res)
85 changes: 68 additions & 17 deletions xrspatial/gpu_rtx/mesh_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,56 @@
import numpy as np


def _axis_resolution(index, n):
"""Cell size along one axis, or 1.0 when it cannot be determined.

Falls back to unit spacing (the old integer-grid behaviour) when the
raster has no coordinate index on that axis or the axis is a single
cell, so a coordinate-less raster still builds a sensible mesh.
"""
if index is None or n <= 1:
return 1.0
coords = index.values
return abs(float((coords[-1] - coords[0]) / (n - 1)))


def _cell_resolution(raster):
"""Return the (ew_res, ns_res) cell sizes from the raster x/y coords."""
H, W = raster.shape
ew_res = _axis_resolution(raster.indexes.get('x'), W)
ns_res = _axis_resolution(raster.indexes.get('y'), H)
return ew_res, ns_res


def create_triangulation(raster, optix):
"""Build a triangulated mesh on the GPU from a 2D elevation raster.

The mesh z-coordinate is scaled by ``max(H, W) / maxH`` so the terrain
ratio is suitable for ray tracing. This requires a positive, finite
maximum elevation: an all-zero or all-NaN raster has no elevation
variance and yields ``inf`` or ``NaN`` mesh vertices that the OptiX
raytracer cannot use sensibly.
Mesh vertex x/y coordinates use the real cell resolution
(``ew_res``, ``ns_res``) read from the raster's x/y coordinates instead
of integer grid indices, so the mesh has the same shape the CPU sweep
works on (issue #2861). The z-coordinate is scaled by the
resolution-independent factor ``max(H, W) / maxH`` so the terrain ratio
stays suitable for ray tracing; this factor must NOT depend on
``ew_res``/``ns_res`` (a resolution-dependent z-scale would cancel the
x/y stretch).

Note on viewshed parity: ray-triangle occlusion is invariant under a
per-axis (linear) scaling of the mesh, and the viewshed output angle is
computed from the real ``ew_res``/``ns_res`` downstream, so this change
does not by itself alter the viewshed result -- it keeps the GPU mesh
geometry consistent with the CPU coordinate convention rather than
fixing a result divergence. See the PR for issue #2861.

A positive, finite maximum elevation is required: an all-zero or
all-NaN raster has no elevation variance and yields ``inf`` or ``NaN``
mesh vertices that the OptiX raytracer cannot use sensibly.

Returns
-------
(scale, ew_res, ns_res)
The z scale factor and the cell resolution used to place the mesh
vertices. Callers need the resolution to cast camera rays at the
matching real-world x/y positions.

Raises
------
Expand All @@ -22,9 +64,13 @@ def create_triangulation(raster, optix):
# width/height
H, W = raster.shape

ew_res, ns_res = _cell_resolution(raster)

# Scale the terrain so that the width is proportional to the height
# Thus the terrain would be neither too flat nor too steep and
# raytracing will give best accuracy
# raytracing will give best accuracy. maxDim stays in grid cells so the
# z-scale is a single constant, independent of the anisotropic x/y
# resolution applied to the mesh vertices below (see docstring).
maxH = float(cupy.amax(raster.data))
maxDim = max(H, W)

Expand All @@ -50,7 +96,8 @@ def create_triangulation(raster, optix):
triangles = cupy.empty(num_tris * 3, np.int32)
# Generate a mesh from the terrain (buffers are on the GPU, so
# generation happens also on GPU)
res = _triangulate_terrain(verts, triangles, raster, scale)
res = _triangulate_terrain(verts, triangles, raster, scale,
ew_res, ns_res)
if res:
raise RuntimeError(
f"Failed to generate mesh from terrain, error code: {res}")
Expand All @@ -67,11 +114,12 @@ def create_triangulation(raster, optix):
verts = None
triangles = None
cupy.get_default_memory_pool().free_all_blocks()
return scale
return scale, ew_res, ns_res


@nb.cuda.jit
def _triangulate_terrain_kernel(verts, triangles, data, H, W, scale, stride):
def _triangulate_terrain_kernel(verts, triangles, data, H, W, scale,
ew_res, ns_res, stride):
global_id = stride + nb.cuda.grid(1)
if global_id < W*H:
h = global_id // W
Expand All @@ -81,8 +129,8 @@ def _triangulate_terrain_kernel(verts, triangles, data, H, W, scale, stride):
val = data[h, w]

offset = 3*mesh_map_index
verts[offset] = w
verts[offset+1] = h
verts[offset] = w * ew_res
verts[offset+1] = h * ns_res
verts[offset+2] = val * scale

if w != W - 1 and h != H - 1:
Expand All @@ -96,16 +144,16 @@ def _triangulate_terrain_kernel(verts, triangles, data, H, W, scale, stride):


@nb.njit(parallel=True)
def _triangulate_cpu(verts, triangles, data, H, W, scale):
def _triangulate_cpu(verts, triangles, data, H, W, scale, ew_res, ns_res):
for h in nb.prange(H):
for w in range(W):
mesh_map_index = h * W + w

val = data[h, w]

offset = 3*mesh_map_index
verts[offset] = w
verts[offset+1] = h
verts[offset] = w * ew_res
verts[offset+1] = h * ns_res
verts[offset+2] = val * scale

if w != W - 1 and h != H - 1:
Expand All @@ -118,10 +166,12 @@ def _triangulate_cpu(verts, triangles, data, H, W, scale):
triangles[offset+5] = np.int32(mesh_map_index)


def _triangulate_terrain(verts, triangles, terrain, scale=1):
def _triangulate_terrain(verts, triangles, terrain, scale=1,
ew_res=1.0, ns_res=1.0):
H, W = terrain.shape
if isinstance(terrain.data, np.ndarray):
_triangulate_cpu(verts, triangles, terrain.data, H, W, scale)
_triangulate_cpu(verts, triangles, terrain.data, H, W, scale,
ew_res, ns_res)
if isinstance(terrain.data, cupy.ndarray):
job_size = H*W
blockdim = 1024
Expand All @@ -131,7 +181,8 @@ def _triangulate_terrain(verts, triangles, terrain, scale=1):
while job_size > 0:
batch = min(d, griddim)
_triangulate_terrain_kernel[batch, blockdim](
verts, triangles, terrain.data, H, W, scale, offset)
verts, triangles, terrain.data, H, W, scale,
ew_res, ns_res, offset)
offset += batch*blockdim
job_size -= batch*blockdim
return 0
Expand Down
37 changes: 22 additions & 15 deletions xrspatial/gpu_rtx/viewshed.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,24 +22,29 @@


@nb.cuda.jit
def _generate_primary_rays_kernel(data, H, W):
def _generate_primary_rays_kernel(data, H, W, ew_res, ns_res):
"""
A GPU kernel that given a set of x and y discrete coordinates on a raster
terrain generates in @data a list of parallel rays that represent camera
rays generated from an orthographic camera that is looking straight down
at the surface from an origin height CAMERA_HEIGHT.

Ray origins are placed at the real-world cell centres (column * ew_res,
row * ns_res) so they land on the resolution-aware mesh built by
``create_triangulation`` (issue #2861). The small offset that nudges
the ray off a vertex scales with the cell size so it stays sub-cell.
"""
i, j = nb.cuda.grid(2)
if i >= 0 and i < H and j >= 0 and j < W:
if (j == W-1):
data[i, j, 0] = j - 1e-3
data[i, j, 0] = j * ew_res - 1e-3 * ew_res
else:
data[i, j, 0] = j + 1e-3
data[i, j, 0] = j * ew_res + 1e-3 * ew_res

if (i == H-1):
data[i, j, 1] = i - 1e-3
data[i, j, 1] = i * ns_res - 1e-3 * ns_res
else:
data[i, j, 1] = i + 1e-3
data[i, j, 1] = i * ns_res + 1e-3 * ns_res

data[i, j, 2] = CAMERA_HEIGHT # Location of the camera (height)
data[i, j, 3] = 1e-3
Expand All @@ -49,9 +54,10 @@ def _generate_primary_rays_kernel(data, H, W):
data[i, j, 7] = np.inf


def _generate_primary_rays(rays, H, W):
def _generate_primary_rays(rays, H, W, ew_res, ns_res):
griddim, blockdim = calc_cuda_dims((H, W))
_generate_primary_rays_kernel[griddim, blockdim](rays, H, W)
_generate_primary_rays_kernel[griddim, blockdim](
rays, H, W, ew_res, ns_res)
return 0


Expand Down Expand Up @@ -187,6 +193,8 @@ def _viewshed_rt(
observer_elev: float,
target_elev: float,
scale: float,
ew_res: float,
ns_res: float,
name: Optional[str] = 'viewshed',
) -> xr.DataArray:

Expand Down Expand Up @@ -214,19 +222,18 @@ def _viewshed_rt(
y_view = np.where(y_coords == y)[0][0]
x_view = np.where(x_coords == x)[0][0]

y_range = (y_coords[0], y_coords[-1])
x_range = (x_coords[0], x_coords[-1])

ew_res = (x_range[1] - x_range[0]) / (W - 1)
ns_res = (y_range[1] - y_range[0]) / (H - 1)
# ew_res / ns_res are the same resolution used to build the mesh in
# create_triangulation, so the camera rays land on the resolution-aware
# geometry and the output angle calculation uses consistent units
# (issue #2861).

# Device buffers
d_rays = cupy.empty((H, W, 8), np.float32)
d_hits = cupy.empty((H, W, 4), np.float32)
d_visgrid = cupy.empty((H, W), np.float32)
d_vsrays = cupy.empty((H, W, 8), np.float32)

_generate_primary_rays(d_rays, H, W)
_generate_primary_rays(d_rays, H, W, ew_res, ns_res)
device = cupy.cuda.Device(0)
device.synchronize()
res = optix.trace(d_rays, d_hits, W*H)
Expand Down Expand Up @@ -286,7 +293,7 @@ def viewshed_gpu(
_check_gpu_memory("viewshed_gpu", H, W)

optix = RTX()
scale = create_triangulation(raster, optix)
scale, ew_res, ns_res = create_triangulation(raster, optix)

return _viewshed_rt(raster, optix, x, y, observer_elev, target_elev,
scale, name)
scale, ew_res, ns_res, name)
Loading
Loading