Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
47730f3
perf: Skip bounds check for initial elements in 2^n hypercube
mkitti Feb 13, 2026
865df2a
lint:Use a list comprehension rather than a for loop
mkitti Feb 13, 2026
ce0099c
perf: Add decode_morton_vectorized
mkitti Feb 13, 2026
1b1076f
perf:Replace math.log2() with bit_length()
mkitti Feb 13, 2026
47a68eb
perf:Use magic numbers for 2D and 3D
mkitti Feb 13, 2026
6fb6d00
perf:Add 4D Morton magic numbers
mkitti Feb 13, 2026
db31842
perf:Add Morton magic numbers for 5D
mkitti Feb 13, 2026
f9952f1
perf:Remove singleton dimensions to reduce ndims
mkitti Feb 13, 2026
aedce5a
Add changes
mkitti Feb 13, 2026
ef18210
fix:Address type annotation and linting issues
mkitti Feb 13, 2026
24dcbd5
perf:Remove magic number functions
mkitti Feb 13, 2026
7b3db07
test:Add power of 2 sharding indexing tests
mkitti Feb 13, 2026
443b5d4
test: Add Morton order benchmarks with cache clearing
mkitti Feb 13, 2026
1cdcbdf
fix:Bound LRU cache of _morton_order to 16
mkitti Feb 13, 2026
536f520
Merge branch 'main' into mkitti-morton-decode-optimization
d-v-b Feb 13, 2026
65205b3
Merge branch 'main' into mkitti-morton-decode-optimization
d-v-b Feb 13, 2026
c872e2b
test:Add a single chunk test for a large shard
mkitti Feb 13, 2026
1cad983
test:Add indexing benchmarks for writing
mkitti Feb 16, 2026
a666211
tests:Add single chunk write test for sharding
mkitti Feb 16, 2026
38d2e04
Merge branch 'main' into mkitti-morton-decode-optimization
d-v-b Feb 18, 2026
4a342c9
Update 3708.misc.md
mkitti Feb 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/3708.misc.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Optimize Morton order computation with hypercube optimization, vectorized decoding, and singleton dimension removal, providing 10-45x speedup for typical chunk shapes.
94 changes: 90 additions & 4 deletions src/zarr/core/indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1452,7 +1452,7 @@ def make_slice_selection(selection: Any) -> list[slice]:
def decode_morton(z: int, chunk_shape: tuple[int, ...]) -> tuple[int, ...]:
# Inspired by compressed morton code as implemented in Neuroglancer
# https://github.com/google/neuroglancer/blob/master/src/neuroglancer/datasource/precomputed/volume.md#compressed-morton-code
bits = tuple(math.ceil(math.log2(c)) for c in chunk_shape)
bits = tuple((c - 1).bit_length() for c in chunk_shape)
max_coords_bits = max(bits)
input_bit = 0
input_value = z
Expand All @@ -1467,16 +1467,102 @@ def decode_morton(z: int, chunk_shape: tuple[int, ...]) -> tuple[int, ...]:
return tuple(out)


def decode_morton_vectorized(
    z: npt.NDArray[np.intp], chunk_shape: tuple[int, ...]
) -> npt.NDArray[np.intp]:
    """Vectorized Morton code decoding for multiple z values.

    Decodes each interleaved Morton code in ``z`` into per-dimension
    coordinates using the same bit-interleaving scheme as the scalar
    ``decode_morton``, but operating on the whole array at once.

    NOTE: deliberately NOT wrapped in ``functools.lru_cache`` — ndarray
    arguments are unhashable, so a cached version would raise ``TypeError``
    on every call. Callers that want caching should cache their own results
    (as ``_morton_order`` does) keyed on the hashable ``chunk_shape``.

    Parameters
    ----------
    z : ndarray
        1D array of Morton codes to decode.
    chunk_shape : tuple of int
        Shape defining the coordinate space.

    Returns
    -------
    ndarray
        2D array of shape (len(z), len(chunk_shape)) containing the decoded
        coordinates, one row per Morton code.
    """
    n_dims = len(chunk_shape)
    # (c - 1).bit_length() == ceil(log2(c)) for c >= 1, computed exactly in
    # integer arithmetic (no floating-point round-off from math.log2).
    bits = tuple((c - 1).bit_length() for c in chunk_shape)

    max_coords_bits = max(bits) if bits else 0
    out = np.zeros((len(z), n_dims), dtype=np.intp)

    input_bit = 0
    for coord_bit in range(max_coords_bits):
        for dim in range(n_dims):
            # Dimensions narrower than the current bit plane contribute no
            # bit at this level, so they are skipped in the interleaving.
            if coord_bit < bits[dim]:
                # Extract bit at position input_bit from all z values at once
                bit_values = (z >> input_bit) & 1
                # Place that bit at coord_bit position in dimension dim
                out[:, dim] |= bit_values << coord_bit
                input_bit += 1

    return out


@lru_cache(maxsize=16)
def _morton_order(chunk_shape: tuple[int, ...]) -> tuple[tuple[int, ...], ...]:
    """Return every coordinate of ``chunk_shape`` in Morton (Z-curve) order.

    Cached with a bound of 16 entries because the same chunk shape is
    typically queried repeatedly while indexing a sharded array.

    Parameters
    ----------
    chunk_shape : tuple of int
        Shape of the coordinate space to enumerate.

    Returns
    -------
    tuple of tuple of int
        All ``product(chunk_shape)`` coordinates, in Morton order.
    """
    if not chunk_shape:
        # 0-dimensional space: the single (empty) coordinate. Without this
        # guard, min(chunk_shape) below would raise ValueError.
        return ((),)

    n_total = product(chunk_shape)
    if n_total == 0:
        return ()

    # Optimization: remove singleton dimensions to reduce the effective
    # dimensionality for shapes like (1, 1, 32, 32, 32). Compute the Morton
    # order on the squeezed shape, then re-insert zeros for the singletons.
    if any(s == 1 for s in chunk_shape):
        squeezed_shape = tuple(s for s in chunk_shape if s != 1)
        if not squeezed_shape:
            # All dimensions are singletons: the only point is the origin.
            return ((0,) * len(chunk_shape),)
        squeezed_order = _morton_order(squeezed_shape)
        # Expand each squeezed coordinate back to full rank, with 0 in every
        # singleton position.
        expanded: list[tuple[int, ...]] = []
        for coord in squeezed_order:
            full_coord: list[int] = []
            squeezed_idx = 0
            for size in chunk_shape:
                if size == 1:
                    full_coord.append(0)
                else:
                    full_coord.append(coord[squeezed_idx])
                    squeezed_idx += 1
            expanded.append(tuple(full_coord))
        return tuple(expanded)

    n_dims = len(chunk_shape)

    # Find the largest power-of-2 hypercube that fits within chunk_shape.
    # Every Morton code below hypercube_size ** n_dims decodes to a
    # coordinate inside that hypercube, hence inside chunk_shape, so no
    # bounds check is needed for that prefix. The earlier early-returns
    # guarantee min_dim >= 2 here, so power >= 1 and n_hypercube >= 1.
    min_dim = min(chunk_shape)
    power = min_dim.bit_length() - 1  # floor(log2(min_dim))
    hypercube_size = 1 << power  # 2 ** power
    n_hypercube = hypercube_size**n_dims

    # Decode the guaranteed-in-bounds prefix in one vectorized pass.
    z_values = np.arange(n_hypercube, dtype=np.intp)
    hypercube_coords = decode_morton_vectorized(z_values, chunk_shape)
    order: list[tuple[int, ...]] = [tuple(row) for row in hypercube_coords]

    # The remaining Morton codes may decode out of bounds, so each candidate
    # must be checked against chunk_shape before it is kept.
    z = n_hypercube
    while len(order) < n_total:
        m = decode_morton(z, chunk_shape)
        if all(x < y for x, y in zip(m, chunk_shape, strict=False)):
            order.append(m)
        z += 1

    return tuple(order)


Expand Down
Loading