Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
ee7ed92
Add == for Unicode strings
lshaw8317 Feb 5, 2026
2412106
Passes tests
lshaw8317 Feb 5, 2026
3a94b39
Incorporated new functions, have to add tests
lshaw8317 Feb 5, 2026
5e4e50e
Passes tests
lshaw8317 Feb 6, 2026
cd588d0
Update to miniexpr dsl branch
lshaw8317 Feb 7, 2026
a3c38c1
Add decorator for partial lazyfuncs
lshaw8317 Feb 9, 2026
16c5e67
Add decorator for partial lazyfuncs
lshaw8317 Feb 9, 2026
3103a94
Add bench
lshaw8317 Feb 9, 2026
fad321b
Remove raise exception when miniexpr fails
lshaw8317 Feb 9, 2026
4655d99
Miniexpr path working for string ops
lshaw8317 Feb 10, 2026
e7d2121
Update to latest miniexpr
lshaw8317 Feb 10, 2026
0f76b55
Add explanation of filtrs_meta
lshaw8317 Feb 11, 2026
4359317
Add arbitrary shuffle meta
lshaw8317 Feb 12, 2026
71a5c45
Add bench for string shuffle andf ops
lshaw8317 Feb 12, 2026
fe3df2a
Add tests for arrays of strings
lshaw8317 Feb 13, 2026
664c7c9
Add optimised compression and tests for constructors
lshaw8317 Feb 13, 2026
9cbc193
Update test
lshaw8317 Feb 13, 2026
4475332
Merge branch 'main' into add_str
lshaw8317 Feb 14, 2026
1c78b31
Clean up merge and update c-blosc2
lshaw8317 Feb 14, 2026
481e30b
Test without stringshuffle C-blosc2
lshaw8317 Feb 14, 2026
83136bc
Stringshuffle isn't the problem
lshaw8317 Feb 14, 2026
1b56b05
Remove optimised string compression
lshaw8317 Feb 14, 2026
8dd9619
Change it back
lshaw8317 Feb 14, 2026
54e9d82
Merge branch 'main' into optimise_cumsum
lshaw8317 Feb 17, 2026
5e3ad6e
Fix string compression typo bug
lshaw8317 Feb 17, 2026
ffabd01
add debugging
lshaw8317 Feb 18, 2026
8fc6f2e
Merge branch 'main' into add_str
lshaw8317 Feb 18, 2026
8d09e04
Remove obsolete me_variable_ex
lshaw8317 Feb 18, 2026
0d884a5
Further debugging
lshaw8317 Feb 18, 2026
dd8fe73
Update to latest c-blosc2 main
FrancescAlted Feb 19, 2026
a28037d
Test without nchunks, nupdates as (0, 0)
FrancescAlted Feb 19, 2026
5447cbf
Restore (0, 0) case
FrancescAlted Feb 19, 2026
110bbcf
Run just the offending tests
FrancescAlted Feb 19, 2026
006950e
Run just the offending tests (II)
FrancescAlted Feb 19, 2026
57f5e7c
Add debugging
lshaw8317 Feb 19, 2026
e20f4e4
Further debugging
lshaw8317 Feb 19, 2026
465b6c7
Debugging (III)
lshaw8317 Feb 19, 2026
b3f9871
Debugging (IV)
lshaw8317 Feb 19, 2026
969aeff
Debugging (V)
lshaw8317 Feb 19, 2026
d6e9901
Fix tests (?)
lshaw8317 Feb 19, 2026
74e65d6
Testing
lshaw8317 Feb 19, 2026
1a93ce8
Make sure to copy cparams for constructors
lshaw8317 Feb 19, 2026
4dcb0de
Enable all tests
lshaw8317 Feb 19, 2026
6918613
Clean up code
lshaw8317 Feb 19, 2026
2821248
Merge branch 'main' into add_str
lshaw8317 Feb 19, 2026
14e15c6
Remove unused isin function
lshaw8317 Feb 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ else()
include(FetchContent)
FetchContent_Declare(blosc2
GIT_REPOSITORY https://github.com/Blosc/c-blosc2
GIT_TAG cdc78596270c1e235d29436d3e730f0f403ddca9 # fix resize
GIT_TAG f27bb87c51443e237dab4c68d445480b65ae7688 # malloc(0) -> NULL
)
FetchContent_MakeAvailable(blosc2)
include_directories("${blosc2_SOURCE_DIR}/include")
Expand Down
47 changes: 47 additions & 0 deletions bench/ndarray/stringops_bench.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
#######################################################################
# Copyright (c) 2019-present, Blosc Development Team <blosc@blosc.org>
# All rights reserved.
#
# SPDX-License-Identifier: BSD-3-Clause
#######################################################################

"""
Compare miniexpr and non-miniexpr paths for string ops.
"""

import time
import numpy as np
import blosc2
from blosc2.lazyexpr import _toggle_miniexpr

# Alternative dataset (random fixed-width strings), kept for experimentation:
# nparr = np.random.randint(low=0, high=128, size=(N, 10), dtype=np.uint32)
# nparr = nparr.view('S40').astype('U10')

N = int(1e5)
nparr = np.repeat(np.array(['josé', 'pepe', 'francisco']), N)

# Work on a private copy of the defaults.  ``blosc2.cparams_dflts`` is a
# module-level object, so editing its ``filters`` / ``filters_meta`` lists in
# place would leak this benchmark's settings into every later array creation
# in the same process.
cparams = dict(blosc2.cparams_dflts)
cparams["filters"] = list(cparams["filters"])
cparams["filters_meta"] = list(cparams["filters_meta"])
cparams["filters"][-1] = blosc2.Filter.SHUFFLE

# Baseline: shuffle with filters_meta == 0, i.e. the number of byte streams
# defaults to the typesize.  The cparams are passed explicitly so the baseline
# no longer depends on mutated global state.
# NOTE(review): the printed label says "without filter" although SHUFFLE is
# active here; label kept unchanged so existing output stays comparable.
cparams["filters_meta"][-1] = 0  # use default (typesize)
arr1 = blosc2.asarray(nparr, cparams=cparams)
print(f"cratio without filter: {arr1.cratio}")

# Same data, but shuffling with 4 byte streams.
cparams["filters_meta"][-1] = 4
arr1 = blosc2.asarray(nparr, cparams=cparams)
print(f"cratio with filter: {arr1.cratio}")

# Second operand: constant array with the same shape and partitioning as arr1.
arr2 = blosc2.full(arr1.shape, 'francisco', blocks=arr1.blocks, chunks=arr1.chunks)

names = ['==', 'contains', 'startswith', 'endswith']
functuple = (lambda a, b : a==b, blosc2.contains, blosc2.startswith, blosc2.endswith)


def _timed_eval(func):
    """Return the seconds needed to evaluate ``func(arr1, arr2)``.

    Only the evaluation (``expr[()]``) is timed; building the expression
    object is excluded, as in the original benchmark.
    """
    expr = func(arr1, arr2)
    # perf_counter is monotonic and high resolution, unlike time.time().
    tic = time.perf_counter()
    expr[()]
    return time.perf_counter() - tic


for name, func in zip(names, functuple):
    # First pass: miniexpr enabled.
    print(f'{name} took {round(_timed_eval(func), 3)}s for miniexpr')
    # Second pass: miniexpr disabled.
    _toggle_miniexpr(False)
    try:
        print(f'{name} took {round(_timed_eval(func), 3)}s for normal fast path')
    finally:
        # Always re-enable miniexpr, even if the evaluation above raised.
        _toggle_miniexpr(True)
4 changes: 0 additions & 4 deletions doc/reference/index_funcs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,27 +8,23 @@ The following functions are useful for performing indexing and other associated
.. autosummary::

broadcast_to
concat
count_nonzero
expand_dims
indices
meshgrid
sort
squeeze
stack
take
take_along_axis



.. autofunction:: blosc2.broadcast_to
.. autofunction:: blosc2.concat
.. autofunction:: blosc2.count_nonzero
.. autofunction:: blosc2.expand_dims
.. autofunction:: blosc2.indices
.. autofunction:: blosc2.meshgrid
.. autofunction:: blosc2.sort
.. autofunction:: blosc2.squeeze
.. autofunction:: blosc2.stack
.. autofunction:: blosc2.take
.. autofunction:: blosc2.take_along_axis
19 changes: 19 additions & 0 deletions src/blosc2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,21 @@ class Codec(Enum):
class Filter(Enum):
"""
Available filters.
For each of the filters, the integer value passed to ``filters_meta`` has the following meaning:

- NOFILTER: Not used
- SHUFFLE: Number of byte streams for shuffle (if 0 defaults to typesize of array).
- BITSHUFFLE: Not used
- DELTA: Not used (bitwise XOR)
- TRUNC_PREC: Number of bits to which to truncate float
- NDCELL: Cellshape (i.e. for a 3-dim dataset, meta = 4 implies cellshape is 4x4x4)
- NDMEAN: Cellshape (i.e. for a 3-dim dataset, meta = 4 implies cellshape is 4x4x4)
- BYTEDELTA: Number of byte streams for delta
- INT_TRUNC: Number of bits to which to truncate integer

For TRUNC_PREC and INT_TRUNC, positive values specify number of bits to keep; negative values specify number of bits to zero.

For NDCELL/NDMEAN see this explanation for `NDCELL <https://github.com/Blosc/c-blosc2/blob/main/plugins/filters/ndcell/README.md>`_ and this for `NDMEAN <https://github.com/Blosc/c-blosc2/blob/main/plugins/filters/ndmean/README.md>`_.
"""

NOFILTER = 0
Expand Down Expand Up @@ -598,6 +613,7 @@ def _raise(exc):
cumulative_prod,
cumulative_sum,
divide,
endswith,
equal,
exp,
expm1,
Expand Down Expand Up @@ -645,6 +661,7 @@ def _raise(exc):
sqrt,
square,
squeeze,
startswith,
std,
subtract,
sum,
Expand Down Expand Up @@ -769,6 +786,7 @@ def _raise(exc):
"detect_number_of_cores",
"divide",
"dparams_dflts",
"endswith",
"empty",
"empty_like",
"equal",
Expand Down Expand Up @@ -877,6 +895,7 @@ def _raise(exc):
"square",
"squeeze",
"stack",
"startswith",
"std",
"storage_dflts",
"subtract",
Expand Down
11 changes: 6 additions & 5 deletions src/blosc2/blosc2_ext.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -571,9 +571,6 @@ cdef extern from "miniexpr.h":
int ncode
void *parameters[1]

int me_compile(const char *expression, const me_variable *variables,
int var_count, me_dtype dtype, int *error, me_expr **out)

int me_compile_nd_jit(const char *expression, const me_variable *variables,
int var_count, me_dtype dtype, int ndims,
const int64_t *shape, const int32_t *chunkshape,
Expand Down Expand Up @@ -1436,7 +1433,7 @@ cdef class SChunk:
return dst

if size < 0:
raise RuntimeError("Error while decompressing the specified chunk")
raise RuntimeError(f"Error while decompressing the specified chunk, error code: {size}")

def get_chunk(self, nchunk):
cdef uint8_t *chunk
Expand Down Expand Up @@ -3004,6 +3001,7 @@ cdef class NDArray:
var.address = NULL # chunked compile: addresses provided later
var.type = 0 # auto-set to ME_VARIABLE inside compiler
var.context = NULL
var.itemsize = v.dtype.itemsize if v.dtype.num == 19 else 0 # only store item type if string

cdef int error = 0
expression = expression.encode("utf-8") if isinstance(expression, str) else expression
Expand Down Expand Up @@ -3124,7 +3122,10 @@ cdef b2nd_context_t* create_b2nd_context(shape, chunks, blocks, dtype, kwargs):
if 'cparams' in kwargs:
kwargs['cparams']['typesize'] = typesize
else:
kwargs['cparams'] = {'typesize': typesize}
kwargs['cparams'] = {'typesize': typesize} # last filter is shuffle
if isinstance(dtype, np.dtypes.StrDType) or dtype == np.str_:
kwargs['cparams']['filters'] = [blosc2.Filter.NOFILTER] * 5 + [blosc2.Filter.SHUFFLE]
kwargs['cparams']['filters_meta'] = [0] * 5 + [4] # unicode char bytesize
if dtype.kind == 'V':
str_dtype = str(dtype)
else:
Expand Down
2 changes: 1 addition & 1 deletion src/blosc2/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1576,7 +1576,7 @@ def compute_chunks_blocks( # noqa: C901
raise ValueError("blocks cannot be greater than chunks")
return chunks, blocks

cparams = kwargs.get("cparams") or copy.deepcopy(blosc2.cparams_dflts)
cparams = kwargs.get("cparams") or blosc2.CParams() # just get defaults
if isinstance(cparams, blosc2.CParams):
cparams = asdict(cparams)
# Typesize in dtype always has preference over typesize in cparams
Expand Down
Loading
Loading