Skip to content

Commit 0e5727a

Browse files
committed
feat: add hnsw-rabitq
1 parent f1349cc commit 0e5727a

75 files changed

Lines changed: 12749 additions & 53 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

python/zvec/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
FlatIndexParam,
4545
HnswIndexParam,
4646
HnswQueryParam,
47+
HnswRabitqIndexParam,
4748
IndexOption,
4849
InvertIndexParam,
4950
IVFIndexParam,
@@ -90,6 +91,7 @@
9091
"VectorQuery",
9192
"InvertIndexParam",
9293
"HnswIndexParam",
94+
"HnswRabitqIndexParam",
9395
"FlatIndexParam",
9496
"IVFIndexParam",
9597
"CollectionOption",

python/zvec/model/param/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
FlatIndexParam,
2121
HnswIndexParam,
2222
HnswQueryParam,
23+
HnswRabitqIndexParam,
2324
IndexOption,
2425
InvertIndexParam,
2526
IVFIndexParam,
@@ -34,6 +35,7 @@
3435
"FlatIndexParam",
3536
"HnswIndexParam",
3637
"HnswQueryParam",
38+
"HnswRabitqIndexParam",
3739
"IVFIndexParam",
3840
"IVFQueryParam",
3941
"IndexOption",

src/binding/python/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
2424
$<TARGET_FILE:core_knn_flat_static>
2525
$<TARGET_FILE:core_knn_flat_sparse_static>
2626
$<TARGET_FILE:core_knn_hnsw_static>
27+
$<TARGET_FILE:core_knn_hnsw_rabitq_static>
2728
$<TARGET_FILE:core_knn_hnsw_sparse_static>
2829
$<TARGET_FILE:core_knn_ivf_static>
2930
$<TARGET_FILE:core_knn_cluster_static>
@@ -42,6 +43,7 @@ elseif (APPLE)
4243
-Wl,-force_load,$<TARGET_FILE:core_knn_flat_static>
4344
-Wl,-force_load,$<TARGET_FILE:core_knn_flat_sparse_static>
4445
-Wl,-force_load,$<TARGET_FILE:core_knn_hnsw_static>
46+
-Wl,-force_load,$<TARGET_FILE:core_knn_hnsw_rabitq_static>
4547
-Wl,-force_load,$<TARGET_FILE:core_knn_hnsw_sparse_static>
4648
-Wl,-force_load,$<TARGET_FILE:core_knn_ivf_static>
4749
-Wl,-force_load,$<TARGET_FILE:core_knn_cluster_static>

src/binding/python/model/param/python_param.cc

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,8 @@ static std::string quantize_type_to_string(const QuantizeType type) {
5555
return "UNDEFINED";
5656
case QuantizeType::INT8:
5757
return "INT8";
58+
case QuantizeType::RABITQ:
59+
return "RABITQ";
5860
case QuantizeType::INT4:
5961
return "INT4";
6062
case QuantizeType::FP16:
@@ -376,6 +378,43 @@ encapsulates its construction hyperparameters.
376378
t[3].cast<QuantizeType>());
377379
}));
378380

381+
// binding hnsw rabitq index params
382+
py::class_<HNSWRabitqIndexParams, VectorIndexParams,
383+
std::shared_ptr<HNSWRabitqIndexParams>>
384+
hnsw_rabitq_params(m, "HnswRabitqIndexParam", R"pbdoc(
385+
Parameters for configuring an HNSW (Hierarchical Navigable Small World) index with RabitQ.
386+
HNSW is a graph-based approximate nearest neighbor search index. This class
387+
encapsulates its construction hyperparameters.
388+
Attributes:
389+
metric_type (MetricType): Distance metric used for similarity computation.
390+
Default is ``MetricType.IP`` (inner product).
391+
m (int): Number of bi-directional links created for every new element
392+
during construction. Higher values improve accuracy but increase
393+
memory usage and construction time. Default is 100.
394+
ef_construction (int): Size of the dynamic candidate list for nearest
395+
neighbors during index construction. Larger values yield better
396+
graph quality at the cost of slower build time. Default is 500.
397+
quantize_type (QuantizeType): Optional quantization type for vector
398+
compression (e.g., FP16, INT8). Default is ``QuantizeType.UNDEFINED`` to
399+
disable quantization.
400+
Examples:
401+
>>> from zvec.typing import MetricType, QuantizeType
402+
>>> params = HnswRabitqIndexParam(
403+
... metric_type=MetricType.COSINE,
404+
... m=16,
405+
... ef_construction=200,
406+
... quantize_type=QuantizeType.INT8
407+
... )
408+
>>> print(params)
409+
{'metric_type': 'COSINE', 'm': 16, 'ef_construction': 200, 'quantize_type': 'INT8'}
410+
)pbdoc");
411+
hnsw_rabitq_params.def(
412+
py::init<MetricType, int, int, QuantizeType>(),
413+
py::arg("metric_type") = MetricType::IP,
414+
py::arg("m") = core_interface::kDefaultHnswNeighborCnt,
415+
py::arg("ef_construction") = core_interface::kDefaultHnswEfConstruction,
416+
py::arg("quantize_type") = QuantizeType::UNDEFINED);
417+
379418
// FlatIndexParams
380419
py::class_<FlatIndexParams, VectorIndexParams,
381420
std::shared_ptr<FlatIndexParams>>

src/binding/python/typing/python_type.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,8 @@ Enumeration of supported quantization types for vector compression.
131131
.value("UNDEFINED", QuantizeType::UNDEFINED)
132132
.value("FP16", QuantizeType::FP16)
133133
.value("INT8", QuantizeType::INT8)
134-
.value("INT4", QuantizeType::INT4);
134+
.value("INT4", QuantizeType::INT4)
135+
.value("RABITQ", QuantizeType::RABITQ);
135136
}
136137

137138
void ZVecPyTyping::bind_status(py::module_ &m) {

src/core/algorithm/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,5 @@ cc_directory(flat)
66
cc_directory(flat_sparse)
77
cc_directory(ivf)
88
cc_directory(hnsw)
9-
cc_directory(hnsw_sparse)
9+
cc_directory(hnsw_sparse)
10+
cc_directory(hnsw-rabitq)

src/core/algorithm/cluster/opt_kmeans_cluster.cc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1250,7 +1250,7 @@ int OptKmeansCluster::init(const IndexMeta &meta,
12501250
const ailego::Params &params) {
12511251
auto type_ = meta.data_type();
12521252

1253-
if (meta.metric_name() == "InnerProduct") {
1253+
if (meta.metric_name() == "InnerProduct" || meta.metric_name() == "Cosine") {
12541254
switch (type_) {
12551255
case IndexMeta::DataType::DT_FP16: {
12561256
algorithm_.reset(

src/core/algorithm/flat/flat_builder.cc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,6 @@ int FlatBuilder<BATCH_SIZE>::dump(const IndexDumper::Pointer &dumper) {
127127
return error_code;
128128
}
129129

130-
holder_ = nullptr;
131130
stats_.set_dumped_count(keys.size());
132131
stats_.set_dumped_costtime(stamp.milli_seconds());
133132
return 0;
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake)
2+
include(${PROJECT_ROOT_DIR}/cmake/option.cmake)
3+
4+
cc_library(
5+
NAME core_knn_hnsw_rabitq
6+
STATIC SHARED STRICT ALWAYS_LINK
7+
SRCS *.cc rabitq/*.cc
8+
LIBS core_framework sparsehash rabitqlib
9+
INCS . ${PROJECT_ROOT_DIR}/src ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm
10+
VERSION "${PROXIMA_ZVEC_VERSION}"
11+
)

0 commit comments

Comments
 (0)