Skip to content

Commit 475a6c2

Browse files
LuQQiuclaude
andcommitted
refactor: move ann.proto compilation to lance crate, use VectorMetricType enum
Address PR review feedback: - Move ann.proto from lance-datafusion to lance crate (package lance.pb) so it can reference types from lance-index without circular deps - Use lance.index.pb.VectorMetricType enum instead of string for metric_type - Rename key_arrow_ipc to query_vector_arrow_ipc - Make minimum_nprobes and dist_q_c optional - Add comment about prefilter_source handled by DataFusion child serialization - Add comment explaining ANNIvfSubIndexExecProto purpose Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent d11def8 commit 475a6c2

8 files changed

Lines changed: 72 additions & 23 deletions

File tree

Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

protos/ann.proto

Lines changed: 15 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,32 +3,39 @@
33

44
syntax = "proto3";
55

6-
package lance.datafusion;
6+
package lance.pb;
77

88
import "table_identifier.proto";
99
import "table.proto";
10+
import "index.proto";
1011

1112
// Serialized vector query parameters.
1213
message VectorQueryProto {
1314
// Query vector as Arrow IPC bytes (supports Float16, Float32, Float64, UInt8, etc.)
14-
bytes key_arrow_ipc = 1;
15+
bytes query_vector_arrow_ipc = 1;
1516
string column = 2;
1617
uint32 k = 3;
1718
optional float lower_bound = 4;
1819
optional float upper_bound = 5;
19-
uint32 minimum_nprobes = 6;
20+
optional uint32 minimum_nprobes = 6;
2021
optional uint32 maximum_nprobes = 7;
2122
optional uint32 ef = 8;
2223
optional uint32 refine_factor = 9;
23-
// Distance metric type as string ("l2", "cosine", "dot", "hamming").
24-
// Absent means None (use the index's default metric).
25-
optional string metric_type = 10;
24+
// Distance metric type. Absent means None (use the index's default metric).
25+
optional lance.index.pb.VectorMetricType metric_type = 10;
2626
bool use_index = 11;
27-
float dist_q_c = 12;
27+
optional float dist_q_c = 12;
2828
}
2929

30+
// Serializable form of ANNIvfSubIndexExec — the IVF sub-index search node.
31+
//
32+
// Note: ANNIvfSubIndexExec.prefilter_source (child ExecutionPlan) is NOT
33+
// serialized here. DataFusion's PhysicalExtensionCodec handles child plans
34+
// automatically via children() / with_new_children(). The codec receives
35+
// deserialized children in the `inputs` parameter of try_decode and
36+
// reconstructs the PreFilterSource from them.
3037
message ANNIvfSubIndexExecProto {
3138
VectorQueryProto query = 1;
32-
TableIdentifier table = 2;
39+
lance.datafusion.TableIdentifier table = 2;
3340
repeated lance.table.IndexMetadata indices = 3;
3441
}

python/Cargo.lock

Lines changed: 2 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

rust/lance-datafusion/build.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ fn main() -> Result<()> {
2020
&[
2121
"./protos/table_identifier.proto",
2222
"./protos/filtered_read.proto",
23-
"./protos/ann.proto",
2423
],
2524
&["./protos"],
2625
)?;

rust/lance/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,10 @@ semver.workspace = true
8484
tokio-stream = { workspace = true }
8585
tokio-util = { workspace = true }
8686

87+
[build-dependencies]
88+
prost-build.workspace = true
89+
protobuf-src = { version = "2.1", optional = true }
90+
8791
[target.'cfg(target_os = "linux")'.dev-dependencies]
8892
pprof.workspace = true
8993
# Need this so we can prevent dynamic linking in binaries (see cli feature)
@@ -128,6 +132,7 @@ dynamodb = ["lance-table/dynamodb", "dep:aws-sdk-dynamodb"]
128132
dynamodb_tests = ["dynamodb"]
129133
substrait = ["lance-datafusion/substrait"]
130134
protoc = [
135+
"dep:protobuf-src",
131136
"lance-encoding/protoc",
132137
"lance-file/protoc",
133138
"lance-index/protoc",

rust/lance/build.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// SPDX-License-Identifier: Apache-2.0
2+
// SPDX-FileCopyrightText: Copyright The Lance Authors
3+
4+
use std::io::Result;
5+
6+
fn main() -> Result<()> {
7+
println!("cargo:rerun-if-changed=protos");
8+
9+
#[cfg(feature = "protoc")]
10+
// Use vendored protobuf compiler if requested.
11+
unsafe {
12+
std::env::set_var("PROTOC", protobuf_src::protoc());
13+
}
14+
15+
let mut prost_build = prost_build::Config::new();
16+
prost_build.extern_path(".lance.table", "::lance_table::format::pb");
17+
prost_build.extern_path(".lance.index.pb", "::lance_index::pb");
18+
prost_build.extern_path(".lance.datafusion", "::lance_datafusion::pb");
19+
prost_build.protoc_arg("--experimental_allow_proto3_optional");
20+
prost_build.enable_type_names();
21+
prost_build.compile_protos(&["./protos/ann.proto"], &["./protos"])?;
22+
23+
Ok(())
24+
}

rust/lance/src/io/exec/ann_proto.rs

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
//! Protobuf serialization for [`ANNIvfSubIndexExec`].
55
//!
6-
//! Proto message definitions live in `lance-datafusion` (see `pb`).
6+
//! Proto message definitions live in `crate::pb` (compiled from `ann.proto`).
77
//! Conversion functions live here because they need access to `ANNIvfSubIndexExec`
88
//! and `Dataset`, which are defined in this crate.
99
//!
@@ -15,13 +15,14 @@ use std::sync::Arc;
1515
use arrow_array::RecordBatch;
1616
use arrow_schema::{Field, Schema as ArrowSchema};
1717
use lance_core::{Error, Result};
18-
use lance_datafusion::pb;
18+
use lance_index::pb as index_pb;
1919
use lance_index::vector::Query;
2020
use lance_linalg::distance::DistanceType;
2121
use lance_table::format::IndexMetadata;
2222
use lance_table::format::pb as table_pb;
2323

2424
use crate::Dataset;
25+
use crate::pb;
2526

2627
use super::knn::ANNIvfSubIndexExec;
2728
use super::table_identifier::{resolve_dataset, table_identifier_from_dataset};
@@ -75,49 +76,53 @@ fn query_key_from_ipc_bytes(bytes: &[u8]) -> Result<arrow_array::ArrayRef> {
7576
}
7677

7778
pub fn query_to_proto(query: &Query) -> Result<pb::VectorQueryProto> {
78-
let key_arrow_ipc = query_key_to_ipc_bytes(query.key.as_ref())?;
79+
let query_vector_arrow_ipc = query_key_to_ipc_bytes(query.key.as_ref())?;
7980

80-
let metric_type = query.metric_type.map(|dt| dt.to_string());
81+
let metric_type = query
82+
.metric_type
83+
.map(|dt| index_pb::VectorMetricType::from(dt) as i32);
8184

8285
Ok(pb::VectorQueryProto {
83-
key_arrow_ipc,
86+
query_vector_arrow_ipc,
8487
column: query.column.clone(),
8588
k: query.k as u32,
8689
lower_bound: query.lower_bound,
8790
upper_bound: query.upper_bound,
88-
minimum_nprobes: query.minimum_nprobes as u32,
91+
minimum_nprobes: Some(query.minimum_nprobes as u32),
8992
maximum_nprobes: query.maximum_nprobes.map(|n| n as u32),
9093
ef: query.ef.map(|n| n as u32),
9194
refine_factor: query.refine_factor,
9295
metric_type,
9396
use_index: query.use_index,
94-
dist_q_c: query.dist_q_c,
97+
dist_q_c: Some(query.dist_q_c),
9598
})
9699
}
97100

98101
pub fn query_from_proto(proto: pb::VectorQueryProto) -> Result<Query> {
99-
let key = query_key_from_ipc_bytes(&proto.key_arrow_ipc)?;
102+
let key = query_key_from_ipc_bytes(&proto.query_vector_arrow_ipc)?;
100103

101104
let metric_type = proto
102105
.metric_type
103-
.as_deref()
104-
.map(DistanceType::try_from)
105-
.transpose()
106-
.map_err(|e| Error::internal(format!("Invalid distance type: {e}")))?;
106+
.map(|v| {
107+
index_pb::VectorMetricType::try_from(v)
108+
.map(DistanceType::from)
109+
.map_err(|_| Error::internal(format!("Invalid VectorMetricType value: {v}")))
110+
})
111+
.transpose()?;
107112

108113
Ok(Query {
109114
column: proto.column,
110115
key,
111116
k: proto.k as usize,
112117
lower_bound: proto.lower_bound,
113118
upper_bound: proto.upper_bound,
114-
minimum_nprobes: proto.minimum_nprobes as usize,
119+
minimum_nprobes: proto.minimum_nprobes.unwrap_or(0) as usize,
115120
maximum_nprobes: proto.maximum_nprobes.map(|n| n as usize),
116121
ef: proto.ef.map(|n| n as usize),
117122
refine_factor: proto.refine_factor,
118123
metric_type,
119124
use_index: proto.use_index,
120-
dist_q_c: proto.dist_q_c,
125+
dist_q_c: proto.dist_q_c.unwrap_or(0.0),
121126
})
122127
}
123128

rust/lance/src/lib.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,11 @@ pub mod session;
8585
pub mod table;
8686
pub mod utils;
8787

88+
pub mod pb {
89+
#![allow(clippy::use_self)]
90+
include!(concat!(env!("OUT_DIR"), "/lance.pb.rs"));
91+
}
92+
8893
pub use blob::{BlobArrayBuilder, blob_field};
8994
pub use dataset::Dataset;
9095
use lance_index::vector::DIST_COL;

0 commit comments

Comments
 (0)