Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion AnyBuildLogs/latest.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
20260116-140907-2cca51ce
20260129-192840-e7c5749c
2 changes: 2 additions & 0 deletions include/defaults.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ const bool NUM_DIVERSE_BUILD = 1;

const bool REORDER_INDEX = false;
const uint32_t REORDER_DIM = 0;
const bool ATTRIBUTE_DIVERSITY = false;
const float ATTR_DIST_THRESHOLD = 0.2f;

} // namespace defaults
} // namespace diskann
11 changes: 9 additions & 2 deletions include/disk_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,12 @@ DISKANN_DLLEXPORT int build_merged_vamana_index(std::string base_file, diskann::
uint32_t num_threads, bool use_filters = false, bool use_integer_labels = false,
const std::string &label_file = std::string(""),
const std::string &labels_to_medoids_file = std::string(""),
const std::string &universal_label = "", const uint32_t Lf = 0);
const std::string &universal_label = "", const uint32_t Lf = 0,
uint32_t universal_label_num = 0,
const char* seller_file_path = nullptr,
uint32_t num_diverse_build = 1,
const char* attribute_file_path = nullptr,
float attr_dist_threshold = 0.2f);

template <typename T, typename LabelT>
DISKANN_DLLEXPORT uint32_t optimize_beamwidth(std::unique_ptr<diskann::PQFlashIndex<T, LabelT>> &_pFlashIndex,
Expand All @@ -101,7 +106,9 @@ DISKANN_DLLEXPORT int build_disk_index(
const uint32_t Lf = 0,
const char* reorderDataFilePath = nullptr,
const char* sellerFilePath = nullptr,
uint32_t num_diverse_build = 1); // default is empty string for no universal label
uint32_t num_diverse_build = 1,
const char* attributeFilePath = nullptr,
float attr_dist_threshold = 0.2f); // default is empty string for no universal label

template <typename T>
DISKANN_DLLEXPORT void create_disk_layout(const std::string base_file, const std::string mem_index_file,
Expand Down
15 changes: 13 additions & 2 deletions include/index.h
Original file line number Diff line number Diff line change
Expand Up @@ -267,8 +267,13 @@ template <typename T, typename TagT = uint32_t, typename LabelT = uint32_t> clas
// determines the navigating node of the graph by calculating the medoid of the data
uint32_t calculate_entry_point();

void parse_label_file(const std::string &label_file, size_t &num_pts_labels, size_t& total_labels);
void parse_seller_file(const std::string& label_file, size_t& num_pts_labels);
template <typename ValueT>
void parse_integer_string_file(const std::string &file_path, size_t &num_points, size_t& total_values,
std::vector<std::vector<ValueT>>& location_to_values,
tsl::robin_set<ValueT>* unique_values = nullptr,
bool sort_values = true);
void parse_seller_file(const std::string& label_file, size_t& num_pts_labels,
std::vector<uint32_t>& location_to_seller, uint32_t& num_unique_sellers);

void convert_pts_label_to_bitmask(std::vector<std::vector<LabelT>>& pts_to_labels, simple_bitmask_buf& bitmask_buf, size_t num_labels);

Expand Down Expand Up @@ -357,6 +362,8 @@ template <typename T, typename TagT = uint32_t, typename LabelT = uint32_t> clas
void initialize_query_scratch(uint32_t num_threads, uint32_t search_l, uint32_t indexing_l, uint32_t r,
uint32_t maxc, size_t dim, size_t bitmask_size = 0);

double attribute_distance(const std::vector<uint32_t> &a, const std::vector<uint32_t> &b);

// Do not call without acquiring appropriate locks
// call public member functions save and load to invoke these.
DISKANN_DLLEXPORT size_t save_graph(std::string filename);
Expand Down Expand Up @@ -426,6 +433,10 @@ template <typename T, typename TagT = uint32_t, typename LabelT = uint32_t> clas
std::vector<uint32_t> _location_to_seller;
uint32_t _num_unique_sellers = 0;
std::string _seller_file;
bool _attribute_diversity = false;
float _attr_dist_threshold = 0.2f;
std::string _attribute_file;
std::vector<std::vector<std::uint32_t>> _location_to_attributes;

bool _use_universal_label = false;
LabelT _universal_label = 0;
Expand Down
39 changes: 34 additions & 5 deletions include/parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,18 @@ class IndexWriteParameters
const bool diverse_index;
const std::string seller_file;
const uint32_t num_diverse_build;
const bool attribute_diversity;
const std::string attribute_file;
const float attr_dist_threshold;

IndexWriteParameters(const uint32_t search_list_size, const uint32_t max_degree, const bool saturate_graph,
const uint32_t max_occlusion_size, const float alpha, const uint32_t num_threads,
const uint32_t filter_list_size, bool diverse_index, const std::string& seller_file, uint32_t num_diverse_build)
const uint32_t filter_list_size, bool diverse_index, const std::string& seller_file, uint32_t num_diverse_build,
bool attribute_diversity, const std::string& attribute_file, float attr_dist_threshold)
: search_list_size(search_list_size), max_degree(max_degree), saturate_graph(saturate_graph),
max_occlusion_size(max_occlusion_size), alpha(alpha), num_threads(num_threads),
filter_list_size(filter_list_size), diverse_index(diverse_index), seller_file(seller_file), num_diverse_build(num_diverse_build)
filter_list_size(filter_list_size), diverse_index(diverse_index), seller_file(seller_file), num_diverse_build(num_diverse_build),
attribute_diversity(attribute_diversity), attribute_file(attribute_file), attr_dist_threshold(attr_dist_threshold)
{
}

Expand Down Expand Up @@ -100,6 +105,24 @@ class IndexWriteParametersBuilder
return *this;
}

/// Stores the attribute-diversity flag on this builder.
/// @param attribute_diversity value copied into the builder's `_attribute_diversity` field.
/// @return a reference to this builder so that setter calls can be chained.
IndexWriteParametersBuilder &with_attribute_diversity(const bool attribute_diversity)
{
    this->_attribute_diversity = attribute_diversity;
    return *this;
}

/// Stores the attribute distance threshold on this builder.
/// @param attr_dist_threshold value copied into the builder's `_attr_dist_threshold` field.
/// @return a reference to this builder so that setter calls can be chained.
IndexWriteParametersBuilder &with_attr_dist_threshold(const float attr_dist_threshold)
{
    this->_attr_dist_threshold = attr_dist_threshold;
    return *this;
}

/// Stores the attribute file path on this builder.
/// The argument is taken by const reference so the string is copied once
/// (into `_attribute_file`) rather than once at the call and again on assignment.
/// @param attribute_file path copied into the builder's `_attribute_file` field.
/// @return a reference to this builder so that setter calls can be chained.
IndexWriteParametersBuilder &with_attribute_file(const std::string &attribute_file)
{
    _attribute_file = attribute_file;
    return *this;
}

IndexWriteParametersBuilder &with_alpha(const float alpha)
{
_alpha = alpha;
Expand All @@ -121,13 +144,16 @@ class IndexWriteParametersBuilder
IndexWriteParameters build() const
{
return IndexWriteParameters(_search_list_size, _max_degree, _saturate_graph, _max_occlusion_size, _alpha,
_num_threads, _filter_list_size, _diverse_index, _seller_file, _num_diverse_build);
_num_threads, _filter_list_size, _diverse_index, _seller_file, _num_diverse_build,
_attribute_diversity, _attribute_file, _attr_dist_threshold);
}

IndexWriteParametersBuilder(const IndexWriteParameters &wp)
: _search_list_size(wp.search_list_size), _max_degree(wp.max_degree),
_max_occlusion_size(wp.max_occlusion_size), _saturate_graph(wp.saturate_graph), _alpha(wp.alpha),
_filter_list_size(wp.filter_list_size)
_num_threads(wp.num_threads), _filter_list_size(wp.filter_list_size), _diverse_index(wp.diverse_index),
_seller_file(wp.seller_file), _num_diverse_build(wp.num_diverse_build), _attribute_diversity(wp.attribute_diversity),
_attribute_file(wp.attribute_file), _attr_dist_threshold(wp.attr_dist_threshold)
{
}
IndexWriteParametersBuilder(const IndexWriteParametersBuilder &) = delete;
Expand All @@ -143,7 +169,10 @@ class IndexWriteParametersBuilder
uint32_t _filter_list_size{defaults::FILTER_LIST_SIZE};
bool _diverse_index{ defaults::DIVERSE_INDEX };
std::string _seller_file{ defaults::EMPTY_STRING };
uint32_t _num_diverse_build{ defaults::NUM_DIVERSE_BUILD };
uint32_t _num_diverse_build{ defaults::NUM_DIVERSE_BUILD };
bool _attribute_diversity{ defaults::ATTRIBUTE_DIVERSITY };
std::string _attribute_file{ defaults::EMPTY_STRING };
float _attr_dist_threshold{ defaults::ATTR_DIST_THRESHOLD };
};

struct IndexLoadParams
Expand Down
Loading
Loading