From e91cb524b75eb3357487ac959cc0cfdc7530761d Mon Sep 17 00:00:00 2001 From: meiravgri <109056284+meiravgri@users.noreply.github.com> Date: Mon, 3 Nov 2025 14:21:41 +0200 Subject: [PATCH 1/2] Fix incorrect assertion in `BM_VecSimBasics::UpdateAtBlockSize` benchmark (#819) * VecSimIndexInterface: introduce indexMetaDataCapacity for testing returns metadata containers capacity * cover for tiered index (cherry picked from commit f24b65afa98f99352f7c1f384e24f2c2a4cbaad1) --- .../algorithms/brute_force/brute_force.h | 2 ++ src/VecSim/algorithms/hnsw/hnsw.h | 2 ++ src/VecSim/algorithms/hnsw/hnsw_tiered.h | 4 +++ src/VecSim/vec_sim_interface.h | 12 +++++++++ tests/benchmark/bm_vecsim_basics.h | 18 +++++++++---- tests/unit/test_allocator.cpp | 2 ++ tests/unit/test_hnsw_tiered.cpp | 27 +++++++++++++++++++ 7 files changed, 62 insertions(+), 5 deletions(-) diff --git a/src/VecSim/algorithms/brute_force/brute_force.h b/src/VecSim/algorithms/brute_force/brute_force.h index d0928aede..d57929725 100644 --- a/src/VecSim/algorithms/brute_force/brute_force.h +++ b/src/VecSim/algorithms/brute_force/brute_force.h @@ -92,6 +92,8 @@ class BruteForceIndex : public VecSimIndexAbstract { return actual_stored_vec; } + + size_t indexMetaDataCapacity() const override { return idToLabelMapping.capacity(); } #endif protected: diff --git a/src/VecSim/algorithms/hnsw/hnsw.h b/src/VecSim/algorithms/hnsw/hnsw.h index 8d0633c00..fd5ddb08a 100644 --- a/src/VecSim/algorithms/hnsw/hnsw.h +++ b/src/VecSim/algorithms/hnsw/hnsw.h @@ -327,6 +327,8 @@ class HNSWIndex : public VecSimIndexAbstract, return actual_stored_vec; } + + size_t indexMetaDataCapacity() const override { return idToMetaData.capacity(); } #endif protected: diff --git a/src/VecSim/algorithms/hnsw/hnsw_tiered.h b/src/VecSim/algorithms/hnsw/hnsw_tiered.h index ef4492e00..784818cc6 100644 --- a/src/VecSim/algorithms/hnsw/hnsw_tiered.h +++ b/src/VecSim/algorithms/hnsw/hnsw_tiered.h @@ -226,6 +226,10 @@ class TieredHNSWIndex : public VecSimTieredIndex { } #ifdef BUILD_TESTS void getDataByLabel(labelType label, std::vector> &vectors_output) const; + size_t indexMetaDataCapacity() const override { + return this->backendIndex->indexMetaDataCapacity() + + this->frontendIndex->indexMetaDataCapacity(); + } #endif }; diff --git a/src/VecSim/vec_sim_interface.h b/src/VecSim/vec_sim_interface.h index 225e8d223..1573922f4 100644 --- a/src/VecSim/vec_sim_interface.h +++ b/src/VecSim/vec_sim_interface.h @@ -262,4 +262,16 @@ struct VecSimIndexInterface : public VecsimBaseObject { inline static void setWriteMode(VecSimWriteMode mode) { VecSimIndexInterface::asyncWriteMode = mode; } +#ifdef BUILD_TESTS + /** + * @brief get the capacity of the meta data containers. + * + * @return The capacity of the meta data containers in number of elements. + * The value returned from this function may differ from the indexCapacity() function. For + * example, in HNSW, the capacity of the meta data containers is the capacity of the labels + * lookup table, while the capacity of the data containers is the capacity of the vectors + * container. + */ + virtual size_t indexMetaDataCapacity() const = 0; +#endif }; diff --git a/tests/benchmark/bm_vecsim_basics.h b/tests/benchmark/bm_vecsim_basics.h index a8945406e..b1fb791e2 100644 --- a/tests/benchmark/bm_vecsim_basics.h +++ b/tests/benchmark/bm_vecsim_basics.h @@ -303,6 +303,9 @@ void BM_VecSimBasics::UpdateAtBlockSize(benchmark::State &st) { // Calculate vectors needed to reach next block boundary size_t vecs_to_blocksize = BM_VecSimGeneral::block_size - (initial_index_size % BM_VecSimGeneral::block_size); + size_t initial_index_cap = index->indexMetaDataCapacity(); + assert(initial_index_cap == N_VECTORS + vecs_to_blocksize); + assert(vecs_to_blocksize < BM_VecSimGeneral::block_size); labelType initial_label_count = index->indexLabelCount(); labelType curr_label = initial_label_count; @@ -327,15 +330,20 @@ void BM_VecSimBasics::UpdateAtBlockSize(benchmark::State &st) { // Benchmark loop: repeatedly delete/add same vector to trigger grow-shrink cycles labelType label_to_update = curr_label - 1; - size_t index_cap = index->indexCapacity(); + size_t index_cap = index->indexMetaDataCapacity(); + std::cout << "index_cap after adding vectors " << index_cap << std::endl; + assert(index_cap == initial_index_cap + BM_VecSimGeneral::block_size); + for (auto _ : st) { // Remove the vector directly from hnsw size_t ret = VecSimIndex_DeleteVector( INDICES[st.range(0) == VecSimAlgo_TIERED ? VecSimAlgo_HNSWLIB : st.range(0)], label_to_update); assert(ret == 1); - assert(index->indexCapacity() == index_cap - BM_VecSimGeneral::block_size); - // Capacity should shrink by one block after deletion + + // Capacity should not change + size_t curr_cap = index->indexMetaDataCapacity(); + assert(curr_cap == index_cap); ret = VecSimIndex_AddVector(index, QUERIES[(added_vec_count - 1) % N_QUERIES].data(), label_to_update); assert(ret == 1); @@ -343,8 +351,8 @@ void BM_VecSimBasics::UpdateAtBlockSize(benchmark::State &st) { assert(VecSimIndex_IndexSize( INDICES[st.range(0) == VecSimAlgo_TIERED ? VecSimAlgo_HNSWLIB : st.range(0)]) == N_VECTORS + added_vec_count); - // Capacity should grow back to original size after addition - assert(index->indexCapacity() == index_cap); + // Capacity should not change + assert(index->indexMetaDataCapacity() == index_cap); } assert(VecSimIndex_IndexSize(index) == N_VECTORS + added_vec_count); diff --git a/tests/unit/test_allocator.cpp b/tests/unit/test_allocator.cpp index 70f79511d..932a13971 100644 --- a/tests/unit/test_allocator.cpp +++ b/tests/unit/test_allocator.cpp @@ -120,6 +120,7 @@ TYPED_TEST(IndexAllocatorTest, test_bf_index_block_size_1) { ASSERT_EQ(bfIndex->indexCapacity(), expected_map_containers_size); ASSERT_EQ(bfIndex->idToLabelMapping.capacity(), expected_map_containers_size); + ASSERT_EQ(bfIndex->indexMetaDataCapacity(), expected_map_containers_size); ASSERT_EQ(bfIndex->idToLabelMapping.size(), expected_map_containers_size); ASSERT_GE(bfIndex->labelToIdLookup.bucket_count(), expected_map_containers_size); }; @@ -530,6 +531,7 @@ TYPED_TEST(IndexAllocatorTest, test_hnsw_reclaim_memory) { ASSERT_EQ(hnswIndex->getStoredVectorsCount(), expected_size); ASSERT_EQ(hnswIndex->idToMetaData.capacity(), expected_map_containers_size); + ASSERT_EQ(hnswIndex->indexMetaDataCapacity(), expected_map_containers_size); ASSERT_EQ(hnswIndex->idToMetaData.size(), expected_map_containers_size); ASSERT_GE(hnswIndex->labelLookup.bucket_count(), expected_map_containers_size); // Also validate that there are no unidirectional connections (these add memory to the diff --git a/tests/unit/test_hnsw_tiered.cpp b/tests/unit/test_hnsw_tiered.cpp index 39d72d253..c98aa159e 100644 --- a/tests/unit/test_hnsw_tiered.cpp +++ b/tests/unit/test_hnsw_tiered.cpp @@ -3795,6 +3795,11 @@ TYPED_TEST(HNSWTieredIndexTestBasic, HNSWResize) { ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations); ASSERT_EQ(hnsw_index->indexSize(), 1); ASSERT_EQ(hnsw_index->indexCapacity(), blockSize); + ASSERT_EQ(hnsw_index->indexMetaDataCapacity(), blockSize); + ASSERT_EQ(tiered_index->frontendIndex->indexMetaDataCapacity(), 0); + ASSERT_EQ(tiered_index->indexMetaDataCapacity(), + hnsw_index->indexMetaDataCapacity() + + tiered_index->frontendIndex->indexMetaDataCapacity()); // add up to block size for (size_t i = 1; i < blockSize; i++) { @@ -3805,6 +3810,11 @@ TYPED_TEST(HNSWTieredIndexTestBasic, HNSWResize) { ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations); ASSERT_EQ(hnsw_index->indexSize(), blockSize); ASSERT_EQ(hnsw_index->indexCapacity(), blockSize); + ASSERT_EQ(hnsw_index->indexMetaDataCapacity(), blockSize); + ASSERT_EQ(tiered_index->frontendIndex->indexMetaDataCapacity(), 0); + ASSERT_EQ(tiered_index->indexMetaDataCapacity(), + hnsw_index->indexMetaDataCapacity() + + tiered_index->frontendIndex->indexMetaDataCapacity()); // add one more vector to trigger another resize GenerateAndAddVector(tiered_index, dim, blockSize); @@ -3814,6 +3824,11 @@ TYPED_TEST(HNSWTieredIndexTestBasic, HNSWResize) { ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations); ASSERT_EQ(hnsw_index->indexSize(), blockSize + 1); ASSERT_EQ(hnsw_index->indexCapacity(), 2 * blockSize); + ASSERT_EQ(hnsw_index->indexMetaDataCapacity(), 2 * blockSize); + ASSERT_EQ(tiered_index->frontendIndex->indexMetaDataCapacity(), 0); + ASSERT_EQ(tiered_index->indexMetaDataCapacity(), + hnsw_index->indexMetaDataCapacity() + + tiered_index->frontendIndex->indexMetaDataCapacity()); // delete a vector to shrink data blocks ASSERT_EQ(VecSimIndex_DeleteVector(tiered_index, 0), 1) << "Failed to delete vector 0"; @@ -3825,6 +3840,8 @@ TYPED_TEST(HNSWTieredIndexTestBasic, HNSWResize) { ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations); ASSERT_EQ(hnsw_index->indexSize(), blockSize); ASSERT_EQ(hnsw_index->indexCapacity(), blockSize); + // meta data capacity should not shrink + ASSERT_EQ(hnsw_index->indexMetaDataCapacity(), 2 * blockSize); // add this vector again and verify lock was acquired to resize GenerateAndAddVector(tiered_index, dim, 0); @@ -3833,6 +3850,11 @@ TYPED_TEST(HNSWTieredIndexTestBasic, HNSWResize) { ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations); ASSERT_EQ(hnsw_index->indexSize(), blockSize + 1); ASSERT_EQ(hnsw_index->indexCapacity(), 2 * blockSize); + ASSERT_EQ(hnsw_index->indexMetaDataCapacity(), 2 * blockSize); + ASSERT_EQ(tiered_index->frontendIndex->indexMetaDataCapacity(), 0); + ASSERT_EQ(tiered_index->indexMetaDataCapacity(), + hnsw_index->indexMetaDataCapacity() + + tiered_index->frontendIndex->indexMetaDataCapacity()); // add up to block size (count = 2 blockSize), the lock shouldn't be acquired because no resize // is required @@ -3843,4 +3865,9 @@ TYPED_TEST(HNSWTieredIndexTestBasic, HNSWResize) { ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations); ASSERT_EQ(hnsw_index->indexSize(), 2 * blockSize); ASSERT_EQ(hnsw_index->indexCapacity(), 2 * blockSize); + ASSERT_EQ(hnsw_index->indexMetaDataCapacity(), 2 * blockSize); + ASSERT_EQ(tiered_index->frontendIndex->indexMetaDataCapacity(), 0); + ASSERT_EQ(tiered_index->indexMetaDataCapacity(), + hnsw_index->indexMetaDataCapacity() + + tiered_index->frontendIndex->indexMetaDataCapacity()); } From 0da50c03d2792199e470fd08f5bc10a5f50bb52b Mon Sep 17 00:00:00 2001 From: meiravgri Date: Tue, 4 Nov 2025 14:52:56 +0200 Subject: [PATCH 2/2] fix asserts --- tests/benchmark/bm_vecsim_basics.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/benchmark/bm_vecsim_basics.h b/tests/benchmark/bm_vecsim_basics.h index b1fb791e2..fcf925bb0 100644 --- a/tests/benchmark/bm_vecsim_basics.h +++ b/tests/benchmark/bm_vecsim_basics.h @@ -303,8 +303,6 @@ void BM_VecSimBasics::UpdateAtBlockSize(benchmark::State &st) { // Calculate vectors needed to reach next block boundary size_t vecs_to_blocksize = BM_VecSimGeneral::block_size - (initial_index_size % BM_VecSimGeneral::block_size); - size_t initial_index_cap = index->indexMetaDataCapacity(); - assert(initial_index_cap == N_VECTORS + vecs_to_blocksize); assert(vecs_to_blocksize < BM_VecSimGeneral::block_size); labelType initial_label_count = index->indexLabelCount(); @@ -332,7 +330,7 @@ void BM_VecSimBasics::UpdateAtBlockSize(benchmark::State &st) { labelType label_to_update = curr_label - 1; size_t index_cap = index->indexMetaDataCapacity(); std::cout << "index_cap after adding vectors " << index_cap << std::endl; - assert(index_cap == initial_index_cap + BM_VecSimGeneral::block_size); + assert(index_cap == initial_index_size + vecs_to_blocksize + BM_VecSimGeneral::block_size); for (auto _ : st) { // Remove the vector directly from hnsw