Skip to content

Commit c6a8fb4

Browse files
committed
Add fetch and update by metadata (#206)
## Problem Add fetch and update by metadata ## Solution This PR adds the fetchByMetadata and updateByMetadata methods for both Index and AsyncIndex clients, enabling fetching and updating vectors by metadata filters. **Fetch By Metadata** Basic Fetch: Fetch vectors matching a metadata filter with optional limit and pagination: ```java import io.pinecone.proto.FetchByMetadataResponse; import com.google.protobuf.Struct; import com.google.protobuf.Value; // Create a metadata filter Struct filter = Struct.newBuilder() .putFields("genre", Value.newBuilder() .setStructValue(Struct.newBuilder() .putFields("$eq", Value.newBuilder() .setStringValue("action") .build())) .build()) .build(); // Fetch vectors by metadata with limit FetchByMetadataResponse response = index.fetchByMetadata("example-namespace", filter, 10, null); assertNotNull(response); assertTrue(response.getVectorsCount() > 0); // Access fetched vectors response.getVectorsMap().forEach((id, vector) -> { assertNotNull(vector); assertTrue(vector.hasMetadata()); }); ``` Pagination Support: Use pagination tokens to fetch additional pages of results: ```java // Fetch first page FetchByMetadataResponse firstPage = index.fetchByMetadata("example-namespace", filter, 2, null); assertNotNull(firstPage); // Fetch next page using pagination token if (firstPage.hasPagination() && firstPage.getPagination().getNext() != null && !firstPage.getPagination().getNext().isEmpty()) { FetchByMetadataResponse secondPage = index.fetchByMetadata( "example-namespace", filter, 2, firstPage.getPagination().getNext()); assertNotNull(secondPage); } ``` **Update By Metadata** Basic Update: Update vectors matching a metadata filter with new metadata: ```java import io.pinecone.proto.UpdateResponse; import com.google.protobuf.Struct; // Create a filter to match vectors Struct filter = Struct.newBuilder() .putFields("genre", Value.newBuilder() .setStructValue(Struct.newBuilder() .putFields("$eq", Value.newBuilder() .setStringValue("action") 
.build())) .build()) .build(); // Create new metadata to apply Struct newMetadata = Struct.newBuilder() .putFields("updated", Value.newBuilder().setStringValue("true").build()) .putFields("year", Value.newBuilder().setStringValue("2024").build()) .build(); // Update vectors matching the filter UpdateResponse response = index.updateByMetadata(filter, newMetadata, "example-namespace", false); assertNotNull(response); assertTrue(response.getMatchedRecords() > 0); ``` Dry Run Mode: Preview how many records would be updated without actually applying changes: ```java // Dry run to check how many records would be updated UpdateResponse dryRunResponse = index.updateByMetadata(filter, newMetadata, "example-namespace", true); assertNotNull(dryRunResponse); int matchedRecords = dryRunResponse.getMatchedRecords(); assertTrue(matchedRecords > 0); // Actually perform the update UpdateResponse updateResponse = index.updateByMetadata(filter, newMetadata, "example-namespace", false); assertNotNull(updateResponse); ``` **Async Support** Both methods are available for AsyncIndex: ```java import com.google.common.util.concurrent.ListenableFuture; // Fetch by metadata asynchronously ListenableFuture<FetchByMetadataResponse> fetchFuture = asyncIndex.fetchByMetadata("example-namespace", filter, 10, null); FetchByMetadataResponse fetchResponse = fetchFuture.get(); assertNotNull(fetchResponse); // Update by metadata asynchronously ListenableFuture<UpdateResponse> updateFuture = asyncIndex.updateByMetadata(filter, newMetadata, "example-namespace", false); UpdateResponse updateResponse = updateFuture.get(); assertNotNull(updateResponse); assertTrue(updateResponse.getMatchedRecords() > 0); ``` Note: These operations are supported for serverless indexes. 
## Type of Change - [ ] Bug fix (non-breaking change which fixes an issue) - [x] New feature (non-breaking change which adds functionality) - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) - [ ] This change requires a documentation update - [ ] Infrastructure change (CI configs, etc) - [ ] Non-code change (docs, etc) - [ ] None of the above: (explain here) ## Test Plan Added integration tests.
1 parent c01528e commit c6a8fb4

File tree

5 files changed

+624
-0
lines changed

5 files changed

+624
-0
lines changed

README.md

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -697,6 +697,86 @@ List<Float> values = Arrays.asList(1F, 2F, 3F);
697697
UpdateResponse updateResponse = index.update("v1", values, "example-namespace");
698698
```
699699

700+
## Fetch vectors by metadata
701+
702+
The following example fetches the vectors in a namespace that match a metadata filter, with an optional limit and pagination token.
703+
704+
```java
705+
import io.pinecone.clients.Index;
706+
import io.pinecone.clients.Pinecone;
707+
import io.pinecone.proto.FetchByMetadataResponse;
708+
import com.google.protobuf.Struct;
709+
import com.google.protobuf.Value;
710+
...
711+
712+
Pinecone pinecone = new Pinecone.Builder("PINECONE_API_KEY").build();
713+
Index index = pinecone.getIndexConnection("example-index");
714+
715+
// Create a metadata filter
716+
Struct filter = Struct.newBuilder()
717+
.putFields("genre", Value.newBuilder()
718+
.setStructValue(Struct.newBuilder()
719+
.putFields("$eq", Value.newBuilder()
720+
.setStringValue("action")
721+
.build()))
722+
.build())
723+
.build();
724+
725+
// Fetch vectors by metadata with limit
726+
FetchByMetadataResponse response = index.fetchByMetadata("example-namespace", filter, 10, null);
727+
728+
// Fetch with pagination
729+
String paginationToken = null;
730+
FetchByMetadataResponse fetchResponse = index.fetchByMetadata("example-namespace", filter, 100, paginationToken);
731+
732+
// Continue pagination if needed
733+
if (fetchResponse.hasPagination() &&
734+
fetchResponse.getPagination().getNext() != null &&
735+
!fetchResponse.getPagination().getNext().isEmpty()) {
736+
FetchByMetadataResponse nextPage = index.fetchByMetadata(
737+
"example-namespace", filter, 100, fetchResponse.getPagination().getNext());
738+
}
739+
```
740+
741+
## Update vectors by metadata
742+
743+
The following example updates the metadata of every vector that matches a metadata filter, using a dry run first to see how many records would be affected.
744+
745+
```java
746+
import io.pinecone.clients.Index;
747+
import io.pinecone.clients.Pinecone;
748+
import io.pinecone.proto.UpdateResponse;
749+
import com.google.protobuf.Struct;
750+
import com.google.protobuf.Value;
751+
...
752+
753+
Pinecone pinecone = new Pinecone.Builder("PINECONE_API_KEY").build();
754+
Index index = pinecone.getIndexConnection("example-index");
755+
756+
// Create a filter to match vectors
757+
Struct filter = Struct.newBuilder()
758+
.putFields("genre", Value.newBuilder()
759+
.setStructValue(Struct.newBuilder()
760+
.putFields("$eq", Value.newBuilder()
761+
.setStringValue("action")
762+
.build()))
763+
.build())
764+
.build();
765+
766+
// Create new metadata to apply
767+
Struct newMetadata = Struct.newBuilder()
768+
.putFields("updated", Value.newBuilder().setStringValue("true").build())
769+
.putFields("year", Value.newBuilder().setStringValue("2024").build())
770+
.build();
771+
772+
// Dry run to check how many records would be updated
773+
UpdateResponse dryRunResponse = index.updateByMetadata(filter, newMetadata, "example-namespace", true);
774+
int matchedRecords = dryRunResponse.getMatchedRecords();
775+
776+
// Actually perform the update
777+
UpdateResponse updateResponse = index.updateByMetadata(filter, newMetadata, "example-namespace", false);
778+
```
779+
700780
## Create namespace
701781

702782
The following example shows how to create a namespace.
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
package io.pinecone.integration.dataPlane;
2+
3+
import com.google.protobuf.Struct;
4+
import com.google.protobuf.Value;
5+
import io.pinecone.clients.AsyncIndex;
6+
import io.pinecone.clients.Index;
7+
import io.pinecone.helpers.RandomStringBuilder;
8+
import io.pinecone.helpers.TestResourcesManager;
9+
import io.pinecone.proto.FetchByMetadataResponse;
10+
import io.pinecone.proto.UpdateResponse;
11+
import io.pinecone.unsigned_indices_model.VectorWithUnsignedIndices;
12+
import org.junit.jupiter.api.AfterAll;
13+
import org.junit.jupiter.api.BeforeAll;
14+
import org.junit.jupiter.api.Test;
15+
16+
import java.util.ArrayList;
17+
import java.util.HashMap;
18+
import java.util.List;
19+
import java.util.concurrent.ExecutionException;
20+
21+
import static io.pinecone.helpers.AssertRetry.assertWithRetry;
22+
import static io.pinecone.helpers.BuildUpsertRequest.*;
23+
import static org.junit.jupiter.api.Assertions.*;
24+
25+
public class FetchAndUpdateByMetadataTest {
26+
27+
private static final TestResourcesManager indexManager = TestResourcesManager.getInstance();
28+
private static Index index;
29+
private static AsyncIndex asyncIndex;
30+
private static final String namespace = RandomStringBuilder.build("ns", 8);
31+
32+
@BeforeAll
33+
public static void setUp() throws InterruptedException {
34+
int dimension = indexManager.getDimension();
35+
index = indexManager.getOrCreateServerlessIndexConnection();
36+
asyncIndex = indexManager.getOrCreateServerlessAsyncIndexConnection();
37+
38+
// Upsert vectors with metadata for testing
39+
int numOfVectors = 5;
40+
List<String> upsertIds = getIdsList(numOfVectors);
41+
List<VectorWithUnsignedIndices> vectorsToUpsert = new ArrayList<>(numOfVectors);
42+
43+
// Upsert vectors with different metadata values
44+
for (int i = 0; i < numOfVectors; i++) {
45+
Struct metadata = generateMetadataStruct(i % 3, (i + 1) % 3);
46+
VectorWithUnsignedIndices vector = new VectorWithUnsignedIndices(
47+
upsertIds.get(i),
48+
generateVectorValuesByDimension(dimension),
49+
metadata,
50+
null
51+
);
52+
vectorsToUpsert.add(vector);
53+
}
54+
55+
index.upsert(vectorsToUpsert, namespace);
56+
57+
// Wait for vectors to be indexed
58+
Thread.sleep(5000);
59+
}
60+
61+
@AfterAll
62+
public static void cleanUp() {
63+
index.close();
64+
asyncIndex.close();
65+
}
66+
67+
@Test
68+
public void fetchByMetadataSyncTest() throws InterruptedException {
69+
HashMap<String, List<String>> metadataMap = createAndGetMetadataMap();
70+
String filterValue = metadataMap.get(metadataFields[0]).get(0);
71+
72+
Struct filter = Struct.newBuilder()
73+
.putFields(metadataFields[0], Value.newBuilder()
74+
.setStructValue(Struct.newBuilder()
75+
.putFields("$eq", Value.newBuilder()
76+
.setStringValue(filterValue)
77+
.build()))
78+
.build())
79+
.build();
80+
81+
assertWithRetry(() -> {
82+
FetchByMetadataResponse response = index.fetchByMetadata(namespace, filter, 10, null);
83+
assertNotNull(response);
84+
assertTrue(response.getVectorsCount() > 0);
85+
}, 3);
86+
}
87+
88+
@Test
89+
public void updateByMetadataSyncTest() throws InterruptedException {
90+
HashMap<String, List<String>> metadataMap = createAndGetMetadataMap();
91+
String filterValue = metadataMap.get(metadataFields[0]).get(0);
92+
93+
Struct filter = Struct.newBuilder()
94+
.putFields(metadataFields[0], Value.newBuilder()
95+
.setStructValue(Struct.newBuilder()
96+
.putFields("$eq", Value.newBuilder()
97+
.setStringValue(filterValue)
98+
.build()))
99+
.build())
100+
.build();
101+
102+
Struct newMetadata = Struct.newBuilder()
103+
.putFields("updated", Value.newBuilder().setStringValue("true").build())
104+
.build();
105+
106+
assertWithRetry(() -> {
107+
UpdateResponse response = index.updateByMetadata(filter, newMetadata, namespace, false);
108+
assertNotNull(response);
109+
assertTrue(response.getMatchedRecords() > 0);
110+
}, 3);
111+
}
112+
113+
@Test
114+
public void fetchByMetadataAsyncTest() throws InterruptedException, ExecutionException {
115+
HashMap<String, List<String>> metadataMap = createAndGetMetadataMap();
116+
String filterValue = metadataMap.get(metadataFields[1]).get(0);
117+
118+
Struct filter = Struct.newBuilder()
119+
.putFields(metadataFields[1], Value.newBuilder()
120+
.setStructValue(Struct.newBuilder()
121+
.putFields("$eq", Value.newBuilder()
122+
.setStringValue(filterValue)
123+
.build()))
124+
.build())
125+
.build();
126+
127+
assertWithRetry(() -> {
128+
FetchByMetadataResponse response = asyncIndex.fetchByMetadata(namespace, filter, 10, null).get();
129+
assertNotNull(response);
130+
assertTrue(response.getVectorsCount() > 0);
131+
}, 3);
132+
}
133+
134+
@Test
135+
public void updateByMetadataAsyncTest() throws InterruptedException, ExecutionException {
136+
HashMap<String, List<String>> metadataMap = createAndGetMetadataMap();
137+
String filterValue = metadataMap.get(metadataFields[1]).get(0);
138+
139+
Struct filter = Struct.newBuilder()
140+
.putFields(metadataFields[1], Value.newBuilder()
141+
.setStructValue(Struct.newBuilder()
142+
.putFields("$eq", Value.newBuilder()
143+
.setStringValue(filterValue)
144+
.build()))
145+
.build())
146+
.build();
147+
148+
Struct newMetadata = Struct.newBuilder()
149+
.putFields("async_updated", Value.newBuilder().setStringValue("true").build())
150+
.build();
151+
152+
assertWithRetry(() -> {
153+
UpdateResponse response = asyncIndex.updateByMetadata(filter, newMetadata, namespace, false).get();
154+
assertNotNull(response);
155+
assertTrue(response.getMatchedRecords() > 0);
156+
}, 3);
157+
}
158+
}

0 commit comments

Comments
 (0)