Skip to content

Commit e4c12d1

Browse files
srini047 and mpangrazzi authored
feat: add support for delete_all_documents for astra client (#2362)
* feat: add support for delete_all_documents for astra client
* fix: reuse of astra client
* fix: review comments
* chore: change log verbosity level

---------

Co-authored-by: Michele Pangrazzi <[email protected]>
1 parent f7ec015 commit e4c12d1

File tree

4 files changed: 49 additions and 25 deletions.

integrations/astra/src/haystack_integrations/document_stores/astra/astra_client.py

Lines changed: 13 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import json
2-
from typing import Dict, List, Optional, Union
2+
from typing import Any, Dict, List, Optional, Union
33
from warnings import warn
44

55
from astrapy import DataAPIClient as AstraDBClient
@@ -320,31 +320,36 @@ def delete(
320320
self,
321321
*,
322322
ids: Optional[List[str]] = None,
323-
delete_all: Optional[bool] = None,
324323
filters: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None,
325324
) -> int:
326325
"""Delete documents from the Astra index.
327326
328327
:param ids: the ids of the documents to delete
329-
:param delete_all: if `True`, delete all documents from the index
330328
:param filters: additional filters to apply when deleting documents
331329
:returns: the number of documents deleted
332330
"""
333-
if delete_all:
334-
query = {"deleteMany": {}} # type: dict
331+
query: Dict[str, Dict[str, Any]] = {}
332+
335333
if ids is not None:
336334
query = {"deleteMany": {"filter": {"_id": {"$in": ids}}}}
337335
if filters is not None:
338336
query = {"deleteMany": {"filter": filters}}
339337

340338
filter_dict = {}
341-
if "filter" in query["deleteMany"]:
342-
filter_dict = query["deleteMany"]["filter"]
343-
339+
filter_dict = query.get("deleteMany", {}).get("filter", {})
344340
delete_result = self._astra_db_collection.delete_many(filter=filter_dict)
345341

346342
return delete_result.deleted_count
347343

344+
def delete_all_documents(self) -> int:
345+
"""
346+
Delete all documents from the Astra index.
347+
:returns: the number of documents deleted
348+
"""
349+
delete_result = self._astra_db_collection.delete_many(filter={})
350+
351+
return delete_result.deleted_count
352+
348353
def count_documents(self, upper_bound: int = 10000) -> int:
349354
"""
350355
Count the number of documents in the Astra index.

integrations/astra/src/haystack_integrations/document_stores/astra/document_store.py

Lines changed: 19 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55

66
from haystack import default_from_dict, default_to_dict, logging
77
from haystack.dataclasses import Document
8-
from haystack.document_stores.errors import DuplicateDocumentError, MissingDocumentError
8+
from haystack.document_stores.errors import DocumentStoreError, DuplicateDocumentError, MissingDocumentError
99
from haystack.document_stores.types import DuplicatePolicy
1010
from haystack.utils import Secret, deserialize_secrets_inplace
1111

@@ -395,12 +395,7 @@ def search(
395395

396396
return result
397397

398-
def delete_documents(
399-
self,
400-
document_ids: Optional[List[str]] = None,
401-
*,
402-
delete_all: Optional[bool] = None,
403-
) -> None:
398+
def delete_documents(self, document_ids: List[str]) -> None:
404399
"""
405400
Deletes documents from the document store.
406401
@@ -413,12 +408,27 @@ def delete_documents(
413408
if document_ids is not None:
414409
for batch in _batches(document_ids, MAX_BATCH_SIZE):
415410
deletion_counter += self.index.delete(ids=batch)
416-
else:
417-
deletion_counter = self.index.delete(delete_all=delete_all)
418411
logger.info(f"{deletion_counter} documents deleted")
419412

420413
if document_ids is not None and deletion_counter == 0:
421414
msg = f"Document {document_ids} does not exist"
422415
raise MissingDocumentError(msg)
423416
else:
424417
logger.info("No documents in document store")
418+
419+
def delete_all_documents(self) -> None:
420+
"""
421+
Deletes all documents from the document store.
422+
"""
423+
deletion_counter = 0
424+
425+
try:
426+
deletion_counter = self.index.delete_all_documents()
427+
except Exception as e:
428+
msg = f"Failed to delete all documents from Astra: {e!s}"
429+
raise DocumentStoreError(msg) from e
430+
431+
if deletion_counter == -1:
432+
logger.info("All documents deleted")
433+
else:
434+
logger.error("Could not delete all documents")

integrations/astra/tests/test_document_store.py

Lines changed: 11 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -54,7 +54,7 @@ class TestDocumentStore(DocumentStoreBaseTests):
5454
you can add more to this class.
5555
"""
5656

57-
@pytest.fixture
57+
@pytest.fixture(scope="class")
5858
def document_store(self) -> AstraDocumentStore:
5959
return AstraDocumentStore(
6060
collection_name="haystack_integration",
@@ -63,11 +63,11 @@ def document_store(self) -> AstraDocumentStore:
6363
)
6464

6565
@pytest.fixture(autouse=True)
66-
def run_before_and_after_tests(self, document_store: AstraDocumentStore):
66+
def run_before_tests(self, document_store: AstraDocumentStore):
6767
"""
6868
Cleaning up document store
6969
"""
70-
document_store.delete_documents(delete_all=True)
70+
document_store.delete_all_documents()
7171
assert document_store.count_documents() == 0
7272

7373
def assert_documents_are_equal(self, received: List[Document], expected: List[Document]):
@@ -136,8 +136,7 @@ def test_delete_documents_more_than_twenty_delete_all(self, document_store: Astr
136136
document_store.write_documents(docs)
137137
assert document_store.count_documents() == 25
138138

139-
document_store.delete_documents(delete_all=True)
140-
139+
document_store.delete_all_documents()
141140
assert document_store.count_documents() == 0
142141

143142
def test_delete_documents_more_than_twenty_delete_ids(self, document_store: AstraDocumentStore):
@@ -205,6 +204,13 @@ def test_filter_documents_by_in_operator(self, document_store):
205204
self.assert_documents_are_equal([result[0]], [docs[0]])
206205
self.assert_documents_are_equal([result[1]], [docs[1]])
207206

207+
def test_delete_all_documents(self, document_store: AstraDocumentStore):
208+
"""
209+
Test delete_all_documents() on an Astra.
210+
"""
211+
document_store.delete_all_documents()
212+
assert document_store.count_documents() == 0
213+
208214
@pytest.mark.skip(reason="Unsupported filter operator not.")
209215
def test_not_operator(self, document_store, filterable_docs):
210216
pass

integrations/astra/tests/test_embedding_retrieval.py

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
)
1414
@pytest.mark.skipif(os.environ.get("ASTRA_DB_API_ENDPOINT", "") == "", reason="ASTRA_DB_API_ENDPOINT env var not set")
1515
class TestEmbeddingRetrieval:
16-
@pytest.fixture
16+
@pytest.fixture(scope="class")
1717
def document_store(self) -> AstraDocumentStore:
1818
return AstraDocumentStore(
1919
collection_name="haystack_integration",
@@ -22,11 +22,11 @@ def document_store(self) -> AstraDocumentStore:
2222
)
2323

2424
@pytest.fixture(autouse=True)
25-
def run_before_and_after_tests(self, document_store: AstraDocumentStore):
25+
def run_before_tests(self, document_store: AstraDocumentStore):
2626
"""
2727
Cleaning up document store
2828
"""
29-
document_store.delete_documents(delete_all=True)
29+
document_store.delete_all_documents()
3030
assert document_store.count_documents() == 0
3131

3232
def test_search_with_top_k(self, document_store):
@@ -45,3 +45,6 @@ def test_search_with_top_k(self, document_store):
4545

4646
for document in result:
4747
assert document.score is not None
48+
49+
document_store.delete_all_documents()
50+
assert document_store.count_documents() == 0

0 commit comments

Comments
 (0)