Skip to content

Commit 823b2ad

Browse files
committed
feat: add support for delete_all_documents for astra client
1 parent 91cabd9 commit 823b2ad

File tree

4 files changed

+44
-18
lines changed

4 files changed

+44
-18
lines changed

integrations/astra/src/haystack_integrations/document_stores/astra/astra_client.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import json
2-
from typing import Dict, List, Optional, Union
2+
from typing import Any, Dict, List, Optional, Union
33
from warnings import warn
44

55
from astrapy import DataAPIClient as AstraDBClient
@@ -320,31 +320,36 @@ def delete(
320320
self,
321321
*,
322322
ids: Optional[List[str]] = None,
323-
delete_all: Optional[bool] = None,
324323
filters: Optional[Dict[str, Union[str, float, int, bool, List, dict]]] = None,
325324
) -> int:
326325
"""Delete documents from the Astra index.
327326
328327
:param ids: the ids of the documents to delete
329-
:param delete_all: if `True`, delete all documents from the index
330328
:param filters: additional filters to apply when deleting documents
331329
:returns: the number of documents deleted
332330
"""
333-
if delete_all:
334-
query = {"deleteMany": {}} # type: dict
331+
query: Dict[str, Dict[str, Any]] = {}
332+
335333
if ids is not None:
336334
query = {"deleteMany": {"filter": {"_id": {"$in": ids}}}}
337335
if filters is not None:
338336
query = {"deleteMany": {"filter": filters}}
339337

340338
filter_dict = {}
341-
if "filter" in query["deleteMany"]:
342-
filter_dict = query["deleteMany"]["filter"]
343-
339+
filter_dict = query.get("deleteMany", {}).get("filter", {})
344340
delete_result = self._astra_db_collection.delete_many(filter=filter_dict)
345341

346342
return delete_result.deleted_count
347343

344+
def delete_all_documents(self) -> int:
345+
"""
346+
Delete all documents from the Astra index.
347+
:returns: the number of documents deleted
348+
"""
349+
delete_result = self._astra_db_collection.delete_many(filter={})
350+
351+
return delete_result.deleted_count
352+
348353
def count_documents(self, upper_bound: int = 10000) -> int:
349354
"""
350355
Count the number of documents in the Astra index.

integrations/astra/src/haystack_integrations/document_stores/astra/document_store.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -398,8 +398,6 @@ def search(
398398
def delete_documents(
399399
self,
400400
document_ids: Optional[List[str]] = None,
401-
*,
402-
delete_all: Optional[bool] = None,
403401
) -> None:
404402
"""
405403
Deletes documents from the document store.
@@ -413,12 +411,26 @@ def delete_documents(
413411
if document_ids is not None:
414412
for batch in _batches(document_ids, MAX_BATCH_SIZE):
415413
deletion_counter += self.index.delete(ids=batch)
416-
else:
417-
deletion_counter = self.index.delete(delete_all=delete_all)
418414
logger.info(f"{deletion_counter} documents deleted")
419415

420416
if document_ids is not None and deletion_counter == 0:
421417
msg = f"Document {document_ids} does not exist"
422418
raise MissingDocumentError(msg)
423419
else:
424420
logger.info("No documents in document store")
421+
422+
def delete_all_documents(self) -> None:
423+
"""
424+
Deletes all documents from the document store.
425+
"""
426+
if not hasattr(self.index, "delete_all_documents"):
427+
msg = "Underlying index does not support bulk deletion."
428+
raise NotImplementedError(msg)
429+
430+
deletion_counter = 0
431+
deletion_counter = self.index.delete_all_documents()
432+
433+
if deletion_counter == -1:
434+
logger.info("All documents deleted")
435+
else:
436+
logger.info("Could not delete all documents")

integrations/astra/tests/test_document_store.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,11 +63,11 @@ def document_store(self) -> AstraDocumentStore:
6363
)
6464

6565
@pytest.fixture(autouse=True)
66-
def run_before_and_after_tests(self, document_store: AstraDocumentStore):
66+
def run_before_tests(self, document_store: AstraDocumentStore):
6767
"""
6868
Cleaning up document store
6969
"""
70-
document_store.delete_documents(delete_all=True)
70+
document_store.delete_all_documents()
7171
assert document_store.count_documents() == 0
7272

7373
def assert_documents_are_equal(self, received: List[Document], expected: List[Document]):
@@ -136,8 +136,7 @@ def test_delete_documents_more_than_twenty_delete_all(self, document_store: Astr
136136
document_store.write_documents(docs)
137137
assert document_store.count_documents() == 25
138138

139-
document_store.delete_documents(delete_all=True)
140-
139+
document_store.delete_all_documents()
141140
assert document_store.count_documents() == 0
142141

143142
def test_delete_documents_more_than_twenty_delete_ids(self, document_store: AstraDocumentStore):
@@ -205,6 +204,13 @@ def test_filter_documents_by_in_operator(self, document_store):
205204
self.assert_documents_are_equal([result[0]], [docs[0]])
206205
self.assert_documents_are_equal([result[1]], [docs[1]])
207206

207+
def test_delete_all_documents_empty_store(self, document_store: AstraDocumentStore):
208+
"""
209+
Test delete_all_documents() on an empty Astra document store.
210+
"""
211+
document_store.delete_all_documents()
212+
assert document_store.count_documents() == 0
213+
208214
@pytest.mark.skip(reason="Unsupported filter operator not.")
209215
def test_not_operator(self, document_store, filterable_docs):
210216
pass

integrations/astra/tests/test_embedding_retrieval.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,11 @@ def document_store(self) -> AstraDocumentStore:
2222
)
2323

2424
@pytest.fixture(autouse=True)
25-
def run_before_and_after_tests(self, document_store: AstraDocumentStore):
25+
def run_before_tests(self, document_store: AstraDocumentStore):
2626
"""
2727
Cleaning up document store
2828
"""
29-
document_store.delete_documents(delete_all=True)
29+
document_store.delete_all_documents()
3030
assert document_store.count_documents() == 0
3131

3232
def test_search_with_top_k(self, document_store):
@@ -45,3 +45,6 @@ def test_search_with_top_k(self, document_store):
4545

4646
for document in result:
4747
assert document.score is not None
48+
49+
document_store.delete_all_documents()
50+
assert document_store.count_documents() == 0

0 commit comments

Comments
 (0)