Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 4 additions & 7 deletions corpus_forge/admin/prune.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@

from __future__ import annotations

+import importlib
import logging
import math
from collections import Counter
Expand Down Expand Up @@ -163,12 +164,10 @@ def _minhash_available() -> bool:
"""

 try:
-from corpus_forge.quality.minhash import ( # noqa: F401
-jaccard_neighbor_distance,
-)
+mod = importlib.import_module("corpus_forge.quality.minhash")
 except ImportError:
 return False
-return True
+return hasattr(mod, "jaccard_neighbor_distance")


def _duplicate_density(
Expand Down Expand Up @@ -509,9 +508,7 @@ def prune_dataset(
# Resolve optional sub-score data sources ONCE for the whole pool.
minhash_module: Any | None
 if _minhash_available():
-from corpus_forge.quality import minhash as _mh
-
-minhash_module = _mh
+minhash_module = importlib.import_module("corpus_forge.quality.minhash")
 else:
 minhash_module = None
duplicate_density_available = minhash_module is not None
Expand Down
Loading