Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions gtranslate/training/ground_truth_by_taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,9 @@ def __init__(self):

# Ground truth from NCBI classifications
# NOTE(review): "TT4" presumably stands for translation table 4 -- confirm
# against the rest of this class, which is not visible here.
# Species names carry the 's__' rank prefix and are first registered in
# their 'Candidatus' form.
self.NCBI_TT4 = set(['s__Candidatus Hodgkinia cicadicola', 's__Candidatus Nasuia deltocephalinicola', 's__Candidatus Stammera capleta'])
self.NCBI_TT4.update(set(['s__Candidatus Organicella extenuata', 's__Candidatus Pinguicoccus supinus']))
# The same five species are added again without the 'Candidatus' prefix, so
# the set contains both spellings of every name (presumably so a lookup
# succeeds whichever form the input taxonomy uses -- verify against callers).
self.NCBI_TT4.update(set(['s__Hodgkinia cicadicola', 's__Nasuia deltocephalinicola', 's__Stammera capleta']))
self.NCBI_TT4.update(set(['s__Organicella extenuata', 's__Pinguicoccus supinus']))

# These species clusters have an unclear ground truth, see https://doi.org/10.1093/gbe/evad164
# Kept in a separate set from NCBI_TT4; how it is consumed is not visible here.
self.GTDB_UNRESOLVED = set(['s__Providencia_A siddallii', 's__Providencia_A siddallii_A'])
Expand Down
Loading