|
1 | | -from src.components.all_associations import get_all_associations, AssociationType |
2 | | -from src.components.drug_annotation import get_drug_annotation |
3 | | -from src.components.phenotype_annotation import get_phenotype_annotation |
4 | | -from src.components.functional_annotation import get_functional_annotation |
| 1 | +from src.components.annotation_table import AnnotationTableGenerator |
| 2 | +from src.components.citation_generator import CitationGenerator |
5 | 3 | from src.components.study_parameters import get_study_parameters |
6 | 4 | from src.utils import get_article_text, is_pmcid, get_title |
7 | | -from typing import Optional |
8 | 5 | from loguru import logger |
9 | 6 | from pathlib import Path |
10 | 7 | import os |
11 | 8 |
|
12 | 9 | class AnnotationPipeline: |
13 | | - def __init__(self, pmcid: str): |
| 10 | + def __init__(self, pmcid: str, citation_approach: str = "lm"): |
14 | 11 | if not is_pmcid(pmcid): |
15 | 12 | logger.error(f"Invalid PMCID: {pmcid}") |
16 | 13 | self.pmcid = pmcid |
| 14 | + self.citation_approach = citation_approach |
17 | 15 | self.article_text = get_article_text(pmcid) |
18 | 16 | self.title = get_title(self.article_text) |
19 | | - self.all_associations = [] |
20 | 17 | self.study_parameters = {} |
21 | | - self.drug_annotations = [] |
22 | | - self.phenotye_annotations = [] |
23 | | - self.functional_annotations = [] |
| 18 | + self.annotations = None |
24 | 19 |
|
25 | 20 | def print_info(self): |
26 | | - logger.info(f"Found {len(self.all_associations)} associations") |
27 | | - logger.info(f"Created {len(self.drug_annotations)} Drug Annotations") |
28 | | - logger.info(f"Created {len(self.phenotye_annotations)} Phenotype Annotations") |
29 | | - logger.info( |
30 | | - f"Created {len(self.functional_annotations)} Functional Annotations" |
31 | | - ) |
| 21 | + annotation_count = len(self.annotations.relationships) if self.annotations else 0 |
| 22 | + |
| 23 | + logger.info(f"Created {annotation_count} Annotations") |
32 | 24 |
|
33 | 25 | def generate_final_structure(self): |
34 | 26 | return { |
35 | 27 | "pmcid": self.pmcid, |
36 | 28 | "title": self.title, |
37 | 29 | "study_parameters": self.study_parameters, |
38 | | - "drug_annotations": self.drug_annotations, |
39 | | - "phenotype_annotations": self.phenotye_annotations, |
40 | | - "functional_annotations": self.functional_annotations, |
| 30 | + "annotations": self.annotations, |
41 | 31 | } |
42 | 32 |
|
43 | 33 | def run(self, save_path: str = "data/annotations"): |
44 | 34 | logger.info("Getting Study Parameters") |
45 | 35 | self.study_parameters = get_study_parameters(self.article_text) |
46 | 36 |
|
47 | | - logger.info("Getting All Associations") |
48 | | - self.all_associations = get_all_associations(self.article_text) |
| 37 | + # Generate annotations using AnnotationTableGenerator |
| 38 | + annotation_generator = AnnotationTableGenerator(self.pmcid) |
| 39 | + |
| 40 | + logger.info("Generating Annotations") |
| 41 | + self.annotations = annotation_generator.generate_table_json() |
49 | 42 |
|
50 | | - for association in self.all_associations: |
51 | | - if association.association_type == AssociationType.DRUG: |
52 | | - self.drug_annotations.append(get_drug_annotation(association)) |
53 | | - if association.association_type == AssociationType.PHENOTYPE: |
54 | | - self.phenotye_annotations.append(get_phenotype_annotation(association)) |
55 | | - if association.association_type == AssociationType.FUNCTIONAL: |
56 | | - self.functional_annotations.append( |
57 | | - get_functional_annotation(association) |
58 | | - ) |
| 43 | + # Generate citations for annotations |
| 44 | + citation_generator = CitationGenerator(self.pmcid, approach=self.citation_approach) |
| 45 | + logger.info(f"Adding Citations to Annotations using {self.citation_approach} approach") |
| 46 | + self.annotations = citation_generator.add_citations_to_annotations(self.annotations) |
59 | 47 |
|
60 | 48 | self.print_info() |
61 | 49 |
|
@@ -89,14 +77,14 @@ def copy_markdown(pmcid: str): |
89 | 77 | if __name__ == "__main__": |
90 | 78 | pmcids = [ |
91 | 79 | "PMC5728534", |
92 | | - # "PMC11730665", |
93 | | - # "PMC5712579", |
94 | | - # "PMC4737107", |
95 | | - # "PMC5749368" |
| 80 | + "PMC11730665", |
| 81 | + "PMC5712579", |
| 82 | + "PMC4737107", |
| 83 | + "PMC5749368" |
96 | 84 | ] |
97 | 85 | for pmcid in pmcids: |
98 | 86 | logger.info(f"Processing {pmcid}") |
99 | | - pipeline = AnnotationPipeline(pmcid) |
| 87 | + pipeline = AnnotationPipeline(pmcid, citation_approach="local") |
100 | 88 | pipeline.run() |
101 | | - # for pmcid in pmcids: |
102 | | - # copy_markdown(pmcid) |
| 89 | + for pmcid in pmcids: |
| 90 | + copy_markdown(pmcid) |
0 commit comments