Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 43 additions & 25 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,16 @@ license = {text = 'Apache-2.0'}
readme = 'README.md'
requires-python = '>=3.7'
dependencies = [
'tqdm',
'pdf2image',
'pdfplumber==0.7.4',
'requests',
'pandas<2',
'pydantic[settings]>=1,<3',
'ncls==0.0.66',
'necessary>=0.3.2',
'tqdm',
'pdf2image',
'pdfplumber==0.7.4',
'requests',
'pandas<2',
'pydantic>=1,<3',
'pydantic-settings>=2.0',
'ncls==0.0.66',
'necessary>=0.3.2',
'numpy<2'
]

[project.urls]
Expand Down Expand Up @@ -74,14 +76,16 @@ pysbd_predictors = [
]
heuristic_predictors = [
'tokenizers',
'pydantic>=1,<2',
'pydantic>=1,<3',
'pydantic-settings>=2.0',
]
lp_predictors = [
'layoutparser',
'torch',
'torchvision',
'effdet',
'pydantic>=1,<2'
'pydantic>=1,<3',
'pydantic-settings>=2.0',
]
hf_predictors = [
'torch',
Expand All @@ -91,45 +95,59 @@ hf_predictors = [
vila_predictors = [
'vila>=0.5,<0.6',
'transformers<4.34.0',
'pydantic>=1,<2',
'pydantic>=1,<3',
'pydantic-settings>=2.0',
]
mention_predictor = [
'transformers[torch]',
'optimum[onnxruntime]',
'pydantic>=1,<2',
'optimum[onnxruntime]<1.14',
'pydantic>=1,<3',
'pydantic-settings>=2.0',
]
mention_predictor_gpu = [
'transformers[torch]',
'optimum[onnxruntime-gpu]',
'pydantic>=1,<2',
'optimum[onnxruntime-gpu]<1.14',
'pydantic>=1,<3',
'pydantic-settings>=2.0',
]
bibentry_predictor = [
'transformers',
'unidecode',
'torch',
'optimum[onnxruntime]',
'pydantic>=1,<2',
'optimum[onnxruntime]<1.14',
'pydantic>=1,<3',
'pydantic-settings>=2.0',
]
bibentry_predictor_gpu = [
'transformers',
'unidecode',
'torch',
'optimum[onnxruntime-gpu]',
'pydantic>=1,<2'
'optimum[onnxruntime-gpu]<1.14',
'pydantic>=1,<3',
'pydantic-settings>=2.0',
]
bibentry_detection_predictor_gpu = [
'Pillow<10',
'layoutparser',
'torch',
'torchvision',
'pydantic>=1,<3',
'pydantic-settings>=2.0',
]
bibentry_detection_predictor = [
'Pillow<10',
'layoutparser',
'torch==1.8.0+cu111',
'torchvision==0.9.0+cu111',
'pydantic<=1,<2',
'torch',
'torchvision',
'pydantic>=1,<3',
'pydantic-settings>=2.0',
]
citation_links = [
'numpy',
'thefuzz[speedup]',
'scikit-learn',
'xgboost',
'pydantic>=1,<2',
'pydantic>=1,<3',
]
section_nesting = [
'numpy',
Expand All @@ -138,14 +156,14 @@ section_nesting = [
]
figure_table_predictors = [
'scipy',
'pydantic>=1,<2',
'pydantic>=1,<3',
]
svm_word_predictor = [
'scikit-learn',
'scipy',
'numpy',
'tokenizers',
'pydantic>=1,<2',
'pydantic>=1,<3',
]
recipes = [
'layoutparser',
Expand Down
11 changes: 6 additions & 5 deletions src/ai2_internal/api.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import Any, List, Optional, Type

from pydantic import BaseModel, Extra, Field
from pydantic import BaseModel, ConfigDict, Field

import mmda.types.annotation as mmda_ann

Expand Down Expand Up @@ -64,10 +64,11 @@ def from_mmda(cls, metadata: mmda_ann.Metadata) -> "Attributes":
return cls(**metadata.to_json())

def to_mmda(self) -> mmda_ann.Metadata:
return mmda_ann.Metadata.from_json(self.dict())
return mmda_ann.Metadata.from_json(self.model_dump())


class Annotation(BaseModel, extra=Extra.ignore):
class Annotation(BaseModel):
model_config = ConfigDict(extra='ignore')
attributes: Attributes = Attributes()

@classmethod
Expand Down Expand Up @@ -103,7 +104,7 @@ def from_mmda(cls, box_group: mmda_ann.BoxGroup) -> "BoxGroup":
)

def to_mmda(self) -> mmda_ann.BoxGroup:
metadata = mmda_ann.Metadata.from_json(self.attributes.dict())
metadata = mmda_ann.Metadata.from_json(self.attributes.model_dump())
if self.type:
metadata.type=self.type
return mmda_ann.BoxGroup(
Expand Down Expand Up @@ -153,7 +154,7 @@ def from_mmda(cls, span_group: mmda_ann.SpanGroup) -> "SpanGroup":
return ret

def to_mmda(self) -> mmda_ann.SpanGroup:
metadata = mmda_ann.Metadata.from_json(self.attributes.dict())
metadata = mmda_ann.Metadata.from_json(self.attributes.model_dump())
if self.type:
metadata.type = self.type
if self.text:
Expand Down
3 changes: 2 additions & 1 deletion src/ai2_internal/bibentry_detection_predictor/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

from typing import List

from pydantic import BaseModel, BaseSettings, Field
from pydantic import BaseModel, Field
from pydantic_settings import BaseSettings

from ai2_internal import api
from mmda.predictors.d2_predictors.bibentry_detection_predictor import BibEntryDetectionPredictor
Expand Down
3 changes: 2 additions & 1 deletion src/ai2_internal/bibentry_predictor/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

from typing import List, Optional

from pydantic import BaseModel, BaseSettings, Field
from pydantic import BaseModel, Field
from pydantic_settings import BaseSettings

from mmda.predictors.hf_predictors.bibentry_predictor.predictor import BibEntryPredictor

Expand Down
3 changes: 2 additions & 1 deletion src/ai2_internal/bibentry_predictor_mmda/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

from typing import List

from pydantic import BaseModel, BaseSettings, Field
from pydantic import BaseModel, Field
from pydantic_settings import BaseSettings

from ai2_internal import api
from mmda.predictors.hf_predictors.bibentry_predictor.predictor import BibEntryPredictor
Expand Down
3 changes: 2 additions & 1 deletion src/ai2_internal/citation_links/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

from typing import List, Tuple

from pydantic import BaseModel, BaseSettings, Field
from pydantic import BaseModel, Field
from pydantic_settings import BaseSettings

from ai2_internal import api
from mmda.predictors.xgb_predictors.citation_link_predictor import CitationLinkPredictor
Expand Down
3 changes: 2 additions & 1 deletion src/ai2_internal/citation_mentions/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
from itertools import groupby
from bisect import bisect

from pydantic import BaseModel, BaseSettings
from pydantic import BaseModel
from pydantic_settings import BaseSettings

from ai2_internal import api
from mmda.predictors.hf_predictors.mention_predictor import MentionPredictor
Expand Down
3 changes: 2 additions & 1 deletion src/ai2_internal/dwp_heuristic/interface.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import List

from pydantic import BaseModel, BaseSettings, Field
from pydantic import BaseModel, Field
from pydantic_settings import BaseSettings

from ai2_internal.api import SpanGroup
from mmda.predictors.heuristic_predictors.dictionary_word_predictor import (
Expand Down
3 changes: 2 additions & 1 deletion src/ai2_internal/figure_table_predictors/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

from typing import List

from pydantic import BaseModel, BaseSettings
from pydantic import BaseModel
from pydantic_settings import BaseSettings

from mmda.predictors.heuristic_predictors.figure_table_predictors import FigureTablePredictions
from mmda.types.document import Document
Expand Down
3 changes: 2 additions & 1 deletion src/ai2_internal/layout_parser/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
from typing import List

import torch
from pydantic import BaseModel, BaseSettings, Field
from pydantic import BaseModel, Field
from pydantic_settings import BaseSettings

from ai2_internal.api import BoxGroup
from mmda.predictors.lp_predictors import LayoutParserPredictor
Expand Down
3 changes: 2 additions & 1 deletion src/ai2_internal/svm_word_predictor/interface.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import List

from pydantic import BaseModel, BaseSettings, Field
from pydantic import BaseModel, Field
from pydantic_settings import BaseSettings

from ai2_internal.api import SpanGroup
from mmda.predictors.sklearn_predictors.svm_word_predictor import SVMWordPredictor
Expand Down
3 changes: 2 additions & 1 deletion src/ai2_internal/vila/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
from typing import List

import torch
from pydantic import BaseModel, BaseSettings, Field
from pydantic import BaseModel, Field
from pydantic_settings import BaseSettings

from ai2_internal import api
from mmda.predictors.hf_predictors.token_classification_predictor import (
Expand Down
Loading