diff --git a/pyproject.toml b/pyproject.toml index f2eb94ac..ad3d5ba5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,14 +9,16 @@ license = {text = 'Apache-2.0'} readme = 'README.md' requires-python = '>=3.7' dependencies = [ - 'tqdm', - 'pdf2image', - 'pdfplumber==0.7.4', - 'requests', - 'pandas<2', - 'pydantic[settings]>=1,<3', - 'ncls==0.0.66', - 'necessary>=0.3.2', + 'tqdm', + 'pdf2image', + 'pdfplumber==0.7.4', + 'requests', + 'pandas<2', + 'pydantic>=1,<3', + 'pydantic-settings>=2.0', + 'ncls==0.0.66', + 'necessary>=0.3.2', + 'numpy<2' ] [project.urls] @@ -74,14 +76,16 @@ pysbd_predictors = [ ] heuristic_predictors = [ 'tokenizers', - 'pydantic>=1,<2', + 'pydantic>=1,<3', + 'pydantic-settings>=2.0', ] lp_predictors = [ 'layoutparser', 'torch', 'torchvision', 'effdet', - 'pydantic>=1,<2' + 'pydantic>=1,<3', + 'pydantic-settings>=2.0', ] hf_predictors = [ 'torch', @@ -91,45 +95,59 @@ hf_predictors = [ vila_predictors = [ 'vila>=0.5,<0.6', 'transformers<4.34.0', - 'pydantic>=1,<2', + 'pydantic>=1,<3', + 'pydantic-settings>=2.0', ] mention_predictor = [ 'transformers[torch]', - 'optimum[onnxruntime]', - 'pydantic>=1,<2', + 'optimum[onnxruntime]<1.14', + 'pydantic>=1,<3', + 'pydantic-settings>=2.0', ] mention_predictor_gpu = [ 'transformers[torch]', - 'optimum[onnxruntime-gpu]', - 'pydantic>=1,<2', + 'optimum[onnxruntime-gpu]<1.14', + 'pydantic>=1,<3', + 'pydantic-settings>=2.0', ] bibentry_predictor = [ 'transformers', 'unidecode', 'torch', - 'optimum[onnxruntime]', - 'pydantic>=1,<2', + 'optimum[onnxruntime]<1.14', + 'pydantic>=1,<3', + 'pydantic-settings>=2.0', ] bibentry_predictor_gpu = [ 'transformers', 'unidecode', 'torch', - 'optimum[onnxruntime-gpu]', - 'pydantic>=1,<2' + 'optimum[onnxruntime-gpu]<1.14', + 'pydantic>=1,<3', + 'pydantic-settings>=2.0', +] +bibentry_detection_predictor_gpu = [ + 'Pillow<10', + 'layoutparser', + 'torch', + 'torchvision', + 'pydantic>=1,<3', + 'pydantic-settings>=2.0', ] bibentry_detection_predictor = [ 'Pillow<10', 'layoutparser', - 'torch==1.8.0+cu111', - 'torchvision==0.9.0+cu111', - 'pydantic<=1,<2', + 'torch', + 'torchvision', + 'pydantic>=1,<3', + 'pydantic-settings>=2.0', ] citation_links = [ 'numpy', 'thefuzz[speedup]', 'scikit-learn', 'xgboost', - 'pydantic>=1,<2', + 'pydantic>=1,<3', ] section_nesting = [ 'numpy', @@ -138,14 +156,14 @@ section_nesting = [ ] figure_table_predictors = [ 'scipy', - 'pydantic>=1,<2', + 'pydantic>=1,<3', ] svm_word_predictor = [ 'scikit-learn', 'scipy', 'numpy', 'tokenizers', - 'pydantic>=1,<2', + 'pydantic>=1,<3', ] recipes = [ 'layoutparser', diff --git a/src/ai2_internal/api.py b/src/ai2_internal/api.py index 297b4066..d3c6aefd 100644 --- a/src/ai2_internal/api.py +++ b/src/ai2_internal/api.py @@ -1,6 +1,6 @@ from typing import Any, List, Optional, Type -from pydantic import BaseModel, Extra, Field +from pydantic import BaseModel, ConfigDict, Field import mmda.types.annotation as mmda_ann @@ -64,10 +64,11 @@ def from_mmda(cls, metadata: mmda_ann.Metadata) -> "Attributes": return cls(**metadata.to_json()) def to_mmda(self) -> mmda_ann.Metadata: - return mmda_ann.Metadata.from_json(self.dict()) + return mmda_ann.Metadata.from_json(self.model_dump()) -class Annotation(BaseModel, extra=Extra.ignore): +class Annotation(BaseModel): + model_config = ConfigDict(extra='ignore') attributes: Attributes = Attributes() @classmethod @@ -103,7 +104,7 @@ def from_mmda(cls, box_group: mmda_ann.BoxGroup) -> "BoxGroup": ) def to_mmda(self) -> mmda_ann.BoxGroup: - metadata = mmda_ann.Metadata.from_json(self.attributes.dict()) + metadata = mmda_ann.Metadata.from_json(self.attributes.model_dump()) if self.type: metadata.type=self.type return mmda_ann.BoxGroup( @@ -153,7 +154,7 @@ def from_mmda(cls, span_group: mmda_ann.SpanGroup) -> "SpanGroup": return ret def to_mmda(self) -> mmda_ann.SpanGroup: - metadata = mmda_ann.Metadata.from_json(self.attributes.dict()) + metadata = mmda_ann.Metadata.from_json(self.attributes.model_dump()) if self.type: metadata.type = self.type if self.text: diff --git a/src/ai2_internal/bibentry_detection_predictor/interface.py b/src/ai2_internal/bibentry_detection_predictor/interface.py index 7b9e4cea..271d6936 100644 --- a/src/ai2_internal/bibentry_detection_predictor/interface.py +++ b/src/ai2_internal/bibentry_detection_predictor/interface.py @@ -8,7 +8,8 @@ from typing import List -from pydantic import BaseModel, BaseSettings, Field +from pydantic import BaseModel, Field +from pydantic_settings import BaseSettings from ai2_internal import api from mmda.predictors.d2_predictors.bibentry_detection_predictor import BibEntryDetectionPredictor diff --git a/src/ai2_internal/bibentry_predictor/interface.py b/src/ai2_internal/bibentry_predictor/interface.py index 8c49cdf3..7517c350 100644 --- a/src/ai2_internal/bibentry_predictor/interface.py +++ b/src/ai2_internal/bibentry_predictor/interface.py @@ -8,7 +8,8 @@ from typing import List, Optional -from pydantic import BaseModel, BaseSettings, Field +from pydantic import BaseModel, Field +from pydantic_settings import BaseSettings from mmda.predictors.hf_predictors.bibentry_predictor.predictor import BibEntryPredictor diff --git a/src/ai2_internal/bibentry_predictor_mmda/interface.py b/src/ai2_internal/bibentry_predictor_mmda/interface.py index b9a25d66..d0b408f1 100644 --- a/src/ai2_internal/bibentry_predictor_mmda/interface.py +++ b/src/ai2_internal/bibentry_predictor_mmda/interface.py @@ -8,7 +8,8 @@ from typing import List -from pydantic import BaseModel, BaseSettings, Field +from pydantic import BaseModel, Field +from pydantic_settings import BaseSettings from ai2_internal import api from mmda.predictors.hf_predictors.bibentry_predictor.predictor import BibEntryPredictor diff --git a/src/ai2_internal/citation_links/interface.py b/src/ai2_internal/citation_links/interface.py index 1546dd07..71cff087 100644 --- a/src/ai2_internal/citation_links/interface.py +++ b/src/ai2_internal/citation_links/interface.py @@ -8,7 +8,8 @@ from typing import List, Tuple -from pydantic import BaseModel, BaseSettings, Field +from pydantic import BaseModel, Field +from pydantic_settings import BaseSettings from ai2_internal import api from mmda.predictors.xgb_predictors.citation_link_predictor import CitationLinkPredictor diff --git a/src/ai2_internal/citation_mentions/interface.py b/src/ai2_internal/citation_mentions/interface.py index 04c4238e..cb05f824 100644 --- a/src/ai2_internal/citation_mentions/interface.py +++ b/src/ai2_internal/citation_mentions/interface.py @@ -10,7 +10,8 @@ from itertools import groupby from bisect import bisect -from pydantic import BaseModel, BaseSettings +from pydantic import BaseModel +from pydantic_settings import BaseSettings from ai2_internal import api from mmda.predictors.hf_predictors.mention_predictor import MentionPredictor diff --git a/src/ai2_internal/dwp_heuristic/interface.py b/src/ai2_internal/dwp_heuristic/interface.py index f3f65111..52082330 100644 --- a/src/ai2_internal/dwp_heuristic/interface.py +++ b/src/ai2_internal/dwp_heuristic/interface.py @@ -1,6 +1,7 @@ from typing import List -from pydantic import BaseModel, BaseSettings, Field +from pydantic import BaseModel, Field +from pydantic_settings import BaseSettings from ai2_internal.api import SpanGroup from mmda.predictors.heuristic_predictors.dictionary_word_predictor import ( diff --git a/src/ai2_internal/figure_table_predictors/interface.py b/src/ai2_internal/figure_table_predictors/interface.py index 21248547..b2f2a949 100644 --- a/src/ai2_internal/figure_table_predictors/interface.py +++ b/src/ai2_internal/figure_table_predictors/interface.py @@ -8,7 +8,8 @@ from typing import List -from pydantic import BaseModel, BaseSettings +from pydantic import BaseModel +from pydantic_settings import BaseSettings from mmda.predictors.heuristic_predictors.figure_table_predictors import FigureTablePredictions from mmda.types.document import Document diff --git a/src/ai2_internal/layout_parser/interface.py b/src/ai2_internal/layout_parser/interface.py index 14d188ac..abaf94c2 100644 --- a/src/ai2_internal/layout_parser/interface.py +++ b/src/ai2_internal/layout_parser/interface.py @@ -10,7 +10,8 @@ from typing import List import torch -from pydantic import BaseModel, BaseSettings, Field +from pydantic import BaseModel, Field +from pydantic_settings import BaseSettings from ai2_internal.api import BoxGroup from mmda.predictors.lp_predictors import LayoutParserPredictor diff --git a/src/ai2_internal/svm_word_predictor/interface.py b/src/ai2_internal/svm_word_predictor/interface.py index 3a102bf0..992e60b3 100644 --- a/src/ai2_internal/svm_word_predictor/interface.py +++ b/src/ai2_internal/svm_word_predictor/interface.py @@ -1,6 +1,7 @@ from typing import List -from pydantic import BaseModel, BaseSettings, Field +from pydantic import BaseModel, Field +from pydantic_settings import BaseSettings from ai2_internal.api import SpanGroup from mmda.predictors.sklearn_predictors.svm_word_predictor import SVMWordPredictor diff --git a/src/ai2_internal/vila/interface.py b/src/ai2_internal/vila/interface.py index 84ca5bb4..c264517c 100644 --- a/src/ai2_internal/vila/interface.py +++ b/src/ai2_internal/vila/interface.py @@ -10,7 +10,8 @@ from typing import List import torch -from pydantic import BaseModel, BaseSettings, Field +from pydantic import BaseModel, Field +from pydantic_settings import BaseSettings from ai2_internal import api from mmda.predictors.hf_predictors.token_classification_predictor import (