MDverse · pierrepo · Nov 28, 2025 · Nov 17, 2025 · Nov 17, 2025 · Nov 17, 2025
diff --git a/README.md b/README.md
@@ -35,6 +35,9 @@ uv sync
 # Analyze a structure file
 uv run grodecoder path/to/structure.gro
 
+# Analyze a topology file together with its coordinates file
+uv run grodecoder path/to/topology.psf path/to/coordinates.coor
+
 # Output to stdout with compact format
 uv run grodecoder structure.pdb --compact --stdout
 
@@ -101,6 +104,40 @@ GROdecoder produces detailed JSON inventories with the following structure:
 
 ## 🔧 Advanced Features
 
+### Read back a GROdecoder inventory file
+
+Reading back a GROdecoder inventory file gives access to the different parts of a system
+without having to identify them again:
+
+```python
+from grodecoder import read_grodecoder_output
+
+gro_results = read_grodecoder_output("1BRS_grodecoder.json")
+
+# Print the sequence of each protein segment.
+for segment in gro_results.decoded.inventory.segments:
+    if segment.is_protein():
+        print(segment.sequence)
+```
+
+In conjunction with the structure file, we can use the GROdecoder output file to access the different
+parts of the system, as identified by GROdecoder:
+
+```python
+import MDAnalysis
+from grodecoder import read_grodecoder_output
+
+
+universe = MDAnalysis.Universe("tests/data/1BRS.pdb")
+gro_results = read_grodecoder_output("1BRS_grodecoder.json")
+
+# Print the center of mass of each protein segment.
+for segment in gro_results.decoded.inventory.segments:
+    if segment.is_protein():
+        seg: MDAnalysis.AtomGroup = universe.atoms[segment.atoms]
+        print(seg.center_of_mass())
+```
+
 ### Chain Detection
 GROdecoder uses sophisticated distance-based algorithms to detect protein and nucleic acid chains:
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -39,6 +39,3 @@ line-length = 110
 
 [tool.pytest.ini_options]
 addopts = "-ra"
-
-[tool.mypy]
-ignore_missing_imports = true
diff --git a/src/grodecoder/__init__.py b/src/grodecoder/__init__.py
@@ -1,63 +1,13 @@
-import json
-from datetime import datetime
-
-import MDAnalysis as mda
-from loguru import logger
-
-from . import databases, toputils
-from ._typing import AtomGroup, Json, PathLike, Residue, Universe, UniverseLike
-from .identifier import identify
+from .core import decode, decode_structure
 from .models import Decoded, GrodecoderRunOutput, GrodecoderRunOutputRead
+from .io import read_grodecoder_output, read_universe
 
 __all__ = [
-    "databases",
-    "identify",
-    "toputils",
-    "read_structure",
-    "AtomGroup",
+    "decode",
+    "decode_structure",
+    "read_grodecoder_output",
+    "read_universe",
     "Decoded",
     "GrodecoderRunOutput",
     "GrodecoderRunOutputRead",
-    "Json",
-    "PathLike",
-    "Residue",
-    "Universe",
-    "UniverseLike",
 ]
-
-__version__ = "0.0.1"
-
-
-def _now() -> str:
-    """Returns the current date and time formatted string."""
-    return datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
-
-def read_structure(path: PathLike, psf_path: PathLike | None = None) -> Universe:
-    """Reads a structure file."""
-    if psf_path:
-        return mda.Universe(path, psf_path)
-    return mda.Universe(path)
-
-
-def read_grodecoder_output(path: PathLike) -> GrodecoderRunOutputRead:
-    with open(path) as fileobj:
-        return GrodecoderRunOutputRead.model_validate(json.load(fileobj))
-
-
-def decode(universe: UniverseLike, bond_threshold: float = 5.0) -> Decoded:
-    """Decodes the universe into an inventory of segments."""
-    return Decoded(
-        inventory=identify(universe, bond_threshold=bond_threshold),
-        resolution=toputils.guess_resolution(universe),
-    )
-
-
-def decode_structure(
-    path: PathLike, psf_path: PathLike | None = None, bond_threshold: float = 5.0
-) -> Decoded:
-    """Reads a structure file and decodes it into an inventory of segments."""
-    universe = read_structure(path, psf_path)
-    assert universe.atoms is not None  # required by type checker for some reason
-    logger.info(f"{path}: {len(universe.atoms):,d} atoms")
-    return decode(universe, bond_threshold=bond_threshold)
diff --git a/src/grodecoder/cli.py b/src/grodecoder/cli.py
diff --git a/src/grodecoder/cli/__init__.py b/src/grodecoder/cli/__init__.py
@@ -0,0 +1,41 @@
+import click
+
+from ..main import main as grodecoder_main
+from .args import Arguments as CliArgs
+from .args import CoordinatesFile, StructureFile
+from ..logging import setup_logging
+
+
+@click.command()
+@click.argument("structure_file", type=StructureFile)
+@click.argument("coordinates_file", type=CoordinatesFile, required=False)
+@click.option(
+    "--bond-threshold",
+    default=5.0,
+    type=float,
+    help="Threshold for interchain bond detection (default: 5 Å)",
+)
+@click.option("--no-atom-ids", is_flag=True, help="do not output the atom indice array")
+@click.option(
+    "-s",
+    "--stdout",
+    metavar="print_to_stdout",
+    is_flag=True,
+    help="Output the results to stdout in JSON format",
+)
+@click.option("-v", "--verbose", is_flag=True, help="show debug messages")
+def cli(**kwargs):
+    """Command-line interface for processing structure files."""
+    # NOTE: click displays the docstring above as the command's --help text,
+    # so implementation notes live in comments rather than in the docstring.
+    #
+    # `kwargs` holds one entry per argument/option declared by the decorators:
+    # structure_file, coordinates_file, bond_threshold, no_atom_ids, stdout, verbose.
+    args = CliArgs(
+        structure_file=kwargs["structure_file"],
+        coordinates_file=kwargs["coordinates_file"],
+        # Forward the parsed --bond-threshold value; without this line the
+        # option is accepted but the Arguments default is always used.
+        bond_threshold=kwargs["bond_threshold"],
+        no_atom_ids=kwargs["no_atom_ids"],
+        print_to_stdout=kwargs["stdout"],
+    )
+
+    # Logging is configured before the main routine runs; the log file name
+    # is derived from the structure file.
+    logfile = args.get_log_filename()
+    setup_logging(logfile, kwargs["verbose"])
+    grodecoder_main(args)
+
+
+__all__ = ["cli"]
diff --git a/src/grodecoder/cli/args.py b/src/grodecoder/cli/args.py
@@ -0,0 +1,85 @@
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+from typing import ClassVar
+
+from loguru import logger
+
+DEFAULT_OUTPUT_STEM_SUFFIX = "_grodecoder"
+
+
+def _fatal_error(msg: str, status: int = 1):
+    """Log `msg` at critical level, then terminate the program.
+
+    Args:
+        msg: Text handed to the logger.
+        status: Exit status passed to the interpreter (1 unless overridden).
+
+    Raises:
+        SystemExit: Always, carrying `status`.
+    """
+    logger.critical(msg)
+    raise SystemExit(status)
+
+
+@dataclass
+class InputFile:
+    """Path to an input file, validated as soon as the instance is created.
+
+    Subclasses set `valid_extensions` to the file suffixes they accept.
+    A path that fails validation is reported through `_fatal_error`, which
+    logs the problem and exits the program.
+
+    Attributes:
+        path: Location of the input file; converted to `pathlib.Path` on creation.
+    """
+
+    path: Path
+    # Accepted suffixes, leading dot included; each subclass provides the actual set.
+    valid_extensions: ClassVar[set[str]]
+
+    def __post_init__(self) -> None:
+        """Normalize `path` and check that it names an existing file with a valid suffix."""
+        # Callers may pass a plain string: always store a pathlib.Path.
+        self.path = Path(self.path)
+
+        path = self.path
+        if not path.exists():
+            _fatal_error(f"'{path}' does not exist")
+        if not path.is_file():
+            _fatal_error(f"'{path}' is not a file")
+        if path.suffix not in self.valid_extensions:
+            # Sort the extensions so the message does not depend on set iteration order.
+            expected = ", ".join(sorted(self.valid_extensions))
+            _fatal_error(f"'{path}' has an invalid extension (valid extensions are {expected})")
+        # Nothing is returned: the generated __init__ discards __post_init__'s result.
+
+    @property
+    def extension(self) -> str:
+        """Suffix of the file name, leading dot included."""
+        return self.path.suffix
+
+    @property
+    def stem(self) -> str:
+        """File name without its directory and without its final suffix."""
+        return self.path.stem
+
+
+@dataclass
+class StructureFile(InputFile):
+    """First positional input of the CLI: a structure (or topology) file."""
+
+    valid_extensions: ClassVar[set[str]] = {
+        ".gro",
+        ".pdb",
+        ".tpr",
+        ".psf",
+    }
+
+
+@dataclass
+class CoordinatesFile(InputFile):
+    """Optional second positional input of the CLI: a coordinates file."""
+
+    # NOTE(review): this set is StructureFile's set plus ".coor"; confirm that every
+    # entry (".psf" in particular) is meant to be accepted as a coordinates file.
+    valid_extensions: ClassVar[set[str]] = {
+        ".gro",
+        ".pdb",
+        ".tpr",
+        ".psf",
+        ".coor",
+    }
+
+
+@dataclass
+class Arguments:
+    """Container for the options parsed from the command line.
+
+    Attributes:
+        structure_file (StructureFile): Structure file given on the command line.
+        coordinates_file (CoordinatesFile | None): Coordinates file, or None when omitted.
+        bond_threshold (float): Threshold for interchain bond detection.
+        no_atom_ids (bool): If True, use compact serialization (no atom indices).
+        print_to_stdout (bool): Whether to output results to stdout.
+    """
+
+    structure_file: StructureFile
+    coordinates_file: CoordinatesFile | None = None
+    bond_threshold: float = 5.0
+    no_atom_ids: bool = True
+    print_to_stdout: bool = False
+
+    def get_log_filename(self) -> Path:
+        """Return the log file path built from the structure file stem."""
+        stem = self.structure_file.stem
+        return generate_output_log_path(stem)
+
+    def get_inventory_filename(self) -> Path:
+        """Return the JSON inventory path built from the structure file stem."""
+        stem = self.structure_file.stem
+        return generate_output_inventory_path(stem)
+
+
+def generate_output_inventory_path(stem: str) -> Path:
+    """Build the inventory file path: `stem`, the default output suffix, then ".json"."""
+    filename = stem + DEFAULT_OUTPUT_STEM_SUFFIX + ".json"
+    return Path(filename)
+
+
+def generate_output_log_path(stem: str) -> Path:
+    """Build the log file path: `stem`, the default output suffix, then ".log"."""
+    filename = stem + DEFAULT_OUTPUT_STEM_SUFFIX + ".log"
+    return Path(filename)