Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ uv sync
# Analyze a structure file
uv run grodecoder path/to/structure.gro

# Analyze a topology file together with a coordinates file
uv run grodecoder path/to/topology.psf /path/to/coordinates.coor

# Output to stdout with compact format
uv run grodecoder structure.pdb --compact --stdout

Expand Down Expand Up @@ -101,6 +104,40 @@ GROdecoder produces detailed JSON inventories with the following structure:

## 🔧 Advanced Features

### Read back a Grodecoder inventory file

Reading back a GROdecoder inventory file gives access to the different parts of a system
without having to identify them again:

```python
from grodecoder import read_grodecoder_output

gro_results = read_grodecoder_output("1BRS_grodecoder.json")

# Print the sequence of each protein segment only.
for segment in gro_results.decoded.inventory.segments:
if segment.is_protein():
print(segment.sequence)
```

In conjunction with the structure file, we can use the grodecoder output file to access the different
parts of the system, as identified by grodecoder:

```python
import MDAnalysis
from grodecoder import read_grodecoder_output


universe = MDAnalysis.Universe("tests/data/1BRS.pdb")
gro_results = read_grodecoder_output("1BRS_grodecoder.json")

# Prints the center of mass of each protein segment.
for segment in gro_results.decoded.inventory.segments:
if segment.is_protein():
seg: MDAnalysis.AtomGroup = universe.atoms[segment.atoms]
print(seg.center_of_mass())
```

### Chain Detection
GROdecoder uses sophisticated distance-based algorithms to detect protein and nucleic acid chains:

Expand Down
3 changes: 0 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,3 @@ line-length = 110

[tool.pytest.ini_options]
addopts = "-ra"

[tool.mypy]
ignore_missing_imports = true
62 changes: 6 additions & 56 deletions src/grodecoder/__init__.py
Original file line number Diff line number Diff line change
@@ -1,63 +1,13 @@
import json
from datetime import datetime

import MDAnalysis as mda
from loguru import logger

from . import databases, toputils
from ._typing import AtomGroup, Json, PathLike, Residue, Universe, UniverseLike
from .identifier import identify
from .core import decode, decode_structure
from .models import Decoded, GrodecoderRunOutput, GrodecoderRunOutputRead
from .io import read_grodecoder_output, read_universe

# Names exported by `from grodecoder import *`; each entry is either imported
# above or defined in this module.
__all__ = [
    "databases",
    "identify",
    "toputils",
    "read_structure",
    "AtomGroup",
    "decode",
    "decode_structure",
    "read_grodecoder_output",
    "read_universe",
    "Decoded",
    "GrodecoderRunOutput",
    "GrodecoderRunOutputRead",
    "Json",
    "PathLike",
    "Residue",
    "Universe",
    "UniverseLike",
]

# Package version string.
__version__ = "0.0.1"


def _now() -> str:
"""Returns the current date and time formatted string."""
return datetime.now().strftime("%Y-%m-%d %H:%M:%S")


def read_structure(path: PathLike, psf_path: PathLike | None = None) -> Universe:
    """Reads a structure file, optionally paired with a PSF topology.

    Args:
        path: Structure (coordinates) file to load.
        psf_path: Optional PSF topology file describing the same system.

    Returns:
        The MDAnalysis universe built from the given file(s).
    """
    if not psf_path:
        return mda.Universe(path)
    # MDAnalysis.Universe takes the topology first and the coordinate file(s)
    # after it, so the PSF must come before the structure file.
    return mda.Universe(psf_path, path)


def read_grodecoder_output(path: PathLike) -> GrodecoderRunOutputRead:
    """Reads a grodecoder JSON output file back into a validated model.

    Args:
        path: Path to the JSON file to read.

    Returns:
        The result of `GrodecoderRunOutputRead.model_validate` applied to the
        parsed JSON document.
    """
    # JSON is UTF-8 by specification; do not rely on the platform's locale
    # encoding, which differs between systems.
    with open(path, encoding="utf-8") as fileobj:
        return GrodecoderRunOutputRead.model_validate(json.load(fileobj))


def decode(universe: UniverseLike, bond_threshold: float = 5.0) -> Decoded:
    """Build a `Decoded` result (inventory and resolution) from a universe.

    Args:
        universe: The system to decode.
        bond_threshold: Threshold forwarded to `identify`.

    Returns:
        A `Decoded` holding the identified inventory and the guessed resolution.
    """
    inventory = identify(universe, bond_threshold=bond_threshold)
    resolution = toputils.guess_resolution(universe)
    return Decoded(inventory=inventory, resolution=resolution)


def decode_structure(
    path: PathLike, psf_path: PathLike | None = None, bond_threshold: float = 5.0
) -> Decoded:
    """Load a structure file and decode it into an inventory of segments.

    Args:
        path: Structure file to read.
        psf_path: Optional PSF file passed along to `read_structure`.
        bond_threshold: Threshold forwarded to `decode`.

    Returns:
        The `Decoded` result for the loaded universe.
    """
    universe = read_structure(path, psf_path)
    atoms = universe.atoms
    assert atoms is not None  # narrows the type for the type checker
    logger.info(f"{path}: {len(atoms):,d} atoms")
    return decode(universe, bond_threshold=bond_threshold)
126 changes: 0 additions & 126 deletions src/grodecoder/cli.py

This file was deleted.

41 changes: 41 additions & 0 deletions src/grodecoder/cli/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import click

from ..main import main as grodecoder_main
from .args import Arguments as CliArgs
from .args import CoordinatesFile, StructureFile
from ..logging import setup_logging


@click.command()
@click.argument("structure_file", type=StructureFile)
@click.argument("coordinates_file", type=CoordinatesFile, required=False)
@click.option(
    "--bond-threshold",
    default=5.0,
    type=float,
    help="Threshold for interchain bond detection (default: 5 Å)",
)
@click.option("--no-atom-ids", is_flag=True, help="do not output the atom indice array")
@click.option(
    "-s",
    "--stdout",
    metavar="print_to_stdout",
    is_flag=True,
    help="Output the results to stdout in JSON format",
)
@click.option("-v", "--verbose", is_flag=True, help="show debug messages")
def cli(**kwargs):
    """Command-line interface for processing structure files."""
    # NOTE: the docstring above is displayed by click as the command help text,
    # so it is kept short and user-facing.

    # Gather the parsed command-line values into a single arguments object.
    args = CliArgs(
        structure_file=kwargs["structure_file"],
        coordinates_file=kwargs["coordinates_file"],
        # Forward the user-supplied threshold; without this the --bond-threshold
        # option is parsed but the dataclass default is always used.
        bond_threshold=kwargs["bond_threshold"],
        no_atom_ids=kwargs["no_atom_ids"],
        print_to_stdout=kwargs["stdout"],
    )

    # The log file name is derived from the structure file name.
    logfile = args.get_log_filename()
    setup_logging(logfile, kwargs["verbose"])
    grodecoder_main(args)


__all__ = ["cli"]
85 changes: 85 additions & 0 deletions src/grodecoder/cli/args.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import ClassVar

from loguru import logger

# Suffix appended to a file stem when building the default output file names
# (see generate_output_inventory_path / generate_output_log_path).
DEFAULT_OUTPUT_STEM_SUFFIX = "_grodecoder"


def _fatal_error(msg: str, status: int = 1):
    """Log `msg` at critical level, then terminate with exit code `status`."""
    logger.critical(msg)
    # Same effect as sys.exit(status), which raises SystemExit itself.
    raise SystemExit(status)


@dataclass
class InputFile:
    """An input file path that is validated when the instance is created.

    The base class declares `valid_extensions` without giving it a value;
    the set of accepted suffixes must be supplied on the class (as the
    subclasses in this module do) before instances are created.

    Validation failures are reported through `_fatal_error`.
    """

    path: Path
    # Accepted file suffixes, including the leading dot (e.g. ".gro").
    valid_extensions: ClassVar[set[str]]

    def __post_init__(self) -> None:
        """Normalize `path` to a `pathlib.Path` and check that it is usable."""
        # Callers may pass a plain string; always store a Path.
        self.path = Path(self.path)
        path = self.path

        if not path.exists():
            _fatal_error(f"'{path}' does not exist")
        if not path.is_file():
            _fatal_error(f"'{path}' is not a file")
        if path.suffix not in self.valid_extensions:
            # Sort the extensions so the message is identical from run to run
            # (the iteration order of a set of strings is not stable).
            allowed = ", ".join(sorted(self.valid_extensions))
            _fatal_error(f"'{path}' has an invalid extension (valid extensions are {allowed})")
        # No return value: dataclasses ignore whatever __post_init__ returns.

    @property
    def extension(self) -> str:
        """File suffix of the path, including the leading dot."""
        return self.path.suffix

    @property
    def stem(self) -> str:
        """File name of the path without its final suffix."""
        return self.path.stem


@dataclass
class StructureFile(InputFile):
    """Input file accepted as the structure argument."""

    valid_extensions: ClassVar[set[str]] = {
        ".gro",
        ".pdb",
        ".psf",
        ".tpr",
    }


@dataclass
class CoordinatesFile(InputFile):
    """Input file accepted as the coordinates argument."""

    valid_extensions: ClassVar[set[str]] = {
        ".coor",
        ".gro",
        ".pdb",
        ".psf",
        ".tpr",
    }


@dataclass
class Arguments:
    """Container for the options collected from the command line.

    Attrs:
        structure_file (StructureFile): The structure input file.
        coordinates_file (CoordinatesFile | None): The optional coordinates input file.
        bond_threshold (float): Threshold for interchain bond detection.
        no_atom_ids (bool): If True, use compact serialization (no atom indices).
        print_to_stdout (bool): Whether to output results to stdout.
    """

    structure_file: StructureFile
    coordinates_file: CoordinatesFile | None = None
    bond_threshold: float = 5.0
    no_atom_ids: bool = True
    print_to_stdout: bool = False

    def get_log_filename(self) -> Path:
        """Return the log file path derived from the structure file stem."""
        stem = self.structure_file.stem
        return generate_output_log_path(stem)

    def get_inventory_filename(self) -> Path:
        """Return the inventory file path derived from the structure file stem."""
        stem = self.structure_file.stem
        return generate_output_inventory_path(stem)


def generate_output_inventory_path(stem: str) -> Path:
    """Return the inventory path: `stem`, the default suffix, then ``.json``."""
    filename = "".join((stem, DEFAULT_OUTPUT_STEM_SUFFIX, ".json"))
    return Path(filename)


def generate_output_log_path(stem: str) -> Path:
    """Return the log path: `stem`, the default suffix, then ``.log``."""
    filename = "".join((stem, DEFAULT_OUTPUT_STEM_SUFFIX, ".log"))
    return Path(filename)
Loading