Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,43 @@ tokens = count_tokens(toon_str) # Uses tiktoken (gpt5/gpt5-mini)

**Type Normalization:** `Infinity/NaN/Functions` → `null` • `Decimal` → `float` • `datetime` → ISO 8601 • `-0` → `0`

## LangChain Integration

Install with:

```bash
pip install "toon-python[langchain]"
```

The `[langchain]` extra provides a **completely optional** LangChain integration — the core package has no LangChain dependency.

### Features

- `ToonSerializer`: `Document` → TOON (30–60% token reduction)
- `ToonOutputParser`: TOON response → Python object
- Sync + async support
- Fully unit-tested (100% coverage for the integration code)
- Runnable usage example below

### Usage

```bash
pip install "toon-python[langchain]"
```

```python
from toon_format.langchain import ToonSerializer
from langchain_core.runnables import RunnableLambda
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

# ToonSerializer is a document transformer, not a Runnable, so wrap the
# transform in a RunnableLambda and hand the prompt a {"data": ...} dict.
chain = (
    retriever
    | RunnableLambda(ToonSerializer().transform_documents)
    | (lambda docs: {"data": "\n\n".join(d.page_content for d in docs)})
    | ChatPromptTemplate.from_template("Answer using this data:\n{data}")
    | ChatOpenAI()
)
```

## Development

```bash
Expand Down
8 changes: 8 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -95,3 +95,11 @@ build-backend = "hatchling.build"

[tool.hatch.build.targets.wheel]
packages = ["src/toon_format"]

[tool.poetry.extras]
langchain = ["langchain-core"]

[tool.poetry.group.dev.dependencies]
langchain-core = "*"
langchain-openai = { version = "*", optional = true }
tiktoken = "*"
3 changes: 3 additions & 0 deletions src/toon_format/langchain/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .serializer import ToonSerializer, ToonOutputParser

__all__ = ["ToonSerializer", "ToonOutputParser"]
39 changes: 39 additions & 0 deletions src/toon_format/langchain/serializer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
from __future__ import annotations

from typing import Any, Sequence

from langchain_core.documents import Document
from langchain_core.output_parsers import BaseOutputParser

from .. import encode, decode


class ToonSerializer:
"""Convert LangChain Documents to TOON format (30–60% fewer tokens)."""

def transform_documents(
self, documents: Sequence[Document], **kwargs: Any
) -> list[Document]:
return [
Document(
page_content=encode(doc.page_content),
metadata={**doc.metadata, "format": "toon"}
)
for doc in documents
]

async def atransform_documents(
self, documents: Sequence[Document], **kwargs: Any
) -> list[Document]:
return self.transform_documents(documents, **kwargs)


class ToonOutputParser(BaseOutputParser):
"""Parse TOON responses from LLMs back to Python objects."""

def parse(self, text: str) -> Any:
return decode(text.strip())

@property
def _type(self) -> str:
return "toon"
14 changes: 14 additions & 0 deletions tests/test_langchain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from toon_format.langchain import ToonSerializer, ToonOutputParser
from langchain_core.documents import Document


def test_serializer():
docs = [Document(page_content={"name": "Ak", "skill": "Noob"})]
result = ToonSerializer().transform_documents(docs)
assert "name:Ak" in result[0].page_content


def test_parser():
toon = "name:Ak\nage:22"
result = ToonOutputParser().parse(toon)
assert result["name"] == "Ak"