From 22d4e12bf1cb2168c53d9ba02d3fd40079a2fb39 Mon Sep 17 00:00:00 2001 From: Ansarafsar Date: Thu, 27 Nov 2025 23:39:44 +0530 Subject: [PATCH 1/4] feat(langchain): add ToonSerializer and ToonOutputParser --- README.md | 20 +++++++++++++ pyproject.toml | 8 +++++ src/toon_format/langchain/__init__.py | 3 ++ src/toon_format/langchain/serializer.py | 39 +++++++++++++++++++++++++ tests/test_langhchain.py | 14 +++++++++ 5 files changed, 84 insertions(+) create mode 100644 src/toon_format/langchain/__init__.py create mode 100644 src/toon_format/langchain/serializer.py create mode 100644 tests/test_langhchain.py diff --git a/README.md b/README.md index 31ea483..5af264b 100644 --- a/README.md +++ b/README.md @@ -120,6 +120,26 @@ tokens = count_tokens(toon_str) # Uses tiktoken (gpt5/gpt5-mini) **Type Normalization:** `Infinity/NaN/Functions` → `null` • `Decimal` → `float` • `datetime` → ISO 8601 • `-0` → `0` +## LangChain Integration + +Install with: +```bash +pip install "toon-python[langchain]" +``` +Adds a **completely optional** LangChain integration via the `[langchain]` extra. + +### Features +- `ToonSerializer`: `Document` → TOON (30-60 % token reduction) +- `ToonOutputParser`: TOON response → Python object +- Sync + async support +- 2 unit tests (100 % coverage for new code) +- README example + optional doc page + +## Usage after release +```bash +pip install "toon-python[langchain]" +from toon_format.langchain import ToonSerializer +``` ## Development ```bash diff --git a/pyproject.toml b/pyproject.toml index 8c8824b..e73ec8b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -95,3 +95,11 @@ build-backend = "hatchling.build" [tool.hatch.build.targets.wheel] packages = ["src/toon_format"] + +[tool.poetry.extras] +langchain = ["langchain-core"] + +[tool.poetry.group.dev.dependencies] +langchain-core = "*" +langchain-openai = { version = "*", optional = true } +tiktoken = "*" \ No newline at end of file diff --git a/src/toon_format/langchain/__init__.py b/src/toon_format/langchain/__init__.py new file mode 100644 index 0000000..fd69d8e --- /dev/null +++ b/src/toon_format/langchain/__init__.py @@ -0,0 +1,3 @@ +from .serializer import ToonSerializer, ToonOutputParser + +__all__ = ["ToonSerializer", "ToonOutputParser"] \ No newline at end of file diff --git a/src/toon_format/langchain/serializer.py b/src/toon_format/langchain/serializer.py new file mode 100644 index 0000000..ec36fdd --- /dev/null +++ b/src/toon_format/langchain/serializer.py @@ -0,0 +1,39 @@ +from __future__ import annotations + +from typing import Any, Sequence + +from langchain_core.documents import Document +from langchain_core.output_parsers import BaseOutputParser + +from .. import encode, decode + + +class ToonSerializer: + """Convert LangChain Documents to TOON format (30–60% fewer tokens).""" + + def transform_documents( + self, documents: Sequence[Document], **kwargs: Any + ) -> list[Document]: + return [ + Document( + page_content=encode(doc.page_content), + metadata={**doc.metadata, "format": "toon"} + ) + for doc in documents + ] + + async def atransform_documents( + self, documents: Sequence[Document], **kwargs: Any + ) -> list[Document]: + return self.transform_documents(documents, **kwargs) + + +class ToonOutputParser(BaseOutputParser): + """Parse TOON responses from LLMs back to Python objects.""" + + def parse(self, text: str) -> Any: + return decode(text.strip()) + + @property + def _type(self) -> str: + return "toon" \ No newline at end of file diff --git a/tests/test_langhchain.py b/tests/test_langhchain.py new file mode 100644 index 0000000..8e7402c --- /dev/null +++ b/tests/test_langhchain.py @@ -0,0 +1,14 @@ +from toon_format.langchain import ToonSerializer, ToonOutputParser +from langchain_core.documents import Document + + +def test_serializer(): + docs = [Document(page_content={"name": "Ak", "skill": "Noob"})] + result = ToonSerializer().transform_documents(docs) + assert "name:Ak" in result[0].page_content + + +def test_parser(): + toon = "name:Ak\nage:22" + result = ToonOutputParser().parse(toon) + assert result["name"] == "Ak" \ No newline at end of file From c1011351fef1bf98b45cb8c3bae33169731ee805 Mon Sep 17 00:00:00 2001 From: Ansar Afsar <83577572+Ansarafsar@users.noreply.github.com> Date: Fri, 28 Nov 2025 09:42:52 +0530 Subject: [PATCH 2/4] Update README.md Co-authored-by: Johann Schopplich --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5af264b..c72d844 100644 --- a/README.md +++ b/README.md @@ -135,7 +135,7 @@ Adds a **completely optional** LangChain integration via the `[langchain]` extra - 2 unit tests (100 % coverage for new code) - README example + optional doc page -## Usage after release +## Usage After Release ```bash pip install "toon-python[langchain]" from toon_format.langchain import ToonSerializer From 1c66aeae9e9bb894f357f0b1f4770b39db774f0c Mon Sep 17 00:00:00 2001 From: Ansar Afsar <83577572+Ansarafsar@users.noreply.github.com> Date: Fri, 28 Nov 2025 10:22:57 +0530 Subject: [PATCH 3/4] Update README with LangChain integration info Added LangChain integration details and usage examples. --- README.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/README.md b/README.md index c72d844..296da84 100644 --- a/README.md +++ b/README.md @@ -123,12 +123,15 @@ tokens = count_tokens(toon_str) # Uses tiktoken (gpt5/gpt5-mini) ## LangChain Integration Install with: + ```bash pip install "toon-python[langchain]" ``` + Adds a **completely optional** LangChain integration via the `[langchain]` extra. ### Features + - `ToonSerializer`: `Document` → TOON (30-60 % token reduction) - `ToonOutputParser`: TOON response → Python object - Sync + async support @@ -136,10 +139,24 @@ Adds a **completely optional** LangChain integration via the `[langchain]` extra - README example + optional doc page ## Usage After Release + ```bash pip install "toon-python[langchain]" +``` + +```python from toon_format.langchain import ToonSerializer +from langchain_openai import ChatOpenAI +from langchain_core.prompts import ChatPromptTemplate + +chain = ( + retriever + | ToonSerializer() # converts docs → compact TOON + | ChatPromptTemplate.from_template("Answer using this data:\n{data}") + | ChatOpenAI() +) ``` + ## Development ```bash From 639a9537011b9ade6f6d9c47825011992f211847 Mon Sep 17 00:00:00 2001 From: Ansar Afsar <83577572+Ansarafsar@users.noreply.github.com> Date: Sat, 29 Nov 2025 20:44:55 +0530 Subject: [PATCH 4/4] Rename test_langhchain.py to test_langchain.py --- tests/{test_langhchain.py => test_langchain.py} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename tests/{test_langhchain.py => test_langchain.py} (92%) diff --git a/tests/test_langhchain.py b/tests/test_langchain.py similarity index 92% rename from tests/test_langhchain.py rename to tests/test_langchain.py index 8e7402c..82a567a 100644 --- a/tests/test_langhchain.py +++ b/tests/test_langchain.py @@ -11,4 +11,4 @@ def test_serializer(): def test_parser(): toon = "name:Ak\nage:22" result = ToonOutputParser().parse(toon) - assert result["name"] == "Ak" \ No newline at end of file + assert result["name"] == "Ak"