diff --git a/.gitignore b/.gitignore index 0575f02..9adffef 100644 --- a/.gitignore +++ b/.gitignore @@ -107,4 +107,5 @@ uv.lock PR_DESCRIPTION.md AGENTS.md -.augment/ \ No newline at end of file +.augment/ +*.log diff --git a/MCP_SERVER_README.md b/MCP_SERVER_README.md new file mode 100644 index 0000000..3b8f92c --- /dev/null +++ b/MCP_SERVER_README.md @@ -0,0 +1,193 @@ +# TOON MCP Server + +Model Context Protocol (MCP) server for [TOON format](https://toonformat.dev) encoding and decoding. + +## Overview + +This MCP server provides tools for converting between JSON and TOON (Token-Oriented Object Notation) format. TOON is a compact, human-readable serialization format designed for passing structured data to Large Language Models with **30-60% fewer tokens** compared to JSON on large uniform arrays. + +## Features + +- **`toon_encode`**: Convert JSON data to TOON format +- **`toon_decode`**: Convert TOON format back to JSON +- Built with [FastMCP](https://github.com/jlowin/fastmcp) for easy integration +- Uses the native Python implementation from the `toon_format` package + +## Installation + +### From Source + +```bash +cd /path/to/toon-pythonMCP + +# Install the package plus the "mcp" dependency group (a PEP 735 group, not an extra; needs pip >= 25.1) +pip install -e . --group mcp +``` + +### Requirements + +- Python 3.10+ (required by FastMCP 2.x) +- FastMCP 2.0+ + +## Usage + +### Running the Server + +```bash +toon-mcp +``` + +Or using Python directly: + +```bash +python run_server.py +``` + +Or as a module: + +```bash +python -m toon_mcp.server +``` + +### Configuring MCP Clients + +This server works with any MCP-compatible client. Below is an example configuration for Claude Desktop.
+ +**Claude Desktop Example:** + +Add to your configuration file (`~/Library/Application Support/Claude/claude_desktop_config.json` on macOS, `%APPDATA%/Claude/claude_desktop_config.json` on Windows): + +```json +{ + "mcpServers": { + "toon": { + "command": "toon-mcp", + "args": [] + } + } +} +``` + +Or if running from source: + +```json +{ + "mcpServers": { + "toon": { + "command": "python", + "args": ["run_server.py"], + "cwd": "/path/to/toon-pythonMCP" + } + } +} +``` + +**Other MCP Clients:** + +Refer to your MCP client's documentation for specific configuration format. The server follows the standard MCP protocol and should work with any compliant client. + +## Available Tools + +### `toon_encode` + +Encode JSON data into TOON format. + +**Parameters:** +- `data` (required): The JSON data to encode +- `indent` (optional, default: 2): Number of spaces per indentation level +- `delimiter` (optional, default: ","): Delimiter for array values - comma (','), tab ('\\t'), or pipe ('|') + +**Example:** + +```python +# Input +{ + "users": [ + {"id": 1, "name": "Alice", "role": "admin"}, + {"id": 2, "name": "Bob", "role": "user"} + ] +} + +# Output +users[2]{id,name,role}: + 1,Alice,admin + 2,Bob,user +``` + +### `toon_decode` + +Decode TOON format string back into JSON data. + +**Parameters:** +- `toon_string` (required): The TOON formatted string to decode +- `indent` (optional, default: 2): Expected indentation level +- `strict` (optional, default: true): Enable strict validation + +**Example:** + +```python +# Input +users[2]{id,name,role}: + 1,Alice,admin + 2,Bob,user + +# Output +{ + "users": [ + {"id": 1, "name": "Alice", "role": "admin"}, + {"id": 2, "name": "Bob", "role": "user"} + ] +} +``` + +## Why Use TOON? 
+ +TOON is optimized for LLM contexts: + +- **Token-efficient**: 30-60% fewer tokens on large uniform arrays vs formatted JSON +- **LLM-friendly**: Explicit lengths and fields enable better validation +- **Minimal syntax**: Removes redundant punctuation +- **Tabular arrays**: Declare keys once, stream data as rows +- **Human-readable**: Like YAML but more compact + +## Use Cases + +- Passing large datasets to LLM prompts +- Reducing token costs for API calls +- Structured data in AI applications +- Data serialization for LLM fine-tuning + +## Best Use Cases + +TOON excels with: +- **Uniform arrays of objects** (same fields, primitive values) +- **Large tabular datasets** with consistent structure +- **Semi-uniform data** with ~60%+ tabular eligibility + +For deeply nested or non-uniform structures, JSON may be more efficient. + +## Development + +### Project Structure + +``` +toon-pythonMCP/ +├── src/ +│ ├── toon_format/ # Core TOON implementation +│ └── toon_mcp/ # MCP server +│ ├── __init__.py +│ └── server.py # FastMCP server implementation +├── run_server.py # Standalone runner +└── pyproject.toml +``` + +## License + +MIT License © 2025 Johann Schopplich + +## Links + +- [TOON Specification](https://github.com/toon-format/spec) +- [TOON Python Implementation](https://github.com/toon-format/toon-python) +- [FastMCP](https://github.com/jlowin/fastmcp) +- [Model Context Protocol](https://modelcontextprotocol.io) diff --git a/pyproject.toml b/pyproject.toml index 8c8824b..8b16558 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,9 +35,11 @@ Documentation = "https://github.com/toon-format/spec" [project.scripts] toon = "toon_format.cli:main" +toon-mcp = "toon_mcp.server:main" [dependency-groups] benchmark = ["tiktoken>=0.4.0"] +mcp = ["fastmcp>=2.0.0"] dev = [ "pytest>=8.0.0", "pytest-cov>=4.1.0", @@ -94,4 +96,4 @@ requires = ["hatchling"] build-backend = "hatchling.build" [tool.hatch.build.targets.wheel] -packages = ["src/toon_format"] +packages = 
["src/toon_format", "src/toon_mcp"]
diff --git a/run_server.py b/run_server.py
new file mode 100644
index 0000000..df0eab2
--- /dev/null
+++ b/run_server.py
@@ -0,0 +1,14 @@
+#!/usr/bin/env python3
+"""Standalone TOON MCP server runner - works without package installation."""
+
+import sys
+from pathlib import Path
+
+# Use an absolute path so the entry stays valid if the working directory changes
+sys.path.insert(0, str(Path(__file__).resolve().parent / "src"))
+
+# Must come after the sys.path tweak above, hence the deliberately late import
+from toon_mcp.server import main  # noqa: E402
+
+if __name__ == "__main__":
+    main()
diff --git a/src/toon_mcp/__init__.py b/src/toon_mcp/__init__.py
new file mode 100644
index 0000000..2885209
--- /dev/null
+++ b/src/toon_mcp/__init__.py
@@ -0,0 +1,3 @@
+"""TOON MCP Server - Model Context Protocol server for TOON format."""
+
+__version__ = "1.0.0"
diff --git a/src/toon_mcp/server.py b/src/toon_mcp/server.py
new file mode 100644
index 0000000..12e74ba
--- /dev/null
+++ b/src/toon_mcp/server.py
@@ -0,0 +1,126 @@
+"""TOON MCP Server - FastMCP server for TOON format encoding and decoding."""
+
+from typing import Any, Dict, List, Union
+
+from fastmcp import FastMCP
+
+# Import from the local toon_format package
+from toon_format import decode, encode
+from toon_format.types import DecodeOptions, EncodeOptions
+
+# Delimiters documented for the encode tool (comma, tab, pipe)
+_VALID_DELIMITERS = (",", "\t", "|")
+
+# Create FastMCP server instance
+mcp = FastMCP(
+    name="toon-mcp-server",
+    version="1.0.0",
+)
+
+
+def _normalize_delimiter(delimiter: str) -> str:
+    """Return a supported delimiter character or raise ValueError.
+
+    An MCP client may send the two-character escape ``\\t`` instead of a
+    real tab character, so that spelling is accepted as an alias for tab.
+    """
+    if delimiter == "\\t":
+        return "\t"
+    if delimiter not in _VALID_DELIMITERS:
+        raise ValueError(
+            f"Unsupported delimiter {delimiter!r}; expected ',', '\\t' or '|'"
+        )
+    return delimiter
+
+
+@mcp.tool()
+def toon_encode(
+    data: Any,
+    indent: int = 2,
+    delimiter: str = ",",
+) -> str:
+    """
+    Encode JSON data into TOON format.
+
+    TOON (Token-Oriented Object Notation) is a compact format designed for LLM
+    prompts with reduced token usage (typically 30-60% fewer tokens than JSON
+    on large uniform arrays).
+
+    Args:
+        data: The JSON data to encode into TOON format (objects, arrays, primitives)
+        indent: Number of spaces per indentation level (default: 2)
+        delimiter: Delimiter for array values - comma (','), tab ('\\t'), or pipe ('|')
+
+    Returns:
+        TOON formatted string
+
+    Raises:
+        ValueError: If indent is negative or delimiter is not one of the
+            supported characters.
+
+    Examples:
+        >>> toon_encode({"users": [{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}]})
+        'users[2]{id,name}:\\n  1,Alice\\n  2,Bob'
+
+        >>> toon_encode({"items": ["a", "b", "c"]})
+        'items[3]: a,b,c'
+
+        >>> toon_encode({"id": 123, "name": "Ada", "active": True})
+        'id: 123\\nname: Ada\\nactive: true'
+    """
+    # Validate at the tool boundary: arguments arrive straight from an MCP client.
+    if indent < 0:
+        raise ValueError(f"indent must be non-negative, got {indent}")
+
+    options: EncodeOptions = {
+        "indent": indent,
+        "delimiter": _normalize_delimiter(delimiter),
+    }
+
+    return encode(data, options)
+
+
+@mcp.tool()
+def toon_decode(
+    toon_string: str,
+    indent: int = 2,
+    strict: bool = True,
+) -> Union[Dict[str, Any], List[Any], str, int, float, bool, None]:
+    """
+    Decode TOON format string back into JSON data.
+
+    Args:
+        toon_string: The TOON formatted string to decode
+        indent: Expected number of spaces per indentation level (default: 2)
+        strict: Enable strict validation during decoding (default: True)
+
+    Returns:
+        Decoded data as Python objects (dict, list, str, int, float, bool, or None)
+
+    Raises:
+        ValueError: If indent is negative.
+
+    Examples:
+        >>> toon_decode('users[2]{id,name}:\\n  1,Alice\\n  2,Bob')
+        {'users': [{'id': 1, 'name': 'Alice'}, {'id': 2, 'name': 'Bob'}]}
+
+        >>> toon_decode('items[3]: a,b,c')
+        {'items': ['a', 'b', 'c']}
+
+        >>> toon_decode('id: 123\\nname: Ada\\nactive: true')
+        {'id': 123, 'name': 'Ada', 'active': True}
+    """
+    if indent < 0:
+        raise ValueError(f"indent must be non-negative, got {indent}")
+
+    options = DecodeOptions(indent=indent, strict=strict)
+    return decode(toon_string, options)
+
+
+def main() -> None:
+    """Run the TOON MCP server."""
+    mcp.run()
+
+
+if __name__ == "__main__":
+    main()