Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions src/toon_format/normalize.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
- datetime/date → ISO 8601 strings
- Decimal → float
- tuple/set/frozenset → sorted lists
- pathlib.Path → string representation
- Infinity/NaN → null
- Functions/callables → null
- Negative zero → zero
Expand All @@ -16,6 +17,7 @@
from collections.abc import Mapping
from datetime import date, datetime
from decimal import Decimal
from pathlib import PurePath
from typing import Any

# TypeGuard was added in Python 3.10, use typing_extensions for older versions
Expand All @@ -39,6 +41,7 @@ def normalize_value(value: Any) -> JsonValue:
Converts Python-specific types to JSON-compatible equivalents:
- datetime objects → ISO 8601 strings
- sets → sorted lists
- pathlib.Path → string representation
- Large integers (>2^53-1) → strings (for JS compatibility)
- Non-finite floats (inf, -inf, NaN) → null
- Negative zero → positive zero
Expand All @@ -64,10 +67,15 @@ def normalize_value(value: Any) -> JsonValue:
>>> normalize_value(2**60) # Large integer
'1152921504606846976'

>>> from pathlib import Path
>>> normalize_value(Path('/tmp/file.txt'))
'/tmp/file.txt'

Note:
- Recursive: normalizes nested structures
- Sets are sorted for deterministic output
- Heterogeneous sets sorted by repr() if natural sorting fails
- Path objects are converted to their string representation
"""
if value is None:
return None
Expand Down Expand Up @@ -100,6 +108,11 @@ def normalize_value(value: Any) -> JsonValue:
return None
return float(value)

# Handle pathlib.Path objects -> string representation
if isinstance(value, PurePath):
logger.debug(f"Converting {type(value).__name__} to string: {value}")
return str(value)

if isinstance(value, datetime):
try:
result = value.isoformat()
Expand Down
64 changes: 64 additions & 0 deletions tests/test_normalization.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
"""

from decimal import Decimal
from pathlib import Path, PurePosixPath, PureWindowsPath

from toon_format import decode, encode

Expand Down Expand Up @@ -416,3 +417,66 @@ def test_roundtrip_numeric_precision(self):
# All numbers should round-trip with fidelity
for key, value in original.items():
assert decoded[key] == value, f"Mismatch for {key}: {decoded[key]} != {value}"


class TestPathNormalization:
    """Verify that pathlib path objects survive an encode/decode round trip as strings.

    Expected values for concrete ``Path`` objects are derived with ``str()``
    instead of being hard-coded with forward slashes: ``Path`` instantiates
    ``WindowsPath`` on Windows, whose string form uses backslashes, so a
    literal such as ``"/tmp/test.txt"`` would only match on POSIX systems.
    The pure path flavours (``PurePosixPath`` / ``PureWindowsPath``) render
    identically on every platform, so literals are kept for those.
    """

    def test_path_to_string(self):
        """A concrete Path value decodes to its str() representation."""
        path = Path("/tmp/test.txt")
        data = {"file": path}
        result = encode(data)
        decoded = decode(result)

        # Compare against str(path) so the check holds for any path flavour.
        assert decoded["file"] == str(path)

    def test_relative_path(self):
        """A relative path decodes to a string that keeps its components."""
        data = {"rel": Path("./relative/path.txt")}
        result = encode(data)
        decoded = decode(result)

        # The separator and the leading "./" may vary, so only check the
        # type and that both path components are still present.
        assert isinstance(decoded["rel"], str)
        assert "relative" in decoded["rel"]
        assert "path.txt" in decoded["rel"]

    def test_pure_path(self):
        """PurePosixPath and PureWindowsPath values decode to strings."""
        data = {
            "posix": PurePosixPath("/usr/bin/python"),
            "windows": PureWindowsPath("C:\\Windows\\System32"),
        }
        result = encode(data)
        decoded = decode(result)

        # Pure flavours have a fixed string form regardless of the host OS.
        assert decoded["posix"] == "/usr/bin/python"
        assert decoded["windows"] == "C:\\Windows\\System32"

    def test_path_in_array(self):
        """Path objects inside a list decode to strings in the same order."""
        paths = [Path("/tmp/a"), Path("/tmp/b"), Path("/tmp/c")]
        data = {"paths": paths}
        result = encode(data)
        decoded = decode(result)

        assert decoded["paths"] == [str(path) for path in paths]

    def test_path_in_nested_structure(self):
        """Path objects nested in dicts and lists decode to strings."""
        root = Path("/home/user/project")
        src = Path("/home/user/project/src")
        main_py = Path("/home/user/project/src/main.py")
        test_py = Path("/home/user/project/src/test.py")
        data = {
            "project": {
                "root": root,
                "src": src,
                "files": [
                    {"name": "main.py", "path": main_py},
                    {"name": "test.py", "path": test_py},
                ],
            }
        }
        result = encode(data)
        decoded = decode(result)

        project = decoded["project"]
        assert project["root"] == str(root)
        assert project["src"] == str(src)
        assert project["files"][0]["path"] == str(main_py)
        assert project["files"][1]["path"] == str(test_py)