diff --git a/src/toon_format/normalize.py b/src/toon_format/normalize.py index 157f2ed..dda9bfd 100644 --- a/src/toon_format/normalize.py +++ b/src/toon_format/normalize.py @@ -6,6 +6,7 @@ - datetime/date → ISO 8601 strings - Decimal → float - tuple/set/frozenset → sorted lists +- pathlib.Path → string representation - Infinity/NaN → null - Functions/callables → null - Negative zero → zero @@ -16,6 +17,7 @@ from collections.abc import Mapping from datetime import date, datetime from decimal import Decimal +from pathlib import PurePath from typing import Any # TypeGuard was added in Python 3.10, use typing_extensions for older versions @@ -39,6 +41,7 @@ def normalize_value(value: Any) -> JsonValue: Converts Python-specific types to JSON-compatible equivalents: - datetime objects → ISO 8601 strings - sets → sorted lists + - pathlib.Path → string representation - Large integers (>2^53-1) → strings (for JS compatibility) - Non-finite floats (inf, -inf, NaN) → null - Negative zero → positive zero @@ -64,10 +67,15 @@ def normalize_value(value: Any) -> JsonValue: >>> normalize_value(2**60) # Large integer '1152921504606846976' + >>> from pathlib import Path + >>> normalize_value(Path('/tmp/file.txt')) + '/tmp/file.txt' + Note: - Recursive: normalizes nested structures - Sets are sorted for deterministic output - Heterogeneous sets sorted by repr() if natural sorting fails + - Path objects are converted to their string representation """ if value is None: return None @@ -100,6 +108,11 @@ def normalize_value(value: Any) -> JsonValue: return None return float(value) + # Handle pathlib.Path objects -> string representation + if isinstance(value, PurePath): + logger.debug(f"Converting {type(value).__name__} to string: {value}") + return str(value) + if isinstance(value, datetime): try: result = value.isoformat() diff --git a/tests/test_normalization.py b/tests/test_normalization.py index b6fb1ed..8c16d65 100644 --- a/tests/test_normalization.py +++ 
b/tests/test_normalization.py
@@ -17,6 +17,7 @@
 """
 
 from decimal import Decimal
+from pathlib import Path, PurePosixPath, PureWindowsPath
 
 from toon_format import decode, encode
 
@@ -416,3 +417,75 @@ def test_roundtrip_numeric_precision(self):
         # All numbers should round-trip with fidelity
         for key, value in original.items():
             assert decoded[key] == value, f"Mismatch for {key}: {decoded[key]} != {value}"
+
+
+class TestPathNormalization:
+    """Test pathlib.Path normalization to strings.
+
+    Concrete ``Path`` objects render with the native separator (backslashes
+    on Windows), so their expected values are derived with ``str()``; only the
+    flavour-specific pure paths are compared against literal strings.
+    """
+
+    def test_path_to_string(self):
+        """pathlib.Path should be converted to string."""
+        path = Path("/tmp/test.txt")
+        result = encode({"file": path})
+        decoded = decode(result)
+
+        assert decoded["file"] == str(path)
+
+    def test_relative_path(self):
+        """Relative paths should be preserved."""
+        path = Path("./relative/path.txt")
+        result = encode({"rel": path})
+        decoded = decode(result)
+
+        # pathlib collapses the leading "./", so compare with str(path) rather
+        # than with the literal handed to the constructor.
+        assert decoded["rel"] == str(path)
+
+    def test_pure_path(self):
+        """PurePath objects should also be normalized."""
+        data = {
+            "posix": PurePosixPath("/usr/bin/python"),
+            "windows": PureWindowsPath("C:\\Windows\\System32"),
+        }
+        result = encode(data)
+        decoded = decode(result)
+
+        assert decoded["posix"] == "/usr/bin/python"
+        assert decoded["windows"] == "C:\\Windows\\System32"
+
+    def test_path_in_array(self):
+        """Path objects in arrays should be normalized."""
+        paths = [Path("/tmp/a"), Path("/tmp/b"), Path("/tmp/c")]
+        result = encode({"paths": paths})
+        decoded = decode(result)
+
+        assert decoded["paths"] == [str(p) for p in paths]
+
+    def test_path_in_nested_structure(self):
+        """Path objects in nested structures should be normalized."""
+        root = Path("/home/user/project")
+        src = root / "src"
+        main_py = src / "main.py"
+        test_py = src / "test.py"
+        data = {
+            "project": {
+                "root": root,
+                "src": src,
+                "files": [
+                    {"name": "main.py", "path": main_py},
+                    {"name": "test.py", "path": test_py},
+                ],
+            }
+        }
+        result = encode(data)
+        decoded = decode(result)
+
+        project = decoded["project"]
+        assert project["root"] == str(root)
+        assert project["src"] == str(src)
+        assert project["files"][0]["path"] == str(main_py)
+        assert project["files"][1]["path"] == str(test_py)