From 730116b319bdb738d183f0d677c9a2b7e4f534a9 Mon Sep 17 00:00:00 2001 From: Morteza Hosseini Date: Mon, 10 Nov 2025 14:38:26 +0000 Subject: [PATCH 1/3] feat: add pathlib.Path normalization to string representation --- src/toon_format/normalize.py | 15 ++++++++ tests/test_normalization.py | 68 ++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+) diff --git a/src/toon_format/normalize.py b/src/toon_format/normalize.py index 157f2ed..c26bed6 100644 --- a/src/toon_format/normalize.py +++ b/src/toon_format/normalize.py @@ -6,6 +6,8 @@ - datetime/date → ISO 8601 strings - Decimal → float - tuple/set/frozenset → sorted lists +- bytes/bytearray → base64 encoded strings +- pathlib.Path → string representation - Infinity/NaN → null - Functions/callables → null - Negative zero → zero @@ -16,6 +18,7 @@ from collections.abc import Mapping from datetime import date, datetime from decimal import Decimal +from pathlib import Path, PurePath from typing import Any # TypeGuard was added in Python 3.10, use typing_extensions for older versions @@ -39,6 +42,7 @@ def normalize_value(value: Any) -> JsonValue: Converts Python-specific types to JSON-compatible equivalents: - datetime objects → ISO 8601 strings - sets → sorted lists + - pathlib.Path → string representation - Large integers (>2^53-1) → strings (for JS compatibility) - Non-finite floats (inf, -inf, NaN) → null - Negative zero → positive zero @@ -64,10 +68,16 @@ def normalize_value(value: Any) -> JsonValue: >>> normalize_value(2**60) # Large integer '1152921504606846976' + >>> from pathlib import Path + >>> normalize_value(Path('/tmp/file.txt')) + '/tmp/file.txt' + Note: - Recursive: normalizes nested structures - Sets are sorted for deterministic output - Heterogeneous sets sorted by repr() if natural sorting fails + - bytes/bytearray are base64 encoded + - Path objects are converted to their string representation """ if value is None: return None @@ -100,6 +110,11 @@ def normalize_value(value: Any) -> JsonValue: return None return float(value) + # Handle pathlib.Path objects -> string representation + if isinstance(value, PurePath): + logger.debug(f"Converting {type(value).__name__} to string: {value}") + return str(value) + if isinstance(value, datetime): try: result = value.isoformat() diff --git a/tests/test_normalization.py b/tests/test_normalization.py index b6fb1ed..92f6637 100644 --- a/tests/test_normalization.py +++ b/tests/test_normalization.py @@ -17,6 +17,7 @@ """ from decimal import Decimal +from pathlib import Path, PurePosixPath, PureWindowsPath from toon_format import decode, encode @@ -416,3 +417,70 @@ def test_roundtrip_numeric_precision(self): # All numbers should round-trip with fidelity for key, value in original.items(): assert decoded[key] == value, f"Mismatch for {key}: {decoded[key]} != {value}" + + +class TestPathNormalization: + """Test pathlib.Path normalization to strings.""" + + def test_path_to_string(self): + """pathlib.Path should be converted to string.""" + data = {"file": Path("/tmp/test.txt")} + result = encode(data) + decoded = decode(result) + + assert decoded["file"] == "/tmp/test.txt" + + def test_relative_path(self): + """Relative paths should be preserved.""" + data = {"rel": Path("./relative/path.txt")} + result = encode(data) + decoded = decode(result) + + # Path normalization may vary, but should be a string + assert isinstance(decoded["rel"], str) + assert "relative" in decoded["rel"] + assert "path.txt" in decoded["rel"] + + def test_pure_path(self): + """PurePath objects should also be normalized.""" + data = { + "posix": PurePosixPath("/usr/bin/python"), + "windows": PureWindowsPath("C:\\Windows\\System32"), + } + result = encode(data) + decoded = decode(result) + + assert decoded["posix"] == "/usr/bin/python" + assert decoded["windows"] == "C:\\Windows\\System32" + + def test_path_in_array(self): + """Path objects in arrays should be normalized.""" + from pathlib import Path + + data = {"paths": [Path("/tmp/a"), Path("/tmp/b"), Path("/tmp/c")]} + result = encode(data) + decoded = decode(result) + + assert decoded["paths"] == ["/tmp/a", "/tmp/b", "/tmp/c"] + + def test_path_in_nested_structure(self): + """Path objects in nested structures should be normalized.""" + from pathlib import Path + + data = { + "project": { + "root": Path("/home/user/project"), + "src": Path("/home/user/project/src"), + "files": [ + {"name": "main.py", "path": Path("/home/user/project/src/main.py")}, + {"name": "test.py", "path": Path("/home/user/project/src/test.py")}, + ], + } + } + result = encode(data) + decoded = decode(result) + + assert decoded["project"]["root"] == "/home/user/project" + assert decoded["project"]["src"] == "/home/user/project/src" + assert decoded["project"]["files"][0]["path"] == "/home/user/project/src/main.py" + assert decoded["project"]["files"][1]["path"] == "/home/user/project/src/test.py" From aafa505315cbece10ae3fa80a75f04db9b3f8a81 Mon Sep 17 00:00:00 2001 From: Morteza Hosseini Date: Mon, 10 Nov 2025 14:43:31 +0000 Subject: [PATCH 2/3] refactor: remove redundant imports in path normalization tests --- tests/test_normalization.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/test_normalization.py b/tests/test_normalization.py index 92f6637..8c16d65 100644 --- a/tests/test_normalization.py +++ b/tests/test_normalization.py @@ -455,8 +455,6 @@ def test_pure_path(self): def test_path_in_array(self): """Path objects in arrays should be normalized.""" - from pathlib import Path - data = {"paths": [Path("/tmp/a"), Path("/tmp/b"), Path("/tmp/c")]} result = encode(data) decoded = decode(result) @@ -465,8 +463,6 @@ def test_path_in_array(self): def test_path_in_nested_structure(self): """Path objects in nested structures should be normalized.""" - from pathlib import Path - data = { "project": { "root": Path("/home/user/project"), From 09316b83b1e6eb2aed114b6b962e76cd99c5c9b0 Mon Sep 17 00:00:00 2001 From: Morteza Hosseini Date: Mon, 10 Nov 2025 14:57:59 +0000 Subject: [PATCH 3/3] refactor: remove bytes/bytearray normalization from documentation --- src/toon_format/normalize.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/toon_format/normalize.py b/src/toon_format/normalize.py index c26bed6..dda9bfd 100644 --- a/src/toon_format/normalize.py +++ b/src/toon_format/normalize.py @@ -6,7 +6,6 @@ - datetime/date → ISO 8601 strings - Decimal → float - tuple/set/frozenset → sorted lists -- bytes/bytearray → base64 encoded strings - pathlib.Path → string representation - Infinity/NaN → null - Functions/callables → null @@ -18,7 +17,7 @@ from collections.abc import Mapping from datetime import date, datetime from decimal import Decimal -from pathlib import Path, PurePath +from pathlib import PurePath from typing import Any # TypeGuard was added in Python 3.10, use typing_extensions for older versions @@ -76,7 +75,6 @@ def normalize_value(value: Any) -> JsonValue: - Recursive: normalizes nested structures - Sets are sorted for deterministic output - Heterogeneous sets sorted by repr() if natural sorting fails - - bytes/bytearray are base64 encoded - Path objects are converted to their string representation """ if value is None: