Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion src/toon_format/_literal_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,14 +51,22 @@ def is_numeric_literal(token: str) -> bool:
True
>>> is_numeric_literal("0123") # Leading zero - not valid
False
>>> is_numeric_literal("-01") # Negative with leading zero - not valid
False
>>> is_numeric_literal("hello")
False
"""
if not token:
return False

# Handle negative numbers
start_idx = 1 if token.startswith("-") else 0
if start_idx >= len(token):
return False

# Must not have leading zeros (except for `"0"` itself or decimals like `"0.5"`)
if len(token) > 1 and token[0] == "0" and token[1] != ".":
# Check the first digit after optional minus sign
if len(token) > start_idx + 1 and token[start_idx] == "0" and token[start_idx + 1] != ".":
return False

# Check if it's a valid number
Expand Down
190 changes: 190 additions & 0 deletions tests/test_numeric_detection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
"""Tests for numeric detection utilities.

Tests the consistency and correctness of numeric literal detection
across encoding and decoding pipelines.
"""

from toon_format._literal_utils import is_numeric_literal
from toon_format._validation import is_numeric_like


class TestNumericLiteral:
    """Tests for is_numeric_literal (decoder utility).

    Each test feeds string tokens to ``is_numeric_literal`` and asserts
    whether the token is accepted as a numeric literal.
    """

    def test_valid_integers(self):
        """Test valid integer literals are recognized."""
        assert is_numeric_literal("0")
        assert is_numeric_literal("1")
        assert is_numeric_literal("42")
        assert is_numeric_literal("999")
        assert is_numeric_literal("-1")
        assert is_numeric_literal("-42")

    def test_valid_floats(self):
        """Test valid float literals are recognized."""
        assert is_numeric_literal("0.0")
        assert is_numeric_literal("0.5")
        assert is_numeric_literal("3.14")
        assert is_numeric_literal("-2.5")
        assert is_numeric_literal("1.23456")

    def test_scientific_notation(self):
        """Test scientific notation is recognized."""
        assert is_numeric_literal("1e10")
        assert is_numeric_literal("1.5e10")
        assert is_numeric_literal("1e-10")
        assert is_numeric_literal("1.5e-10")
        assert is_numeric_literal("-1e10")
        assert is_numeric_literal("2.5E+3")

    def test_leading_zeros_rejected(self):
        """Test numbers with leading zeros are rejected (except special cases)."""
        assert not is_numeric_literal("01")
        assert not is_numeric_literal("0123")
        assert not is_numeric_literal("00")
        # The leading-zero rule applies to the digits after an optional minus sign.
        assert not is_numeric_literal("-01")
        assert not is_numeric_literal("-00")
        assert not is_numeric_literal("-007")
        # But these are valid:
        assert is_numeric_literal("0")  # Just zero
        assert is_numeric_literal("0.5")  # Decimal starting with zero
        assert is_numeric_literal("0.0")

    def test_non_numeric_strings(self):
        """Test non-numeric strings are rejected."""
        assert not is_numeric_literal("")
        assert not is_numeric_literal("abc")
        assert not is_numeric_literal("12abc")
        assert not is_numeric_literal("12.34.56")
        assert not is_numeric_literal("--5")
        assert not is_numeric_literal("1.2.3")
        # A bare minus sign has no digits after it.
        assert not is_numeric_literal("-")

    def test_special_float_values_rejected(self):
        """Test NaN and infinity are rejected."""
        assert not is_numeric_literal("nan")
        assert not is_numeric_literal("NaN")
        assert not is_numeric_literal("inf")
        assert not is_numeric_literal("Infinity")
        assert not is_numeric_literal("-inf")

    def test_empty_string(self):
        """Test empty string is rejected."""
        assert not is_numeric_literal("")

    def test_whitespace_only(self):
        """Test whitespace-only strings are rejected."""
        # Cover both a single space and a run of spaces; the two cases
        # must differ, otherwise the second assertion adds nothing.
        assert not is_numeric_literal(" ")
        assert not is_numeric_literal("   ")


class TestNumericLike:
    """Checks for is_numeric_like, the encoder-side utility."""

    def test_valid_integers(self):
        """Integer tokens, positive and negative, are numeric-like."""
        for token in ("0", "1", "42", "-1", "-42"):
            assert is_numeric_like(token)

    def test_valid_floats(self):
        """Decimal tokens are numeric-like."""
        for token in ("0.0", "0.5", "3.14", "-2.5"):
            assert is_numeric_like(token)

    def test_scientific_notation(self):
        """Exponent-form tokens are numeric-like."""
        for token in ("1e10", "1.5e10", "1e-10", "2.5E+3"):
            assert is_numeric_like(token)

    def test_octal_like_numbers(self):
        """Tokens with leading zeros still count as numeric-like."""
        # They look like numbers, so the encoder has to quote them.
        for token in ("01", "0123", "00"):
            assert is_numeric_like(token)

    def test_non_numeric_strings(self):
        """Tokens that are not numbers are not numeric-like."""
        for token in ("", "abc", "hello", "12abc"):
            assert not is_numeric_like(token)

    def test_edge_cases(self):
        """Empty, blank and double-minus tokens are not numeric-like."""
        for token in ("", " ", "--5"):
            assert not is_numeric_like(token)


class TestConsistency:
    """Tests to ensure consistency between is_numeric_literal and is_numeric_like."""

    def test_valid_numbers_recognized_by_both(self):
        """Test that valid numbers are recognized by both functions."""
        valid_numbers = ["0", "1", "42", "-1", "3.14", "-2.5", "1e10", "1.5e-3"]
        for num in valid_numbers:
            assert is_numeric_literal(num), f"{num} should be numeric literal"
            assert is_numeric_like(num), f"{num} should be numeric-like"

    def test_octal_like_difference(self):
        """Test the key difference: octal-like numbers.

        is_numeric_like returns True (needs quoting in encoder)
        is_numeric_literal returns False (not parsed as number in decoder)
        """
        octal_like = ["01", "0123", "00", "007"]
        for num in octal_like:
            assert is_numeric_like(num), f"{num} should be numeric-like (needs quoting)"
            assert not is_numeric_literal(num), (
                f"{num} should not be numeric literal (has leading zero)"
            )

    def test_non_numbers_rejected_by_both(self):
        """Test that non-numbers are rejected by both functions.

        Plain non-numeric strings (including the empty string) are checked
        against both functions. ``"nan"`` and ``"inf"`` are checked against
        is_numeric_literal only.
        """
        rejected_by_both = ["", "abc", "hello", "12abc"]
        for val in rejected_by_both:
            # Use !r so the empty string is visible in a failure message.
            assert not is_numeric_literal(val), f"{val!r} should not be numeric literal"
            assert not is_numeric_like(val), f"{val!r} should not be numeric-like"

        # NOTE(review): is_numeric_like's handling of special float names is
        # not asserted here; confirm the intended behavior before adding it.
        rejected_by_literal_only = ["nan", "inf"]
        for val in rejected_by_literal_only:
            assert not is_numeric_literal(val), f"{val!r} should not be numeric literal"


class TestRoundTripConsistency:
    """Round-trip checks: encode followed by decode keeps values and their types."""

    def test_octal_like_numbers_preserved_as_strings(self):
        """Octal-looking strings come back from a round-trip as the same strings."""
        from toon_format import decode, encode

        # String values that look like numbers with leading zeros.
        for original in ("0123", "007", "00"):
            # Wrap the value in a dict and push it through encode then decode.
            restored = decode(encode({"value": original}))
            # Confirm the result is a dict before indexing into it.
            assert isinstance(restored, dict)
            # The value must survive unchanged and still be a string.
            assert restored["value"] == original
            assert isinstance(restored["value"], str)

    def test_valid_numbers_preserved_as_numbers(self):
        """Real numeric values come back from a round-trip as numbers."""
        from toon_format import decode, encode

        for original in (0, 1, 42, -1, 3.14, -2.5):
            restored = decode(encode({"value": original}))
            # Confirm the result is a dict before indexing into it.
            assert isinstance(restored, dict)
            # Equal value, numeric type (int/float conversion is tolerated).
            assert restored["value"] == original
            assert isinstance(restored["value"], (int, float))
Loading