airbytehq · Sbnikitha · Nov 13, 2025 · Nov 13, 2025 · coderabbitai · Nov 13, 2025
diff --git a/.DS_Store b/.DS_Store
diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md
@@ -0,0 +1,97 @@
+# Airbyte Python CDK AI Development Guide
+
+This guide provides essential context for AI agents working with the Airbyte Python CDK codebase.
+
+## Project Overview
+
+The Airbyte Python CDK is a framework for building connectors (primarily sources, plus vector database destinations) for the Airbyte data integration platform. It provides components for:
+
+- HTTP API connectors (REST, GraphQL)
+- Declarative connectors using manifest files
+- File-based source connectors
+- Vector database destinations
+- Concurrent data fetching
+
+## Key Architectural Concepts
+
+### Core Components
+
+- **Source Classes**: Implement the `Source` interface in `airbyte_cdk.sources.source`. Base implementations include:
+  - `AbstractSource` - Base class for Python sources
+  - `DeclarativeSource` - For low-code connectors defined via manifest files
+  - `ConcurrentSource` - For high-throughput parallel data fetching
+
+- **Streams**: Core abstraction for data sources (`airbyte_cdk.sources.streams.Stream`). Key types:
+  - `HttpStream` - Base class for HTTP API streams 
+  - `DefaultStream` - Used with declarative sources
+  - Concurrent streams in `airbyte_cdk.sources.streams.concurrent`
+
+### Data Flow
+
+1. Sources expose one or more Stream implementations
+2. Streams define schema, state management, and record extraction
+3. Records flow through the Airbyte protocol via standardized message types
+
+## Development Conventions
+
+### Testing Patterns
+
+- Unit tests use pytest with a scenario-based pattern (`unit_tests/sources/**/test_*.py`)
+- Mock HTTP responses with `HttpMocker` and response builders
+- Standard test suite base classes in `airbyte_cdk.test.standard_tests`
+- Use `@pytest.mark.parametrize` for test variations
+
+### Source Implementation
+
+- Prefer declarative manifests using `SourceDeclarativeManifest` for simple API connectors
+- Extend base classes for custom logic:
+  ```python
+  from airbyte_cdk.sources import AbstractSource
+  from airbyte_cdk.sources.streams import Stream
+
+  class MySource(AbstractSource):
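+      def check_connection(self, logger, config):
+          # Verify credentials/connectivity; return (True, None) on success
+          # or (False, error) on failure
+          return True, None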
+
+      def streams(self, config):
+          return [MyStream(config)]
+  ```
+
+### State Management 
+
+- Use `ConnectorStateManager` for handling incremental sync state
+- Implement cursor fields in streams for incremental syncs
+- State is persisted as JSON-serializable objects
+
+## Common Workflows
+
+### Building a New Connector
+
+1. Start with the [Connector Builder UI](https://docs.airbyte.com/connector-development/connector-builder-ui/overview)
+2. For complex cases, use the low-code CDK with manifest files
+3. Write a custom Python implementation only when necessary
+
+### Testing
+
+```bash
+pytest unit_tests/  # Run all tests
+pytest unit_tests/sources/my_connector/  # Test specific connector
+```
+
+### Dependencies
+
+- Manage with Poetry (`pyproject.toml`)
+- Core requirements locked in `poetry.lock`
+- Optional features via extras in `pyproject.toml`
+
+## Integration Points
+
+- Airbyte Protocol: Messages must conform to protocol models in `airbyte_cdk.models`
+- External APIs: Use `HttpStream` with proper rate limiting
+- Vector DBs: Implement destination logic using `airbyte_cdk.destinations.vector_db_based`
+
+## Key Files
+
+- `airbyte_cdk/sources/abstract_source.py`: Base source implementation
+- `airbyte_cdk/sources/streams/http/http.py`: HTTP stream base class
+- `airbyte_cdk/sources/declarative/`: Low-code CDK components
+- `unit_tests/sources/`: Test examples and patterns
diff --git a/airbyte_cdk/test/utils/transforms/test_cleaning.py b/airbyte_cdk/test/utils/transforms/test_cleaning.py
@@ -0,0 +1,142 @@
+"""Unit tests for cleaning transforms."""
+import pytest
+from airbyte_cdk.utils.transforms.cleaning import (
+    to_lower,
+    strip_whitespace,
+    squash_whitespace,
+    normalize_unicode,
+    remove_punctuation,
+    map_values,
+    cast_numeric,
+)
+
+def test_to_lower():
+    """Test string lowercasing function."""
+    # Test normal cases
+    assert to_lower("Hello") == "hello"
+    assert to_lower("HELLO") == "hello"
+    assert to_lower("HeLLo") == "hello"
+
+    # Test with spaces and special characters
+    assert to_lower("Hello World!") == "hello world!"
+    assert to_lower("Hello123") == "hello123"
+
+    # Test empty and None
+    assert to_lower("") == ""
+    assert to_lower(None) is None
+
+def test_strip_whitespace():
+    """Test whitespace stripping function."""
+    # Test normal cases
+    assert strip_whitespace("  hello  ") == "hello"
+    assert strip_whitespace("hello") == "hello"
+
+    # Test with tabs and newlines
+    assert strip_whitespace("\thello\n") == "hello"
+    assert strip_whitespace("  hello\n  world  ") == "hello\n  world"
+
+    # Test empty and None
+    assert strip_whitespace("   ") == ""
+    assert strip_whitespace("") == ""
+    assert strip_whitespace(None) is None
+
+def test_squash_whitespace():
+    """Test whitespace squashing function."""
+    # Test normal cases
+    assert squash_whitespace("hello   world") == "hello world"
+    assert squash_whitespace("  hello  world  ") == "hello world"
+
+    # Test with tabs and newlines
+    assert squash_whitespace("hello\n\nworld") == "hello world"
+    assert squash_whitespace("hello\t\tworld") == "hello world"
+    assert squash_whitespace("\n hello \t world \n") == "hello world"
+
+    # Test empty and None
+    assert squash_whitespace("   ") == ""
+    assert squash_whitespace("") == ""
+    assert squash_whitespace(None) is None
+
+def test_normalize_unicode():
+    """Test unicode normalization function."""
+    # Test normal cases
+    assert normalize_unicode("hello") == "hello"
+
+    # Test composed characters
+    assert normalize_unicode("café") == "café"  # Composed 'é'
+
+    # Test decomposed characters
+    decomposed = "cafe\u0301"  # 'e' with combining acute accent
+    assert normalize_unicode(decomposed) == "café"  # Should normalize to composed form
+
+    # Test different normalization forms
+    assert normalize_unicode("café", form="NFD") != normalize_unicode("café", form="NFC")
+
+    # Test empty and None
+    assert normalize_unicode("") == ""
+    assert normalize_unicode(None) is None
+
+def test_remove_punctuation():
+    """Test punctuation removal function."""
+    # Test normal cases
+    assert remove_punctuation("hello, world!") == "hello world"
+    assert remove_punctuation("hello.world") == "helloworld"
+
+    # Test with multiple punctuation marks
+    assert remove_punctuation("hello!!! world???") == "hello world"
+    assert remove_punctuation("hello@#$%world") == "helloworld"
+
+    # Test with unicode punctuation
+    assert remove_punctuation("hello—world") == "helloworld"
+    assert remove_punctuation("«hello»") == "hello"
+
+    # Test empty and None
+    assert remove_punctuation("") == ""
+    assert remove_punctuation(None) is None
+
+def test_map_values():
+    """Test value mapping function."""
+    mapping = {"a": 1, "b": 2, "c": 3}
+
+    # Test normal cases
+    assert map_values("a", mapping) == 1
+    assert map_values("b", mapping) == 2
+
+    # Test with default value
+    assert map_values("x", mapping) is None
+    assert map_values("x", mapping, default=0) == 0
+
+    # Test with different value types
+    mixed_mapping = {1: "one", "two": 2, None: "null"}
+    assert map_values(1, mixed_mapping) == "one"
+    assert map_values(None, mixed_mapping) == "null"
+
+def test_cast_numeric():
+    """Test numeric casting function."""
+    # Test successful casts
+    assert cast_numeric("123") == 123
+    assert cast_numeric("123.45") == 123.45
+    assert cast_numeric(123) == 123
+    assert cast_numeric(123.45) == 123.45
+
+    # Test integers vs floats
+    assert isinstance(cast_numeric("123"), int)
+    assert isinstance(cast_numeric("123.45"), float)
+
+    # Test empty values
+    assert cast_numeric(None) is None
+    assert cast_numeric("", on_error="none") is None  # Need to specify on_error="none" to get None for empty string
+    assert cast_numeric("   ", on_error="none") is None  # Need to specify on_error="none" to get None for whitespace
+
+    # Test empty values with default behavior (on_error="ignore")
+    assert cast_numeric("") == ""
+    assert cast_numeric("   ") == "   "
+
+    # Test error handling modes
+    non_numeric = "abc"
+    assert cast_numeric(non_numeric, on_error="ignore") == non_numeric
+    assert cast_numeric(non_numeric, on_error="none") is None
+    assert cast_numeric(non_numeric, on_error="default", default=0) == 0
+
+    # Test error raising
+    with pytest.raises(Exception):
+        cast_numeric(non_numeric, on_error="raise")
-def test_to_lower():
-    """Test string lowercasing function."""
-    # Test normal cases
-    assert to_lower("Hello") == "hello"
-    assert to_lower("HELLO") == "hello"
-    assert to_lower("HeLLo") == "hello"
-    
-    # Test with spaces and special characters
-    assert to_lower("Hello World!") == "hello world!"
-    assert to_lower("Hello123") == "hello123"
-    
-    # Test empty and None
-    assert to_lower("") == ""
-    assert to_lower(None) is None
-
-def test_strip_whitespace():
-    """Test whitespace stripping function."""
-    # Test normal cases
-    assert strip_whitespace("  hello  ") == "hello"
-    assert strip_whitespace("hello") == "hello"
-    
-    # Test with tabs and newlines
-    assert strip_whitespace("\thello\n") == "hello"
-    assert strip_whitespace("  hello\n  world  ") == "hello\n  world"
-    
-    # Test empty and None
-    assert strip_whitespace("   ") == ""
-    assert strip_whitespace("") == ""
-    assert strip_whitespace(None) is None
-
-def test_squash_whitespace():
-    """Test whitespace squashing function."""
-    # Test normal cases
-    assert squash_whitespace("hello   world") == "hello world"
-    assert squash_whitespace("  hello  world  ") == "hello world"
-    
-    # Test with tabs and newlines
-    assert squash_whitespace("hello\n\nworld") == "hello world"
-    assert squash_whitespace("hello\t\tworld") == "hello world"
-    assert squash_whitespace("\n hello \t world \n") == "hello world"
-    
-    # Test empty and None
-    assert squash_whitespace("   ") == ""
-    assert squash_whitespace("") == ""
-    assert squash_whitespace(None) is None
-
-def test_normalize_unicode():
-    """Test unicode normalization function."""
-    # Test normal cases
-    assert normalize_unicode("hello") == "hello"
-    
-    # Test composed characters
-    assert normalize_unicode("café") == "café"  # Composed 'é'
-    
-    # Test decomposed characters
-    decomposed = "cafe\u0301"  # 'e' with combining acute accent
-    assert normalize_unicode(decomposed) == "café"  # Should normalize to composed form
-    
-    # Test different normalization forms
-    assert normalize_unicode("café", form="NFD") != normalize_unicode("café", form="NFC")
-    
-    # Test empty and None
-    assert normalize_unicode("") == ""
-    assert normalize_unicode(None) is None
-
-def test_remove_punctuation():
-    """Test punctuation removal function."""
-    # Test normal cases
-    assert remove_punctuation("hello, world!") == "hello world"
-    assert remove_punctuation("hello.world") == "helloworld"
-    
-    # Test with multiple punctuation marks
-    assert remove_punctuation("hello!!! world???") == "hello world"
-    assert remove_punctuation("hello@#$%world") == "helloworld"
-    
-    # Test with unicode punctuation
-    assert remove_punctuation("hello—world") == "helloworld"
-    assert remove_punctuation("«hello»") == "hello"
-    
-    # Test empty and None
-    assert remove_punctuation("") == ""
-    assert remove_punctuation(None) is None
-
-def test_map_values():
-    """Test value mapping function."""
-    mapping = {"a": 1, "b": 2, "c": 3}
-    
-    # Test normal cases
-    assert map_values("a", mapping) == 1
-    assert map_values("b", mapping) == 2
-    
-    # Test with default value
-    assert map_values("x", mapping) is None
-    assert map_values("x", mapping, default=0) == 0
-    
-    # Test with different value types
-    mixed_mapping = {1: "one", "two": 2, None: "null"}
-    assert map_values(1, mixed_mapping) == "one"
-    assert map_values(None, mixed_mapping) == "null"
-
-def test_cast_numeric():
-    """Test numeric casting function."""
-    # Test successful casts
-    assert cast_numeric("123") == 123
-    assert cast_numeric("123.45") == 123.45
-    assert cast_numeric(123) == 123
-    assert cast_numeric(123.45) == 123.45
-    
-    # Test integers vs floats
-    assert isinstance(cast_numeric("123"), int)
-    assert isinstance(cast_numeric("123.45"), float)
-    
-    # Test empty values
-    assert cast_numeric(None) is None
-    assert cast_numeric("", on_error="none") is None  # Need to specify on_error="none" to get None for empty string
-    assert cast_numeric("   ", on_error="none") is None  # Need to specify on_error="none" to get None for whitespace
-    
-    # Test empty values with default behavior (on_error="ignore")
-    assert cast_numeric("") == ""
-    assert cast_numeric("   ") == "   "
-    
-    # Test error handling modes
-    non_numeric = "abc"
-    assert cast_numeric(non_numeric, on_error="ignore") == non_numeric
-    assert cast_numeric(non_numeric, on_error="none") is None
-    assert cast_numeric(non_numeric, on_error="default", default=0) == 0
-    
-    # Test error raising
-    with pytest.raises(Exception):
-        cast_numeric(non_numeric, on_error="raise")
+def test_to_lower() -> None:
+    """Test string lowercasing function."""
+    # Test normal cases
+    assert to_lower("Hello") == "hello"
+    assert to_lower("HELLO") == "hello"
+    assert to_lower("HeLLo") == "hello"
+    
+    # Test with spaces and special characters
+    assert to_lower("Hello World!") == "hello world!"
+    assert to_lower("Hello123") == "hello123"
+    
+    # Test empty and None
+    assert to_lower("") == ""
+    assert to_lower(None) is None
+
+def test_strip_whitespace() -> None:
+    """Test whitespace stripping function."""
+    # Test normal cases
+    assert strip_whitespace("  hello  ") == "hello"
+    assert strip_whitespace("hello") == "hello"
+    
+    # Test with tabs and newlines
+    assert strip_whitespace("\thello\n") == "hello"
+    assert strip_whitespace("  hello\n  world  ") == "hello\n  world"
+    
+    # Test empty and None
+    assert strip_whitespace("   ") == ""
+    assert strip_whitespace("") == ""
+    assert strip_whitespace(None) is None
+
+def test_squash_whitespace() -> None:
+    """Test whitespace squashing function."""
+    # Test normal cases
+    assert squash_whitespace("hello   world") == "hello world"
+    assert squash_whitespace("  hello  world  ") == "hello world"
+    
+    # Test with tabs and newlines
+    assert squash_whitespace("hello\n\nworld") == "hello world"
+    assert squash_whitespace("hello\t\tworld") == "hello world"
+    assert squash_whitespace("\n hello \t world \n") == "hello world"
+    
+    # Test empty and None
+    assert squash_whitespace("   ") == ""
+    assert squash_whitespace("") == ""
+    assert squash_whitespace(None) is None
+
+def test_normalize_unicode() -> None:
+    """Test unicode normalization function."""
+    # Test normal cases
+    assert normalize_unicode("hello") == "hello"
+    
+    # Test composed characters
+    assert normalize_unicode("café") == "café"  # Composed 'é'
+    
+    # Test decomposed characters
+    decomposed = "cafe\u0301"  # 'e' with combining acute accent
+    assert normalize_unicode(decomposed) == "café"  # Should normalize to composed form
+    
+    # Test different normalization forms
+    assert normalize_unicode("café", form="NFD") != normalize_unicode("café", form="NFC")
+    
+    # Test empty and None
+    assert normalize_unicode("") == ""
+    assert normalize_unicode(None) is None
+
+def test_remove_punctuation() -> None:
+    """Test punctuation removal function."""
+    # Test normal cases
+    assert remove_punctuation("hello, world!") == "hello world"
+    assert remove_punctuation("hello.world") == "helloworld"
+    
+    # Test with multiple punctuation marks
+    assert remove_punctuation("hello!!! world???") == "hello world"
+    assert remove_punctuation("hello@#$%world") == "helloworld"
+    
+    # Test with unicode punctuation
+    assert remove_punctuation("hello—world") == "helloworld"
+    assert remove_punctuation("«hello»") == "hello"
+    
+    # Test empty and None
+    assert remove_punctuation("") == ""
+    assert remove_punctuation(None) is None
+
+def test_map_values() -> None:
+    """Test value mapping function."""
+    mapping = {"a": 1, "b": 2, "c": 3}
+    
+    # Test normal cases
+    assert map_values("a", mapping) == 1
+    assert map_values("b", mapping) == 2
+    
+    # Test with default value
+    assert map_values("x", mapping) is None
+    assert map_values("x", mapping, default=0) == 0
+    
+    # Test with different value types
+    mixed_mapping = {1: "one", "two": 2, None: "null"}
+    assert map_values(1, mixed_mapping) == "one"
+    assert map_values(None, mixed_mapping) == "null"
+
+def test_cast_numeric() -> None:
+    """Test numeric casting function."""
+    # Test successful casts
+    assert cast_numeric("123") == 123
+    assert cast_numeric("123.45") == 123.45
+    assert cast_numeric(123) == 123
+    assert cast_numeric(123.45) == 123.45
+    
+    # Test integers vs floats
+    assert isinstance(cast_numeric("123"), int)
+    assert isinstance(cast_numeric("123.45"), float)
+    
+    # Test empty values
+    assert cast_numeric(None) is None
+    assert cast_numeric("", on_error="none") is None  # Need to specify on_error="none" to get None for empty string
+    assert cast_numeric("   ", on_error="none") is None  # Need to specify on_error="none" to get None for whitespace
+    
+    # Test empty values with default behavior (on_error="ignore")
+    assert cast_numeric("") == ""
+    assert cast_numeric("   ") == "   "
+    
+    # Test error handling modes
+    non_numeric = "abc"
+    assert cast_numeric(non_numeric, on_error="ignore") == non_numeric
+    assert cast_numeric(non_numeric, on_error="none") is None
+    assert cast_numeric(non_numeric, on_error="default", default=0) == 0
+    
+    # Test error raising
+    with pytest.raises(Exception):
+        cast_numeric(non_numeric, on_error="raise")
-def test_to_lower():
-    """Test string lowercasing function."""
-    # Test normal cases
-    assert to_lower("Hello") == "hello"
-    assert to_lower("HELLO") == "hello"
-    assert to_lower("HeLLo") == "hello"
-    
-    # Test with spaces and special characters
-    assert to_lower("Hello World!") == "hello world!"
-    assert to_lower("Hello123") == "hello123"
-    
-    # Test empty and None
-    assert to_lower("") == ""
-    assert to_lower(None) is None
-
-def test_strip_whitespace():
-    """Test whitespace stripping function."""
-    # Test normal cases
-    assert strip_whitespace("  hello  ") == "hello"
-    assert strip_whitespace("hello") == "hello"
-    
-    # Test with tabs and newlines
-    assert strip_whitespace("\thello\n") == "hello"
-    assert strip_whitespace("  hello\n  world  ") == "hello\n  world"
-    
-    # Test empty and None
-    assert strip_whitespace("   ") == ""
-    assert strip_whitespace("") == ""
-    assert strip_whitespace(None) is None
-
-def test_squash_whitespace():
-    """Test whitespace squashing function."""
-    # Test normal cases
-    assert squash_whitespace("hello   world") == "hello world"
-    assert squash_whitespace("  hello  world  ") == "hello world"
-    
-    # Test with tabs and newlines
-    assert squash_whitespace("hello\n\nworld") == "hello world"
-    assert squash_whitespace("hello\t\tworld") == "hello world"
-    assert squash_whitespace("\n hello \t world \n") == "hello world"
-    
-    # Test empty and None
-    assert squash_whitespace("   ") == ""
-    assert squash_whitespace("") == ""
-    assert squash_whitespace(None) is None
-
-def test_normalize_unicode():
-    """Test unicode normalization function."""
-    # Test normal cases
-    assert normalize_unicode("hello") == "hello"
-    
-    # Test composed characters
-    assert normalize_unicode("café") == "café"  # Composed 'é'
-    
-    # Test decomposed characters
-    decomposed = "cafe\u0301"  # 'e' with combining acute accent
-    assert normalize_unicode(decomposed) == "café"  # Should normalize to composed form
-    
-    # Test different normalization forms
-    assert normalize_unicode("café", form="NFD") != normalize_unicode("café", form="NFC")
-    
-    # Test empty and None
-    assert normalize_unicode("") == ""
-    assert normalize_unicode(None) is None
-
-def test_remove_punctuation():
-    """Test punctuation removal function."""
-    # Test normal cases
-    assert remove_punctuation("hello, world!") == "hello world"
-    assert remove_punctuation("hello.world") == "helloworld"
-    
-    # Test with multiple punctuation marks
-    assert remove_punctuation("hello!!! world???") == "hello world"
-    assert remove_punctuation("hello@#$%world") == "helloworld"
-    
-    # Test with unicode punctuation
-    assert remove_punctuation("hello—world") == "helloworld"
-    assert remove_punctuation("«hello»") == "hello"
-    
-    # Test empty and None
-    assert remove_punctuation("") == ""
-    assert remove_punctuation(None) is None
-
-def test_map_values():
-    """Test value mapping function."""
-    mapping = {"a": 1, "b": 2, "c": 3}
-    
-    # Test normal cases
-    assert map_values("a", mapping) == 1
-    assert map_values("b", mapping) == 2
-    
-    # Test with default value
-    assert map_values("x", mapping) is None
-    assert map_values("x", mapping, default=0) == 0
-    
-    # Test with different value types
-    mixed_mapping = {1: "one", "two": 2, None: "null"}
-    assert map_values(1, mixed_mapping) == "one"
-    assert map_values(None, mixed_mapping) == "null"
-
-def test_cast_numeric():
-    """Test numeric casting function."""
-    # Test successful casts
-    assert cast_numeric("123") == 123
-    assert cast_numeric("123.45") == 123.45
-    assert cast_numeric(123) == 123
-    assert cast_numeric(123.45) == 123.45
-    
-    # Test integers vs floats
-    assert isinstance(cast_numeric("123"), int)
-    assert isinstance(cast_numeric("123.45"), float)
-    
-    # Test empty values
-    assert cast_numeric(None) is None
-    assert cast_numeric("", on_error="none") is None  # Need to specify on_error="none" to get None for empty string
-    assert cast_numeric("   ", on_error="none") is None  # Need to specify on_error="none" to get None for whitespace
-    
-    # Test empty values with default behavior (on_error="ignore")
-    assert cast_numeric("") == ""
-    assert cast_numeric("   ") == "   "
-    
-    # Test error handling modes
-    non_numeric = "abc"
-    assert cast_numeric(non_numeric, on_error="ignore") == non_numeric
-    assert cast_numeric(non_numeric, on_error="none") is None
-    assert cast_numeric(non_numeric, on_error="default", default=0) == 0
-    
-    # Test error raising
-    with pytest.raises(Exception):
-        cast_numeric(non_numeric, on_error="raise")
+def test_to_lower() -> None:
+    """Test string lowercasing function."""
+    # Test normal cases
+    assert to_lower("Hello") == "hello"
+    assert to_lower("HELLO") == "hello"
+    assert to_lower("HeLLo") == "hello"
+    
+    # Test with spaces and special characters
+    assert to_lower("Hello World!") == "hello world!"
+    assert to_lower("Hello123") == "hello123"
+    
+    # Test empty and None
+    assert to_lower("") == ""
+    assert to_lower(None) is None
+
+def test_strip_whitespace() -> None:
+    """Test whitespace stripping function."""
+    # Test normal cases
+    assert strip_whitespace("  hello  ") == "hello"
+    assert strip_whitespace("hello") == "hello"
+    
+    # Test with tabs and newlines
+    assert strip_whitespace("\thello\n") == "hello"
+    assert strip_whitespace("  hello\n  world  ") == "hello\n  world"
+    
+    # Test empty and None
+    assert strip_whitespace("   ") == ""
+    assert strip_whitespace("") == ""
+    assert strip_whitespace(None) is None
+
+def test_squash_whitespace() -> None:
+    """Test whitespace squashing function."""
+    # Test normal cases
+    assert squash_whitespace("hello   world") == "hello world"
+    assert squash_whitespace("  hello  world  ") == "hello world"
+    
+    # Test with tabs and newlines
+    assert squash_whitespace("hello\n\nworld") == "hello world"
+    assert squash_whitespace("hello\t\tworld") == "hello world"
+    assert squash_whitespace("\n hello \t world \n") == "hello world"
+    
+    # Test empty and None
+    assert squash_whitespace("   ") == ""
+    assert squash_whitespace("") == ""
+    assert squash_whitespace(None) is None
+
+def test_normalize_unicode() -> None:
+    """Test unicode normalization function."""
+    # Test normal cases
+    assert normalize_unicode("hello") == "hello"
+    
+    # Test composed characters
+    assert normalize_unicode("café") == "café"  # Composed 'é'
+    
+    # Test decomposed characters
+    decomposed = "cafe\u0301"  # 'e' with combining acute accent
+    assert normalize_unicode(decomposed) == "café"  # Should normalize to composed form
+    
+    # Test different normalization forms
+    assert normalize_unicode("café", form="NFD") != normalize_unicode("café", form="NFC")
+    
+    # Test empty and None
+    assert normalize_unicode("") == ""
+    assert normalize_unicode(None) is None
+
+def test_remove_punctuation() -> None:
+    """Test punctuation removal function."""
+    # Test normal cases
+    assert remove_punctuation("hello, world!") == "hello world"
+    assert remove_punctuation("hello.world") == "helloworld"
+    
+    # Test with multiple punctuation marks
+    assert remove_punctuation("hello!!! world???") == "hello world"
+    assert remove_punctuation("hello@#$%world") == "helloworld"
+    
+    # Test with unicode punctuation
+    assert remove_punctuation("hello—world") == "helloworld"
+    assert remove_punctuation("«hello»") == "hello"
+    
+    # Test empty and None
+    assert remove_punctuation("") == ""
+    assert remove_punctuation(None) is None
+
+def test_map_values() -> None:
+    """Test value mapping function."""
+    mapping = {"a": 1, "b": 2, "c": 3}
+    
+    # Test normal cases
+    assert map_values("a", mapping) == 1
+    assert map_values("b", mapping) == 2
+    
+    # Test with default value
+    assert map_values("x", mapping) is None
+    assert map_values("x", mapping, default=0) == 0
+    
+    # Test with different value types
+    mixed_mapping = {1: "one", "two": 2, None: "null"}
+    assert map_values(1, mixed_mapping) == "one"
+    assert map_values(None, mixed_mapping) == "null"
+
+def test_cast_numeric() -> None:
+    """Test numeric casting function."""
+    # Test successful casts
+    assert cast_numeric("123") == 123
+    assert cast_numeric("123.45") == 123.45
+    assert cast_numeric(123) == 123
+    assert cast_numeric(123.45) == 123.45
+    
+    # Test integers vs floats
+    assert isinstance(cast_numeric("123"), int)
+    assert isinstance(cast_numeric("123.45"), float)
+    
+    # Test empty values
+    assert cast_numeric(None) is None
+    assert cast_numeric("", on_error="none") is None  # Need to specify on_error="none" to get None for empty string
+    assert cast_numeric("   ", on_error="none") is None  # Need to specify on_error="none" to get None for whitespace
+    
+    # Test empty values with default behavior (on_error="ignore")
+    assert cast_numeric("") == ""
+    assert cast_numeric("   ") == "   "
+    
+    # Test error handling modes
+    non_numeric = "abc"
+    assert cast_numeric(non_numeric, on_error="ignore") == non_numeric
+    assert cast_numeric(non_numeric, on_error="none") is None
+    assert cast_numeric(non_numeric, on_error="default", default=0) == 0
+    
+    # Test error raising
+    with pytest.raises(Exception):
+        cast_numeric(non_numeric, on_error="raise")
diff --git a/airbyte_cdk/test/utils/transforms/test_date.py b/airbyte_cdk/test/utils/transforms/test_date.py
@@ -0,0 +1,72 @@
+"""Unit tests for date transforms."""
+from datetime import datetime
+
+from airbyte_cdk.utils.transforms.date import (
+    try_parse_date,
+    extract_date_parts,
+    floor_to_month,
+    ceil_to_month,
+)
+
+def test_try_parse_date() -> None:
+    """Check try_parse_date with a datetime and with non-datetime inputs (str, int, None)."""
+    # A datetime instance comes back equal to the input
+    dt = datetime(2023, 1, 15)
+    assert try_parse_date(dt) == dt
+
+    # Non-datetime inputs yield None; note that even a date-like string is expected to give None
+    assert try_parse_date("2023-01-15") is None
+    assert try_parse_date(123) is None
+    assert try_parse_date(None) is None
+
+def test_extract_date_parts() -> None:
+    """Check extract_date_parts on a valid datetime and on non-datetime inputs."""
+    # Valid datetime: each extracted part is compared against the known calendar values
+    dt = datetime(2023, 1, 15)  # Sunday
+    parts = extract_date_parts(dt)
+    assert parts["year"] == 2023
+    assert parts["month"] == 1
+    assert parts["day"] == 15
+    assert parts["dow"] == 6  # Sunday maps to 6, as in datetime.weekday() (Monday=0)
+
+    # Invalid input: every returned value must be None (note: an empty mapping would also pass)
+    parts = extract_date_parts(None)
+    assert all(v is None for v in parts.values())
+
+    parts = extract_date_parts("not a date")
+    assert all(v is None for v in parts.values())
+
+def test_floor_to_month() -> None:
+    """Check floor_to_month for mid-month, month-end, first-of-month, and invalid inputs."""
+    # Mid-month and end-of-month dates floor to the first day of the same month
+    dt = datetime(2023, 1, 15)
+    assert floor_to_month(dt) == datetime(2023, 1, 1)
+
+    dt = datetime(2023, 12, 31)
+    assert floor_to_month(dt) == datetime(2023, 12, 1)
+
+    # A date already on the first of the month compares equal to the input
+    dt = datetime(2023, 1, 1)
+    assert floor_to_month(dt) == dt
+
+    # Non-datetime input yields None
+    assert floor_to_month(None) is None
+    assert floor_to_month("not a date") is None
+
+def test_ceil_to_month() -> None:
+    """Check ceil_to_month for mid-month, year rollover, first-of-month, and invalid inputs."""
+    # A mid-month date moves to the first day of the following month
+    dt = datetime(2023, 1, 15)
+    assert ceil_to_month(dt) == datetime(2023, 2, 1)
+
+    # A December date rolls over to January of the next year
+    dt = datetime(2023, 12, 15)
+    assert ceil_to_month(dt) == datetime(2024, 1, 1)
+
+    # A date already on the first of the month still advances to the next month
+    dt = datetime(2023, 1, 1)
+    assert ceil_to_month(dt) == datetime(2023, 2, 1)
+
+    # Non-datetime input yields None
+    assert ceil_to_month(None) is None
+    assert ceil_to_month("not a date") is None
-def test_try_parse_date():
-    """Test date parsing function."""
-    # Test with datetime object
-    dt = datetime(2023, 1, 15)
-    assert try_parse_date(dt) == dt
-    
-    # Test with non-date object
-    assert try_parse_date("2023-01-15") is None
-    assert try_parse_date(123) is None
-    assert try_parse_date(None) is None
-
-def test_extract_date_parts():
-    """Test date parts extraction function."""
-    # Test with valid datetime
-    dt = datetime(2023, 1, 15)  # Sunday
-    parts = extract_date_parts(dt)
-    assert parts["year"] == 2023
-    assert parts["month"] == 1
-    assert parts["day"] == 15
-    assert parts["dow"] == 6  # Sunday is 6
-    
-    # Test with invalid input
-    parts = extract_date_parts(None)
-    assert all(v is None for v in parts.values())
-    
-    parts = extract_date_parts("not a date")
-    assert all(v is None for v in parts.values())
-
-def test_floor_to_month():
-    """Test floor to month function."""
-    # Test normal cases
-    dt = datetime(2023, 1, 15)
-    assert floor_to_month(dt) == datetime(2023, 1, 1)
-    
-    dt = datetime(2023, 12, 31)
-    assert floor_to_month(dt) == datetime(2023, 12, 1)
-    
-    # Test first day of month
-    dt = datetime(2023, 1, 1)
-    assert floor_to_month(dt) == dt
-    
-    # Test with invalid input
-    assert floor_to_month(None) is None
-    assert floor_to_month("not a date") is None
-
-def test_ceil_to_month():
-    """Test ceil to month function."""
-    # Test normal cases
-    dt = datetime(2023, 1, 15)
-    assert ceil_to_month(dt) == datetime(2023, 2, 1)
-    
-    # Test end of year
-    dt = datetime(2023, 12, 15)
-    assert ceil_to_month(dt) == datetime(2024, 1, 1)
-    
-    # Test first day of month
-    dt = datetime(2023, 1, 1)
-    assert ceil_to_month(dt) == datetime(2023, 2, 1)
-    
-    # Test with invalid input
-    assert ceil_to_month(None) is None
-    assert ceil_to_month("not a date") is None
+def test_try_parse_date() -> None:
+    """Check try_parse_date with a datetime and with non-datetime inputs (str, int, None)."""
+    # A datetime instance comes back equal to the input
+    dt = datetime(2023, 1, 15)
+    assert try_parse_date(dt) == dt
+    
+    # Non-datetime inputs yield None; note that even a date-like string is expected to give None
+    assert try_parse_date("2023-01-15") is None
+    assert try_parse_date(123) is None
+    assert try_parse_date(None) is None
+
+def test_extract_date_parts() -> None:
+    """Check extract_date_parts on a valid datetime and on non-datetime inputs."""
+    # Valid datetime: each extracted part is compared against the known calendar values
+    dt = datetime(2023, 1, 15)  # Sunday
+    parts = extract_date_parts(dt)
+    assert parts["year"] == 2023
+    assert parts["month"] == 1
+    assert parts["day"] == 15
+    assert parts["dow"] == 6  # Sunday maps to 6, as in datetime.weekday() (Monday=0)
+    
+    # Invalid input: every returned value must be None (note: an empty mapping would also pass)
+    parts = extract_date_parts(None)
+    assert all(v is None for v in parts.values())
+    
+    parts = extract_date_parts("not a date")
+    assert all(v is None for v in parts.values())
+
+def test_floor_to_month() -> None:
+    """Check floor_to_month for mid-month, month-end, first-of-month, and invalid inputs."""
+    # Mid-month and end-of-month dates floor to the first day of the same month
+    dt = datetime(2023, 1, 15)
+    assert floor_to_month(dt) == datetime(2023, 1, 1)
+    
+    dt = datetime(2023, 12, 31)
+    assert floor_to_month(dt) == datetime(2023, 12, 1)
+    
+    # A date already on the first of the month compares equal to the input
+    dt = datetime(2023, 1, 1)
+    assert floor_to_month(dt) == dt
+    
+    # Non-datetime input yields None
+    assert floor_to_month(None) is None
+    assert floor_to_month("not a date") is None
+
+def test_ceil_to_month() -> None:
+    """Check ceil_to_month for mid-month, year rollover, first-of-month, and invalid inputs."""
+    # A mid-month date moves to the first day of the following month
+    dt = datetime(2023, 1, 15)
+    assert ceil_to_month(dt) == datetime(2023, 2, 1)
+    
+    # A December date rolls over to January of the next year
+    dt = datetime(2023, 12, 15)
+    assert ceil_to_month(dt) == datetime(2024, 1, 1)
+    
+    # A date already on the first of the month still advances to the next month
+    dt = datetime(2023, 1, 1)
+    assert ceil_to_month(dt) == datetime(2023, 2, 1)
+    
+    # Non-datetime input yields None
+    assert ceil_to_month(None) is None
+    assert ceil_to_month("not a date") is None
-def test_try_parse_date():
-    """Test date parsing function."""
-    # Test with datetime object
-    dt = datetime(2023, 1, 15)
-    assert try_parse_date(dt) == dt
-    
-    # Test with non-date object
-    assert try_parse_date("2023-01-15") is None
-    assert try_parse_date(123) is None
-    assert try_parse_date(None) is None
-
-def test_extract_date_parts():
-    """Test date parts extraction function."""
-    # Test with valid datetime
-    dt = datetime(2023, 1, 15)  # Sunday
-    parts = extract_date_parts(dt)
-    assert parts["year"] == 2023
-    assert parts["month"] == 1
-    assert parts["day"] == 15
-    assert parts["dow"] == 6  # Sunday is 6
-    
-    # Test with invalid input
-    parts = extract_date_parts(None)
-    assert all(v is None for v in parts.values())
-    
-    parts = extract_date_parts("not a date")
-    assert all(v is None for v in parts.values())
-
-def test_floor_to_month():
-    """Test floor to month function."""
-    # Test normal cases
-    dt = datetime(2023, 1, 15)
-    assert floor_to_month(dt) == datetime(2023, 1, 1)
-    
-    dt = datetime(2023, 12, 31)
-    assert floor_to_month(dt) == datetime(2023, 12, 1)
-    
-    # Test first day of month
-    dt = datetime(2023, 1, 1)
-    assert floor_to_month(dt) == dt
-    
-    # Test with invalid input
-    assert floor_to_month(None) is None
-    assert floor_to_month("not a date") is None
-
-def test_ceil_to_month():
-    """Test ceil to month function."""
-    # Test normal cases
-    dt = datetime(2023, 1, 15)
-    assert ceil_to_month(dt) == datetime(2023, 2, 1)
-    
-    # Test end of year
-    dt = datetime(2023, 12, 15)
-    assert ceil_to_month(dt) == datetime(2024, 1, 1)
-    
-    # Test first day of month
-    dt = datetime(2023, 1, 1)
-    assert ceil_to_month(dt) == datetime(2023, 2, 1)
-    
-    # Test with invalid input
-    assert ceil_to_month(None) is None
-    assert ceil_to_month("not a date") is None
+def test_try_parse_date() -> None:
+    """Check try_parse_date with a datetime and with non-datetime inputs (str, int, None)."""
+    # A datetime instance comes back equal to the input
+    dt = datetime(2023, 1, 15)
+    assert try_parse_date(dt) == dt
+    
+    # Non-datetime inputs yield None; note that even a date-like string is expected to give None
+    assert try_parse_date("2023-01-15") is None
+    assert try_parse_date(123) is None
+    assert try_parse_date(None) is None
+
+def test_extract_date_parts() -> None:
+    """Check extract_date_parts on a valid datetime and on non-datetime inputs."""
+    # Valid datetime: each extracted part is compared against the known calendar values
+    dt = datetime(2023, 1, 15)  # Sunday
+    parts = extract_date_parts(dt)
+    assert parts["year"] == 2023
+    assert parts["month"] == 1
+    assert parts["day"] == 15
+    assert parts["dow"] == 6  # Sunday maps to 6, as in datetime.weekday() (Monday=0)
+    
+    # Invalid input: every returned value must be None (note: an empty mapping would also pass)
+    parts = extract_date_parts(None)
+    assert all(v is None for v in parts.values())
+    
+    parts = extract_date_parts("not a date")
+    assert all(v is None for v in parts.values())
+
+def test_floor_to_month() -> None:
+    """Check floor_to_month for mid-month, month-end, first-of-month, and invalid inputs."""
+    # Mid-month and end-of-month dates floor to the first day of the same month
+    dt = datetime(2023, 1, 15)
+    assert floor_to_month(dt) == datetime(2023, 1, 1)
+    
+    dt = datetime(2023, 12, 31)
+    assert floor_to_month(dt) == datetime(2023, 12, 1)
+    
+    # A date already on the first of the month compares equal to the input
+    dt = datetime(2023, 1, 1)
+    assert floor_to_month(dt) == dt
+    
+    # Non-datetime input yields None
+    assert floor_to_month(None) is None
+    assert floor_to_month("not a date") is None
+
+def test_ceil_to_month() -> None:
+    """Check ceil_to_month for mid-month, year rollover, first-of-month, and invalid inputs."""
+    # A mid-month date moves to the first day of the following month
+    dt = datetime(2023, 1, 15)
+    assert ceil_to_month(dt) == datetime(2023, 2, 1)
+    
+    # A December date rolls over to January of the next year
+    dt = datetime(2023, 12, 15)
+    assert ceil_to_month(dt) == datetime(2024, 1, 1)
+    
+    # A date already on the first of the month still advances to the next month
+    dt = datetime(2023, 1, 1)
+    assert ceil_to_month(dt) == datetime(2023, 2, 1)
+    
+    # Non-datetime input yields None
+    assert ceil_to_month(None) is None
+    assert ceil_to_month("not a date") is None