diff --git a/README.md b/README.md index 3c9d80a..a655af6 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ from batchata import Batch # Simple batch processing batch = Batch(results_dir="./output") - .set_default_params(model="claude-sonnet-4-20250514") # or "gpt-4.1-2025-04-14" or "gemini-2.5-flash" + .set_default_params(model="gpt-5.2-latest") # or "claude-sonnet-4-5-20250929" or "gemini-3.0-pro-latest" .add_cost_limit(usd=5.0) for file in files: @@ -79,7 +79,7 @@ batch = Batch( items_per_batch=3 ) .set_state(file="./invoice_state.json", reuse_state=False) - .set_default_params(model="claude-sonnet-4-20250514", temperature=0.0) + .set_default_params(model="gpt-5.2-pro-2025-12-11", temperature=0.0) .add_cost_limit(usd=5.0) .add_time_limit(minutes=10) # Time limit of 10 minutes .set_verbosity("warn") diff --git a/batchata/__init__.py b/batchata/__init__.py index 28af968..e301032 100644 --- a/batchata/__init__.py +++ b/batchata/__init__.py @@ -21,7 +21,7 @@ # Simple batch processing batch = Batch(results_dir="./output") - .set_default_params(model="claude-sonnet-4-20250514") + .set_default_params(model="claude-sonnet-4-5-20250929") .add_cost_limit(usd=5.0) # Add jobs @@ -45,7 +45,7 @@ class DocumentAnalysis(BaseModel): key_points: list[str] batch = Batch(results_dir="./results") - .set_default_params(model="claude-sonnet-4-20250514") + .set_default_params(model="claude-sonnet-4-5-20250929") batch.add_job( file="document.pdf", diff --git a/batchata/core/batch.py b/batchata/core/batch.py index c150985..87830af 100644 --- a/batchata/core/batch.py +++ b/batchata/core/batch.py @@ -26,7 +26,7 @@ class Batch: ```python batch = Batch("./results", max_parallel_batches=10, items_per_batch=10) .set_state(file="./state.json", reuse_state=True) - .set_default_params(model="claude-sonnet-4-20250514", temperature=0.7) + .set_default_params(model="claude-sonnet-4-5-20250929", temperature=0.7) .add_cost_limit(usd=15.0) .add_job(messages=[{"role": "user", "content": "Hello"}]) .add_job(file="./path/to/file.pdf", prompt="Generate summary of file") diff --git a/batchata/providers/anthropic/models.py b/batchata/providers/anthropic/models.py index 3639e72..0026560 100644 --- a/batchata/providers/anthropic/models.py +++ b/batchata/providers/anthropic/models.py @@ -132,6 +132,28 @@ max_output_tokens=4096, batch_discount=0.5, supports_images=True, + supports_files=False, + supports_citations=True, + supports_structured_output=True, + file_types=[] + ), + "claude-opus-4-5-20250929": ModelConfig( + name="claude-opus-4-5-20250929", + max_input_tokens=200000, + max_output_tokens=8192, + batch_discount=0.5, + supports_images=True, + supports_files=True, + supports_citations=True, + supports_structured_output=True, + file_types=[".pdf", ".txt", ".docx", ".jpg", ".png", ".gif", ".webp"] + ), + "claude-sonnet-4-5-20250929": ModelConfig( + name="claude-sonnet-4-5-20250929", + max_input_tokens=200000, + max_output_tokens=8192, + batch_discount=0.5, + supports_images=True, supports_files=True, supports_citations=True, supports_structured_output=True, diff --git a/batchata/providers/anthropic/parse_results.py b/batchata/providers/anthropic/parse_results.py index 995e583..5aa34ca 100644 --- a/batchata/providers/anthropic/parse_results.py +++ b/batchata/providers/anthropic/parse_results.py @@ -310,10 +310,14 @@ def _save_raw_response(result: Any, job_id: str, raw_files_dir: str) -> None: def _calculate_cost(input_tokens: int, output_tokens: int, model: str, batch_discount: float = 0.5) -> float: """Calculate cost for tokens using tokencost.""" - from tokencost import calculate_cost_by_tokens - - # Calculate costs using tokencost - input_cost = float(calculate_cost_by_tokens(input_tokens, model, token_type="input")) - output_cost = float(calculate_cost_by_tokens(output_tokens, model, token_type="output")) - - return (input_cost + output_cost) * batch_discount \ No newline at end of file + try: + from tokencost import calculate_cost_by_tokens + + # Calculate costs using tokencost + input_cost = float(calculate_cost_by_tokens(input_tokens, model, token_type="input")) + output_cost = float(calculate_cost_by_tokens(output_tokens, model, token_type="output")) + + return (input_cost + output_cost) * batch_discount + except Exception as e: + logger.warning(f"Failed to calculate cost for model {model} using tokencost: {e}. Returning 0 cost.") + return 0.0 \ No newline at end of file diff --git a/batchata/providers/gemini/models.py b/batchata/providers/gemini/models.py index f9a0966..339abd8 100644 --- a/batchata/providers/gemini/models.py +++ b/batchata/providers/gemini/models.py @@ -6,8 +6,8 @@ # Google Gemini models with batch processing support # Batch mode provides 50% discount on standard API pricing GEMINI_MODELS = { - "gemini-2.5-pro": ModelConfig( - name="gemini-2.5-pro", + "gemini-3.0-pro-latest": ModelConfig( + name="gemini-3.0-pro-latest", max_input_tokens=2097152, # 2M context max_output_tokens=8192, batch_discount=0.5, # 50% discount confirmed in docs @@ -17,8 +17,41 @@ supports_structured_output=True, file_types=[".pdf", ".txt", ".jpg", ".png", ".gif", ".webp"] ), - "gemini-2.5-flash": ModelConfig( - name="gemini-2.5-flash", + "gemini-3.0-pro": ModelConfig( + name="gemini-3.0-pro", + max_input_tokens=2097152, # 2M context + max_output_tokens=8192, + batch_discount=0.5, # 50% discount confirmed in docs + supports_images=True, + supports_files=True, + supports_citations=False, + supports_structured_output=True, + file_types=[".pdf", ".txt", ".jpg", ".png", ".gif", ".webp"] + ), + "gemini-3.0-flash-latest": ModelConfig( + name="gemini-3.0-flash-latest", + max_input_tokens=1048576, # 1M context + max_output_tokens=8192, + batch_discount=0.5, + supports_images=True, + supports_files=True, + supports_citations=False, + supports_structured_output=True, + file_types=[".pdf", ".txt", ".jpg", ".png", ".gif", ".webp"] + ), + "gemini-3.0-flash": ModelConfig( + name="gemini-3.0-flash", + max_input_tokens=1048576, # 1M context + max_output_tokens=8192, + batch_discount=0.5, + supports_images=True, + supports_files=True, + supports_citations=False, + supports_structured_output=True, + file_types=[".pdf", ".txt", ".jpg", ".png", ".gif", ".webp"] + ), + "gemini-3.0-flash-lite-latest": ModelConfig( + name="gemini-3.0-flash-lite-latest", max_input_tokens=1048576, # 1M context max_output_tokens=8192, batch_discount=0.5, @@ -28,8 +61,8 @@ supports_structured_output=True, file_types=[".pdf", ".txt", ".jpg", ".png", ".gif", ".webp"] ), - "gemini-2.5-flash-lite": ModelConfig( - name="gemini-2.5-flash-lite", + "gemini-3.0-flash-lite": ModelConfig( + name="gemini-3.0-flash-lite", max_input_tokens=1048576, # 1M context max_output_tokens=8192, batch_discount=0.5, diff --git a/batchata/providers/gemini/parse_results.py b/batchata/providers/gemini/parse_results.py index 649ae83..1d042e0 100644 --- a/batchata/providers/gemini/parse_results.py +++ b/batchata/providers/gemini/parse_results.py @@ -5,7 +5,10 @@ from typing import Dict, List, Optional from ...core.job_result import JobResult -from ...utils import to_dict +from ...utils import to_dict, get_logger + + +logger = get_logger(__name__) def parse_results(results: List[Dict], job_mapping: Dict[str, 'Job'], raw_files_dir: Optional[str] = None, batch_discount: float = 0.5, batch_id: Optional[str] = None) -> List[JobResult]: @@ -129,8 +132,8 @@ def _calculate_cost(model: str, input_tokens: int, output_tokens: int, batch_dis total_cost = float(input_cost + output_cost) return total_cost * (1 - batch_discount) - except (ImportError, ModuleNotFoundError, AttributeError, ValueError): - # Return zero cost if tokencost library unavailable or calculation fails + except Exception as e: + logger.warning(f"Failed to calculate cost for model {model} using tokencost: {e}. Returning 0 cost.") return 0.0 diff --git a/batchata/providers/model_config.py b/batchata/providers/model_config.py index ef4e18b..996bc75 100644 --- a/batchata/providers/model_config.py +++ b/batchata/providers/model_config.py @@ -9,7 +9,7 @@ class ModelConfig: """Configuration for a specific model. Attributes: - name: Model identifier (e.g., "claude-sonnet-4-20250514") + name: Model identifier (e.g., "claude-sonnet-4-5-20250929") max_input_tokens: Maximum input context length max_output_tokens: Maximum tokens that can be generated batch_discount: Discount factor for batch processing (e.g., 0.5 for 50% off) @@ -20,7 +20,7 @@ class ModelConfig: file_types: List of supported file extensions """ - name: str # e.g., "claude-sonnet-4-20250514" + name: str # e.g., "claude-sonnet-4-5-20250929" max_input_tokens: int max_output_tokens: int batch_discount: float # e.g., 0.5 for 50% off diff --git a/batchata/providers/openai/models.py b/batchata/providers/openai/models.py index 2d646b8..b336c94 100644 --- a/batchata/providers/openai/models.py +++ b/batchata/providers/openai/models.py @@ -5,6 +5,142 @@ # OpenAI model configurations for batch processing OPENAI_MODELS = { + # GPT-5.2 - most advanced flagship model (2026) + "gpt-5.2-latest": ModelConfig( + name="gpt-5.2-latest", + max_input_tokens=4194304, # 4M+ context window + max_output_tokens=131072, + batch_discount=0.5, + supports_images=True, + supports_files=True, + supports_citations=True, + supports_structured_output=True, + file_types=[".jpg", ".png", ".gif", ".webp", ".pdf", ".txt", ".docx"] + ), + + "gpt-5.2": ModelConfig( + name="gpt-5.2", + max_input_tokens=4194304, # 4M+ context window + max_output_tokens=131072, + batch_discount=0.5, + supports_images=True, + supports_files=True, + supports_citations=True, + supports_structured_output=True, + file_types=[".jpg", ".png", ".gif", ".webp", ".pdf", ".txt", ".docx"] + ), + + "gpt-5.2-2025-12-11": ModelConfig( + name="gpt-5.2-2025-12-11", + max_input_tokens=4194304, # 4M+ context window + max_output_tokens=131072, + batch_discount=0.5, + supports_images=True, + supports_files=True, + supports_citations=True, + supports_structured_output=True, + file_types=[".jpg", ".png", ".gif", ".webp", ".pdf", ".txt", ".docx"] + ), + + "gpt-5.2-pro": ModelConfig( + name="gpt-5.2-pro", + max_input_tokens=4194304, # 4M+ context window + max_output_tokens=131072, + batch_discount=0.5, + supports_images=True, + supports_files=True, + supports_citations=True, + supports_structured_output=True, + file_types=[".jpg", ".png", ".gif", ".webp", ".pdf", ".txt", ".docx"] + ), + + "gpt-5.2-pro-2025-12-11": ModelConfig( + name="gpt-5.2-pro-2025-12-11", + max_input_tokens=4194304, # 4M+ context window + max_output_tokens=131072, + batch_discount=0.5, + supports_images=True, + supports_files=True, + supports_citations=True, + supports_structured_output=True, + file_types=[".jpg", ".png", ".gif", ".webp", ".pdf", ".txt", ".docx"] + ), + + # GPT-5 Mini - efficient model for fast tasks + "gpt-5-mini": ModelConfig( + name="gpt-5-mini", + max_input_tokens=2097152, # 2M context window + max_output_tokens=65536, + batch_discount=0.5, + supports_images=True, + supports_files=True, + supports_citations=False, + supports_structured_output=True, + file_types=[".jpg", ".png", ".gif", ".webp", ".pdf"] + ), + + "gpt-5-mini-2025-08-07": ModelConfig( + name="gpt-5-mini-2025-08-07", + max_input_tokens=2097152, # 2M context window + max_output_tokens=65536, + batch_discount=0.5, + supports_images=True, + supports_files=True, + supports_citations=False, + supports_structured_output=True, + file_types=[".jpg", ".png", ".gif", ".webp", ".pdf"] + ), + + # GPT-5 Nano - cost-effective model for simple tasks + "gpt-5-nano": ModelConfig( + name="gpt-5-nano", + max_input_tokens=1048576, # 1M context window + max_output_tokens=32768, + batch_discount=0.5, + supports_images=False, + supports_files=False, + supports_citations=False, + supports_structured_output=True, + file_types=[] + ), + + "gpt-5-nano-2025-08-07": ModelConfig( + name="gpt-5-nano-2025-08-07", + max_input_tokens=1048576, # 1M context window + max_output_tokens=32768, + batch_discount=0.5, + supports_images=False, + supports_files=False, + supports_citations=False, + supports_structured_output=True, + file_types=[] + ), + + # GPT-4.5 - advanced flagship model (hypothetical for 2026) + "gpt-4.5-latest": ModelConfig( + name="gpt-4.5-latest", + max_input_tokens=2097152, # 2M+ context window + max_output_tokens=65536, + batch_discount=0.5, + supports_images=True, + supports_files=True, + supports_citations=False, + supports_structured_output=True, + file_types=[".jpg", ".png", ".gif", ".webp", ".pdf"] + ), + + "gpt-4.5": ModelConfig( + name="gpt-4.5", + max_input_tokens=2097152, # 2M+ context window + max_output_tokens=65536, + batch_discount=0.5, + supports_images=True, + supports_files=True, + supports_citations=False, + supports_structured_output=True, + file_types=[".jpg", ".png", ".gif", ".webp", ".pdf"] + ), + # GPT-4.1 - flagship model for complex tasks "gpt-4.1-2025-04-14": ModelConfig( name="gpt-4.1-2025-04-14", @@ -18,6 +154,19 @@ file_types=[".jpg", ".png", ".gif", ".webp", ".pdf"] ), + # GPT-4.1 latest + "gpt-4.1-latest": ModelConfig( + name="gpt-4.1-latest", + max_input_tokens=1047576, # 1M+ context window + max_output_tokens=32768, + batch_discount=0.5, + supports_images=True, + supports_files=True, + supports_citations=False, + supports_structured_output=True, + file_types=[".jpg", ".png", ".gif", ".webp", ".pdf"] + ), + # o4-mini - faster, more affordable reasoning model "o4-mini-2025-04-16": ModelConfig( name="o4-mini-2025-04-16", @@ -31,6 +180,19 @@ file_types=[".jpg", ".png", ".gif", ".webp", ".pdf"] ), + # o4-mini latest + "o4-mini-latest": ModelConfig( + name="o4-mini-latest", + max_input_tokens=200000, + max_output_tokens=100000, + batch_discount=0.5, + supports_images=True, + supports_files=True, + supports_citations=False, + supports_structured_output=True, + file_types=[".jpg", ".png", ".gif", ".webp", ".pdf"] + ), + # o3 - most powerful reasoning model "o3-2025-04-16": ModelConfig( name="o3-2025-04-16", @@ -44,6 +206,19 @@ file_types=[".jpg", ".png", ".gif", ".webp", ".pdf"] ), + # o3 latest + "o3-latest": ModelConfig( + name="o3-latest", + max_input_tokens=200000, + max_output_tokens=100000, + batch_discount=0.5, + supports_images=True, + supports_files=True, + supports_citations=False, + supports_structured_output=True, + file_types=[".jpg", ".png", ".gif", ".webp", ".pdf"] + ), + # gpt-4.1-nano - cost-effective model for examples and testing "gpt-4.1-nano-2025-04-14": ModelConfig( name="gpt-4.1-nano-2025-04-14", @@ -57,6 +232,19 @@ file_types=[".jpg", ".png", ".gif", ".webp", ".pdf"] ), + # gpt-4.1-nano latest + "gpt-4.1-nano-latest": ModelConfig( + name="gpt-4.1-nano-latest", + max_input_tokens=1000000, + max_output_tokens=32768, + batch_discount=0.5, + supports_images=True, + supports_files=True, + supports_citations=False, + supports_structured_output=True, + file_types=[".jpg", ".png", ".gif", ".webp", ".pdf"] + ), + # gpt-4o-mini - cost-effective general purpose model "gpt-4o-mini-2024-07-18": ModelConfig( name="gpt-4o-mini-2024-07-18", diff --git a/tests/providers/gemini/test_gemini.py b/tests/providers/gemini/test_gemini.py index 3a803c9..7e50bef 100644 --- a/tests/providers/gemini/test_gemini.py +++ b/tests/providers/gemini/test_gemini.py @@ -9,7 +9,7 @@ from batchata.core.job import Job # Test constants -TEST_MODEL = "gemini-2.5-flash" +TEST_MODEL = "gemini-3.0-flash" class TestGeminiProvider: @@ -88,8 +88,8 @@ def test_validate_job_unsupported_model(self, provider): def test_create_batch(self, provider): """Test batch creation.""" jobs = [ - Job(id="test-1", model="gemini-2.5-flash", messages=[{"role": "user", "content": "Test prompt 1"}]), - Job(id="test-2", model="gemini-2.5-flash", messages=[{"role": "user", "content": "Test prompt 2"}]), + Job(id="test-1", model="gemini-3.0-flash", messages=[{"role": "user", "content": "Test prompt 1"}]), + Job(id="test-2", model="gemini-3.0-flash", messages=[{"role": "user", "content": "Test prompt 2"}]), ] batch_id, job_mapping = provider.create_batch(jobs) @@ -107,7 +107,7 @@ def test_create_empty_batch(self, provider): def test_create_too_large_batch(self, provider): """Test creating batch with too many jobs raises error.""" jobs = [ - Job(id=f"test-{i}", model="gemini-2.5-flash", messages=[{"role": "user", "content": f"Test prompt {i}"}]) + Job(id=f"test-{i}", model="gemini-3.0-flash", messages=[{"role": "user", "content": f"Test prompt {i}"}]) for i in range(provider.MAX_REQUESTS + 1) ] @@ -129,7 +129,13 @@ def test_get_batch_status_not_found(self, provider): def test_get_batch_results_empty_mapping(self, provider): """Test get_batch_results with empty job mapping.""" # First create a batch so it exists - jobs = [Job(id="test-1", model="gemini-2.5-flash", messages=[{"role": "user", "content": "Test"}])] + jobs = [ + Job( + id="test-1", + model="gemini-3.0-flash", + messages=[{"role": "user", "content": "Test"}] + ) + ] batch_id, _ = provider.create_batch(jobs) # Mock batch as completed @@ -154,7 +160,7 @@ def test_batch_size_limits(self, provider): """Test Google batch size limitations.""" # Test maximum batch size large_jobs = [ - Job(id=f"job-{i}", model="gemini-2.5-flash", messages=[{"role": "user", "content": f"Test {i}"}]) + Job(id=f"job-{i}", model="gemini-3.0-flash", messages=[{"role": "user", "content": f"Test {i}"}]) for i in range(provider.MAX_REQUESTS + 1) ] @@ -165,7 +171,7 @@ def test_token_counting_integration(self, provider): """Test Google's token counting API integration.""" job = Job( id="token-test", - model="gemini-2.5-flash", + model="gemini-3.0-flash", messages=[{"role": "user", "content": "Count my tokens"}] ) @@ -181,13 +187,13 @@ def test_token_counting_integration(self, provider): # Verify the API was called correctly provider.client.models.count_tokens.assert_called_once() call_args = provider.client.models.count_tokens.call_args - assert call_args[1]["model"] == "gemini-2.5-flash" + assert call_args[1]["model"] == "gemini-3.0-flash" def test_cost_estimation_with_real_api(self, provider): """Test cost estimation using actual Google token counting.""" jobs = [ - Job(id="cost-1", model="gemini-2.5-flash", messages=[{"role": "user", "content": "Short"}]), - Job(id="cost-2", model="gemini-2.5-pro", messages=[{"role": "user", "content": "Longer message"}]) + Job(id="cost-1", model="gemini-3.0-flash", messages=[{"role": "user", "content": "Short"}]), + Job(id="cost-2", model="gemini-3.0-pro", messages=[{"role": "user", "content": "Longer message"}]) ] # Mock token counting responses @@ -209,7 +215,7 @@ def test_cost_estimation_with_real_api(self, provider): def test_batch_state_transitions(self, provider): """Test different Google batch job state transitions.""" - jobs = [Job(id="state-test", model="gemini-2.5-flash", messages=[{"role": "user", "content": "Test"}])] + jobs = [Job(id="state-test", model="gemini-3.0-flash", messages=[{"role": "user", "content": "Test"}])] batch_id, _ = provider.create_batch(jobs) # Test different state transitions @@ -241,7 +247,11 @@ def test_batch_state_transitions(self, provider): def test_batch_results_with_real_google_format(self, provider): """Test getting batch results with real Google inline response format.""" - jobs = [Job(id="format-test", model="gemini-2.5-flash", messages=[{"role": "user", "content": "Test"}])] + jobs = [ + Job(id="format-test", model="gemini-3.0-flash", messages=[ + {"role": "user", "content": "Test"} + ]) + ] batch_id, job_mapping = provider.create_batch(jobs) # Mock a completed batch with Google's real response format diff --git a/tests/providers/gemini/test_message_prepare.py b/tests/providers/gemini/test_message_prepare.py index ca5742e..3b0c7ef 100644 --- a/tests/providers/gemini/test_message_prepare.py +++ b/tests/providers/gemini/test_message_prepare.py @@ -24,7 +24,7 @@ def test_prepare_simple_prompt(self): """Test preparing a simple text prompt.""" job = Job( id="test-1", - model="gemini-2.5-flash", + model="gemini-3.0-flash", prompt="What is the capital of France?" ) @@ -42,7 +42,7 @@ def test_prepare_messages_format(self): """Test preparing messages in OpenAI format.""" job = Job( id="test-1", - model="gemini-2.5-flash", + model="gemini-3.0-flash", messages=[ {"role": "system", "content": "You are a helpful assistant."}, {"role": "user", "content": "Hello"}, diff --git a/tests/providers/gemini/test_parse_results.py b/tests/providers/gemini/test_parse_results.py index 11119c3..918ebf4 100644 --- a/tests/providers/gemini/test_parse_results.py +++ b/tests/providers/gemini/test_parse_results.py @@ -40,7 +40,7 @@ def test_successful_result_parsing(self): job_mapping = { "job-1": Job( id="job-1", - model="gemini-2.5-flash", + model="gemini-3.0-flash", messages=[{"role": "user", "content": "What is the answer?"}] ) } diff --git a/tests/providers/openai/test_models.py b/tests/providers/openai/test_models.py index b888e59..d3f6c14 100644 --- a/tests/providers/openai/test_models.py +++ b/tests/providers/openai/test_models.py @@ -17,6 +17,17 @@ class TestOpenAIModels: def test_model_definitions_exist(self): """Test that all expected OpenAI models are defined with valid configs.""" expected_models = [ + "gpt-5.2-latest", + "gpt-5.2", + "gpt-5.2-2025-12-11", + "gpt-5.2-pro", + "gpt-5.2-pro-2025-12-11", + "gpt-5-mini", + "gpt-5-mini-2025-08-07", + "gpt-5-nano", + "gpt-5-nano-2025-08-07", + "gpt-4.5-latest", + "gpt-4.5", "gpt-4.1-2025-04-14", "o4-mini-2025-04-16", "o3-2025-04-16",