diff --git a/chatlas/_content.py b/chatlas/_content.py index be17e775..15f1a77a 100644 --- a/chatlas/_content.py +++ b/chatlas/_content.py @@ -665,12 +665,17 @@ class ContentPDF(Content): Parameters ---------- - value + data The PDF data extracted + filename + The name of the PDF file + url + An optional URL where the PDF can be accessed """ data: bytes filename: str + url: Optional[str] = None content_type: ContentTypeEnum = "pdf" diff --git a/chatlas/_content_pdf.py b/chatlas/_content_pdf.py index 042a7a3c..335e4218 100644 --- a/chatlas/_content_pdf.py +++ b/chatlas/_content_pdf.py @@ -77,7 +77,7 @@ def content_pdf_url(url: str) -> ContentPDF: # apis where they exist. Might need some kind of mutable state so can # record point to uploaded file. data = download_pdf_bytes(url) - return ContentPDF(data=data, filename=unique_pdf_name()) + return ContentPDF(data=data, filename=unique_pdf_name(), url=url) def parse_data_url(url: str) -> tuple[str, str]: diff --git a/chatlas/_inspect.py b/chatlas/_inspect.py index b847349f..fbe20eda 100644 --- a/chatlas/_inspect.py +++ b/chatlas/_inspect.py @@ -172,8 +172,11 @@ def chatlas_content_as_inspect(content: ContentUnion) -> InspectContent: data_url = f"data:{content.image_content_type};base64,{content.data or ''}" return itool.ContentImage(image=data_url, detail="auto") elif isinstance(content, ContentPDF): + doc = content.url + if doc is None: + doc = f"data:application/pdf;base64,{base64.b64encode(content.data).decode('ascii')}" return itool.ContentDocument( - document=base64.b64encode(content.data).decode("ascii"), + document=doc, mime_type="application/pdf", filename=content.filename, ) @@ -211,13 +214,17 @@ def inspect_content_as_chatlas(content: str | InspectContent) -> Content: image_content_type=content_type, # type: ignore ) if isinstance(content, itool.ContentDocument): + doc = content.document if content.mime_type == "application/pdf": - return ContentPDF( - data=base64.b64decode(content.document), - filename=content.filename, - ) + url = None + if doc.startswith("http://") or doc.startswith("https://"): + url = doc + data = None + else: + data = base64.b64decode(doc.split(",", 1)[1]) + return ContentPDF(data=data, url=url, filename=content.filename) else: - return ContentText(text=content.document) + return ContentText(text=doc) if isinstance(content, itool.ContentData): return ContentJson(value=content.data) raise ValueError( diff --git a/chatlas/_provider_openai_completions.py b/chatlas/_provider_openai_completions.py index 99730a33..6f2d0374 100644 --- a/chatlas/_provider_openai_completions.py +++ b/chatlas/_provider_openai_completions.py @@ -56,18 +56,22 @@ def ChatOpenAICompletions( *, + base_url: str = "https://api.openai.com/v1", system_prompt: Optional[str] = None, model: "Optional[ChatModel | str]" = None, api_key: Optional[str] = None, - base_url: str = "https://api.openai.com/v1", seed: int | None | MISSING_TYPE = MISSING, kwargs: Optional["ChatClientArgs"] = None, ) -> Chat["SubmitInputArgs", ChatCompletion]: """ - Chat with an OpenAI model via the Completions API. + Chat with an OpenAI-compatible model (via the Completions API). This function exists mainly for historical reasons; new code should prefer `ChatOpenAI()`, which uses the newer Responses API. + + This function may also be useful for using an "OpenAI-compatible model" + hosted by another provider (e.g., vLLM, Ollama, etc.) that supports the + OpenAI Completions API. """ if isinstance(seed, MISSING_TYPE): seed = 1014 if is_testing() else None