From ac37c4844d750dc0ffdd64e872db6a4c306fddc2 Mon Sep 17 00:00:00 2001 From: speakeasybot Date: Thu, 30 Oct 2025 00:12:17 +0000 Subject: [PATCH] ## Python SDK Changes Detected: * `unstructured_client.workflows.create_workflow()`: `response.reprocess_all` **Changed** **Breaking** :warning: * `unstructured_client.workflows.get_workflow()`: `response.reprocess_all` **Changed** **Breaking** :warning: * `unstructured_client.workflows.list_workflows()`: `response.[].reprocess_all` **Changed** **Breaking** :warning: * `unstructured_client.workflows.update_workflow()`: `response.reprocess_all` **Changed** **Breaking** :warning: * `unstructured_client.general.partition()`: * `request.partition_parameters` **Changed** **Breaking** :warning: * `unstructured_client.destinations.create_destination()`: * `request.create_destination_connector.config.[azure_destination_connector_config_input]` **Added** * `response.config.[azure_destination_connector_config]` **Added** * `unstructured_client.destinations.get_destination()`: `response.config.[azure_destination_connector_config]` **Added** * `unstructured_client.destinations.list_destinations()`: * `request.destination_type` **Changed** * `response.[].config.[azure_destination_connector_config]` **Added** * `unstructured_client.destinations.update_destination()`: * `request.update_destination_connector.config.[azure_destination_connector_config_input]` **Added** * `response.config.[azure_destination_connector_config]` **Added** --- .gitignore | 1 + .speakeasy/gen.lock | 35 ++++++++------- .speakeasy/workflow.lock | 16 +++---- README.md | 1 + RELEASES.md | 12 ++++- USAGE.md | 1 + codeSamples.yaml | 6 +-- docs/models/shared/config.md | 6 +++ .../destinationconnectorinformationconfig.md | 6 +++ .../models/shared/destinationconnectortype.md | 1 + docs/models/shared/partitionparameters.md | 3 +- .../updatedestinationconnectorconfig.md | 6 +++ docs/models/shared/vlmmodel.md | 23 ---------- docs/models/shared/workflowinformation.md | 2 +- docs/sdks/general/README.md | 2 +- docs/sdks/unstructuredclient/README.md | 5 --- gen.yaml | 11 ++++- pyproject.toml | 2 +- scripts/prepare_readme.py | 5 +++ src/unstructured_client/_version.py | 8 ++-- src/unstructured_client/basesdk.py | 12 ++++- src/unstructured_client/destinations.py | 28 ++++++------ src/unstructured_client/httpclient.py | 1 - src/unstructured_client/jobs.py | 24 +++++----- .../models/errors/__init__.py | 18 ++++++-- .../models/errors/httpvalidationerror.py | 6 ++- .../models/errors/no_response_error.py | 6 ++- .../models/errors/responsevalidationerror.py | 2 + .../models/errors/sdkerror.py | 2 + .../models/errors/servererror.py | 6 ++- .../models/errors/unstructuredclienterror.py | 18 +++++--- .../models/operations/__init__.py | 15 ++++++- .../models/shared/__init__.py | 34 ++++++++++++-- .../shared/createdestinationconnector.py | 14 ++++-- .../shared/destinationconnectorinformation.py | 14 ++++-- .../models/shared/destinationconnectortype.py | 1 + .../models/shared/partition_parameters.py | 44 +++++-------------- .../shared/updatedestinationconnector.py | 14 ++++-- .../models/shared/workflowinformation.py | 8 ++-- src/unstructured_client/sdk.py | 21 +++++++-- src/unstructured_client/sources.py | 28 ++++++------ src/unstructured_client/utils/__init__.py | 18 ++++++-- src/unstructured_client/utils/annotations.py | 40 +++++++++++++---- .../utils/eventstreaming.py | 10 +++++ src/unstructured_client/workflows.py | 24 +++++----- 45 files changed, 356 insertions(+), 204 deletions(-) delete mode 100644 docs/models/shared/vlmmodel.md delete mode 100644 docs/sdks/unstructuredclient/README.md diff --git a/.gitignore b/.gitignore index 14e788bb..51b89173 100755 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +.env.local **/__pycache__/ **/.speakeasy/temp/ **/.speakeasy/logs/ diff --git a/.speakeasy/gen.lock b/.speakeasy/gen.lock index 578a0905..92a36584 100755 --- a/.speakeasy/gen.lock +++ b/.speakeasy/gen.lock @@ -1,12 +1,12 @@ lockVersion: 2.0.0 id: 8b5fa338-9106-4734-abf0-e30d67044a90 management: - docChecksum: 8038a5b9e376d44bad2fd4c117922954 - docVersion: 1.1.45 - speakeasyVersion: 1.601.0 - generationVersion: 2.680.0 - releaseVersion: 0.42.3 - configChecksum: 3d02dd7ca437781b3d721fab5d7b9adc + docChecksum: 203a3322c86603c2dcace3d0c5436c0b + docVersion: 1.2.7 + speakeasyVersion: 1.642.2 + generationVersion: 2.731.6 + releaseVersion: 0.43.0 + configChecksum: f58c0bd4b0a676689c655d7d4b5fd350 repoURL: https://github.com/Unstructured-IO/unstructured-python-client.git repoSubDirectory: . installationURL: https://github.com/Unstructured-IO/unstructured-python-client.git @@ -16,13 +16,13 @@ features: acceptHeaders: 3.0.0 additionalDependencies: 1.0.0 constsAndDefaults: 1.0.5 - core: 5.19.8 + core: 5.23.0 customCodeRegions: 0.1.1 defaultEnabledRetries: 0.2.0 enumUnions: 0.1.0 envVarSecurityUsage: 0.3.2 examples: 3.0.2 - globalSecurity: 3.0.3 + globalSecurity: 3.0.4 globalSecurityCallbacks: 1.0.0 globalSecurityFlattening: 1.0.0 globalServerURLs: 3.1.1 @@ -34,7 +34,7 @@ features: retries: 3.0.2 sdkHooks: 1.1.0 serverIDs: 3.0.0 - unions: 3.0.4 + unions: 3.1.0 uploadStreams: 1.0.0 generatedFiles: - .gitattributes @@ -101,6 +101,8 @@ generatedFiles: - docs/models/shared/astradbconnectorconfiginput.md - docs/models/shared/azureaisearchconnectorconfig.md - docs/models/shared/azureaisearchconnectorconfiginput.md + - docs/models/shared/azuredestinationconnectorconfig.md + - docs/models/shared/azuredestinationconnectorconfiginput.md - docs/models/shared/azuresourceconnectorconfig.md - docs/models/shared/azuresourceconnectorconfiginput.md - docs/models/shared/bodyrunworkflow.md @@ -212,7 +214,6 @@ generatedFiles: - docs/models/shared/updateworkflow.md - docs/models/shared/updateworkflowschedule.md - docs/models/shared/validationerror.md - - docs/models/shared/vlmmodel.md - docs/models/shared/vlmmodelprovider.md - docs/models/shared/weaviatedestinationconnectorconfig.md - docs/models/shared/weaviatedestinationconnectorconfiginput.md @@ -229,7 +230,6 @@ generatedFiles: - docs/sdks/general/README.md - docs/sdks/jobs/README.md - docs/sdks/sources/README.md - - docs/sdks/unstructuredclient/README.md - docs/sdks/workflows/README.md - poetry.toml - py.typed @@ -287,6 +287,8 @@ generatedFiles: - src/unstructured_client/models/shared/astradbconnectorconfiginput.py - src/unstructured_client/models/shared/azureaisearchconnectorconfig.py - src/unstructured_client/models/shared/azureaisearchconnectorconfiginput.py + - src/unstructured_client/models/shared/azuredestinationconnectorconfig.py + - src/unstructured_client/models/shared/azuredestinationconnectorconfiginput.py - src/unstructured_client/models/shared/azuresourceconnectorconfig.py - src/unstructured_client/models/shared/azuresourceconnectorconfiginput.py - src/unstructured_client/models/shared/body_run_workflow.py @@ -600,7 +602,7 @@ examples: application/json: {"name": "", "workflow_type": "advanced"} responses: "200": - application/json: {"created_at": "2023-01-15T13:52:58.634Z", "destinations": [], "id": "80c8c72f-101a-4a39-974b-49aa299e80d3", "name": "", "schedule": {"crontab_entries": [{"cron_expression": "0 0 * * *"}]}, "sources": ["47678eb0-4958-4341-ad66-b1b822c1deb9"], "status": "active", "workflow_nodes": [{"name": "", "subtype": "", "type": ""}, {"name": "", "subtype": "", "type": ""}]} + application/json: {"created_at": "2023-01-15T13:52:58.634Z", "destinations": [], "id": "80c8c72f-101a-4a39-974b-49aa299e80d3", "name": "", "reprocess_all": false, "schedule": {"crontab_entries": [{"cron_expression": "0 0 * * *"}]}, "sources": ["47678eb0-4958-4341-ad66-b1b822c1deb9"], "status": "active", "workflow_nodes": [{"name": "", "subtype": "", "type": ""}, {"name": "", "subtype": "", "type": ""}]} "422": application/json: {"detail": ""} delete_workflow: @@ -620,7 +622,7 @@ examples: workflow_id: "d031b0e5-7ca7-4a2b-b3cc-d869d2df3e76" responses: "200": - application/json: {"created_at": "2024-01-22T17:42:53.375Z", "destinations": ["624abbf1-dbce-4fe0-8d3f-0673956fc10e"], "id": "840c33c3-f30c-4908-9688-2682ed8eea3f", "name": "", "schedule": {"crontab_entries": [{"cron_expression": "0 0 * * *"}]}, "sources": ["349ed27a-eeda-4851-866a-8ce3a65a2957", "9aa34544-3278-446f-a265-efcaa55f9c4a"], "status": "active", "workflow_nodes": [{"name": "", "subtype": "", "type": ""}, {"name": "", "subtype": "", "type": ""}]} + application/json: {"created_at": "2024-01-22T17:42:53.375Z", "destinations": ["624abbf1-dbce-4fe0-8d3f-0673956fc10e"], "id": "840c33c3-f30c-4908-9688-2682ed8eea3f", "name": "", "reprocess_all": false, "schedule": {"crontab_entries": [{"cron_expression": "0 0 * * *"}]}, "sources": ["349ed27a-eeda-4851-866a-8ce3a65a2957", "9aa34544-3278-446f-a265-efcaa55f9c4a"], "status": "active", "workflow_nodes": [{"name": "", "subtype": "", "type": ""}, {"name": "", "subtype": "", "type": ""}]} "422": application/json: {"detail": ""} list_workflows: @@ -630,7 +632,7 @@ examples: sort_by: "id" responses: "200": - application/json: [{"created_at": "2023-09-02T04:21:23.162Z", "destinations": [], "id": "1e4c63cf-6a6b-407b-b993-b2bc8b3bcefc", "name": "", "schedule": {"crontab_entries": [{"cron_expression": "0 0 * * *"}]}, "sources": ["2e12d926-4caf-4cce-8b5c-3e16722c1c84"], "status": "inactive", "workflow_nodes": [{"name": "", "subtype": "", "type": ""}, {"name": "", "subtype": "", "type": ""}]}, {"created_at": "2023-09-02T04:21:23.162Z", "destinations": [], "id": "1e4c63cf-6a6b-407b-b993-b2bc8b3bcefc", "name": "", "schedule": {"crontab_entries": [{"cron_expression": "0 0 * * *"}]}, "sources": ["2e12d926-4caf-4cce-8b5c-3e16722c1c84"], "status": "inactive", "workflow_nodes": [{"name": "", "subtype": "", "type": ""}, {"name": "", "subtype": "", "type": ""}]}] + application/json: [{"created_at": "2023-09-02T04:21:23.162Z", "destinations": [], "id": "1e4c63cf-6a6b-407b-b993-b2bc8b3bcefc", "name": "", "reprocess_all": false, "schedule": {"crontab_entries": [{"cron_expression": "0 0 * * *"}]}, "sources": ["2e12d926-4caf-4cce-8b5c-3e16722c1c84"], "status": "inactive", "workflow_nodes": [{"name": "", "subtype": "", "type": ""}, {"name": "", "subtype": "", "type": ""}]}, {"created_at": "2023-09-02T04:21:23.162Z", "destinations": [], "id": "1e4c63cf-6a6b-407b-b993-b2bc8b3bcefc", "name": "", "reprocess_all": false, "schedule": {"crontab_entries": [{"cron_expression": "0 0 * * *"}]}, "sources": ["2e12d926-4caf-4cce-8b5c-3e16722c1c84"], "status": "inactive", "workflow_nodes": [{"name": "", "subtype": "", "type": ""}, {"name": "", "subtype": "", "type": ""}]}] "422": application/json: {"detail": [{"loc": ["", 864403], "msg": "", "type": ""}]} run_workflow: @@ -652,13 +654,13 @@ examples: application/json: {} responses: "200": - application/json: {"created_at": "2025-01-11T03:06:48.390Z", "destinations": [], "id": "88fd9cfe-ed29-4701-9d1f-8f26d4ea88d3", "name": "", "schedule": {"crontab_entries": [{"cron_expression": "0 0 * * *"}]}, "sources": ["0957eb7f-57d3-4705-bc49-03de6ad3794d", "a8b88099-9065-4458-a824-b37da7a9bccf", "a024c482-10ba-433a-9100-e6471e9d3db0"], "status": "active", "workflow_nodes": [{"name": "", "subtype": "", "type": ""}]} + application/json: {"created_at": "2025-01-11T03:06:48.390Z", "destinations": [], "id": "88fd9cfe-ed29-4701-9d1f-8f26d4ea88d3", "name": "", "reprocess_all": false, "schedule": {"crontab_entries": [{"cron_expression": "0 0 * * *"}]}, "sources": ["0957eb7f-57d3-4705-bc49-03de6ad3794d", "a8b88099-9065-4458-a824-b37da7a9bccf", "a024c482-10ba-433a-9100-e6471e9d3db0"], "status": "active", "workflow_nodes": [{"name": "", "subtype": "", "type": ""}]} "422": application/json: {"detail": ""} partition: speakeasy-default-partition: requestBody: - multipart/form-data: {"chunking_strategy": "by_title", "combine_under_n_chars": null, "content_type": null, "coordinates": false, "encoding": null, "files": {"": "{\"summary\":\"File to be partitioned\",\"externalValue\":\"https://github.com/Unstructured-IO/unstructured/blob/98d3541909f64290b5efb65a226fc3ee8a7cc5ee/example-docs/layout-parser-paper.pdf\"}"}, "gz_uncompressed_content_type": null, "hi_res_model_name": null, "include_orig_elements": null, "include_page_breaks": false, "include_slide_notes": true, "max_characters": null, "multipage_sections": true, "new_after_n_chars": null, "output_format": "application/json", "overlap": 0, "overlap_all": false, "pdf_infer_table_structure": true, "pdfminer_char_margin": null, "pdfminer_line_margin": null, "pdfminer_line_overlap": null, "pdfminer_word_margin": null, "similarity_threshold": null, "split_pdf_allow_failed": false, "split_pdf_cache_tmp_data": false, "split_pdf_cache_tmp_data_dir": "", "split_pdf_concurrency_level": 5, "split_pdf_page": true, "split_pdf_page_range": [1, 10], "starting_page_number": null, "strategy": "auto", "table_ocr_agent": null, "unique_element_ids": false, "vlm_model": "gpt-4o", "vlm_model_provider": "openai", "xml_keep_tags": false} + multipart/form-data: {"chunking_strategy": "by_title", "combine_under_n_chars": null, "content_type": null, "coordinates": false, "do_not_break_similarity_on_footer_header": false, "encoding": null, "files": {"": "{\"summary\":\"File to be partitioned\",\"externalValue\":\"https://github.com/Unstructured-IO/unstructured/blob/98d3541909f64290b5efb65a226fc3ee8a7cc5ee/example-docs/layout-parser-paper.pdf\"}"}, "gz_uncompressed_content_type": null, "hi_res_model_name": null, "include_orig_elements": null, "include_page_breaks": false, "include_slide_notes": true, "max_characters": null, "multipage_sections": true, "new_after_n_chars": null, "output_format": "application/json", "overlap": 0, "overlap_all": false, "pdf_infer_table_structure": true, "pdfminer_char_margin": null, "pdfminer_line_margin": null, "pdfminer_line_overlap": null, "pdfminer_word_margin": null, "similarity_threshold": null, "split_pdf_allow_failed": false, "split_pdf_cache_tmp_data": false, "split_pdf_cache_tmp_data_dir": "", "split_pdf_concurrency_level": 5, "split_pdf_page": true, "split_pdf_page_range": [1, 10], "starting_page_number": null, "strategy": "auto", "table_ocr_agent": null, "unique_element_ids": false, "vlm_model": "gpt-4o", "vlm_model_provider": "openai", "xml_keep_tags": false} responses: "200": application/json: [{"type": "Title", "element_id": "6aa0ff22f91bbe7e26e8e25ca8052acd", "text": "LayoutParser: A Unified Toolkit for Deep Learning Based Document Image Analysis", "metadata": {"languages": ["eng"], "page_number": 1, "filename": "layout-parser-paper.pdf", "filetype": "application/pdf"}}] @@ -712,3 +714,4 @@ examples: application/json: {"detail": []} examplesVersion: 1.0.2 generatedTests: {} +releaseNotes: "## Python SDK Changes Detected:\n* `unstructured_client.workflows.create_workflow()`: `response.reprocess_all` **Changed** **Breaking** :warning:\n* `unstructured_client.workflows.get_workflow()`: `response.reprocess_all` **Changed** **Breaking** :warning:\n* `unstructured_client.workflows.list_workflows()`: `response.[].reprocess_all` **Changed** **Breaking** :warning:\n* `unstructured_client.workflows.update_workflow()`: `response.reprocess_all` **Changed** **Breaking** :warning:\n* `unstructured_client.general.partition()`: \n * `request.partition_parameters` **Changed** **Breaking** :warning:\n* `unstructured_client.destinations.create_destination()`: \n * `request.create_destination_connector.config.[azure_destination_connector_config_input]` **Added**\n * `response.config.[azure_destination_connector_config]` **Added**\n* `unstructured_client.destinations.get_destination()`: `response.config.[azure_destination_connector_config]` **Added**\n* `unstructured_client.destinations.list_destinations()`: \n * `request.destination_type` **Changed**\n * `response.[].config.[azure_destination_connector_config]` **Added**\n* `unstructured_client.destinations.update_destination()`: \n * `request.update_destination_connector.config.[azure_destination_connector_config_input]` **Added**\n * `response.config.[azure_destination_connector_config]` **Added**\n" diff --git a/.speakeasy/workflow.lock b/.speakeasy/workflow.lock index ca7f6466..3f2614bf 100644 --- a/.speakeasy/workflow.lock +++ b/.speakeasy/workflow.lock @@ -1,21 +1,21 @@ -speakeasyVersion: 1.601.0 +speakeasyVersion: 1.642.2 sources: my-source: sourceNamespace: my-source - sourceRevisionDigest: sha256:8165f715321cd34bcebb6c9bb0734a1791777229937787b8e13707d519b05a5e - sourceBlobDigest: sha256:b4f8d9a6b0f4245c50b5e53298343df470978417248fa3988aeeb84ec9200c93 + sourceRevisionDigest: sha256:c0000ca02d148a3411badb20234511cebb95c1f6033ae1c7e5ea968dc62b379a + sourceBlobDigest: sha256:bcbd8404a0c82a7f08a3b738055e5e7b2f1c14062a1de92180a7b2b9c0a3ea96 tags: - latest - - speakeasy-sdk-regen-1754698272 - - 1.1.45 + - speakeasy-sdk-regen-1756339855 + - 1.2.7 targets: unstructured-python: source: my-source sourceNamespace: my-source - sourceRevisionDigest: sha256:8165f715321cd34bcebb6c9bb0734a1791777229937787b8e13707d519b05a5e - sourceBlobDigest: sha256:b4f8d9a6b0f4245c50b5e53298343df470978417248fa3988aeeb84ec9200c93 + sourceRevisionDigest: sha256:c0000ca02d148a3411badb20234511cebb95c1f6033ae1c7e5ea968dc62b379a + sourceBlobDigest: sha256:bcbd8404a0c82a7f08a3b738055e5e7b2f1c14062a1de92180a7b2b9c0a3ea96 codeSamplesNamespace: my-source-code-samples - codeSamplesRevisionDigest: sha256:a5e11972bfb15e43b7fcb2647b3a7b3e129e9303294f9110670280971b10780a + codeSamplesRevisionDigest: sha256:7c16d73cd265c979e746ffcb3c753ea3a1ca0930c00d182e32fc0c0ef4e72d82 workflow: workflowVersion: 1.0.0 speakeasyVersion: latest diff --git a/README.md b/README.md index da29a17a..d3459382 100644 --- a/README.md +++ b/README.md @@ -346,6 +346,7 @@ with UnstructuredClient() as uc_client:
The same SDK client can also be used to make asynchronous requests by importing asyncio. + ```python # Asynchronous Example import asyncio diff --git a/RELEASES.md b/RELEASES.md index 562395d9..bcc4d5a5 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -1119,4 +1119,14 @@ Based on: ### Generated - [python v0.42.3] . ### Releases -- [PyPI v0.42.3] https://pypi.org/project/unstructured-client/0.42.3 - . \ No newline at end of file +- [PyPI v0.42.3] https://pypi.org/project/unstructured-client/0.42.3 - . + +## 2025-10-30 00:11:00 +### Changes +Based on: +- OpenAPI Doc +- Speakeasy CLI 1.642.2 (2.731.6) https://github.com/speakeasy-api/speakeasy +### Generated +- [python v0.43.0] . +### Releases +- [PyPI v0.43.0] https://pypi.org/project/unstructured-client/0.43.0 - . \ No newline at end of file diff --git a/USAGE.md b/USAGE.md index 43ed3688..19965d4a 100644 --- a/USAGE.md +++ b/USAGE.md @@ -19,6 +19,7 @@ with UnstructuredClient() as uc_client:
The same SDK client can also be used to make asynchronous requests by importing asyncio. + ```python # Asynchronous Example import asyncio diff --git a/codeSamples.yaml b/codeSamples.yaml index f5eda8f2..2ff50f30 100644 --- a/codeSamples.yaml +++ b/codeSamples.yaml @@ -14,7 +14,7 @@ actions: "x-codeSamples": - "lang": "python" "label": "create_destination" - "source": "from unstructured_client import UnstructuredClient\nfrom unstructured_client.models import shared\n\n\nwith UnstructuredClient() as uc_client:\n\n res = uc_client.destinations.create_destination(request={\n \"create_destination_connector\": {\n \"name\": \"\",\n \"type\": shared.DestinationConnectorType.MOTHERDUCK,\n \"config\": {\n \"index_name\": \"\",\n \"api_key\": \"\",\n \"namespace\": \"\",\n \"batch_size\": 50,\n },\n },\n })\n\n assert res.destination_connector_information is not None\n\n # Handle response\n print(res.destination_connector_information)" + "source": "from unstructured_client import UnstructuredClient\nfrom unstructured_client.models import shared\n\n\nwith UnstructuredClient() as uc_client:\n\n res = uc_client.destinations.create_destination(request={\n \"create_destination_connector\": {\n \"name\": \"\",\n \"type\": shared.DestinationConnectorType.MONGODB,\n \"config\": {\n \"index_name\": \"\",\n \"api_key\": \"\",\n \"namespace\": \"\",\n \"batch_size\": 50,\n },\n },\n })\n\n assert res.destination_connector_information is not None\n\n # Handle response\n print(res.destination_connector_information)" - target: $["paths"]["/api/v1/destinations/{destination_id}"]["delete"] update: "x-codeSamples": @@ -32,7 +32,7 @@ actions: "x-codeSamples": - "lang": "python" "label": "update_destination" - "source": "from unstructured_client import UnstructuredClient\n\n\nwith UnstructuredClient() as uc_client:\n\n res = uc_client.destinations.update_destination(request={\n \"destination_id\": \"9726962d-9d1e-4f84-8787-c7313d183927\",\n \"update_destination_connector\": {\n \"config\": {\n \"bootstrap_servers\": \"\",\n \"port\": 9092,\n \"topic\": \"\",\n \"kafka_api_key\": \"\",\n \"secret\": \"\",\n \"batch_size\": 100,\n },\n },\n })\n\n assert res.destination_connector_information is not None\n\n # Handle response\n print(res.destination_connector_information)" + "source": "from unstructured_client import UnstructuredClient\n\n\nwith UnstructuredClient() as uc_client:\n\n res = uc_client.destinations.update_destination(request={\n \"destination_id\": \"962d9d1e-f847-487c-a731-3d18392716fb\",\n \"update_destination_connector\": {\n \"config\": {\n \"remote_url\": \"https://lined-clamp.info\",\n \"service_account_key\": \"\",\n },\n },\n })\n\n assert res.destination_connector_information is not None\n\n # Handle response\n print(res.destination_connector_information)" - target: $["paths"]["/api/v1/destinations/{destination_id}/connection-check"]["get"] update: "x-codeSamples": @@ -164,4 +164,4 @@ actions: "x-codeSamples": - "lang": "python" "label": "partition" - "source": "from unstructured_client import UnstructuredClient\nfrom unstructured_client.models import shared\n\n\nwith UnstructuredClient() as uc_client:\n\n res = uc_client.general.partition(request={\n \"partition_parameters\": {\n \"files\": {\n \"file_name\": \"example.file\",\n \"content\": open(\"example.file\", \"rb\"),\n },\n \"strategy\": shared.Strategy.AUTO,\n \"vlm_model_provider\": shared.VLMModelProvider.OPENAI,\n \"vlm_model\": shared.VLMModel.GPT_4O,\n \"chunking_strategy\": \"by_title\",\n \"split_pdf_page_range\": [\n 1,\n 10,\n ],\n \"split_pdf_cache_tmp_data_dir\": \"\",\n },\n })\n\n assert res.elements is not None\n\n # Handle response\n print(res.elements)" + "source": "from unstructured_client import UnstructuredClient\nfrom unstructured_client.models import shared\n\n\nwith UnstructuredClient() as uc_client:\n\n res = uc_client.general.partition(request={\n \"partition_parameters\": {\n \"files\": {\n \"file_name\": \"example.file\",\n \"content\": open(\"example.file\", \"rb\"),\n },\n \"strategy\": shared.Strategy.AUTO,\n \"vlm_model_provider\": shared.VLMModelProvider.OPENAI,\n \"vlm_model\": \"gpt-4o\",\n \"chunking_strategy\": \"by_title\",\n \"split_pdf_page_range\": [\n 1,\n 10,\n ],\n \"split_pdf_cache_tmp_data_dir\": \"\",\n },\n })\n\n assert res.elements is not None\n\n # Handle response\n print(res.elements)" diff --git a/docs/models/shared/config.md b/docs/models/shared/config.md index 03abce7e..b6441eb9 100644 --- a/docs/models/shared/config.md +++ b/docs/models/shared/config.md @@ -3,6 +3,12 @@ ## Supported Types +### `shared.AzureDestinationConnectorConfigInput` + +```python +value: shared.AzureDestinationConnectorConfigInput = /* values here */ +``` + ### `shared.AstraDBConnectorConfigInput` ```python diff --git a/docs/models/shared/destinationconnectorinformationconfig.md b/docs/models/shared/destinationconnectorinformationconfig.md index f7871f5a..70b163e8 100644 --- a/docs/models/shared/destinationconnectorinformationconfig.md +++ b/docs/models/shared/destinationconnectorinformationconfig.md @@ -3,6 +3,12 @@ ## Supported Types +### `shared.AzureDestinationConnectorConfig` + +```python +value: shared.AzureDestinationConnectorConfig = /* values here */ +``` + ### `shared.AstraDBConnectorConfig` ```python diff --git a/docs/models/shared/destinationconnectortype.md b/docs/models/shared/destinationconnectortype.md index d0b8f5ab..02159f7f 100644 --- a/docs/models/shared/destinationconnectortype.md +++ b/docs/models/shared/destinationconnectortype.md @@ -5,6 +5,7 @@ | Name | Value | | -------------------------------- | -------------------------------- | +| `AZURE` | azure | | `ASTRADB` | astradb | | `AZURE_AI_SEARCH` | azure_ai_search | | `COUCHBASE` | couchbase | diff --git a/docs/models/shared/partitionparameters.md b/docs/models/shared/partitionparameters.md index 75e52e5e..7202480f 100644 --- a/docs/models/shared/partitionparameters.md +++ b/docs/models/shared/partitionparameters.md @@ -9,6 +9,7 @@ | `combine_under_n_chars` | *OptionalNullable[int]* | :heavy_minus_sign: | If chunking strategy is set, combine elements until a section reaches a length of n chars. Default: 500 | | | `content_type` | *OptionalNullable[str]* | :heavy_minus_sign: | A hint about the content type to use (such as text/markdown), when there are problems processing a specific file. This value is a MIME type in the format type/subtype. | | | `coordinates` | *Optional[bool]* | :heavy_minus_sign: | If `True`, return coordinates for each element extracted via OCR. Default: `False` | | +| `do_not_break_similarity_on_footer_header` | *Optional[bool]* | :heavy_minus_sign: | When `True`, footer, header, and page number are always considered similar to the text before them for chunk by similarity method. This allows chunk by similarity to connect contents across page better. | | | `encoding` | *OptionalNullable[str]* | :heavy_minus_sign: | The encoding method used to decode the text input. Default: utf-8 | | | `extract_image_block_types` | List[*str*] | :heavy_minus_sign: | The types of elements to extract, for use in extracting image blocks as base64 encoded data stored in metadata fields. | | | `files` | [shared.Files](../../models/shared/files.md) | :heavy_check_mark: | The file to extract | {
"summary": "File to be partitioned",
"externalValue": "https://github.com/Unstructured-IO/unstructured/blob/98d3541909f64290b5efb65a226fc3ee8a7cc5ee/example-docs/layout-parser-paper.pdf"
} | @@ -42,6 +43,6 @@ | `strategy` | [Optional[shared.Strategy]](../../models/shared/strategy.md) | :heavy_minus_sign: | The strategy to use for partitioning PDF/image. Options are fast, hi_res, auto. Default: hi_res | auto | | `table_ocr_agent` | *OptionalNullable[str]* | :heavy_minus_sign: | The OCR agent to use for table ocr inference. | | | `unique_element_ids` | *Optional[bool]* | :heavy_minus_sign: | When `True`, assign UUIDs to element IDs, which guarantees their uniqueness (useful when using them as primary keys in database). Otherwise a SHA-256 of element text is used. Default: `False` | | -| `vlm_model` | [Optional[shared.VLMModel]](../../models/shared/vlmmodel.md) | :heavy_minus_sign: | The VLM Model to use. | gpt-4o | +| `vlm_model` | *Optional[str]* | :heavy_minus_sign: | The VLM Model to use. | gpt-4o | | `vlm_model_provider` | [Optional[shared.VLMModelProvider]](../../models/shared/vlmmodelprovider.md) | :heavy_minus_sign: | The VLM Model provider to use. | openai | | `xml_keep_tags` | *Optional[bool]* | :heavy_minus_sign: | If `True`, will retain the XML tags in the output. Otherwise it will simply extract the text from within the tags. Only applies to XML documents. | | \ No newline at end of file diff --git a/docs/models/shared/updatedestinationconnectorconfig.md b/docs/models/shared/updatedestinationconnectorconfig.md index a0424473..ea3688cb 100644 --- a/docs/models/shared/updatedestinationconnectorconfig.md +++ b/docs/models/shared/updatedestinationconnectorconfig.md @@ -3,6 +3,12 @@ ## Supported Types +### `shared.AzureDestinationConnectorConfigInput` + +```python +value: shared.AzureDestinationConnectorConfigInput = /* values here */ +``` + ### `shared.AstraDBConnectorConfigInput` ```python diff --git a/docs/models/shared/vlmmodel.md b/docs/models/shared/vlmmodel.md deleted file mode 100644 index 0a2e7ab4..00000000 --- a/docs/models/shared/vlmmodel.md +++ /dev/null @@ -1,23 +0,0 @@ -# VLMModel - -The VLM Model to use. - - -## Values - -| Name | Value | -| ---------------------------------------------- | ---------------------------------------------- | -| `CLAUDE_3_5_SONNET_20241022` | claude-3-5-sonnet-20241022 | -| `CLAUDE_3_7_SONNET_20250219` | claude-3-7-sonnet-20250219 | -| `GPT_4O` | gpt-4o | -| `GEMINI_1_5_PRO` | gemini-1.5-pro | -| `US_AMAZON_NOVA_PRO_V1_0` | us.amazon.nova-pro-v1:0 | -| `US_AMAZON_NOVA_LITE_V1_0` | us.amazon.nova-lite-v1:0 | -| `US_ANTHROPIC_CLAUDE_3_7_SONNET_20250219_V1_0` | us.anthropic.claude-3-7-sonnet-20250219-v1:0 | -| `US_ANTHROPIC_CLAUDE_3_5_SONNET_20241022_V2_0` | us.anthropic.claude-3-5-sonnet-20241022-v2:0 | -| `US_ANTHROPIC_CLAUDE_3_OPUS_20240229_V1_0` | us.anthropic.claude-3-opus-20240229-v1:0 | -| `US_ANTHROPIC_CLAUDE_3_HAIKU_20240307_V1_0` | us.anthropic.claude-3-haiku-20240307-v1:0 | -| `US_ANTHROPIC_CLAUDE_3_SONNET_20240229_V1_0` | us.anthropic.claude-3-sonnet-20240229-v1:0 | -| `US_META_LLAMA3_2_90B_INSTRUCT_V1_0` | us.meta.llama3-2-90b-instruct-v1:0 | -| `US_META_LLAMA3_2_11B_INSTRUCT_V1_0` | us.meta.llama3-2-11b-instruct-v1:0 | -| `GEMINI_2_0_FLASH_001` | gemini-2.0-flash-001 | \ No newline at end of file diff --git a/docs/models/shared/workflowinformation.md b/docs/models/shared/workflowinformation.md index d9fe6c77..f8c4b867 100644 --- a/docs/models/shared/workflowinformation.md +++ b/docs/models/shared/workflowinformation.md @@ -9,7 +9,7 @@ | `destinations` | List[*str*] | :heavy_check_mark: | N/A | | | `id` | *str* | :heavy_check_mark: | N/A | | | `name` | *str* | :heavy_check_mark: | N/A | | -| `reprocess_all` | *OptionalNullable[bool]* | :heavy_minus_sign: | N/A | | +| `reprocess_all` | *Optional[bool]* | :heavy_minus_sign: | N/A | | | `schedule` | [OptionalNullable[shared.WorkflowSchedule]](../../models/shared/workflowschedule.md) | :heavy_minus_sign: | N/A | {
"crontab_entries": [
{
"cron_expression": "0 0 * * *"
}
]
} | | `sources` | List[*str*] | :heavy_check_mark: | N/A | | | `status` | [shared.WorkflowState](../../models/shared/workflowstate.md) | :heavy_check_mark: | N/A | | diff --git a/docs/sdks/general/README.md b/docs/sdks/general/README.md index 1eee2b9e..4d900e4e 100644 --- a/docs/sdks/general/README.md +++ b/docs/sdks/general/README.md @@ -34,7 +34,7 @@ with UnstructuredClient() as uc_client: 10, ], "strategy": shared.Strategy.AUTO, - "vlm_model": shared.VLMModel.GPT_4O, + "vlm_model": "gpt-4o", "vlm_model_provider": shared.VLMModelProvider.OPENAI, }, }) diff --git a/docs/sdks/unstructuredclient/README.md b/docs/sdks/unstructuredclient/README.md deleted file mode 100644 index 895459f0..00000000 --- a/docs/sdks/unstructuredclient/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# UnstructuredClient SDK - -## Overview - -### Available Operations diff --git a/gen.yaml b/gen.yaml index 1060b902..731847cf 100644 --- a/gen.yaml +++ b/gen.yaml @@ -13,12 +13,15 @@ generation: auth: oAuth2ClientCredentialsEnabled: false oAuth2PasswordEnabled: false + hoistGlobalSecurity: true + schemas: + allOfMergeStrategy: shallowMerge tests: generateTests: true generateNewTests: false skipResponseBodyAssertions: false python: - version: 0.42.3 + version: 0.43.0 additionalDependencies: dev: deepdiff: '>=6.0' @@ -36,6 +39,10 @@ python: httpx: '>=0.27.0' pypdf: '>=4.0' requests-toolbelt: '>=1.0.0' + allowedRedefinedBuiltins: + - id + - object + asyncMode: both authors: - Unstructured baseErrorName: UnstructuredClientError @@ -58,6 +65,7 @@ python: shared: models/shared webhooks: models/webhooks inputModelSuffix: input + legacyPyright: true license: MIT maxMethodParams: 0 methodArguments: require-security-and-request @@ -69,4 +77,5 @@ python: pytestFilterWarnings: [] pytestTimeout: 0 responseFormat: envelope + sseFlatResponse: false templateVersion: v2 diff --git a/pyproject.toml b/pyproject.toml index 8684a295..fa6b7d8f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] name = "unstructured-client" -version = "0.42.3" +version = "0.43.0" description = "Python Client SDK for Unstructured API" authors = [{ name = "Unstructured" },] readme = "README-PYPI.md" diff --git a/scripts/prepare_readme.py b/scripts/prepare_readme.py index 52e872a1..d75b47bc 100644 --- a/scripts/prepare_readme.py +++ b/scripts/prepare_readme.py @@ -10,12 +10,17 @@ GITHUB_URL = ( GITHUB_URL[: -len(".git")] if GITHUB_URL.endswith(".git") else GITHUB_URL ) + REPO_SUBDIR = "." + # Ensure the subdirectory has a trailing slash + if not REPO_SUBDIR.endswith("/"): + REPO_SUBDIR += "/" # links on PyPI should have absolute URLs readme_contents = re.sub( r"(\[[^\]]+\]\()((?!https?:)[^\)]+)(\))", lambda m: m.group(1) + GITHUB_URL + "/blob/master/" + + REPO_SUBDIR + m.group(2) + m.group(3), readme_contents, diff --git a/src/unstructured_client/_version.py b/src/unstructured_client/_version.py index a5be6a7b..2b258bc0 100644 --- a/src/unstructured_client/_version.py +++ b/src/unstructured_client/_version.py @@ -3,10 +3,10 @@ import importlib.metadata __title__: str = "unstructured-client" -__version__: str = "0.42.3" -__openapi_doc_version__: str = "1.1.45" -__gen_version__: str = "2.680.0" -__user_agent__: str = "speakeasy-sdk/python 0.42.3 2.680.0 1.1.45 unstructured-client" +__version__: str = "0.43.0" +__openapi_doc_version__: str = "1.2.7" +__gen_version__: str = "2.731.6" +__user_agent__: str = "speakeasy-sdk/python 0.43.0 2.731.6 1.2.7 unstructured-client" try: if __package__ is not None: diff --git a/src/unstructured_client/basesdk.py b/src/unstructured_client/basesdk.py index a1c0f684..d429490c 100644 --- a/src/unstructured_client/basesdk.py +++ b/src/unstructured_client/basesdk.py @@ -20,9 +20,19 @@ class BaseSDK: sdk_configuration: SDKConfiguration + parent_ref: Optional[object] = None + """ + Reference to the root SDK instance, if any. This will prevent it from + being garbage collected while there are active streams. + """ - def __init__(self, sdk_config: SDKConfiguration) -> None: + def __init__( + self, + sdk_config: SDKConfiguration, + parent_ref: Optional[object] = None, + ) -> None: self.sdk_configuration = sdk_config + self.parent_ref = parent_ref def _get_url(self, base_url, url_variables): sdk_url, sdk_variables = self.sdk_configuration.get_server_details() diff --git a/src/unstructured_client/destinations.py b/src/unstructured_client/destinations.py index 9ad0a291..1947c992 100644 --- a/src/unstructured_client/destinations.py +++ b/src/unstructured_client/destinations.py @@ -81,7 +81,7 @@ def create_connection_check_destinations( config=self.sdk_configuration, base_url=base_url or "", operation_id="create_connection_check_destinations", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -184,7 +184,7 @@ async def create_connection_check_destinations_async( config=self.sdk_configuration, base_url=base_url or "", operation_id="create_connection_check_destinations", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -292,7 +292,7 @@ def create_destination( config=self.sdk_configuration, base_url=base_url or "", operation_id="create_destination", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -400,7 +400,7 @@ async def create_destination_async( config=self.sdk_configuration, base_url=base_url or "", operation_id="create_destination", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -501,7 +501,7 @@ def delete_destination( config=self.sdk_configuration, base_url=base_url or "", operation_id="delete_destination", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -600,7 +600,7 @@ async def delete_destination_async( config=self.sdk_configuration, base_url=base_url or "", operation_id="delete_destination", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -701,7 +701,7 @@ def get_connection_check_destinations( config=self.sdk_configuration, base_url=base_url or "", operation_id="get_connection_check_destinations", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -804,7 +804,7 @@ async def get_connection_check_destinations_async( config=self.sdk_configuration, base_url=base_url or "", operation_id="get_connection_check_destinations", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -904,7 +904,7 @@ def get_destination( config=self.sdk_configuration, base_url=base_url or "", operation_id="get_destination", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -1004,7 +1004,7 @@ async def get_destination_async( config=self.sdk_configuration, base_url=base_url or "", operation_id="get_destination", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -1105,7 +1105,7 @@ def list_destinations( config=self.sdk_configuration, base_url=base_url or "", operation_id="list_destinations", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -1206,7 +1206,7 @@ async def list_destinations_async( config=self.sdk_configuration, base_url=base_url or "", operation_id="list_destinations", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -1314,7 +1314,7 @@ def update_destination( config=self.sdk_configuration, base_url=base_url or "", operation_id="update_destination", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -1422,7 +1422,7 @@ async def update_destination_async( config=self.sdk_configuration, base_url=base_url or "", operation_id="update_destination", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, diff --git a/src/unstructured_client/httpclient.py b/src/unstructured_client/httpclient.py index 47b052cb..89560b56 100644 --- a/src/unstructured_client/httpclient.py +++ b/src/unstructured_client/httpclient.py @@ -107,7 +107,6 @@ def close_clients( # to them from the owning SDK instance and they can be reaped. owner.client = None owner.async_client = None - if sync_client is not None and not sync_client_supplied: try: sync_client.close() diff --git a/src/unstructured_client/jobs.py b/src/unstructured_client/jobs.py index ba620032..3c165ff8 100644 --- a/src/unstructured_client/jobs.py +++ b/src/unstructured_client/jobs.py @@ -78,7 +78,7 @@ def cancel_job( config=self.sdk_configuration, base_url=base_url or "", operation_id="cancel_job", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -176,7 +176,7 @@ async def cancel_job_async( config=self.sdk_configuration, base_url=base_url or "", operation_id="cancel_job", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -275,7 +275,7 @@ def download_job_output( config=self.sdk_configuration, base_url=base_url or "", operation_id="download_job_output", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -374,7 +374,7 @@ async def download_job_output_async( config=self.sdk_configuration, base_url=base_url or "", operation_id="download_job_output", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -470,7 +470,7 @@ def get_job( config=self.sdk_configuration, base_url=base_url or "", operation_id="get_job", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -568,7 +568,7 @@ async def get_job_async( config=self.sdk_configuration, base_url=base_url or "", operation_id="get_job", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -668,7 +668,7 @@ def get_job_details( config=self.sdk_configuration, base_url=base_url or "", operation_id="get_job_details", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -768,7 +768,7 @@ async def get_job_details_async( config=self.sdk_configuration, base_url=base_url or "", operation_id="get_job_details", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -869,7 +869,7 @@ def get_job_failed_files( config=self.sdk_configuration, base_url=base_url or "", operation_id="get_job_failed_files", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -970,7 +970,7 @@ async def get_job_failed_files_async( config=self.sdk_configuration, base_url=base_url or "", operation_id="get_job_failed_files", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -1068,7 +1068,7 @@ def list_jobs( config=self.sdk_configuration, base_url=base_url or "", operation_id="list_jobs", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -1166,7 +1166,7 @@ async def list_jobs_async( config=self.sdk_configuration, base_url=base_url or "", operation_id="list_jobs", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, diff --git a/src/unstructured_client/models/errors/__init__.py b/src/unstructured_client/models/errors/__init__.py index 1aa6130f..c7c3b5d1 100644 --- a/src/unstructured_client/models/errors/__init__.py +++ b/src/unstructured_client/models/errors/__init__.py @@ -1,8 +1,10 @@ """Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" +from .unstructuredclienterror import UnstructuredClientError from typing import TYPE_CHECKING from importlib import import_module import builtins +import sys if TYPE_CHECKING: from .httpvalidationerror import ( @@ -14,7 +16,6 @@ from .responsevalidationerror import ResponseValidationError from .sdkerror import SDKError from .servererror import ServerError, ServerErrorData - from .unstructuredclienterror import UnstructuredClientError __all__ = [ "Detail", @@ -37,10 +38,21 @@ "SDKError": ".sdkerror", "ServerError": ".servererror", "ServerErrorData": ".servererror", - "UnstructuredClientError": ".unstructuredclienterror", } +def dynamic_import(modname, retries=3): + for attempt in range(retries): + try: + return import_module(modname, __package__) + except KeyError: + # Clear any half-initialized module and retry + sys.modules.pop(modname, None) + if attempt == retries - 1: + break + raise KeyError(f"Failed to import module '{modname}' after {retries} attempts") + + def __getattr__(attr_name: str) -> object: module_name = _dynamic_imports.get(attr_name) if module_name is None: @@ -49,7 +61,7 @@ def __getattr__(attr_name: str) -> object: ) try: - module = import_module(module_name, __package__) + module = dynamic_import(module_name) result = getattr(module, attr_name) return result except ImportError as e: diff --git a/src/unstructured_client/models/errors/httpvalidationerror.py b/src/unstructured_client/models/errors/httpvalidationerror.py index 4be2ca24..320d8457 100644 --- a/src/unstructured_client/models/errors/httpvalidationerror.py +++ b/src/unstructured_client/models/errors/httpvalidationerror.py @@ -1,6 +1,7 @@ """Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" from __future__ import annotations +from dataclasses import dataclass, field import httpx from typing import List, Optional, Union from typing_extensions import TypeAliasType @@ -23,8 +24,9 @@ class HTTPValidationErrorData(BaseModel): detail: Optional[Detail] = None +@dataclass(unsafe_hash=True) class HTTPValidationError(UnstructuredClientError): - data: HTTPValidationErrorData + data: HTTPValidationErrorData = field(hash=False) def __init__( self, @@ -34,4 +36,4 @@ def __init__( ): message = body or raw_response.text super().__init__(message, raw_response, body) - self.data = data + object.__setattr__(self, "data", data) diff --git a/src/unstructured_client/models/errors/no_response_error.py b/src/unstructured_client/models/errors/no_response_error.py index f98beea2..1deab64b 100644 --- a/src/unstructured_client/models/errors/no_response_error.py +++ b/src/unstructured_client/models/errors/no_response_error.py @@ -1,12 +1,16 @@ """Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" +from dataclasses import dataclass + + +@dataclass(unsafe_hash=True) class NoResponseError(Exception): """Error raised when no HTTP response is received from the server.""" message: str def __init__(self, message: str = "No response received"): - self.message = message + object.__setattr__(self, "message", message) super().__init__(message) def __str__(self): diff --git a/src/unstructured_client/models/errors/responsevalidationerror.py b/src/unstructured_client/models/errors/responsevalidationerror.py index c3c09737..1ee0a9ba 100644 --- a/src/unstructured_client/models/errors/responsevalidationerror.py +++ b/src/unstructured_client/models/errors/responsevalidationerror.py @@ -2,10 +2,12 @@ import httpx from typing import Optional +from dataclasses import dataclass from unstructured_client.models.errors import UnstructuredClientError +@dataclass(unsafe_hash=True) class ResponseValidationError(UnstructuredClientError): """Error raised when there is a type mismatch between the response data and the expected Pydantic model.""" diff --git a/src/unstructured_client/models/errors/sdkerror.py b/src/unstructured_client/models/errors/sdkerror.py index 51e10405..0507ab5f 100644 --- a/src/unstructured_client/models/errors/sdkerror.py +++ b/src/unstructured_client/models/errors/sdkerror.py @@ -2,12 +2,14 @@ import httpx from typing import Optional +from dataclasses import dataclass from unstructured_client.models.errors import UnstructuredClientError MAX_MESSAGE_LEN = 10_000 +@dataclass(unsafe_hash=True) class SDKError(UnstructuredClientError): """The fallback error class if no more specific error class is matched.""" diff --git a/src/unstructured_client/models/errors/servererror.py b/src/unstructured_client/models/errors/servererror.py index 2b4c81a5..737eed69 100644 --- a/src/unstructured_client/models/errors/servererror.py +++ b/src/unstructured_client/models/errors/servererror.py @@ -1,6 +1,7 @@ """Code generated by Speakeasy (https://speakeasy.com). DO NOT EDIT.""" from __future__ import annotations +from dataclasses import dataclass, field import httpx from typing import Optional from unstructured_client.models.errors import UnstructuredClientError @@ -11,8 +12,9 @@ class ServerErrorData(BaseModel): detail: Optional[str] = None +@dataclass(unsafe_hash=True) class ServerError(UnstructuredClientError): - data: ServerErrorData + data: ServerErrorData = field(hash=False) def __init__( self, @@ -22,4 +24,4 @@ def __init__( ): message = body or raw_response.text super().__init__(message, raw_response, body) - self.data = data + object.__setattr__(self, "data", data) diff --git a/src/unstructured_client/models/errors/unstructuredclienterror.py b/src/unstructured_client/models/errors/unstructuredclienterror.py index 054fb875..921fbdaa 100644 --- a/src/unstructured_client/models/errors/unstructuredclienterror.py +++ b/src/unstructured_client/models/errors/unstructuredclienterror.py @@ -2,25 +2,29 @@ import httpx from typing import Optional +from dataclasses import dataclass, field +@dataclass(unsafe_hash=True) class UnstructuredClientError(Exception): """The base class for all HTTP error responses.""" message: str status_code: int body: str - headers: httpx.Headers - raw_response: httpx.Response + headers: httpx.Headers = field(hash=False) + raw_response: httpx.Response = field(hash=False) def __init__( self, message: str, raw_response: httpx.Response, body: Optional[str] = None ): - self.message = message - self.status_code = raw_response.status_code - self.body = body if body is not None else raw_response.text - self.headers = raw_response.headers - self.raw_response = raw_response + object.__setattr__(self, "message", message) + object.__setattr__(self, "status_code", raw_response.status_code) + object.__setattr__( + self, "body", body if body is not None else raw_response.text + ) + object.__setattr__(self, "headers", raw_response.headers) + object.__setattr__(self, "raw_response", raw_response) def __str__(self): return self.message diff --git a/src/unstructured_client/models/operations/__init__.py b/src/unstructured_client/models/operations/__init__.py index 8c6fbfc2..1818438c 100644 --- a/src/unstructured_client/models/operations/__init__.py +++ b/src/unstructured_client/models/operations/__init__.py @@ -3,6 +3,7 @@ from typing import TYPE_CHECKING from importlib import import_module import builtins +import sys if TYPE_CHECKING: from .cancel_job import ( @@ -391,6 +392,18 @@ } +def dynamic_import(modname, retries=3): + for attempt in range(retries): + try: + return import_module(modname, __package__) + except KeyError: + # Clear any half-initialized module and retry + sys.modules.pop(modname, None) + if attempt == retries - 1: + break + raise KeyError(f"Failed to import module '{modname}' after {retries} attempts") + + def __getattr__(attr_name: str) -> object: module_name = _dynamic_imports.get(attr_name) if module_name is None: @@ -399,7 +412,7 @@ def __getattr__(attr_name: str) -> object: ) try: - module = import_module(module_name, __package__) + module = dynamic_import(module_name) result = getattr(module, attr_name) return result except ImportError as e: diff --git a/src/unstructured_client/models/shared/__init__.py b/src/unstructured_client/models/shared/__init__.py index 0ab9d4af..a90b12a0 100644 --- a/src/unstructured_client/models/shared/__init__.py +++ b/src/unstructured_client/models/shared/__init__.py @@ -3,6 +3,7 @@ from typing import TYPE_CHECKING from importlib import import_module import builtins +import sys if TYPE_CHECKING: from .astradbconnectorconfig import ( @@ -21,6 +22,14 @@ AzureAISearchConnectorConfigInput, AzureAISearchConnectorConfigInputTypedDict, ) + from .azuredestinationconnectorconfig import ( + AzureDestinationConnectorConfig, + AzureDestinationConnectorConfigTypedDict, + ) + from .azuredestinationconnectorconfiginput import ( + AzureDestinationConnectorConfigInput, + AzureDestinationConnectorConfigInputTypedDict, + ) from .azuresourceconnectorconfig import ( AzureSourceConnectorConfig, AzureSourceConnectorConfigTypedDict, @@ -257,7 +266,6 @@ PartitionParameters, PartitionParametersTypedDict, Strategy, - VLMModel, VLMModelProvider, ) from .pineconedestinationconnectorconfig import ( @@ -413,6 +421,10 @@ "AzureAISearchConnectorConfigInput", "AzureAISearchConnectorConfigInputTypedDict", "AzureAISearchConnectorConfigTypedDict", + "AzureDestinationConnectorConfig", + "AzureDestinationConnectorConfigInput", + "AzureDestinationConnectorConfigInputTypedDict", + "AzureDestinationConnectorConfigTypedDict", "AzureSourceConnectorConfig", "AzureSourceConnectorConfigInput", "AzureSourceConnectorConfigInputTypedDict", @@ -622,7 +634,6 @@ "UpdateWorkflow", "UpdateWorkflowSchedule", "UpdateWorkflowTypedDict", - "VLMModel", "VLMModelProvider", "ValidationError", "ValidationErrorTypedDict", @@ -654,6 +665,10 @@ "AzureAISearchConnectorConfigTypedDict": ".azureaisearchconnectorconfig", "AzureAISearchConnectorConfigInput": ".azureaisearchconnectorconfiginput", "AzureAISearchConnectorConfigInputTypedDict": ".azureaisearchconnectorconfiginput", + "AzureDestinationConnectorConfig": ".azuredestinationconnectorconfig", + "AzureDestinationConnectorConfigTypedDict": ".azuredestinationconnectorconfig", + "AzureDestinationConnectorConfigInput": ".azuredestinationconnectorconfiginput", + "AzureDestinationConnectorConfigInputTypedDict": ".azuredestinationconnectorconfiginput", "AzureSourceConnectorConfig": ".azuresourceconnectorconfig", "AzureSourceConnectorConfigTypedDict": ".azuresourceconnectorconfig", "AzureSourceConnectorConfigInput": ".azuresourceconnectorconfiginput", @@ -796,7 +811,6 @@ "PartitionParameters": ".partition_parameters", "PartitionParametersTypedDict": ".partition_parameters", "Strategy": ".partition_parameters", - "VLMModel": ".partition_parameters", "VLMModelProvider": ".partition_parameters", "PineconeDestinationConnectorConfig": ".pineconedestinationconnectorconfig", "PineconeDestinationConnectorConfigTypedDict": ".pineconedestinationconnectorconfig", @@ -887,6 +901,18 @@ } +def dynamic_import(modname, retries=3): + for attempt in range(retries): + try: + return import_module(modname, __package__) + except KeyError: + # Clear any half-initialized module and retry + sys.modules.pop(modname, None) + if attempt == retries - 1: + break + raise KeyError(f"Failed to import module '{modname}' after {retries} attempts") + + def __getattr__(attr_name: str) -> object: module_name = _dynamic_imports.get(attr_name) if module_name is None: @@ -895,7 +921,7 @@ def __getattr__(attr_name: str) -> object: ) try: - module = import_module(module_name, __package__) + module = dynamic_import(module_name) result = getattr(module, attr_name) return result except ImportError as e: diff --git a/src/unstructured_client/models/shared/createdestinationconnector.py b/src/unstructured_client/models/shared/createdestinationconnector.py index 3275fd05..f6b50154 100644 --- a/src/unstructured_client/models/shared/createdestinationconnector.py +++ b/src/unstructured_client/models/shared/createdestinationconnector.py @@ -9,6 +9,10 @@ AzureAISearchConnectorConfigInput, AzureAISearchConnectorConfigInputTypedDict, ) +from .azuredestinationconnectorconfiginput import ( + AzureDestinationConnectorConfigInput, + AzureDestinationConnectorConfigInputTypedDict, +) from .couchbasedestinationconnectorconfiginput import ( CouchbaseDestinationConnectorConfigInput, CouchbaseDestinationConnectorConfigInputTypedDict, @@ -98,20 +102,21 @@ Union[ GCSDestinationConnectorConfigInputTypedDict, ElasticsearchConnectorConfigInputTypedDict, + MongoDBConnectorConfigInputTypedDict, AzureAISearchConnectorConfigInputTypedDict, WeaviateDestinationConnectorConfigInputTypedDict, - MongoDBConnectorConfigInputTypedDict, DeltaTableConnectorConfigInputTypedDict, QdrantCloudDestinationConnectorConfigInputTypedDict, PineconeDestinationConnectorConfigInputTypedDict, + AzureDestinationConnectorConfigInputTypedDict, Neo4jDestinationConnectorConfigInputTypedDict, OneDriveDestinationConnectorConfigInputTypedDict, S3DestinationConnectorConfigInputTypedDict, AstraDBConnectorConfigInputTypedDict, PostgresDestinationConnectorConfigInputTypedDict, - DatabricksVolumesConnectorConfigInputTypedDict, MilvusDestinationConnectorConfigInputTypedDict, KafkaCloudDestinationConnectorConfigInputTypedDict, + DatabricksVolumesConnectorConfigInputTypedDict, CouchbaseDestinationConnectorConfigInputTypedDict, RedisDestinationConnectorConfigInputTypedDict, DatabricksVDTDestinationConnectorConfigInputTypedDict, @@ -127,20 +132,21 @@ Union[ GCSDestinationConnectorConfigInput, ElasticsearchConnectorConfigInput, + MongoDBConnectorConfigInput, AzureAISearchConnectorConfigInput, WeaviateDestinationConnectorConfigInput, - MongoDBConnectorConfigInput, DeltaTableConnectorConfigInput, QdrantCloudDestinationConnectorConfigInput, PineconeDestinationConnectorConfigInput, + AzureDestinationConnectorConfigInput, Neo4jDestinationConnectorConfigInput, OneDriveDestinationConnectorConfigInput, S3DestinationConnectorConfigInput, AstraDBConnectorConfigInput, PostgresDestinationConnectorConfigInput, - DatabricksVolumesConnectorConfigInput, MilvusDestinationConnectorConfigInput, KafkaCloudDestinationConnectorConfigInput, + DatabricksVolumesConnectorConfigInput, CouchbaseDestinationConnectorConfigInput, RedisDestinationConnectorConfigInput, DatabricksVDTDestinationConnectorConfigInput, diff --git a/src/unstructured_client/models/shared/destinationconnectorinformation.py b/src/unstructured_client/models/shared/destinationconnectorinformation.py index 0e912623..6c988b4e 100644 --- a/src/unstructured_client/models/shared/destinationconnectorinformation.py +++ b/src/unstructured_client/models/shared/destinationconnectorinformation.py @@ -9,6 +9,10 @@ AzureAISearchConnectorConfig, AzureAISearchConnectorConfigTypedDict, ) +from .azuredestinationconnectorconfig import ( + AzureDestinationConnectorConfig, + AzureDestinationConnectorConfigTypedDict, +) from .couchbasedestinationconnectorconfig import ( CouchbaseDestinationConnectorConfig, CouchbaseDestinationConnectorConfigTypedDict, @@ -106,20 +110,21 @@ Union[ GCSDestinationConnectorConfigTypedDict, ElasticsearchConnectorConfigTypedDict, + MongoDBConnectorConfigTypedDict, AzureAISearchConnectorConfigTypedDict, WeaviateDestinationConnectorConfigTypedDict, - MongoDBConnectorConfigTypedDict, DeltaTableConnectorConfigTypedDict, QdrantCloudDestinationConnectorConfigTypedDict, PineconeDestinationConnectorConfigTypedDict, + AzureDestinationConnectorConfigTypedDict, AstraDBConnectorConfigTypedDict, Neo4jDestinationConnectorConfigTypedDict, OneDriveDestinationConnectorConfigTypedDict, S3DestinationConnectorConfigTypedDict, PostgresDestinationConnectorConfigTypedDict, - DatabricksVolumesConnectorConfigTypedDict, MilvusDestinationConnectorConfigTypedDict, KafkaCloudDestinationConnectorConfigTypedDict, + DatabricksVolumesConnectorConfigTypedDict, CouchbaseDestinationConnectorConfigTypedDict, RedisDestinationConnectorConfigTypedDict, DatabricksVDTDestinationConnectorConfigTypedDict, @@ -135,20 +140,21 @@ Union[ GCSDestinationConnectorConfig, ElasticsearchConnectorConfig, + MongoDBConnectorConfig, AzureAISearchConnectorConfig, WeaviateDestinationConnectorConfig, - MongoDBConnectorConfig, DeltaTableConnectorConfig, QdrantCloudDestinationConnectorConfig, PineconeDestinationConnectorConfig, + AzureDestinationConnectorConfig, AstraDBConnectorConfig, Neo4jDestinationConnectorConfig, OneDriveDestinationConnectorConfig, S3DestinationConnectorConfig, PostgresDestinationConnectorConfig, - DatabricksVolumesConnectorConfig, MilvusDestinationConnectorConfig, KafkaCloudDestinationConnectorConfig, + DatabricksVolumesConnectorConfig, CouchbaseDestinationConnectorConfig, RedisDestinationConnectorConfig, DatabricksVDTDestinationConnectorConfig, diff --git a/src/unstructured_client/models/shared/destinationconnectortype.py b/src/unstructured_client/models/shared/destinationconnectortype.py index 6de4b97c..d463db75 100644 --- a/src/unstructured_client/models/shared/destinationconnectortype.py +++ b/src/unstructured_client/models/shared/destinationconnectortype.py @@ -6,6 +6,7 @@ class DestinationConnectorType(str, Enum, metaclass=utils.OpenEnumMeta): + AZURE = "azure" ASTRADB = "astradb" AZURE_AI_SEARCH = "azure_ai_search" COUCHBASE = "couchbase" diff --git a/src/unstructured_client/models/shared/partition_parameters.py b/src/unstructured_client/models/shared/partition_parameters.py index e50ade3a..0098c0fe 100644 --- a/src/unstructured_client/models/shared/partition_parameters.py +++ b/src/unstructured_client/models/shared/partition_parameters.py @@ -64,35 +64,6 @@ class Strategy(str, Enum, metaclass=utils.OpenEnumMeta): VLM = "vlm" -class VLMModel(str, Enum, metaclass=utils.OpenEnumMeta): - r"""The VLM Model to use.""" - - CLAUDE_3_5_SONNET_20241022 = "claude-3-5-sonnet-20241022" - CLAUDE_3_7_SONNET_20250219 = "claude-3-7-sonnet-20250219" - GPT_4O = "gpt-4o" - GEMINI_1_5_PRO = "gemini-1.5-pro" - US_AMAZON_NOVA_PRO_V1_0 = "us.amazon.nova-pro-v1:0" - US_AMAZON_NOVA_LITE_V1_0 = "us.amazon.nova-lite-v1:0" - US_ANTHROPIC_CLAUDE_3_7_SONNET_20250219_V1_0 = ( - "us.anthropic.claude-3-7-sonnet-20250219-v1:0" - ) - US_ANTHROPIC_CLAUDE_3_5_SONNET_20241022_V2_0 = ( - "us.anthropic.claude-3-5-sonnet-20241022-v2:0" - ) - US_ANTHROPIC_CLAUDE_3_OPUS_20240229_V1_0 = ( - "us.anthropic.claude-3-opus-20240229-v1:0" - ) - US_ANTHROPIC_CLAUDE_3_HAIKU_20240307_V1_0 = ( - "us.anthropic.claude-3-haiku-20240307-v1:0" - ) - US_ANTHROPIC_CLAUDE_3_SONNET_20240229_V1_0 = ( - "us.anthropic.claude-3-sonnet-20240229-v1:0" - ) - US_META_LLAMA3_2_90B_INSTRUCT_V1_0 = "us.meta.llama3-2-90b-instruct-v1:0" - US_META_LLAMA3_2_11B_INSTRUCT_V1_0 = "us.meta.llama3-2-11b-instruct-v1:0" - GEMINI_2_0_FLASH_001 = "gemini-2.0-flash-001" - - class VLMModelProvider(str, Enum, metaclass=utils.OpenEnumMeta): r"""The VLM Model provider to use.""" @@ -116,6 +87,8 @@ class PartitionParametersTypedDict(TypedDict): r"""A hint about the content type to use (such as text/markdown), when there are problems processing a specific file. This value is a MIME type in the format type/subtype.""" coordinates: NotRequired[bool] r"""If `True`, return coordinates for each element extracted via OCR. Default: `False`""" + do_not_break_similarity_on_footer_header: NotRequired[bool] + r"""When `True`, footer, header, and page number are always considered similar to the text before them for chunk by similarity method. This allows chunk by similarity to connect contents across page better.""" encoding: NotRequired[Nullable[str]] r"""The encoding method used to decode the text input. Default: utf-8""" extract_image_block_types: NotRequired[List[str]] @@ -180,7 +153,7 @@ class PartitionParametersTypedDict(TypedDict): r"""The OCR agent to use for table ocr inference.""" unique_element_ids: NotRequired[bool] r"""When `True`, assign UUIDs to element IDs, which guarantees their uniqueness (useful when using them as primary keys in database). Otherwise a SHA-256 of element text is used. Default: `False`""" - vlm_model: NotRequired[VLMModel] + vlm_model: NotRequired[str] r"""The VLM Model to use.""" vlm_model_provider: NotRequired[VLMModelProvider] r"""The VLM Model provider to use.""" @@ -208,6 +181,11 @@ class PartitionParameters(BaseModel): coordinates: Annotated[Optional[bool], FieldMetadata(multipart=True)] = False r"""If `True`, return coordinates for each element extracted via OCR. Default: `False`""" + do_not_break_similarity_on_footer_header: Annotated[ + Optional[bool], FieldMetadata(multipart=True) + ] = False + r"""When `True`, footer, header, and page number are always considered similar to the text before them for chunk by similarity method. This allows chunk by similarity to connect contents across page better.""" + encoding: Annotated[OptionalNullable[str], FieldMetadata(multipart=True)] = None r"""The encoding method used to decode the text input. Default: utf-8""" @@ -352,10 +330,7 @@ class PartitionParameters(BaseModel): unique_element_ids: Annotated[Optional[bool], FieldMetadata(multipart=True)] = False r"""When `True`, assign UUIDs to element IDs, which guarantees their uniqueness (useful when using them as primary keys in database). Otherwise a SHA-256 of element text is used. Default: `False`""" - vlm_model: Annotated[ - Annotated[Optional[VLMModel], PlainValidator(validate_open_enum(False))], - FieldMetadata(multipart=True), - ] = None + vlm_model: Annotated[Optional[str], FieldMetadata(multipart=True)] = None r"""The VLM Model to use.""" vlm_model_provider: Annotated[ @@ -376,6 +351,7 @@ def serialize_model(self, handler): "combine_under_n_chars", "content_type", "coordinates", + "do_not_break_similarity_on_footer_header", "encoding", "extract_image_block_types", "gz_uncompressed_content_type", diff --git a/src/unstructured_client/models/shared/updatedestinationconnector.py b/src/unstructured_client/models/shared/updatedestinationconnector.py index e10814f6..f4a189de 100644 --- a/src/unstructured_client/models/shared/updatedestinationconnector.py +++ b/src/unstructured_client/models/shared/updatedestinationconnector.py @@ -9,6 +9,10 @@ AzureAISearchConnectorConfigInput, AzureAISearchConnectorConfigInputTypedDict, ) +from .azuredestinationconnectorconfiginput import ( + AzureDestinationConnectorConfigInput, + AzureDestinationConnectorConfigInputTypedDict, +) from .couchbasedestinationconnectorconfiginput import ( CouchbaseDestinationConnectorConfigInput, CouchbaseDestinationConnectorConfigInputTypedDict, @@ -95,20 +99,21 @@ Union[ GCSDestinationConnectorConfigInputTypedDict, ElasticsearchConnectorConfigInputTypedDict, + MongoDBConnectorConfigInputTypedDict, AzureAISearchConnectorConfigInputTypedDict, WeaviateDestinationConnectorConfigInputTypedDict, - MongoDBConnectorConfigInputTypedDict, DeltaTableConnectorConfigInputTypedDict, QdrantCloudDestinationConnectorConfigInputTypedDict, PineconeDestinationConnectorConfigInputTypedDict, + AzureDestinationConnectorConfigInputTypedDict, Neo4jDestinationConnectorConfigInputTypedDict, OneDriveDestinationConnectorConfigInputTypedDict, S3DestinationConnectorConfigInputTypedDict, AstraDBConnectorConfigInputTypedDict, PostgresDestinationConnectorConfigInputTypedDict, - DatabricksVolumesConnectorConfigInputTypedDict, MilvusDestinationConnectorConfigInputTypedDict, KafkaCloudDestinationConnectorConfigInputTypedDict, + DatabricksVolumesConnectorConfigInputTypedDict, CouchbaseDestinationConnectorConfigInputTypedDict, RedisDestinationConnectorConfigInputTypedDict, DatabricksVDTDestinationConnectorConfigInputTypedDict, @@ -124,20 +129,21 @@ Union[ GCSDestinationConnectorConfigInput, ElasticsearchConnectorConfigInput, + MongoDBConnectorConfigInput, AzureAISearchConnectorConfigInput, WeaviateDestinationConnectorConfigInput, - MongoDBConnectorConfigInput, DeltaTableConnectorConfigInput, QdrantCloudDestinationConnectorConfigInput, PineconeDestinationConnectorConfigInput, + AzureDestinationConnectorConfigInput, Neo4jDestinationConnectorConfigInput, OneDriveDestinationConnectorConfigInput, S3DestinationConnectorConfigInput, AstraDBConnectorConfigInput, PostgresDestinationConnectorConfigInput, - DatabricksVolumesConnectorConfigInput, MilvusDestinationConnectorConfigInput, KafkaCloudDestinationConnectorConfigInput, + DatabricksVolumesConnectorConfigInput, CouchbaseDestinationConnectorConfigInput, RedisDestinationConnectorConfigInput, DatabricksVDTDestinationConnectorConfigInput, diff --git a/src/unstructured_client/models/shared/workflowinformation.py b/src/unstructured_client/models/shared/workflowinformation.py index 599ce3f2..2025fa84 100644 --- a/src/unstructured_client/models/shared/workflowinformation.py +++ b/src/unstructured_client/models/shared/workflowinformation.py @@ -7,7 +7,7 @@ from .workflowtype import WorkflowType from datetime import datetime from pydantic import model_serializer -from typing import List +from typing import List, Optional from typing_extensions import NotRequired, TypedDict from unstructured_client.types import ( BaseModel, @@ -26,7 +26,7 @@ class WorkflowInformationTypedDict(TypedDict): sources: List[str] status: WorkflowState workflow_nodes: List[WorkflowNodeTypedDict] - reprocess_all: NotRequired[Nullable[bool]] + reprocess_all: NotRequired[bool] schedule: NotRequired[Nullable[WorkflowScheduleTypedDict]] updated_at: NotRequired[Nullable[datetime]] workflow_type: NotRequired[Nullable[WorkflowType]] @@ -47,7 +47,7 @@ class WorkflowInformation(BaseModel): workflow_nodes: List[WorkflowNode] - reprocess_all: OptionalNullable[bool] = UNSET + reprocess_all: Optional[bool] = False schedule: OptionalNullable[WorkflowSchedule] = UNSET @@ -58,7 +58,7 @@ class WorkflowInformation(BaseModel): @model_serializer(mode="wrap") def serialize_model(self, handler): optional_fields = ["reprocess_all", "schedule", "updated_at", "workflow_type"] - nullable_fields = ["reprocess_all", "schedule", "updated_at", "workflow_type"] + nullable_fields = ["schedule", "updated_at", "workflow_type"] null_default_fields = [] serialized = handler(self) diff --git a/src/unstructured_client/sdk.py b/src/unstructured_client/sdk.py index 014bb146..5d70378b 100644 --- a/src/unstructured_client/sdk.py +++ b/src/unstructured_client/sdk.py @@ -7,6 +7,7 @@ from .utils.retries import RetryConfig import httpx import importlib +import sys from typing import Any, Callable, Dict, Optional, TYPE_CHECKING, Union, cast from unstructured_client import utils from unstructured_client._hooks import SDKHooks @@ -63,7 +64,7 @@ def __init__( """ client_supplied = True if client is None: - client = httpx.Client() + client = httpx.Client(follow_redirects=True) client_supplied = False assert issubclass( @@ -72,7 +73,7 @@ def __init__( async_client_supplied = True if async_client is None: - async_client = httpx.AsyncClient() + async_client = httpx.AsyncClient(follow_redirects=True) async_client_supplied = False if debug_logger is None: @@ -107,6 +108,7 @@ def __init__( timeout_ms=timeout_ms, debug_logger=debug_logger, ), + parent_ref=self, ) hooks = SDKHooks() @@ -131,13 +133,24 @@ def __init__( self.sdk_configuration.async_client_supplied, ) + def dynamic_import(self, modname, retries=3): + for attempt in range(retries): + try: + return importlib.import_module(modname) + except KeyError: + # Clear any half-initialized module and retry + sys.modules.pop(modname, None) + if attempt == retries - 1: + break + raise KeyError(f"Failed to import module '{modname}' after {retries} attempts") + def __getattr__(self, name: str): if name in self._sub_sdk_map: module_path, class_name = self._sub_sdk_map[name] try: - module = importlib.import_module(module_path) + module = self.dynamic_import(module_path) klass = getattr(module, class_name) - instance = klass(self.sdk_configuration) + instance = klass(self.sdk_configuration, parent_ref=self) setattr(self, name, instance) return instance except ImportError as e: diff --git a/src/unstructured_client/sources.py b/src/unstructured_client/sources.py index 48bd82da..21399d4e 100644 --- a/src/unstructured_client/sources.py +++ b/src/unstructured_client/sources.py @@ -81,7 +81,7 @@ def create_connection_check_sources( config=self.sdk_configuration, base_url=base_url or "", operation_id="create_connection_check_sources", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -184,7 +184,7 @@ async def create_connection_check_sources_async( config=self.sdk_configuration, base_url=base_url or "", operation_id="create_connection_check_sources", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -291,7 +291,7 @@ def create_source( config=self.sdk_configuration, base_url=base_url or "", operation_id="create_source", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -398,7 +398,7 @@ async def create_source_async( config=self.sdk_configuration, base_url=base_url or "", operation_id="create_source", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -498,7 +498,7 @@ def delete_source( config=self.sdk_configuration, base_url=base_url or "", operation_id="delete_source", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -596,7 +596,7 @@ async def delete_source_async( config=self.sdk_configuration, base_url=base_url or "", operation_id="delete_source", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -697,7 +697,7 @@ def get_connection_check_sources( config=self.sdk_configuration, base_url=base_url or "", operation_id="get_connection_check_sources", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -800,7 +800,7 @@ async def get_connection_check_sources_async( config=self.sdk_configuration, base_url=base_url or "", operation_id="get_connection_check_sources", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -900,7 +900,7 @@ def get_source( config=self.sdk_configuration, base_url=base_url or "", operation_id="get_source", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -1000,7 +1000,7 @@ async def get_source_async( config=self.sdk_configuration, base_url=base_url or "", operation_id="get_source", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -1100,7 +1100,7 @@ def list_sources( config=self.sdk_configuration, base_url=base_url or "", operation_id="list_sources", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -1200,7 +1200,7 @@ async def list_sources_async( config=self.sdk_configuration, base_url=base_url or "", operation_id="list_sources", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -1307,7 +1307,7 @@ def update_source( config=self.sdk_configuration, base_url=base_url or "", operation_id="update_source", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -1414,7 +1414,7 @@ async def update_source_async( config=self.sdk_configuration, base_url=base_url or "", operation_id="update_source", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, diff --git a/src/unstructured_client/utils/__init__.py b/src/unstructured_client/utils/__init__.py index dc88403b..56164cf3 100644 --- a/src/unstructured_client/utils/__init__.py +++ b/src/unstructured_client/utils/__init__.py @@ -3,6 +3,7 @@ from typing import TYPE_CHECKING from importlib import import_module import builtins +import sys if TYPE_CHECKING: from .annotations import get_discriminator @@ -159,6 +160,18 @@ } +def dynamic_import(modname, retries=3): + for attempt in range(retries): + try: + return import_module(modname, __package__) + except KeyError: + # Clear any half-initialized module and retry + sys.modules.pop(modname, None) + if attempt == retries - 1: + break + raise KeyError(f"Failed to import module '{modname}' after {retries} attempts") + + def __getattr__(attr_name: str) -> object: module_name = _dynamic_imports.get(attr_name) if module_name is None: @@ -167,9 +180,8 @@ def __getattr__(attr_name: str) -> object: ) try: - module = import_module(module_name, __package__) - result = getattr(module, attr_name) - return result + module = dynamic_import(module_name) + return getattr(module, attr_name) except ImportError as e: raise ImportError( f"Failed to import {attr_name} from {module_name}: {e}" diff --git a/src/unstructured_client/utils/annotations.py b/src/unstructured_client/utils/annotations.py index 387874ed..12e0aa4f 100644 --- a/src/unstructured_client/utils/annotations.py +++ b/src/unstructured_client/utils/annotations.py @@ -3,6 +3,7 @@ from enum import Enum from typing import Any, Optional + def get_discriminator(model: Any, fieldname: str, key: str) -> str: """ Recursively search for the discriminator attribute in a model. @@ -25,31 +26,54 @@ def get_field_discriminator(field: Any) -> Optional[str]: if isinstance(field, dict): if key in field: - return f'{field[key]}' + return f"{field[key]}" if hasattr(field, fieldname): attr = getattr(field, fieldname) if isinstance(attr, Enum): - return f'{attr.value}' - return f'{attr}' + return f"{attr.value}" + return f"{attr}" if hasattr(field, upper_fieldname): attr = getattr(field, upper_fieldname) if isinstance(attr, Enum): - return f'{attr.value}' - return f'{attr}' + return f"{attr.value}" + return f"{attr}" return None + def search_nested_discriminator(obj: Any) -> Optional[str]: + """Recursively search for discriminator in nested structures.""" + # First try direct field lookup + discriminator = get_field_discriminator(obj) + if discriminator is not None: + return discriminator + + # If it's a dict, search in nested values + if isinstance(obj, dict): + for value in obj.values(): + if isinstance(value, list): + # Search in list items + for item in value: + nested_discriminator = search_nested_discriminator(item) + if nested_discriminator is not None: + return nested_discriminator + elif isinstance(value, dict): + # Search in nested dict + nested_discriminator = search_nested_discriminator(value) + if nested_discriminator is not None: + return nested_discriminator + + return None if isinstance(model, list): for field in model: - discriminator = get_field_discriminator(field) + discriminator = search_nested_discriminator(field) if discriminator is not None: return discriminator - discriminator = get_field_discriminator(model) + discriminator = search_nested_discriminator(model) if discriminator is not None: return discriminator - raise ValueError(f'Could not find discriminator field {fieldname} in {model}') + raise ValueError(f"Could not find discriminator field {fieldname} in {model}") diff --git a/src/unstructured_client/utils/eventstreaming.py b/src/unstructured_client/utils/eventstreaming.py index 74a63f75..0969899b 100644 --- a/src/unstructured_client/utils/eventstreaming.py +++ b/src/unstructured_client/utils/eventstreaming.py @@ -17,6 +17,9 @@ class EventStream(Generic[T]): + # Holds a reference to the SDK client to avoid it being garbage collected + # and cause termination of the underlying httpx client. + client_ref: Optional[object] response: httpx.Response generator: Generator[T, None, None] @@ -25,9 +28,11 @@ def __init__( response: httpx.Response, decoder: Callable[[str], T], sentinel: Optional[str] = None, + client_ref: Optional[object] = None, ): self.response = response self.generator = stream_events(response, decoder, sentinel) + self.client_ref = client_ref def __iter__(self): return self @@ -43,6 +48,9 @@ def __exit__(self, exc_type, exc_val, exc_tb): class EventStreamAsync(Generic[T]): + # Holds a reference to the SDK client to avoid it being garbage collected + # and cause termination of the underlying httpx client. + client_ref: Optional[object] response: httpx.Response generator: AsyncGenerator[T, None] @@ -51,9 +59,11 @@ def __init__( response: httpx.Response, decoder: Callable[[str], T], sentinel: Optional[str] = None, + client_ref: Optional[object] = None, ): self.response = response self.generator = stream_events_async(response, decoder, sentinel) + self.client_ref = client_ref def __aiter__(self): return self diff --git a/src/unstructured_client/workflows.py b/src/unstructured_client/workflows.py index 7a2fc6bd..69f8be95 100644 --- a/src/unstructured_client/workflows.py +++ b/src/unstructured_client/workflows.py @@ -81,7 +81,7 @@ def create_workflow( config=self.sdk_configuration, base_url=base_url or "", operation_id="create_workflow", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -184,7 +184,7 @@ async def create_workflow_async( config=self.sdk_configuration, base_url=base_url or "", operation_id="create_workflow", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -284,7 +284,7 @@ def delete_workflow( config=self.sdk_configuration, base_url=base_url or "", operation_id="delete_workflow", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -382,7 +382,7 @@ async def delete_workflow_async( config=self.sdk_configuration, base_url=base_url or "", operation_id="delete_workflow", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -480,7 +480,7 @@ def get_workflow( config=self.sdk_configuration, base_url=base_url or "", operation_id="get_workflow", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -580,7 +580,7 @@ async def get_workflow_async( config=self.sdk_configuration, base_url=base_url or "", operation_id="get_workflow", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -680,7 +680,7 @@ def list_workflows( config=self.sdk_configuration, base_url=base_url or "", operation_id="list_workflows", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -780,7 +780,7 @@ async def list_workflows_async( config=self.sdk_configuration, base_url=base_url or "", operation_id="list_workflows", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -887,7 +887,7 @@ def run_workflow( config=self.sdk_configuration, base_url=base_url or "", operation_id="run_workflow", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -994,7 +994,7 @@ async def run_workflow_async( config=self.sdk_configuration, base_url=base_url or "", operation_id="run_workflow", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -1097,7 +1097,7 @@ def update_workflow( config=self.sdk_configuration, base_url=base_url or "", operation_id="update_workflow", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req, @@ -1200,7 +1200,7 @@ async def update_workflow_async( config=self.sdk_configuration, base_url=base_url or "", operation_id="update_workflow", - oauth2_scopes=[], + oauth2_scopes=None, security_source=self.sdk_configuration.security, ), request=req,