Merged
62 changes: 62 additions & 0 deletions docs/docs/ai/llm.mdx
@@ -20,6 +20,7 @@ We support the following types of LLM APIs:
| API Name | `LlmApiType` enum | Text Generation | Text Embedding |
|----------|---------------------|--------------------|--------------------|
| [OpenAI](#openai) | `LlmApiType.OPENAI` | ✅ | ✅ |
| [Azure OpenAI](#azure-openai) | `LlmApiType.AZURE_OPENAI` | ✅ | ✅ |
| [Ollama](#ollama) | `LlmApiType.OLLAMA` | ✅ | ✅ |
| [Google Gemini](#google-gemini) | `LlmApiType.GEMINI` | ✅ | ✅ |
| [Vertex AI](#vertex-ai) | `LlmApiType.VERTEX_AI` | ✅ | ✅ |
@@ -116,6 +117,67 @@ cocoindex.functions.EmbedText(
</TabItem>
</Tabs>

### Azure OpenAI

[Azure OpenAI](https://azure.microsoft.com/en-us/products/ai-services/openai-service) is Microsoft's cloud service offering OpenAI models through Azure.

To use the Azure OpenAI API:

1. Create an Azure account and set up an Azure OpenAI resource in the [Azure Portal](https://portal.azure.com/).
2. Deploy a model (e.g., GPT-4, text-embedding-ada-002) to your Azure OpenAI resource.
3. Get your API key from the Azure Portal under your Azure OpenAI resource.
4. Set the environment variable `AZURE_OPENAI_API_KEY` to your API key.

The spec for Azure OpenAI takes the following fields:
- `address` (type: `str`, required): The base URL of your Azure OpenAI resource, e.g., `https://your-resource-name.openai.azure.com`.
- `api_config` (type: `cocoindex.llm.AzureOpenAiConfig`, required): Configuration with the following fields:
- `deployment_id` (type: `str`, required): The deployment name/ID you created in Azure OpenAI Studio.
- `api_version` (type: `str`, optional): The API version to use. Defaults to `2024-08-01-preview` (required for structured output support). See [API versions](https://learn.microsoft.com/en-us/azure/ai-foundry/openai/api-version-lifecycle).

For text generation, a spec for Azure OpenAI looks like this:

<Tabs>
<TabItem value="python" label="Python" default>

```python
cocoindex.LlmSpec(
api_type=cocoindex.LlmApiType.AZURE_OPENAI,
model="gpt-4o", # This is the base model name
address="https://your-resource-name.openai.azure.com",
api_config=cocoindex.llm.AzureOpenAiConfig(
deployment_id="your-deployment-name",
api_version="2024-08-01-preview", # Optional, defaults to 2024-08-01-preview
),
)
```

</TabItem>
</Tabs>

For text embedding, a spec for Azure OpenAI looks like this:

<Tabs>
<TabItem value="python" label="Python" default>

```python
cocoindex.functions.EmbedText(
api_type=cocoindex.LlmApiType.AZURE_OPENAI,
model="text-embedding-3-small",
address="https://your-resource-name.openai.azure.com",
output_dimension=1536, # Optional, use the default output dimension if not specified
api_config=cocoindex.llm.AzureOpenAiConfig(
deployment_id="your-embedding-deployment-name",
),
)
```

</TabItem>
</Tabs>

:::note
Azure OpenAI uses deployment names instead of direct model names in API calls. The `deployment_id` in the config should match the deployment you created in Azure OpenAI Studio.
:::
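
For illustration, here is a minimal, hedged sketch of how this spec could be plugged into an LLM-powered transformation such as `cocoindex.functions.ExtractByLlm`. The surrounding flow, the `doc["text"]` field, and the `PaperSummary` type are assumptions made for this example only.

<Tabs>
<TabItem value="python" label="Python" default>

```python
import dataclasses

import cocoindex


@dataclasses.dataclass
class PaperSummary:
    """Illustrative output type for the extraction."""

    title: str
    summary: str


# Inside a flow definition; `doc["text"]` stands in for a field produced earlier.
doc["summary"] = doc["text"].transform(
    cocoindex.functions.ExtractByLlm(
        llm_spec=cocoindex.LlmSpec(
            api_type=cocoindex.LlmApiType.AZURE_OPENAI,
            model="gpt-4o",
            address="https://your-resource-name.openai.azure.com",
            api_config=cocoindex.llm.AzureOpenAiConfig(
                deployment_id="your-deployment-name",
            ),
        ),
        output_type=PaperSummary,
        instruction="Summarize the document in a short paragraph.",
    )
)
```

</TabItem>
</Tabs>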

### Ollama

[Ollama](https://ollama.com/) allows you to run LLM models on your local machine easily. To get started:
13 changes: 12 additions & 1 deletion python/cocoindex/llm.py
@@ -17,6 +17,7 @@ class LlmApiType(Enum):
VOYAGE = "Voyage"
VLLM = "Vllm"
BEDROCK = "Bedrock"
AZURE_OPENAI = "AzureOpenAi"


@dataclass
@@ -39,6 +40,16 @@ class OpenAiConfig:
project_id: str | None = None


@dataclass
class AzureOpenAiConfig:
"""A specification for an Azure OpenAI LLM."""

kind = "AzureOpenAi"

deployment_id: str
api_version: str | None = None


@dataclass
class LlmSpec:
"""A specification for a LLM."""
@@ -47,4 +58,4 @@ class LlmSpec:
model: str
address: str | None = None
api_key: TransientAuthEntryReference[str] | None = None
api_config: VertexAiConfig | OpenAiConfig | None = None
api_config: VertexAiConfig | OpenAiConfig | AzureOpenAiConfig | None = None
123 changes: 123 additions & 0 deletions rust/cocoindex/src/llm/azureopenai.rs
Member
Just wondering, is it possible to reuse the crate::llm::openai::Client, but wrap it with separate initialization logic? (We're doing this for a bunch of other clients, like LiteLLM, OpenRouter, vLLM, etc.)

@@ -0,0 +1,123 @@
use crate::prelude::*;

use super::LlmEmbeddingClient;
use super::LlmGenerationClient;
use async_openai::{Client as OpenAIClient, config::AzureConfig};
use phf::phf_map;

static DEFAULT_EMBEDDING_DIMENSIONS: phf::Map<&str, u32> = phf_map! {
"text-embedding-3-small" => 1536,
"text-embedding-3-large" => 3072,
"text-embedding-ada-002" => 1536,
};

pub struct Client {
client: async_openai::Client<AzureConfig>,
}

impl Client {
pub async fn new_azure_openai(
address: Option<String>,
api_key: Option<String>,
api_config: Option<super::LlmApiConfig>,
) -> anyhow::Result<Self> {
let config = match api_config {
Some(super::LlmApiConfig::AzureOpenAi(config)) => config,
Some(_) => anyhow::bail!("unexpected config type, expected AzureOpenAiConfig"),
None => anyhow::bail!("AzureOpenAiConfig is required for Azure OpenAI"),
};

let api_base =
address.ok_or_else(|| anyhow::anyhow!("address is required for Azure OpenAI"))?;

// Default to API version that supports structured outputs (json_schema).
// See: https://learn.microsoft.com/en-us/azure/ai-foundry/openai/api-version-lifecycle
let api_version = config
.api_version
.unwrap_or_else(|| "2024-08-01-preview".to_string());

let api_key = api_key.or_else(|| std::env::var("AZURE_OPENAI_API_KEY").ok())
.ok_or_else(|| anyhow::anyhow!("AZURE_OPENAI_API_KEY must be set either via api_key parameter or environment variable"))?;

let azure_config = AzureConfig::new()
.with_api_base(api_base)
.with_api_version(api_version)
.with_deployment_id(config.deployment_id)
.with_api_key(api_key);

Ok(Self {
client: OpenAIClient::with_config(azure_config),
})
Comment on lines +48 to +50
Member
Will it be possible to directly use super::openai::Client::from_parts(OpenAIClient::with_config(azure_config)) here? Then we wouldn't need to define another Client struct and implement LlmGenerationClient and LlmEmbeddingClient below.

Just similar to LiteLLM.

Contributor Author
Ya, I actually attempted this approach, but I'm hitting a type mismatch error:

The openai::Client::from_parts() function expects:
pub(crate) fn from_parts(client: async_openai::Client<OpenAIConfig>) -> Self
But we're trying to pass:
async_openai::Client<AzureConfig>
The async_openai::Client is generic over the config type C; however, AzureConfig and OpenAIConfig are different types.
LiteLLM works because it uses OpenAIConfig with a different base URL, but Azure OpenAI needs its own Client struct with AzureConfig.

https://docs.rs/async-openai/latest/async_openai/config/struct.AzureConfig.html
https://docs.rs/async-openai/latest/async_openai/config/struct.OpenAIConfig.html

  --> rust/cocoindex/src/llm/azureopenai.rs:33:57
   |
33 |         Ok(Client::from_parts(OpenAIClient::with_config(azure_config)))
   |                               ------------------------- ^^^^^^^^^^^^ expected `OpenAIConfig`, found `AzureConfig`
   |                               |
   |                               arguments to this function are incorrect
   |
note: associated function defined here
  --> /Users/stewang/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/async-openai-0.30.1/src/client.rs:51:12
   |
51 |     pub fn with_config(config: C) -> Self {
   |            ^^^^^^^^^^^

For more information about this error, try `rustc --explain E0308`.
error: could not compile `cocoindex` (lib) due to 1 previous error

Member
I see. They have two different type parameters. Thanks for trying out!

We can merge this PR now. I can do some refactoring for openai.rs later (see the sketch after this file's diff).

}
}

#[async_trait]
impl LlmGenerationClient for Client {
async fn generate<'req>(
&self,
request: super::LlmGenerateRequest<'req>,
) -> Result<super::LlmGenerateResponse> {
let request = &request;
let response = retryable::run(
|| async {
let req = super::openai::create_llm_generation_request(request)?;
let response = self.client.chat().create(req).await?;
retryable::Ok(response)
},
&retryable::RetryOptions::default(),
)
.await?;

// Extract the response text from the first choice
let text = response
.choices
.into_iter()
.next()
.and_then(|choice| choice.message.content)
.ok_or_else(|| anyhow::anyhow!("No response from Azure OpenAI"))?;

Ok(super::LlmGenerateResponse { text })
}

fn json_schema_options(&self) -> super::ToJsonSchemaOptions {
super::ToJsonSchemaOptions {
fields_always_required: true,
supports_format: false,
extract_descriptions: false,
top_level_must_be_object: true,
supports_additional_properties: true,
}
}
}

#[async_trait]
impl LlmEmbeddingClient for Client {
async fn embed_text<'req>(
&self,
request: super::LlmEmbeddingRequest<'req>,
) -> Result<super::LlmEmbeddingResponse> {
let response = retryable::run(
|| async {
let texts: Vec<String> = request.texts.iter().map(|t| t.to_string()).collect();
self.client
.embeddings()
.create(async_openai::types::CreateEmbeddingRequest {
model: request.model.to_string(),
input: async_openai::types::EmbeddingInput::StringArray(texts),
dimensions: request.output_dimension,
..Default::default()
})
.await
},
&retryable::RetryOptions::default(),
)
.await?;
Ok(super::LlmEmbeddingResponse {
embeddings: response.data.into_iter().map(|e| e.embedding).collect(),
})
}

fn get_default_embedding_dimension(&self, model: &str) -> Option<u32> {
DEFAULT_EMBEDDING_DIMENSIONS.get(model).copied()
}
}
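
Following up on the review thread above about reusing `openai::Client`: below is a hypothetical sketch, not part of this PR, of one direction the later `openai.rs` refactor could take, making the shared wrapper generic over async-openai's `Config` trait so that the OpenAI-compatible clients and Azure OpenAI could share a single set of trait implementations. All names here are illustrative.

```rust
use async_openai::config::{AzureConfig, Config, OpenAIConfig};

// Hypothetical: a single wrapper, generic over the async-openai config type.
pub struct GenericClient<C: Config> {
    client: async_openai::Client<C>,
}

impl<C: Config> GenericClient<C> {
    pub(crate) fn from_parts(client: async_openai::Client<C>) -> Self {
        Self { client }
    }
}

// OpenAI-compatible clients (OpenAI, LiteLLM, OpenRouter, vLLM) would use
// GenericClient<OpenAIConfig>, while Azure OpenAI would use
// GenericClient<AzureConfig>; the LlmGenerationClient / LlmEmbeddingClient
// impls would then be written once over `C: Config`.
fn _example_construction() {
    let _openai = GenericClient::from_parts(async_openai::Client::with_config(OpenAIConfig::new()));
    let _azure = GenericClient::from_parts(async_openai::Client::with_config(AzureConfig::new()));
}
```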
17 changes: 17 additions & 0 deletions rust/cocoindex/src/llm/mod.rs
@@ -19,6 +19,7 @@ pub enum LlmApiType {
Vllm,
VertexAi,
Bedrock,
AzureOpenAi,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -33,11 +34,18 @@ pub struct OpenAiConfig {
pub project_id: Option<String>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AzureOpenAiConfig {
pub deployment_id: String,
pub api_version: Option<String>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "kind")]
pub enum LlmApiConfig {
VertexAi(VertexAiConfig),
OpenAi(OpenAiConfig),
AzureOpenAi(AzureOpenAiConfig),
}

#[derive(Debug, Clone, Serialize, Deserialize)]
@@ -108,6 +116,7 @@ pub trait LlmEmbeddingClient: Send + Sync {
}

mod anthropic;
mod azureopenai;
mod bedrock;
mod gemini;
mod litellm;
@@ -147,6 +156,10 @@ pub async fn new_llm_generation_client(
Box::new(openrouter::Client::new_openrouter(address, api_key).await?)
as Box<dyn LlmGenerationClient>
}
LlmApiType::AzureOpenAi => {
Box::new(azureopenai::Client::new_azure_openai(address, api_key, api_config).await?)
as Box<dyn LlmGenerationClient>
}
LlmApiType::Voyage => {
api_bail!("Voyage is not supported for generation")
}
@@ -182,6 +195,10 @@ pub async fn new_llm_embedding_client(
Box::new(gemini::VertexAiClient::new(address, api_key, api_config).await?)
as Box<dyn LlmEmbeddingClient>
}
LlmApiType::AzureOpenAi => {
Box::new(azureopenai::Client::new_azure_openai(address, api_key, api_config).await?)
as Box<dyn LlmEmbeddingClient>
}
LlmApiType::LiteLlm | LlmApiType::Vllm | LlmApiType::Anthropic | LlmApiType::Bedrock => {
api_bail!("Embedding is not supported for API type {:?}", api_type)
}
2 changes: 1 addition & 1 deletion rust/cocoindex/src/llm/openai.rs
@@ -67,7 +67,7 @@ impl Client {
}
}

fn create_llm_generation_request(
pub(super) fn create_llm_generation_request(
request: &super::LlmGenerateRequest,
) -> Result<CreateChatCompletionRequest> {
let mut messages = Vec::new();