Add support for chat_completion task in Azure OpenAI integration (#5796)

Jan-Kazlouski-elastic · web-flow · commit 5b295b25f99a · 2025-12-16T11:45:58.000-08:00
diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json
diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json
diff --git a/output/schema/schema.json b/output/schema/schema.json
diff --git a/output/typescript/types.ts b/output/typescript/types.ts
diff --git a/specification/_json_spec/inference.put_azureopenai.json b/specification/_json_spec/inference.put_azureopenai.json
@@ -19,7 +19,7 @@
             "task_type": {
               "type": "enum",
               "description": "The task type",
-              "options": ["completion", "text_embedding"]
+              "options": ["chat_completion", "completion", "text_embedding"]
             },
             "azureopenai_inference_id": {
               "type": "string",
diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts
@@ -802,7 +802,7 @@ export class AzureOpenAIServiceSettings {
    * This setting helps to minimize the number of rate limit errors returned from Azure.
    * The `azureopenai` service sets a default number of requests allowed per minute depending on the task type.
    * For `text_embedding`, it is set to `1440`.
-   * For `completion`, it is set to `120`.
+   * For `completion` and `chat_completion`, it is set to `120`.
    * @ext_doc_id azureopenai-quota-limits
    */
   rate_limit?: RateLimitSetting
@@ -816,14 +816,15 @@ export class AzureOpenAIServiceSettings {
 
 export class AzureOpenAITaskSettings {
   /**
-   * For a `completion` or `text_embedding` task, specify the user issuing the request.
+   * For a `completion`, `chat_completion`, or `text_embedding` task, specify the user issuing the request.
    * This information can be used for abuse detection.
    */
   user?: string
 }
 
 export enum AzureOpenAITaskType {
   completion,
+  chat_completion,
   text_embedding
 }
 
diff --git a/specification/inference/_types/Services.ts b/specification/inference/_types/Services.ts
@@ -425,7 +425,7 @@ export class RateLimitSetting {
    * * `anthropic` service: `50`
    * * `azureaistudio` service: `240`
    * * `azureopenai` service and task type `text_embedding`: `1440`
-   * * `azureopenai` service and task type `completion`: `120`
+   * * `azureopenai` service and task type `completion` or `chat_completion`: `120`
    * * `cohere` service: `10000`
    * * `contextualai` service: `1000`
    * * `elastic` service and task type `chat_completion`: `240`
diff --git a/specification/inference/_types/TaskType.ts b/specification/inference/_types/TaskType.ts
@@ -70,7 +70,8 @@ export enum TaskTypeAzureAIStudio {
 
 export enum TaskTypeAzureOpenAI {
   text_embedding,
-  completion
+  completion,
+  chat_completion
 }
 
 export enum TaskTypeCohere {
diff --git a/specification/inference/put/PutRequest.ts b/specification/inference/put/PutRequest.ts
@@ -37,7 +37,7 @@ import { TaskType } from '@inference/_types/TaskType'
  * * Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)
  * * Anthropic (`completion`)
  * * Azure AI Studio (`completion`, `rerank`, `text_embedding`)
- * * Azure OpenAI (`completion`, `text_embedding`)
+ * * Azure OpenAI (`chat_completion`, `completion`, `text_embedding`)
  * * Cohere (`completion`, `rerank`, `text_embedding`)
  * * DeepSeek (`chat_completion`, `completion`)
  * * Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)
diff --git a/specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts b/specification/inference/put_azureopenai/PutAzureOpenAiRequest.ts
@@ -76,7 +76,7 @@ export interface Request extends RequestBase {
     /**
      * The chunking configuration object.
      * Applies only to the `text_embedding` task type.
-     * Not applicable to the `completion` task type.
+     * Not applicable to the `completion` and `chat_completion` task types.
      * @ext_doc_id inference-chunking
      */
     chunking_settings?: InferenceChunkingSettings
diff --git a/specification/inference/put_azureopenai/examples/request/PutAzureOpenAiRequestExample3.yaml b/specification/inference/put_azureopenai/examples/request/PutAzureOpenAiRequestExample3.yaml
@@ -0,0 +1,14 @@
+summary: A chat completion task
+description: Run `PUT _inference/chat_completion/azure_openai_chat_completion` to create an inference endpoint that performs a `chat_completion` task.
+method_request: 'PUT _inference/chat_completion/azure_openai_chat_completion'
+# type: "request"
+value: |-
+  {
+      "service": "azureopenai",
+      "service_settings": {
+          "api_key": "Api-Key",
+          "resource_name": "Resource-name",
+          "deployment_id": "Deployment-id",
+          "api_version": "2024-02-01"
+      }
+  }
diff --git a/specification/inference/put_azureopenai/examples/response/PutAzureOpenAiResponseExample1.yaml b/specification/inference/put_azureopenai/examples/response/PutAzureOpenAiResponseExample1.yaml
@@ -0,0 +1,25 @@
+summary: A text embedding task
+description: A successful response when creating an Azure OpenAI `text_embedding` inference endpoint.
+# type: response
+# response_code:
+value: |-
+  {
+    "inference_id": "azure_openai_embeddings",
+    "task_type": "text_embedding",
+    "service": "azureopenai",
+    "service_settings": {
+      "resource_name": "Resource-name",
+      "deployment_id": "Deployment-id",
+      "api_version": "2024-02-01",
+      "rate_limit": {
+        "requests_per_minute": 1440
+      },
+      "dimensions": 1536,
+      "similarity": "dot_product"
+    },
+    "chunking_settings": {
+      "strategy": "sentence",
+      "max_chunk_size": 250,
+      "sentence_overlap": 1
+    }
+  }
diff --git a/specification/inference/put_azureopenai/examples/response/PutAzureOpenAiResponseExample2.yaml b/specification/inference/put_azureopenai/examples/response/PutAzureOpenAiResponseExample2.yaml
@@ -0,0 +1,18 @@
+summary: A completion task
+description: A successful response when creating an Azure OpenAI `completion` inference endpoint.
+# type: response
+# response_code:
+value: |-
+  {
+    "inference_id": "azure_openai_completion",
+    "task_type": "completion",
+    "service": "azureopenai",
+    "service_settings": {
+      "resource_name": "Resource-name",
+      "deployment_id": "Deployment-id",
+      "api_version": "2024-02-01",
+      "rate_limit": {
+        "requests_per_minute": 120
+      }
+    }
+  }
diff --git a/specification/inference/put_azureopenai/examples/response/PutAzureOpenAiResponseExample3.yaml b/specification/inference/put_azureopenai/examples/response/PutAzureOpenAiResponseExample3.yaml
@@ -0,0 +1,18 @@
+summary: A chat completion task
+description: A successful response when creating an Azure OpenAI `chat_completion` inference endpoint.
+# type: response
+# response_code:
+value: |-
+  {
+    "inference_id": "azure_openai_chat_completion",
+    "task_type": "chat_completion",
+    "service": "azureopenai",
+    "service_settings": {
+      "resource_name": "Resource-name",
+      "deployment_id": "Deployment-id",
+      "api_version": "2024-02-01",
+      "rate_limit": {
+        "requests_per_minute": 120
+      }
+    }
+  }

Original file line number	Diff line number	Diff line change
`@@ -70,7 +70,8 @@ export enum TaskTypeAzureAIStudio {`
`70`	`70`
`71`	`71`	`export enum TaskTypeAzureOpenAI {`
`72`	`72`	`text_embedding,`
`73`		`- completion`
	`73`	`+ completion,`
	`74`	`+ chat_completion`
`74`	`75`	`}`
`75`	`76`
`76`	`77`	`export enum TaskTypeCohere {`