Merged
38 changes: 31 additions & 7 deletions output/openapi/elasticsearch-openapi.json

Large diffs are not rendered by default.

38 changes: 31 additions & 7 deletions output/openapi/elasticsearch-serverless-openapi.json

Large diffs are not rendered by default.

221 changes: 125 additions & 96 deletions output/schema/schema.json

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions output/typescript/types.ts

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion specification/_json_spec/inference.put_azureopenai.json
@@ -19,7 +19,7 @@
"task_type": {
"type": "enum",
"description": "The task type",
"options": ["completion", "text_embedding"]
"options": ["chat_completion", "completion", "text_embedding"]
},
"azureopenai_inference_id": {
"type": "string",
5 changes: 3 additions & 2 deletions specification/inference/_types/CommonTypes.ts
@@ -802,7 +802,7 @@ export class AzureOpenAIServiceSettings {
* This setting helps to minimize the number of rate limit errors returned from Azure.
* The `azureopenai` service sets a default number of requests allowed per minute depending on the task type.
* For `text_embedding`, it is set to `1440`.
* For `completion`, it is set to `120`.
* For `completion` and `chat_completion`, it is set to `120`.
* @ext_doc_id azureopenai-quota-limits
*/
rate_limit?: RateLimitSetting
@@ -816,14 +816,15 @@

export class AzureOpenAITaskSettings {
/**
* For a `completion` or `text_embedding` task, specify the user issuing the request.
* For a `completion`, `chat_completion` or `text_embedding` task, specify the user issuing the request.
* This information can be used for abuse detection.
*/
user?: string
}

Contributor

On line 819 above this, "For a `completion` or `text_embedding` task" should be "For a `completion`, `chat_completion` or `text_embedding` task"

Contributor Author

Good catch. Fixed.

export enum AzureOpenAITaskType {
completion,
chat_completion,
text_embedding
}
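For context, here is a minimal sketch of a create request that exercises the new `chat_completion` task type together with the optional `user` task setting documented above. The endpoint name and the `user` value are illustrative placeholders, not part of this diff; the credential fields mirror the request example further down in this PR.

PUT _inference/chat_completion/azure_openai_chat_completion
{
  "service": "azureopenai",
  "service_settings": {
    "api_key": "Api-Key",
    "resource_name": "Resource-name",
    "deployment_id": "Deployment-id",
    "api_version": "2024-02-01"
  },
  "task_settings": {
    "user": "example-user-id"
  }
}

Per the field documentation above, the `task_settings.user` string is only forwarded so the request can be attributed for abuse detection.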

2 changes: 1 addition & 1 deletion specification/inference/_types/Services.ts
@@ -425,7 +425,7 @@ export class RateLimitSetting {
* * `anthropic` service: `50`
* * `azureaistudio` service: `240`
* * `azureopenai` service and task type `text_embedding`: `1440`
* * `azureopenai` service and task type `completion`: `120`
* * `azureopenai` service and task types `completion` or `chat_completion`: `120`
* * `cohere` service: `10000`
* * `contextualai` service: `1000`
* * `elastic` service and task type `chat_completion`: `240`
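As a hedged illustration of the defaults listed above (not part of this diff), the Azure OpenAI `completion`/`chat_completion` limit of 120 requests per minute could be lowered by sending `rate_limit` inside `service_settings` at creation time. The endpoint name and the value of 60 are assumptions; the `requests_per_minute` field name matches the response examples below.

PUT _inference/completion/azure_openai_completion
{
  "service": "azureopenai",
  "service_settings": {
    "api_key": "Api-Key",
    "resource_name": "Resource-name",
    "deployment_id": "Deployment-id",
    "api_version": "2024-02-01",
    "rate_limit": {
      "requests_per_minute": 60
    }
  }
}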
3 changes: 2 additions & 1 deletion specification/inference/_types/TaskType.ts
@@ -70,7 +70,8 @@ export enum TaskTypeAzureAIStudio {

export enum TaskTypeAzureOpenAI {
text_embedding,
completion
completion,
chat_completion
}

export enum TaskTypeCohere {
2 changes: 1 addition & 1 deletion specification/inference/put/PutRequest.ts
@@ -37,7 +37,7 @@ import { TaskType } from '@inference/_types/TaskType'
* * Amazon SageMaker (`chat_completion`, `completion`, `rerank`, `sparse_embedding`, `text_embedding`)
* * Anthropic (`completion`)
* * Azure AI Studio (`completion`, `rerank`, `text_embedding`)
* * Azure OpenAI (`completion`, `text_embedding`)
* * Azure OpenAI (`chat_completion`, `completion`, `text_embedding`)
* * Cohere (`completion`, `rerank`, `text_embedding`)
* * DeepSeek (`chat_completion`, `completion`)
* * Elasticsearch (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland)
@@ -76,7 +76,7 @@ export interface Request extends RequestBase {
/**
* The chunking configuration object.
* Applies only to the `text_embedding` task type.
* Not applicable to the `completion` task type.
* Not applicable to the `completion` and `chat_completion` task types.
* @ext_doc_id inference-chunking
*/
chunking_settings?: InferenceChunkingSettings
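To make the scope of this field concrete, here is a sketch of a `text_embedding` create request with an explicit `chunking_settings` block. The chunking values are copied from the response example below; the endpoint name is assumed.

PUT _inference/text_embedding/azure_openai_embeddings
{
  "service": "azureopenai",
  "service_settings": {
    "api_key": "Api-Key",
    "resource_name": "Resource-name",
    "deployment_id": "Deployment-id",
    "api_version": "2024-02-01"
  },
  "chunking_settings": {
    "strategy": "sentence",
    "max_chunk_size": 250,
    "sentence_overlap": 1
  }
}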
@@ -0,0 +1,14 @@
summary: A chat completion task
description: Run `PUT _inference/chat_completion/azure_openai_chat_completion` to create an inference endpoint that performs a `chat_completion` task.
method_request: 'PUT _inference/chat_completion/azure_openai_chat_completion'
# type: "request"
value: |-
{
"service": "azureopenai",
"service_settings": {
"api_key": "Api-Key",
"resource_name": "Resource-name",
"deployment_id": "Deployment-id",
"api_version": "2024-02-01"
}
}
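Once the endpoint above exists, it would be exercised through the streaming chat completion route. The following call is a sketch based on the general `_stream` pattern of the chat completion inference API and is not part of this change; the message content is a placeholder.

POST _inference/chat_completion/azure_openai_chat_completion/_stream
{
  "messages": [
    {
      "role": "user",
      "content": "Say hello in one short sentence."
    }
  ]
}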
@@ -0,0 +1,25 @@
summary: A text embedding task
description: A successful response when creating an Azure OpenAI `text_embedding` inference endpoint.
# type: response
# response_code:
value: |-
{
"inference_id": "azure_openai_embeddings",
"task_type": "text_embedding",
"service": "azureopenai",
"service_settings": {
"resource_name": "Resource-name",
"deployment_id": "Deployment-id",
"api_version": "2024-02-01",
"rate_limit": {
"requests_per_minute": 1140
},
"dimensions": 1536,
"similarity": "dot_product"
},
"chunking_settings": {
"strategy": "sentence",
"max_chunk_size": 250,
"sentence_overlap": 1
}
}
@@ -0,0 +1,18 @@
summary: A completion task
description: A successful response when creating an Azure OpenAI `completion` inference endpoint.
# type: response
# response_code:
value: |-
{
"inference_id": "azure_openai_completion",
"task_type": "completion",
"service": "azureopenai",
"service_settings": {
"resource_name": "Resource-name",
"deployment_id": "Deployment-id",
"api_version": "2024-02-01",
"rate_limit": {
"requests_per_minute": 120
}
}
}
@@ -0,0 +1,18 @@
summary: A chat completion task
description: A successful response when creating an Azure OpenAI `chat_completion` inference endpoint.
# type: response
# response_code:
value: |-
{
"inference_id": "azure_openai_chat_completion",
"task_type": "chat_completion",
"service": "azureopenai",
"service_settings": {
"resource_name": "Resource-name",
"deployment_id": "Deployment-id",
"api_version": "2024-02-01",
"rate_limit": {
"requests_per_minute": 120
}
}
}