Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions ai-quick-actions/model-deployment-tips.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ Table of Contents:
- [Model Fine Tuning](fine-tuning-tips.md)
- [Model Evaluation](evaluation-tips.md)
- [Model Registration](register-tips.md)
- [Multi Modal Inferencing](multimodal-models-tips.md)
- [Multi Model Inferencing](multimodel-deployment-tips.md)
- [Stacked Model Inferencing](stacked-deployment-tips.md)
- [Private_Endpoints](model-deployment-private-endpoint-tips.md)
- [Tool Calling](model-deployment-tool-calling-tips.md)

Expand Down Expand Up @@ -606,4 +607,4 @@ Table of Contents:
- [Model Registration](register-tips.md)
- [Multi Modal Inferencing](multimodal-models-tips.md)
- [Private_Endpoints](model-deployment-private-endpoint-tips.md)
- [Tool Calling](model-deployment-tool-calling-tips.md)
- [Tool Calling](model-deployment-tool-calling-tips.md)
155 changes: 144 additions & 11 deletions ai-quick-actions/multimodel-deployment-tips.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
- [Get MultiModel Configuration](#get-multimodel-configuration)
- [Create MultiModel Deployment](#create-multimodel-deployment)
- [Manage MultiModel Deployments](#manage-multimodel-deployments)
- [List MultiModel Deployments](#list-multimodel-deployments)
- [Edit MultiModel Deployments](#edit-multimodel-deployments)
- [MultiModel Inferencing](#multimodel-inferencing)
- [MultiModel Evaluation](#multimodel-evaluation)
- [Create Model Evaluation](#create-model-evaluations)
Expand Down Expand Up @@ -439,7 +441,7 @@ The private endpoint id of model deployment.
```bash
ads aqua deployment create \
--container_image_uri "dsmc://odsc-vllm-serving:0.6.4.post1.2" \
--models '[{"model_id":"ocid1.log.oc1.iad.<ocid>", "gpu_count":1}, {"model_id":"ocid1.log.oc1.iad.<ocid>", "gpu_count":1}]' \
--models '[{"model_id":"ocid1.datasciencemodel.oc1.iad.<ocid>", "gpu_count":1}, {"model_id":"ocid1.datasciencemodel.oc1.iad.<ocid>", "gpu_count":1}]' \
--instance_shape "VM.GPU.A10.2" \
--display_name "modelDeployment_multmodel_model1_model2"

Expand All @@ -450,9 +452,9 @@ ads aqua deployment create \
```json
{
"id": "ocid1.datasciencemodeldeployment.oc1.iad.<ocid>",
"display_name": "Multi model deployment of Mistral-7B-v0.1 and falcon-7b on A10.2",
"display_name": "modelDeployment_multmodel_model1_model2",
"aqua_service_model": false,
"model_id": "ocid1.datasciencemodel.oc1.<ocid>",
"model_id": "ocid1.datasciencemodelgroup.oc1.<ocid>",
"models": [
{
"model_id": "ocid1.datasciencemodel.oc1.iad.<ocid>",
Expand Down Expand Up @@ -483,22 +485,23 @@ ads aqua deployment create \
"memory_in_gbs": null
},
"tags": {
"aqua_model_id": "ocid1.datasciencemodel.oc1.iad.<ocid>",
"aqua_model_id": "ocid1.datasciencemodelgroup.oc1.iad.<ocid>",
"aqua_multimodel": "true",
"OCI_AQUA": "active"
},
"environment_variables": {
"MODEL_DEPLOY_PREDICT_ENDPOINT": "/v1/completions",
"MULTI_MODEL_CONFIG": "{\"models\": [{\"params\": \"--served-model-name mistralai/Mistral-7B-v0.1 --seed 42 --tensor-parallel-size 1 --max-model-len 4096\", \"model_path\": \"service_models/Mistral-7B-v0.1/78814a9/artifact\"}, {\"params\": \"--served-model-name tiiuae/falcon-7b --seed 42 --tensor-parallel-size 1 --trust-remote-code\", \"model_path\": \"service_models/falcon-7b/f779652/artifact\"}]}",
"MODEL_DEPLOY_ENABLE_STREAMING": "true"
}
}
```

#### Create MultiModel deployment with `/v1/chat/completions`

```bash
ads aqua deployment create \
--container_image_uri "dsmc://odsc-vllm-serving:0.6.4.post1.2" \
--models '[{"model_id":"ocid1.log.oc1.iad.<ocid>", "gpu_count":1}, {"model_id":"ocid1.log.oc1.iad.<ocid>", "gpu_count":1}]' \
--models '[{"model_id":"ocid1.datasciencemodel.oc1.iad.<ocid>", "gpu_count":1}, {"model_id":"ocid1.datasciencemodel.oc1.iad.<ocid>", "gpu_count":1}]' \
--env-var '{"MODEL_DEPLOY_PREDICT_ENDPOINT":"/v1/chat/completions"}' \
--instance_shape "VM.GPU.A10.2" \
--display_name "modelDeployment_multmodel_model1_model2"
Expand All @@ -510,9 +513,9 @@ ads aqua deployment create \
```json
{
"id": "ocid1.datasciencemodeldeployment.oc1.iad.<ocid>",
"display_name": "Multi model deployment of Mistral-7B-v0.1 and falcon-7b on A10.2",
"display_name": "modelDeployment_multmodel_model1_model2",
"aqua_service_model": false,
"model_id": "ocid1.datasciencemodel.oc1.iad.<ocid>",
"model_id": "ocid1.datasciencemodelgroup.oc1.iad.<ocid>",
"models": [
{
"model_id": "ocid1.datasciencemodel.oc1.iad.<ocid>",
Expand Down Expand Up @@ -543,25 +546,155 @@ ads aqua deployment create \
"memory_in_gbs": null
},
"tags": {
"aqua_model_id": "ocid1.datasciencemodel.oc1.<ocid>",
"aqua_model_id": "ocid1.datasciencemodelgroup.oc1.<ocid>",
"aqua_multimodel": "true",
"OCI_AQUA": "active"
},
"environment_variables": {
"MODEL_DEPLOY_PREDICT_ENDPOINT": "/v1/chat/completions",
"MULTI_MODEL_CONFIG": "{\"models\": [{\"params\": \"--served-model-name mistralai/Mistral-7B-v0.1 --seed 42 --tensor-parallel-size 1 --max-model-len 4096\", \"model_path\": \"service_models/Mistral-7B-v0.1/78814a9/artifact\"}, {\"params\": \"--served-model-name tiiuae/falcon-7b --seed 42 --tensor-parallel-size 1 --trust-remote-code\", \"model_path\": \"service_models/falcon-7b/f779652/artifact\"}]}",
"MODEL_DEPLOY_ENABLE_STREAMING": "true"
}
}
```


## Manage MultiModel Deployments

### Description
### List MultiModel Deployments

To list all AQUA deployments (both MultiModel and single-model) within a specified compartment or project, or to get detailed information on a specific MultiModel deployment, kindly refer to the [AQUA CLI tips](cli-tips.md) documentation.

Note: MultiModel deployments are identified by the tag `"aqua_multimodel": "true"` associated with them.

### Edit MultiModel Deployments

#### Usage

```bash
ads aqua deployment update [OPTIONS]
```

#### Required Parameters

`--model_deployment_id [str]`

The model deployment OCID to be updated.

#### Optional Parameters

`--models [str]`

A string representation of a JSON array, where each object defines a model’s OCID and the number of GPUs assigned to it. The GPU count should always be a **power of two (e.g., 1, 2, 4, 8)**. <br>
Example: `'[{"model_id":"<model_ocid>", "gpu_count":1},{"model_id":"<model_ocid>", "gpu_count":1}]'` for `VM.GPU.A10.2` shape. <br>

`--display_name [str]`

The name of the model deployment.

`--description [str]`

The description of the model deployment. Defaults to None.

`--instance_count [int]`

The number of instances used for the model deployment. Defaults to 1.

`--log_group_id [str]`

The OCI logging group ID. The access log and predict log share the same log group.

`--access_log_id [str]`

The access log OCID for the access logs. Check [model deployment logging](https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm) for more details.

`--predict_log_id [str]`

The predict log OCID for the predict logs. Check [model deployment logging](https://docs.oracle.com/en-us/iaas/data-science/using/model_dep_using_logging.htm) for more details.

`--web_concurrency [int]`

The number of worker processes/threads to handle incoming requests.

`--bandwidth_mbps [int]`

The bandwidth limit on the load balancer in Mbps.

`--memory_in_gbs [float]`

Memory (in GB) for the selected shape.

`--ocpus [float]`

OCPU count for the selected shape.

`--freeform_tags [dict]`

Freeform tags for model deployment.

`--defined_tags [dict]`

Defined tags for model deployment.

#### Example

##### Edit MultiModel deployment with `/v1/completions`

```bash
ads aqua deployment update \
--model_deployment_id "ocid1.datasciencemodeldeployment.oc1.iad.<ocid>" \
--models '[{"model_id":"ocid1.datasciencemodel.oc1.iad.<ocid>", "model_name":"test_updated_model_name", "gpu_count":2}]' \
--display_name "updated_modelDeployment_multmodel_model1_model2"

```

##### CLI Output

```json
{
"id": "ocid1.datasciencemodeldeployment.oc1.iad.<ocid>",
"display_name": "updated_modelDeployment_multmodel_model1_model2",
"aqua_service_model": false,
"model_id": "ocid1.datasciencemodelgroup.oc1.iad.<ocid>",
"models": [
{
"model_id": "ocid1.datasciencemodel.oc1.iad.<ocid>",
"model_name": "mistralai/Mistral-7B-v0.1",
"gpu_count": 1,
"env_var": {}
},
{
"model_id": "ocid1.datasciencemodel.oc1.iad.<ocid>",
"model_name": "tiiuae/falcon-7b",
"gpu_count": 1,
"env_var": {}
}
],
"aqua_model_name": "",
"state": "UPDATING",
"description": null,
"created_on": "2025-03-10 19:09:40.793000+00:00",
"created_by": "ocid1.user.oc1..<ocid>",
"endpoint": "https://modeldeployment.us-ashburn-1.oci.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.iad.<ocid>",
"private_endpoint_id": null,
"console_link": "https://cloud.oracle.com/data-science/model-deployments/ocid1.datasciencemodeldeployment.oc1.iad.<ocid>",
"lifecycle_details": null,
"shape_info": {
"instance_shape": "VM.GPU.A10.2",
"instance_count": 1,
"ocpus": null,
"memory_in_gbs": null
},
"tags": {
"aqua_model_id": "ocid1.datasciencemodelgroup.oc1.<ocid>",
"aqua_multimodel": "true",
"OCI_AQUA": "active"
},
"environment_variables": {
"MODEL_DEPLOY_PREDICT_ENDPOINT": "/v1/chat/completions",
"MODEL_DEPLOY_ENABLE_STREAMING": "true"
}
}
```

# MultiModel Inferencing

The only change required to infer a specific model from a MultiModel deployment is to update the value of `"model"` parameter in the request payload. The values for this parameter can be found in the Model Deployment details, under the field name `"model_name"`. This parameter segregates the request flow, ensuring that the inference request is directed to the correct model within the MultiModel deployment.
Expand Down
Loading