syntax = "proto3";
package inference;
option go_package = "github.com/kserve/modelmesh-serving/fvt/generated;inference";

// Inference Server GRPC endpoints.
service GRPCInferenceService
{
  // The ServerLive API indicates if the inference server is able to receive
  // and respond to metadata and inference requests.
  rpc ServerLive(ServerLiveRequest) returns (ServerLiveResponse) {}

  // The ServerReady API indicates if the server is ready for inferencing.
  rpc ServerReady(ServerReadyRequest) returns (ServerReadyResponse) {}

  // The ModelReady API indicates if a specific model is ready for inferencing.
  rpc ModelReady(ModelReadyRequest) returns (ModelReadyResponse) {}

  // The ServerMetadata API provides information about the server. Errors are
  // indicated by the google.rpc.Status returned for the request. The OK code
  // indicates success and other codes indicate failure.
  rpc ServerMetadata(ServerMetadataRequest) returns (ServerMetadataResponse) {}

  // The per-model metadata API provides information about a model. Errors are
  // indicated by the google.rpc.Status returned for the request. The OK code
  // indicates success and other codes indicate failure.
  rpc ModelMetadata(ModelMetadataRequest) returns (ModelMetadataResponse) {}

  // The ModelInfer API performs inference using the specified model. Errors are
  // indicated by the google.rpc.Status returned for the request. The OK code
  // indicates success and other codes indicate failure.
  rpc ModelInfer(ModelInferRequest) returns (ModelInferResponse) {}
}

message ServerLiveRequest {}

message ServerLiveResponse
{
  // True if the inference server is live, false if not live.
  bool live = 1;
}

message ServerReadyRequest {}

message ServerReadyResponse
{
  // True if the inference server is ready, false if not ready.
  bool ready = 1;
}

message ModelReadyRequest
{
  // The name of the model to check for readiness.
  string name = 1;

  // The version of the model to check for readiness. If not given the
  // server will choose a version based on the model and internal policy.
  string version = 2;
}

message ModelReadyResponse
{
  // True if the model is ready, false if not ready.
  bool ready = 1;
}

message ServerMetadataRequest {}

message ServerMetadataResponse
{
  // The server name.
  string name = 1;

  // The server version.
  string version = 2;

  // The extensions supported by the server.
  repeated string extensions = 3;
}

message ModelMetadataRequest
{
  // The name of the model.
  string name = 1;

  // The version of the model to get metadata for. If not given the
  // server will choose a version based on the model and internal policy.
  string version = 2;
}

message ModelMetadataResponse
{
  // Metadata for a tensor.
  message TensorMetadata
  {
    // The tensor name.
    string name = 1;

    // The tensor data type.
    string datatype = 2;

    // The tensor shape. A variable-size dimension is represented
    // by a -1 value.
    repeated int64 shape = 3;
  }

  // The model name.
  string name = 1;

  // The versions of the model available on the server.
  repeated string versions = 2;

  // The model's platform. See Platforms.
  string platform = 3;

  // The model's inputs.
  repeated TensorMetadata inputs = 4;

  // The model's outputs.
  repeated TensorMetadata outputs = 5;
}

message ModelInferRequest
{
  // An input tensor for an inference request.
  message InferInputTensor
  {
    // The tensor name.
    string name = 1;

    // The tensor data type.
    string datatype = 2;

    // The tensor shape.
    repeated int64 shape = 3;

    // Optional inference input tensor parameters.
    map<string, InferParameter> parameters = 4;

    // The tensor contents using a data-type format. This field must
    // not be specified if "raw" tensor contents are being used for
    // the inference request.
    InferTensorContents contents = 5;
  }

  // An output tensor requested for an inference request.
  message InferRequestedOutputTensor
  {
    // The tensor name.
    string name = 1;

    // Optional requested output tensor parameters.
    map<string, InferParameter> parameters = 2;
  }

  // The name of the model to use for inferencing.
  string model_name = 1;

  // The version of the model to use for inference. If not given the
  // server will choose a version based on the model and internal policy.
  string model_version = 2;

  // Optional identifier for the request. If specified will be
  // returned in the response.
  string id = 3;

  // Optional inference parameters.
  map<string, InferParameter> parameters = 4;

  // The input tensors for the inference.
  repeated InferInputTensor inputs = 5;

  // The requested output tensors for the inference. Optional, if not
  // specified all outputs produced by the model will be returned.
  repeated InferRequestedOutputTensor outputs = 6;

  // The data contained in an input tensor can be represented in "raw"
  // bytes form or in the repeated type that matches the tensor's data
  // type. To use the raw representation 'raw_input_contents' must be
  // initialized with data for each tensor in the same order as
  // 'inputs'. For each tensor, the size of this content must match
  // what is expected by the tensor's shape and data type. The raw
  // data must be the flattened, one-dimensional, row-major order of
  // the tensor elements without any stride or padding between the
  // elements. Note that the FP16 data type must be represented as raw
  // content as there is no specific data type for a 16-bit float
  // type.
  //
  // If this field is specified then InferInputTensor::contents must
  // not be specified for any input tensor.
  repeated bytes raw_input_contents = 7;
}

message ModelInferResponse
{
  // An output tensor returned for an inference request.
  message InferOutputTensor
  {
    // The tensor name.
    string name = 1;

    // The tensor data type.
    string datatype = 2;

    // The tensor shape.
    repeated int64 shape = 3;

    // Optional output tensor parameters.
    map<string, InferParameter> parameters = 4;

    // The tensor contents using a data-type format. This field must
    // not be specified if "raw" tensor contents are being used for
    // the inference response.
    InferTensorContents contents = 5;
  }

  // The name of the model used for inference.
  string model_name = 1;

  // The version of the model used for inference.
  string model_version = 2;

  // The id of the inference request if one was specified.
  string id = 3;

  // Optional inference response parameters.
  map<string, InferParameter> parameters = 4;

  // The output tensors holding inference results.
  repeated InferOutputTensor outputs = 5;

  // The data contained in an output tensor can be represented in
  // "raw" bytes form or in the repeated type that matches the
  // tensor's data type. To use the raw representation 'raw_output_contents'
  // must be initialized with data for each tensor in the same order as
  // 'outputs'. For each tensor, the size of this content must match
  // what is expected by the tensor's shape and data type. The raw
  // data must be the flattened, one-dimensional, row-major order of
  // the tensor elements without any stride or padding between the
  // elements. Note that the FP16 data type must be represented as raw
  // content as there is no specific data type for a 16-bit float
  // type.
  //
  // If this field is specified then InferOutputTensor::contents must
  // not be specified for any output tensor.
  repeated bytes raw_output_contents = 6;
}

// An inference parameter value. The Parameters message describes a
// "name"/"value" pair, where the "name" is the name of the parameter
// and the "value" is a boolean, integer, or string corresponding to
// the parameter.
message InferParameter
{
  // The parameter value can be a string, an int64, a boolean
  // or a message specific to a predefined parameter.
  oneof parameter_choice
  {
    // A boolean parameter value.
    bool bool_param = 1;

    // An int64 parameter value.
    int64 int64_param = 2;

    // A string parameter value.
    string string_param = 3;
  }
}

// The data contained in a tensor represented by the repeated type
// that matches the tensor's data type. Protobuf oneof is not used
// because oneofs cannot contain repeated fields.
message InferTensorContents
{
  // Representation for BOOL data type. The size must match what is
  // expected by the tensor's shape. The contents must be the flattened,
  // one-dimensional, row-major order of the tensor elements.
  repeated bool bool_contents = 1;

  // Representation for INT8, INT16, and INT32 data types. The size
  // must match what is expected by the tensor's shape. The contents
  // must be the flattened, one-dimensional, row-major order of the
  // tensor elements.
  repeated int32 int_contents = 2;

  // Representation for INT64 data types. The size must match what
  // is expected by the tensor's shape. The contents must be the
  // flattened, one-dimensional, row-major order of the tensor elements.
  repeated int64 int64_contents = 3;

  // Representation for UINT8, UINT16, and UINT32 data types. The size
  // must match what is expected by the tensor's shape. The contents
  // must be the flattened, one-dimensional, row-major order of the
  // tensor elements.
  repeated uint32 uint_contents = 4;

  // Representation for UINT64 data types. The size must match what
  // is expected by the tensor's shape. The contents must be the
  // flattened, one-dimensional, row-major order of the tensor elements.
  repeated uint64 uint64_contents = 5;

  // Representation for FP32 data type. The size must match what is
  // expected by the tensor's shape. The contents must be the flattened,
  // one-dimensional, row-major order of the tensor elements.
  repeated float fp32_contents = 6;

  // Representation for FP64 data type. The size must match what is
  // expected by the tensor's shape. The contents must be the flattened,
  // one-dimensional, row-major order of the tensor elements.
  repeated double fp64_contents = 7;

  // Representation for BYTES data type. The size must match what is
  // expected by the tensor's shape. The contents must be the flattened,
  // one-dimensional, row-major order of the tensor elements.
  repeated bytes bytes_contents = 8;
}