Commit 6988f2d

query inputs (#82)

Signed-off-by: Michael Kalantar <[email protected]>

1 parent d5de92b · commit 6988f2d

File tree

2 files changed: +327 −0 lines changed
Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
{
  "inputs": [
    {
      "name": "predict",
      "shape": [1, 64],
      "datatype": "FP32",
      "contents": {
        "fp32_contents": [0.0, 0.0, 1.0, 11.0, 14.0, 15.0, 3.0, 0.0, 0.0, 1.0, 13.0, 16.0, 12.0, 16.0, 8.0, 0.0, 0.0, 8.0, 16.0, 4.0, 6.0, 16.0, 5.0, 0.0, 0.0, 5.0, 15.0, 11.0, 13.0, 14.0, 0.0, 0.0, 0.0, 0.0, 2.0, 12.0, 16.0, 13.0, 0.0, 0.0, 0.0, 0.0, 0.0, 13.0, 16.0, 16.0, 6.0, 0.0, 0.0, 0.0, 0.0, 16.0, 16.0, 16.0, 7.0, 0.0, 0.0, 0.0, 0.0, 11.0, 13.0, 12.0, 1.0, 0.0]
      }
    }
  ]
}
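
For orientation, here is a minimal sketch of how this JSON payload maps onto the Go types generated from the proto added below (import path taken from its go_package option). The type and field names (ModelInferRequest_InferInputTensor, Fp32Contents) follow standard protoc-gen-go naming conventions, and the model name "example-model" is a placeholder for illustration, not something fixed by this commit:

    package main

    import (
    	"fmt"

    	inference "github.com/kserve/modelmesh-serving/fvt/generated"
    )

    // buildRequest mirrors the JSON query: a single FP32 input tensor named
    // "predict" with shape [1, 64], carried in the typed contents field.
    func buildRequest(pixels []float32) *inference.ModelInferRequest {
    	return &inference.ModelInferRequest{
    		ModelName: "example-model", // placeholder; the JSON payload does not name a model
    		Inputs: []*inference.ModelInferRequest_InferInputTensor{{
    			Name:     "predict",
    			Shape:    []int64{1, 64},
    			Datatype: "FP32",
    			Contents: &inference.InferTensorContents{
    				Fp32Contents: pixels, // the 64 values from "fp32_contents"
    			},
    		}},
    	}
    }

    func main() {
    	req := buildRequest(make([]float32, 64)) // zero-filled stand-in for the digit pixels
    	fmt.Println(len(req.Inputs), "input tensor(s) prepared for model", req.ModelName)
    }
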
Lines changed: 315 additions & 0 deletions
@@ -0,0 +1,315 @@
syntax = "proto3";
package inference;
option go_package = "github.com/kserve/modelmesh-serving/fvt/generated;inference";

// Inference Server GRPC endpoints.
service GRPCInferenceService
{
  // The ServerLive API indicates if the inference server is able to receive
  // and respond to metadata and inference requests.
  rpc ServerLive(ServerLiveRequest) returns (ServerLiveResponse) {}

  // The ServerReady API indicates if the server is ready for inferencing.
  rpc ServerReady(ServerReadyRequest) returns (ServerReadyResponse) {}

  // The ModelReady API indicates if a specific model is ready for inferencing.
  rpc ModelReady(ModelReadyRequest) returns (ModelReadyResponse) {}

  // The ServerMetadata API provides information about the server. Errors are
  // indicated by the google.rpc.Status returned for the request. The OK code
  // indicates success and other codes indicate failure.
  rpc ServerMetadata(ServerMetadataRequest) returns (ServerMetadataResponse) {}

  // The per-model metadata API provides information about a model. Errors are
  // indicated by the google.rpc.Status returned for the request. The OK code
  // indicates success and other codes indicate failure.
  rpc ModelMetadata(ModelMetadataRequest) returns (ModelMetadataResponse) {}

  // The ModelInfer API performs inference using the specified model. Errors are
  // indicated by the google.rpc.Status returned for the request. The OK code
  // indicates success and other codes indicate failure.
  rpc ModelInfer(ModelInferRequest) returns (ModelInferResponse) {}
}

message ServerLiveRequest {}

message ServerLiveResponse
{
  // True if the inference server is live, false if not live.
  bool live = 1;
}

message ServerReadyRequest {}

message ServerReadyResponse
{
  // True if the inference server is ready, false if not ready.
  bool ready = 1;
}

message ModelReadyRequest
{
  // The name of the model to check for readiness.
  string name = 1;

  // The version of the model to check for readiness. If not given the
  // server will choose a version based on the model and internal policy.
  string version = 2;
}

message ModelReadyResponse
{
  // True if the model is ready, false if not ready.
  bool ready = 1;
}

message ServerMetadataRequest {}

message ServerMetadataResponse
{
  // The server name.
  string name = 1;

  // The server version.
  string version = 2;

  // The extensions supported by the server.
  repeated string extensions = 3;
}

message ModelMetadataRequest
{
  // The name of the model.
  string name = 1;

  // The version of the model to check for readiness. If not given the
  // server will choose a version based on the model and internal policy.
  string version = 2;
}

message ModelMetadataResponse
{
  // Metadata for a tensor.
  message TensorMetadata
  {
    // The tensor name.
    string name = 1;

    // The tensor data type.
    string datatype = 2;

    // The tensor shape. A variable-size dimension is represented
    // by a -1 value.
    repeated int64 shape = 3;
  }

  // The model name.
  string name = 1;

  // The versions of the model available on the server.
  repeated string versions = 2;

  // The model's platform. See Platforms.
  string platform = 3;

  // The model's inputs.
  repeated TensorMetadata inputs = 4;

  // The model's outputs.
  repeated TensorMetadata outputs = 5;
}

message ModelInferRequest
{
  // An input tensor for an inference request.
  message InferInputTensor
  {
    // The tensor name.
    string name = 1;

    // The tensor data type.
    string datatype = 2;

    // The tensor shape.
    repeated int64 shape = 3;

    // Optional inference input tensor parameters.
    map<string, InferParameter> parameters = 4;

    // The tensor contents using a data-type format. This field must
    // not be specified if "raw" tensor contents are being used for
    // the inference request.
    InferTensorContents contents = 5;
  }

  // An output tensor requested for an inference request.
  message InferRequestedOutputTensor
  {
    // The tensor name.
    string name = 1;

    // Optional requested output tensor parameters.
    map<string, InferParameter> parameters = 2;
  }

  // The name of the model to use for inferencing.
  string model_name = 1;

  // The version of the model to use for inference. If not given the
  // server will choose a version based on the model and internal policy.
  string model_version = 2;

  // Optional identifier for the request. If specified will be
  // returned in the response.
  string id = 3;

  // Optional inference parameters.
  map<string, InferParameter> parameters = 4;

  // The input tensors for the inference.
  repeated InferInputTensor inputs = 5;

  // The requested output tensors for the inference. Optional, if not
  // specified all outputs produced by the model will be returned.
  repeated InferRequestedOutputTensor outputs = 6;

  // The data contained in an input tensor can be represented in "raw"
  // bytes form or in the repeated type that matches the tensor's data
  // type. To use the raw representation 'raw_input_contents' must be
  // initialized with data for each tensor in the same order as
  // 'inputs'. For each tensor, the size of this content must match
  // what is expected by the tensor's shape and data type. The raw
  // data must be the flattened, one-dimensional, row-major order of
  // the tensor elements without any stride or padding between the
  // elements. Note that the FP16 data type must be represented as raw
  // content as there is no specific data type for a 16-bit float
  // type.
  //
  // If this field is specified then InferInputTensor::contents must
  // not be specified for any input tensor.
  repeated bytes raw_input_contents = 7;
}

message ModelInferResponse
{
  // An output tensor returned for an inference request.
  message InferOutputTensor
  {
    // The tensor name.
    string name = 1;

    // The tensor data type.
    string datatype = 2;

    // The tensor shape.
    repeated int64 shape = 3;

    // Optional output tensor parameters.
    map<string, InferParameter> parameters = 4;

    // The tensor contents using a data-type format. This field must
    // not be specified if "raw" tensor contents are being used for
    // the inference response.
    InferTensorContents contents = 5;
  }

  // The name of the model used for inference.
  string model_name = 1;

  // The version of the model used for inference.
  string model_version = 2;

  // The id of the inference request if one was specified.
  string id = 3;

  // Optional inference response parameters.
  map<string, InferParameter> parameters = 4;

  // The output tensors holding inference results.
  repeated InferOutputTensor outputs = 5;

  // The data contained in an output tensor can be represented in
  // "raw" bytes form or in the repeated type that matches the
  // tensor's data type. To use the raw representation 'raw_output_contents'
  // must be initialized with data for each tensor in the same order as
  // 'outputs'. For each tensor, the size of this content must match
  // what is expected by the tensor's shape and data type. The raw
  // data must be the flattened, one-dimensional, row-major order of
  // the tensor elements without any stride or padding between the
  // elements. Note that the FP16 data type must be represented as raw
  // content as there is no specific data type for a 16-bit float
  // type.
  //
  // If this field is specified then InferOutputTensor::contents must
  // not be specified for any output tensor.
  repeated bytes raw_output_contents = 6;
}

// An inference parameter value. The Parameters message describes a
// “name”/”value” pair, where the “name” is the name of the parameter
// and the “value” is a boolean, integer, or string corresponding to
// the parameter.
message InferParameter
{
  // The parameter value can be a string, an int64, a boolean
  // or a message specific to a predefined parameter.
  oneof parameter_choice
  {
    // A boolean parameter value.
    bool bool_param = 1;

    // An int64 parameter value.
    int64 int64_param = 2;

    // A string parameter value.
    string string_param = 3;
  }
}

// The data contained in a tensor represented by the repeated type
// that matches the tensor's data type. Protobuf oneof is not used
// because oneofs cannot contain repeated fields.
message InferTensorContents
{
  // Representation for BOOL data type. The size must match what is
  // expected by the tensor's shape. The contents must be the flattened,
  // one-dimensional, row-major order of the tensor elements.
  repeated bool bool_contents = 1;

  // Representation for INT8, INT16, and INT32 data types. The size
  // must match what is expected by the tensor's shape. The contents
  // must be the flattened, one-dimensional, row-major order of the
  // tensor elements.
  repeated int32 int_contents = 2;

  // Representation for INT64 data types. The size must match what
  // is expected by the tensor's shape. The contents must be the
  // flattened, one-dimensional, row-major order of the tensor elements.
  repeated int64 int64_contents = 3;

  // Representation for UINT8, UINT16, and UINT32 data types. The size
  // must match what is expected by the tensor's shape. The contents
  // must be the flattened, one-dimensional, row-major order of the
  // tensor elements.
  repeated uint32 uint_contents = 4;

  // Representation for UINT64 data types. The size must match what
  // is expected by the tensor's shape. The contents must be the
  // flattened, one-dimensional, row-major order of the tensor elements.
  repeated uint64 uint64_contents = 5;

  // Representation for FP32 data type. The size must match what is
  // expected by the tensor's shape. The contents must be the flattened,
  // one-dimensional, row-major order of the tensor elements.
  repeated float fp32_contents = 6;

  // Representation for FP64 data type. The size must match what is
  // expected by the tensor's shape. The contents must be the flattened,
  // one-dimensional, row-major order of the tensor elements.
  repeated double fp64_contents = 7;

  // Representation for BYTES data type. The size must match what is
  // expected by the tensor's shape. The contents must be the flattened,
  // one-dimensional, row-major order of the tensor elements.
  repeated bytes bytes_contents = 8;
}
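
To see the service exercised end to end, here is a hedged sketch of a Go client that dials a plaintext gRPC endpoint, checks ServerReady, and issues a ModelInfer call shaped like the JSON query above. The endpoint localhost:8033 and the model name are assumptions for illustration; the stub names (NewGRPCInferenceServiceClient and the request/response types) follow standard protoc-gen-go-grpc output for this service definition:

    package main

    import (
    	"context"
    	"log"
    	"time"

    	"google.golang.org/grpc"
    	"google.golang.org/grpc/credentials/insecure"

    	inference "github.com/kserve/modelmesh-serving/fvt/generated"
    )

    func main() {
    	// localhost:8033 is an assumption; in testing the serving endpoint is
    	// typically port-forwarded to a local port.
    	conn, err := grpc.Dial("localhost:8033",
    		grpc.WithTransportCredentials(insecure.NewCredentials()))
    	if err != nil {
    		log.Fatalf("dial: %v", err)
    	}
    	defer conn.Close()

    	client := inference.NewGRPCInferenceServiceClient(conn)
    	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
    	defer cancel()

    	// ServerReady RPC from the service definition above.
    	ready, err := client.ServerReady(ctx, &inference.ServerReadyRequest{})
    	if err != nil || !ready.Ready {
    		log.Fatalf("server not ready: %v", err)
    	}

    	// One FP32 tensor named "predict", shape [1, 64], matching the JSON query.
    	req := &inference.ModelInferRequest{
    		ModelName: "example-model", // placeholder model name
    		Inputs: []*inference.ModelInferRequest_InferInputTensor{{
    			Name:     "predict",
    			Shape:    []int64{1, 64},
    			Datatype: "FP32",
    			Contents: &inference.InferTensorContents{Fp32Contents: make([]float32, 64)},
    		}},
    	}

    	resp, err := client.ModelInfer(ctx, req)
    	if err != nil {
    		log.Fatalf("ModelInfer: %v", err)
    	}
    	log.Printf("model %s returned %d output tensor(s)", resp.ModelName, len(resp.Outputs))
    }
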
