File tree Expand file tree Collapse file tree 2 files changed +10
-2
lines changed
distributed/device_communicators Expand file tree Collapse file tree 2 files changed +10
-2
lines changed Original file line number Diff line number Diff line change @@ -236,7 +236,9 @@ def __init__(
236236 n_reader , # number of all readers
237237 n_local_reader , # number of local readers through shared memory
238238 local_reader_ranks : list [int ] | None = None ,
239- max_chunk_bytes : int = 1024 * 1024 * 24 , # 24MiB
239+ # Default of 24MiB chosen to be large enough to accommodate grammar
240+ # bitmask tensors for large batches (1024 requests).
241+ max_chunk_bytes : int = 1024 * 1024 * 24 ,
240242 max_chunks : int = 10 ,
241243 connect_ip : str | None = None ,
242244 ):
@@ -538,6 +540,10 @@ def oob_callback(buf: PickleBuffer) -> bool:
538540 buf [0 ] = 1 # overflow
539541 self .local_socket .send_multipart (all_buffers , copy = False )
540542 else :
543+ # Byte 0: overflow flag (0 = not overflow)
544+ # Bytes 1-2: Count of buffers
545+ # Then each buffer follows, preceded by 4 bytes containing its length:
546+ # [4 byte int L][L bytes of buffer content] ...
541547 with self .acquire_write (timeout ) as buf :
542548 buf [0 ] = 0 # not overflow
543549 offset = 3
Original file line number Diff line number Diff line change @@ -165,7 +165,9 @@ class SchedulerOutput:
165165 # freed from the encoder cache.
166166 free_encoder_mm_hashes : list [str ]
167167
168- # ids of structured outputs requests included in the bitmask, in order.
168+ # ids of structured outputs requests included in the bitmask, in the
169+ # same order as the corresponding stacked rows of the bitmask.
170+ # There may be more than one row per request in the case of speculative decoding.
169171 structured_output_request_ids : list [str ]
170172 # the bitmask for the whole batch
171173 grammar_bitmask : "npt.NDArray[np.int32] | None"
You can’t perform that action at this time.
0 commit comments