Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 0 additions & 62 deletions python/sglang/srt/managers/io_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -1008,38 +1008,6 @@ class BatchTokenIDOutput(BaseBatchReq, SpeculativeDecodingMetricsMixin):
time_stats: Optional[List[SchedulerReqTimeStats]] = None


@dataclass
class BatchMultimodalDecodeReq(BaseBatchReq):
    """Batched inter-process request carrying the raw data needed to decode
    multimodal generation results into final outputs.

    All list fields are parallel arrays: index i holds the value for the
    i-th request in the batch.
    """

    # Generated token ids (flat list here, vs. the nested List[List[int]] in
    # BatchMultimodalOutput — assumed one entry per request; TODO confirm shape).
    decoded_ids: List[int]
    # Log-probability values/indices for prompt (input) tokens.
    input_token_logprobs_val: List[float]
    input_token_logprobs_idx: List[int]
    # Log-probability values/indices for generated (output) tokens.
    output_token_logprobs_val: List[float]
    output_token_logprobs_idx: List[int]
    # Per-request read offsets into the decoded token stream.
    read_offsets: List[int]
    # Per-request detokenization options.
    skip_special_tokens: List[bool]
    spaces_between_special_tokens: List[bool]
    # Original and post-resize image resolutions per request.
    image_resolutions: List[List[int]]
    resize_image_resolutions: List[List[int]]

    # Finish reason object per request.
    finished_reasons: List[BaseFinishReason]

    # Token counts
    prompt_tokens: List[int]
    completion_tokens: List[int]
    cached_tokens: List[int]

    # The information of placeholder tokens (e.g., image token)
    # idx is the index of the token in the prompt after expansion.
    # val is the length of padded tokens after expansion.
    placeholder_tokens_idx: List[Optional[List[int]]]
    placeholder_tokens_val: List[Optional[List[int]]]

    # Whether each request's output should be returned as raw bytes.
    return_bytes: List[bool]

    # The trainer step id. Used to know which step's weights are used for sampling.
    # Optional: defaults to None when step tracking is not in use.
    token_steps: Optional[List[List[int]]] = None


@dataclass
class BatchStrOutput(BaseBatchReq, SpeculativeDecodingMetricsMixin):
# The finish reason
Expand Down Expand Up @@ -1102,36 +1070,6 @@ class BatchStrOutput(BaseBatchReq, SpeculativeDecodingMetricsMixin):
time_stats: Optional[List[SchedulerReqTimeStats]] = None


@dataclass
class BatchMultimodalOutput(BaseBatchReq):
    """Batched output of multimodal generation, sent from the detokenizer
    side back to the tokenizer manager.

    All list fields are parallel arrays: index i holds the value for the
    i-th request in the batch.
    """

    # The finish reason (already serialized to a dict per request).
    finished_reasons: List[dict]
    # Generated token ids per request.
    decoded_ids: List[List[int]]
    # The outputs: either decoded text/bytes per request, or structured
    # per-request dict lists — TODO confirm which producers emit which form.
    outputs: Union[List[Union[str, bytes]], List[List[Dict]]]

    # probability values for input tokens and output tokens
    input_token_logprobs_val: List[List[float]]
    input_token_logprobs_idx: List[List[int]]
    output_token_logprobs_val: List[List[float]]
    output_token_logprobs_idx: List[List[int]]

    # Token counts
    prompt_tokens: List[int]
    completion_tokens: List[int]
    cached_tokens: List[int]

    # Placeholder-token info (e.g., image tokens): idx is the token index in
    # the expanded prompt, val is the padded expansion length.
    placeholder_tokens_idx: List[Optional[List[int]]]
    placeholder_tokens_val: List[Optional[List[int]]]

    # Whether each request's output should be returned as raw bytes.
    return_bytes: List[bool]
    # Detailed breakdown of cached tokens by source (device/host/storage)
    cached_tokens_details: Optional[List[Optional[Dict[str, Any]]]] = None

    # For observability
    time_stats: Optional[List[SchedulerReqTimeStats]] = None


@dataclass
class BatchEmbeddingOutput(BaseBatchReq):
# The finish reason
Expand Down
12 changes: 0 additions & 12 deletions python/sglang/srt/managers/multi_tokenizer_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
BaseBatchReq,
BaseReq,
BatchEmbeddingOutput,
BatchMultimodalOutput,
BatchStrOutput,
BatchTokenIDOutput,
)
Expand Down Expand Up @@ -282,17 +281,6 @@ def _handle_output_by_index(output, i):
output, "token_steps", i, check_length=False
),
)
elif isinstance(output, BatchMultimodalOutput):
new_output = BatchMultimodalOutput(
rids=[output.rids[i]],
finished_reasons=_extract_field_by_index(output, "finished_reasons", i),
outputs=_extract_field_by_index(output, "outputs", i),
prompt_tokens=_extract_field_by_index(output, "prompt_tokens", i),
completion_tokens=_extract_field_by_index(output, "completion_tokens", i),
cached_tokens=_extract_field_by_index(output, "cached_tokens", i),
placeholder_tokens_idx=None,
placeholder_tokens_val=None,
)
else:
new_output = output
return new_output
Expand Down
7 changes: 0 additions & 7 deletions python/sglang/srt/managers/tokenizer_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@
AbortReq,
ActiveRanksOutput,
BatchEmbeddingOutput,
BatchMultimodalOutput,
BatchStrOutput,
BatchTokenIDOutput,
BatchTokenizedEmbeddingReqInput,
Expand Down Expand Up @@ -462,7 +461,6 @@ def init_request_dispatcher(self):
BatchStrOutput,
BatchEmbeddingOutput,
BatchTokenIDOutput,
BatchMultimodalOutput,
),
self._handle_batch_output,
),
Expand Down Expand Up @@ -1541,7 +1539,6 @@ def _handle_batch_output(
recv_obj: Union[
BatchStrOutput,
BatchEmbeddingOutput,
BatchMultimodalOutput,
BatchTokenIDOutput,
],
):
Expand Down Expand Up @@ -1644,8 +1641,6 @@ def _handle_batch_output(
"output_ids": output_token_ids,
"meta_info": meta_info,
}
elif isinstance(recv_obj, BatchMultimodalOutput):
raise NotImplementedError("BatchMultimodalOut not implemented")
else:
assert isinstance(recv_obj, BatchEmbeddingOutput)
out_dict = {
Expand Down Expand Up @@ -1914,7 +1909,6 @@ def _calculate_spec_decoding_metrics(
recv_obj: Union[
BatchStrOutput,
BatchEmbeddingOutput,
BatchMultimodalOutput,
BatchTokenIDOutput,
],
i: int,
Expand Down Expand Up @@ -2424,7 +2418,6 @@ def convert_to_span_attrs(
recv_obj: Union[
BatchStrOutput,
BatchEmbeddingOutput,
BatchMultimodalOutput,
BatchTokenIDOutput,
],
i: int,
Expand Down
Loading