From 54499f6d181902ba1083439a1ac57e45b76fc730 Mon Sep 17 00:00:00 2001 From: Robin Ede Date: Sun, 17 Aug 2025 16:59:09 -0500 Subject: [PATCH 1/2] Fix chat CLI GPU loading and request_id validation issues (#40230) This commit addresses two critical bugs in the transformers chat CLI: 1. **GPU Loading Issue**: Changed default device from "cpu" to "auto" in ChatArguments - Chat CLI now automatically uses GPU when available instead of defaulting to CPU - Matches the behavior of the underlying serving infrastructure 2. **Request ID Validation Error**: Added request_id field to TransformersCompletionCreateParamsStreaming schema - Fixes "Unexpected keys in the request: {'request_id'}" error on second message - Allows request_id to be properly sent and validated by the server Both fixes target the exact root causes identified in issue #40230: - Users will now get GPU acceleration by default when available - Chat sessions will no longer break after the second message --- src/transformers/commands/chat.py | 2 +- src/transformers/commands/serving.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/transformers/commands/chat.py b/src/transformers/commands/chat.py index 89bac4fec212..c7fb7c2c7aeb 100644 --- a/src/transformers/commands/chat.py +++ b/src/transformers/commands/chat.py @@ -246,7 +246,7 @@ class ChatArguments: default="main", metadata={"help": "Specific model version to use (can be a branch name, tag name or commit id)."}, ) - device: str = field(default="cpu", metadata={"help": "Device to use for inference."}) + device: str = field(default="auto", metadata={"help": "Device to use for inference."}) torch_dtype: Optional[str] = field( default="auto", metadata={ diff --git a/src/transformers/commands/serving.py b/src/transformers/commands/serving.py index 6708dcc5dde9..3170f5895dbb 100644 --- a/src/transformers/commands/serving.py +++ b/src/transformers/commands/serving.py @@ -129,6 +129,7 @@ class TransformersCompletionCreateParamsStreaming(CompletionCreateParamsStreamin """ generation_config: str + request_id: Optional[str] = None class TransformersTranscriptionCreateParams(TranscriptionCreateParamsBase, total=False): """ From ca5908ea62b2bd5acf2d08ca19ee81879793ed23 Mon Sep 17 00:00:00 2001 From: Robin Ede Date: Mon, 18 Aug 2025 13:04:01 -0500 Subject: [PATCH 2/2] Remove unrelated request_id field from TransformersCompletionCreateParamsStreaming --- src/transformers/commands/serving.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/transformers/commands/serving.py b/src/transformers/commands/serving.py index 3170f5895dbb..6708dcc5dde9 100644 --- a/src/transformers/commands/serving.py +++ b/src/transformers/commands/serving.py @@ -129,7 +129,6 @@ class TransformersCompletionCreateParamsStreaming(CompletionCreateParamsStreamin """ generation_config: str - request_id: Optional[str] = None class TransformersTranscriptionCreateParams(TranscriptionCreateParamsBase, total=False): """