-
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
fix(agent): use full tool schema for DeepSeek V4 #7862
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -289,10 +289,12 @@ async def reset( | |||||||||
| # Light tool schema does not include tool parameters. | ||||||||||
| # This can reduce token usage when tools have large descriptions. | ||||||||||
| # See #4681 | ||||||||||
| self.tool_schema_mode = tool_schema_mode | ||||||||||
| self.tool_schema_mode = self._normalize_tool_schema_mode( | ||||||||||
| tool_schema_mode, provider, request | ||||||||||
| ) | ||||||||||
| self._tool_schema_param_set = None | ||||||||||
| self._skill_like_raw_tool_set = None | ||||||||||
| if tool_schema_mode == "skills_like": | ||||||||||
| if self.tool_schema_mode == "skills_like": | ||||||||||
| tool_set = self.req.func_tool | ||||||||||
| if not tool_set: | ||||||||||
| return | ||||||||||
|
|
@@ -322,6 +324,26 @@ async def reset( | |||||||||
| self.stats = AgentStats() | ||||||||||
| self.stats.start_time = time.time() | ||||||||||
|
|
||||||||||
| @staticmethod | ||||||||||
| def _normalize_tool_schema_mode( | ||||||||||
| tool_schema_mode: str | None, | ||||||||||
| provider: Provider, | ||||||||||
| request: ProviderRequest, | ||||||||||
| ) -> str | None: | ||||||||||
| if tool_schema_mode != "skills_like": | ||||||||||
| return tool_schema_mode | ||||||||||
|
|
||||||||||
| model = (request.model or provider.get_model() or "").lower().strip() | ||||||||||
| model_name = model.rsplit("/", 1)[-1] | ||||||||||
| if model_name not in {"deepseek-v4-flash", "deepseek-v4-pro"}: | ||||||||||
| return tool_schema_mode | ||||||||||
|
Comment on lines
+338
to
+339
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Instead of hardcoding specific model variants, consider using a prefix check for `deepseek-v4` so that other DeepSeek V4 variants are covered as well.
Suggested change
|
||||||||||
|
|
||||||||||
| logger.info( | ||||||||||
| "DeepSeek V4 does not support skills-like light tool schemas; " | ||||||||||
| "using full tool schemas for function calling." | ||||||||||
| ) | ||||||||||
| return "full" | ||||||||||
|
|
||||||||||
| def _read_tool_hint(self) -> str: | ||||||||||
| if self.read_tool is not None: | ||||||||||
| return f"`{self.read_tool.name}`" | ||||||||||
|
|
||||||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1273,6 +1273,40 @@ async def text_chat(self, **kwargs) -> LLMResponse: | |
| assert parts[0].text == "<image_caption>一张猫的照片</image_caption>" | ||
|
|
||
|
|
||
| @pytest.mark.asyncio | ||
| async def test_deepseek_v4_uses_full_tool_schema_instead_of_skills_like(): | ||
| provider = MockProvider() | ||
| tool = FunctionTool( | ||
| name="test_tool", | ||
| description="测试", | ||
| parameters={"type": "object", "properties": {"query": {"type": "string"}}}, | ||
| handler=AsyncMock(), | ||
| ) | ||
| tool_set = ToolSet(tools=[tool]) | ||
| req = ProviderRequest( | ||
| prompt="test", | ||
| func_tool=tool_set, | ||
| contexts=[], | ||
| model="deepseek-v4-flash", | ||
|
Comment on lines
+1286
to
+1290
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. suggestion (testing): Consider adding a test where the model name comes from the provider instead of the request. This path supports models from both `request.model` and `provider.get_model()`. Suggested implementation:
from unittest.mock import AsyncMock, Mock
@pytest.mark.asyncio
async def test_deepseek_v4_uses_full_tool_schema_instead_of_skills_like():
provider = MockProvider()
tool = FunctionTool(
name="test_tool",
description="测试",
parameters={"type": "object", "properties": {"query": {"type": "string"}}},
handler=AsyncMock(),
)
tool_set = ToolSet(tools=[tool])
req = ProviderRequest(
prompt="test",
func_tool=tool_set,
contexts=[],
model="deepseek-v4-flash",
)
runner = ToolLoopAgentRunner()
await runner.reset(
provider=provider,
request=req,
run_context=ContextWrapper(context=None),
tool_executor=cast(Any, MockToolExecutor()),
agent_hooks=MockHooks(),
tool_schema_mode="skills_like",
)
@pytest.mark.asyncio
async def test_deepseek_v4_uses_full_tool_schema_when_model_from_provider():
provider = MockProvider()
# Ensure provider.get_model returns a DeepSeek V4 model name when request.model is None
provider.get_model = Mock(return_value="deepseek-v4-flash")
tool = FunctionTool(
name="test_tool",
description="测试",
parameters={"type": "object", "properties": {"query": {"type": "string"}}},
handler=AsyncMock(),
)
tool_set = ToolSet(tools=[tool])
req = ProviderRequest(
prompt="test",
func_tool=tool_set,
contexts=[],
model=None,
)
runner = ToolLoopAgentRunner()
await runner.reset(
provider=provider,
request=req,
run_context=ContextWrapper(context=None),
tool_executor=cast(Any, MockToolExecutor()),
agent_hooks=MockHooks(),
tool_schema_mode="skills_like",
) |
||
| ) | ||
| runner = ToolLoopAgentRunner() | ||
|
|
||
| await runner.reset( | ||
| provider=provider, | ||
| request=req, | ||
| run_context=ContextWrapper(context=None), | ||
| tool_executor=cast(Any, MockToolExecutor()), | ||
| agent_hooks=MockHooks(), | ||
| tool_schema_mode="skills_like", | ||
| ) | ||
|
|
||
| assert runner.tool_schema_mode == "full" | ||
| assert runner.req.func_tool is tool_set | ||
| assert runner.req.func_tool.tools[0].parameters == tool.parameters | ||
| assert runner.req.func_tool.tools[0].handler is tool.handler | ||
| assert runner._tool_schema_param_set is None | ||
|
|
||
|
|
||
| @pytest.mark.asyncio | ||
| async def test_follow_up_accepted_when_active_and_not_stopping( | ||
| runner, mock_provider, provider_request, mock_tool_executor, mock_hooks | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The override only triggers when `model_name` is exactly `deepseek-v4-flash` or `deepseek-v4-pro`, so DeepSeek V4 identifiers with valid suffixes/prefix variants (for example provider-qualified or tier-suffixed IDs like `deepseek-v4-flash:free`) will miss this branch and remain in `skills_like` mode. In that case the runner still sends light tool schemas and can hit the same function-calling rejection this fix is meant to prevent.
Useful? React with 👍 / 👎.