diff --git a/astrbot/core/provider/sources/openai_source.py b/astrbot/core/provider/sources/openai_source.py index 2fae94e1a7..6848c5d43a 100644 --- a/astrbot/core/provider/sources/openai_source.py +++ b/astrbot/core/provider/sources/openai_source.py @@ -313,7 +313,8 @@ async def _query_stream( logger.warning("Saving chunk state error: " + str(e)) if not chunk.choices: continue - delta = chunk.choices[0].delta + choice = chunk.choices[0] + delta = choice.delta # logger.debug(f"chunk delta: {delta}") # handle the content delta reasoning = self._extract_reasoning_content(chunk) @@ -331,6 +332,11 @@ async def _query_stream( _y = True if chunk.usage: llm_response.usage = self._extract_usage(chunk.usage) + elif choice_usage := getattr(choice, "usage", None): + # Workaround for some providers that only return usage in choices[].usage, e.g. MoonshotAI + # See https://github.com/AstrBotDevs/AstrBot/issues/6614 + llm_response.usage = self._extract_usage(choice_usage) + state.current_completion_snapshot.usage = choice_usage if _y: yield llm_response @@ -359,13 +365,11 @@ def _extract_reasoning_content( reasoning_text = str(reasoning_attr) return reasoning_text - def _extract_usage(self, usage: CompletionUsage) -> TokenUsage: - ptd = usage.prompt_tokens_details - cached = ptd.cached_tokens if ptd and ptd.cached_tokens else 0 - prompt_tokens = 0 if usage.prompt_tokens is None else usage.prompt_tokens - completion_tokens = ( - 0 if usage.completion_tokens is None else usage.completion_tokens - ) + def _extract_usage(self, usage: CompletionUsage | dict) -> TokenUsage: + ptd = getattr(usage, "prompt_tokens_details", None) + cached = getattr(ptd, "cached_tokens", 0) if ptd else 0 + prompt_tokens = getattr(usage, "prompt_tokens", 0) or 0 + completion_tokens = getattr(usage, "completion_tokens", 0) or 0 return TokenUsage( input_other=prompt_tokens - cached, input_cached=cached,