-
-
Notifications
You must be signed in to change notification settings - Fork 2.1k
fix(openai): Token usage not working when using MoonshotAI official API #6618
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -313,7 +313,8 @@ async def _query_stream( | |
| logger.warning("Saving chunk state error: " + str(e)) | ||
| if not chunk.choices: | ||
| continue | ||
| delta = chunk.choices[0].delta | ||
| choice = chunk.choices[0] | ||
| delta = choice.delta | ||
| # logger.debug(f"chunk delta: {delta}") | ||
| # handle the content delta | ||
| reasoning = self._extract_reasoning_content(chunk) | ||
|
|
@@ -331,6 +332,11 @@ async def _query_stream( | |
| _y = True | ||
| if chunk.usage: | ||
| llm_response.usage = self._extract_usage(chunk.usage) | ||
| elif choice_usage := getattr(choice, "usage", None): | ||
| # Workaround for some providers that only return usage in choices[].usage, e.g. MoonshotAI | ||
| # See https://github.com/AstrBotDevs/AstrBot/issues/6614 | ||
| llm_response.usage = self._extract_usage(choice_usage) | ||
| state.current_completion_snapshot.usage = choice_usage | ||
| if _y: | ||
| yield llm_response | ||
|
|
||
|
|
@@ -359,13 +365,11 @@ def _extract_reasoning_content( | |
| reasoning_text = str(reasoning_attr) | ||
| return reasoning_text | ||
|
|
||
| def _extract_usage(self, usage: CompletionUsage) -> TokenUsage: | ||
| ptd = usage.prompt_tokens_details | ||
| cached = ptd.cached_tokens if ptd and ptd.cached_tokens else 0 | ||
| prompt_tokens = 0 if usage.prompt_tokens is None else usage.prompt_tokens | ||
| completion_tokens = ( | ||
| 0 if usage.completion_tokens is None else usage.completion_tokens | ||
| ) | ||
| def _extract_usage(self, usage: CompletionUsage | dict) -> TokenUsage: | ||
| ptd = getattr(usage, "prompt_tokens_details", None) | ||
| cached = getattr(ptd, "cached_tokens", 0) if ptd else 0 | ||
| prompt_tokens = getattr(usage, "prompt_tokens", 0) or 0 | ||
| completion_tokens = getattr(usage, "completion_tokens", 0) or 0 | ||
| return TokenUsage( | ||
| input_other=prompt_tokens - cached, | ||
| input_cached=cached, | ||
|
Comment on lines
-362
to
371
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. issue (bug_risk): The updated code has two concrete issues:
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
issue (bug_risk):
`llm_response.usage` and `state.current_completion_snapshot.usage` now hold different types, which can be surprising. Here
`llm_response.usage` is a normalized `TokenUsage`, while `state.current_completion_snapshot.usage` is a raw provider `choice_usage`. If other code assumes `snapshot.usage` is a `TokenUsage`, this mismatch can cause type errors. Consider either storing the raw data in a separate field (e.g. `raw_usage`) or normalizing both to `TokenUsage` for consistency.