From 281b4329801054783e79da86a79ccdc0c58c9a38 Mon Sep 17 00:00:00 2001 From: Miguel Neves Date: Mon, 10 Feb 2025 13:17:15 +0000 Subject: [PATCH 1/6] feat: added support for bedrock nova models --- examples/core.py | 23 +- libs/core/llmstudio_core/config.yaml | 16 + .../llmstudio_core/providers/bedrock/nova.py | 347 ++++++++++++++++++ .../providers/bedrock/provider.py | 3 + .../core/llmstudio_core/providers/provider.py | 9 +- 5 files changed, 387 insertions(+), 11 deletions(-) create mode 100644 libs/core/llmstudio_core/providers/bedrock/nova.py diff --git a/examples/core.py b/examples/core.py index e84ea89b..1eab6c81 100644 --- a/examples/core.py +++ b/examples/core.py @@ -8,7 +8,7 @@ from dotenv import load_dotenv load_dotenv() -def run_provider(provider, model, api_key, **kwargs): +def run_provider(provider, model, api_key=None, **kwargs): print(f"\n\n###RUNNING for <{provider}>, <{model}> ###") llm = LLMCore(provider=provider, api_key=api_key, **kwargs) @@ -107,6 +107,16 @@ def build_chat_request(model: str, chat_input: str, is_stream: bool, max_tokens: "max_completion_tokens": max_tokens } } + elif 'amazon.nova' in model or 'anthropic.claude' in model: + chat_request = { + "chat_input": chat_input, + "model": model, + "is_stream": is_stream, + "retries": 0, + "parameters": { + "maxTokens": max_tokens + } + } else: chat_request = { "chat_input": chat_input, @@ -150,10 +160,7 @@ def multiple_provider_runs(provider:str, model:str, num_runs:int, api_key:str, * multiple_provider_runs(provider="vertexai", model="gemini-1.5-flash", num_runs=1, api_key=os.environ["GOOGLE_API_KEY"]) -# provider = "vertexai" -# model = "gemini-1.5-pro-latest" -# for _ in range(1): -# latencies = run_provider(provider=provider, model=model, -# api_key=os.environ["GOOGLE_API_KEY"], -# ) -# pprint(latencies) + +# Bedrock +multiple_provider_runs(provider="bedrock", model="us.amazon.nova-lite-v1:0", num_runs=1, api_key=None, region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"]) +#multiple_provider_runs(provider="bedrock", model="anthropic.claude-3-5-sonnet-20241022-v2:0", num_runs=1, api_key=None, region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"]) diff --git a/libs/core/llmstudio_core/config.yaml b/libs/core/llmstudio_core/config.yaml index b608bd01..9d014a9c 100644 --- a/libs/core/llmstudio_core/config.yaml +++ b/libs/core/llmstudio_core/config.yaml @@ -126,6 +126,22 @@ providers: max_tokens: 100000 input_token_cost: 0.000008 output_token_cost: 0.000024 + us.amazon.nova-pro-v1:0: + mode: chat + max_tokens: 300000 + input_token_cost: 0.0000008 + output_token_cost: 0.0000016 + us.amazon.nova-lite-v1:0: + mode: chat + max_tokens: 300000 + input_token_cost: 0.00000006 + output_token_cost: 0.00000012 + us.amazon.nova-micro-v1:0: + mode: chat + max_tokens: 128000 + input_token_cost: 0.000000035 + output_token_cost: 0.00000007 + parameters: temperature: name: "Temperature" diff --git a/libs/core/llmstudio_core/providers/bedrock/nova.py b/libs/core/llmstudio_core/providers/bedrock/nova.py new file mode 100644 index 00000000..c546ab4c --- /dev/null +++ b/libs/core/llmstudio_core/providers/bedrock/nova.py @@ -0,0 +1,347 @@ +import json +import os +import time +import uuid +from typing import ( + Any, + AsyncGenerator, + Coroutine, + Dict, + Generator, + List, + Optional, + Union, +) + +import boto3 +from llmstudio_core.exceptions import ProviderError +from 
llmstudio_core.providers.provider import ChatRequest, ProviderCore, provider +from llmstudio_core.utils import OpenAIToolFunction +from openai.types.chat import ChatCompletionChunk +from openai.types.chat.chat_completion_chunk import ( + Choice, + ChoiceDelta, + ChoiceDeltaToolCall, + ChoiceDeltaToolCallFunction, + CompletionUsage, +) +from pydantic import ValidationError + +SERVICE = "bedrock-runtime" + + +@provider +class BedrockNovaProvider(ProviderCore): + def __init__(self, config, **kwargs): + super().__init__(config, **kwargs) + self._client = boto3.client( + SERVICE, + region_name=self.region if self.region else os.getenv("BEDROCK_REGION"), + aws_access_key_id=self.access_key + if self.access_key + else os.getenv("BEDROCK_ACCESS_KEY"), + aws_secret_access_key=self.secret_key + if self.secret_key + else os.getenv("BEDROCK_SECRET_KEY"), + ) + + @staticmethod + def _provider_config_name(): + return "bedrock-nova" + + def validate_request(self, request: ChatRequest): + return ChatRequest(**request) + + async def agenerate_client(self, request: ChatRequest) -> Coroutine[Any, Any, Any]: + """Generate an AWS Bedrock client""" + return self.generate_client(request=request) + + def generate_client(self, request: ChatRequest) -> Coroutine[Any, Any, Generator]: + """Generate an AWS Bedrock client""" + try: + messages, system_prompt = self._process_messages(request.chat_input) + tools = self._process_tools(request.parameters) + + system_prompt = ( + request.parameters.get("system") + if request.parameters.get("system") + else system_prompt + ) + + client_params = { + "modelId": request.model, + "messages": messages, + "inferenceConfig": self._process_parameters(request.parameters), + "system": system_prompt, + } + if tools: + client_params["toolConfig"] = tools + + return self._client.converse_stream(**client_params) + except Exception as e: + raise ProviderError(str(e)) + + async def aparse_response( + self, response: Any, **kwargs + ) -> AsyncGenerator[Any, None]: + return self.parse_response(response=response, **kwargs) + + def parse_response(self, response: AsyncGenerator[Any, None], **kwargs) -> Any: + tool_name = None + tool_arguments = "" + tool_id = None + + for chunk in response["stream"]: + if chunk.get("messageStart"): + first_chunk = ChatCompletionChunk( + id=str(uuid.uuid4()), + choices=[ + Choice( + delta=ChoiceDelta( + content=None, + function_call=None, + role="assistant", + tool_calls=None, + ), + index=0, + ) + ], + created=int(time.time()), + model=kwargs.get("request").model, + object="chat.completion.chunk", + usage=None, + ) + yield first_chunk.model_dump() + + elif chunk.get("contentBlockStart"): + if chunk["contentBlockStart"]["start"].get("toolUse"): + tool_name = chunk["contentBlockStart"]["start"]["toolUse"]["name"] + tool_arguments = "" + tool_id = chunk["contentBlockStart"]["start"]["toolUse"][ + "toolUseId" + ] + + elif chunk.get("contentBlockDelta"): + delta = chunk["contentBlockDelta"]["delta"] + if delta.get("text"): + # Regular content, yield it + text = delta["text"] + chunk = ChatCompletionChunk( + id=str(uuid.uuid4()), + choices=[ + Choice( + delta=ChoiceDelta(content=text), + finish_reason=None, + index=0, + ) + ], + created=int(time.time()), + model=kwargs.get("request").model, + object="chat.completion.chunk", + ) + yield chunk.model_dump() + + elif delta.get("toolUse"): + partial_json = delta["toolUse"]["input"] + tool_arguments += partial_json + + elif chunk.get("contentBlockStop") and tool_id: + name_chunk = ChatCompletionChunk( + 
id=str(uuid.uuid4()), + choices=[ + Choice( + delta=ChoiceDelta( + role="assistant", + tool_calls=[ + ChoiceDeltaToolCall( + index=chunk["contentBlockStop"][ + "contentBlockIndex" + ], + id=tool_id, + function=ChoiceDeltaToolCallFunction( + name=tool_name, + arguments="", + type="function", + ), + ) + ], + ), + finish_reason=None, + index=chunk["contentBlockStop"]["contentBlockIndex"], + ) + ], + created=int(time.time()), + model=kwargs.get("request").model, + object="chat.completion.chunk", + ) + yield name_chunk.model_dump() + + args_chunk = ChatCompletionChunk( + id=tool_id, + choices=[ + Choice( + delta=ChoiceDelta( + tool_calls=[ + ChoiceDeltaToolCall( + index=chunk["contentBlockStop"][ + "contentBlockIndex" + ], + function=ChoiceDeltaToolCallFunction( + arguments=tool_arguments, + ), + ) + ], + ), + finish_reason=None, + index=chunk["contentBlockStop"]["contentBlockIndex"], + ) + ], + created=int(time.time()), + model=kwargs.get("request").model, + object="chat.completion.chunk", + ) + yield args_chunk.model_dump() + + elif chunk.get("messageStop"): + stop_reason = chunk["messageStop"].get("stopReason") + final_chunk = ChatCompletionChunk( + id=str(uuid.uuid4()), + choices=[ + Choice( + delta=ChoiceDelta(), + finish_reason="tool_calls" + if stop_reason == "tool_use" + else "length" + if stop_reason == "max_tokens" + else "stop", + index=0, + ) + ], + created=int(time.time()), + model=kwargs.get("request").model, + object="chat.completion.chunk", + ) + yield final_chunk.model_dump() + + elif chunk.get("metadata"): + usage = chunk["metadata"].get("usage") + final_stream_chunk = ChatCompletionChunk( + id=str(uuid.uuid4()), + choices=[], + created=int(time.time()), + model=kwargs.get("request").model, + object="chat.completion.chunk", + usage=CompletionUsage( + completion_tokens=usage["outputTokens"], + prompt_tokens=usage["inputTokens"], + total_tokens=usage["totalTokens"], + ), + ) + yield final_stream_chunk.model_dump() + + @staticmethod + def _process_messages( + chat_input: Union[str, List[Dict[str, str]]] + ) -> List[Dict[str, Union[List[Dict[str, str]], str]]]: + + if isinstance(chat_input, str): + return [ + { + "role": "user", + "content": [{"text": chat_input}], + } + ], [] + + elif isinstance(chat_input, list): + messages = [] + next_tool_result_message = False + system_prompt = [] + for message in chat_input: + if message.get("role") in ["assistant", "user"]: + next_tool_result_message = False + if message.get("tool_calls"): + tool_use = {"role": "assistant", "content": []} + for tool in message.get("tool_calls"): + tool_use["content"].append( + { + "toolUse": { + "toolUseId": tool["id"], + "name": tool["function"]["name"], + "input": json.loads( + tool["function"]["arguments"] + ), + } + } + ) + messages.append(tool_use) + else: + messages.append( + { + "role": message.get("role"), + "content": [{"text": message.get("content")}], + } + ) + if message.get("role") in ["tool"]: + if not next_tool_result_message: + tool_result = {"role": "user", "content": []} + next_tool_result_message = True + messages.append(tool_result) + + tool_result = { + "toolResult": { + "toolUseId": message["tool_call_id"], + "content": [{"json": {"text": message["content"]}}], + } + } + + messages[-1]["content"].append(tool_result) + + if message.get("role") in ["system"]: + system_prompt = [{"text": message.get("content")}] + + return messages, system_prompt + + @staticmethod + def _process_tools(parameters: dict) -> Optional[Dict]: + if parameters.get("tools") is None and parameters.get("functions") 
is None: + return None + + try: + if parameters.get("tools"): + parsed_tools = [ + OpenAIToolFunction(**tool["function"]) + for tool in parameters["tools"] + ] + + if parameters.get("functions"): + parsed_tools = [ + OpenAIToolFunction(**tool) for tool in parameters["functions"] + ] + + tool_configurations = [] + for tool in parsed_tools: + tool_config = { + "toolSpec": { + "name": tool.name, + "description": tool.description, + "inputSchema": { + "json": { + "type": tool.parameters.type, + "properties": tool.parameters.properties, + "required": tool.parameters.required, + } + }, + } + } + tool_configurations.append(tool_config) + return {"tools": tool_configurations} + + except ValidationError: + return parameters.get("tools", parameters.get("functions")) + + @staticmethod + def _process_parameters(parameters: dict) -> dict: + remove_keys = ["system", "stop", "tools", "functions"] + for key in remove_keys: + parameters.pop(key, None) + return parameters diff --git a/libs/core/llmstudio_core/providers/bedrock/provider.py b/libs/core/llmstudio_core/providers/bedrock/provider.py index 7087dac2..53376889 100644 --- a/libs/core/llmstudio_core/providers/bedrock/provider.py +++ b/libs/core/llmstudio_core/providers/bedrock/provider.py @@ -1,6 +1,7 @@ from typing import Any, AsyncGenerator, Coroutine, Generator from llmstudio_core.providers.bedrock.anthropic import BedrockAnthropicProvider +from llmstudio_core.providers.bedrock.nova import BedrockNovaProvider from llmstudio_core.providers.provider import ChatRequest, ProviderCore, provider @@ -14,6 +15,8 @@ def __init__(self, config, **kwargs): def _get_provider(self, model): if "anthropic." in model: return BedrockAnthropicProvider(config=self.config, **self.kwargs) + if "amazon.nova" in model: + return BedrockNovaProvider(config=self.config, **self.kwargs) raise ValueError(f" provider is not yet supported.") diff --git a/libs/core/llmstudio_core/providers/provider.py b/libs/core/llmstudio_core/providers/provider.py index 3cb9ad57..f7fed317 100644 --- a/libs/core/llmstudio_core/providers/provider.py +++ b/libs/core/llmstudio_core/providers/provider.py @@ -823,9 +823,12 @@ def _calculate_metrics( ) total_cost_usd -= cached_savings - reasoning_tokens = usage.get("completion_tokens_details", {}).get( - "reasoning_tokens", None - ) + completion_tokens_details = usage.get("completion_tokens_details") + if completion_tokens_details: + reasoning_tokens = completion_tokens_details.get( + "reasoning_tokens", None + ) + if reasoning_tokens: total_tokens += reasoning_tokens reasoning_cost = self._calculate_cost( From 727a3a1684846c5baeb4c39d16d5c29d33cfa59b Mon Sep 17 00:00:00 2001 From: Miguel Neves Date: Mon, 10 Feb 2025 13:31:57 +0000 Subject: [PATCH 2/6] feat: tokens are now read from usage if available to ensure accuracy --- libs/core/llmstudio_core/providers/provider.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/libs/core/llmstudio_core/providers/provider.py b/libs/core/llmstudio_core/providers/provider.py index f7fed317..9646919f 100644 --- a/libs/core/llmstudio_core/providers/provider.py +++ b/libs/core/llmstudio_core/providers/provider.py @@ -808,6 +808,11 @@ def _calculate_metrics( output_tokens = len(self.tokenizer.encode(self._output_to_string(output))) total_tokens = input_tokens + output_tokens + if usage: + input_tokens = usage.get("prompt_tokens", input_tokens) + output_tokens = usage.get("completion_tokens", output_tokens) + total_tokens = usage.get("total_tokens", total_tokens) + # Cost calculations input_cost = 
self._calculate_cost(input_tokens, model_config.input_token_cost) output_cost = self._calculate_cost( From 932724f89a6b17a09f2455395c5cf45030353923 Mon Sep 17 00:00:00 2001 From: Miguel Neves Date: Mon, 10 Feb 2025 13:35:17 +0000 Subject: [PATCH 3/6] chore: removed duplicated integration tests folder in wrong place --- .../test_cache_and_reasoning_costs.py | 192 ------------------ 1 file changed, 192 deletions(-) delete mode 100644 libs/core/tests/integration_tests/test_cache_and_reasoning_costs.py diff --git a/libs/core/tests/integration_tests/test_cache_and_reasoning_costs.py b/libs/core/tests/integration_tests/test_cache_and_reasoning_costs.py deleted file mode 100644 index 8dd3b18c..00000000 --- a/libs/core/tests/integration_tests/test_cache_and_reasoning_costs.py +++ /dev/null @@ -1,192 +0,0 @@ -import asyncio -import os -from pprint import pprint - -import pytest -from dotenv import load_dotenv -from llmstudio_core.providers import LLMCore - -load_dotenv() - -# input prompt has to be >1024 tokens to auto cache on OpenAI -input_prompt = """ -What is Lorem Ipsum? json -Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum. - -Why do we use it? -It is a long established fact that a reader will be distracted by the readable content of a page when looking at its layout. The point of using Lorem Ipsum is that it has a more-or-less normal distribution of letters, as opposed to using 'Content here, content here', making it look like readable English. Many desktop publishing packages and web page editors now use Lorem Ipsum as their default model text, and a search for 'lorem ipsum' will uncover many web sites still in their infancy. Various versions have evolved over the years, sometimes by accident, sometimes on purpose (injected humour and the like). - - -Where does it come from? -Contrary to popular belief, Lorem Ipsum is not simply random text. It has roots in a piece of classical Latin literature from 45 BC, making it over 2000 years old. Richard McClintock, a Latin professor at Hampden-Sydney College in Virginia, looked up one of the more obscure Latin words, consectetur, from a Lorem Ipsum passage, and going through the cites of the word in classical literature, discovered the undoubtable source. Lorem Ipsum comes from sections 1.10.32 and 1.10.33 of "de Finibus Bonorum et Malorum" (The Extremes of Good and Evil) by Cicero, written in 45 BC. This book is a treatise on the theory of ethics, very popular during the Renaissance. The first line of Lorem Ipsum, "Lorem ipsum dolor sit amet..", comes from a line in section 1.10.32. - -What is Lorem Ipsum? json -Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. 
It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum. - -Why do we use it? -It is a long established fact that a reader will be distracted by the readable content of a page when looking at its layout. The point of using Lorem Ipsum is that it has a more-or-less normal distribution of letters, as opposed to using 'Content here, content here', making it look like readable English. Many desktop publishing packages and web page editors now use Lorem Ipsum as their default model text, and a search for 'lorem ipsum' will uncover many web sites still in their infancy. Various versions have evolved over the years, sometimes by accident, sometimes on purpose (injected humour and the like). - - -Where does it come from? -Contrary to popular belief, Lorem Ipsum is not simply random text. It has roots in a piece of classical Latin literature from 45 BC, making it over 2000 years old. Richard McClintock, a Latin professor at Hampden-Sydney College in Virginia, looked up one of the more obscure Latin words, consectetur, from a Lorem Ipsum passage, and going through the cites of the word in classical literature, discovered the undoubtable source. Lorem Ipsum comes from sections 1.10.32 and 1.10.33 of "de Finibus Bonorum et Malorum" (The Extremes of Good and Evil) by Cicero, written in 45 BC. This book is a treatise on the theory of ethics, very popular during the Renaissance. The first line of Lorem Ipsum, "Lorem ipsum dolor sit amet..", comes from a line in section 1.10.32. - -What is Lorem Ipsum? json -Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum. - -Why do we use it? -It is a long established fact that a reader will be distracted by the readable content of a page when looking at its layout. The point of using Lorem Ipsum is that it has a more-or-less normal distribution of letters, as opposed to using 'Content here, content here', making it look like readable English. Many desktop publishing packages and web page editors now use Lorem Ipsum as their default model text, and a search for 'lorem ipsum' will uncover many web sites still in their infancy. Various versions have evolved over the years, sometimes by accident, sometimes on purpose (injected humour and the like). - - -Where does it come from? -Contrary to popular belief, Lorem Ipsum is not simply random text. It has roots in a piece of classical Latin literature from 45 BC, making it over 2000 years old. Richard McClintock, a Latin professor at Hampden-Sydney College in Virginia, looked up one of the more obscure Latin words, consectetur, from a Lorem Ipsum passage, and going through the cites of the word in classical literature, discovered the undoubtable source. Lorem Ipsum comes from sections 1.10.32 and 1.10.33 of "de Finibus Bonorum et Malorum" (The Extremes of Good and Evil) by Cicero, written in 45 BC. 
This book is a treatise on the theory of ethics, very popular during the Renaissance. The first line of Lorem Ipsum, "Lorem ipsum dolor sit amet..", comes from a line in section 1.10.32. - -After reading this answer just in one line, saying hello how are you in latin. -""" - - -def run_provider(provider, model, api_key, **kwargs): - print(f"\n\n###RUNNING for <{provider}>, <{model}> ###") - llm = LLMCore(provider=provider, api_key=api_key, **kwargs) - - metrics = {} - - print("\nAsync Non-Stream") - - chat_request = build_chat_request(model, chat_input=input_prompt, is_stream=False) - - response_async = asyncio.run(llm.achat(**chat_request)) - pprint(response_async) - metrics["async non-stream"] = response_async.metrics - - print("\nAsync Stream") - - async def async_stream(): - chat_request = build_chat_request( - model, chat_input=input_prompt, is_stream=True - ) - - response_async = await llm.achat(**chat_request) - async for p in response_async: - if not p.metrics: - print("that: ", p.chat_output_stream) - if p.metrics: - pprint(p) - metrics["async stream"] = p.metrics - - asyncio.run(async_stream()) - - print("\nSync Non-Stream") - chat_request = build_chat_request(model, chat_input=input_prompt, is_stream=False) - - response_sync = llm.chat(**chat_request) - pprint(response_sync) - metrics["sync non-stream"] = response_sync.metrics - - print("\nSync Stream") - chat_request = build_chat_request(model, chat_input=input_prompt, is_stream=True) - - response_sync_stream = llm.chat(**chat_request) - for p in response_sync_stream: - if p.metrics: - pprint(p) - metrics["sync stream"] = p.metrics - - print(f"\n\n###REPORT for <{provider}>, <{model}> ###") - return metrics - - -def build_chat_request( - model: str, chat_input: str, is_stream: bool, max_tokens: int = 1000 -): - if model.startswith(("o1", "o3")): - chat_request = { - "chat_input": chat_input, - "model": model, - "is_stream": is_stream, - "retries": 0, - "parameters": {"max_completion_tokens": max_tokens}, - } - else: - chat_request = { - "chat_input": chat_input, - "model": model, - "is_stream": is_stream, - "retries": 0, - "parameters": { - "temperature": 0, - "max_tokens": max_tokens, - "response_format": {"type": "json_object"}, - "functions": None, - }, - } - return chat_request - - -# Fixture for provider-model pairs -@pytest.fixture( - scope="module", params=[("openai", "gpt-4o-mini"), ("openai", "o1-mini")] -) -def provider_model(request): - return request.param - - -# Fixture for metrics, computes them once per provider-model pair -@pytest.fixture(scope="module") -def metrics(provider_model): - provider, model = provider_model - print(f"Running provider {provider} with model {model}") - return run_provider( - provider=provider, model=model, api_key=os.environ["OPENAI_API_KEY"] - ) - - -# Test cache metrics (runs for all models) -def test_metrics_cache(provider_model, metrics): - provider, model = provider_model - at_least_one_cached = False - for current_metrics in metrics.values(): - print(current_metrics) - assert current_metrics["input_tokens"] > current_metrics["cached_tokens"] - if current_metrics["cached_tokens"] > 0: - at_least_one_cached = True - assert at_least_one_cached == True - print(f"All Cache Tests Passed for {provider} - {model}") - - -# Test reasoning metrics (only runs for o1 and o3 family) -def test_metrics_reasoning(provider_model, metrics): - provider, model = provider_model - - if not model.startswith(("o1", "o3")): - pytest.skip(f"Reasoning metrics test not applicable for model {model}") - - for 
current_metrics in metrics.values(): - assert current_metrics["reasoning_tokens"] > 0 - assert current_metrics["reasoning_tokens"] < current_metrics["total_tokens"] - assert ( - current_metrics["input_tokens"] - + current_metrics["output_tokens"] - + current_metrics["reasoning_tokens"] - == current_metrics["total_tokens"] - ) - print(f"All Reasoning Tests Passed for {provider} - {model}") - - -def usage_when_max_tokens_reached(): - """ - Usefull to test handling of Usage in other finish_reason scenarios - """ - provider, model = ("openai", "o1-mini") - api_key = os.environ["OPENAI_API_KEY"] - - llm = LLMCore(provider=provider, api_key=api_key) - chat_request = build_chat_request( - model, chat_input=input_prompt, is_stream=False, max_tokens=7 - ) - response = asyncio.run(llm.achat(**chat_request)) - - assert response.metrics["async non-stream"]["reasoning_tokens"] > 0 - assert response.metrics["sync non-stream"]["reasoning_tokens"] > 0 - assert response.metrics["async stream"]["reasoning_tokens"] > 0 - assert response.metrics["sync stream"]["reasoning_tokens"] > 0 - print(f"All Max Tokens Usage Reached Tests Passed for {provider} - {model}") From abdf8b26184af02904ba4b6a4e3ea5098f37d195 Mon Sep 17 00:00:00 2001 From: Miguel Neves Date: Mon, 10 Feb 2025 17:37:50 +0000 Subject: [PATCH 4/6] feat: refactored bedrock provider into being a single file instead of folder --- examples/core.py | 2 +- libs/core/llmstudio_core/config.yaml | 6 +- .../core/llmstudio_core/providers/__init__.py | 2 +- .../providers/bedrock/__init__.py | 0 .../providers/bedrock/anthropic.py | 330 ------------------ .../providers/bedrock/provider.py | 46 --- .../{bedrock/nova.py => bedrock_converse.py} | 12 +- 7 files changed, 12 insertions(+), 386 deletions(-) delete mode 100644 libs/core/llmstudio_core/providers/bedrock/__init__.py delete mode 100644 libs/core/llmstudio_core/providers/bedrock/anthropic.py delete mode 100644 libs/core/llmstudio_core/providers/bedrock/provider.py rename libs/core/llmstudio_core/providers/{bedrock/nova.py => bedrock_converse.py} (97%) diff --git a/examples/core.py b/examples/core.py index 1eab6c81..e9acfc50 100644 --- a/examples/core.py +++ b/examples/core.py @@ -162,5 +162,5 @@ def multiple_provider_runs(provider:str, model:str, num_runs:int, api_key:str, * multiple_provider_runs(provider="vertexai", model="gemini-1.5-flash", num_runs=1, api_key=os.environ["GOOGLE_API_KEY"]) # Bedrock -multiple_provider_runs(provider="bedrock", model="us.amazon.nova-lite-v1:0", num_runs=1, api_key=None, region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"]) +multiple_provider_runs(provider="bedrock-converse", model="us.amazon.nova-lite-v1:0", num_runs=1, api_key=None, region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"]) #multiple_provider_runs(provider="bedrock", model="anthropic.claude-3-5-sonnet-20241022-v2:0", num_runs=1, api_key=None, region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"]) diff --git a/libs/core/llmstudio_core/config.yaml b/libs/core/llmstudio_core/config.yaml index 9d014a9c..ebf7ed66 100644 --- a/libs/core/llmstudio_core/config.yaml +++ b/libs/core/llmstudio_core/config.yaml @@ -71,9 +71,9 @@ providers: min: 0 max: 500 step: 1 - bedrock: - id: bedrock - name: Bedrock + bedrock-converse: + id: bedrock-converse + name: Bedrock Converse chat: true embed: true keys: diff --git 
a/libs/core/llmstudio_core/providers/__init__.py b/libs/core/llmstudio_core/providers/__init__.py index 6aa6c4e7..330fe48e 100644 --- a/libs/core/llmstudio_core/providers/__init__.py +++ b/libs/core/llmstudio_core/providers/__init__.py @@ -2,7 +2,7 @@ from typing import Optional from llmstudio_core.providers.azure import AzureProvider -from llmstudio_core.providers.bedrock.provider import BedrockProvider +from llmstudio_core.providers.bedrock_converse import BedrockConverseProvider # from llmstudio_core.providers.ollama import OllamaProvider #TODO: adapt it from llmstudio_core.providers.openai import OpenAIProvider diff --git a/libs/core/llmstudio_core/providers/bedrock/__init__.py b/libs/core/llmstudio_core/providers/bedrock/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/libs/core/llmstudio_core/providers/bedrock/anthropic.py b/libs/core/llmstudio_core/providers/bedrock/anthropic.py deleted file mode 100644 index f2b3cfc1..00000000 --- a/libs/core/llmstudio_core/providers/bedrock/anthropic.py +++ /dev/null @@ -1,330 +0,0 @@ -import json -import os -import time -import uuid -from typing import ( - Any, - AsyncGenerator, - Coroutine, - Dict, - Generator, - List, - Optional, - Union, -) - -import boto3 -from llmstudio_core.exceptions import ProviderError -from llmstudio_core.providers.provider import ChatRequest, ProviderCore, provider -from llmstudio_core.utils import OpenAIToolFunction -from openai.types.chat import ChatCompletionChunk -from openai.types.chat.chat_completion_chunk import ( - Choice, - ChoiceDelta, - ChoiceDeltaToolCall, - ChoiceDeltaToolCallFunction, -) -from pydantic import ValidationError - -SERVICE = "bedrock-runtime" - - -@provider -class BedrockAnthropicProvider(ProviderCore): - def __init__(self, config, **kwargs): - super().__init__(config, **kwargs) - self._client = boto3.client( - SERVICE, - region_name=self.region if self.region else os.getenv("BEDROCK_REGION"), - aws_access_key_id=self.access_key - if self.access_key - else os.getenv("BEDROCK_ACCESS_KEY"), - aws_secret_access_key=self.secret_key - if self.secret_key - else os.getenv("BEDROCK_SECRET_KEY"), - ) - - @staticmethod - def _provider_config_name(): - return "bedrock-antropic" - - def validate_request(self, request: ChatRequest): - return ChatRequest(**request) - - async def agenerate_client(self, request: ChatRequest) -> Coroutine[Any, Any, Any]: - """Generate an AWS Bedrock client""" - return self.generate_client(request=request) - - def generate_client(self, request: ChatRequest) -> Coroutine[Any, Any, Generator]: - """Generate an AWS Bedrock client""" - try: - messages, system_prompt = self._process_messages(request.chat_input) - tools = self._process_tools(request.parameters) - - system_prompt = ( - request.parameters.get("system") - if request.parameters.get("system") - else system_prompt - ) - - client_params = { - "modelId": request.model, - "messages": messages, - "inferenceConfig": self._process_parameters(request.parameters), - "system": system_prompt, - } - if tools: - client_params["toolConfig"] = tools - - return self._client.converse_stream(**client_params) - except Exception as e: - raise ProviderError(str(e)) - - async def aparse_response( - self, response: Any, **kwargs - ) -> AsyncGenerator[Any, None]: - return self.parse_response(response=response, **kwargs) - - def parse_response(self, response: AsyncGenerator[Any, None], **kwargs) -> Any: - tool_name = None - tool_arguments = "" - tool_id = None - - for chunk in response["stream"]: - if 
chunk.get("messageStart"): - first_chunk = ChatCompletionChunk( - id=str(uuid.uuid4()), - choices=[ - Choice( - delta=ChoiceDelta( - content=None, - function_call=None, - role="assistant", - tool_calls=None, - ), - index=0, - ) - ], - created=int(time.time()), - model=kwargs.get("request").model, - object="chat.completion.chunk", - usage=None, - ) - yield first_chunk.model_dump() - - elif chunk.get("contentBlockStart"): - if chunk["contentBlockStart"]["start"].get("toolUse"): - tool_name = chunk["contentBlockStart"]["start"]["toolUse"]["name"] - tool_arguments = "" - tool_id = chunk["contentBlockStart"]["start"]["toolUse"][ - "toolUseId" - ] - - elif chunk.get("contentBlockDelta"): - delta = chunk["contentBlockDelta"]["delta"] - if delta.get("text"): - # Regular content, yield it - text = delta["text"] - chunk = ChatCompletionChunk( - id=str(uuid.uuid4()), - choices=[ - Choice( - delta=ChoiceDelta(content=text), - finish_reason=None, - index=0, - ) - ], - created=int(time.time()), - model=kwargs.get("request").model, - object="chat.completion.chunk", - ) - yield chunk.model_dump() - - elif delta.get("toolUse"): - partial_json = delta["toolUse"]["input"] - tool_arguments += partial_json - - elif chunk.get("contentBlockStop") and tool_id: - name_chunk = ChatCompletionChunk( - id=str(uuid.uuid4()), - choices=[ - Choice( - delta=ChoiceDelta( - role="assistant", - tool_calls=[ - ChoiceDeltaToolCall( - index=chunk["contentBlockStop"][ - "contentBlockIndex" - ], - id=tool_id, - function=ChoiceDeltaToolCallFunction( - name=tool_name, - arguments="", - type="function", - ), - ) - ], - ), - finish_reason=None, - index=chunk["contentBlockStop"]["contentBlockIndex"], - ) - ], - created=int(time.time()), - model=kwargs.get("request").model, - object="chat.completion.chunk", - ) - yield name_chunk.model_dump() - - args_chunk = ChatCompletionChunk( - id=tool_id, - choices=[ - Choice( - delta=ChoiceDelta( - tool_calls=[ - ChoiceDeltaToolCall( - index=chunk["contentBlockStop"][ - "contentBlockIndex" - ], - function=ChoiceDeltaToolCallFunction( - arguments=tool_arguments, - ), - ) - ], - ), - finish_reason=None, - index=chunk["contentBlockStop"]["contentBlockIndex"], - ) - ], - created=int(time.time()), - model=kwargs.get("request").model, - object="chat.completion.chunk", - ) - yield args_chunk.model_dump() - - elif chunk.get("messageStop"): - stop_reason = chunk["messageStop"].get("stopReason") - final_chunk = ChatCompletionChunk( - id=str(uuid.uuid4()), - choices=[ - Choice( - delta=ChoiceDelta(), - finish_reason="tool_calls" - if stop_reason == "tool_use" - else "length" - if stop_reason == "max_tokens" - else "stop", - index=0, - ) - ], - created=int(time.time()), - model=kwargs.get("request").model, - object="chat.completion.chunk", - ) - yield final_chunk.model_dump() - - @staticmethod - def _process_messages( - chat_input: Union[str, List[Dict[str, str]]] - ) -> List[Dict[str, Union[List[Dict[str, str]], str]]]: - - if isinstance(chat_input, str): - return [ - { - "role": "user", - "content": [{"text": chat_input}], - } - ], [] - - elif isinstance(chat_input, list): - messages = [] - next_tool_result_message = False - system_prompt = [] - for message in chat_input: - if message.get("role") in ["assistant", "user"]: - next_tool_result_message = False - if message.get("tool_calls"): - tool_use = {"role": "assistant", "content": []} - for tool in message.get("tool_calls"): - tool_use["content"].append( - { - "toolUse": { - "toolUseId": tool["id"], - "name": tool["function"]["name"], - "input": 
json.loads( - tool["function"]["arguments"] - ), - } - } - ) - messages.append(tool_use) - else: - messages.append( - { - "role": message.get("role"), - "content": [{"text": message.get("content")}], - } - ) - if message.get("role") in ["tool"]: - if not next_tool_result_message: - tool_result = {"role": "user", "content": []} - next_tool_result_message = True - messages.append(tool_result) - - tool_result = { - "toolResult": { - "toolUseId": message["tool_call_id"], - "content": [{"json": {"text": message["content"]}}], - } - } - - messages[-1]["content"].append(tool_result) - - if message.get("role") in ["system"]: - system_prompt = [{"text": message.get("content")}] - - return messages, system_prompt - - @staticmethod - def _process_tools(parameters: dict) -> Optional[Dict]: - if parameters.get("tools") is None and parameters.get("functions") is None: - return None - - try: - if parameters.get("tools"): - parsed_tools = [ - OpenAIToolFunction(**tool["function"]) - for tool in parameters["tools"] - ] - - if parameters.get("functions"): - parsed_tools = [ - OpenAIToolFunction(**tool) for tool in parameters["functions"] - ] - - tool_configurations = [] - for tool in parsed_tools: - tool_config = { - "toolSpec": { - "name": tool.name, - "description": tool.description, - "inputSchema": { - "json": { - "type": tool.parameters.type, - "properties": tool.parameters.properties, - "required": tool.parameters.required, - } - }, - } - } - tool_configurations.append(tool_config) - return {"tools": tool_configurations} - - except ValidationError: - return parameters.get("tools", parameters.get("functions")) - - @staticmethod - def _process_parameters(parameters: dict) -> dict: - remove_keys = ["system", "stop", "tools", "functions"] - for key in remove_keys: - parameters.pop(key, None) - return parameters diff --git a/libs/core/llmstudio_core/providers/bedrock/provider.py b/libs/core/llmstudio_core/providers/bedrock/provider.py deleted file mode 100644 index 53376889..00000000 --- a/libs/core/llmstudio_core/providers/bedrock/provider.py +++ /dev/null @@ -1,46 +0,0 @@ -from typing import Any, AsyncGenerator, Coroutine, Generator - -from llmstudio_core.providers.bedrock.anthropic import BedrockAnthropicProvider -from llmstudio_core.providers.bedrock.nova import BedrockNovaProvider -from llmstudio_core.providers.provider import ChatRequest, ProviderCore, provider - - -@provider -class BedrockProvider(ProviderCore): - def __init__(self, config, **kwargs): - super().__init__(config, **kwargs) - self.kwargs = kwargs - self.selected_model = None - - def _get_provider(self, model): - if "anthropic." 
in model: - return BedrockAnthropicProvider(config=self.config, **self.kwargs) - if "amazon.nova" in model: - return BedrockNovaProvider(config=self.config, **self.kwargs) - - raise ValueError(f" provider is not yet supported.") - - @staticmethod - def _provider_config_name(): - return "bedrock" - - def validate_request(self, request: ChatRequest): - return ChatRequest(**request) - - async def agenerate_client(self, request: ChatRequest) -> Coroutine[Any, Any, Any]: - self.selected_model = self._get_provider(request.model) - return await self.selected_model.agenerate_client(request) - - def generate_client(self, request: ChatRequest) -> Coroutine[Any, Any, Generator]: - self.selected_model = self._get_provider(request.model) - return self.selected_model.generate_client(request=request) - - async def aparse_response( - self, response: Any, **kwargs - ) -> AsyncGenerator[Any, None]: - result = await self.selected_model.aparse_response(response=response, **kwargs) - for chunk in result: - yield chunk - - def parse_response(self, response: AsyncGenerator[Any, None], **kwargs) -> Any: - return self.selected_model.parse_response(response=response, **kwargs) diff --git a/libs/core/llmstudio_core/providers/bedrock/nova.py b/libs/core/llmstudio_core/providers/bedrock_converse.py similarity index 97% rename from libs/core/llmstudio_core/providers/bedrock/nova.py rename to libs/core/llmstudio_core/providers/bedrock_converse.py index c546ab4c..1dfda94b 100644 --- a/libs/core/llmstudio_core/providers/bedrock/nova.py +++ b/libs/core/llmstudio_core/providers/bedrock_converse.py @@ -31,7 +31,7 @@ @provider -class BedrockNovaProvider(ProviderCore): +class BedrockConverseProvider(ProviderCore): def __init__(self, config, **kwargs): super().__init__(config, **kwargs) self._client = boto3.client( @@ -47,17 +47,17 @@ def __init__(self, config, **kwargs): @staticmethod def _provider_config_name(): - return "bedrock-nova" + return "bedrock-converse" def validate_request(self, request: ChatRequest): return ChatRequest(**request) async def agenerate_client(self, request: ChatRequest) -> Coroutine[Any, Any, Any]: - """Generate an AWS Bedrock client""" + """Generate an AWS Bedrock Converse client""" return self.generate_client(request=request) def generate_client(self, request: ChatRequest) -> Coroutine[Any, Any, Generator]: - """Generate an AWS Bedrock client""" + """Generate an AWS Bedrock Converse client""" try: messages, system_prompt = self._process_messages(request.chat_input) tools = self._process_tools(request.parameters) @@ -84,7 +84,9 @@ def generate_client(self, request: ChatRequest) -> Coroutine[Any, Any, Generator async def aparse_response( self, response: Any, **kwargs ) -> AsyncGenerator[Any, None]: - return self.parse_response(response=response, **kwargs) + result = self.parse_response(response=response, **kwargs) + for chunk in result: + yield chunk def parse_response(self, response: AsyncGenerator[Any, None], **kwargs) -> Any: tool_name = None From effaa850dc61e6810ad4a3ebc12ef323f09fa07d Mon Sep 17 00:00:00 2001 From: Miguel Neves Date: Mon, 10 Feb 2025 17:39:54 +0000 Subject: [PATCH 5/6] chore: renamed bedrock to bedrock-converse in examples/core.py --- examples/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/core.py b/examples/core.py index e9acfc50..898e82fc 100644 --- a/examples/core.py +++ b/examples/core.py @@ -163,4 +163,4 @@ def multiple_provider_runs(provider:str, model:str, num_runs:int, api_key:str, * # Bedrock 
multiple_provider_runs(provider="bedrock-converse", model="us.amazon.nova-lite-v1:0", num_runs=1, api_key=None, region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"]) -#multiple_provider_runs(provider="bedrock", model="anthropic.claude-3-5-sonnet-20241022-v2:0", num_runs=1, api_key=None, region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"]) +#multiple_provider_runs(provider="bedrock-converse", model="anthropic.claude-3-5-sonnet-20241022-v2:0", num_runs=1, api_key=None, region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"]) From b3586671568ec495ca5225f9d2c1928ec3adb169 Mon Sep 17 00:00:00 2001 From: Miguel Neves Date: Mon, 10 Feb 2025 17:44:43 +0000 Subject: [PATCH 6/6] chore: renamed bedrock in config.yaml --- examples/core.py | 4 ++-- libs/core/llmstudio_core/config.yaml | 6 +++--- libs/core/llmstudio_core/providers/bedrock_converse.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/core.py b/examples/core.py index 898e82fc..1eab6c81 100644 --- a/examples/core.py +++ b/examples/core.py @@ -162,5 +162,5 @@ def multiple_provider_runs(provider:str, model:str, num_runs:int, api_key:str, * multiple_provider_runs(provider="vertexai", model="gemini-1.5-flash", num_runs=1, api_key=os.environ["GOOGLE_API_KEY"]) # Bedrock -multiple_provider_runs(provider="bedrock-converse", model="us.amazon.nova-lite-v1:0", num_runs=1, api_key=None, region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"]) -#multiple_provider_runs(provider="bedrock-converse", model="anthropic.claude-3-5-sonnet-20241022-v2:0", num_runs=1, api_key=None, region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"]) +multiple_provider_runs(provider="bedrock", model="us.amazon.nova-lite-v1:0", num_runs=1, api_key=None, region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"]) +#multiple_provider_runs(provider="bedrock", model="anthropic.claude-3-5-sonnet-20241022-v2:0", num_runs=1, api_key=None, region=os.environ["BEDROCK_REGION"], secret_key=os.environ["BEDROCK_SECRET_KEY"], access_key=os.environ["BEDROCK_ACCESS_KEY"]) diff --git a/libs/core/llmstudio_core/config.yaml b/libs/core/llmstudio_core/config.yaml index ebf7ed66..46813bd6 100644 --- a/libs/core/llmstudio_core/config.yaml +++ b/libs/core/llmstudio_core/config.yaml @@ -71,9 +71,9 @@ providers: min: 0 max: 500 step: 1 - bedrock-converse: - id: bedrock-converse - name: Bedrock Converse + bedrock: + id: bedrock + name: Bedrock ConverseAPI chat: true embed: true keys: diff --git a/libs/core/llmstudio_core/providers/bedrock_converse.py b/libs/core/llmstudio_core/providers/bedrock_converse.py index 1dfda94b..dc756e0a 100644 --- a/libs/core/llmstudio_core/providers/bedrock_converse.py +++ b/libs/core/llmstudio_core/providers/bedrock_converse.py @@ -47,7 +47,7 @@ def __init__(self, config, **kwargs): @staticmethod def _provider_config_name(): - return "bedrock-converse" + return "bedrock" def validate_request(self, request: ChatRequest): return ChatRequest(**request)
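
A minimal usage sketch for the provider added in this series, mirroring the calls in examples/core.py. This is an illustration, not part of the patches: it assumes the BEDROCK_REGION, BEDROCK_ACCESS_KEY, and BEDROCK_SECRET_KEY environment variables are set and that the Nova model ID is enabled in your AWS account. The provider name "bedrock" follows the final state of config.yaml after PATCH 6/6.

import os

from llmstudio_core.providers import LLMCore

# Illustrative sketch based on this series. Credentials are passed
# explicitly to boto3 via kwargs; api_key is unused for Bedrock.
llm = LLMCore(
    provider="bedrock",
    api_key=None,
    region=os.environ["BEDROCK_REGION"],
    access_key=os.environ["BEDROCK_ACCESS_KEY"],
    secret_key=os.environ["BEDROCK_SECRET_KEY"],
)

# Nova models take Converse-style parameter names (e.g. maxTokens),
# matching build_chat_request in examples/core.py.
response = llm.chat(
    chat_input="Say hello in Latin.",
    model="us.amazon.nova-lite-v1:0",
    is_stream=False,
    retries=0,
    parameters={"maxTokens": 100},
)
print(response)

For streaming, set is_stream=True and iterate the returned generator; as exercised in run_provider, the final chunk carries the metrics, which after PATCH 2/6 are taken from the provider-reported usage when available rather than re-tokenized locally.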