From 71f4554908984e713da682eed9e171dfeab48b13 Mon Sep 17 00:00:00 2001
From: Sam Denton <106690182+sam-scale@users.noreply.github.com>
Date: Thu, 16 Nov 2023 09:21:15 -0800
Subject: [PATCH] Increase llama-2 max_input_tokens

---
 .../domain/use_cases/llm_model_endpoint_use_cases.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py b/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py
index 3668a8959..9e94859a9 100644
--- a/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py
+++ b/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py
@@ -399,7 +399,7 @@ async def create_text_generation_inference_bundle(
     max_input_length = 1024
     max_total_tokens = 2048
     if "llama-2" in model_name:
-        max_input_length = 2048
+        max_input_length = 4095
         max_total_tokens = 4096
 
     subcommands = []
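
Reviewer note (not part of the patch): llama-2 models have a 4096-token context
window, and the new value is 4095 rather than 4096, presumably because
text-generation-inference requires max_input_length to be strictly less than
max_total_tokens so that at least one token can still be generated. A minimal
Python sketch of that arithmetic follows; the names here are illustrative and
are not taken from the patch or the TGI codebase.

    MAX_TOTAL_TOKENS = 4096  # llama-2 context window

    def max_input_length_for(max_total_tokens: int, min_new_tokens: int = 1) -> int:
        # The prompt must leave room for at least one generated token,
        # hence max_input_length < max_total_tokens.
        return max_total_tokens - min_new_tokens

    # Matches the value chosen in this patch.
    assert max_input_length_for(MAX_TOTAL_TOKENS) == 4095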