diff --git a/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py b/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py
index 3668a8959..9e94859a9 100644
--- a/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py
+++ b/model-engine/model_engine_server/domain/use_cases/llm_model_endpoint_use_cases.py
@@ -399,7 +399,7 @@ async def create_text_generation_inference_bundle(
         max_input_length = 1024
         max_total_tokens = 2048
         if "llama-2" in model_name:
-            max_input_length = 2048
+            max_input_length = 4095
             max_total_tokens = 4096

         subcommands = []