From 0632e3a9413fc00b0f334381d71bef6e1d42e000 Mon Sep 17 00:00:00 2001
From: Chen Meng
Date: Mon, 3 Jun 2024 05:42:40 -0400
Subject: [PATCH] [Layers] Increased the threshold for enabling flashAttn

---
 src/models/env_config.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/models/env_config.cpp b/src/models/env_config.cpp
index 7d3f4928..9c94e9df 100644
--- a/src/models/env_config.cpp
+++ b/src/models/env_config.cpp
@@ -37,7 +37,7 @@ int getFlashThresh() {
     // > threshold to enable flash attention, default 1024
     static int envFlashThresh = -1;
     if (envFlashThresh == -1)
-        envFlashThresh = (getenv("FLASH_ATTN_THRESHOLD") ? atoi(getenv("FLASH_ATTN_THRESHOLD")) : 1024);
+        envFlashThresh = (getenv("FLASH_ATTN_THRESHOLD") ? atoi(getenv("FLASH_ATTN_THRESHOLD")) : 8192);
     return envFlashThresh;
 }
