Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
275 changes: 119 additions & 156 deletions koboldcpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,71 +557,66 @@ def string_contains_sequence_substring(inputstr,sequences):
import struct

def read_gguf_metadata(file_path):
    """Read layer/head/KV-length metadata from a GGUF model file.

    Cheap heuristic scan: only the first 8KB of the file is searched for
    known metadata key names rather than fully parsing the GGUF format.

    Returns:
        [block_count, head_count_kv, max(key_length, value_length)] on
        success, or None if the file is too small, not GGUF, or unreadable.
    """
    CHUNK_SIZE = 8192  # read only first 8kb of file
    MIN_FILE_SIZE = 10000  # ignore files under 10kb

    def read_gguf_key(keyname, data, maxval):
        # Search for the key name; it is followed by two little-endian
        # uint32s: a type tag (4 == uint32) and the value itself.
        index = data.find(keyname)
        if index != -1 and index + len(keyname) + 8 <= CHUNK_SIZE:
            start_index = index + len(keyname)
            value1, value2 = struct.unpack('<II', data[start_index:start_index + 8])
            if value1 == 4 and 0 < value2 <= maxval:
                return value2  # contains the desired value
        return 0  # not found

    try:
        if os.path.getsize(file_path) < MIN_FILE_SIZE:  # ignore files under 10kb
            return None

        with open(file_path, 'rb') as f:
            file_header = f.read(4)
            if file_header != b'GGUF':  # file is not GGUF
                return None

            data = f.read(CHUNK_SIZE)
            layercount = read_gguf_key(b'.block_count', data, 512)
            head_count_kv = read_gguf_key(b'.attention.head_count_kv', data, 8192)
            key_length = read_gguf_key(b'.attention.key_length', data, 8192)
            val_length = read_gguf_key(b'.attention.value_length', data, 8192)

            return [layercount, head_count_kv, max(key_length, val_length)]
    except Exception:
        return None

def autoset_gpu_layers(filepath, ctxsize, gpumem):  # shitty algo to determine how many layers to use
    """Estimate how many model layers can be offloaded to the GPU.

    Heuristic based on model file size, context size and available GPU
    memory. Returns 200 to signal "assume full offload", 0 for tiny
    files or on any error.
    """
    try:
        fsize = os.path.getsize(filepath)
        if fsize <= 10000000:  # dont bother with models < 10mb
            return 0

        cs = ctxsize if ctxsize else 0  # ctxsize may be None; treat as 0
        mem = gpumem
        # larger contexts need proportionally more VRAM headroom per layer
        csmul = 1.4 if cs > 8192 else 1.2 if cs > 4096 else 1.1 if cs > 2048 else 1.0

        if mem >= fsize * 1.6 * csmul:
            return 200  # assume full offload

        ggufmeta = read_gguf_metadata(filepath)
        if not ggufmeta or ggufmeta[0] == 0:  # fail to read or no layers
            sizeperlayer = fsize * csmul * 0.052
            return int(min(200, mem / sizeperlayer))

        layers, headcount, headkvlen = ggufmeta
        if headkvlen <= 0:
            headkvlen = 128  # fall back to a typical head dimension only when unknown
        ratio = mem / (fsize * csmul * 1.5)
        if headcount > 0:
            # refine ratio using an approximate per-layer KV cache cost
            ratio = max(ratio, mem / (fsize * 1.34 + (layers * headcount * headkvlen * cs * 4.25)))
        return int(ratio * layers)

    except Exception:
        return 0

def fetch_gpu_properties(testCL,testCU,testVK):
Expand Down Expand Up @@ -3071,39 +3066,41 @@ def display_help():
wb.open("https://github.com/LostRuins/koboldcpp/wiki")
except:
print("Cannot launch help in browser.")

def display_updates():
    """Open the latest-releases page in the default web browser.

    Failure to launch a browser is non-fatal and is only logged.
    """
    try:
        import webbrowser  # imported lazily; only needed for this action
        webbrowser.open("https://github.com/LostRuins/koboldcpp/releases/latest")
    except Exception:
        print("Cannot launch updates in browser.")

# Action buttons along the bottom of the tab view. Kept as explicit calls
# (rather than a loop over unnamed tuples) so each option stays readable.
ctk.CTkButton(tabs, text="Launch", fg_color="#2f8d3c", hover_color="#2faa3c", command=guilaunch, width=80, height=35).grid(row=1, column=1, sticky="se", padx=25, pady=5)
ctk.CTkButton(tabs, text="Update", fg_color="#9900cc", hover_color="#aa11dd", command=display_updates, width=90, height=35).grid(row=1, column=0, sticky="sw", padx=5, pady=5)
ctk.CTkButton(tabs, text="Save", fg_color="#084a66", hover_color="#085a88", command=save_config_gui, width=60, height=35).grid(row=1, column=1, sticky="sw", padx=5, pady=5)
ctk.CTkButton(tabs, text="Load", fg_color="#084a66", hover_color="#085a88", command=load_config_gui, width=60, height=35).grid(row=1, column=1, sticky="sw", padx=70, pady=5)
ctk.CTkButton(tabs, text="Help", fg_color="#992222", hover_color="#bb3333", command=display_help, width=60, height=35).grid(row=1, column=1, sticky="sw", padx=135, pady=5)

# start a thread that tries to get actual gpu names and layer counts,
# so the GUI is not blocked waiting on the probe
gpuinfo_thread = threading.Thread(target=auto_set_backend_gui)
gpuinfo_thread.start()

# runs main loop until closed or launch clicked
root.mainloop()

if nextstate == 0:
    exitcounter = 999
    print("Exiting by user request.")
    sys.exit(0)
else:
    # processing vars
    kcpp_exporting_template = False
    export_vars()

    if not any([args.model_param, args.sdmodel, args.whispermodel]):
        exitcounter = 999
        exit_with_error(2, "No text or image model file was selected. Exiting.")

def show_gui_msgbox(title,message):
print(title + ": " + message, flush=True)
Expand Down Expand Up @@ -3436,69 +3433,51 @@ def tunnel_reader():

def unload_libs():
    """Unload the backend shared library and release its handle.

    Locates a platform-appropriate close function (FreeLibrary/dlclose),
    closes the library handle, removes the bound C entry points, and
    resets the module-level `handle` to None. No-op if nothing is loaded.
    """
    global handle
    if not handle:
        return

    OS = platform.system()
    dll_close = None

    def setup_dll_close(lib, func_name="dlclose"):
        # Bind the close function from `lib` with a void* -> int signature.
        nonlocal dll_close
        dll_close = getattr(lib, func_name)
        dll_close.argtypes = [ctypes.c_void_p]
        dll_close.restype = ctypes.c_int

    if OS == "Windows":  # pragma: Windows
        from ctypes import wintypes
        dll_close = ctypes.windll.kernel32.FreeLibrary
        dll_close.argtypes = [wintypes.HMODULE]
        dll_close.restype = ctypes.c_int
    elif OS == "Darwin":
        # The C library's name is inconsistent across macOS versions; older
        # macOSs need the full path, which is not even in PATH.
        for lib_name in ["libc.dylib", "libSystem", "/usr/lib/system/libsystem_c.dylib"]:
            try:
                setup_dll_close(ctypes.CDLL(lib_name))
                break
            except OSError:
                continue
    elif OS == "Linux":
        try:
            setup_dll_close(ctypes.CDLL(""))
        except OSError:
            setup_dll_close(ctypes.CDLL("libc.so"))  # Alpine Linux
    elif sys.platform == "msys":
        # msys can also use `ctypes.CDLL("kernel32.dll").FreeLibrary()`.
        setup_dll_close(ctypes.CDLL("msys-2.0.dll"))
    elif sys.platform == "cygwin":
        setup_dll_close(ctypes.CDLL("cygwin1.dll"))
    elif OS == "FreeBSD":
        # FreeBSD uses `/usr/lib/libc.so.7` where `7` is another version number.
        # It is not in PATH but using its name instead of its path is somehow the
        # only way to open it. The name must include the .so.7 suffix.
        # NOTE(review): binds `close`, not `dlclose` — preserved from the
        # original code; verify this is intentional on FreeBSD.
        setup_dll_close(ctypes.CDLL("libc.so.7"), "close")

    if dll_close:
        print("Unloading Libraries...")
        dll_close(handle._handle)
        for attr in ['load_model', 'generate', 'new_token', 'get_stream_count', 'has_finished',
                     'get_last_eval_time', 'get_last_process_time', 'get_last_token_count',
                     'get_last_seed', 'get_total_gens', 'get_last_stop_reason', 'abort_generate',
                     'token_count', 'get_pending_output']:
            delattr(handle, attr)
        handle = None

Expand Down Expand Up @@ -3683,34 +3662,19 @@ def main(launch_args,start_server=True):
print(f"Warning: Chat Completions Adapter invalid or not found.")

# handle model downloads if needed.
# Each downloadable arg has its own set of permitted file extensions:
# mmproj may only be a .gguf; only sdmodel may be a .safetensors.
downloadable_args = {
    'model_param': (".gguf", ".bin"),
    'sdmodel': (".gguf", ".safetensors"),
    'mmproj': (".gguf",),
    'whispermodel': (".gguf", ".bin"),
}
for arg_name, valid_extensions in downloadable_args.items():
    arg_value = getattr(args, arg_name)
    if arg_value and arg_value != "":
        if arg_value.endswith("?download=true"):
            # strip the huggingface download suffix and persist the cleaned value
            arg_value = arg_value.replace("?download=true", "")
            setattr(args, arg_name, arg_value)

        is_url = arg_value.startswith(("http://", "https://"))
        if is_url and arg_value.endswith(valid_extensions):
            dlfile = download_model_from_url(arg_value)
            if dlfile:
                setattr(args, arg_name, dlfile)

# sanitize and replace the default vanity name. remember me....
if args.model_param and args.model_param!="":
Expand Down Expand Up @@ -3889,26 +3853,25 @@ def main(launch_args,start_server=True):
if not loadok:
exitcounter = 999
exit_with_error(3,"Could not load image model: " + imgmodel)

# handle whisper model: resolve the path and load it, or bail out
# (unless --ignoremissing tolerates a missing file)
if args.whispermodel:
    whispermodel = args.whispermodel
    if not os.path.exists(whispermodel):
        if args.ignoremissing:
            print(f"Ignoring missing whisper model file: {whispermodel}")
            args.whispermodel = None
        else:
            exitcounter = 999
            exit_with_error(2, f"Cannot find whisper model file: {whispermodel}")
    else:
        whispermodel = os.path.abspath(whispermodel)
        fullwhispermodelpath = whispermodel
        loadok = whisper_load_model(whispermodel)
        print(f"Load Whisper Model OK: {loadok}")
        if not loadok:
            exitcounter = 999
            exit_with_error(3, f"Could not load whisper model: {whispermodel}")

#load embedded lite
try:
Expand Down