diff --git a/koboldcpp.py b/koboldcpp.py index 100a2ed2593..1231e402621 100644 --- a/koboldcpp.py +++ b/koboldcpp.py @@ -557,71 +557,66 @@ def string_contains_sequence_substring(inputstr,sequences): import struct def read_gguf_metadata(file_path): - chunk_size = 8192 # read only first 8kb of file - try: - def read_gguf_key(keyname,data,maxval): - keylen = len(keyname) - index = data.find(keyname) # Search for the magic number, Read 2 chunks of 4 byte numbers - if index != -1 and index + keylen + 8 <= chunk_size: - start_index = index + keylen - first_value_bytes = data[start_index:start_index + 4] - second_value_bytes = data[start_index + 4:start_index + 8] - # Unpack each 4 bytes as an unsigned int32 in little-endian format - value1 = struct.unpack(' 0 and value2 <= maxval: - return value2 #contains the desired value - return 0 - else: - return 0 #not found + CHUNK_SIZE = 8192 # read only first 8kb of file + MIN_FILE_SIZE = 10000 # ignore files under 10kb + + def read_gguf_key(keyname, data, maxval): + index = data.find(keyname) # Search for the magic number + if index != -1 and index + len(keyname) + 8 <= CHUNK_SIZE: + start_index = index + len(keyname) + # Read 2 chunks of 4 byte numbers + # Unpack each 4 bytes as an unsigned int32 in little-endian format + value1, value2 = struct.unpack('10000000: #dont bother with models < 10mb - cs = ctxsize - mem = gpumem - csmul = 1.0 - if cs and cs > 8192: - csmul = 1.4 - elif cs and cs > 4096: - csmul = 1.2 - elif cs and cs > 2048: - csmul = 1.1 - if mem < fsize*1.6*csmul: - ggufmeta = read_gguf_metadata(filepath) - if not ggufmeta or ggufmeta[0]==0: #fail to read or no layers - sizeperlayer = fsize*csmul*0.052 - layerlimit = int(min(200,mem/sizeperlayer)) - else: - layers = ggufmeta[0] - headcount = ggufmeta[1] - headkvlen = (ggufmeta[2] if ggufmeta[2] > 0 else 128) - ratio = mem/(fsize*csmul*1.5) - if headcount > 0: - ratio = max(ratio,mem/(fsize*1.34 + (layers*headcount*headkvlen*cs*4.25))) - layerlimit = int(ratio*layers) - else: - layerlimit = 200 # assume full offload - return layerlimit - except Exception as ex: + if fsize <= 10000000: # dont bother with models < 10mb + return 0 + + cs = ctxsize + mem = gpumem + csmul = 1.0 + (0.4 if cs > 8192 else 0.2 if cs > 4096 else 0.1 if cs > 2048 else 0) + + if mem >= fsize * 1.6 * csmul: + return 200 # assume full offload + + ggufmeta = read_gguf_metadata(filepath) + if not ggufmeta or ggufmeta[0] == 0: # fail to read or no layers + sizeperlayer = fsize * csmul * 0.052 + return int(min(200, mem / sizeperlayer)) + + layers, headcount, headkvlen = ggufmeta + headkvlen = max(headkvlen, 128) + ratio = mem / (fsize * csmul * 1.5) + if headcount > 0: + ratio = max(ratio, mem / (fsize * 1.34 + (layers * headcount * headkvlen * cs * 4.25))) + + return int(ratio * layers) + + except Exception: return 0 def fetch_gpu_properties(testCL,testCU,testVK): @@ -3071,39 +3066,41 @@ def display_help(): wb.open("https://github.com/LostRuins/koboldcpp/wiki") except: print("Cannot launch help in browser.") + def display_updates(): try: - import webbrowser as wb - wb.open("https://github.com/LostRuins/koboldcpp/releases/latest") + import webbrowser + webbrowser.open("https://github.com/LostRuins/koboldcpp/releases/latest") except: print("Cannot launch updates in browser.") - - ctk.CTkButton(tabs , text = "Launch", fg_color="#2f8d3c", hover_color="#2faa3c", command = guilaunch, width=80, height = 35 ).grid(row=1,column=1, stick="se", padx= 25, pady=5) - - ctk.CTkButton(tabs , text = "Update", fg_color="#9900cc", hover_color="#aa11dd", command = display_updates, width=90, height = 35 ).grid(row=1,column=0, stick="sw", padx= 5, pady=5) - ctk.CTkButton(tabs , text = "Save", fg_color="#084a66", hover_color="#085a88", command = save_config_gui, width=60, height = 35 ).grid(row=1,column=1, stick="sw", padx= 5, pady=5) - ctk.CTkButton(tabs , text = "Load", fg_color="#084a66", hover_color="#085a88", command = load_config_gui, width=60, height = 35 ).grid(row=1,column=1, stick="sw", padx= 70, pady=5) - ctk.CTkButton(tabs , text = "Help", fg_color="#992222", hover_color="#bb3333", command = display_help, width=60, height = 35 ).grid(row=1,column=1, stick="sw", padx= 135, pady=5) - - # start a thread that tries to get actual gpu names and layer counts + + buttons = [ + ("Launch", "#2f8d3c", "#2faa3c", guilaunch, 80, 1, 1, "se", 25), + ("Update", "#9900cc", "#aa11dd", display_updates, 90, 1, 0, "sw", 5), + ("Save", "#084a66", "#085a88", save_config_gui, 60, 1, 1, "sw", 5), + ("Load", "#084a66", "#085a88", load_config_gui, 60, 1, 1, "sw", 70), + ("Help", "#992222", "#bb3333", display_help, 60, 1, 1, "sw", 135) + ] + + for text, fg, hover, command, width, row, col, stick, padx in buttons: + ctk.CTkButton(tabs, text=text, fg_color=fg, hover_color=hover, command=command, + width=width, height=35).grid(row=row, column=col, sticky=stick, padx=padx, pady=5) + gpuinfo_thread = threading.Thread(target=auto_set_backend_gui) - gpuinfo_thread.start() #submit job in new thread so nothing is waiting - - # runs main loop until closed or launch clicked + gpuinfo_thread.start() + root.mainloop() - - if nextstate==0: + + if nextstate == 0: exitcounter = 999 print("Exiting by user request.") sys.exit(0) else: - # processing vars kcpp_exporting_template = False export_vars() - - if not args.model_param and not args.sdmodel and not args.whispermodel: + if not any([args.model_param, args.sdmodel, args.whispermodel]): exitcounter = 999 - exit_with_error(2,"No text or image model file was selected. Exiting.") + exit_with_error(2, "No text or image model file was selected. Exiting.") def show_gui_msgbox(title,message): print(title + ": " + message, flush=True) @@ -3436,69 +3433,51 @@ def tunnel_reader(): def unload_libs(): global handle + if not handle: + return + OS = platform.system() dll_close = None + + def setup_dll_close(lib, func_name="dlclose"): + nonlocal dll_close + dll_close = getattr(lib, func_name) + dll_close.argtypes = [ctypes.c_void_p] + dll_close.restype = ctypes.c_int + if OS == "Windows": # pragma: Windows from ctypes import wintypes dll_close = ctypes.windll.kernel32.FreeLibrary dll_close.argtypes = [wintypes.HMODULE] dll_close.restype = ctypes.c_int elif OS == "Darwin": - try: - try: # macOS 11 (Big Sur). Possibly also later macOS 10s. - stdlib = ctypes.CDLL("libc.dylib") + for lib_name in ["libc.dylib", "libSystem", "/usr/lib/system/libsystem_c.dylib"]: + try: + setup_dll_close(ctypes.CDLL(lib_name)) + break except OSError: - stdlib = ctypes.CDLL("libSystem") - except OSError: - # Older macOSs. Not only is the name inconsistent but it's - # not even in PATH. - stdlib = ctypes.CDLL("/usr/lib/system/libsystem_c.dylib") - dll_close = stdlib.dlclose - dll_close.argtypes = [ctypes.c_void_p] - dll_close.restype = ctypes.c_int + continue elif OS == "Linux": try: - stdlib = ctypes.CDLL("") + setup_dll_close(ctypes.CDLL("")) except OSError: - stdlib = ctypes.CDLL("libc.so") # Alpine Linux. - dll_close = stdlib.dlclose - dll_close.argtypes = [ctypes.c_void_p] - dll_close.restype = ctypes.c_int + setup_dll_close(ctypes.CDLL("libc.so")) # Alpine Linux elif sys.platform == "msys": - # msys can also use `ctypes.CDLL("kernel32.dll").FreeLibrary()`. - stdlib = ctypes.CDLL("msys-2.0.dll") - dll_close = stdlib.dlclose - dll_close.argtypes = [ctypes.c_void_p] - dll_close.restype = ctypes.c_int + setup_dll_close(ctypes.CDLL("msys-2.0.dll")) elif sys.platform == "cygwin": - stdlib = ctypes.CDLL("cygwin1.dll") - dll_close = stdlib.dlclose - dll_close.argtypes = [ctypes.c_void_p] - dll_close.restype = ctypes.c_int + setup_dll_close(ctypes.CDLL("cygwin1.dll")) elif OS == "FreeBSD": - # FreeBSD uses `/usr/lib/libc.so.7` where `7` is another version number. - # It is not in PATH but using its name instead of its path is somehow the - # only way to open it. The name must include the .so.7 suffix. - stdlib = ctypes.CDLL("libc.so.7") - dll_close = stdlib.close + setup_dll_close(ctypes.CDLL("libc.so.7"), "close") - if handle and dll_close: + if dll_close: print("Unloading Libraries...") dll_close(handle._handle) - del handle.load_model - del handle.generate - del handle.new_token - del handle.get_stream_count - del handle.has_finished - del handle.get_last_eval_time - del handle.get_last_process_time - del handle.get_last_token_count - del handle.get_last_seed - del handle.get_total_gens - del handle.get_last_stop_reason - del handle.abort_generate - del handle.token_count - del handle.get_pending_output + for attr in ['load_model', 'generate', 'new_token', 'get_stream_count', 'has_finished', + 'get_last_eval_time', 'get_last_process_time', 'get_last_token_count', + 'get_last_seed', 'get_total_gens', 'get_last_stop_reason', 'abort_generate', + 'token_count', 'get_pending_output']: + delattr(handle, attr) + global handle del handle handle = None @@ -3683,34 +3662,19 @@ def main(launch_args,start_server=True): print(f"Warning: Chat Completions Adapter invalid or not found.") # handle model downloads if needed - if args.model_param and args.model_param!="": - if args.model_param.endswith("?download=true"): - args.model_param = args.model_param.replace("?download=true","") - if (args.model_param.startswith("http://") or args.model_param.startswith("https://")) and (args.model_param.endswith(".gguf") or args.model_param.endswith(".bin")): - dlfile = download_model_from_url(args.model_param) - if dlfile: - args.model_param = dlfile - if args.sdmodel and args.sdmodel!="": - if args.sdmodel.endswith("?download=true"): - args.sdmodel = args.sdmodel.replace("?download=true","") - if (args.sdmodel.startswith("http://") or args.sdmodel.startswith("https://")) and (args.sdmodel.endswith(".gguf") or args.sdmodel.endswith(".safetensors")): - dlfile = download_model_from_url(args.sdmodel) - if dlfile: - args.sdmodel = dlfile - if args.mmproj and args.mmproj!="": - if args.mmproj.endswith("?download=true"): - args.mmproj = args.mmproj.replace("?download=true","") - if (args.mmproj.startswith("http://") or args.mmproj.startswith("https://")) and (args.mmproj.endswith(".gguf")): - dlfile = download_model_from_url(args.mmproj) - if dlfile: - args.mmproj = dlfile - if args.whispermodel and args.whispermodel!="": - if args.whispermodel.endswith("?download=true"): - args.whispermodel = args.whispermodel.replace("?download=true","") - if (args.whispermodel.startswith("http://") or args.whispermodel.startswith("https://")) and (args.whispermodel.endswith(".gguf") or args.whispermodel.endswith(".bin")): - dlfile = download_model_from_url(args.whispermodel) - if dlfile: - args.whispermodel = dlfile + for arg_name in ['model_param', 'sdmodel', 'mmproj', 'whispermodel']: + arg_value = getattr(args, arg_name) + if arg_value and arg_value != "": + if arg_value.endswith("?download=true"): + arg_value = arg_value[:-14] # Remove "?download=true" + + is_url = arg_value.startswith(("http://", "https://")) + valid_extensions = (".gguf", ".bin", ".safetensors") + + if is_url and arg_value.endswith(valid_extensions): + dlfile = download_model_from_url(arg_value) + if dlfile: + setattr(args, arg_name, dlfile) # sanitize and replace the default vanity name. remember me.... if args.model_param and args.model_param!="": @@ -3889,26 +3853,25 @@ def main(launch_args,start_server=True): if not loadok: exitcounter = 999 exit_with_error(3,"Could not load image model: " + imgmodel) - - #handle whisper model - if args.whispermodel and args.whispermodel!="": + + # handle whisper model + if args.whispermodel: whispermodel = args.whispermodel - if not whispermodel or not os.path.exists(whispermodel): + if not os.path.exists(whispermodel): if args.ignoremissing: print(f"Ignoring missing whisper model file: {whispermodel}") args.whispermodel = None else: exitcounter = 999 - exit_with_error(2,f"Cannot find whisper model file: {whispermodel}") + exit_with_error(2, f"Cannot find whisper model file: {whispermodel}") else: whispermodel = os.path.abspath(whispermodel) fullwhispermodelpath = whispermodel loadok = whisper_load_model(whispermodel) - print("Load Whisper Model OK: " + str(loadok)) + print(f"Load Whisper Model OK: {loadok}") if not loadok: exitcounter = 999 - exit_with_error(3,"Could not load whisper model: " + whispermodel) - + exit_with_error(3, f"Could not load whisper model: {whispermodel}") #load embedded lite try: