-
Notifications
You must be signed in to change notification settings - Fork 693
streamline more code snippets, hopefully to make them shorter as well as easier to maintain #1010
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
f4a504b
6536a97
e9331a5
75ff4ea
5db9acf
85625db
6f0feba
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -557,71 +557,66 @@ def string_contains_sequence_substring(inputstr,sequences): | |
| import struct | ||
|
|
||
| def read_gguf_metadata(file_path): | ||
| chunk_size = 8192 # read only first 8kb of file | ||
| try: | ||
| def read_gguf_key(keyname,data,maxval): | ||
| keylen = len(keyname) | ||
| index = data.find(keyname) # Search for the magic number, Read 2 chunks of 4 byte numbers | ||
| if index != -1 and index + keylen + 8 <= chunk_size: | ||
| start_index = index + keylen | ||
| first_value_bytes = data[start_index:start_index + 4] | ||
| second_value_bytes = data[start_index + 4:start_index + 8] | ||
| # Unpack each 4 bytes as an unsigned int32 in little-endian format | ||
| value1 = struct.unpack('<I', first_value_bytes)[0] #4 means its a uint32 | ||
| value2 = struct.unpack('<I', second_value_bytes)[0] | ||
| if value1 == 4 and value2 > 0 and value2 <= maxval: | ||
| return value2 #contains the desired value | ||
| return 0 | ||
| else: | ||
| return 0 #not found | ||
| CHUNK_SIZE = 8192 # read only first 8kb of file | ||
| MIN_FILE_SIZE = 10000 # ignore files under 10kb | ||
|
|
||
| def read_gguf_key(keyname, data, maxval): | ||
| index = data.find(keyname) # Search for the magic number | ||
| if index != -1 and index + len(keyname) + 8 <= CHUNK_SIZE: | ||
| start_index = index + len(keyname) | ||
| # Read 2 chunks of 4 byte numbers | ||
| # Unpack each 4 bytes as an unsigned int32 in little-endian format | ||
| value1, value2 = struct.unpack('<II', data[start_index:start_index + 8]) | ||
| if value1 == 4 and 0 < value2 <= maxval: | ||
| return value2 # contains the desired value | ||
| return 0 # not found | ||
|
|
||
| fsize = os.path.getsize(file_path) | ||
| if fsize < 10000: #ignore files under 10kb | ||
| try: | ||
| if os.path.getsize(file_path) < MIN_FILE_SIZE: # ignore files under 10kb | ||
| return None | ||
|
|
||
| with open(file_path, 'rb') as f: | ||
| file_header = f.read(4) | ||
| if file_header != b'GGUF': #file is not GGUF | ||
| if file_header != b'GGUF': # file is not GGUF | ||
| return None | ||
| data = f.read(chunk_size) | ||
| layercount = read_gguf_key(b'.block_count',data,512) | ||
| head_count_kv = read_gguf_key(b'.attention.head_count_kv',data,8192) | ||
| key_length = read_gguf_key(b'.attention.key_length',data,8192) | ||
| val_length = read_gguf_key(b'.attention.value_length',data,8192) | ||
| return [layercount,head_count_kv, max(key_length,val_length)] | ||
|
|
||
| data = f.read(CHUNK_SIZE) | ||
| layercount = read_gguf_key(b'.block_count', data, 512) | ||
| head_count_kv = read_gguf_key(b'.attention.head_count_kv', data, 8192) | ||
| key_length = read_gguf_key(b'.attention.key_length', data, 8192) | ||
| val_length = read_gguf_key(b'.attention.value_length', data, 8192) | ||
|
|
||
| return [layercount, head_count_kv, max(key_length, val_length)] | ||
| except Exception as ex: | ||
| return None | ||
|
|
||
| def autoset_gpu_layers(filepath,ctxsize,gpumem): #shitty algo to determine how many layers to use | ||
| def autoset_gpu_layers(filepath, ctxsize, gpumem): # shitty algo to determine how many layers to use | ||
| try: | ||
| layerlimit = 0 | ||
| fsize = os.path.getsize(filepath) | ||
| if fsize>10000000: #dont bother with models < 10mb | ||
| cs = ctxsize | ||
| mem = gpumem | ||
| csmul = 1.0 | ||
| if cs and cs > 8192: | ||
| csmul = 1.4 | ||
| elif cs and cs > 4096: | ||
| csmul = 1.2 | ||
| elif cs and cs > 2048: | ||
| csmul = 1.1 | ||
| if mem < fsize*1.6*csmul: | ||
| ggufmeta = read_gguf_metadata(filepath) | ||
| if not ggufmeta or ggufmeta[0]==0: #fail to read or no layers | ||
| sizeperlayer = fsize*csmul*0.052 | ||
| layerlimit = int(min(200,mem/sizeperlayer)) | ||
| else: | ||
| layers = ggufmeta[0] | ||
| headcount = ggufmeta[1] | ||
| headkvlen = (ggufmeta[2] if ggufmeta[2] > 0 else 128) | ||
| ratio = mem/(fsize*csmul*1.5) | ||
| if headcount > 0: | ||
| ratio = max(ratio,mem/(fsize*1.34 + (layers*headcount*headkvlen*cs*4.25))) | ||
| layerlimit = int(ratio*layers) | ||
| else: | ||
| layerlimit = 200 # assume full offload | ||
| return layerlimit | ||
| except Exception as ex: | ||
| if fsize <= 10000000: # dont bother with models < 10mb | ||
| return 0 | ||
|
|
||
| cs = ctxsize | ||
| mem = gpumem | ||
| csmul = 1.0 + (0.4 if cs > 8192 else 0.2 if cs > 4096 else 0.1 if cs > 2048 else 0) | ||
|
|
||
| if mem >= fsize * 1.6 * csmul: | ||
| return 200 # assume full offload | ||
|
|
||
| ggufmeta = read_gguf_metadata(filepath) | ||
| if not ggufmeta or ggufmeta[0] == 0: # fail to read or no layers | ||
| sizeperlayer = fsize * csmul * 0.052 | ||
| return int(min(200, mem / sizeperlayer)) | ||
|
|
||
| layers, headcount, headkvlen = ggufmeta | ||
| headkvlen = max(headkvlen, 128) | ||
| ratio = mem / (fsize * csmul * 1.5) | ||
| if headcount > 0: | ||
| ratio = max(ratio, mem / (fsize * 1.34 + (layers * headcount * headkvlen * cs * 4.25))) | ||
|
|
||
| return int(ratio * layers) | ||
|
|
||
| except Exception: | ||
| return 0 | ||
|
|
||
| def fetch_gpu_properties(testCL,testCU,testVK): | ||
|
|
@@ -3071,39 +3066,41 @@ def display_help(): | |
| wb.open("https://github.com/LostRuins/koboldcpp/wiki") | ||
| except: | ||
| print("Cannot launch help in browser.") | ||
|
|
||
| def display_updates(): | ||
| try: | ||
| import webbrowser as wb | ||
| wb.open("https://github.com/LostRuins/koboldcpp/releases/latest") | ||
| import webbrowser | ||
| webbrowser.open("https://github.com/LostRuins/koboldcpp/releases/latest") | ||
| except: | ||
| print("Cannot launch updates in browser.") | ||
|
|
||
| ctk.CTkButton(tabs , text = "Launch", fg_color="#2f8d3c", hover_color="#2faa3c", command = guilaunch, width=80, height = 35 ).grid(row=1,column=1, stick="se", padx= 25, pady=5) | ||
|
|
||
| ctk.CTkButton(tabs , text = "Update", fg_color="#9900cc", hover_color="#aa11dd", command = display_updates, width=90, height = 35 ).grid(row=1,column=0, stick="sw", padx= 5, pady=5) | ||
| ctk.CTkButton(tabs , text = "Save", fg_color="#084a66", hover_color="#085a88", command = save_config_gui, width=60, height = 35 ).grid(row=1,column=1, stick="sw", padx= 5, pady=5) | ||
| ctk.CTkButton(tabs , text = "Load", fg_color="#084a66", hover_color="#085a88", command = load_config_gui, width=60, height = 35 ).grid(row=1,column=1, stick="sw", padx= 70, pady=5) | ||
| ctk.CTkButton(tabs , text = "Help", fg_color="#992222", hover_color="#bb3333", command = display_help, width=60, height = 35 ).grid(row=1,column=1, stick="sw", padx= 135, pady=5) | ||
|
|
||
| # start a thread that tries to get actual gpu names and layer counts | ||
|
|
||
| buttons = [ | ||
| ("Launch", "#2f8d3c", "#2faa3c", guilaunch, 80, 1, 1, "se", 25), | ||
| ("Update", "#9900cc", "#aa11dd", display_updates, 90, 1, 0, "sw", 5), | ||
| ("Save", "#084a66", "#085a88", save_config_gui, 60, 1, 1, "sw", 5), | ||
| ("Load", "#084a66", "#085a88", load_config_gui, 60, 1, 1, "sw", 70), | ||
| ("Help", "#992222", "#bb3333", display_help, 60, 1, 1, "sw", 135) | ||
| ] | ||
|
|
||
| for text, fg, hover, command, width, row, col, stick, padx in buttons: | ||
|
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Doing it this way is significantly less readable. In the original code, it's very clear what each value means, e.g. 80 refers to the width. Now, by splitting it into an array of unnamed values, you have the width but not the height, and the x padding but not the y padding. It becomes super confusing. |
||
| ctk.CTkButton(tabs, text=text, fg_color=fg, hover_color=hover, command=command, | ||
| width=width, height=35).grid(row=row, column=col, sticky=stick, padx=padx, pady=5) | ||
|
|
||
| gpuinfo_thread = threading.Thread(target=auto_set_backend_gui) | ||
| gpuinfo_thread.start() #submit job in new thread so nothing is waiting | ||
|
|
||
| # runs main loop until closed or launch clicked | ||
| gpuinfo_thread.start() | ||
|
|
||
| root.mainloop() | ||
|
|
||
| if nextstate==0: | ||
| if nextstate == 0: | ||
| exitcounter = 999 | ||
| print("Exiting by user request.") | ||
| sys.exit(0) | ||
| else: | ||
| # processing vars | ||
| kcpp_exporting_template = False | ||
| export_vars() | ||
|
|
||
| if not args.model_param and not args.sdmodel and not args.whispermodel: | ||
| if not any([args.model_param, args.sdmodel, args.whispermodel]): | ||
| exitcounter = 999 | ||
| exit_with_error(2,"No text or image model file was selected. Exiting.") | ||
| exit_with_error(2, "No text or image model file was selected. Exiting.") | ||
|
|
||
| def show_gui_msgbox(title,message): | ||
| print(title + ": " + message, flush=True) | ||
|
|
@@ -3436,69 +3433,51 @@ def tunnel_reader(): | |
|
|
||
| def unload_libs(): | ||
| global handle | ||
| if not handle: | ||
| return | ||
|
|
||
| OS = platform.system() | ||
| dll_close = None | ||
|
|
||
| def setup_dll_close(lib, func_name="dlclose"): | ||
| nonlocal dll_close | ||
| dll_close = getattr(lib, func_name) | ||
| dll_close.argtypes = [ctypes.c_void_p] | ||
| dll_close.restype = ctypes.c_int | ||
|
|
||
| if OS == "Windows": # pragma: Windows | ||
| from ctypes import wintypes | ||
| dll_close = ctypes.windll.kernel32.FreeLibrary | ||
| dll_close.argtypes = [wintypes.HMODULE] | ||
| dll_close.restype = ctypes.c_int | ||
| elif OS == "Darwin": | ||
| try: | ||
| try: # macOS 11 (Big Sur). Possibly also later macOS 10s. | ||
| stdlib = ctypes.CDLL("libc.dylib") | ||
| for lib_name in ["libc.dylib", "libSystem", "/usr/lib/system/libsystem_c.dylib"]: | ||
| try: | ||
| setup_dll_close(ctypes.CDLL(lib_name)) | ||
| break | ||
| except OSError: | ||
| stdlib = ctypes.CDLL("libSystem") | ||
| except OSError: | ||
| # Older macOSs. Not only is the name inconsistent but it's | ||
| # not even in PATH. | ||
| stdlib = ctypes.CDLL("/usr/lib/system/libsystem_c.dylib") | ||
| dll_close = stdlib.dlclose | ||
| dll_close.argtypes = [ctypes.c_void_p] | ||
| dll_close.restype = ctypes.c_int | ||
| continue | ||
| elif OS == "Linux": | ||
| try: | ||
| stdlib = ctypes.CDLL("") | ||
| setup_dll_close(ctypes.CDLL("")) | ||
| except OSError: | ||
| stdlib = ctypes.CDLL("libc.so") # Alpine Linux. | ||
| dll_close = stdlib.dlclose | ||
| dll_close.argtypes = [ctypes.c_void_p] | ||
| dll_close.restype = ctypes.c_int | ||
| setup_dll_close(ctypes.CDLL("libc.so")) # Alpine Linux | ||
| elif sys.platform == "msys": | ||
| # msys can also use `ctypes.CDLL("kernel32.dll").FreeLibrary()`. | ||
| stdlib = ctypes.CDLL("msys-2.0.dll") | ||
| dll_close = stdlib.dlclose | ||
| dll_close.argtypes = [ctypes.c_void_p] | ||
| dll_close.restype = ctypes.c_int | ||
| setup_dll_close(ctypes.CDLL("msys-2.0.dll")) | ||
| elif sys.platform == "cygwin": | ||
| stdlib = ctypes.CDLL("cygwin1.dll") | ||
| dll_close = stdlib.dlclose | ||
| dll_close.argtypes = [ctypes.c_void_p] | ||
| dll_close.restype = ctypes.c_int | ||
| setup_dll_close(ctypes.CDLL("cygwin1.dll")) | ||
| elif OS == "FreeBSD": | ||
| # FreeBSD uses `/usr/lib/libc.so.7` where `7` is another version number. | ||
| # It is not in PATH but using its name instead of its path is somehow the | ||
| # only way to open it. The name must include the .so.7 suffix. | ||
| stdlib = ctypes.CDLL("libc.so.7") | ||
| dll_close = stdlib.close | ||
| setup_dll_close(ctypes.CDLL("libc.so.7"), "close") | ||
|
|
||
| if handle and dll_close: | ||
| if dll_close: | ||
| print("Unloading Libraries...") | ||
| dll_close(handle._handle) | ||
| del handle.load_model | ||
| del handle.generate | ||
| del handle.new_token | ||
| del handle.get_stream_count | ||
| del handle.has_finished | ||
| del handle.get_last_eval_time | ||
| del handle.get_last_process_time | ||
| del handle.get_last_token_count | ||
| del handle.get_last_seed | ||
| del handle.get_total_gens | ||
| del handle.get_last_stop_reason | ||
| del handle.abort_generate | ||
| del handle.token_count | ||
| del handle.get_pending_output | ||
| for attr in ['load_model', 'generate', 'new_token', 'get_stream_count', 'has_finished', | ||
| 'get_last_eval_time', 'get_last_process_time', 'get_last_token_count', | ||
| 'get_last_seed', 'get_total_gens', 'get_last_stop_reason', 'abort_generate', | ||
| 'token_count', 'get_pending_output']: | ||
| delattr(handle, attr) | ||
| global handle | ||
| del handle | ||
| handle = None | ||
|
|
||
|
|
@@ -3683,34 +3662,19 @@ def main(launch_args,start_server=True): | |
| print(f"Warning: Chat Completions Adapter invalid or not found.") | ||
|
|
||
| # handle model downloads if needed | ||
| if args.model_param and args.model_param!="": | ||
| if args.model_param.endswith("?download=true"): | ||
| args.model_param = args.model_param.replace("?download=true","") | ||
| if (args.model_param.startswith("http://") or args.model_param.startswith("https://")) and (args.model_param.endswith(".gguf") or args.model_param.endswith(".bin")): | ||
| dlfile = download_model_from_url(args.model_param) | ||
| if dlfile: | ||
| args.model_param = dlfile | ||
| if args.sdmodel and args.sdmodel!="": | ||
| if args.sdmodel.endswith("?download=true"): | ||
| args.sdmodel = args.sdmodel.replace("?download=true","") | ||
| if (args.sdmodel.startswith("http://") or args.sdmodel.startswith("https://")) and (args.sdmodel.endswith(".gguf") or args.sdmodel.endswith(".safetensors")): | ||
| dlfile = download_model_from_url(args.sdmodel) | ||
| if dlfile: | ||
| args.sdmodel = dlfile | ||
| if args.mmproj and args.mmproj!="": | ||
| if args.mmproj.endswith("?download=true"): | ||
| args.mmproj = args.mmproj.replace("?download=true","") | ||
| if (args.mmproj.startswith("http://") or args.mmproj.startswith("https://")) and (args.mmproj.endswith(".gguf")): | ||
| dlfile = download_model_from_url(args.mmproj) | ||
| if dlfile: | ||
| args.mmproj = dlfile | ||
| if args.whispermodel and args.whispermodel!="": | ||
| if args.whispermodel.endswith("?download=true"): | ||
| args.whispermodel = args.whispermodel.replace("?download=true","") | ||
| if (args.whispermodel.startswith("http://") or args.whispermodel.startswith("https://")) and (args.whispermodel.endswith(".gguf") or args.whispermodel.endswith(".bin")): | ||
| dlfile = download_model_from_url(args.whispermodel) | ||
| if dlfile: | ||
| args.whispermodel = dlfile | ||
| for arg_name in ['model_param', 'sdmodel', 'mmproj', 'whispermodel']: | ||
| arg_value = getattr(args, arg_name) | ||
| if arg_value and arg_value != "": | ||
| if arg_value.endswith("?download=true"): | ||
| arg_value = arg_value[:-14] # Remove "?download=true" | ||
|
|
||
| is_url = arg_value.startswith(("http://", "https://")) | ||
| valid_extensions = (".gguf", ".bin", ".safetensors") | ||
|
Comment on lines
+3668
to
+3672
Owner
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is WRONG!!! For example, mmproj cannot be a safetensors file! Are you using an LLM to refactor the code? Please do not do that without going through what the code does. Also, why would you truncate a string by an offset when just replacing it is so much clearer and more intuitive? |
||
|
|
||
| if is_url and arg_value.endswith(valid_extensions): | ||
| dlfile = download_model_from_url(arg_value) | ||
| if dlfile: | ||
| setattr(args, arg_name, dlfile) | ||
|
|
||
| # sanitize and replace the default vanity name. remember me.... | ||
| if args.model_param and args.model_param!="": | ||
|
|
@@ -3889,26 +3853,25 @@ def main(launch_args,start_server=True): | |
| if not loadok: | ||
| exitcounter = 999 | ||
| exit_with_error(3,"Could not load image model: " + imgmodel) | ||
|
|
||
| #handle whisper model | ||
| if args.whispermodel and args.whispermodel!="": | ||
| # handle whisper model | ||
| if args.whispermodel: | ||
| whispermodel = args.whispermodel | ||
| if not whispermodel or not os.path.exists(whispermodel): | ||
| if not os.path.exists(whispermodel): | ||
| if args.ignoremissing: | ||
| print(f"Ignoring missing whisper model file: {whispermodel}") | ||
| args.whispermodel = None | ||
| else: | ||
| exitcounter = 999 | ||
| exit_with_error(2,f"Cannot find whisper model file: {whispermodel}") | ||
| exit_with_error(2, f"Cannot find whisper model file: {whispermodel}") | ||
| else: | ||
| whispermodel = os.path.abspath(whispermodel) | ||
| fullwhispermodelpath = whispermodel | ||
| loadok = whisper_load_model(whispermodel) | ||
| print("Load Whisper Model OK: " + str(loadok)) | ||
| print(f"Load Whisper Model OK: {loadok}") | ||
| if not loadok: | ||
| exitcounter = 999 | ||
| exit_with_error(3,"Could not load whisper model: " + whispermodel) | ||
|
|
||
| exit_with_error(3, f"Could not load whisper model: {whispermodel}") | ||
|
|
||
| #load embedded lite | ||
| try: | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
what is this even for? why?