tc-mb · tc-mb · Jun 4, 2024 · Jun 2, 2024 · Jun 4, 2024
diff --git a/Makefile b/Makefile
@@ -862,12 +862,12 @@ llava-cli: examples/llava/llava-cli.cpp examples/llava/clip.h examples/llava/cli
 	$(CXX) $(CXXFLAGS) -c examples/llava/llava.cpp -o $(call GET_OBJ_FILE, examples/llava/llava.cpp)
 	$(CXX) $(CXXFLAGS) $(filter-out %.h $< examples/llava/clip.cpp examples/llava/llava.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) $(call GET_OBJ_FILE, examples/llava/llava.cpp) -o $@ $(LDFLAGS)
 
-minicpmv-cli: examples/minicpmv/minicpmv-cli.cpp examples/minicpmv/clip.h examples/minicpmv/clip.cpp examples/minicpmv/minicpmv.h examples/minicpmv/minicpmv.cpp examples/minicpmv/minicpmv_wrapper.h examples/minicpmv/minicpmv_wrapper.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
+minicpmv-cli: examples/llava/minicpmv-cli.cpp examples/llava/clip.h examples/llava/clip.cpp examples/llava/llava.h examples/llava/llava.cpp examples/llava/minicpmv_wrapper.h examples/llava/minicpmv_wrapper.cpp ggml.o llama.o $(COMMON_DEPS) $(OBJS)
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
-	$(CXX) $(CXXFLAGS) -c examples/minicpmv/clip.cpp  -o $(call GET_OBJ_FILE, examples/minicpmv/clip.cpp) -Wno-cast-qual
-	$(CXX) $(CXXFLAGS) -c examples/minicpmv/minicpmv.cpp -o $(call GET_OBJ_FILE, examples/minicpmv/minicpmv.cpp)
-	$(CXX) $(CXXFLAGS) -c examples/minicpmv/minicpmv_wrapper.cpp  -o $(call GET_OBJ_FILE, examples/minicpmv/minicpmv_wrapper.cpp)
-	$(CXX) $(CXXFLAGS) $(filter-out %.h $< examples/minicpmv/clip.cpp examples/minicpmv/minicpmv.cpp examples/minicpmv/minicpmv_wrapper.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/minicpmv/clip.cpp) $(call GET_OBJ_FILE, examples/minicpmv/minicpmv.cpp) $(call GET_OBJ_FILE, examples/minicpmv/minicpmv_wrapper.cpp) -o $@ $(LDFLAGS)
+	$(CXX) $(CXXFLAGS) -c examples/llava/clip.cpp  -o $(call GET_OBJ_FILE, examples/llava/clip.cpp) -Wno-cast-qual
+	$(CXX) $(CXXFLAGS) -c examples/llava/llava.cpp -o $(call GET_OBJ_FILE, examples/llava/llava.cpp)
+	$(CXX) $(CXXFLAGS) -c examples/llava/minicpmv_wrapper.cpp  -o $(call GET_OBJ_FILE, examples/llava/minicpmv_wrapper.cpp)
+	$(CXX) $(CXXFLAGS) $(filter-out %.h $< examples/llava/clip.cpp examples/llava/llava.cpp examples/llava/minicpmv_wrapper.cpp,$^) $(call GET_OBJ_FILE, $<) $(call GET_OBJ_FILE, examples/llava/clip.cpp) $(call GET_OBJ_FILE, examples/llava/llava.cpp) $(call GET_OBJ_FILE, examples/llava/minicpmv_wrapper.cpp) -o $@ $(LDFLAGS)
 
 baby-llama: examples/baby-llama/baby-llama.cpp ggml.o llama.o $(COMMON_DEPS) train.o $(OBJS)
 	$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
@@ -25,8 +25,6 @@
     sys.path.insert(1, str(Path(__file__).parent / 'gguf-py'))
 import gguf
 
-from convert import LlamaHfVocab
-
 logger = logging.getLogger("hf-to-gguf")
 
 
@@ -634,7 +632,7 @@ def _set_vocab_sentencepiece(self):
         special_vocab.add_to_gguf(self.gguf_writer)
 
     def _set_vocab_llama_hf(self):
-        vocab = LlamaHfVocab(self.dir_model)
+        vocab = gguf.LlamaHfVocab(self.dir_model)
         tokens = []
         scores = []
         toktypes = []
@@ -675,6 +673,44 @@ def set_gguf_parameters(self):
         self.gguf_writer.add_parallel_residual(self.hparams.get("use_parallel_residual", True))
         self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_eps"])
 
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        del bid  # unused
+
+        n_head = self.hparams.get("n_head", self.hparams.get("num_attention_heads"))
+        n_embed = self.hparams.get("hidden_size", self.hparams.get("n_embed"))
+
+        tensors: list[tuple[str, Tensor]] = []
+
+        if re.match(r"gpt_neox\.layers\.\d+\.attention\.query_key_value\.weight", name):
+            # Map bloom-style qkv_linear to gpt-style qkv_linear
+            # bloom: https://github.com/huggingface/transformers/blob/main/src/transformers/models/bloom/modeling_bloom.py#L238-L252  # noqa
+            # gpt-2: https://github.com/huggingface/transformers/blob/main/src/transformers/models/gpt2/modeling_gpt2.py#L312  # noqa
+            qkv_weights = data_torch.reshape((n_head, 3, n_embed // n_head, n_embed))
+            data_torch = torch.cat(
+                (
+                    qkv_weights[:, 0, :, :].reshape((-1, n_embed)),
+                    qkv_weights[:, 1, :, :].reshape((-1, n_embed)),
+                    qkv_weights[:, 2, :, :].reshape((-1, n_embed)),
+                ),
+                dim=0,
+            )
+            logger.info("re-format attention.linear_qkv.weight")
+        elif re.match(r"gpt_neox\.layers\.\d+\.attention\.query_key_value\.bias", name):
+            qkv_bias = data_torch.reshape((n_head, 3, n_embed // n_head))
+            data_torch = torch.cat(
+                (
+                    qkv_bias[:, 0, :].reshape((n_embed,)),
+                    qkv_bias[:, 1, :].reshape((n_embed,)),
+                    qkv_bias[:, 2, :].reshape((n_embed,)),
+                ),
+                dim=0,
+            )
+            logger.info("re-format attention.linear_qkv.bias")
+
+        tensors.append((self.map_tensor_name(name), data_torch))
+
+        return tensors
+
 
 @Model.register("BloomForCausalLM")
 class BloomModel(Model):

diff --git a/examples/llava/CMakeLists.txt b/examples/llava/CMakeLists.txt
@@ -35,3 +35,8 @@ add_executable(llava-cli llava-cli.cpp)
 install(TARGETS llava-cli RUNTIME)
 target_link_libraries(llava-cli PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(llava PRIVATE cxx_std_11)
+
+add_library(minicpmv_wrapper OBJECT
+        minicpmv_wrapper.cpp
+)
+target_link_libraries(minicpmv_wrapper PRIVATE llava ${CMAKE_THREAD_LIBS_INIT})
diff --git a/examples/minicpmv/README.md → examples/llava/README_minicpmv2.5.md b/examples/minicpmv/README.md → examples/llava/README_minicpmv2.5.md
diff --git a/...les/minicpmv/assets/xiaomi14pro_test.jpeg → examples/llava/assets/xiaomi14pro_test.jpeg b/...les/minicpmv/assets/xiaomi14pro_test.jpeg → examples/llava/assets/xiaomi14pro_test.jpeg