huggingface · mohdfaour03 · Apr 8, 2026 · Apr 9, 2026 · itazap · Apr 9, 2026
diff --git a/src/transformers/tokenization_utils_tokenizers.py b/src/transformers/tokenization_utils_tokenizers.py
@@ -1360,11 +1360,11 @@ def is_base_mistral(model_id: str) -> bool:
                         ),
                         behavior="isolated",
                     )
-                    current_pretokenizer = tokenizer.backend_tokenizer.pre_tokenizer
+                    current_pretokenizer = tokenizer.pre_tokenizer
                     # Check if it's already a Sequence
                     if isinstance(current_pretokenizer, tokenizers.pre_tokenizers.Sequence):
                         # Replace the first element (the Split pattern)
-                        tokenizer.backend_tokenizer.pre_tokenizer[0] = split_pretokenizer
+                        tokenizer.pre_tokenizer[0] = split_pretokenizer
                     else:
                         # Replace Metaspace with ByteLevel when adding Split, as Metaspace(split=False) doesn't
                         # work correctly with the Split pre-tokenizer and causes spaces to be lost during encoding
@@ -1374,7 +1374,7 @@ def is_base_mistral(model_id: str) -> bool:
                             )
 
                         # Not a Sequence, so create one with Split + current pretokenizer
-                        tokenizer.backend_tokenizer.pre_tokenizer = tokenizers.pre_tokenizers.Sequence(
+                        tokenizer.pre_tokenizer = tokenizers.pre_tokenizers.Sequence(
                             [
                                 split_pretokenizer,
                                 current_pretokenizer,

diff --git a/tests/models/auto/test_tokenization_auto.py b/tests/models/auto/test_tokenization_auto.py
@@ -306,6 +306,27 @@ def test_auto_tokenizer_from_mistral_patching(self):
             "mistralai/Ministral-3-3B-Instruct-2512", fix_mistral_regex=True
         )  # should not error
 
+    @require_tokenizers
+    def test_auto_tokenizer_mistral_patching_applies_pretokenizer(self):
+        """fix_mistral_regex=True must set the flag and leave a Sequence pre_tokenizer without AttributeError."""
+        import tokenizers
+
+        tokenizer = AutoTokenizer.from_pretrained("mistralai/Ministral-3-3B-Instruct-2512")
+        # Write a config.json with an old transformers_version ("4.50.0") so the patching code path is exercised
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            config_path = os.path.join(tmp_dir, "config.json")
+            with open(config_path, "w", encoding="utf-8") as f:
+                json.dump({"model_type": "mistral", "transformers_version": "4.50.0"}, f)
+
+            patched = TokenizersBackend._patch_mistral_regex(
+                tokenizer._tokenizer,  # NOTE(review): presumably the raw `tokenizers.Tokenizer` backend - confirm
+                tmp_dir,
+                is_local=True,
+                fix_mistral_regex=True,
+            )
+        self.assertTrue(getattr(patched, "fix_mistral_regex", False))
+        self.assertIsInstance(patched.pre_tokenizer, tokenizers.pre_tokenizers.Sequence)
+
     @require_tokenizers
     def test_auto_tokenizer_loads_bloom_repo_without_tokenizer_class(self):
         tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-BloomForCausalLM")