From bd5737e0f8a83a89c103dd3a013fc3a451b7b0b2 Mon Sep 17 00:00:00 2001
From: yonigozlan <yoni.gozlan@huggingface.co>
Date: Tue, 7 Apr 2026 16:23:28 +0000
Subject: [PATCH] remove REGISTERED_TOKENIZER_CLASSES

---
 src/transformers/models/auto/tokenization_auto.py | 15 ++++-----------
 tests/models/auto/test_tokenization_auto.py       |  5 -----
 2 files changed, 4 insertions(+), 16 deletions(-)

diff --git a/src/transformers/models/auto/tokenization_auto.py b/src/transformers/models/auto/tokenization_auto.py
index 1b38f2e7a3f1..dbb8e4d95d97 100644
--- a/src/transformers/models/auto/tokenization_auto.py
+++ b/src/transformers/models/auto/tokenization_auto.py
@@ -58,7 +58,6 @@
 logger = logging.get_logger(__name__)
 
 # V5: Simplified mapping - single tokenizer class per model type (always prefer tokenizers-based)
-REGISTERED_TOKENIZER_CLASSES: dict[str, type[Any]] = {}
 REGISTERED_FAST_ALIASES: dict[str, type[Any]] = {}
 
 TOKENIZER_MAPPING_NAMES = OrderedDict[str, str | None](
@@ -412,8 +411,10 @@ def tokenizer_class_from_name(class_name: str) -> type[Any] | None:
     if class_name in REGISTERED_FAST_ALIASES:
         return REGISTERED_FAST_ALIASES[class_name]
 
-    if class_name in REGISTERED_TOKENIZER_CLASSES:
-        return REGISTERED_TOKENIZER_CLASSES[class_name]
+    # User-registered classes take priority over built-ins
+    for tokenizer in TOKENIZER_MAPPING._extra_content.values():
+        if getattr(tokenizer, "__name__", None) == class_name:
+            return tokenizer
 
     if class_name == "TokenizersBackend":
         return TokenizersBackend
@@ -440,10 +441,6 @@ def tokenizer_class_from_name(class_name: str) -> type[Any] | None:
             except AttributeError:
                 continue
 
-    for tokenizer in TOKENIZER_MAPPING._extra_content.values():
-        if getattr(tokenizer, "__name__", None) == class_name:
-            return tokenizer
-
     # We did not find the class, but maybe it's because a dep is missing. In that case, the class will be in the main
     # init and we return the proper dummy to get an appropriate error message.
     main_module = importlib.import_module("transformers")
@@ -858,10 +855,6 @@ def register(
             else:
                 raise ValueError("You need to pass a `tokenizer_class`")
 
-        for candidate in (slow_tokenizer_class, fast_tokenizer_class, tokenizer_class):
-            if candidate is not None:
-                REGISTERED_TOKENIZER_CLASSES[candidate.__name__] = candidate
-
         if slow_tokenizer_class is not None and fast_tokenizer_class is not None:
             REGISTERED_FAST_ALIASES[slow_tokenizer_class.__name__] = fast_tokenizer_class
 
diff --git a/tests/models/auto/test_tokenization_auto.py b/tests/models/auto/test_tokenization_auto.py
index 2bc79a3f82d6..3c0150fca28f 100644
--- a/tests/models/auto/test_tokenization_auto.py
+++ b/tests/models/auto/test_tokenization_auto.py
@@ -45,7 +45,6 @@
 from transformers.models.auto.configuration_auto import CONFIG_MAPPING, AutoConfig
 from transformers.models.auto.tokenization_auto import (
     REGISTERED_FAST_ALIASES,
-    REGISTERED_TOKENIZER_CLASSES,
     TOKENIZER_MAPPING,
     TOKENIZER_MAPPING_NAMES,
     get_tokenizer_config,
@@ -364,7 +363,6 @@ def test_new_tokenizer_registration(self):
                 del CONFIG_MAPPING._extra_content["custom"]
             if CustomConfig in TOKENIZER_MAPPING._extra_content:
                 del TOKENIZER_MAPPING._extra_content[CustomConfig]
-            REGISTERED_TOKENIZER_CLASSES.pop("CustomTokenizer", None)
 
     @require_tokenizers
     def test_new_tokenizer_fast_registration(self):
@@ -409,8 +407,6 @@ def test_new_tokenizer_fast_registration(self):
                 del CONFIG_MAPPING._extra_content["custom"]
             if CustomConfig in TOKENIZER_MAPPING._extra_content:
                 del TOKENIZER_MAPPING._extra_content[CustomConfig]
-            REGISTERED_TOKENIZER_CLASSES.pop("CustomTokenizer", None)
-            REGISTERED_TOKENIZER_CLASSES.pop("CustomTokenizerFast", None)
             REGISTERED_FAST_ALIASES.pop("CustomTokenizer", None)
 
     def test_from_pretrained_dynamic_tokenizer(self):
@@ -523,7 +519,6 @@ class NewTokenizer(BertTokenizer):
                 del CONFIG_MAPPING._extra_content["custom"]
             if CustomConfig in TOKENIZER_MAPPING._extra_content:
                 del TOKENIZER_MAPPING._extra_content[CustomConfig]
-            REGISTERED_TOKENIZER_CLASSES.pop("NewTokenizer", None)
 
     def test_from_pretrained_dynamic_tokenizer_legacy_format(self):
         tokenizer = AutoTokenizer.from_pretrained(