We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 99400f3 · commit f2b65ec — Copy full SHA for f2b65ec
examples/puzzletron/mbridge_distillation/distill_hf_keval.py
@@ -221,7 +221,11 @@ def _build_model_provider(hf_path):
221
wandb_exp_name=args.wandb_exp_name,
222
),
223
tokenizer=TokenizerConfig(
224
- tokenizer_type="NullTokenizer", vocab_size=distill_provider.vocab_size
+ tokenizer_type="HuggingFaceTokenizer",
225
+ # Use the teacher's HF tokenizer (no student fallback is implemented here)
226
+ # In distillation, both models should use the same tokenizer to process the same input
227
+ tokenizer_model=args.teacher_hf_path,
228
+ vocab_size=distill_provider.vocab_size,
229
230
checkpoint=CheckpointConfig(
231
save_interval=args.eval_interval,
0 commit comments