We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 99400f3 · commit f2b65ec — Copy full SHA for f2b65ec
examples/puzzletron/mbridge_distillation/distill_hf_keval.py
@@ -221,7 +221,11 @@ def _build_model_provider(hf_path):
221
wandb_exp_name=args.wandb_exp_name,
222
),
223
tokenizer=TokenizerConfig(
224
- tokenizer_type="NullTokenizer", vocab_size=distill_provider.vocab_size
+ tokenizer_type="HuggingFaceTokenizer",
225
+ # Use the teacher's HF tokenizer (no student fallback is implemented here)
226
+ # In distillation, both models should use the same tokenizer to process the same input
227
+ tokenizer_model=args.teacher_hf_path,
228
+ vocab_size=distill_provider.vocab_size,
229
230
checkpoint=CheckpointConfig(
231
save_interval=args.eval_interval,
0 commit comments