From c68a5312a61d2db56eb7660db62aa643f9e4dd5f Mon Sep 17 00:00:00 2001
From: Steven I Reeves
Date: Wed, 21 Jul 2021 22:28:50 +0000
Subject: [PATCH] Reducing training and validation steps

---
 scripts/gpt2-tf2/gpt2_train_distributed.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/gpt2-tf2/gpt2_train_distributed.py b/scripts/gpt2-tf2/gpt2_train_distributed.py
index 771b7c4fad4f..ddb715488557 100644
--- a/scripts/gpt2-tf2/gpt2_train_distributed.py
+++ b/scripts/gpt2-tf2/gpt2_train_distributed.py
@@ -50,7 +50,7 @@ def get_dataset(fil):
 
 def tokenize(data, tokenizer, truncate=False):
     if truncate:
-        data = tokenizer(data[:1000], return_tensors='tf', padding=True, truncation=True)
+        data = tokenizer(data[:100], return_tensors='tf', padding=True, truncation=True)
     else:
         data = tokenizer(data, return_tensors='tf', padding=True, truncation=True)
     return tf.data.Dataset.from_tensor_slices((dict(data), data['input_ids']))