diff --git a/Architecture/partial-rope-full-rope.py b/Architecture/partial-rope-full-rope.py
index 5bba030..2a3eae4 100644
--- a/Architecture/partial-rope-full-rope.py
+++ b/Architecture/partial-rope-full-rope.py
@@ -8,7 +8,7 @@
 import matplotlib.pyplot as plt
 
 class PartialRoPE(nn.Module):
-    """Partial RoPE implementasyonu - sadece belirli bir oranı döndürür"""
+    """Partial RoPE implementation that rotates only a fraction of dimensions."""
     def __init__(self, dim, max_position_embeddings=2048, base=10000, partial_rotary_factor=0.5):
         super().__init__()
         self.dim = dim
@@ -16,7 +16,7 @@ def __init__(self, dim, max_position_embeddings=2048, base=10000, partial_rotary
         self.base = base
         self.partial_rotary_factor = partial_rotary_factor
         
-        # Sadece partial faktörü kadar dimension kullan
+        # Use only the fraction defined by the partial factor
         self.rotary_dim = int(self.dim * self.partial_rotary_factor)
         
         inv_freq = 1.0 / (self.base ** (torch.arange(0, self.rotary_dim, 2).float() / self.rotary_dim))
@@ -43,7 +43,7 @@ def forward(self, q, k, seq_len=None):
         if seq_len > self.max_seq_len_cached:
             self._set_cos_sin_cache(seq_len)
         
-        # Sadece rotary_dim kadar uygula
+        # Split off the rotary slice; the remaining dimensions pass through unrotated
         q_rot = q[..., :self.rotary_dim]
         q_pass = q[..., self.rotary_dim:]
         k_rot = k[..., :self.rotary_dim]
@@ -52,11 +52,11 @@ def forward(self, q, k, seq_len=None):
         cos = self.cos_cached[:seq_len, :].unsqueeze(0).unsqueeze(0)
         sin = self.sin_cached[:seq_len, :].unsqueeze(0).unsqueeze(0)
         
-        # RoPE sadece rotary kısmına uygula
+        # Apply the RoPE rotation to the rotary slice only
         q_rot_embed = (q_rot * cos) + (self._rotate_half(q_rot) * sin)
         k_rot_embed = (k_rot * cos) + (self._rotate_half(k_rot) * sin)
         
-        # Rotary ve pass kısımlarını birleştir
+        # Concatenate rotary and passthrough sections
         q_embed = torch.cat([q_rot_embed, q_pass], dim=-1)
         k_embed = torch.cat([k_rot_embed, k_pass], dim=-1)
         
@@ -64,7 +64,7 @@ def forward(self, q, k, seq_len=None):
 
 
 class FullRoPE(nn.Module):
-    """Tam RoPE implementasyonu"""
+    """Full RoPE implementation."""
     def __init__(self, dim, max_position_embeddings=2048, base=10000):
         super().__init__()
         self.dim = dim
@@ -105,7 +105,7 @@ def forward(self, q, k, seq_len=None):
 
 
 class AttentionWithRoPE(nn.Module):
-    """RoPE kullanan Attention katmanı"""
+    """Attention layer that uses RoPE."""
     def __init__(self, dim, num_heads, rope_module):
         super().__init__()
         self.dim = dim
@@ -123,15 +123,15 @@ def __init__(self, dim, num_heads, rope_module):
     def forward(self, x, mask=None):
         batch_size, seq_len, _ = x.shape
         
-        # Q, K, V projeksiyonları
+        # Q, K, V projections
         q = self.q_proj(x).view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
         k = self.k_proj(x).view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
         v = self.v_proj(x).view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
         
-        # RoPE uygula
+        # Apply RoPE
         q, k = self.rope(q, k)
         
-        # Attention hesapla
+        # Compute attention
         attn_scores = torch.matmul(q, k.transpose(-2, -1)) * self.scale
         
         if mask is not None:
@@ -140,7 +140,7 @@ def forward(self, x, mask=None):
         attn_probs = F.softmax(attn_scores, dim=-1)
         attn_output = torch.matmul(attn_probs, v)
         
-        # Çıktıyı birleştir
+        # Merge the heads back together
         attn_output = attn_output.transpose(1, 2).contiguous().view(batch_size, seq_len, self.dim)
         output = self.out_proj(attn_output)
         
@@ -148,7 +148,7 @@ def forward(self, x, mask=None):
 
 
 class SimpleTransformerBlock(nn.Module):
-    """Basit Transformer bloğu"""
+    """Simple Transformer block."""
     def __init__(self, dim, num_heads, rope_module, mlp_ratio=4):
         super().__init__()
         self.attention = AttentionWithRoPE(dim, num_heads, rope_module)
@@ -170,7 +170,7 @@ def forward(self, x, mask=None):
 
 
 class LanguageModel(nn.Module):
-    """Basit dil modeli"""
+    """Simple language model."""
     def __init__(self, vocab_size, dim, num_heads, num_layers, rope_module):
         super().__init__()
         self.embedding = nn.Embedding(vocab_size, dim)
@@ -194,17 +194,17 @@ def forward(self, input_ids, mask=None):
 
 
 def create_causal_mask(seq_len, device):
-    """Causal mask oluştur"""
+    """Create a causal mask."""
     mask = torch.triu(torch.ones(seq_len, seq_len, device=device), diagonal=1)
     return mask == 0
 
 
 def train_step(model, data, labels, optimizer, device):
-    """Tek eğitim adımı"""
+    """Single training step."""
     model.train()
     data, labels = data.to(device), labels.to(device)
     
-    # Causal mask oluştur
+    # Build the causal mask
     seq_len = data.shape[1]
     mask = create_causal_mask(seq_len, device)
     
@@ -221,7 +221,7 @@ def train_step(model, data, labels, optimizer, device):
 
 
 def evaluate_perplexity(model, data, labels, device):
-    """Perplexity hesapla"""
+    """Compute perplexity."""
     model.eval()
     data, labels = data.to(device), labels.to(device)
     
@@ -236,27 +236,27 @@ def evaluate_perplexity(model, data, labels, device):
 
 
 def benchmark_rope_performance():
-    """Partial ve Full RoPE performans karşılaştırması"""
-    
-    # Türkçe örnek metinler
+    """Compare the performance of partial and full RoPE."""
+
+    # Sample sentences used as training data (English translations of the original Turkish examples)
     turkish_texts = [
-        "Merhaba dünya! Bugün hava çok güzel.",
-        "İstanbul'un tarihi ve kültürel zenginlikleri dünyaca ünlüdür.",
-        "Türk mutfağı, zengin lezzetleri ve çeşitliliği ile tanınır.",
-        "Yapay zeka teknolojileri hızla gelişmektedir.",
-        "Kitap okumak, hayal gücünü geliştiren harika bir aktivitedir.",
-        "Spor yapmak sağlıklı bir yaşam için önemlidir.",
-        "Müzik, evrensel bir dil olarak kabul edilir.",
-        "Doğa, insanlara huzur ve ilham verir.",
-        "Eğitim, toplumların gelişimi için temel taştır.",
-        "Teknoloji hayatımızı kolaylaştırır ama dengeli kullanılmalıdır."
+        "Hello world! The weather is wonderful today.",
+        "Istanbul's historic and cultural richness is famous worldwide.",
+        "Turkish cuisine is known for its rich flavours and variety.",
+        "Artificial intelligence technologies are rapidly advancing.",
+        "Reading books is a fantastic activity that boosts imagination.",
+        "Exercising is important for a healthy life.",
+        "Music is considered a universal language.",
+        "Nature gives people peace and inspiration.",
+        "Education is the cornerstone of societal development.",
+        "Technology makes our lives easier but should be used in balance."
     ]
-    
-    # Tokenizer yükle
-    print("Tokenizer yükleniyor...")
+
+    # Load the tokenizer
+    print("Loading tokenizer...")
     tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-turkish-cased")
     
-    # Padding token ayarla
+    # Configure the padding token
     if tokenizer.pad_token is None:
         if tokenizer.eos_token is not None:
             tokenizer.pad_token = tokenizer.eos_token
@@ -265,10 +265,10 @@ def benchmark_rope_performance():
         else:
             tokenizer.add_special_tokens({'pad_token': '[PAD]'})
     
-    # Metinleri tokenize et
-    print("Metinler tokenize ediliyor...")
+    # Tokenise the texts
+    print("Tokenising texts...")
     encoded = tokenizer(
-        turkish_texts * 10,  # Daha fazla veri için tekrarla
+        turkish_texts * 10,  # Repeat to create additional data
         padding=True,
         truncation=True,
         max_length=64,
@@ -278,7 +278,7 @@ def benchmark_rope_performance():
     input_ids = encoded["input_ids"]
     labels = input_ids.clone()
     
-    # Model parametreleri
+    # Model parameters
     vocab_size = tokenizer.vocab_size
     dim = 128
     num_heads = 8
@@ -288,9 +288,9 @@ def benchmark_rope_performance():
     learning_rate = 1e-3
     
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    print(f"Cihaz: {device}")
+    print(f"Device: {device}")
     
-    # Veriyi batch'lere böl
+    # Prepare mini-batches
     num_samples = input_ids.shape[0]
     num_batches = num_samples // batch_size
     
@@ -299,8 +299,8 @@ def benchmark_rope_performance():
         "full_rope": {"losses": [], "perplexities": [], "times": []}
     }
     
-    # Partial RoPE modeli
-    print("\n=== Partial RoPE Eğitimi ===")
+    # Partial RoPE model
+    print("\n=== Training Partial RoPE ===")
     partial_rope = PartialRoPE(dim // num_heads, partial_rotary_factor=0.5)
     model_partial = LanguageModel(vocab_size, dim, num_heads, num_layers, partial_rope).to(device)
     optimizer_partial = torch.optim.Adam(model_partial.parameters(), lr=learning_rate)
@@ -329,8 +329,8 @@ def benchmark_rope_performance():
         
         print(f"Epoch {epoch+1}: Loss={avg_loss:.4f}, Perplexity={perplexity:.2f}, Time={epoch_time:.3f}s")
     
-    # Full RoPE modeli
-    print("\n=== Full RoPE Eğitimi ===")
+    # Full RoPE model
+    print("\n=== Training Full RoPE ===")
     full_rope = FullRoPE(dim // num_heads)
     model_full = LanguageModel(vocab_size, dim, num_heads, num_layers, full_rope).to(device)
     optimizer_full = torch.optim.Adam(model_full.parameters(), lr=learning_rate)
@@ -359,54 +359,53 @@ def benchmark_rope_performance():
         
         print(f"Epoch {epoch+1}: Loss={avg_loss:.4f}, Perplexity={perplexity:.2f}, Time={epoch_time:.3f}s")
     
-    # Sonuçları görselleştir
+    # Visualise the results
     visualize_results(results)
     
-    # Özet istatistikler
-    print("\n=== Performans Özeti ===")
+    # Summary statistics
+    print("\n=== Performance Summary ===")
     print(f"Partial RoPE - Final Loss: {results['partial_rope']['losses'][-1]:.4f}")
     print(f"Full RoPE - Final Loss: {results['full_rope']['losses'][-1]:.4f}")
     print(f"Partial RoPE - Final Perplexity: {results['partial_rope']['perplexities'][-1]:.2f}")
     print(f"Full RoPE - Final Perplexity: {results['full_rope']['perplexities'][-1]:.2f}")
     print(f"Partial RoPE - Avg Time/Epoch: {np.mean(results['partial_rope']['times']):.3f}s")
     print(f"Full RoPE - Avg Time/Epoch: {np.mean(results['full_rope']['times']):.3f}s")
-    
-    # Hız kazancı
+
+    # Speed-up percentage
     speed_gain = (np.mean(results['full_rope']['times']) - np.mean(results['partial_rope']['times'])) / np.mean(results['full_rope']['times']) * 100
-    print(f"\nPartial RoPE hız kazancı: %{speed_gain:.1f}")
+    print(f"\nPartial RoPE speed-up: {speed_gain:.1f}%")
 
 
 def visualize_results(results):
-    """Sonuçları görselleştir"""
+    """Visualise benchmarking metrics."""
     fig, axes = plt.subplots(1, 3, figsize=(15, 5))
     
     epochs = range(1, len(results["partial_rope"]["losses"]) + 1)
     
-    # Loss grafiği
-        # Loss grafiği
+    # Loss chart
     axes[0].plot(epochs, results["partial_rope"]["losses"], 'b-', label='Partial RoPE', linewidth=2)
     axes[0].plot(epochs, results["full_rope"]["losses"], 'r-', label='Full RoPE', linewidth=2)
     axes[0].set_xlabel('Epoch')
     axes[0].set_ylabel('Loss')
-    axes[0].set_title('Eğitim Loss Karşılaştırması')
+    axes[0].set_title('Training Loss Comparison')
     axes[0].legend()
     axes[0].grid(True, alpha=0.3)
     
-    # Perplexity grafiği
+    # Perplexity chart
     axes[1].plot(epochs, results["partial_rope"]["perplexities"], 'b-', label='Partial RoPE', linewidth=2)
     axes[1].plot(epochs, results["full_rope"]["perplexities"], 'r-', label='Full RoPE', linewidth=2)
     axes[1].set_xlabel('Epoch')
     axes[1].set_ylabel('Perplexity')
-    axes[1].set_title('Perplexity Karşılaştırması')
+    axes[1].set_title('Perplexity Comparison')
     axes[1].legend()
     axes[1].grid(True, alpha=0.3)
     
-    # Eğitim süresi grafiği
+    # Training time chart
     axes[2].plot(epochs, results["partial_rope"]["times"], 'b-', label='Partial RoPE', linewidth=2)
     axes[2].plot(epochs, results["full_rope"]["times"], 'r-', label='Full RoPE', linewidth=2)
     axes[2].set_xlabel('Epoch')
-    axes[2].set_ylabel('Süre (saniye)')
-    axes[2].set_title('Epoch Başına Eğitim Süresi')
+    axes[2].set_ylabel('Time (seconds)')
+    axes[2].set_title('Training Time per Epoch')
     axes[2].legend()
     axes[2].grid(True, alpha=0.3)
     
@@ -416,54 +415,54 @@ def visualize_results(results):
 
 
 def inference_comparison(model_partial, model_full, tokenizer, device):
-    """Çıkarım performansı karşılaştırması"""
-    print("\n=== Çıkarım Performansı Karşılaştırması ===")
+    """Compare inference behaviour between the models."""
+    print("\n=== Inference Performance Comparison ===")
     
     test_texts = [
-        "Bugün hava",
-        "Türkiye'nin başkenti",
-        "Yapay zeka",
-        "En sevdiğim yemek"
+        "Today the weather",
+        "The capital of Turkey",
+        "Artificial intelligence",
+        "My favourite meal"
     ]
     
     model_partial.eval()
     model_full.eval()
     
     for text in test_texts:
-        print(f"\nGiriş: '{text}'")
+        print(f"\nInput: '{text}'")
         
-        # Tokenize et
+        # Tokenise the prompt
         inputs = tokenizer(text, return_tensors="pt").to(device)
         input_ids = inputs["input_ids"]
         
-        # Partial RoPE ile tahmin
+        # Prediction with Partial RoPE
         with torch.no_grad():
             start_time = time.time()
             mask = create_causal_mask(input_ids.shape[1], device)
             logits_partial = model_partial(input_ids, mask)
             partial_time = time.time() - start_time
             
-            # En olası sonraki kelimeyi bul
+            # Select the most probable next token
             next_token_partial = torch.argmax(logits_partial[0, -1, :])
             next_word_partial = tokenizer.decode(next_token_partial)
         
-        # Full RoPE ile tahmin
+        # Prediction with Full RoPE
         with torch.no_grad():
             start_time = time.time()
             logits_full = model_full(input_ids, mask)
             full_time = time.time() - start_time
             
-            # En olası sonraki kelimeyi bul
+            # Select the most probable next token
             next_token_full = torch.argmax(logits_full[0, -1, :])
             next_word_full = tokenizer.decode(next_token_full)
         
-        print(f"  Partial RoPE tahmini: '{next_word_partial}' (Süre: {partial_time*1000:.2f}ms)")
-        print(f"  Full RoPE tahmini: '{next_word_full}' (Süre: {full_time*1000:.2f}ms)")
+        print(f"  Partial RoPE prediction: '{next_word_partial}' (Time: {partial_time*1000:.2f}ms)")
+        print(f"  Full RoPE prediction: '{next_word_full}' (Time: {full_time*1000:.2f}ms)")
 
 
 def memory_comparison():
-    """Bellek kullanımı karşılaştırması"""
-    print("\n=== Bellek Kullanımı Karşılaştırması ===")
+    """Contrast memory usage between the two approaches."""
+    print("\n=== Memory Usage Comparison ===")
     
     dim = 512
     seq_len = 1024
@@ -471,7 +470,7 @@ def memory_comparison():
     num_heads = 8
     head_dim = dim // num_heads
     
-    # Partial RoPE bellek kullanımı
+    # Memory usage for Partial RoPE
     partial_rope = PartialRoPE(head_dim, max_position_embeddings=seq_len, partial_rotary_factor=0.5)
     q = torch.randn(batch_size, num_heads, seq_len, head_dim)
     k = torch.randn(batch_size, num_heads, seq_len, head_dim)
@@ -485,24 +484,24 @@ def memory_comparison():
         _ = partial_rope(q, k)
         partial_memory = torch.cuda.max_memory_allocated() / 1024**2  # MB
         
-        # Full RoPE bellek kullanımı
+        # Memory usage for Full RoPE
         torch.cuda.reset_peak_memory_stats()
         full_rope = FullRoPE(head_dim, max_position_embeddings=seq_len).cuda()
         
         # Forward pass
         _ = full_rope(q, k)
         full_memory = torch.cuda.max_memory_allocated() / 1024**2  # MB
-        
-        print(f"Partial RoPE bellek kullanımı: {partial_memory:.2f} MB")
-        print(f"Full RoPE bellek kullanımı: {full_memory:.2f} MB")
-        print(f"Bellek tasarrufu: {(full_memory - partial_memory) / full_memory * 100:.1f}%")
+
+        print(f"Partial RoPE memory usage: {partial_memory:.2f} MB")
+        print(f"Full RoPE memory usage: {full_memory:.2f} MB")
+        print(f"Memory savings: {(full_memory - partial_memory) / full_memory * 100:.1f}%")
     else:
-        print("CUDA mevcut değil, bellek karşılaştırması yapılamıyor.")
+        print("CUDA is not available, memory comparison skipped.")
 
 
 def ablation_study():
-    """Farklı partial_rotary_factor değerleri için ablasyon çalışması"""
-    print("\n=== Ablasyon Çalışması: Farklı Partial Rotary Factor Değerleri ===")
+    """Ablation study for varying partial_rotary_factor values."""
+    print("\n=== Ablation Study: Partial Rotary Factor Variants ===")
     
     factors = [0.25, 0.5, 0.75, 1.0]
     dim = 64
@@ -521,7 +520,7 @@ def ablation_study():
         q = torch.randn(batch_size, 1, seq_len, dim)
         k = torch.randn(batch_size, 1, seq_len, dim)
         
-        # Performans ölçümü
+        # Measure execution time
         start_time = time.time()
         for _ in range(num_iterations):
             _ = rope(q, k)
@@ -532,18 +531,18 @@ def ablation_study():
         
         print(f"Factor {factor}: {avg_time:.3f} ms/iteration")
     
-    # Sonuçları görselleştir
+    # Visualise the ablation results
     plt.figure(figsize=(8, 6))
     factors_list = list(results.keys())
     times_list = list(results.values())
     
     plt.bar(factors_list, times_list, color=['blue', 'green', 'orange', 'red'])
     plt.xlabel('Partial Rotary Factor')
-    plt.ylabel('Ortalama Süre (ms)')
-    plt.title('Farklı Partial Rotary Factor Değerleri için Performans')
+    plt.ylabel('Average Time (ms)')
+    plt.title('Performance for Different Partial Rotary Factors')
     plt.grid(True, alpha=0.3)
     
-    # Değerleri bar üzerine yaz
+    # Annotate the bars with values
     for i, (factor, exec_time) in enumerate(zip(factors_list, times_list)):
         plt.text(i, exec_time + 0.01, f'{exec_time:.3f}', ha='center', va='bottom')
     
@@ -552,19 +551,19 @@ def ablation_study():
     plt.show()
 
 
-# Ana fonksiyon
+# Main entry point
 if __name__ == "__main__":
-    print("Partial RoPE vs Full RoPE Performans Karşılaştırması")
+    print("Partial RoPE vs Full RoPE Performance Benchmark")
     print("=" * 60)
-    
-    # Ana benchmark
+
+    # Main benchmark
     benchmark_rope_performance()
-    
-    # Bellek karşılaştırması
+
+    # Memory comparison
     memory_comparison()
-    
-    # Ablasyon çalışması
+
+    # Ablation study
     ablation_study()
-    
-    print("\n✅ Tüm testler tamamlandı!")
-    print("📊 Grafikler 'rope_comparison.png' ve 'ablation_study.png' olarak kaydedildi.")
\ No newline at end of file
+
+    print("\n✅ All benchmarks completed!")
+    print("📊 Charts saved as 'rope_comparison.png' and 'ablation_study.png'.")
\ No newline at end of file
diff --git a/Architecture/sigmoid-gates.py b/Architecture/sigmoid-gates.py
index f7da3a0..1e88f74 100644
--- a/Architecture/sigmoid-gates.py
+++ b/Architecture/sigmoid-gates.py
@@ -5,7 +5,7 @@
 import matplotlib.pyplot as plt
 
 class SigmoidGateExamples(nn.Module):
-    """Farklı sigmoid gate örnekleri"""
+    """Examples of different sigmoid gate mechanisms."""
     
     def __init__(self, input_dim, hidden_dim):
         super().__init__()
@@ -29,88 +29,88 @@ def __init__(self, input_dim, hidden_dim):
         self.highway_transform = nn.Linear(input_dim, input_dim)
     
     def simple_gate(self, x):
-        """Basit sigmoid gate örneği"""
-        # Gate değeri hesapla (0-1 arası)
+        """Basic sigmoid gate example."""
+        # Compute gate value between 0 and 1
         gate = torch.sigmoid(self.gate_linear(x))
         
-        # Gate'i uygula: çıktı = gate * input
+        # Apply the gate: output = gate * input
         output = gate * x[:, :self.hidden_dim]
         
         return output, gate
     
     def lstm_gates_example(self, x, h, c):
-        """LSTM'deki 4 sigmoid gate"""
+        """The four sigmoid gates used in an LSTM."""
         # x: input, h: hidden state, c: cell state
         combined = torch.cat([x, h], dim=1)
         gates = self.lstm_gates(combined)
         
-        # 4 gate'e ayır
+        # Split into the four gates
         i, f, g, o = gates.chunk(4, dim=1)
         
         # Sigmoid gates
-        i = torch.sigmoid(i)  # Input gate: neyi hatırlayacağız
-        f = torch.sigmoid(f)  # Forget gate: neyi unutacağız
-        o = torch.sigmoid(o)  # Output gate: neyi çıktı olarak vereceğiz
-        g = torch.tanh(g)     # Candidate values (gate değil)
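+        i = torch.sigmoid(i)  # Input gate: how much of the candidate g is written to the cell state
+        f = torch.sigmoid(f)  # Forget gate: how much of the previous cell state c is retained
+        o = torch.sigmoid(o)  # Output gate: how much of the cell state is exposed as the hidden state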
+        g = torch.tanh(g)     # Candidate values (not a sigmoid gate)
         
-        # Yeni cell state
+        # Updated cell state
         c_new = f * c + i * g
         
-        # Yeni hidden state
+        # Updated hidden state
         h_new = o * torch.tanh(c_new)
         
         return h_new, c_new, {'input': i, 'forget': f, 'output': o}
     
     def gru_gates_example(self, x, h):
-        """GRU'daki sigmoid gates"""
+        """Sigmoid gates inside a GRU."""
         combined = torch.cat([x, h], dim=1)
         gates = self.gru_gates(combined)
         
-        # 3 kısma ayır
+        # Split into three sections
         r, z, n = gates.chunk(3, dim=1)
         
-        # Reset gate: önceki bilginin ne kadarını kullanacağız
+        # Reset gate: how much of the previous state to use
         r = torch.sigmoid(r)
         
-        # Update gate: yeni ve eski bilgiyi nasıl birleştireceğiz
+        # Update gate: how to mix new and old information
         z = torch.sigmoid(z)
         
-        # Yeni hidden state adayı
+        # Candidate hidden state
         n = torch.tanh(n)
         
-        # Yeni hidden state
+        # Updated hidden state
         h_new = (1 - z) * n + z * h
         
         return h_new, {'reset': r, 'update': z}
     
     def glu_example(self, x):
-        """Gated Linear Unit (GLU)"""
-        # Linear dönüşüm
+        """Gated Linear Unit (GLU)."""
+        # Linear projection
         output = self.glu_linear(x)
         
-        # İkiye böl
+        # Split in two
         a, b = output.chunk(2, dim=1)
         
         # GLU: a * sigmoid(b)
         return a * torch.sigmoid(b)
     
     def highway_gate_example(self, x):
-        """Highway Network gate"""
-        # Transform gate (T): ne kadar dönüşüm uygulayacağız
+        """Highway Network gate."""
+        # Transform gate (T): how much of the transformed signal to use
         T = torch.sigmoid(self.highway_gate(x))
         
-        # Dönüştürülmüş veri
+        # Transformed data
         H = torch.relu(self.highway_transform(x))
         
-        # Highway formülü: y = T * H + (1 - T) * x
-        # T=1: tamamen dönüşüm, T=0: girdiyi olduğu gibi geçir
+        # Highway formula: y = T * H + (1 - T) * x
+        # T=1: full transform, T=0: pass the input through unchanged
         output = T * H + (1 - T) * x
         
         return output, T
 
 
 class AttentionGate(nn.Module):
-    """Attention mekanizmasında sigmoid gate kullanımı"""
+    """Attention mechanism augmented with sigmoid gating."""
     
     def __init__(self, hidden_dim):
         super().__init__()
@@ -125,18 +125,18 @@ def forward(self, query, keys, values):
         """
         batch_size, seq_len, hidden_dim = keys.shape
         
-        # Query'yi genişlet
+        # Broadcast the query across the sequence dimension
         query_expanded = query.unsqueeze(1).expand(-1, seq_len, -1)
         
-        # Attention hesapla
+        # Compute the attention projection
         combined = torch.cat([query_expanded, keys], dim=2)
         attention_hidden = torch.tanh(self.attention_linear(combined))
         
-        # Sigmoid gate ile attention weights
+        # Attention weights from the sigmoid gate
         attention_scores = self.gate_linear(attention_hidden).squeeze(-1)
         attention_weights = torch.sigmoid(attention_scores)
         
-        # Normalize (opsiyonel - soft attention için)
+        # Normalize (optional, for soft attention)
         attention_weights = attention_weights / (attention_weights.sum(dim=1, keepdim=True) + 1e-8)
         
         # Weighted sum
@@ -146,13 +146,13 @@ def forward(self, query, keys, values):
 
 
 class SigmoidGatingMechanism(nn.Module):
-    """Genel amaçlı sigmoid gating mekanizması"""
+    """General-purpose sigmoid gating mechanism."""
     
     def __init__(self, input_dim, num_experts=4):
         super().__init__()
         self.num_experts = num_experts
         
-        # Her expert için bir ağ
+        # One linear layer per expert
         self.experts = nn.ModuleList([
             nn.Linear(input_dim, input_dim) for _ in range(num_experts)
         ])
@@ -165,61 +165,61 @@ def __init__(self, input_dim, num_experts=4):
         )
     
     def forward(self, x):
-        # Expert çıktıları
+        # Collect expert outputs
         expert_outputs = torch.stack([expert(x) for expert in self.experts], dim=1)
         
-        # Gate değerleri (sigmoid)
+        # Sigmoid gate values
         gates = torch.sigmoid(self.gate_network(x))
         gates = gates.unsqueeze(-1)
         
-        # Ağırlıklı toplam
+        # Weighted sum of expert outputs
         output = (gates * expert_outputs).sum(dim=1)
         
         return output, gates.squeeze(-1)
 
 
 def visualize_sigmoid_gate():
-    """Sigmoid fonksiyonunu ve gate davranışını görselleştir"""
+    """Visualize the sigmoid function and gate behavior."""
     x = np.linspace(-10, 10, 1000)
     sigmoid = 1 / (1 + np.exp(-x))
     
     fig, axes = plt.subplots(2, 2, figsize=(12, 10))
     
-    # 1. Sigmoid fonksiyonu
+    # 1. Sigmoid function
     axes[0, 0].plot(x, sigmoid, 'b-', linewidth=2)
     axes[0, 0].axhline(y=0.5, color='r', linestyle='--', alpha=0.5)
     axes[0, 0].axvline(x=0, color='r', linestyle='--', alpha=0.5)
-    axes[0, 0].set_title('Sigmoid Fonksiyonu')
+    axes[0, 0].set_title('Sigmoid Function')
     axes[0, 0].set_xlabel('x')
     axes[0, 0].set_ylabel('σ(x)')
     axes[0, 0].grid(True, alpha=0.3)
     
-    # 2. Gate çarpımı etkisi
+    # 2. Effect of multiplying by the gate
     input_signal = np.sin(x)
     gated_signal = sigmoid * input_signal
     
-    axes[0, 1].plot(x, input_signal, 'g-', label='Giriş sinyali', alpha=0.7)
-    axes[0, 1].plot(x, sigmoid, 'r-', label='Gate değeri', alpha=0.7)
-    axes[0, 1].plot(x, gated_signal, 'b-', label='Gate * Sinyal', linewidth=2)
-    axes[0, 1].set_title('Gate Çarpımı Etkisi')
+    axes[0, 1].plot(x, input_signal, 'g-', label='Input signal', alpha=0.7)
+    axes[0, 1].plot(x, sigmoid, 'r-', label='Gate value', alpha=0.7)
+    axes[0, 1].plot(x, gated_signal, 'b-', label='Gate * Signal', linewidth=2)
+    axes[0, 1].set_title('Effect of Gate Multiplication')
     axes[0, 1].set_xlabel('x')
     axes[0, 1].legend()
     axes[0, 1].grid(True, alpha=0.3)
     
-    # 3. Farklı gate değerleri
+    # 3. Different gate values
     gate_values = [0.1, 0.3, 0.5, 0.7, 0.9]
     colors = plt.cm.viridis(np.linspace(0, 1, len(gate_values)))
     
     for gate, color in zip(gate_values, colors):
         axes[1, 0].plot(x, gate * np.sin(x), color=color, label=f'Gate={gate}')
-    
-    axes[1, 0].set_title('Farklı Gate Değerlerinin Etkisi')
+
+    axes[1, 0].set_title('Effect of Different Gate Values')
     axes[1, 0].set_xlabel('x')
     axes[1, 0].set_ylabel('Gate * sin(x)')
     axes[1, 0].legend()
     axes[1, 0].grid(True, alpha=0.3)
     
-    # 4. LSTM gate dinamikleri
+    # 4. LSTM gate dynamics
     time_steps = 50
     forget_gate = np.random.beta(5, 2, time_steps)
     input_gate = np.random.beta(2, 5, time_steps)
@@ -228,9 +228,9 @@ def visualize_sigmoid_gate():
     axes[1, 1].plot(forget_gate, 'r-', label='Forget gate', linewidth=2)
     axes[1, 1].plot(input_gate, 'g-', label='Input gate', linewidth=2)
     axes[1, 1].plot(output_gate, 'b-', label='Output gate', linewidth=2)
-    axes[1, 1].set_title('LSTM Gate Dinamikleri (Örnek)')
-    axes[1, 1].set_xlabel('Zaman adımı')
-    axes[1, 1].set_ylabel('Gate değeri')
+    axes[1, 1].set_title('LSTM Gate Dynamics (Sample)')
+    axes[1, 1].set_xlabel('Time step')
+    axes[1, 1].set_ylabel('Gate value')
     axes[1, 1].legend()
     axes[1, 1].grid(True, alpha=0.3)
     axes[1, 1].set_ylim(0, 1)
@@ -241,10 +241,10 @@ def visualize_sigmoid_gate():
 
 
 def demonstrate_gate_effects():
-    """Gate'lerin etkilerini göster"""
-    print("=== Sigmoid Gate Etkileri Demonstrasyonu ===\n")
+    """Showcase how different gates behave."""
+    print("=== Sigmoid Gate Effects Demonstration ===\n")
     
-    # Örnek veri
+    # Sample data
     batch_size = 2
     input_dim = 4
     hidden_dim = 4
@@ -253,48 +253,48 @@ def demonstrate_gate_effects():
     h = torch.randn(batch_size, hidden_dim)
     c = torch.randn(batch_size, hidden_dim)
     
-    # Model oluştur
+    # Build model
     model = SigmoidGateExamples(input_dim, hidden_dim)
     
-    # 1. Basit gate
-    print("1. Basit Sigmoid Gate:")
+    # 1. Simple gate
+    print("1. Simple Sigmoid Gate:")
     output, gate = model.simple_gate(x)
-    print(f"   Giriş boyutu: {x.shape}")
-    print(f"   Gate değerleri: {gate[0, :4].detach().numpy()}")
-    print(f"   Çıktı: {output[0, :4].detach().numpy()}\n")
+    print(f"   Input shape: {x.shape}")
+    print(f"   Gate values: {gate[0, :4].detach().numpy()}")
+    print(f"   Output: {output[0, :4].detach().numpy()}\n")
     
     # 2. LSTM gates
     print("2. LSTM Gates:")
     h_new, c_new, lstm_gates = model.lstm_gates_example(x, h, c)
-    print(f"   Input gate ortalaması: {lstm_gates['input'].mean().item():.3f}")
-    print(f"   Forget gate ortalaması: {lstm_gates['forget'].mean().item():.3f}")
-    print(f"   Output gate ortalaması: {lstm_gates['output'].mean().item():.3f}\n")
+    print(f"   Input gate mean: {lstm_gates['input'].mean().item():.3f}")
+    print(f"   Forget gate mean: {lstm_gates['forget'].mean().item():.3f}")
+    print(f"   Output gate mean: {lstm_gates['output'].mean().item():.3f}\n")
     
     # 3. GRU gates
     print("3. GRU Gates:")
     h_new, gru_gates = model.gru_gates_example(x, h)
-    print(f"   Reset gate ortalaması: {gru_gates['reset'].mean().item():.3f}")
-    print(f"   Update gate ortalaması: {gru_gates['update'].mean().item():.3f}\n")
+    print(f"   Reset gate mean: {gru_gates['reset'].mean().item():.3f}")
+    print(f"   Update gate mean: {gru_gates['update'].mean().item():.3f}\n")
     
     # 4. Highway gate
     print("4. Highway Gate:")
     output, transform_gate = model.highway_gate_example(x)
-    print(f"   Transform gate ortalaması: {transform_gate.mean().item():.3f}")
-    print(f"   Bypass oranı: {(1 - transform_gate).mean().item():.3f}\n")
+    print(f"   Transform gate mean: {transform_gate.mean().item():.3f}")
+    print(f"   Bypass rate: {(1 - transform_gate).mean().item():.3f}\n")
     
     # 5. Expert gating
     print("5. Expert Gating:")
     expert_model = SigmoidGatingMechanism(input_dim, num_experts=4)
     output, expert_gates = expert_model(x)
-    print(f"   Expert gate değerleri: {expert_gates[0].detach().numpy()}")
-    print(f"   En aktif expert: {expert_gates[0].argmax().item()}")
+    print(f"   Expert gate values: {expert_gates[0].detach().numpy()}")
+    print(f"   Most active expert: {expert_gates[0].argmax().item()}")
 
 
 if __name__ == "__main__":
-    # Görselleştirme
+    # Visualization
     visualize_sigmoid_gate()
-    
-    # Demonstrasyon
+
+    # Demonstration
     demonstrate_gate_effects()
-    
-    print("\n✅ Sigmoid gates demonstrasyonu tamamlandı!")
\ No newline at end of file
+
+    print("\n✅ Sigmoid gates demonstration completed!")
\ No newline at end of file
diff --git a/Genel-1/cross-attn_llm.ipynb b/Genel-1/cross-attn_llm.ipynb
index 368a056..74bf4a9 100644
--- a/Genel-1/cross-attn_llm.ipynb
+++ b/Genel-1/cross-attn_llm.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -13,59 +13,59 @@
     "class LayerNormalization(nn.Module):\n",
     "    def __init__(self, features: int, eps: float = 10**-6) -> None:\n",
     "        super().__init__()\n",
-    "        self.eps = eps  # Küçük bir değer, sıfıra bölünmeyi önlemek için\n",
-    "        self.alpha = nn.Parameter(torch.ones(features))  # Ölçeklendirme parametresi (öğrenilebilir)\n",
-    "        self.bias = nn.Parameter(torch.zeros(features))  # Kaydırma parametresi (öğrenilebilir)\n",
+    "        self.eps = eps  # A small value to prevent division by zero\n",
+    "        self.alpha = nn.Parameter(torch.ones(features))  # Scaling parameter (learnable)\n",
+    "        self.bias = nn.Parameter(torch.zeros(features))  # Shift parameter (learnable)\n",
     "\n",
     "    def forward(self, x):\n",
-    "        # Girdinin ortalamasını ve standart sapmasını hesapla\n",
+    "        # Compute the input mean and standard deviation\n",
     "        mean = x.mean(dim=-1, keepdim=True)  # (batch, seq_len, 1)\n",
     "        std = x.std(dim=-1, keepdim=True)  # (batch, seq_len, 1)\n",
-    "        # Normalizasyon formülü: (x - mean) / (std + eps) * alpha + bias\n",
+    "        # Normalization formula: (x - mean) / (std + eps) * alpha + bias\n",
     "        return self.alpha * (x - mean) / (std + self.eps) + self.bias\n",
     "\n",
     "\n",
     "class FeedForwardBlock(nn.Module):\n",
     "    def __init__(self, d_model: int, d_ff: int, dropout: float) -> None:\n",
     "        super().__init__()\n",
-    "        self.linear_1 = nn.Linear(d_model, d_ff)  # İlk lineer dönüşüm (w1 ve b1)\n",
-    "        self.dropout = nn.Dropout(dropout)  # Dropout katmanı\n",
-    "        self.linear_2 = nn.Linear(d_ff, d_model)  # İkinci lineer dönüşüm (w2 ve b2)\n",
+    "        self.linear_1 = nn.Linear(d_model, d_ff)  # First linear transformation (w1 and b1)\n",
+    "        self.dropout = nn.Dropout(dropout)  # Dropout layer\n",
+    "        self.linear_2 = nn.Linear(d_ff, d_model)  # Second linear transformation (w2 and b2)\n",
     "\n",
     "    def forward(self, x):\n",
-    "        # İleri beslemeli sinir ağı: Lineer -> ReLU -> Dropout -> Lineer\n",
+    "        # Feed-forward network: Linear -> ReLU -> Dropout -> Linear\n",
     "        return self.linear_2(self.dropout(torch.relu(self.linear_1(x))))\n",
     "\n",
     "\n",
     "class InputEmbeddings(nn.Module):\n",
     "    def __init__(self, d_model: int, vocab_size: int) -> None:\n",
     "        super().__init__()\n",
-    "        self.d_model = d_model  # Gömme vektörlerinin boyutu\n",
-    "        self.vocab_size = vocab_size  # Kelime dağarcığı boyutu\n",
-    "        self.embedding = nn.Embedding(vocab_size, d_model)  # Gömme katmanı\n",
+    "        self.d_model = d_model  # Embedding dimension\n",
+    "        self.vocab_size = vocab_size  # Vocabulary size\n",
+    "        self.embedding = nn.Embedding(vocab_size, d_model)  # Embedding layer\n",
     "\n",
     "    def forward(self, x):\n",
-    "        # Token indekslerini gömme vektörlerine dönüştür ve ölçeklendir\n",
+    "        # Convert token indices to embeddings and scale them\n",
     "        return self.embedding(x) * math.sqrt(self.d_model)\n",
     "\n",
     "\n",
     "class PositionalEncoding(nn.Module):\n",
     "    def __init__(self, d_model: int, seq_len: int, dropout: float) -> None:\n",
     "        super().__init__()\n",
-    "        self.d_model = d_model  # Gömme vektörlerinin boyutu\n",
-    "        self.seq_len = seq_len  # Maksimum dizi uzunluğu\n",
-    "        self.dropout = nn.Dropout(dropout)  # Dropout katmanı\n",
-    "        # Konumsal kodlama matrisini oluştur\n",
+    "        self.d_model = d_model  # Embedding dimension\n",
+    "        self.seq_len = seq_len  # Maximum sequence length\n",
+    "        self.dropout = nn.Dropout(dropout)  # Dropout layer\n",
+    "        # Create the positional encoding matrix\n",
     "        pe = torch.zeros(seq_len, d_model)\n",
-    "        position = torch.arange(0, seq_len, dtype=torch.float).unsqueeze(1)  # Pozisyon vektörü\n",
-    "        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))  # Bölme terimi\n",
-    "        pe[:, 0::2] = torch.sin(position * div_term)  # Çift indeksler için sinüs\n",
-    "        pe[:, 1::2] = torch.cos(position * div_term)  # Tek indeksler için kosinüs\n",
+    "        position = torch.arange(0, seq_len, dtype=torch.float).unsqueeze(1)  # Position vector\n",
+    "        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))  # Divisor term\n",
+    "        pe[:, 0::2] = torch.sin(position * div_term)  # Sine for even indices\n",
+    "        pe[:, 1::2] = torch.cos(position * div_term)  # Cosine for odd indices\n",
     "        pe = pe.unsqueeze(0)  # Batch boyutu ekle\n",
-    "        self.register_buffer('pe', pe)  # Konumsal kodlamayı sabit olarak kaydet\n",
+    "        self.register_buffer('pe', pe)  # Register the positional encoding as a buffer\n",
     "\n",
     "    def forward(self, x):\n",
-    "        # Girdiye konumsal kodlamayı ekle\n",
+    "        # Add positional encoding to the input\n",
     "        x = x + (self.pe[:, :x.shape[1], :]).requires_grad_(False)\n",
     "        return self.dropout(x)\n",
     "\n",
@@ -73,50 +73,50 @@
     "class ResidualConnection(nn.Module):\n",
     "    def __init__(self, features: int, dropout: float) -> None:\n",
     "        super().__init__()\n",
-    "        self.dropout = nn.Dropout(dropout)  # Dropout katmanı\n",
+    "        self.dropout = nn.Dropout(dropout)  # Dropout layer\n",
     "        self.norm = LayerNormalization(features)  # Katman normalizasyonu\n",
     "\n",
     "    def forward(self, x, sublayer):\n",
-    "        # Artık bağlantı: x + dropout(sublayer(norm(x)))\n",
+    "        # Residual connection: x + dropout(sublayer(norm(x)))\n",
     "        return x + self.dropout(sublayer(self.norm(x)))\n",
     "\n",
     "\n",
     "class MultiHeadAttentionBlock(nn.Module):\n",
     "    def __init__(self, d_model: int, h: int, dropout: float) -> None:\n",
     "        super().__init__()\n",
-    "        self.d_model = d_model  # Gömme vektörlerinin boyutu\n",
-    "        self.h = h  # Dikkat kafalarının sayısı\n",
-    "        assert d_model % h == 0, \"d_model is not divisible by h\"  # d_model h'a bölünebilir olmalı\n",
-    "        self.d_k = d_model // h  # Her bir kafanın vektör boyutu\n",
-    "        self.w_q = nn.Linear(d_model, d_model, bias=False)  # Query dönüşümü\n",
-    "        self.w_k = nn.Linear(d_model, d_model, bias=False)  # Key dönüşümü\n",
-    "        self.w_v = nn.Linear(d_model, d_model, bias=False)  # Value dönüşümü\n",
-    "        self.w_o = nn.Linear(d_model, d_model, bias=False)  # Çıktı dönüşümü\n",
-    "        self.dropout = nn.Dropout(dropout)  # Dropout katmanı\n",
+    "        self.d_model = d_model  # Embedding dimension\n",
+    "        self.h = h  # Number of attention heads\n",
+    "        assert d_model % h == 0, \"d_model is not divisible by h\"  # d_model must be divisible by h\n",
+    "        self.d_k = d_model // h  # Vector dimension of each head\n",
+    "        self.w_q = nn.Linear(d_model, d_model, bias=False)  # Query projection\n",
+    "        self.w_k = nn.Linear(d_model, d_model, bias=False)  # Key projection\n",
+    "        self.w_v = nn.Linear(d_model, d_model, bias=False)  # Value projection\n",
+    "        self.w_o = nn.Linear(d_model, d_model, bias=False)  # Output projection\n",
+    "        self.dropout = nn.Dropout(dropout)  # Dropout layer\n",
     "\n",
     "    @staticmethod\n",
     "    def attention(query, key, value, mask, dropout: nn.Dropout):\n",
     "        d_k = query.shape[-1]\n",
-    "        # Dikkat skorlarını hesapla: (Q * K^T) / sqrt(d_k)\n",
+    "        # Compute attention scores: (Q * K^T) / sqrt(d_k)\n",
     "        attention_scores = (query @ key.transpose(-2, -1)) / math.sqrt(d_k)\n",
     "        if mask is not None:\n",
     "            attention_scores.masked_fill_(mask == 0, -1e9)  # Maskeli yerleri -∞ yap\n",
     "        attention_scores = attention_scores.softmax(dim=-1)  # Softmax uygula\n",
     "        if dropout is not None:\n",
     "            attention_scores = dropout(attention_scores)  # Dropout uygula\n",
-    "        return (attention_scores @ value), attention_scores  # Çıktı ve dikkat skorları\n",
+    "        return (attention_scores @ value), attention_scores  # Output and attention scores\n",
     "\n",
     "    def forward(self, q, k, v, mask):\n",
-    "        query = self.w_q(q)  # Query dönüşümü\n",
-    "        key = self.w_k(k)  # Key dönüşümü\n",
-    "        value = self.w_v(v)  # Value dönüşümü\n",
-    "        # Çok kafalı dikkat için şekil değiştir\n",
+    "        query = self.w_q(q)  # Query projection\n",
+    "        key = self.w_k(k)  # Key projection\n",
+    "        value = self.w_v(v)  # Value projection\n",
+    "        # Reshape for multi-head attention\n",
     "        query = query.view(query.shape[0], query.shape[1], self.h, self.d_k).transpose(1, 2)\n",
     "        key = key.view(key.shape[0], key.shape[1], self.h, self.d_k).transpose(1, 2)\n",
     "        value = value.view(value.shape[0], value.shape[1], self.h, self.d_k).transpose(1, 2)\n",
-    "        # Dikkat mekanizmasını uygula\n",
+    "        # Apply the attention mechanism\n",
     "        x, self.attention_scores = MultiHeadAttentionBlock.attention(query, key, value, mask, self.dropout)\n",
-    "        # Kafaları birleştir ve çıktı dönüşümü uygula\n",
+    "        # Merge the heads and apply the output projection\n",
     "        x = x.transpose(1, 2).contiguous().view(x.shape[0], -1, self.h * self.d_k)\n",
     "        return self.w_o(x)\n",
     "\n",
@@ -124,14 +124,14 @@
     "class EncoderBlock(nn.Module):\n",
     "    def __init__(self, features: int, self_attention_block: MultiHeadAttentionBlock, feed_forward_block: FeedForwardBlock, dropout: float) -> None:\n",
     "        super().__init__()\n",
-    "        self.self_attention_block = self_attention_block  # Self-attention katmanı\n",
-    "        self.feed_forward_block = feed_forward_block  # İleri beslemeli sinir ağı\n",
-    "        self.residual_connections = nn.ModuleList([ResidualConnection(features, dropout) for _ in range(2)])  # Artık bağlantılar\n",
+    "        self.self_attention_block = self_attention_block  # Self-attention layer\n",
+    "        self.feed_forward_block = feed_forward_block  # Feed-forward network\n",
+    "        self.residual_connections = nn.ModuleList([ResidualConnection(features, dropout) for _ in range(2)])  # Residual connections\n",
     "\n",
     "    def forward(self, x, src_mask):\n",
-    "        # Self-attention ve artık bağlantı\n",
+    "        # Self-attention with residual connection\n",
     "        x = self.residual_connections[0](x, lambda x: self.self_attention_block(x, x, x, src_mask))\n",
-    "        # İleri beslemeli sinir ağı ve artık bağlantı\n",
+    "        # Feed-forward network with residual connection\n",
     "        x = self.residual_connections[1](x, self.feed_forward_block)\n",
     "        return x\n",
     "\n",
@@ -139,11 +139,11 @@
     "class Encoder(nn.Module):\n",
     "    def __init__(self, features: int, layers: nn.ModuleList) -> None:\n",
     "        super().__init__()\n",
-    "        self.layers = layers  # Encoder blokları\n",
+    "        self.layers = layers  # Encoder blocks\n",
     "        self.norm = LayerNormalization(features)  # Son katman normalizasyonu\n",
     "\n",
     "    def forward(self, x, mask):\n",
-    "        # Tüm encoder bloklarını uygula\n",
+    "        # Apply all encoder blocks\n",
     "        for layer in self.layers:\n",
     "            x = layer(x, mask)\n",
     "        return self.norm(x)  # Son katman normalizasyonu\n",
@@ -152,17 +152,17 @@
     "class DecoderBlock(nn.Module):\n",
     "    def __init__(self, features: int, self_attention_block: MultiHeadAttentionBlock, cross_attention_block: MultiHeadAttentionBlock, feed_forward_block: FeedForwardBlock, dropout: float) -> None:\n",
     "        super().__init__()\n",
-    "        self.self_attention_block = self_attention_block  # Self-attention katmanı\n",
-    "        self.cross_attention_block = cross_attention_block  # Cross-attention katmanı\n",
-    "        self.feed_forward_block = feed_forward_block  # İleri beslemeli sinir ağı\n",
-    "        self.residual_connections = nn.ModuleList([ResidualConnection(features, dropout) for _ in range(3)])  # Artık bağlantılar\n",
+    "        self.self_attention_block = self_attention_block  # Self-attention layer\n",
+    "        self.cross_attention_block = cross_attention_block  # Cross-attention layer\n",
+    "        self.feed_forward_block = feed_forward_block  # Feed-forward network\n",
+    "        self.residual_connections = nn.ModuleList([ResidualConnection(features, dropout) for _ in range(3)])  # Residual connections\n",
     "\n",
     "    def forward(self, x, encoder_output, src_mask, tgt_mask):\n",
-    "        # Self-attention ve artık bağlantı\n",
+    "        # Self-attention with residual connection\n",
     "        x = self.residual_connections[0](x, lambda x: self.self_attention_block(x, x, x, tgt_mask))\n",
-    "        # Cross-attention ve artık bağlantı\n",
+    "        # Cross-attention with residual connection\n",
     "        x = self.residual_connections[1](x, lambda x: self.cross_attention_block(x, encoder_output, encoder_output, src_mask))\n",
-    "        # İleri beslemeli sinir ağı ve artık bağlantı\n",
+    "        # Feed-forward network with residual connection\n",
     "        x = self.residual_connections[2](x, self.feed_forward_block)\n",
     "        return x\n",
     "\n",
@@ -170,11 +170,11 @@
     "class Decoder(nn.Module):\n",
     "    def __init__(self, features: int, layers: nn.ModuleList) -> None:\n",
     "        super().__init__()\n",
-    "        self.layers = layers  # Decoder blokları\n",
+    "        self.layers = layers  # Decoder blocks\n",
     "        self.norm = LayerNormalization(features)  # Son katman normalizasyonu\n",
     "\n",
     "    def forward(self, x, encoder_output, src_mask, tgt_mask):\n",
-    "        # Tüm decoder bloklarını uygula\n",
+    "        # Apply all decoder blocks\n",
     "        for layer in self.layers:\n",
     "            x = layer(x, encoder_output, src_mask, tgt_mask)\n",
     "        return self.norm(x)  # Son katman normalizasyonu\n",
@@ -183,78 +183,78 @@
     "class ProjectionLayer(nn.Module):\n",
     "    def __init__(self, d_model, vocab_size) -> None:\n",
     "        super().__init__()\n",
-    "        self.proj = nn.Linear(d_model, vocab_size)  # Lineer projeksiyon katmanı\n",
+    "        self.proj = nn.Linear(d_model, vocab_size)  # Linear projection layer\n",
     "\n",
     "    def forward(self, x) -> None:\n",
-    "        # Girdiyi kelime dağarcığı boyutuna projelendir\n",
+    "        # Project the input to the vocabulary dimension\n",
     "        return self.proj(x)\n",
     "\n",
     "\n",
     "class Transformer(nn.Module):\n",
     "    def __init__(self, encoder: Encoder, decoder: Decoder, src_embed: InputEmbeddings, tgt_embed: InputEmbeddings, src_pos: PositionalEncoding, tgt_pos: PositionalEncoding, projection_layer: ProjectionLayer) -> None:\n",
     "        super().__init__()\n",
-    "        self.encoder = encoder  # Encoder katmanı\n",
-    "        self.decoder = decoder  # Decoder katmanı\n",
-    "        self.src_embed = src_embed  # Kaynak gömme katmanı\n",
-    "        self.tgt_embed = tgt_embed  # Hedef gömme katmanı\n",
+    "        self.encoder = encoder  # Encoder layer\n",
+    "        self.decoder = decoder  # Decoder layer\n",
+    "        self.src_embed = src_embed  # Source embedding layer\n",
+    "        self.tgt_embed = tgt_embed  # Target embedding layer\n",
     "        self.src_pos = src_pos  # Kaynak konumsal kodlama\n",
     "        self.tgt_pos = tgt_pos  # Hedef konumsal kodlama\n",
-    "        self.projection_layer = projection_layer  # Projeksiyon katmanı\n",
+    "        self.projection_layer = projection_layer  # Projection layer\n",
     "\n",
     "    def encode(self, src, src_mask):\n",
     "        # Kaynak diziyi kodla\n",
-    "        src = self.src_embed(src)  # Gömme katmanı\n",
+    "        src = self.src_embed(src)  # Embedding layer\n",
     "        src = self.src_pos(src)  # Konumsal kodlama\n",
-    "        return self.encoder(src, src_mask)  # Encoder katmanı\n",
+    "        return self.encoder(src, src_mask)  # Encoder layer\n",
     "\n",
     "    def decode(self, encoder_output: torch.Tensor, src_mask: torch.Tensor, tgt: torch.Tensor, tgt_mask: torch.Tensor):\n",
-    "        # Hedef diziyi çöz\n",
-    "        tgt = self.tgt_embed(tgt)  # Gömme katmanı\n",
+    "        # Decode the target sequence\n",
+    "        tgt = self.tgt_embed(tgt)  # Embedding layer\n",
     "        tgt = self.tgt_pos(tgt)  # Konumsal kodlama\n",
-    "        return self.decoder(tgt, encoder_output, src_mask, tgt_mask)  # Decoder katmanı\n",
+    "        return self.decoder(tgt, encoder_output, src_mask, tgt_mask)  # Decoder layer\n",
     "\n",
     "    def project(self, x):\n",
-    "        # Çıktıyı kelime dağarcığı boyutuna projelendir\n",
+    "        # Project the output to the vocabulary dimension\n",
     "        return self.projection_layer(x)\n",
     "\n",
     "\n",
     "def build_transformer(src_vocab_size: int, tgt_vocab_size: int, src_seq_len: int, tgt_seq_len: int, d_model: int = 512, N: int = 6, h: int = 8, dropout: float = 0.1, d_ff: int = 2048) -> Transformer:\n",
-    "    # Gömme katmanlarını oluştur\n",
-    "    src_embed = InputEmbeddings(d_model, src_vocab_size)  # Kaynak gömme\n",
-    "    tgt_embed = InputEmbeddings(d_model, tgt_vocab_size)  # Hedef gömme\n",
+    "    # Build the embedding layers\n",
+    "    src_embed = InputEmbeddings(d_model, src_vocab_size)  # Source embedding\n",
+    "    tgt_embed = InputEmbeddings(d_model, tgt_vocab_size)  # Target embedding\n",
     "\n",
-    "    # Konumsal kodlama katmanlarını oluştur\n",
+    "    # Build the positional encoding layers\n",
     "    src_pos = PositionalEncoding(d_model, src_seq_len, dropout)  # Kaynak konumsal kodlama\n",
     "    tgt_pos = PositionalEncoding(d_model, tgt_seq_len, dropout)  # Hedef konumsal kodlama\n",
     "\n",
-    "    # Encoder bloklarını oluştur\n",
+    "    # Build the encoder blocks\n",
     "    encoder_blocks = []\n",
     "    for _ in range(N):\n",
     "        encoder_self_attention_block = MultiHeadAttentionBlock(d_model, h, dropout)  # Self-attention\n",
-    "        feed_forward_block = FeedForwardBlock(d_model, d_ff, dropout)  # İleri beslemeli sinir ağı\n",
-    "        encoder_block = EncoderBlock(d_model, encoder_self_attention_block, feed_forward_block, dropout)  # Encoder bloğu\n",
+    "        feed_forward_block = FeedForwardBlock(d_model, d_ff, dropout)  # Feed-forward network\n",
+    "        encoder_block = EncoderBlock(d_model, encoder_self_attention_block, feed_forward_block, dropout)  # Encoder block\n",
     "        encoder_blocks.append(encoder_block)\n",
     "\n",
-    "    # Decoder bloklarını oluştur\n",
+    "    # Build the decoder blocks\n",
     "    decoder_blocks = []\n",
     "    for _ in range(N):\n",
     "        decoder_self_attention_block = MultiHeadAttentionBlock(d_model, h, dropout)  # Self-attention\n",
     "        decoder_cross_attention_block = MultiHeadAttentionBlock(d_model, h, dropout)  # Cross-attention\n",
-    "        feed_forward_block = FeedForwardBlock(d_model, d_ff, dropout)  # İleri beslemeli sinir ağı\n",
-    "        decoder_block = DecoderBlock(d_model, decoder_self_attention_block, decoder_cross_attention_block, feed_forward_block, dropout)  # Decoder bloğu\n",
+    "        feed_forward_block = FeedForwardBlock(d_model, d_ff, dropout)  # Feed-forward network\n",
+    "        decoder_block = DecoderBlock(d_model, decoder_self_attention_block, decoder_cross_attention_block, feed_forward_block, dropout)  # Decoder block\n",
     "        decoder_blocks.append(decoder_block)\n",
     "\n",
-    "    # Encoder ve Decoder'ı oluştur\n",
+    "    # Build the encoder and decoder\n",
     "    encoder = Encoder(d_model, nn.ModuleList(encoder_blocks))  # Encoder\n",
     "    decoder = Decoder(d_model, nn.ModuleList(decoder_blocks))  # Decoder\n",
     "\n",
-    "    # Projeksiyon katmanını oluştur\n",
-    "    projection_layer = ProjectionLayer(d_model, tgt_vocab_size)  # Projeksiyon katmanı\n",
+    "    # Build the projection layer\n",
+    "    projection_layer = ProjectionLayer(d_model, tgt_vocab_size)  # Projection layer\n",
     "\n",
-    "    # Transformer modelini oluştur\n",
+    "    # Build the Transformer model\n",
     "    transformer = Transformer(encoder, decoder, src_embed, tgt_embed, src_pos, tgt_pos, projection_layer)\n",
     "\n",
-    "    # Parametreleri Xavier uniform ile başlat\n",
+    "    # Initialize parameters with Xavier uniform\n",
     "    for p in transformer.parameters():\n",
     "        if p.dim() > 1:\n",
     "            nn.init.xavier_uniform_(p)\n",
@@ -266,2007 +266,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Eğitim başlatılıyor...\n",
-      "Kullanılan cihaz: cpu\n",
-      "Veri seti yükleniyor...\n",
-      "Veri seti yüklendi. Train örnek sayısı: 1801350, Validation örnek sayısı: 3760\n",
-      "Tokenizer oluşturuluyor...\n",
-      "Tokenizer eğitimi için batch 1 işleniyor...\n",
-      "Tokenizer eğitimi için batch 2 işleniyor...\n",
-      "Tokenizer eğitimi için batch 3 işleniyor...\n",
-      "Tokenizer eğitimi için batch 4 işleniyor...\n",
-      "Tokenizer eğitimi için batch 5 işleniyor...\n",
-      "Tokenizer eğitimi için batch 6 işleniyor...\n",
-      "Tokenizer eğitimi için batch 7 işleniyor...\n",
-      "Tokenizer eğitimi için batch 8 işleniyor...\n",
-      "Tokenizer eğitimi için batch 9 işleniyor...\n",
-      "Tokenizer eğitimi için batch 10 işleniyor...\n",
-      "Tokenizer eğitimi için batch 11 işleniyor...\n",
-      "Tokenizer eğitimi için batch 12 işleniyor...\n",
-      "Tokenizer eğitimi için batch 13 işleniyor...\n",
-      "Tokenizer eğitimi için batch 14 işleniyor...\n",
-      "Tokenizer eğitimi için batch 15 işleniyor...\n",
-      "Tokenizer eğitimi için batch 16 işleniyor...\n",
-      "Tokenizer eğitimi için batch 17 işleniyor...\n",
-      "Tokenizer eğitimi için batch 18 işleniyor...\n",
-      "Tokenizer eğitimi için batch 19 işleniyor...\n",
-      "Tokenizer eğitimi için batch 20 işleniyor...\n",
-      "Tokenizer eğitimi için batch 21 işleniyor...\n",
-      "Tokenizer eğitimi için batch 22 işleniyor...\n",
-      "Tokenizer eğitimi için batch 23 işleniyor...\n",
-      "Tokenizer eğitimi için batch 24 işleniyor...\n",
-      "Tokenizer eğitimi için batch 25 işleniyor...\n",
-      "Tokenizer eğitimi için batch 26 işleniyor...\n",
-      "Tokenizer eğitimi için batch 27 işleniyor...\n",
-      "Tokenizer eğitimi için batch 28 işleniyor...\n",
-      "Tokenizer eğitimi için batch 29 işleniyor...\n",
-      "Tokenizer eğitimi için batch 30 işleniyor...\n",
-      "Tokenizer eğitimi için batch 31 işleniyor...\n",
-      "Tokenizer eğitimi için batch 32 işleniyor...\n",
-      "Tokenizer eğitimi için batch 33 işleniyor...\n",
-      "Tokenizer eğitimi için batch 34 işleniyor...\n",
-      "Tokenizer eğitimi için batch 35 işleniyor...\n",
-      "Tokenizer eğitimi için batch 36 işleniyor...\n",
-      "Tokenizer eğitimi için batch 37 işleniyor...\n",
-      "Tokenizer eğitimi için batch 38 işleniyor...\n",
-      "Tokenizer eğitimi için batch 39 işleniyor...\n",
-      "Tokenizer eğitimi için batch 40 işleniyor...\n",
-      "Tokenizer eğitimi için batch 41 işleniyor...\n",
-      "Tokenizer eğitimi için batch 42 işleniyor...\n",
-      "Tokenizer eğitimi için batch 43 işleniyor...\n",
-      "Tokenizer eğitimi için batch 44 işleniyor...\n",
-      "Tokenizer eğitimi için batch 45 işleniyor...\n",
-      "Tokenizer eğitimi için batch 46 işleniyor...\n",
-      "Tokenizer eğitimi için batch 47 işleniyor...\n",
-      "Tokenizer eğitimi için batch 48 işleniyor...\n",
-      "Tokenizer eğitimi için batch 49 işleniyor...\n",
-      "Tokenizer eğitimi için batch 50 işleniyor...\n",
-      "Tokenizer eğitimi için batch 51 işleniyor...\n",
-      "Tokenizer eğitimi için batch 52 işleniyor...\n",
-      "Tokenizer eğitimi için batch 53 işleniyor...\n",
-      "Tokenizer eğitimi için batch 54 işleniyor...\n",
-      "Tokenizer eğitimi için batch 55 işleniyor...\n",
-      "Tokenizer eğitimi için batch 56 işleniyor...\n",
-      "Tokenizer eğitimi için batch 57 işleniyor...\n",
-      "Tokenizer eğitimi için batch 58 işleniyor...\n",
-      "Tokenizer eğitimi için batch 59 işleniyor...\n",
-      "Tokenizer eğitimi için batch 60 işleniyor...\n",
-      "Tokenizer eğitimi için batch 61 işleniyor...\n",
-      "Tokenizer eğitimi için batch 62 işleniyor...\n",
-      "Tokenizer eğitimi için batch 63 işleniyor...\n",
-      "Tokenizer eğitimi için batch 64 işleniyor...\n",
-      "Tokenizer eğitimi için batch 65 işleniyor...\n",
-      "Tokenizer eğitimi için batch 66 işleniyor...\n",
-      "Tokenizer eğitimi için batch 67 işleniyor...\n",
-      "Tokenizer eğitimi için batch 68 işleniyor...\n",
-      "Tokenizer eğitimi için batch 69 işleniyor...\n",
-      "Tokenizer eğitimi için batch 70 işleniyor...\n",
-      "Tokenizer eğitimi için batch 71 işleniyor...\n",
-      "Tokenizer eğitimi için batch 72 işleniyor...\n",
-      "Tokenizer eğitimi için batch 73 işleniyor...\n",
-      "Tokenizer eğitimi için batch 74 işleniyor...\n",
-      "Tokenizer eğitimi için batch 75 işleniyor...\n",
-      "Tokenizer eğitimi için batch 76 işleniyor...\n",
-      "Tokenizer eğitimi için batch 77 işleniyor...\n",
-      "Tokenizer eğitimi için batch 78 işleniyor...\n",
-      "Tokenizer eğitimi için batch 79 işleniyor...\n",
-      "Tokenizer eğitimi için batch 80 işleniyor...\n",
-      "Tokenizer eğitimi için batch 81 işleniyor...\n",
-      "Tokenizer eğitimi için batch 82 işleniyor...\n",
-      "Tokenizer eğitimi için batch 83 işleniyor...\n",
-      "Tokenizer eğitimi için batch 84 işleniyor...\n",
-      "Tokenizer eğitimi için batch 85 işleniyor...\n",
-      "Tokenizer eğitimi için batch 86 işleniyor...\n",
-      "Tokenizer eğitimi için batch 87 işleniyor...\n",
-      "Tokenizer eğitimi için batch 88 işleniyor...\n",
-      "Tokenizer eğitimi için batch 89 işleniyor...\n",
-      "Tokenizer eğitimi için batch 90 işleniyor...\n",
-      "Tokenizer eğitimi için batch 91 işleniyor...\n",
-      "Tokenizer eğitimi için batch 92 işleniyor...\n",
-      "Tokenizer eğitimi için batch 93 işleniyor...\n",
-      "Tokenizer eğitimi için batch 94 işleniyor...\n",
-      "Tokenizer eğitimi için batch 95 işleniyor...\n",
-      "Tokenizer eğitimi için batch 96 işleniyor...\n",
-      "Tokenizer eğitimi için batch 97 işleniyor...\n",
-      "Tokenizer eğitimi için batch 98 işleniyor...\n",
-      "Tokenizer eğitimi için batch 99 işleniyor...\n",
-      "Tokenizer eğitimi için batch 100 işleniyor...\n",
-      "Tokenizer eğitimi için batch 101 işleniyor...\n",
-      "Tokenizer eğitimi için batch 102 işleniyor...\n",
-      "Tokenizer eğitimi için batch 103 işleniyor...\n",
-      "Tokenizer eğitimi için batch 104 işleniyor...\n",
-      "Tokenizer eğitimi için batch 105 işleniyor...\n",
-      "Tokenizer eğitimi için batch 106 işleniyor...\n",
-      "Tokenizer eğitimi için batch 107 işleniyor...\n",
-      "Tokenizer eğitimi için batch 108 işleniyor...\n",
-      "Tokenizer eğitimi için batch 109 işleniyor...\n",
-      "Tokenizer eğitimi için batch 110 işleniyor...\n",
-      "Tokenizer eğitimi için batch 111 işleniyor...\n",
-      "Tokenizer eğitimi için batch 112 işleniyor...\n",
-      "Tokenizer eğitimi için batch 113 işleniyor...\n",
-      "Tokenizer eğitimi için batch 114 işleniyor...\n",
-      "Tokenizer eğitimi için batch 115 işleniyor...\n",
-      "Tokenizer eğitimi için batch 116 işleniyor...\n",
-      "Tokenizer eğitimi için batch 117 işleniyor...\n",
-      "Tokenizer eğitimi için batch 118 işleniyor...\n",
-      "Tokenizer eğitimi için batch 119 işleniyor...\n",
-      "Tokenizer eğitimi için batch 120 işleniyor...\n",
-      "Tokenizer eğitimi için batch 121 işleniyor...\n",
-      "Tokenizer eğitimi için batch 122 işleniyor...\n",
-      "Tokenizer eğitimi için batch 123 işleniyor...\n",
-      "Tokenizer eğitimi için batch 124 işleniyor...\n",
-      "Tokenizer eğitimi için batch 125 işleniyor...\n",
-      "Tokenizer eğitimi için batch 126 işleniyor...\n",
-      "Tokenizer eğitimi için batch 127 işleniyor...\n",
-      "Tokenizer eğitimi için batch 128 işleniyor...\n",
-      "Tokenizer eğitimi için batch 129 işleniyor...\n",
-      "Tokenizer eğitimi için batch 130 işleniyor...\n",
-      "Tokenizer eğitimi için batch 131 işleniyor...\n",
-      "Tokenizer eğitimi için batch 132 işleniyor...\n",
-      "Tokenizer eğitimi için batch 133 işleniyor...\n",
-      "Tokenizer eğitimi için batch 134 işleniyor...\n",
-      "Tokenizer eğitimi için batch 135 işleniyor...\n",
-      "Tokenizer eğitimi için batch 136 işleniyor...\n",
-      "Tokenizer eğitimi için batch 137 işleniyor...\n",
-      "Tokenizer eğitimi için batch 138 işleniyor...\n",
-      "Tokenizer eğitimi için batch 139 işleniyor...\n",
-      "Tokenizer eğitimi için batch 140 işleniyor...\n",
-      "Tokenizer eğitimi için batch 141 işleniyor...\n",
-      "Tokenizer eğitimi için batch 142 işleniyor...\n",
-      "Tokenizer eğitimi için batch 143 işleniyor...\n",
-      "Tokenizer eğitimi için batch 144 işleniyor...\n",
-      "Tokenizer eğitimi için batch 145 işleniyor...\n",
-      "Tokenizer eğitimi için batch 146 işleniyor...\n",
-      "Tokenizer eğitimi için batch 147 işleniyor...\n",
-      "Tokenizer eğitimi için batch 148 işleniyor...\n",
-      "Tokenizer eğitimi için batch 149 işleniyor...\n",
-      "Tokenizer eğitimi için batch 150 işleniyor...\n",
-      "Tokenizer eğitimi için batch 151 işleniyor...\n",
-      "Tokenizer eğitimi için batch 152 işleniyor...\n",
-      "Tokenizer eğitimi için batch 153 işleniyor...\n",
-      "Tokenizer eğitimi için batch 154 işleniyor...\n",
-      "Tokenizer eğitimi için batch 155 işleniyor...\n",
-      "Tokenizer eğitimi için batch 156 işleniyor...\n",
-      "Tokenizer eğitimi için batch 157 işleniyor...\n",
-      "Tokenizer eğitimi için batch 158 işleniyor...\n",
-      "Tokenizer eğitimi için batch 159 işleniyor...\n",
-      "Tokenizer eğitimi için batch 160 işleniyor...\n",
-      "Tokenizer eğitimi için batch 161 işleniyor...\n",
-      "Tokenizer eğitimi için batch 162 işleniyor...\n",
-      "Tokenizer eğitimi için batch 163 işleniyor...\n",
-      "Tokenizer eğitimi için batch 164 işleniyor...\n",
-      "Tokenizer eğitimi için batch 165 işleniyor...\n",
-      "Tokenizer eğitimi için batch 166 işleniyor...\n",
-      "Tokenizer eğitimi için batch 167 işleniyor...\n",
-      "Tokenizer eğitimi için batch 168 işleniyor...\n",
-      "Tokenizer eğitimi için batch 169 işleniyor...\n",
-      "Tokenizer eğitimi için batch 170 işleniyor...\n",
-      "Tokenizer eğitimi için batch 171 işleniyor...\n",
-      "Tokenizer eğitimi için batch 172 işleniyor...\n",
-      "Tokenizer eğitimi için batch 173 işleniyor...\n",
-      "Tokenizer eğitimi için batch 174 işleniyor...\n",
-      "Tokenizer eğitimi için batch 175 işleniyor...\n",
-      "Tokenizer eğitimi için batch 176 işleniyor...\n",
-      "Tokenizer eğitimi için batch 177 işleniyor...\n",
-      "Tokenizer eğitimi için batch 178 işleniyor...\n",
-      "Tokenizer eğitimi için batch 179 işleniyor...\n",
-      "Tokenizer eğitimi için batch 180 işleniyor...\n",
-      "Tokenizer eğitimi için batch 181 işleniyor...\n",
-      "Tokenizer eğitimi için batch 182 işleniyor...\n",
-      "Tokenizer eğitimi için batch 183 işleniyor...\n",
-      "Tokenizer eğitimi için batch 184 işleniyor...\n",
-      "Tokenizer eğitimi için batch 185 işleniyor...\n",
-      "Tokenizer eğitimi için batch 186 işleniyor...\n",
-      "Tokenizer eğitimi için batch 187 işleniyor...\n",
-      "Tokenizer eğitimi için batch 188 işleniyor...\n",
-      "Tokenizer eğitimi için batch 189 işleniyor...\n",
-      "Tokenizer eğitimi için batch 190 işleniyor...\n",
-      "Tokenizer eğitimi için batch 191 işleniyor...\n",
-      "Tokenizer eğitimi için batch 192 işleniyor...\n",
-      "Tokenizer eğitimi için batch 193 işleniyor...\n",
-      "Tokenizer eğitimi için batch 194 işleniyor...\n",
-      "Tokenizer eğitimi için batch 195 işleniyor...\n",
-      "Tokenizer eğitimi için batch 196 işleniyor...\n",
-      "Tokenizer eğitimi için batch 197 işleniyor...\n",
-      "Tokenizer eğitimi için batch 198 işleniyor...\n",
-      "Tokenizer eğitimi için batch 199 işleniyor...\n",
-      "Tokenizer eğitimi için batch 200 işleniyor...\n",
-      "Tokenizer eğitimi için batch 201 işleniyor...\n",
-      "Tokenizer eğitimi için batch 202 işleniyor...\n",
-      "Tokenizer eğitimi için batch 203 işleniyor...\n",
-      "Tokenizer eğitimi için batch 204 işleniyor...\n",
-      "Tokenizer eğitimi için batch 205 işleniyor...\n",
-      "Tokenizer eğitimi için batch 206 işleniyor...\n",
-      "Tokenizer eğitimi için batch 207 işleniyor...\n",
-      "Tokenizer eğitimi için batch 208 işleniyor...\n",
-      "Tokenizer eğitimi için batch 209 işleniyor...\n",
-      "Tokenizer eğitimi için batch 210 işleniyor...\n",
-      "Tokenizer eğitimi için batch 211 işleniyor...\n",
-      "Tokenizer eğitimi için batch 212 işleniyor...\n",
-      "Tokenizer eğitimi için batch 213 işleniyor...\n",
-      "Tokenizer eğitimi için batch 214 işleniyor...\n",
-      "Tokenizer eğitimi için batch 215 işleniyor...\n",
-      "Tokenizer eğitimi için batch 216 işleniyor...\n",
-      "Tokenizer eğitimi için batch 217 işleniyor...\n",
-      "Tokenizer eğitimi için batch 218 işleniyor...\n",
-      "Tokenizer eğitimi için batch 219 işleniyor...\n",
-      "Tokenizer eğitimi için batch 220 işleniyor...\n",
-      "Tokenizer eğitimi için batch 221 işleniyor...\n",
-      "Tokenizer eğitimi için batch 222 işleniyor...\n",
-      "Tokenizer eğitimi için batch 223 işleniyor...\n",
-      "Tokenizer eğitimi için batch 224 işleniyor...\n",
-      "Tokenizer eğitimi için batch 225 işleniyor...\n",
-      "Tokenizer eğitimi için batch 226 işleniyor...\n",
-      "Tokenizer eğitimi için batch 227 işleniyor...\n",
-      "Tokenizer eğitimi için batch 228 işleniyor...\n",
-      "Tokenizer eğitimi için batch 229 işleniyor...\n",
-      "Tokenizer eğitimi için batch 230 işleniyor...\n",
-      "Tokenizer eğitimi için batch 231 işleniyor...\n",
-      "Tokenizer eğitimi için batch 232 işleniyor...\n",
-      "Tokenizer eğitimi için batch 233 işleniyor...\n",
-      "Tokenizer eğitimi için batch 234 işleniyor...\n",
-      "Tokenizer eğitimi için batch 235 işleniyor...\n",
-      "Tokenizer eğitimi için batch 236 işleniyor...\n",
-      "Tokenizer eğitimi için batch 237 işleniyor...\n",
-      "Tokenizer eğitimi için batch 238 işleniyor...\n",
-      "Tokenizer eğitimi için batch 239 işleniyor...\n",
-      "Tokenizer eğitimi için batch 240 işleniyor...\n",
-      "Tokenizer eğitimi için batch 241 işleniyor...\n",
-      "Tokenizer eğitimi için batch 242 işleniyor...\n",
-      "Tokenizer eğitimi için batch 243 işleniyor...\n",
-      "Tokenizer eğitimi için batch 244 işleniyor...\n",
-      "Tokenizer eğitimi için batch 245 işleniyor...\n",
-      "Tokenizer eğitimi için batch 246 işleniyor...\n",
-      "Tokenizer eğitimi için batch 247 işleniyor...\n",
-      "Tokenizer eğitimi için batch 248 işleniyor...\n",
-      "Tokenizer eğitimi için batch 249 işleniyor...\n",
-      "Tokenizer eğitimi için batch 250 işleniyor...\n",
-      "Tokenizer eğitimi için batch 251 işleniyor...\n",
-      "Tokenizer eğitimi için batch 252 işleniyor...\n",
-      "Tokenizer eğitimi için batch 253 işleniyor...\n",
-      "Tokenizer eğitimi için batch 254 işleniyor...\n",
-      "Tokenizer eğitimi için batch 255 işleniyor...\n",
-      "Tokenizer eğitimi için batch 256 işleniyor...\n",
-      "Tokenizer eğitimi için batch 257 işleniyor...\n",
-      "Tokenizer eğitimi için batch 258 işleniyor...\n",
-      "Tokenizer eğitimi için batch 259 işleniyor...\n",
-      "Tokenizer eğitimi için batch 260 işleniyor...\n",
-      "Tokenizer eğitimi için batch 261 işleniyor...\n",
-      "Tokenizer eğitimi için batch 262 işleniyor...\n",
-      "Tokenizer eğitimi için batch 263 işleniyor...\n",
-      "Tokenizer eğitimi için batch 264 işleniyor...\n",
-      "Tokenizer eğitimi için batch 265 işleniyor...\n",
-      "Tokenizer eğitimi için batch 266 işleniyor...\n",
-      "Tokenizer eğitimi için batch 267 işleniyor...\n",
-      "Tokenizer eğitimi için batch 268 işleniyor...\n",
-      "Tokenizer eğitimi için batch 269 işleniyor...\n",
-      "Tokenizer eğitimi için batch 270 işleniyor...\n",
-      "Tokenizer eğitimi için batch 271 işleniyor...\n",
-      "Tokenizer eğitimi için batch 272 işleniyor...\n",
-      "Tokenizer eğitimi için batch 273 işleniyor...\n",
-      "Tokenizer eğitimi için batch 274 işleniyor...\n",
-      "Tokenizer eğitimi için batch 275 işleniyor...\n",
-      "Tokenizer eğitimi için batch 276 işleniyor...\n",
-      "Tokenizer eğitimi için batch 277 işleniyor...\n",
-      "Tokenizer eğitimi için batch 278 işleniyor...\n",
-      "Tokenizer eğitimi için batch 279 işleniyor...\n",
-      "Tokenizer eğitimi için batch 280 işleniyor...\n",
-      "Tokenizer eğitimi için batch 281 işleniyor...\n",
-      "Tokenizer eğitimi için batch 282 işleniyor...\n",
-      "Tokenizer eğitimi için batch 283 işleniyor...\n",
-      "Tokenizer eğitimi için batch 284 işleniyor...\n",
-      "Tokenizer eğitimi için batch 285 işleniyor...\n",
-      "Tokenizer eğitimi için batch 286 işleniyor...\n",
-      "Tokenizer eğitimi için batch 287 işleniyor...\n",
-      "Tokenizer eğitimi için batch 288 işleniyor...\n",
-      "Tokenizer eğitimi için batch 289 işleniyor...\n",
-      "Tokenizer eğitimi için batch 290 işleniyor...\n",
-      "Tokenizer eğitimi için batch 291 işleniyor...\n",
-      "Tokenizer eğitimi için batch 292 işleniyor...\n",
-      "Tokenizer eğitimi için batch 293 işleniyor...\n",
-      "Tokenizer eğitimi için batch 294 işleniyor...\n",
-      "Tokenizer eğitimi için batch 295 işleniyor...\n",
-      "Tokenizer eğitimi için batch 296 işleniyor...\n",
-      "Tokenizer eğitimi için batch 297 işleniyor...\n",
-      "Tokenizer eğitimi için batch 298 işleniyor...\n",
-      "Tokenizer eğitimi için batch 299 işleniyor...\n",
-      "Tokenizer eğitimi için batch 300 işleniyor...\n",
-      "Tokenizer eğitimi için batch 301 işleniyor...\n",
-      "Tokenizer eğitimi için batch 302 işleniyor...\n",
-      "Tokenizer eğitimi için batch 303 işleniyor...\n",
-      "Tokenizer eğitimi için batch 304 işleniyor...\n",
-      "Tokenizer eğitimi için batch 305 işleniyor...\n",
-      "Tokenizer eğitimi için batch 306 işleniyor...\n",
-      "Tokenizer eğitimi için batch 307 işleniyor...\n",
-      "Tokenizer eğitimi için batch 308 işleniyor...\n",
-      "Tokenizer eğitimi için batch 309 işleniyor...\n",
-      "Tokenizer eğitimi için batch 310 işleniyor...\n",
-      "Tokenizer eğitimi için batch 311 işleniyor...\n",
-      "Tokenizer eğitimi için batch 312 işleniyor...\n",
-      "Tokenizer eğitimi için batch 313 işleniyor...\n",
-      "Tokenizer eğitimi için batch 314 işleniyor...\n",
-      "Tokenizer eğitimi için batch 315 işleniyor...\n",
-      "Tokenizer eğitimi için batch 316 işleniyor...\n",
-      "Tokenizer eğitimi için batch 317 işleniyor...\n",
-      "Tokenizer eğitimi için batch 318 işleniyor...\n",
-      "Tokenizer eğitimi için batch 319 işleniyor...\n",
-      "Tokenizer eğitimi için batch 320 işleniyor...\n",
-      "Tokenizer eğitimi için batch 321 işleniyor...\n",
-      "Tokenizer eğitimi için batch 322 işleniyor...\n",
-      "Tokenizer eğitimi için batch 323 işleniyor...\n",
-      "Tokenizer eğitimi için batch 324 işleniyor...\n",
-      "Tokenizer eğitimi için batch 325 işleniyor...\n",
-      "Tokenizer eğitimi için batch 326 işleniyor...\n",
-      "Tokenizer eğitimi için batch 327 işleniyor...\n",
-      "Tokenizer eğitimi için batch 328 işleniyor...\n",
-      "Tokenizer eğitimi için batch 329 işleniyor...\n",
-      "Tokenizer eğitimi için batch 330 işleniyor...\n",
-      "Tokenizer eğitimi için batch 331 işleniyor...\n",
-      "Tokenizer eğitimi için batch 332 işleniyor...\n",
-      "Tokenizer eğitimi için batch 333 işleniyor...\n",
-      "Tokenizer eğitimi için batch 334 işleniyor...\n",
-      "Tokenizer eğitimi için batch 335 işleniyor...\n",
-      "Tokenizer eğitimi için batch 336 işleniyor...\n",
-      "Tokenizer eğitimi için batch 337 işleniyor...\n",
-      "Tokenizer eğitimi için batch 338 işleniyor...\n",
-      "Tokenizer eğitimi için batch 339 işleniyor...\n",
-      "Tokenizer eğitimi için batch 340 işleniyor...\n",
-      "Tokenizer eğitimi için batch 341 işleniyor...\n",
-      "Tokenizer eğitimi için batch 342 işleniyor...\n",
-      "Tokenizer eğitimi için batch 343 işleniyor...\n",
-      "Tokenizer eğitimi için batch 344 işleniyor...\n",
-      "Tokenizer eğitimi için batch 345 işleniyor...\n",
-      "Tokenizer eğitimi için batch 346 işleniyor...\n",
-      "Tokenizer eğitimi için batch 347 işleniyor...\n",
-      "Tokenizer eğitimi için batch 348 işleniyor...\n",
-      "Tokenizer eğitimi için batch 349 işleniyor...\n",
-      "Tokenizer eğitimi için batch 350 işleniyor...\n",
-      "Tokenizer eğitimi için batch 351 işleniyor...\n",
-      "Tokenizer eğitimi için batch 352 işleniyor...\n",
-      "Tokenizer eğitimi için batch 353 işleniyor...\n",
-      "Tokenizer eğitimi için batch 354 işleniyor...\n",
-      "Tokenizer eğitimi için batch 355 işleniyor...\n",
-      "Tokenizer eğitimi için batch 356 işleniyor...\n",
-      "Tokenizer eğitimi için batch 357 işleniyor...\n",
-      "Tokenizer eğitimi için batch 358 işleniyor...\n",
-      "Tokenizer eğitimi için batch 359 işleniyor...\n",
-      "Tokenizer eğitimi için batch 360 işleniyor...\n",
-      "Tokenizer eğitimi için batch 361 işleniyor...\n",
-      "Tokenizer eğitimi için batch 362 işleniyor...\n",
-      "Tokenizer eğitimi için batch 363 işleniyor...\n",
-      "Tokenizer eğitimi için batch 364 işleniyor...\n",
-      "Tokenizer eğitimi için batch 365 işleniyor...\n",
-      "Tokenizer eğitimi için batch 366 işleniyor...\n",
-      "Tokenizer eğitimi için batch 367 işleniyor...\n",
-      "Tokenizer eğitimi için batch 368 işleniyor...\n",
-      "Tokenizer eğitimi için batch 369 işleniyor...\n",
-      "Tokenizer eğitimi için batch 370 işleniyor...\n",
-      "Tokenizer eğitimi için batch 371 işleniyor...\n",
-      "Tokenizer eğitimi için batch 372 işleniyor...\n",
-      "Tokenizer eğitimi için batch 373 işleniyor...\n",
-      "Tokenizer eğitimi için batch 374 işleniyor...\n",
-      "Tokenizer eğitimi için batch 375 işleniyor...\n",
-      "Tokenizer eğitimi için batch 376 işleniyor...\n",
-      "Tokenizer eğitimi için batch 377 işleniyor...\n",
-      "Tokenizer eğitimi için batch 378 işleniyor...\n",
-      "Tokenizer eğitimi için batch 379 işleniyor...\n",
-      "Tokenizer eğitimi için batch 380 işleniyor...\n",
-      "Tokenizer eğitimi için batch 381 işleniyor...\n",
-      "Tokenizer eğitimi için batch 382 işleniyor...\n",
-      "Tokenizer eğitimi için batch 383 işleniyor...\n",
-      "Tokenizer eğitimi için batch 384 işleniyor...\n",
-      "Tokenizer eğitimi için batch 385 işleniyor...\n",
-      "Tokenizer eğitimi için batch 386 işleniyor...\n",
-      "Tokenizer eğitimi için batch 387 işleniyor...\n",
-      "Tokenizer eğitimi için batch 388 işleniyor...\n",
-      "Tokenizer eğitimi için batch 389 işleniyor...\n",
-      "Tokenizer eğitimi için batch 390 işleniyor...\n",
-      "Tokenizer eğitimi için batch 391 işleniyor...\n",
-      "Tokenizer eğitimi için batch 392 işleniyor...\n",
-      "Tokenizer eğitimi için batch 393 işleniyor...\n",
-      "Tokenizer eğitimi için batch 394 işleniyor...\n",
-      "Tokenizer eğitimi için batch 395 işleniyor...\n",
-      "Tokenizer eğitimi için batch 396 işleniyor...\n",
-      "Tokenizer eğitimi için batch 397 işleniyor...\n",
-      "Tokenizer eğitimi için batch 398 işleniyor...\n",
-      "Tokenizer eğitimi için batch 399 işleniyor...\n",
-      "Tokenizer eğitimi için batch 400 işleniyor...\n",
-      "Tokenizer eğitimi için batch 401 işleniyor...\n",
-      "Tokenizer eğitimi için batch 402 işleniyor...\n",
-      "Tokenizer eğitimi için batch 403 işleniyor...\n",
-      "Tokenizer eğitimi için batch 404 işleniyor...\n",
-      "Tokenizer eğitimi için batch 405 işleniyor...\n",
-      "Tokenizer eğitimi için batch 406 işleniyor...\n",
-      "Tokenizer eğitimi için batch 407 işleniyor...\n",
-      "Tokenizer eğitimi için batch 408 işleniyor...\n",
-      "Tokenizer eğitimi için batch 409 işleniyor...\n",
-      "Tokenizer eğitimi için batch 410 işleniyor...\n",
-      "Tokenizer eğitimi için batch 411 işleniyor...\n",
-      "Tokenizer eğitimi için batch 412 işleniyor...\n",
-      "Tokenizer eğitimi için batch 413 işleniyor...\n",
-      "Tokenizer eğitimi için batch 414 işleniyor...\n",
-      "Tokenizer eğitimi için batch 415 işleniyor...\n",
-      "Tokenizer eğitimi için batch 416 işleniyor...\n",
-      "Tokenizer eğitimi için batch 417 işleniyor...\n",
-      "Tokenizer eğitimi için batch 418 işleniyor...\n",
-      "Tokenizer eğitimi için batch 419 işleniyor...\n",
-      "Tokenizer eğitimi için batch 420 işleniyor...\n",
-      "Tokenizer eğitimi için batch 421 işleniyor...\n",
-      "Tokenizer eğitimi için batch 422 işleniyor...\n",
-      "Tokenizer eğitimi için batch 423 işleniyor...\n",
-      "Tokenizer eğitimi için batch 424 işleniyor...\n",
-      "Tokenizer eğitimi için batch 425 işleniyor...\n",
-      "Tokenizer eğitimi için batch 426 işleniyor...\n",
-      "Tokenizer eğitimi için batch 427 işleniyor...\n",
-      "Tokenizer eğitimi için batch 428 işleniyor...\n",
-      "Tokenizer eğitimi için batch 429 işleniyor...\n",
-      "Tokenizer eğitimi için batch 430 işleniyor...\n",
-      "Tokenizer eğitimi için batch 431 işleniyor...\n",
-      "Tokenizer eğitimi için batch 432 işleniyor...\n",
-      "Tokenizer eğitimi için batch 433 işleniyor...\n",
-      "Tokenizer eğitimi için batch 434 işleniyor...\n",
-      "Tokenizer eğitimi için batch 435 işleniyor...\n",
-      "Tokenizer eğitimi için batch 436 işleniyor...\n",
-      "Tokenizer eğitimi için batch 437 işleniyor...\n",
-      "Tokenizer eğitimi için batch 438 işleniyor...\n",
-      "Tokenizer eğitimi için batch 439 işleniyor...\n",
-      "Tokenizer eğitimi için batch 440 işleniyor...\n",
-      "Tokenizer eğitimi için batch 441 işleniyor...\n",
-      "Tokenizer eğitimi için batch 442 işleniyor...\n",
-      "Tokenizer eğitimi için batch 443 işleniyor...\n",
-      "Tokenizer eğitimi için batch 444 işleniyor...\n",
-      "Tokenizer eğitimi için batch 445 işleniyor...\n",
-      "Tokenizer eğitimi için batch 446 işleniyor...\n",
-      "Tokenizer eğitimi için batch 447 işleniyor...\n",
-      "Tokenizer eğitimi için batch 448 işleniyor...\n",
-      "Tokenizer eğitimi için batch 449 işleniyor...\n",
-      "Tokenizer eğitimi için batch 450 işleniyor...\n",
-      "Tokenizer eğitimi için batch 451 işleniyor...\n",
-      "Tokenizer eğitimi için batch 452 işleniyor...\n",
-      "Tokenizer eğitimi için batch 453 işleniyor...\n",
-      "Tokenizer eğitimi için batch 454 işleniyor...\n",
-      "Tokenizer eğitimi için batch 455 işleniyor...\n",
-      "Tokenizer eğitimi için batch 456 işleniyor...\n",
-      "Tokenizer eğitimi için batch 457 işleniyor...\n",
-      "Tokenizer eğitimi için batch 458 işleniyor...\n",
-      "Tokenizer eğitimi için batch 459 işleniyor...\n",
-      "Tokenizer eğitimi için batch 460 işleniyor...\n",
-      "Tokenizer eğitimi için batch 461 işleniyor...\n",
-      "Tokenizer eğitimi için batch 462 işleniyor...\n",
-      "Tokenizer eğitimi için batch 463 işleniyor...\n",
-      "Tokenizer eğitimi için batch 464 işleniyor...\n",
-      "Tokenizer eğitimi için batch 465 işleniyor...\n",
-      "Tokenizer eğitimi için batch 466 işleniyor...\n",
-      "Tokenizer eğitimi için batch 467 işleniyor...\n",
-      "Tokenizer eğitimi için batch 468 işleniyor...\n",
-      "Tokenizer eğitimi için batch 469 işleniyor...\n",
-      "Tokenizer eğitimi için batch 470 işleniyor...\n",
-      "Tokenizer eğitimi için batch 471 işleniyor...\n",
-      "Tokenizer eğitimi için batch 472 işleniyor...\n",
-      "Tokenizer eğitimi için batch 473 işleniyor...\n",
-      "Tokenizer eğitimi için batch 474 işleniyor...\n",
-      "Tokenizer eğitimi için batch 475 işleniyor...\n",
-      "Tokenizer eğitimi için batch 476 işleniyor...\n",
-      "Tokenizer eğitimi için batch 477 işleniyor...\n",
-      "Tokenizer eğitimi için batch 478 işleniyor...\n",
-      "Tokenizer eğitimi için batch 479 işleniyor...\n",
-      "Tokenizer eğitimi için batch 480 işleniyor...\n",
-      "Tokenizer eğitimi için batch 481 işleniyor...\n",
-      "Tokenizer eğitimi için batch 482 işleniyor...\n",
-      "Tokenizer eğitimi için batch 483 işleniyor...\n",
-      "Tokenizer eğitimi için batch 484 işleniyor...\n",
-      "Tokenizer eğitimi için batch 485 işleniyor...\n",
-      "Tokenizer eğitimi için batch 486 işleniyor...\n",
-      "Tokenizer eğitimi için batch 487 işleniyor...\n",
-      "Tokenizer eğitimi için batch 488 işleniyor...\n",
-      "Tokenizer eğitimi için batch 489 işleniyor...\n",
-      "Tokenizer eğitimi için batch 490 işleniyor...\n",
-      "Tokenizer eğitimi için batch 491 işleniyor...\n",
-      "Tokenizer eğitimi için batch 492 işleniyor...\n",
-      "Tokenizer eğitimi için batch 493 işleniyor...\n",
-      "Tokenizer eğitimi için batch 494 işleniyor...\n",
-      "Tokenizer eğitimi için batch 495 işleniyor...\n",
-      "Tokenizer eğitimi için batch 496 işleniyor...\n",
-      "Tokenizer eğitimi için batch 497 işleniyor...\n",
-      "Tokenizer eğitimi için batch 498 işleniyor...\n",
-      "Tokenizer eğitimi için batch 499 işleniyor...\n",
-      "Tokenizer eğitimi için batch 500 işleniyor...\n",
-      "Tokenizer eğitimi için batch 501 işleniyor...\n",
-      "Tokenizer eğitimi için batch 502 işleniyor...\n",
-      "Tokenizer eğitimi için batch 503 işleniyor...\n",
-      "Tokenizer eğitimi için batch 504 işleniyor...\n",
-      "Tokenizer eğitimi için batch 505 işleniyor...\n",
-      "Tokenizer eğitimi için batch 506 işleniyor...\n",
-      "Tokenizer eğitimi için batch 507 işleniyor...\n",
-      "Tokenizer eğitimi için batch 508 işleniyor...\n",
-      "Tokenizer eğitimi için batch 509 işleniyor...\n",
-      "Tokenizer eğitimi için batch 510 işleniyor...\n",
-      "Tokenizer eğitimi için batch 511 işleniyor...\n",
-      "Tokenizer eğitimi için batch 512 işleniyor...\n",
-      "Tokenizer eğitimi için batch 513 işleniyor...\n",
-      "Tokenizer eğitimi için batch 514 işleniyor...\n",
-      "Tokenizer eğitimi için batch 515 işleniyor...\n",
-      "Tokenizer eğitimi için batch 516 işleniyor...\n",
-      "Tokenizer eğitimi için batch 517 işleniyor...\n",
-      "Tokenizer eğitimi için batch 518 işleniyor...\n",
-      "Tokenizer eğitimi için batch 519 işleniyor...\n",
-      "Tokenizer eğitimi için batch 520 işleniyor...\n",
-      "Tokenizer eğitimi için batch 521 işleniyor...\n",
-      "Tokenizer eğitimi için batch 522 işleniyor...\n",
-      "Tokenizer eğitimi için batch 523 işleniyor...\n",
-      "Tokenizer eğitimi için batch 524 işleniyor...\n",
-      "Tokenizer eğitimi için batch 525 işleniyor...\n",
-      "Tokenizer eğitimi için batch 526 işleniyor...\n",
-      "Tokenizer eğitimi için batch 527 işleniyor...\n",
-      "Tokenizer eğitimi için batch 528 işleniyor...\n",
-      "Tokenizer eğitimi için batch 529 işleniyor...\n",
-      "Tokenizer eğitimi için batch 530 işleniyor...\n",
-      "Tokenizer eğitimi için batch 531 işleniyor...\n",
-      "Tokenizer eğitimi için batch 532 işleniyor...\n",
-      "Tokenizer eğitimi için batch 533 işleniyor...\n",
-      "Tokenizer eğitimi için batch 534 işleniyor...\n",
-      "Tokenizer eğitimi için batch 535 işleniyor...\n",
-      "Tokenizer eğitimi için batch 536 işleniyor...\n",
-      "Tokenizer eğitimi için batch 537 işleniyor...\n",
-      "Tokenizer eğitimi için batch 538 işleniyor...\n",
-      "Tokenizer eğitimi için batch 539 işleniyor...\n",
-      "Tokenizer eğitimi için batch 540 işleniyor...\n",
-      "Tokenizer eğitimi için batch 541 işleniyor...\n",
-      "Tokenizer eğitimi için batch 542 işleniyor...\n",
-      "Tokenizer eğitimi için batch 543 işleniyor...\n",
-      "Tokenizer eğitimi için batch 544 işleniyor...\n",
-      "Tokenizer eğitimi için batch 545 işleniyor...\n",
-      "Tokenizer eğitimi için batch 546 işleniyor...\n",
-      "Tokenizer eğitimi için batch 547 işleniyor...\n",
-      "Tokenizer eğitimi için batch 548 işleniyor...\n",
-      "Tokenizer eğitimi için batch 549 işleniyor...\n",
-      "Tokenizer eğitimi için batch 550 işleniyor...\n",
-      "Tokenizer eğitimi için batch 551 işleniyor...\n",
-      "Tokenizer eğitimi için batch 552 işleniyor...\n",
-      "Tokenizer eğitimi için batch 553 işleniyor...\n",
-      "Tokenizer eğitimi için batch 554 işleniyor...\n",
-      "Tokenizer eğitimi için batch 555 işleniyor...\n",
-      "Tokenizer eğitimi için batch 556 işleniyor...\n",
-      "Tokenizer eğitimi için batch 557 işleniyor...\n",
-      "Tokenizer eğitimi için batch 558 işleniyor...\n",
-      "Tokenizer eğitimi için batch 559 işleniyor...\n",
-      "Tokenizer eğitimi için batch 560 işleniyor...\n",
-      "Tokenizer eğitimi için batch 561 işleniyor...\n",
-      "Tokenizer eğitimi için batch 562 işleniyor...\n",
-      "Tokenizer eğitimi için batch 563 işleniyor...\n",
-      "Tokenizer eğitimi için batch 564 işleniyor...\n",
-      "Tokenizer eğitimi için batch 565 işleniyor...\n",
-      "Tokenizer eğitimi için batch 566 işleniyor...\n",
-      "Tokenizer eğitimi için batch 567 işleniyor...\n",
-      "Tokenizer eğitimi için batch 568 işleniyor...\n",
-      "Tokenizer eğitimi için batch 569 işleniyor...\n",
-      "Tokenizer eğitimi için batch 570 işleniyor...\n",
-      "Tokenizer eğitimi için batch 571 işleniyor...\n",
-      "Tokenizer eğitimi için batch 572 işleniyor...\n",
-      "Tokenizer eğitimi için batch 573 işleniyor...\n",
-      "Tokenizer eğitimi için batch 574 işleniyor...\n",
-      "Tokenizer eğitimi için batch 575 işleniyor...\n",
-      "Tokenizer eğitimi için batch 576 işleniyor...\n",
-      "Tokenizer eğitimi için batch 577 işleniyor...\n",
-      "Tokenizer eğitimi için batch 578 işleniyor...\n",
-      "Tokenizer eğitimi için batch 579 işleniyor...\n",
-      "Tokenizer eğitimi için batch 580 işleniyor...\n",
-      "Tokenizer eğitimi için batch 581 işleniyor...\n",
-      "Tokenizer eğitimi için batch 582 işleniyor...\n",
-      "Tokenizer eğitimi için batch 583 işleniyor...\n",
-      "Tokenizer eğitimi için batch 584 işleniyor...\n",
-      "Tokenizer eğitimi için batch 585 işleniyor...\n",
-      "Tokenizer eğitimi için batch 586 işleniyor...\n",
-      "Tokenizer eğitimi için batch 587 işleniyor...\n",
-      "Tokenizer eğitimi için batch 588 işleniyor...\n",
-      "Tokenizer eğitimi için batch 589 işleniyor...\n",
-      "Tokenizer eğitimi için batch 590 işleniyor...\n",
-      "Tokenizer eğitimi için batch 591 işleniyor...\n",
-      "Tokenizer eğitimi için batch 592 işleniyor...\n",
-      "Tokenizer eğitimi için batch 593 işleniyor...\n",
-      "Tokenizer eğitimi için batch 594 işleniyor...\n",
-      "Tokenizer eğitimi için batch 595 işleniyor...\n",
-      "Tokenizer eğitimi için batch 596 işleniyor...\n",
-      "Tokenizer eğitimi için batch 597 işleniyor...\n",
-      "Tokenizer eğitimi için batch 598 işleniyor...\n",
-      "Tokenizer eğitimi için batch 599 işleniyor...\n",
-      "Tokenizer eğitimi için batch 600 işleniyor...\n",
-      "Tokenizer eğitimi için batch 601 işleniyor...\n",
-      "Tokenizer eğitimi için batch 602 işleniyor...\n",
-      "Tokenizer eğitimi için batch 603 işleniyor...\n",
-      "Tokenizer eğitimi için batch 604 işleniyor...\n",
-      "Tokenizer eğitimi için batch 605 işleniyor...\n",
-      "Tokenizer eğitimi için batch 606 işleniyor...\n",
-      "Tokenizer eğitimi için batch 607 işleniyor...\n",
-      "Tokenizer eğitimi için batch 608 işleniyor...\n",
-      "Tokenizer eğitimi için batch 609 işleniyor...\n",
-      "Tokenizer eğitimi için batch 610 işleniyor...\n",
-      "Tokenizer eğitimi için batch 611 işleniyor...\n",
-      "Tokenizer eğitimi için batch 612 işleniyor...\n",
-      "Tokenizer eğitimi için batch 613 işleniyor...\n",
-      "Tokenizer eğitimi için batch 614 işleniyor...\n",
-      "Tokenizer eğitimi için batch 615 işleniyor...\n",
-      "Tokenizer eğitimi için batch 616 işleniyor...\n",
-      "Tokenizer eğitimi için batch 617 işleniyor...\n",
-      "Tokenizer eğitimi için batch 618 işleniyor...\n",
-      "Tokenizer eğitimi için batch 619 işleniyor...\n",
-      "Tokenizer eğitimi için batch 620 işleniyor...\n",
-      "Tokenizer eğitimi için batch 621 işleniyor...\n",
-      "Tokenizer eğitimi için batch 622 işleniyor...\n",
-      "Tokenizer eğitimi için batch 623 işleniyor...\n",
-      "Tokenizer eğitimi için batch 624 işleniyor...\n",
-      "Tokenizer eğitimi için batch 625 işleniyor...\n",
-      "Tokenizer eğitimi için batch 626 işleniyor...\n",
-      "Tokenizer eğitimi için batch 627 işleniyor...\n",
-      "Tokenizer eğitimi için batch 628 işleniyor...\n",
-      "Tokenizer eğitimi için batch 629 işleniyor...\n",
-      "Tokenizer eğitimi için batch 630 işleniyor...\n",
-      "Tokenizer eğitimi için batch 631 işleniyor...\n",
-      "Tokenizer eğitimi için batch 632 işleniyor...\n",
-      "Tokenizer eğitimi için batch 633 işleniyor...\n",
-      "Tokenizer eğitimi için batch 634 işleniyor...\n",
-      "Tokenizer eğitimi için batch 635 işleniyor...\n",
-      "Tokenizer eğitimi için batch 636 işleniyor...\n",
-      "Tokenizer eğitimi için batch 637 işleniyor...\n",
-      "Tokenizer eğitimi için batch 638 işleniyor...\n",
-      "Tokenizer eğitimi için batch 639 işleniyor...\n",
-      "Tokenizer eğitimi için batch 640 işleniyor...\n",
-      "Tokenizer eğitimi için batch 641 işleniyor...\n",
-      "Tokenizer eğitimi için batch 642 işleniyor...\n",
-      "Tokenizer eğitimi için batch 643 işleniyor...\n",
-      "Tokenizer eğitimi için batch 644 işleniyor...\n",
-      "Tokenizer eğitimi için batch 645 işleniyor...\n",
-      "Tokenizer eğitimi için batch 646 işleniyor...\n",
-      "Tokenizer eğitimi için batch 647 işleniyor...\n",
-      "Tokenizer eğitimi için batch 648 işleniyor...\n",
-      "Tokenizer eğitimi için batch 649 işleniyor...\n",
-      "Tokenizer eğitimi için batch 650 işleniyor...\n",
-      "Tokenizer eğitimi için batch 651 işleniyor...\n",
-      "Tokenizer eğitimi için batch 652 işleniyor...\n",
-      "Tokenizer eğitimi için batch 653 işleniyor...\n",
-      "Tokenizer eğitimi için batch 654 işleniyor...\n",
-      "Tokenizer eğitimi için batch 655 işleniyor...\n",
-      "Tokenizer eğitimi için batch 656 işleniyor...\n",
-      "Tokenizer eğitimi için batch 657 işleniyor...\n",
-      "Tokenizer eğitimi için batch 658 işleniyor...\n",
-      "Tokenizer eğitimi için batch 659 işleniyor...\n",
-      "Tokenizer eğitimi için batch 660 işleniyor...\n",
-      "Tokenizer eğitimi için batch 661 işleniyor...\n",
-      "Tokenizer eğitimi için batch 662 işleniyor...\n",
-      "Tokenizer eğitimi için batch 663 işleniyor...\n",
-      "Tokenizer eğitimi için batch 664 işleniyor...\n",
-      "Tokenizer eğitimi için batch 665 işleniyor...\n",
-      "Tokenizer eğitimi için batch 666 işleniyor...\n",
-      "Tokenizer eğitimi için batch 667 işleniyor...\n",
-      "Tokenizer eğitimi için batch 668 işleniyor...\n",
-      "Tokenizer eğitimi için batch 669 işleniyor...\n",
-      "Tokenizer eğitimi için batch 670 işleniyor...\n",
-      "Tokenizer eğitimi için batch 671 işleniyor...\n",
-      "Tokenizer eğitimi için batch 672 işleniyor...\n",
-      "Tokenizer eğitimi için batch 673 işleniyor...\n",
-      "Tokenizer eğitimi için batch 674 işleniyor...\n",
-      "Tokenizer eğitimi için batch 675 işleniyor...\n",
-      "Tokenizer eğitimi için batch 676 işleniyor...\n",
-      "Tokenizer eğitimi için batch 677 işleniyor...\n",
-      "Tokenizer eğitimi için batch 678 işleniyor...\n",
-      "Tokenizer eğitimi için batch 679 işleniyor...\n",
-      "Tokenizer eğitimi için batch 680 işleniyor...\n",
-      "Tokenizer eğitimi için batch 681 işleniyor...\n",
-      "Tokenizer eğitimi için batch 682 işleniyor...\n",
-      "Tokenizer eğitimi için batch 683 işleniyor...\n",
-      "Tokenizer eğitimi için batch 684 işleniyor...\n",
-      "Tokenizer eğitimi için batch 685 işleniyor...\n",
-      "Tokenizer eğitimi için batch 686 işleniyor...\n",
-      "Tokenizer eğitimi için batch 687 işleniyor...\n",
-      "Tokenizer eğitimi için batch 688 işleniyor...\n",
-      "Tokenizer eğitimi için batch 689 işleniyor...\n",
-      "Tokenizer eğitimi için batch 690 işleniyor...\n",
-      "Tokenizer eğitimi için batch 691 işleniyor...\n",
-      "Tokenizer eğitimi için batch 692 işleniyor...\n",
-      "Tokenizer eğitimi için batch 693 işleniyor...\n",
-      "Tokenizer eğitimi için batch 694 işleniyor...\n",
-      "Tokenizer eğitimi için batch 695 işleniyor...\n",
-      "Tokenizer eğitimi için batch 696 işleniyor...\n",
-      "Tokenizer eğitimi için batch 697 işleniyor...\n",
-      "Tokenizer eğitimi için batch 698 işleniyor...\n",
-      "Tokenizer eğitimi için batch 699 işleniyor...\n",
-      "Tokenizer eğitimi için batch 700 işleniyor...\n",
-      "Tokenizer eğitimi için batch 701 işleniyor...\n",
-      "Tokenizer eğitimi için batch 702 işleniyor...\n",
-      "Tokenizer eğitimi için batch 703 işleniyor...\n",
-      "Tokenizer eğitimi için batch 704 işleniyor...\n",
-      "Tokenizer eğitimi için batch 705 işleniyor...\n",
-      "Tokenizer eğitimi için batch 706 işleniyor...\n",
-      "Tokenizer eğitimi için batch 707 işleniyor...\n",
-      "Tokenizer eğitimi için batch 708 işleniyor...\n",
-      "Tokenizer eğitimi için batch 709 işleniyor...\n",
-      "Tokenizer eğitimi için batch 710 işleniyor...\n",
-      "Tokenizer eğitimi için batch 711 işleniyor...\n",
-      "Tokenizer eğitimi için batch 712 işleniyor...\n",
-      "Tokenizer eğitimi için batch 713 işleniyor...\n",
-      "Tokenizer eğitimi için batch 714 işleniyor...\n",
-      "Tokenizer eğitimi için batch 715 işleniyor...\n",
-      "Tokenizer eğitimi için batch 716 işleniyor...\n",
-      "Tokenizer eğitimi için batch 717 işleniyor...\n",
-      "Tokenizer eğitimi için batch 718 işleniyor...\n",
-      "Tokenizer eğitimi için batch 719 işleniyor...\n",
-      "Tokenizer eğitimi için batch 720 işleniyor...\n",
-      "Tokenizer eğitimi için batch 721 işleniyor...\n",
-      "Tokenizer eğitimi için batch 722 işleniyor...\n",
-      "Tokenizer eğitimi için batch 723 işleniyor...\n",
-      "Tokenizer eğitimi için batch 724 işleniyor...\n",
-      "Tokenizer eğitimi için batch 725 işleniyor...\n",
-      "Tokenizer eğitimi için batch 726 işleniyor...\n",
-      "Tokenizer eğitimi için batch 727 işleniyor...\n",
-      "Tokenizer eğitimi için batch 728 işleniyor...\n",
-      "Tokenizer eğitimi için batch 729 işleniyor...\n",
-      "Tokenizer eğitimi için batch 730 işleniyor...\n",
-      "Tokenizer eğitimi için batch 731 işleniyor...\n",
-      "Tokenizer eğitimi için batch 732 işleniyor...\n",
-      "Tokenizer eğitimi için batch 733 işleniyor...\n",
-      "Tokenizer eğitimi için batch 734 işleniyor...\n",
-      "Tokenizer eğitimi için batch 735 işleniyor...\n",
-      "Tokenizer eğitimi için batch 736 işleniyor...\n",
-      "Tokenizer eğitimi için batch 737 işleniyor...\n",
-      "Tokenizer eğitimi için batch 738 işleniyor...\n",
-      "Tokenizer eğitimi için batch 739 işleniyor...\n",
-      "Tokenizer eğitimi için batch 740 işleniyor...\n",
-      "Tokenizer eğitimi için batch 741 işleniyor...\n",
-      "Tokenizer eğitimi için batch 742 işleniyor...\n",
-      "Tokenizer eğitimi için batch 743 işleniyor...\n",
-      "Tokenizer eğitimi için batch 744 işleniyor...\n",
-      "Tokenizer eğitimi için batch 745 işleniyor...\n",
-      "Tokenizer eğitimi için batch 746 işleniyor...\n",
-      "Tokenizer eğitimi için batch 747 işleniyor...\n",
-      "Tokenizer eğitimi için batch 748 işleniyor...\n",
-      "Tokenizer eğitimi için batch 749 işleniyor...\n",
-      "Tokenizer eğitimi için batch 750 işleniyor...\n",
-      "Tokenizer eğitimi için batch 751 işleniyor...\n",
-      "Tokenizer eğitimi için batch 752 işleniyor...\n",
-      "Tokenizer eğitimi için batch 753 işleniyor...\n",
-      "Tokenizer eğitimi için batch 754 işleniyor...\n",
-      "Tokenizer eğitimi için batch 755 işleniyor...\n",
-      "Tokenizer eğitimi için batch 756 işleniyor...\n",
-      "Tokenizer eğitimi için batch 757 işleniyor...\n",
-      "Tokenizer eğitimi için batch 758 işleniyor...\n",
-      "Tokenizer eğitimi için batch 759 işleniyor...\n",
-      "Tokenizer eğitimi için batch 760 işleniyor...\n",
-      "Tokenizer eğitimi için batch 761 işleniyor...\n",
-      "Tokenizer eğitimi için batch 762 işleniyor...\n",
-      "Tokenizer eğitimi için batch 763 işleniyor...\n",
-      "Tokenizer eğitimi için batch 764 işleniyor...\n",
-      "Tokenizer eğitimi için batch 765 işleniyor...\n",
-      "Tokenizer eğitimi için batch 766 işleniyor...\n",
-      "Tokenizer eğitimi için batch 767 işleniyor...\n",
-      "Tokenizer eğitimi için batch 768 işleniyor...\n",
-      "Tokenizer eğitimi için batch 769 işleniyor...\n",
-      "Tokenizer eğitimi için batch 770 işleniyor...\n",
-      "Tokenizer eğitimi için batch 771 işleniyor...\n",
-      "Tokenizer eğitimi için batch 772 işleniyor...\n",
-      "Tokenizer eğitimi için batch 773 işleniyor...\n",
-      "Tokenizer eğitimi için batch 774 işleniyor...\n",
-      "Tokenizer eğitimi için batch 775 işleniyor...\n",
-      "Tokenizer eğitimi için batch 776 işleniyor...\n",
-      "Tokenizer eğitimi için batch 777 işleniyor...\n",
-      "Tokenizer eğitimi için batch 778 işleniyor...\n",
-      "Tokenizer eğitimi için batch 779 işleniyor...\n",
-      "Tokenizer eğitimi için batch 780 işleniyor...\n",
-      "Tokenizer eğitimi için batch 781 işleniyor...\n",
-      "Tokenizer eğitimi için batch 782 işleniyor...\n",
-      "Tokenizer eğitimi için batch 783 işleniyor...\n",
-      "Tokenizer eğitimi için batch 784 işleniyor...\n",
-      "Tokenizer eğitimi için batch 785 işleniyor...\n",
-      "Tokenizer eğitimi için batch 786 işleniyor...\n",
-      "Tokenizer eğitimi için batch 787 işleniyor...\n",
-      "Tokenizer eğitimi için batch 788 işleniyor...\n",
-      "Tokenizer eğitimi için batch 789 işleniyor...\n",
-      "Tokenizer eğitimi için batch 790 işleniyor...\n",
-      "Tokenizer eğitimi için batch 791 işleniyor...\n",
-      "Tokenizer eğitimi için batch 792 işleniyor...\n",
-      "Tokenizer eğitimi için batch 793 işleniyor...\n",
-      "Tokenizer eğitimi için batch 794 işleniyor...\n",
-      "Tokenizer eğitimi için batch 795 işleniyor...\n",
-      "Tokenizer eğitimi için batch 796 işleniyor...\n",
-      "Tokenizer eğitimi için batch 797 işleniyor...\n",
-      "Tokenizer eğitimi için batch 798 işleniyor...\n",
-      "Tokenizer eğitimi için batch 799 işleniyor...\n",
-      "Tokenizer eğitimi için batch 800 işleniyor...\n",
-      "Tokenizer eğitimi için batch 801 işleniyor...\n",
-      "Tokenizer eğitimi için batch 802 işleniyor...\n",
-      "Tokenizer eğitimi için batch 803 işleniyor...\n",
-      "Tokenizer eğitimi için batch 804 işleniyor...\n",
-      "Tokenizer eğitimi için batch 805 işleniyor...\n",
-      "Tokenizer eğitimi için batch 806 işleniyor...\n",
-      "Tokenizer eğitimi için batch 807 işleniyor...\n",
-      "Tokenizer eğitimi için batch 808 işleniyor...\n",
-      "Tokenizer eğitimi için batch 809 işleniyor...\n",
-      "Tokenizer eğitimi için batch 810 işleniyor...\n",
-      "Tokenizer eğitimi için batch 811 işleniyor...\n",
-      "Tokenizer eğitimi için batch 812 işleniyor...\n",
-      "Tokenizer eğitimi için batch 813 işleniyor...\n",
-      "Tokenizer eğitimi için batch 814 işleniyor...\n",
-      "Tokenizer eğitimi için batch 815 işleniyor...\n",
-      "Tokenizer eğitimi için batch 816 işleniyor...\n",
-      "Tokenizer eğitimi için batch 817 işleniyor...\n",
-      "Tokenizer eğitimi için batch 818 işleniyor...\n",
-      "Tokenizer eğitimi için batch 819 işleniyor...\n",
-      "Tokenizer eğitimi için batch 820 işleniyor...\n",
-      "Tokenizer eğitimi için batch 821 işleniyor...\n",
-      "Tokenizer eğitimi için batch 822 işleniyor...\n",
-      "Tokenizer eğitimi için batch 823 işleniyor...\n",
-      "Tokenizer eğitimi için batch 824 işleniyor...\n",
-      "Tokenizer eğitimi için batch 825 işleniyor...\n",
-      "Tokenizer eğitimi için batch 826 işleniyor...\n",
-      "Tokenizer eğitimi için batch 827 işleniyor...\n",
-      "Tokenizer eğitimi için batch 828 işleniyor...\n",
-      "Tokenizer eğitimi için batch 829 işleniyor...\n",
-      "Tokenizer eğitimi için batch 830 işleniyor...\n",
-      "Tokenizer eğitimi için batch 831 işleniyor...\n",
-      "Tokenizer eğitimi için batch 832 işleniyor...\n",
-      "Tokenizer eğitimi için batch 833 işleniyor...\n",
-      "Tokenizer eğitimi için batch 834 işleniyor...\n",
-      "Tokenizer eğitimi için batch 835 işleniyor...\n",
-      "Tokenizer eğitimi için batch 836 işleniyor...\n",
-      "Tokenizer eğitimi için batch 837 işleniyor...\n",
-      "Tokenizer eğitimi için batch 838 işleniyor...\n",
-      "Tokenizer eğitimi için batch 839 işleniyor...\n",
-      "Tokenizer eğitimi için batch 840 işleniyor...\n",
-      "Tokenizer eğitimi için batch 841 işleniyor...\n",
-      "Tokenizer eğitimi için batch 842 işleniyor...\n",
-      "Tokenizer eğitimi için batch 843 işleniyor...\n",
-      "Tokenizer eğitimi için batch 844 işleniyor...\n",
-      "Tokenizer eğitimi için batch 845 işleniyor...\n",
-      "Tokenizer eğitimi için batch 846 işleniyor...\n",
-      "Tokenizer eğitimi için batch 847 işleniyor...\n",
-      "Tokenizer eğitimi için batch 848 işleniyor...\n",
-      "Tokenizer eğitimi için batch 849 işleniyor...\n",
-      "Tokenizer eğitimi için batch 850 işleniyor...\n",
-      "Tokenizer eğitimi için batch 851 işleniyor...\n",
-      "Tokenizer eğitimi için batch 852 işleniyor...\n",
-      "Tokenizer eğitimi için batch 853 işleniyor...\n",
-      "Tokenizer eğitimi için batch 854 işleniyor...\n",
-      "Tokenizer eğitimi için batch 855 işleniyor...\n",
-      "Tokenizer eğitimi için batch 856 işleniyor...\n",
-      "Tokenizer eğitimi için batch 857 işleniyor...\n",
-      "Tokenizer eğitimi için batch 858 işleniyor...\n",
-      "Tokenizer eğitimi için batch 859 işleniyor...\n",
-      "Tokenizer eğitimi için batch 860 işleniyor...\n",
-      "Tokenizer eğitimi için batch 861 işleniyor...\n",
-      "Tokenizer eğitimi için batch 862 işleniyor...\n",
-      "Tokenizer eğitimi için batch 863 işleniyor...\n",
-      "Tokenizer eğitimi için batch 864 işleniyor...\n",
-      "Tokenizer eğitimi için batch 865 işleniyor...\n",
-      "Tokenizer eğitimi için batch 866 işleniyor...\n",
-      "Tokenizer eğitimi için batch 867 işleniyor...\n",
-      "Tokenizer eğitimi için batch 868 işleniyor...\n",
-      "Tokenizer eğitimi için batch 869 işleniyor...\n",
-      "Tokenizer eğitimi için batch 870 işleniyor...\n",
-      "Tokenizer eğitimi için batch 871 işleniyor...\n",
-      "Tokenizer eğitimi için batch 872 işleniyor...\n",
-      "Tokenizer eğitimi için batch 873 işleniyor...\n",
-      "Tokenizer eğitimi için batch 874 işleniyor...\n",
-      "Tokenizer eğitimi için batch 875 işleniyor...\n",
-      "Tokenizer eğitimi için batch 876 işleniyor...\n",
-      "Tokenizer eğitimi için batch 877 işleniyor...\n",
-      "Tokenizer eğitimi için batch 878 işleniyor...\n",
-      "Tokenizer eğitimi için batch 879 işleniyor...\n",
-      "Tokenizer eğitimi için batch 880 işleniyor...\n",
-      "Tokenizer eğitimi için batch 881 işleniyor...\n",
-      "Tokenizer eğitimi için batch 882 işleniyor...\n",
-      "Tokenizer eğitimi için batch 883 işleniyor...\n",
-      "Tokenizer eğitimi için batch 884 işleniyor...\n",
-      "Tokenizer eğitimi için batch 885 işleniyor...\n",
-      "Tokenizer eğitimi için batch 886 işleniyor...\n",
-      "Tokenizer eğitimi için batch 887 işleniyor...\n",
-      "Tokenizer eğitimi için batch 888 işleniyor...\n",
-      "Tokenizer eğitimi için batch 889 işleniyor...\n",
-      "Tokenizer eğitimi için batch 890 işleniyor...\n",
-      "Tokenizer eğitimi için batch 891 işleniyor...\n",
-      "Tokenizer eğitimi için batch 892 işleniyor...\n",
-      "Tokenizer eğitimi için batch 893 işleniyor...\n",
-      "Tokenizer eğitimi için batch 894 işleniyor...\n",
-      "Tokenizer eğitimi için batch 895 işleniyor...\n",
-      "Tokenizer eğitimi için batch 896 işleniyor...\n",
-      "Tokenizer eğitimi için batch 897 işleniyor...\n",
-      "Tokenizer eğitimi için batch 898 işleniyor...\n",
-      "Tokenizer eğitimi için batch 899 işleniyor...\n",
-      "Tokenizer eğitimi için batch 900 işleniyor...\n",
-      "Tokenizer eğitimi için batch 901 işleniyor...\n",
-      "Tokenizer eğitimi için batch 902 işleniyor...\n",
-      "Tokenizer eğitimi için batch 903 işleniyor...\n",
-      "Tokenizer eğitimi için batch 904 işleniyor...\n",
-      "Tokenizer eğitimi için batch 905 işleniyor...\n",
-      "Tokenizer eğitimi için batch 906 işleniyor...\n",
-      "Tokenizer eğitimi için batch 907 işleniyor...\n",
-      "Tokenizer eğitimi için batch 908 işleniyor...\n",
-      "Tokenizer eğitimi için batch 909 işleniyor...\n",
-      "Tokenizer eğitimi için batch 910 işleniyor...\n",
-      "Tokenizer eğitimi için batch 911 işleniyor...\n",
-      "Tokenizer eğitimi için batch 912 işleniyor...\n",
-      "Tokenizer eğitimi için batch 913 işleniyor...\n",
-      "Tokenizer eğitimi için batch 914 işleniyor...\n",
-      "Tokenizer eğitimi için batch 915 işleniyor...\n",
-      "Tokenizer eğitimi için batch 916 işleniyor...\n",
-      "Tokenizer eğitimi için batch 917 işleniyor...\n",
-      "Tokenizer eğitimi için batch 918 işleniyor...\n",
-      "Tokenizer eğitimi için batch 919 işleniyor...\n",
-      "Tokenizer eğitimi için batch 920 işleniyor...\n",
-      "Tokenizer eğitimi için batch 921 işleniyor...\n",
-      "Tokenizer eğitimi için batch 922 işleniyor...\n",
-      "Tokenizer eğitimi için batch 923 işleniyor...\n",
-      "Tokenizer eğitimi için batch 924 işleniyor...\n",
-      "Tokenizer eğitimi için batch 925 işleniyor...\n",
-      "Tokenizer eğitimi için batch 926 işleniyor...\n",
-      "Tokenizer eğitimi için batch 927 işleniyor...\n",
-      "Tokenizer eğitimi için batch 928 işleniyor...\n",
-      "Tokenizer eğitimi için batch 929 işleniyor...\n",
-      "Tokenizer eğitimi için batch 930 işleniyor...\n",
-      "Tokenizer eğitimi için batch 931 işleniyor...\n",
-      "Tokenizer eğitimi için batch 932 işleniyor...\n",
-      "Tokenizer eğitimi için batch 933 işleniyor...\n",
-      "Tokenizer eğitimi için batch 934 işleniyor...\n",
-      "Tokenizer eğitimi için batch 935 işleniyor...\n",
-      "Tokenizer eğitimi için batch 936 işleniyor...\n",
-      "Tokenizer eğitimi için batch 937 işleniyor...\n",
-      "Tokenizer eğitimi için batch 938 işleniyor...\n",
-      "Tokenizer eğitimi için batch 939 işleniyor...\n",
-      "Tokenizer eğitimi için batch 940 işleniyor...\n",
-      "Tokenizer eğitimi için batch 941 işleniyor...\n",
-      "Tokenizer eğitimi için batch 942 işleniyor...\n",
-      "Tokenizer eğitimi için batch 943 işleniyor...\n",
-      "Tokenizer eğitimi için batch 944 işleniyor...\n",
-      "Tokenizer eğitimi için batch 945 işleniyor...\n",
-      "Tokenizer eğitimi için batch 946 işleniyor...\n",
-      "Tokenizer eğitimi için batch 947 işleniyor...\n",
-      "Tokenizer eğitimi için batch 948 işleniyor...\n",
-      "Tokenizer eğitimi için batch 949 işleniyor...\n",
-      "Tokenizer eğitimi için batch 950 işleniyor...\n",
-      "Tokenizer eğitimi için batch 951 işleniyor...\n",
-      "Tokenizer eğitimi için batch 952 işleniyor...\n",
-      "Tokenizer eğitimi için batch 953 işleniyor...\n",
-      "Tokenizer eğitimi için batch 954 işleniyor...\n",
-      "Tokenizer eğitimi için batch 955 işleniyor...\n",
-      "Tokenizer eğitimi için batch 956 işleniyor...\n",
-      "Tokenizer eğitimi için batch 957 işleniyor...\n",
-      "Tokenizer eğitimi için batch 958 işleniyor...\n",
-      "Tokenizer eğitimi için batch 959 işleniyor...\n",
-      "Tokenizer eğitimi için batch 960 işleniyor...\n",
-      "Tokenizer eğitimi için batch 961 işleniyor...\n",
-      "Tokenizer eğitimi için batch 962 işleniyor...\n",
-      "Tokenizer eğitimi için batch 963 işleniyor...\n",
-      "Tokenizer eğitimi için batch 964 işleniyor...\n",
-      "Tokenizer eğitimi için batch 965 işleniyor...\n",
-      "Tokenizer eğitimi için batch 966 işleniyor...\n",
-      "Tokenizer eğitimi için batch 967 işleniyor...\n",
-      "Tokenizer eğitimi için batch 968 işleniyor...\n",
-      "Tokenizer eğitimi için batch 969 işleniyor...\n",
-      "Tokenizer eğitimi için batch 970 işleniyor...\n",
-      "Tokenizer eğitimi için batch 971 işleniyor...\n",
-      "Tokenizer eğitimi için batch 972 işleniyor...\n",
-      "Tokenizer eğitimi için batch 973 işleniyor...\n",
-      "Tokenizer eğitimi için batch 974 işleniyor...\n",
-      "Tokenizer eğitimi için batch 975 işleniyor...\n",
-      "Tokenizer eğitimi için batch 976 işleniyor...\n",
-      "Tokenizer eğitimi için batch 977 işleniyor...\n",
-      "Tokenizer eğitimi için batch 978 işleniyor...\n",
-      "Tokenizer eğitimi için batch 979 işleniyor...\n",
-      "Tokenizer eğitimi için batch 980 işleniyor...\n",
-      "Tokenizer eğitimi için batch 981 işleniyor...\n",
-      "Tokenizer eğitimi için batch 982 işleniyor...\n",
-      "Tokenizer eğitimi için batch 983 işleniyor...\n",
-      "Tokenizer eğitimi için batch 984 işleniyor...\n",
-      "Tokenizer eğitimi için batch 985 işleniyor...\n",
-      "Tokenizer eğitimi için batch 986 işleniyor...\n",
-      "Tokenizer eğitimi için batch 987 işleniyor...\n",
-      "Tokenizer eğitimi için batch 988 işleniyor...\n",
-      "Tokenizer eğitimi için batch 989 işleniyor...\n",
-      "Tokenizer eğitimi için batch 990 işleniyor...\n",
-      "Tokenizer eğitimi için batch 991 işleniyor...\n",
-      "Tokenizer eğitimi için batch 992 işleniyor...\n",
-      "Tokenizer eğitimi için batch 993 işleniyor...\n",
-      "Tokenizer eğitimi için batch 994 işleniyor...\n",
-      "Tokenizer eğitimi için batch 995 işleniyor...\n",
-      "Tokenizer eğitimi için batch 996 işleniyor...\n",
-      "Tokenizer eğitimi için batch 997 işleniyor...\n",
-      "Tokenizer eğitimi için batch 998 işleniyor...\n",
-      "Tokenizer eğitimi için batch 999 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1000 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1001 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1002 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1003 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1004 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1005 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1006 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1007 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1008 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1009 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1010 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1011 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1012 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1013 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1014 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1015 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1016 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1017 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1018 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1019 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1020 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1021 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1022 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1023 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1024 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1025 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1026 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1027 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1028 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1029 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1030 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1031 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1032 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1033 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1034 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1035 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1036 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1037 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1038 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1039 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1040 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1041 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1042 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1043 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1044 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1045 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1046 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1047 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1048 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1049 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1050 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1051 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1052 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1053 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1054 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1055 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1056 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1057 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1058 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1059 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1060 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1061 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1062 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1063 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1064 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1065 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1066 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1067 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1068 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1069 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1070 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1071 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1072 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1073 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1074 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1075 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1076 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1077 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1078 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1079 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1080 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1081 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1082 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1083 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1084 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1085 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1086 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1087 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1088 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1089 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1090 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1091 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1092 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1093 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1094 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1095 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1096 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1097 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1098 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1099 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1100 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1101 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1102 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1103 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1104 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1105 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1106 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1107 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1108 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1109 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1110 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1111 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1112 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1113 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1114 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1115 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1116 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1117 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1118 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1119 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1120 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1121 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1122 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1123 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1124 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1125 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1126 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1127 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1128 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1129 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1130 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1131 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1132 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1133 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1134 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1135 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1136 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1137 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1138 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1139 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1140 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1141 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1142 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1143 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1144 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1145 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1146 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1147 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1148 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1149 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1150 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1151 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1152 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1153 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1154 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1155 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1156 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1157 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1158 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1159 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1160 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1161 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1162 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1163 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1164 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1165 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1166 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1167 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1168 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1169 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1170 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1171 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1172 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1173 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1174 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1175 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1176 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1177 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1178 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1179 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1180 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1181 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1182 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1183 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1184 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1185 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1186 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1187 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1188 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1189 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1190 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1191 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1192 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1193 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1194 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1195 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1196 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1197 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1198 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1199 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1200 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1201 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1202 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1203 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1204 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1205 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1206 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1207 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1208 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1209 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1210 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1211 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1212 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1213 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1214 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1215 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1216 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1217 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1218 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1219 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1220 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1221 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1222 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1223 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1224 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1225 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1226 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1227 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1228 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1229 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1230 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1231 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1232 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1233 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1234 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1235 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1236 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1237 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1238 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1239 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1240 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1241 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1242 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1243 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1244 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1245 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1246 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1247 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1248 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1249 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1250 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1251 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1252 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1253 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1254 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1255 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1256 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1257 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1258 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1259 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1260 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1261 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1262 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1263 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1264 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1265 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1266 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1267 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1268 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1269 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1270 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1271 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1272 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1273 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1274 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1275 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1276 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1277 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1278 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1279 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1280 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1281 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1282 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1283 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1284 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1285 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1286 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1287 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1288 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1289 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1290 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1291 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1292 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1293 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1294 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1295 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1296 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1297 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1298 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1299 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1300 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1301 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1302 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1303 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1304 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1305 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1306 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1307 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1308 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1309 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1310 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1311 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1312 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1313 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1314 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1315 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1316 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1317 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1318 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1319 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1320 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1321 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1322 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1323 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1324 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1325 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1326 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1327 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1328 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1329 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1330 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1331 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1332 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1333 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1334 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1335 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1336 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1337 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1338 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1339 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1340 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1341 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1342 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1343 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1344 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1345 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1346 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1347 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1348 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1349 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1350 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1351 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1352 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1353 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1354 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1355 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1356 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1357 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1358 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1359 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1360 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1361 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1362 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1363 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1364 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1365 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1366 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1367 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1368 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1369 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1370 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1371 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1372 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1373 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1374 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1375 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1376 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1377 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1378 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1379 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1380 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1381 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1382 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1383 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1384 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1385 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1386 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1387 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1388 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1389 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1390 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1391 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1392 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1393 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1394 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1395 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1396 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1397 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1398 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1399 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1400 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1401 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1402 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1403 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1404 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1405 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1406 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1407 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1408 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1409 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1410 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1411 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1412 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1413 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1414 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1415 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1416 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1417 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1418 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1419 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1420 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1421 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1422 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1423 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1424 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1425 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1426 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1427 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1428 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1429 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1430 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1431 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1432 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1433 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1434 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1435 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1436 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1437 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1438 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1439 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1440 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1441 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1442 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1443 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1444 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1445 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1446 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1447 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1448 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1449 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1450 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1451 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1452 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1453 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1454 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1455 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1456 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1457 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1458 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1459 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1460 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1461 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1462 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1463 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1464 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1465 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1466 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1467 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1468 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1469 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1470 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1471 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1472 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1473 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1474 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1475 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1476 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1477 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1478 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1479 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1480 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1481 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1482 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1483 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1484 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1485 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1486 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1487 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1488 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1489 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1490 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1491 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1492 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1493 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1494 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1495 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1496 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1497 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1498 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1499 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1500 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1501 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1502 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1503 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1504 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1505 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1506 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1507 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1508 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1509 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1510 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1511 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1512 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1513 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1514 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1515 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1516 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1517 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1518 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1519 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1520 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1521 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1522 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1523 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1524 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1525 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1526 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1527 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1528 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1529 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1530 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1531 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1532 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1533 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1534 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1535 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1536 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1537 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1538 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1539 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1540 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1541 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1542 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1543 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1544 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1545 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1546 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1547 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1548 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1549 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1550 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1551 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1552 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1553 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1554 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1555 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1556 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1557 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1558 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1559 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1560 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1561 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1562 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1563 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1564 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1565 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1566 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1567 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1568 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1569 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1570 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1571 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1572 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1573 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1574 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1575 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1576 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1577 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1578 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1579 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1580 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1581 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1582 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1583 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1584 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1585 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1586 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1587 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1588 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1589 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1590 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1591 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1592 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1593 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1594 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1595 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1596 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1597 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1598 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1599 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1600 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1601 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1602 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1603 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1604 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1605 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1606 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1607 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1608 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1609 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1610 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1611 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1612 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1613 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1614 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1615 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1616 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1617 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1618 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1619 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1620 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1621 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1622 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1623 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1624 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1625 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1626 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1627 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1628 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1629 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1630 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1631 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1632 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1633 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1634 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1635 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1636 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1637 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1638 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1639 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1640 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1641 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1642 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1643 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1644 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1645 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1646 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1647 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1648 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1649 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1650 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1651 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1652 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1653 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1654 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1655 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1656 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1657 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1658 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1659 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1660 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1661 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1662 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1663 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1664 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1665 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1666 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1667 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1668 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1669 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1670 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1671 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1672 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1673 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1674 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1675 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1676 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1677 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1678 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1679 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1680 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1681 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1682 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1683 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1684 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1685 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1686 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1687 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1688 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1689 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1690 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1691 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1692 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1693 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1694 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1695 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1696 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1697 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1698 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1699 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1700 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1701 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1702 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1703 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1704 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1705 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1706 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1707 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1708 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1709 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1710 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1711 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1712 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1713 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1714 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1715 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1716 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1717 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1718 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1719 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1720 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1721 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1722 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1723 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1724 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1725 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1726 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1727 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1728 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1729 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1730 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1731 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1732 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1733 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1734 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1735 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1736 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1737 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1738 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1739 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1740 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1741 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1742 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1743 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1744 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1745 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1746 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1747 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1748 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1749 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1750 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1751 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1752 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1753 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1754 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1755 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1756 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1757 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1758 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1759 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1760 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1761 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1762 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1763 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1764 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1765 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1766 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1767 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1768 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1769 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1770 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1771 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1772 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1773 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1774 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1775 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1776 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1777 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1778 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1779 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1780 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1781 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1782 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1783 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1784 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1785 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1786 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1787 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1788 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1789 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1790 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1791 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1792 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1793 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1794 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1795 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1796 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1797 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1798 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1799 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1800 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1801 işleniyor...\n",
-      "Tokenizer eğitimi için batch 1802 işleniyor...\n",
-      "Tokenizer eğitimi tamamlandı.\n",
-      "TextDataset oluşturuldu. Toplam örnek sayısı: 1801350\n",
-      "TextDataset oluşturuldu. Toplam örnek sayısı: 3760\n",
-      "Train loader örnek sayısı: 28147, Validation loader örnek sayısı: 59\n",
-      "Transformer modeli oluşturuluyor...\n",
-      "Gömme katmanları oluşturuldu.\n",
-      "Konumsal kodlama katmanları oluşturuldu.\n",
-      "Encoder block 1/6 oluşturuldu.\n",
-      "Encoder block 2/6 oluşturuldu.\n",
-      "Encoder block 3/6 oluşturuldu.\n",
-      "Encoder block 4/6 oluşturuldu.\n",
-      "Encoder block 5/6 oluşturuldu.\n",
-      "Encoder block 6/6 oluşturuldu.\n",
-      "Encoder blokları tamamlandı.\n",
-      "Decoder block 1/6 oluşturuldu.\n",
-      "Decoder block 2/6 oluşturuldu.\n",
-      "Decoder block 3/6 oluşturuldu.\n",
-      "Decoder block 4/6 oluşturuldu.\n",
-      "Decoder block 5/6 oluşturuldu.\n",
-      "Decoder block 6/6 oluşturuldu.\n",
-      "Decoder blokları tamamlandı.\n",
-      "Encoder ve Decoder oluşturuldu.\n",
-      "Projeksiyon katmanı oluşturuldu.\n",
-      "Transformer modeli başarıyla oluşturuldu.\n",
-      "Model parametreleri başlatıldı.\n",
-      "Tokenizing text:  United Kingdom \n",
-      "...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text: ...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text: ...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text: ...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  Mastered by Ted Jensen \n",
-      "...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  = = = Orsini – Skanderbeg correspondence = = = \n",
-      "...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text: ...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  = = Raccoons and humans = = \n",
-      "...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  In publications focused on manga and anime review...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  = = = Flora = = = \n",
-      "...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text: ...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  In October she became part of Task Force 61 and p...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  = Fortifications of Valletta = \n",
-      "...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  The mascot is \" NJ Devil \" , a 7 @-@ foot ( 2 @.@...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  For the season , Harvard set numerous program rec...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  The surviving members of the Farrar – Green famil...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  In September 1955 , The New York Times reported t...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text: ...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text: ...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  In a significant improvement over the previous we...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  Kagame spent most of his childhood and young adul...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  The Battle of P 'ohang @-@ dong was an engagement...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text: ...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  The popularity of Neon Genesis Evangelion extends...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  Cammalleri was involved in trade rumours followin...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  U @-@ 64 went to sea on 6 April 1940 . For eight ...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  = = Founding of the Browns in the AAFC = = \n",
-      "...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  After a string of four wins and three losses , th...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  The seven word classes are exemplified in this sa...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  300th Airlanding Anti @-@ Tank Battery Royal Arti...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  According to Citizenship and Immigration Canada ,...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text: ...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text: ...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  See and Bassett flew in one Northrop T @-@ 38A Ta...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  = = = Fourth quarter = = = \n",
-      "...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text: ...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  At the 39th Daytime Emmy Awards in 2012 , Bumpass...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  = = Establishment = = \n",
-      "...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  Before the casting process , no actors had been c...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text: ...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  On June 25 , 2015 , Comedy Central announced that...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  Ernest 's position was often linked to his brothe...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  White said of his approach to producing stories ,...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  Upon the expiration of his previous contract , Ni...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  = = = Relationship with local leaders = = = \n",
-      "...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  = = Taxonomy and phylogeny = = \n",
-      "...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text: ...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text: ...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  Șerban Cazan – songwriting , producing \n",
-      "...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text: ...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text: ...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text: ...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  The design of the Borodino @-@ class ships was mo...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  Johnson maintained his lead at the restart . On l...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text: ...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  Hamlin maintained the Drivers ' championship lead...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text: ...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  = = = Nagasaki during World War II = = = \n",
-      "...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  = = Opposing forces = = \n",
-      "...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text: ...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text: ...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text:  = = History = = \n",
-      "...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text: ...\n",
-      "Tokenized sequence length: 256\n",
-      "Tokenizing text: ...\n",
-      "Tokenized sequence length: 256\n",
-      "Maskeler birleştiriliyor...\n",
-      "Padding maskesi oluşturuluyor...\n",
-      "Padding mask shape: torch.Size([64, 1, 1, 255])\n",
-      "Padding maskesi oluşturuluyor...\n",
-      "Padding mask shape: torch.Size([64, 1, 1, 255])\n",
-      "Causal maskesi oluşturuluyor...\n",
-      "Causal mask shape: torch.Size([1, 1, 255, 255])\n",
-      "Source mask shape: torch.Size([64, 1, 1, 255]), Target mask shape: torch.Size([64, 1, 255, 255])\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "C:\\Users\\emreq\\AppData\\Local\\Temp\\ipykernel_13168\\2647060649.py:222: FutureWarning: `torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.\n",
-      "  scaler = GradScaler()\n",
-      "C:\\Users\\emreq\\AppData\\Local\\Temp\\ipykernel_13168\\2647060649.py:236: FutureWarning: `torch.cuda.amp.autocast(args...)` is deprecated. Please use `torch.amp.autocast('cuda', args...)` instead.\n",
-      "  with autocast():\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import torch\n",
     "from torch import nn, optim\n",
@@ -2279,7 +279,7 @@
     "import math\n",
     "from torch.cuda.amp import GradScaler, autocast\n",
     "\n",
-    "# Yapılandırma parametreleri\n",
+    "# Configuration parameters\n",
     "CONFIG = {\n",
     "    \"batch_size\": 64,\n",
     "    \"d_model\": 512,\n",
@@ -2295,22 +295,22 @@
     "    \"d_ff\": 1024\n",
     "}\n",
     "\n",
-    "# Maskeleri oluşturma fonksiyonları\n",
+    "# Mask creation functions\n",
     "def create_padding_mask(seq):\n",
-    "    print(\"Padding maskesi oluşturuluyor...\")\n",
+    "    print(\"Creating padding mask...\")\n",
     "    mask = (seq != 0).unsqueeze(1).unsqueeze(2)\n",
     "    print(f\"Padding mask shape: {mask.shape}\")\n",
     "    return mask\n",
     "\n",
     "def create_causal_mask(size):\n",
-    "    print(\"Causal maskesi oluşturuluyor...\")\n",
+    "    print(\"Creating causal mask...\")\n",
     "    mask = torch.tril(torch.ones(size, size)).bool()\n",
     "    mask = mask.unsqueeze(0).unsqueeze(0)\n",
     "    print(f\"Causal mask shape: {mask.shape}\")\n",
     "    return mask\n",
     "\n",
     "def create_mask(src, tgt):\n",
-    "    print(\"Maskeler birleştiriliyor...\")\n",
+    "    print(\"Combining masks...\")\n",
     "    src_padding_mask = create_padding_mask(src)\n",
     "    tgt_padding_mask = create_padding_mask(tgt)\n",
     "    causal_mask = create_causal_mask(tgt.size(1))\n",
@@ -2318,20 +318,20 @@
     "    print(f\"Source mask shape: {src_padding_mask.shape}, Target mask shape: {tgt_mask.shape}\")\n",
     "    return src_padding_mask.to(src.device), tgt_mask.to(tgt.device)\n",
     "\n",
-    "# Veri Seti Sınıfı\n",
+    "# Dataset class\n",
     "class TextDataset(Dataset):\n",
     "    def __init__(self, data, tokenizer, seq_len):\n",
     "        self.texts = data['text']\n",
     "        self.tokenizer = tokenizer\n",
     "        self.seq_len = seq_len\n",
-    "        print(f\"TextDataset oluşturuldu. Toplam örnek sayısı: {len(self.texts)}\")\n",
+    "        print(f\"TextDataset created. Total samples: {len(self.texts)}\")\n",
     "        \n",
     "    def __len__(self):\n",
     "        return len(self.texts)\n",
     "    \n",
     "    def __getitem__(self, idx):\n",
     "        text = self.texts[idx]\n",
-    "        print(f\"Tokenizing text: {text[:50]}...\")  # İlk 50 karakteri göster\n",
+    "        print(f\"Tokenizing text: {text[:50]}...\")  # Show the first 50 characters\n",
     "        tokens = self.tokenizer.encode(text).ids\n",
     "        \n",
     "        if len(tokens) < self.seq_len:\n",
@@ -2342,9 +342,9 @@
     "        print(f\"Tokenized sequence length: {len(tokens)}\")\n",
     "        return torch.tensor(tokens[:-1]), torch.tensor(tokens[1:])\n",
     "\n",
-    "# Tokenizer Oluşturma\n",
+    "# Build the tokenizer\n",
     "def build_tokenizer(dataset):\n",
-    "    print(\"Tokenizer oluşturuluyor...\")\n",
+    "    print(\"Building tokenizer...\")\n",
     "    tokenizer = Tokenizer(WordLevel(unk_token=\"[UNK]\"))\n",
     "    tokenizer.pre_tokenizer = Whitespace()\n",
     "    trainer = WordLevelTrainer(\n",
@@ -2355,18 +355,18 @@
     "    def batch_iterator():\n",
     "        for i in range(0, len(dataset), 1000):\n",
     "            batch = dataset[i:i+1000]['text']\n",
-    "            print(f\"Tokenizer eğitimi için batch {i//1000 + 1} işleniyor...\")\n",
+    "            print(f\"Processing batch {i//1000 + 1} for tokenizer training...\")\n",
     "            yield batch\n",
     "            \n",
     "    tokenizer.train_from_iterator(batch_iterator(), trainer)\n",
-    "    print(\"Tokenizer eğitimi tamamlandı.\")\n",
+    "    print(\"Tokenizer training completed.\")\n",
     "    return tokenizer\n",
     "\n",
-    "# Veri Yükleme\n",
+    "# Data loading\n",
     "def get_data():\n",
-    "    print(\"Veri seti yükleniyor...\")\n",
+    "    print(\"Loading dataset...\")\n",
     "    dataset = load_dataset(CONFIG['hf_dataset'], CONFIG['dataset_name'])\n",
-    "    print(f\"Veri seti yüklendi. Train örnek sayısı: {len(dataset['train'])}, Validation örnek sayısı: {len(dataset['validation'])}\")\n",
+    "    print(f\"Dataset loaded. Train sample count: {len(dataset['train'])}, Validation sample count: {len(dataset['validation'])}\")\n",
     "    \n",
     "    tokenizer = build_tokenizer(dataset['train'])\n",
     "    \n",
@@ -2376,33 +376,33 @@
     "    train_loader = DataLoader(train_data, batch_size=CONFIG['batch_size'], shuffle=True)\n",
     "    valid_loader = DataLoader(valid_data, batch_size=CONFIG['batch_size'])\n",
     "    \n",
-    "    print(f\"Train loader örnek sayısı: {len(train_loader)}, Validation loader örnek sayısı: {len(valid_loader)}\")\n",
+    "    print(f\"Train loader batch count: {len(train_loader)}, Validation loader batch count: {len(valid_loader)}\")\n",
     "    return train_loader, valid_loader, tokenizer.get_vocab_size()\n",
     "\n",
     "# Transformer Modeli\n",
     "def build_transformer(src_vocab_size, tgt_vocab_size, src_seq_len, tgt_seq_len, d_model, N, h, dropout, d_ff):\n",
-    "    print(\"Transformer modeli oluşturuluyor...\")\n",
-    "    # Gömme katmanları\n",
+    "    print(\"Building the Transformer model...\")\n",
+    "    # Embedding layers\n",
     "    src_embed = InputEmbeddings(d_model, src_vocab_size)\n",
     "    tgt_embed = InputEmbeddings(d_model, tgt_vocab_size)\n",
-    "    print(\"Gömme katmanları oluşturuldu.\")\n",
+    "    print(\"Embedding layers created.\")\n",
     "\n",
     "    # Konumsal kodlama\n",
     "    src_pos = PositionalEncoding(d_model, src_seq_len, dropout)\n",
     "    tgt_pos = PositionalEncoding(d_model, tgt_seq_len, dropout)\n",
-    "    print(\"Konumsal kodlama katmanları oluşturuldu.\")\n",
+    "    print(\"Positional encoding layers created.\")\n",
     "\n",
-    "    # Encoder blokları\n",
+    "    # Encoder blocks\n",
     "    encoder_blocks = []\n",
     "    for i in range(N):\n",
     "        encoder_self_attention_block = MultiHeadAttentionBlock(d_model, h, dropout)\n",
     "        feed_forward_block = FeedForwardBlock(d_model, d_ff, dropout)\n",
     "        encoder_block = EncoderBlock(d_model, encoder_self_attention_block, feed_forward_block, dropout)\n",
     "        encoder_blocks.append(encoder_block)\n",
-    "        print(f\"Encoder block {i+1}/{N} oluşturuldu.\")\n",
-    "    print(\"Encoder blokları tamamlandı.\")\n",
+    "        print(f\"Encoder block {i+1}/{N} created.\")\n",
+    "    print(\"Encoder blocks completed.\")\n",
     "\n",
-    "    # Decoder blokları\n",
+    "    # Decoder blocks\n",
     "    decoder_blocks = []\n",
     "    for i in range(N):\n",
     "        decoder_self_attention_block = MultiHeadAttentionBlock(d_model, h, dropout)\n",
@@ -2410,17 +410,17 @@
     "        feed_forward_block = FeedForwardBlock(d_model, d_ff, dropout)\n",
     "        decoder_block = DecoderBlock(d_model, decoder_self_attention_block, decoder_cross_attention_block, feed_forward_block, dropout)\n",
     "        decoder_blocks.append(decoder_block)\n",
-    "        print(f\"Decoder block {i+1}/{N} oluşturuldu.\")\n",
-    "    print(\"Decoder blokları tamamlandı.\")\n",
+    "        print(f\"Decoder block {i+1}/{N} created.\")\n",
+    "    print(\"Decoder blocks completed.\")\n",
     "\n",
-    "    # Encoder ve Decoder\n",
+    "    # Encoder and decoder\n",
     "    encoder = Encoder(d_model, nn.ModuleList(encoder_blocks))\n",
     "    decoder = Decoder(d_model, nn.ModuleList(decoder_blocks))\n",
-    "    print(\"Encoder ve Decoder oluşturuldu.\")\n",
+    "    print(\"Encoder and decoder created.\")\n",
     "\n",
-    "    # Projeksiyon katmanı\n",
+    "    # Projection layer\n",
     "    projection_layer = ProjectionLayer(d_model, tgt_vocab_size)\n",
-    "    print(\"Projeksiyon katmanı oluşturuldu.\")\n",
+    "    print(\"Projection layer created.\")\n",
     "\n",
     "    # Transformer modeli\n",
     "    transformer = Transformer(\n",
@@ -2432,21 +432,21 @@
     "        tgt_pos=tgt_pos,\n",
     "        projection_layer=projection_layer\n",
     "    )\n",
-    "    print(\"Transformer modeli başarıyla oluşturuldu.\")\n",
+    "    print(\"Transformer model created successfully.\")\n",
     "\n",
-    "    # Parametreleri Xavier uniform ile başlat\n",
+    "    # Initialize parameters with Xavier uniform\n",
     "    for p in transformer.parameters():\n",
     "        if p.dim() > 1:\n",
     "            nn.init.xavier_uniform_(p)\n",
-    "    print(\"Model parametreleri başlatıldı.\")\n",
+    "    print(\"Model parameters initialized.\")\n",
     "\n",
     "    return transformer\n",
     "\n",
-    "# Eğitim Döngüsü\n",
+    "# Training Loop\n",
     "def train_model():\n",
-    "    print(\"Eğitim başlatılıyor...\")\n",
+    "    print(\"Starting training...\")\n",
     "    device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
-    "    print(f\"Kullanılan cihaz: {device}\")\n",
+    "    print(f\"Device in use: {device}\")\n",
     "    \n",
     "    train_loader, valid_loader, vocab_size = get_data()\n",
     "    \n",
@@ -2467,11 +467,11 @@
     "    \n",
     "    from torch.cuda.amp import GradScaler, autocast\n",
     "\n",
-    "# Eğitim döngüsü\n",
+    "# Training loop\n",
     "def train_model():\n",
-    "    print(\"Eğitim başlatılıyor...\")\n",
+    "    print(\"Starting training...\")\n",
     "    device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
-    "    print(f\"Kullanılan cihaz: {device}\")\n",
+    "    print(f\"Device in use: {device}\")\n",
     "    \n",
     "    train_loader, valid_loader, vocab_size = get_data()\n",
     "    \n",
@@ -2490,7 +490,7 @@
     "    optimizer = optim.Adam(transformer.parameters(), lr=CONFIG['lr'])\n",
     "    criterion = nn.CrossEntropyLoss(ignore_index=0)\n",
     "    scaler = GradScaler()\n",
-    "    accumulation_steps = 4  # Gradyan biriktirme adım sayısı\n",
+    "    accumulation_steps = 4  # Number of gradient accumulation steps\n",
     "\n",
     "    for epoch in range(CONFIG['num_epochs']):\n",
     "        transformer.train()\n",
@@ -2521,7 +521,7 @@
     "        avg_loss = total_loss / len(train_loader)\n",
     "        print(f\"Epoch [{epoch+1}/{CONFIG['num_epochs']}] Ortalama Loss: {avg_loss:.4f}\")\n",
     "\n",
-    "# Modeli Çalıştırma\n",
+    "# Run the model\n",
     "if __name__ == \"__main__\":\n",
     "    train_model()"
    ]
diff --git a/Genel-1/cross_attention_trfnms.py b/Genel-1/cross_attention_trfnms.py
index 25e3dae..4fbae75 100644
--- a/Genel-1/cross_attention_trfnms.py
+++ b/Genel-1/cross_attention_trfnms.py
@@ -1,70 +1,69 @@
 import torch
 import torch.nn as nn
 import math
-
 class LayerNormalization(nn.Module):
     def __init__(self, features: int, eps: float = 10**-6) -> None:
         super().__init__()
-        self.eps = eps  # Küçük bir değer, sıfıra bölünmeyi önlemek için
-        self.alpha = nn.Parameter(torch.ones(features))  # Ölçeklendirme parametresi (öğrenilebilir)
-        self.bias = nn.Parameter(torch.zeros(features))  # Kaydırma parametresi (öğrenilebilir)
+        self.eps = eps  # Small value to avoid division by zero
+        self.alpha = nn.Parameter(torch.ones(features))  # Learnable scaling parameter
+        self.bias = nn.Parameter(torch.zeros(features))  # Learnable bias parameter
 
     def forward(self, x):
-        # Girdinin ortalamasını ve standart sapmasını hesapla
+        # Compute the mean and standard deviation of the input
         mean = x.mean(dim=-1, keepdim=True)  # (batch, seq_len, 1)
         std = x.std(dim=-1, keepdim=True)  # (batch, seq_len, 1)
-        # Normalizasyon formülü: (x - mean) / (std + eps) * alpha + bias
+        # Normalization formula: (x - mean) / (std + eps) * alpha + bias
         return self.alpha * (x - mean) / (std + self.eps) + self.bias
 
 
 class FeedForwardBlock(nn.Module):
     def __init__(self, d_model: int, d_ff: int, dropout: float) -> None:
         super().__init__()
-        # Projeksiyon katmanları
-        self.gate_proj = nn.Linear(d_model, d_ff, bias=False)  # Gate projeksiyonu
-        self.up_proj = nn.Linear(d_model, d_ff, bias=False)  # Up projeksiyonu
-        self.down_proj = nn.Linear(d_ff, d_model, bias=False)  # Down projeksiyonu
+        # Projection layers
+        self.gate_proj = nn.Linear(d_model, d_ff, bias=False)  # Gate projection
+        self.up_proj = nn.Linear(d_model, d_ff, bias=False)  # Up projection
+        self.down_proj = nn.Linear(d_ff, d_model, bias=False)  # Down projection
         self.dropout = nn.Dropout(dropout)
 
     def forward(self, x):
-        # Gate ve Up projeksiyonlarını uygula
-        gate = torch.sigmoid(self.gate_proj(x))  # Gate mekanizması
-        up = self.up_proj(x)  # Up projeksiyonu
-        # Gate ve Up'ı birleştir
+        # Apply the gate and up projections
+        gate = torch.sigmoid(self.gate_proj(x))  # Gate mechanism
+        up = self.up_proj(x)  # Up projection
+        # Combine the gate and up paths
         x = gate * up
-        # Dropout ve Down projeksiyonu uygula
+        # Apply dropout and the down projection
         return self.down_proj(self.dropout(x))
 
 
 class InputEmbeddings(nn.Module):
     def __init__(self, d_model: int, vocab_size: int) -> None:
         super().__init__()
-        self.d_model = d_model  # Gömme vektörlerinin boyutu
-        self.vocab_size = vocab_size  # Kelime dağarcığı boyutu
-        self.embedding = nn.Embedding(vocab_size, d_model)  # Gömme katmanı
+        self.d_model = d_model  # Embedding dimension
+        self.vocab_size = vocab_size  # Vocabulary size
+        self.embedding = nn.Embedding(vocab_size, d_model)  # Embedding layer
 
     def forward(self, x):
-        # Token indekslerini gömme vektörlerine dönüştür ve ölçeklendir
+        # Convert token indices to embeddings and scale them
         return self.embedding(x) * math.sqrt(self.d_model)
 
 
 class PositionalEncoding(nn.Module):
     def __init__(self, d_model: int, seq_len: int, dropout: float) -> None:
         super().__init__()
-        self.d_model = d_model  # Gömme vektörlerinin boyutu
-        self.seq_len = seq_len  # Maksimum dizi uzunluğu
-        self.dropout = nn.Dropout(dropout)  # Dropout katmanı
-        # Konumsal kodlama matrisini oluştur
+        self.d_model = d_model  # Embedding dimension
+        self.seq_len = seq_len  # Maximum sequence length
+        self.dropout = nn.Dropout(dropout)  # Dropout layer
+        # Build the positional encoding matrix
         pe = torch.zeros(seq_len, d_model)
-        position = torch.arange(0, seq_len, dtype=torch.float).unsqueeze(1)  # Pozisyon vektörü
-        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))  # Bölme terimi
-        pe[:, 0::2] = torch.sin(position * div_term)  # Çift indeksler için sinüs
-        pe[:, 1::2] = torch.cos(position * div_term)  # Tek indeksler için kosinüs
-        pe = pe.unsqueeze(0)  # Batch boyutu ekle
-        self.register_buffer('pe', pe)  # Konumsal kodlamayı sabit olarak kaydet
+        position = torch.arange(0, seq_len, dtype=torch.float).unsqueeze(1)  # Position vector
+        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))  # Divisor term
+        pe[:, 0::2] = torch.sin(position * div_term)  # Sine for even indices
+        pe[:, 1::2] = torch.cos(position * div_term)  # Cosine for odd indices
+        pe = pe.unsqueeze(0)  # Add the batch dimension
+        self.register_buffer('pe', pe)  # Register the positional encoding as a buffer
 
     def forward(self, x):
-        # Girdiye konumsal kodlamayı ekle
+        # Add positional encoding to the input
         x = x + (self.pe[:, :x.shape[1], :]).requires_grad_(False)
         return self.dropout(x)
 
@@ -72,11 +71,11 @@ def forward(self, x):
 class ResidualConnection(nn.Module):
     def __init__(self, features: int, dropout: float) -> None:
         super().__init__()
-        self.dropout = nn.Dropout(dropout)  # Dropout katmanı
-        self.norm = LayerNormalization(features)  # Katman normalizasyonu
+        self.dropout = nn.Dropout(dropout)  # Dropout layer
+        self.norm = LayerNormalization(features)  # Layer normalization
 
     def forward(self, x, sublayer):
-        # Artık bağlantı: x + dropout(sublayer(norm(x)))
+        # Residual connection: x + dropout(sublayer(norm(x)))
         return x + self.dropout(sublayer(self.norm(x)))
 
 
@@ -88,11 +87,11 @@ def __init__(self, d_model: int, h: int, dropout: float) -> None:
         assert d_model % h == 0, "d_model is not divisible by h"
         self.d_k = d_model // h
 
-        # Projeksiyon katmanları
-        self.q_proj = nn.Linear(d_model, d_model, bias=False)  # Query projeksiyonu
-        self.k_proj = nn.Linear(d_model, d_model, bias=False)  # Key projeksiyonu
-        self.v_proj = nn.Linear(d_model, d_model, bias=False)  # Value projeksiyonu
-        self.o_proj = nn.Linear(d_model, d_model, bias=False)  # Çıktı projeksiyonu
+        # Projection layers
+        self.q_proj = nn.Linear(d_model, d_model, bias=False)  # Query projection
+        self.k_proj = nn.Linear(d_model, d_model, bias=False)  # Key projection
+        self.v_proj = nn.Linear(d_model, d_model, bias=False)  # Value projection
+        self.o_proj = nn.Linear(d_model, d_model, bias=False)  # Output projection
 
         self.dropout = nn.Dropout(dropout)
 
@@ -101,27 +100,27 @@ def attention(query, key, value, mask, dropout: nn.Dropout):
         d_k = query.shape[-1]
         attention_scores = (query @ key.transpose(-2, -1)) / math.sqrt(d_k)
         if mask is not None:
-            attention_scores.masked_fill_(mask == 0, -1e9) # Bu satır, mask değeri sıfır olan konumları −1e9 ile doldurur. Böylece o konumlardaki dikkat skorları etkisiz hâle getirilir (maskeleme).
+            attention_scores.masked_fill_(mask == 0, -1e9) # This line fills masked positions with -1e9 to suppress attention on those tokens.
         attention_scores = attention_scores.softmax(dim=-1)
         if dropout is not None:
             attention_scores = dropout(attention_scores)
         return (attention_scores @ value), attention_scores
 
     def forward(self, q, k, v, mask):
-        # Query, Key, Value projeksiyonları
+        # Query, key, value projections
         query = self.q_proj(q)
         key = self.k_proj(k)
         value = self.v_proj(v)
 
-        # Çok kafalı dikkat için şekil değiştir
+        # Reshape for multi-head attention
         query = query.view(query.shape[0], query.shape[1], self.h, self.d_k).transpose(1, 2)
         key = key.view(key.shape[0], key.shape[1], self.h, self.d_k).transpose(1, 2)
         value = value.view(value.shape[0], value.shape[1], self.h, self.d_k).transpose(1, 2)
 
-        # Dikkat mekanizmasını uygula
+        # Apply the attention mechanism
         x, self.attention_scores = MultiHeadAttentionBlock.attention(query, key, value, mask, self.dropout)
 
-        # Kafaları birleştir ve çıktı projeksiyonu uygula
+        # Merge the heads and apply the output projection
         x = x.transpose(1, 2).contiguous().view(x.shape[0], -1, self.h * self.d_k)
         return self.o_proj(x)
 
@@ -129,14 +128,14 @@ def forward(self, q, k, v, mask):
 class EncoderBlock(nn.Module):
     def __init__(self, features: int, self_attention_block: MultiHeadAttentionBlock, feed_forward_block: FeedForwardBlock, dropout: float) -> None:
         super().__init__()
-        self.self_attention_block = self_attention_block  # Self-attention katmanı
-        self.feed_forward_block = feed_forward_block  # İleri beslemeli sinir ağı
-        self.residual_connections = nn.ModuleList([ResidualConnection(features, dropout) for _ in range(2)])  # Artık bağlantılar
+        self.self_attention_block = self_attention_block  # Self-attention layer
+        self.feed_forward_block = feed_forward_block  # Feed-forward network
+        self.residual_connections = nn.ModuleList([ResidualConnection(features, dropout) for _ in range(2)])  # Residual connections
 
     def forward(self, x, src_mask):
-        # Self-attention ve artık bağlantı
+        # Self-attention with a residual connection
         x = self.residual_connections[0](x, lambda x: self.self_attention_block(x, x, x, src_mask))
-        # İleri beslemeli sinir ağı ve artık bağlantı
+        # Feed-forward network with a residual connection
         x = self.residual_connections[1](x, self.feed_forward_block)
         return x
 
@@ -144,30 +143,30 @@ def forward(self, x, src_mask):
 class Encoder(nn.Module):
     def __init__(self, features: int, layers: nn.ModuleList) -> None:
         super().__init__()
-        self.layers = layers  # Encoder blokları
-        self.norm = LayerNormalization(features)  # Son katman normalizasyonu
+        self.layers = layers  # Encoder blocks
+        self.norm = LayerNormalization(features)  # Final layer normalization
 
     def forward(self, x, mask):
-        # Tüm encoder bloklarını uygula
+        # Apply all encoder blocks
         for layer in self.layers:
             x = layer(x, mask)
-        return self.norm(x)  # Son katman normalizasyonu
+        return self.norm(x)  # Final layer normalization
 
 
 class DecoderBlock(nn.Module):
     def __init__(self, features: int, self_attention_block: MultiHeadAttentionBlock, cross_attention_block: MultiHeadAttentionBlock, feed_forward_block: FeedForwardBlock, dropout: float) -> None:
         super().__init__()
-        self.self_attention_block = self_attention_block  # Self-attention katmanı
-        self.cross_attention_block = cross_attention_block  # Cross-attention katmanı
-        self.feed_forward_block = feed_forward_block  # İleri beslemeli sinir ağı
-        self.residual_connections = nn.ModuleList([ResidualConnection(features, dropout) for _ in range(3)])  # Artık bağlantılar
+        self.self_attention_block = self_attention_block  # Self-attention layer
+        self.cross_attention_block = cross_attention_block  # Cross-attention layer
+        self.feed_forward_block = feed_forward_block  # Feed-forward network
+        self.residual_connections = nn.ModuleList([ResidualConnection(features, dropout) for _ in range(3)])  # Residual connections
 
     def forward(self, x, encoder_output, src_mask, tgt_mask):
-        # Self-attention: Decoder'ın kendi çıktısına dikkat eder
+        # Self-attention: the decoder attends to its own outputs
         x = self.residual_connections[0](x, lambda x: self.self_attention_block(x, x, x, tgt_mask))
-        # Cross-attention: Decoder, encoder'ın çıktısına dikkat eder
+        # Cross-attention: the decoder attends to the encoder outputs
         x = self.residual_connections[1](x, lambda x: self.cross_attention_block(x, encoder_output, encoder_output, src_mask))
-        # İleri beslemeli sinir ağı
+        # Feed-forward network
         x = self.residual_connections[2](x, self.feed_forward_block)
         return x
 
@@ -175,64 +174,64 @@ def forward(self, x, encoder_output, src_mask, tgt_mask):
 class Decoder(nn.Module):
     def __init__(self, features: int, layers: nn.ModuleList) -> None:
         super().__init__()
-        self.layers = layers  # Decoder blokları
-        self.norm = LayerNormalization(features)  # Son katman normalizasyonu
+        self.layers = layers  # Decoder blocks
+        self.norm = LayerNormalization(features)  # Final layer normalization
 
     def forward(self, x, encoder_output, src_mask, tgt_mask):
-        # Tüm decoder bloklarını uygula
+        # Apply all decoder blocks
         for layer in self.layers:
             x = layer(x, encoder_output, src_mask, tgt_mask)
-        return self.norm(x)  # Son katman normalizasyonu
+        return self.norm(x)  # Final layer normalization
 
 
 class ProjectionLayer(nn.Module):
     def __init__(self, d_model, vocab_size) -> None:
         super().__init__()
-        self.proj = nn.Linear(d_model, vocab_size)  # Lineer projeksiyon katmanı
+        self.proj = nn.Linear(d_model, vocab_size)  # Linear projection layer
 
     def forward(self, x) -> None:
-        # Girdiyi kelime dağarcığı boyutuna projelendir
+        # Project the input to the vocabulary dimension
         return self.proj(x)
 
 
 class Transformer(nn.Module):
     def __init__(self, encoder: Encoder, decoder: Decoder, src_embed: InputEmbeddings, tgt_embed: InputEmbeddings, src_pos: PositionalEncoding, tgt_pos: PositionalEncoding, projection_layer: ProjectionLayer) -> None:
         super().__init__()
-        self.encoder = encoder  # Encoder katmanı
-        self.decoder = decoder  # Decoder katmanı
-        self.src_embed = src_embed  # Kaynak gömme katmanı
-        self.tgt_embed = tgt_embed  # Hedef gömme katmanı
-        self.src_pos = src_pos  # Kaynak konumsal kodlama
-        self.tgt_pos = tgt_pos  # Hedef konumsal kodlama
-        self.projection_layer = projection_layer  # Projeksiyon katmanı
+        self.encoder = encoder  # Encoder module
+        self.decoder = decoder  # Decoder module
+        self.src_embed = src_embed  # Source embedding layer
+        self.tgt_embed = tgt_embed  # Target embedding layer
+        self.src_pos = src_pos  # Source positional encoding
+        self.tgt_pos = tgt_pos  # Target positional encoding
+        self.projection_layer = projection_layer  # Projection layer
 
     def encode(self, src, src_mask):
-        # Kaynak diziyi kodla
-        src = self.src_embed(src)  # Gömme katmanı
-        src = self.src_pos(src)  # Konumsal kodlama
-        return self.encoder(src, src_mask)  # Encoder katmanı
+        # Encode the source sequence
+        src = self.src_embed(src)  # Embedding layer
+        src = self.src_pos(src)  # Positional encoding
+        return self.encoder(src, src_mask)  # Encoder module
 
     def decode(self, encoder_output: torch.Tensor, src_mask: torch.Tensor, tgt: torch.Tensor, tgt_mask: torch.Tensor):
-        # Hedef diziyi çöz
-        tgt = self.tgt_embed(tgt)  # Gömme katmanı
-        tgt = self.tgt_pos(tgt)  # Konumsal kodlama
-        return self.decoder(tgt, encoder_output, src_mask, tgt_mask)  # Decoder katmanı
+        # Decode the target sequence
+        tgt = self.tgt_embed(tgt)  # Embedding layer
+        tgt = self.tgt_pos(tgt)  # Positional encoding
+        return self.decoder(tgt, encoder_output, src_mask, tgt_mask)  # Decoder module
 
     def project(self, x):
-        # Çıktıyı kelime dağarcığı boyutuna projelendir
+        # Project the output to the vocabulary dimension
         return self.projection_layer(x)
 
 
 def build_transformer(src_vocab_size: int, tgt_vocab_size: int, src_seq_len: int, tgt_seq_len: int, d_model: int = 512, N: int = 6, h: int = 8, dropout: float = 0.1, d_ff: int = 2048) -> Transformer:
-    # Gömme katmanlarını oluştur
+    # Build the embedding layers
     src_embed = InputEmbeddings(d_model, src_vocab_size)
     tgt_embed = InputEmbeddings(d_model, tgt_vocab_size)
 
-    # Konumsal kodlama katmanlarını oluştur
+    # Build the positional encoding layers
     src_pos = PositionalEncoding(d_model, src_seq_len, dropout)
     tgt_pos = PositionalEncoding(d_model, tgt_seq_len, dropout)
 
-    # Encoder bloklarını oluştur
+    # Build the encoder blocks
     encoder_blocks = []
     for _ in range(N):
         encoder_self_attention_block = MultiHeadAttentionBlock(d_model, h, dropout)
@@ -240,7 +239,7 @@ def build_transformer(src_vocab_size: int, tgt_vocab_size: int, src_seq_len: int
         encoder_block = EncoderBlock(d_model, encoder_self_attention_block, feed_forward_block, dropout)
         encoder_blocks.append(encoder_block)
 
-    # Decoder bloklarını oluştur
+    # Build the decoder blocks
     decoder_blocks = []
     for _ in range(N):
         decoder_self_attention_block = MultiHeadAttentionBlock(d_model, h, dropout)
@@ -249,88 +248,84 @@ def build_transformer(src_vocab_size: int, tgt_vocab_size: int, src_seq_len: int
         decoder_block = DecoderBlock(d_model, decoder_self_attention_block, decoder_cross_attention_block, feed_forward_block, dropout)
         decoder_blocks.append(decoder_block)
 
-    # Encoder ve Decoder'ı oluştur
+    # Build the encoder and decoder
     encoder = Encoder(d_model, nn.ModuleList(encoder_blocks))
     decoder = Decoder(d_model, nn.ModuleList(decoder_blocks))
 
-    # Projeksiyon katmanını oluştur
+    # Build the projection layer
     projection_layer = ProjectionLayer(d_model, tgt_vocab_size)
 
-    # Transformer modelini oluştur
+    # Build the transformer model
     transformer = Transformer(encoder, decoder, src_embed, tgt_embed, src_pos, tgt_pos, projection_layer)
 
-    # Parametreleri Xavier uniform ile başlat
+    # Initialize parameters with Xavier uniform
     for p in transformer.parameters():
         if p.dim() > 1:
             nn.init.xavier_uniform_(p)
 
-import torch
-import torch.nn as nn
-import math
-
 class LayerNormalization(nn.Module):
     def __init__(self, features: int, eps: float = 10**-6) -> None:
         super().__init__()
-        self.eps = eps  # Küçük bir değer, sıfıra bölünmeyi önlemek için
-        self.alpha = nn.Parameter(torch.ones(features))  # Ölçeklendirme parametresi (öğrenilebilir)
-        self.bias = nn.Parameter(torch.zeros(features))  # Kaydırma parametresi (öğrenilebilir)
+        self.eps = eps  # Small value to avoid division by zero
+        self.alpha = nn.Parameter(torch.ones(features))  # Learnable scaling parameter
+        self.bias = nn.Parameter(torch.zeros(features))  # Learnable bias parameter
 
     def forward(self, x):
-        # Girdinin ortalamasını ve standart sapmasını hesapla
+        # Compute the mean and standard deviation of the input
         mean = x.mean(dim=-1, keepdim=True)  # (batch, seq_len, 1)
         std = x.std(dim=-1, keepdim=True)  # (batch, seq_len, 1)
-        # Normalizasyon formülü: (x - mean) / (std + eps) * alpha + bias
+        # Normalization formula: (x - mean) / (std + eps) * alpha + bias
         return self.alpha * (x - mean) / (std + self.eps) + self.bias
 
 
 class FeedForwardBlock(nn.Module):
     def __init__(self, d_model: int, d_ff: int, dropout: float) -> None:
         super().__init__()
-        # Projeksiyon katmanları
-        self.gate_proj = nn.Linear(d_model, d_ff, bias=False)  # Gate projeksiyonu
-        self.up_proj = nn.Linear(d_model, d_ff, bias=False)  # Up projeksiyonu
-        self.down_proj = nn.Linear(d_ff, d_model, bias=False)  # Down projeksiyonu
+        # Projection layers
+        self.gate_proj = nn.Linear(d_model, d_ff, bias=False)  # Gate projection
+        self.up_proj = nn.Linear(d_model, d_ff, bias=False)  # Up projection
+        self.down_proj = nn.Linear(d_ff, d_model, bias=False)  # Down projection
         self.dropout = nn.Dropout(dropout)
 
     def forward(self, x):
-        # Gate ve Up projeksiyonlarını uygula
-        gate = torch.sigmoid(self.gate_proj(x))  # Gate mekanizması
-        up = self.up_proj(x)  # Up projeksiyonu
-        # Gate ve Up'ı birleştir
+        # Apply the gate and up projections
+        gate = torch.sigmoid(self.gate_proj(x))  # Gate mechanism
+        up = self.up_proj(x)  # Up projection
+        # Combine the gate and up paths
         x = gate * up
-        # Dropout ve Down projeksiyonu uygula
+        # Apply dropout and the down projection
         return self.down_proj(self.dropout(x))
 
 
 class InputEmbeddings(nn.Module):
     def __init__(self, d_model: int, vocab_size: int) -> None:
         super().__init__()
-        self.d_model = d_model  # Gömme vektörlerinin boyutu
-        self.vocab_size = vocab_size  # Kelime dağarcığı boyutu
-        self.embedding = nn.Embedding(vocab_size, d_model)  # Gömme katmanı
+        self.d_model = d_model  # Embedding dimension
+        self.vocab_size = vocab_size  # Vocabulary size
+        self.embedding = nn.Embedding(vocab_size, d_model)  # Embedding layer
 
     def forward(self, x):
-        # Token indekslerini gömme vektörlerine dönüştür ve ölçeklendir
+        # Convert token indices to embeddings and scale them
         return self.embedding(x) * math.sqrt(self.d_model)
 
 
 class PositionalEncoding(nn.Module):
     def __init__(self, d_model: int, seq_len: int, dropout: float) -> None:
         super().__init__()
-        self.d_model = d_model  # Gömme vektörlerinin boyutu
-        self.seq_len = seq_len  # Maksimum dizi uzunluğu
-        self.dropout = nn.Dropout(dropout)  # Dropout katmanı
-        # Konumsal kodlama matrisini oluştur
+        self.d_model = d_model  # Embedding dimension
+        self.seq_len = seq_len  # Maximum sequence length
+        self.dropout = nn.Dropout(dropout)  # Dropout layer
+        # Build the positional encoding matrix
         pe = torch.zeros(seq_len, d_model)
-        position = torch.arange(0, seq_len, dtype=torch.float).unsqueeze(1)  # Pozisyon vektörü
-        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))  # Bölme terimi
-        pe[:, 0::2] = torch.sin(position * div_term)  # Çift indeksler için sinüs
-        pe[:, 1::2] = torch.cos(position * div_term)  # Tek indeksler için kosinüs
-        pe = pe.unsqueeze(0)  # Batch boyutu ekle
-        self.register_buffer('pe', pe)  # Konumsal kodlamayı sabit olarak kaydet
+        position = torch.arange(0, seq_len, dtype=torch.float).unsqueeze(1)  # Position vector
+        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))  # Inverse-frequency term: 10000^(-2i / d_model) for each even index 2i
+        pe[:, 0::2] = torch.sin(position * div_term)  # Sine for even indices
+        pe[:, 1::2] = torch.cos(position * div_term)  # Cosine for odd indices
+        pe = pe.unsqueeze(0)  # Add the batch dimension
+        self.register_buffer('pe', pe)  # Register the positional encoding as a buffer
 
     def forward(self, x):
-        # Girdiye konumsal kodlamayı ekle
+        # Add positional encoding to the input
         x = x + (self.pe[:, :x.shape[1], :]).requires_grad_(False)
         return self.dropout(x)
 
@@ -338,11 +333,11 @@ def forward(self, x):
 class ResidualConnection(nn.Module):
     def __init__(self, features: int, dropout: float) -> None:
         super().__init__()
-        self.dropout = nn.Dropout(dropout)  # Dropout katmanı
-        self.norm = LayerNormalization(features)  # Katman normalizasyonu
+        self.dropout = nn.Dropout(dropout)  # Dropout layer
+        self.norm = LayerNormalization(features)  # Layer normalization
 
     def forward(self, x, sublayer):
-        # Artık bağlantı: x + dropout(sublayer(norm(x)))
+        # Residual connection: x + dropout(sublayer(norm(x)))
         return x + self.dropout(sublayer(self.norm(x)))
 
 
@@ -354,11 +349,11 @@ def __init__(self, d_model: int, h: int, dropout: float) -> None:
         assert d_model % h == 0, "d_model is not divisible by h"
         self.d_k = d_model // h
 
-        # Projeksiyon katmanları
-        self.q_proj = nn.Linear(d_model, d_model, bias=False)  # Query projeksiyonu
-        self.k_proj = nn.Linear(d_model, d_model, bias=False)  # Key projeksiyonu
-        self.v_proj = nn.Linear(d_model, d_model, bias=False)  # Value projeksiyonu
-        self.o_proj = nn.Linear(d_model, d_model, bias=False)  # Çıktı projeksiyonu
+        # Projection layers
+        self.q_proj = nn.Linear(d_model, d_model, bias=False)  # Query projection
+        self.k_proj = nn.Linear(d_model, d_model, bias=False)  # Key projection
+        self.v_proj = nn.Linear(d_model, d_model, bias=False)  # Value projection
+        self.o_proj = nn.Linear(d_model, d_model, bias=False)  # Output projection
 
         self.dropout = nn.Dropout(dropout)
 
@@ -367,27 +362,27 @@ def attention(query, key, value, mask, dropout: nn.Dropout):
         d_k = query.shape[-1]
         attention_scores = (query @ key.transpose(-2, -1)) / math.sqrt(d_k)
         if mask is not None:
-            attention_scores.masked_fill_(mask == 0, -1e9) # Bu satır, mask değeri sıfır olan konumları −1e9 ile doldurur. Böylece o konumlardaki dikkat skorları etkisiz hâle getirilir (maskeleme).
+            attention_scores.masked_fill_(mask == 0, -1e9) # Fill positions where mask == 0 with -1e9 so the softmax below gives them near-zero weight.
         attention_scores = attention_scores.softmax(dim=-1)
         if dropout is not None:
             attention_scores = dropout(attention_scores)
         return (attention_scores @ value), attention_scores
 
     def forward(self, q, k, v, mask):
-        # Query, Key, Value projeksiyonları
+        # Query, key, value projections
         query = self.q_proj(q)
         key = self.k_proj(k)
         value = self.v_proj(v)
 
-        # Çok kafalı dikkat için şekil değiştir
+        # Reshape for multi-head attention
         query = query.view(query.shape[0], query.shape[1], self.h, self.d_k).transpose(1, 2)
         key = key.view(key.shape[0], key.shape[1], self.h, self.d_k).transpose(1, 2)
         value = value.view(value.shape[0], value.shape[1], self.h, self.d_k).transpose(1, 2)
 
-        # Dikkat mekanizmasını uygula
+        # Apply the attention mechanism
         x, self.attention_scores = MultiHeadAttentionBlock.attention(query, key, value, mask, self.dropout)
 
-        # Kafaları birleştir ve çıktı projeksiyonu uygula
+        # Merge the heads and apply the output projection
         x = x.transpose(1, 2).contiguous().view(x.shape[0], -1, self.h * self.d_k)
         return self.o_proj(x)
 
@@ -395,14 +390,14 @@ def forward(self, q, k, v, mask):
 class EncoderBlock(nn.Module):
     def __init__(self, features: int, self_attention_block: MultiHeadAttentionBlock, feed_forward_block: FeedForwardBlock, dropout: float) -> None:
         super().__init__()
-        self.self_attention_block = self_attention_block  # Self-attention katmanı
-        self.feed_forward_block = feed_forward_block  # İleri beslemeli sinir ağı
-        self.residual_connections = nn.ModuleList([ResidualConnection(features, dropout) for _ in range(2)])  # Artık bağlantılar
+        self.self_attention_block = self_attention_block  # Self-attention layer
+        self.feed_forward_block = feed_forward_block  # Feed-forward network
+        self.residual_connections = nn.ModuleList([ResidualConnection(features, dropout) for _ in range(2)])  # Residual connections
 
     def forward(self, x, src_mask):
-        # Self-attention ve artık bağlantı
+        # Self-attention with a residual connection
         x = self.residual_connections[0](x, lambda x: self.self_attention_block(x, x, x, src_mask))
-        # İleri beslemeli sinir ağı ve artık bağlantı
+        # Feed-forward network with a residual connection
         x = self.residual_connections[1](x, self.feed_forward_block)
         return x
 
@@ -410,30 +405,30 @@ def forward(self, x, src_mask):
 class Encoder(nn.Module):
     def __init__(self, features: int, layers: nn.ModuleList) -> None:
         super().__init__()
-        self.layers = layers  # Encoder blokları
-        self.norm = LayerNormalization(features)  # Son katman normalizasyonu
+        self.layers = layers  # Encoder blocks
+        self.norm = LayerNormalization(features)  # Final layer normalization
 
     def forward(self, x, mask):
-        # Tüm encoder bloklarını uygula
+        # Apply all encoder blocks
         for layer in self.layers:
             x = layer(x, mask)
-        return self.norm(x)  # Son katman normalizasyonu
+        return self.norm(x)  # Final layer normalization
 
 
 class DecoderBlock(nn.Module):
     def __init__(self, features: int, self_attention_block: MultiHeadAttentionBlock, cross_attention_block: MultiHeadAttentionBlock, feed_forward_block: FeedForwardBlock, dropout: float) -> None:
         super().__init__()
-        self.self_attention_block = self_attention_block  # Self-attention katmanı
-        self.cross_attention_block = cross_attention_block  # Cross-attention katmanı
-        self.feed_forward_block = feed_forward_block  # İleri beslemeli sinir ağı
-        self.residual_connections = nn.ModuleList([ResidualConnection(features, dropout) for _ in range(3)])  # Artık bağlantılar
+        self.self_attention_block = self_attention_block  # Self-attention layer
+        self.cross_attention_block = cross_attention_block  # Cross-attention layer
+        self.feed_forward_block = feed_forward_block  # Feed-forward network
+        self.residual_connections = nn.ModuleList([ResidualConnection(features, dropout) for _ in range(3)])  # Residual connections
 
     def forward(self, x, encoder_output, src_mask, tgt_mask):
-        # Self-attention: Decoder'ın kendi çıktısına dikkat eder
+        # Self-attention: the decoder attends to its own outputs
         x = self.residual_connections[0](x, lambda x: self.self_attention_block(x, x, x, tgt_mask))
-        # Cross-attention: Decoder, encoder'ın çıktısına dikkat eder
+        # Cross-attention: the decoder attends to the encoder outputs
         x = self.residual_connections[1](x, lambda x: self.cross_attention_block(x, encoder_output, encoder_output, src_mask))
-        # İleri beslemeli sinir ağı
+        # Feed-forward network
         x = self.residual_connections[2](x, self.feed_forward_block)
         return x
 
@@ -441,64 +436,64 @@ def forward(self, x, encoder_output, src_mask, tgt_mask):
 class Decoder(nn.Module):
     def __init__(self, features: int, layers: nn.ModuleList) -> None:
         super().__init__()
-        self.layers = layers  # Decoder blokları
-        self.norm = LayerNormalization(features)  # Son katman normalizasyonu
+        self.layers = layers  # Decoder blocks
+        self.norm = LayerNormalization(features)  # Final layer normalization
 
     def forward(self, x, encoder_output, src_mask, tgt_mask):
-        # Tüm decoder bloklarını uygula
+        # Apply all decoder blocks
         for layer in self.layers:
             x = layer(x, encoder_output, src_mask, tgt_mask)
-        return self.norm(x)  # Son katman normalizasyonu
+        return self.norm(x)  # Final layer normalization
 
 
 class ProjectionLayer(nn.Module):
     def __init__(self, d_model, vocab_size) -> None:
         super().__init__()
-        self.proj = nn.Linear(d_model, vocab_size)  # Lineer projeksiyon katmanı
+        self.proj = nn.Linear(d_model, vocab_size)  # Linear projection layer
 
     def forward(self, x) -> None:
-        # Girdiyi kelime dağarcığı boyutuna projelendir
+        # Project the input to the vocabulary dimension
         return self.proj(x)
 
 
 class Transformer(nn.Module):
     def __init__(self, encoder: Encoder, decoder: Decoder, src_embed: InputEmbeddings, tgt_embed: InputEmbeddings, src_pos: PositionalEncoding, tgt_pos: PositionalEncoding, projection_layer: ProjectionLayer) -> None:
         super().__init__()
-        self.encoder = encoder  # Encoder katmanı
-        self.decoder = decoder  # Decoder katmanı
-        self.src_embed = src_embed  # Kaynak gömme katmanı
-        self.tgt_embed = tgt_embed  # Hedef gömme katmanı
-        self.src_pos = src_pos  # Kaynak konumsal kodlama
-        self.tgt_pos = tgt_pos  # Hedef konumsal kodlama
-        self.projection_layer = projection_layer  # Projeksiyon katmanı
+        self.encoder = encoder  # Encoder module
+        self.decoder = decoder  # Decoder module
+        self.src_embed = src_embed  # Source embedding layer
+        self.tgt_embed = tgt_embed  # Target embedding layer
+        self.src_pos = src_pos  # Source positional encoding
+        self.tgt_pos = tgt_pos  # Target positional encoding
+        self.projection_layer = projection_layer  # Projection layer
 
     def encode(self, src, src_mask):
-        # Kaynak diziyi kodla
-        src = self.src_embed(src)  # Gömme katmanı
-        src = self.src_pos(src)  # Konumsal kodlama
-        return self.encoder(src, src_mask)  # Encoder katmanı
+        # Encode the source sequence
+        src = self.src_embed(src)  # Embedding layer
+        src = self.src_pos(src)  # Positional encoding
+        return self.encoder(src, src_mask)  # Encoder module
 
     def decode(self, encoder_output: torch.Tensor, src_mask: torch.Tensor, tgt: torch.Tensor, tgt_mask: torch.Tensor):
-        # Hedef diziyi çöz
-        tgt = self.tgt_embed(tgt)  # Gömme katmanı
-        tgt = self.tgt_pos(tgt)  # Konumsal kodlama
-        return self.decoder(tgt, encoder_output, src_mask, tgt_mask)  # Decoder katmanı
+        # Decode the target sequence
+        tgt = self.tgt_embed(tgt)  # Embedding layer
+        tgt = self.tgt_pos(tgt)  # Positional encoding
+        return self.decoder(tgt, encoder_output, src_mask, tgt_mask)  # Decoder module
 
     def project(self, x):
-        # Çıktıyı kelime dağarcığı boyutuna projelendir
+        # Project the output to the vocabulary dimension
         return self.projection_layer(x)
 
 
 def build_transformer(src_vocab_size: int, tgt_vocab_size: int, src_seq_len: int, tgt_seq_len: int, d_model: int = 512, N: int = 6, h: int = 8, dropout: float = 0.1, d_ff: int = 2048) -> Transformer:
-    # Gömme katmanlarını oluştur
+    # Build the embedding layers
     src_embed = InputEmbeddings(d_model, src_vocab_size)
     tgt_embed = InputEmbeddings(d_model, tgt_vocab_size)
 
-    # Konumsal kodlama katmanlarını oluştur
+    # Build the positional encoding layers
     src_pos = PositionalEncoding(d_model, src_seq_len, dropout)
     tgt_pos = PositionalEncoding(d_model, tgt_seq_len, dropout)
 
-    # Encoder bloklarını oluştur
+    # Build the encoder blocks
     encoder_blocks = []
     for _ in range(N):
         encoder_self_attention_block = MultiHeadAttentionBlock(d_model, h, dropout)
@@ -506,7 +501,7 @@ def build_transformer(src_vocab_size: int, tgt_vocab_size: int, src_seq_len: int
         encoder_block = EncoderBlock(d_model, encoder_self_attention_block, feed_forward_block, dropout)
         encoder_blocks.append(encoder_block)
 
-    # Decoder bloklarını oluştur
+    # Build the decoder blocks
     decoder_blocks = []
     for _ in range(N):
         decoder_self_attention_block = MultiHeadAttentionBlock(d_model, h, dropout)
@@ -515,19 +510,19 @@ def build_transformer(src_vocab_size: int, tgt_vocab_size: int, src_seq_len: int
         decoder_block = DecoderBlock(d_model, decoder_self_attention_block, decoder_cross_attention_block, feed_forward_block, dropout)
         decoder_blocks.append(decoder_block)
 
-    # Encoder ve Decoder'ı oluştur
+    # Build the encoder and decoder
     encoder = Encoder(d_model, nn.ModuleList(encoder_blocks))
     decoder = Decoder(d_model, nn.ModuleList(decoder_blocks))
 
-    # Projeksiyon katmanını oluştur
+    # Build the projection layer
     projection_layer = ProjectionLayer(d_model, tgt_vocab_size)
 
-    # Transformer modelini oluştur
+    # Build the transformer model
     transformer = Transformer(encoder, decoder, src_embed, tgt_embed, src_pos, tgt_pos, projection_layer)
 
-    # Parametreleri Xavier uniform ile başlat
+    # Initialize parameters with Xavier uniform
     for p in transformer.parameters():
         if p.dim() > 1:
             nn.init.xavier_uniform_(p)
 
-    return transformer
\ No newline at end of file
+    return transformer
diff --git a/Genel-1/gpt_2_config_deepseek.ipynb b/Genel-1/gpt_2_config_deepseek.ipynb
index 1480f1d..fb66663 100644
--- a/Genel-1/gpt_2_config_deepseek.ipynb
+++ b/Genel-1/gpt_2_config_deepseek.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -19,7 +19,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -57,7 +57,7 @@
     "        \n",
     "        return attn_output + sum(expert_outputs)\n",
     "\n",
-    "# 2. Ultra Derin Dil Modeli\n",
+    "# 2. Ultra Deep Language Model\n",
     "class DeepSeekClone(nn.Module):\n",
     "    vocab_size: int\n",
     "    num_layers: int = 32\n",
@@ -85,7 +85,7 @@
     "        \n",
     "        return nn.Dense(self.vocab_size)(x)\n",
     "\n",
-    "# 3. Optimizasyon ve Eğitim State\n",
+    "# 3. Optimization and Training State\n",
     "def create_train_state(rng, config):\n",
     "    model = DeepSeekClone(**config)\n",
     "    params = model.init(rng, jnp.ones((1, 512), dtype=jnp.int32))['params']\n",
@@ -101,7 +101,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -130,7 +130,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -189,7 +189,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
diff --git a/Genel-1/mixedtransformers.py b/Genel-1/mixedtransformers.py
index c12e53f..1eaf72e 100644
--- a/Genel-1/mixedtransformers.py
+++ b/Genel-1/mixedtransformers.py
@@ -1,6 +1,7 @@
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
+from collections import Counter
 from torch.utils.data import Dataset, DataLoader
 
 class TextProcessor:
@@ -9,11 +10,11 @@ def __init__(self, max_vocab=20000, max_len=128):
         self.max_len = max_len
 
     def build_vocab(self, texts):
-        counter = counter()
+        token_counter = Counter()
         for text in texts:
             tokens = text.lower().split()
-            counter.update(tokens)
-        vocab_list = ['<pad>', '<unk>'] + [word for word, _ in counter.most_common(20000)]
+            token_counter.update(tokens)
+        vocab_list = ['<pad>', '<unk>'] + [word for word, _ in token_counter.most_common(20000)]
         self.vocab = {word: idx for idx, word in enumerate(vocab_list)}
 
     def text_to_indices(self, text):
@@ -37,19 +38,19 @@ def forward(self, x):
         B, S, _ = x.shape
         x = x.view(B, S, self.heads, self.head_dim)
         
-        # Padding ekle
+        # Pad the sequence dimension on both sides so every position gets a full window
         pad_size = self.window_size
         padded_x = F.pad(x, (0,0,0,0, pad_size, pad_size))
-        
-        # Pencereleri oluştur
+
+        # Build the local attention windows
         windows = []
         for i in range(S):
             start = i
             end = start + 2*self.window_size + 1
             windows.append(padded_x[:, start:end])
         windows = torch.stack(windows, dim=1)
-        
-        # Attention hesapla
+
+        # Compute attention weights within each local window
         Q = self.query(windows)
         K = self.key(windows)
         V = self.value(windows)
@@ -97,7 +98,7 @@ def forward(self, x):
             x = layer(x)
         return self.classifier(x.mean(dim=1))
 
-# Test
+# Quick smoke test
 if __name__ == "__main__":
     texts = ["positive text", "negative text"]
     labels = [1, 0]
diff --git a/Genel-1/transformers_config_ft.ipynb b/Genel-1/transformers_config_ft.ipynb
index 52e5bea..0da77e9 100644
--- a/Genel-1/transformers_config_ft.ipynb
+++ b/Genel-1/transformers_config_ft.ipynb
@@ -55,7 +55,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -66,7 +66,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -201,30 +201,30 @@
     "from datasets import load_dataset\n",
     "from transformers import BertTokenizerFast\n",
     "\n",
-    "# IMDb veri setini yükleme\n",
+    "# Load the IMDb dataset\n",
     "dataset = load_dataset('imdb')\n",
     "\n",
-    "# Tokenizer seçimi\n",
+    "# Tokenizer selection\n",
     "tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')\n",
     "\n",
-    "# Tokenize fonksiyonu\n",
+    "# Tokenization function\n",
     "def tokenize_function(examples):\n",
     "    return tokenizer(examples['text'], padding=\"max_length\", truncation=True, max_length=512)\n",
     "\n",
-    "# Tokenized verileri hazırlama\n",
+    "# Prepare tokenized data\n",
     "tokenized_datasets = dataset.map(tokenize_function, batched=True)\n",
     "\n",
-    "# Gereksiz sütunları kaldırma\n",
+    "# Remove unnecessary columns\n",
     "tokenized_datasets = tokenized_datasets.remove_columns(['text'])\n",
     "\n",
-    "# PyTorch tensör formatına dönüştürme\n",
+    "# Convert to PyTorch tensor format\n",
     "tokenized_datasets.set_format('torch')\n",
     "\n",
-    "# Eğitim ve test veri setlerini ayırma\n",
+    "# Split training and test datasets\n",
     "train_dataset = tokenized_datasets['train']\n",
     "test_dataset = tokenized_datasets['test']\n",
     "\n",
-    "# Örnek veri kontrolü\n",
+    "# Sample data inspection\n",
     "print(train_dataset[0])\n",
     "print(test_dataset[0])"
    ]
@@ -242,7 +242,7 @@
     "train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=data_collator)\n",
     "test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, collate_fn=data_collator)\n",
     "\n",
-    "# Örnek veri kontrolü\n",
+    "# Sample data inspection\n",
     "for batch in train_loader:\n",
     "    print(batch)\n",
     "    break\n",
@@ -254,7 +254,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -263,72 +263,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Evaluating language skills...\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Device set to use cpu\n",
-      "SQuAD Evaluation: 100%|██████████| 10/10 [00:00<00:00, 19.72it/s]\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Evaluating coding skills...\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Coding Evaluation: 100%|██████████| 10/10 [00:00<00:00, 11.22it/s]\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Evaluating conversation quality...\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Conversation Evaluation: 100%|██████████| 10/10 [00:00<00:00, 23.68it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "Final Evaluation Results:\n",
-      "squad_accuracy: 0.8000\n",
-      "sst2_accuracy: 0.4000\n",
-      "code_accuracy: 0.2000\n",
-      "conversation_score: 0.4821\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import torch\n",
     "import numpy as np\n",
diff --git a/Genel-2/app.py b/Genel-2/app.py
index 8229840..c9a9536 100644
--- a/Genel-2/app.py
+++ b/Genel-2/app.py
@@ -2,17 +2,17 @@
 import polars as pl
 from huggingface_hub import login
 
-# Hugging Face'e giriş yapmak - Environment variable kullan
+# Log in to Hugging Face using the environment variable
 hf_token = os.getenv('HUGGINGFACE_TOKEN')
 if hf_token:
     login(hf_token)
 else:
-    print("Uyarı: HUGGINGFACE_TOKEN environment variable bulunamadı. Bazı özel modellere erişiminiz olmayabilir.")
+    print("Warning: HUGGINGFACE_TOKEN environment variable not found. You may not have access to private models.")
 
-# Hugging Face'ten doğru dosyayı yüklemek için veri kümesinin yolunu doğru şekilde kontrol edin
+# Ensure the dataset path is correct before downloading from Hugging Face
 try:
     df = pl.read_parquet('hf://datasets/HuggingFaceM4/the_cauldron/textcaps/train-00011-of-00012-baf9399db4a7051d.parquet')
-    print("Veri kümesi yüklendi!")
+    print("Dataset loaded!")
     print(df.head())
 except Exception as e:
-    print("Veri kümesi yüklenirken bir hata oluştu:", e)
+    print("An error occurred while loading the dataset:", e)
diff --git a/Genel-2/benimmodel.py b/Genel-2/benimmodel.py
index df3e589..fc40528 100644
--- a/Genel-2/benimmodel.py
+++ b/Genel-2/benimmodel.py
@@ -2,33 +2,33 @@
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from huggingface_hub import login
 
-# Hugging Face giriş işlemi - Environment variable kullan
+# Handle the Hugging Face login using an environment variable
 hf_token = os.getenv('HUGGINGFACE_TOKEN')
 if hf_token:
     login(token=hf_token)
-    print("Başarıyla giriş yapıldı!")
+    print("Successfully authenticated with Hugging Face!")
 else:
-    print("Uyarı: HUGGINGFACE_TOKEN environment variable bulunamadı. Bazı özel modellere erişiminiz olmayabilir.")
+    print("Warning: The HUGGINGFACE_TOKEN environment variable is missing. Access to private models may be limited.")
 
-# Tokenizer ve modelin yüklenmesi
+# Load the tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained("emredeveloper/DeepSeek-R1-Medical-COT")
 model = AutoModelForCausalLM.from_pretrained("emredeveloper/DeepSeek-R1-Medical-COT")
 
-# Modeli kullanarak bir metin oluşturma
+# Generate a response with the model
 def generate_response(input_text):
-    # Input metnini token'lara dönüştürme
+    # Convert input text into tokens
     inputs = tokenizer(input_text, return_tensors="pt")
-    
-    # Modelden çıkışı almak için generate metodunu kullanma
+
+    # Use the generate method to produce output
     outputs = model.generate(**inputs)
-    
-    # Çıktıyı decode ederek cevap verme
+
+    # Decode the output tokens into text
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
     return response
 
-# Kullanıcıdan input almak
-input_text = "baş ağrısı nedir?"
+# Provide an input example
+input_text = "What is a headache?"
 
-# Modeli çalıştırma ve sonucu yazdırma
+# Run the model and print the result
 response = generate_response(input_text)
-print(f"Model Cevabı: {response}")
+print(f"Model Response: {response}")
diff --git a/Genel-2/vision_transformer_chatgpt.py b/Genel-2/vision_transformer_chatgpt.py
index 0355435..f7ab8a8 100644
--- a/Genel-2/vision_transformer_chatgpt.py
+++ b/Genel-2/vision_transformer_chatgpt.py
@@ -11,15 +11,15 @@
 import base64
 import requests
 
-# -------------------- Yeni Veri Kümesi Sınıfı --------------------
+# -------------------- Updated Dataset Class --------------------
 class TextCapsDataset(Dataset):
     def __init__(self, dataset, num_samples=100):
-        self.dataset = dataset.select(range(num_samples))  # Sadece ilk 100 örnek
+        self.dataset = dataset.select(range(num_samples))  # Use only the first `num_samples` samples
         self.tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
         self.transform = transforms.Compose([
             transforms.Resize((224, 224)),
             transforms.ToTensor(),
-            transforms.Normalize(mean=[0.485, 0.456, 0.406], 
+            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225]),
         ])
         
@@ -29,12 +29,12 @@ def __len__(self):
     def __getitem__(self, idx):
         item = self.dataset[idx]
         
-        # Görüntüyü yükleme
+        # Load the image
         img_url = item['image']
         img = self.load_image_from_url(img_url)
-        
-        # Metin işleme
-        caption = item['user']  # Kullanıcı tarafından sağlanan açıklama
+
+        # Process the caption text provided by the user
+        caption = item['user']
         
         inputs = self.tokenizer(
             caption,
@@ -51,25 +51,25 @@ def __getitem__(self, idx):
         }
     
     def load_image_from_url(self, url):
-        """URL'den görseli indirip PIL formatında döndürür."""
+        """Download an image from the provided URL and return it as a PIL image."""
         response = requests.get(url)
         img = Image.open(io.BytesIO(response.content)).convert("RGB")
         return img
 
 
-# -------------------- İyileştirilmiş Model Mimarisi --------------------
+# -------------------- Improved Model Architecture --------------------
 class EnhancedTextToImageModel(nn.Module):
     def __init__(self):
         super().__init__()
         
-        # Önceden eğitilmiş modeller
+        # Pretrained encoders
         self.vision_encoder = ViTModel.from_pretrained("google/vit-base-patch16-224")
         self.text_encoder = BertModel.from_pretrained("bert-base-uncased")
-        
-        # Çapraz dikkat mekanizması
+
+        # Cross-attention module
         self.cross_attn = nn.MultiheadAttention(embed_dim=768, num_heads=12)
-        
-        # Gelişmiş decoder
+
+        # Image decoder
         self.decoder = nn.Sequential(
             nn.Linear(768, 1024),
             nn.ReLU(),
@@ -85,38 +85,36 @@ def __init__(self):
         )
 
     def forward(self, pixel_values, input_ids, attention_mask):
-        # Görsel özellikler
+        # Visual features
         vision_outputs = self.vision_encoder(pixel_values)
         vision_features = vision_outputs.last_hidden_state
-        
-        # Metin özellikleri
-        text_outputs = self.text_encoder(input_ids=input_ids, 
+
+        # Text features
+        text_outputs = self.text_encoder(input_ids=input_ids,
                                        attention_mask=attention_mask)
         text_features = text_outputs.last_hidden_state
-        
-        # Çapraz dikkat
+
+        # Cross-attention
         attn_output, _ = self.cross_attn(
             vision_features.permute(1, 0, 2),
             text_features.permute(1, 0, 2),
             text_features.permute(1, 0, 2)
         )
-        
-        # Görüntü oluşturma
+
+        # Generate the output image representation
         combined = attn_output.permute(1, 0, 2).mean(dim=1)
         return self.decoder(combined.unsqueeze(-1).unsqueeze(-1))
 
 
-# -------------------- Ana İşlem --------------------
+# -------------------- Main Execution --------------------
 if __name__ == "__main__":
-    # HuggingFace M4 - The Cauldron veri setini yükle (textcaps alt kümesi)
-    
-
+    # Load the HuggingFace M4 - The Cauldron dataset (textcaps subset)
     dataset = load_dataset("HuggingFaceM4/the_cauldron", "textcaps")
-    
-    # Örnek kullanım
+
+    # Example usage
     custom_dataset = TextCapsDataset(dataset)
-    print(f"Toplam örnek sayısı: {len(custom_dataset)}")
+    print(f"Total samples: {len(custom_dataset)}")
     sample = custom_dataset[0]
-    print("Örnek veri şekilleri:")
-    print(f"Görüntü: {sample['pixel_values'].shape}")
-    print(f"Metin ID: {sample['input_ids'].shape}")
+    print("Sample tensor shapes:")
+    print(f"Image: {sample['pixel_values'].shape}")
+    print(f"Text IDs: {sample['input_ids'].shape}")
diff --git a/Genel-2/vision_transformer_deepseek.py b/Genel-2/vision_transformer_deepseek.py
index 338151b..9e97414 100644
--- a/Genel-2/vision_transformer_deepseek.py
+++ b/Genel-2/vision_transformer_deepseek.py
@@ -31,22 +31,22 @@
     logger.error(f"Failed to login to Hugging Face: {e}")
     raise
 
-# -------------------- Veri Yükleme --------------------
+# -------------------- Data Loading --------------------
 def load_textcaps_data(num_samples: int = 100):
     """Load TextCaps dataset from Hugging Face with streaming."""
     try:
-        # Stream modunda veri setini yükle
+        # Load the dataset in streaming mode
         dataset = load_dataset("HuggingFaceM4/the_cauldron", "textcaps", streaming=True)
-        # İlk num_samples kadar veriyi al
+        # Retrieve the first `num_samples` entries
         train_data = list(islice(dataset["train"], num_samples))
-        
+
         logger.info(f"Loaded {len(train_data)} samples from dataset")
         return train_data
     except Exception as e:
         logger.error(f"Failed to load dataset: {e}")
         raise
 
-# -------------------- Veri Kümesi Sınıfı --------------------
+# -------------------- Dataset Class --------------------
 class TextCapsDataset(Dataset):
     """Dataset class for TextCaps data."""
     
@@ -69,9 +69,9 @@ def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]:
             
             # Get text
             conversations = item.get('conversations', [])
-            text = next((conv['content'] for conv in conversations 
+            text = next((conv['content'] for conv in conversations
                         if conv.get('from') == 'assistant'), "")
-            
+
             # Handle image
             images = item.get('images', [])
             if not images:
@@ -96,7 +96,7 @@ def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]:
             
             # Transform image
             img_tensor = self.transform(image)
-            
+
             # Validate tensor shape
             if img_tensor.shape != (3, 224, 224):
                 logger.warning(f"Unexpected image tensor shape at index {idx}: {img_tensor.shape}")
@@ -120,7 +120,7 @@ def __getitem__(self, idx: int) -> Dict[str, torch.Tensor]:
             logger.error(f"Item structure: {item}")
             raise
 
-# -------------------- Model Mimarisi (Aynı) --------------------
+# -------------------- Model Architecture --------------------
 class EnhancedTextToImageModel(nn.Module):
     def __init__(self):
         super().__init__()
@@ -128,8 +128,8 @@ def __init__(self):
         self.text_encoder = BertModel.from_pretrained("bert-base-uncased")
         self.cross_attn = nn.MultiheadAttention(embed_dim=768, num_heads=12)
         
-        self.projection = nn.Linear(768, 768)  # remains the same
-        # Updated decoder to output (3, 224, 224)
+        self.projection = nn.Linear(768, 768)
+        # Decoder maps features back to the 3x224x224 image space
         self.decoder = nn.Sequential(
             nn.Linear(768, 256 * 7 * 7),
             nn.ReLU(),
@@ -168,7 +168,7 @@ def forward(self, pixel_values, input_ids, attention_mask):
         # Decode
         return self.decoder(projected)
 
-# -------------------- Eğitim Fonksiyonu --------------------
+# -------------------- Training Function --------------------
 def train_model(
     num_samples: int = 1000,
     batch_size: int = 8,
@@ -179,18 +179,18 @@ def train_model(
     try:
         dataset = load_textcaps_data(num_samples)
         
-        # Veri setini böl
+        # Split the dataset
         val_size = int(len(dataset) * val_split)
         train_dataset = dataset[val_size:]
         val_dataset = dataset[:val_size]
-        
+
         logger.info(f"Training on {len(train_dataset)} samples")
         logger.info(f"Validating on {len(val_dataset)} samples")
-        
+
         # Create data loaders
         train_data = TextCapsDataset(train_dataset)
         val_data = TextCapsDataset(val_dataset)
-        
+
         train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
         val_loader = DataLoader(val_data, batch_size=batch_size)
 
@@ -207,7 +207,7 @@ def train_model(
             train_loss = 0
             for batch in train_loader:
                 inputs = {k: v.to(device) for k, v in batch.items()}
-                
+
                 optimizer.zero_grad()
                 outputs = model(**inputs)
                 loss = criterion(outputs, inputs['pixel_values'])
@@ -239,7 +239,7 @@ def train_model(
         logger.error(f"Training failed: {e}")
         raise
 
-# -------------------- Test Fonksiyonu --------------------
+# -------------------- Test Function --------------------
 def test_with_local_image(model_path, image_path):
     model = EnhancedTextToImageModel()
     model.load_state_dict(torch.load(model_path))
@@ -271,10 +271,10 @@ def test_with_local_image(model_path, image_path):
     plt.savefig("textcaps_output_1k.png")
     plt.show()
 
-# -------------------- Ana İşlem --------------------
+# -------------------- Main Execution --------------------
 if __name__ == "__main__":
     try:
         trained_model = train_model()
         test_with_local_image("best_model.pth", "image.jpeg")
     except Exception as e:
-        logger.error(f"Application failed: {e}")
\ No newline at end of file
+        logger.error(f"Application failed: {e}")
diff --git a/Genel-2/vl_transformers.py b/Genel-2/vl_transformers.py
index 51d6b8c..f478c31 100644
--- a/Genel-2/vl_transformers.py
+++ b/Genel-2/vl_transformers.py
@@ -6,43 +6,43 @@ class SimpleVisionLanguageModel(nn.Module):
     def __init__(self, vision_model_name="google/vit-base-patch16-224", language_model_name="bert-base-uncased"):
         super().__init__()
         
-        # Vision Encoder (e.g., ViT)
+        # Vision encoder (e.g., ViT)
         self.vision_encoder = AutoModel.from_pretrained(vision_model_name)
         self.vision_hidden_size = self.vision_encoder.config.hidden_size
         
-        # Language Model (e.g., BERT)
+        # Language model (e.g., BERT)
         self.language_model = AutoModel.from_pretrained(language_model_name)
         self.language_hidden_size = self.language_model.config.hidden_size
         
-        # Projection Layer: Vision embeddings -> Language embeddings boyutuna dönüştürme
+        # Projection layer: map vision embeddings to the language embedding dimension
         self.projection = nn.Linear(self.vision_hidden_size, self.language_hidden_size)
-        
-        # Output Layer: Dil modelinin çıkışını kullanarak bir görev için (örneğin, sınıflandırma) kullanılabilir.
-        self.output_layer = nn.Linear(self.language_hidden_size, 1)  # Örnek olarak tek bir çıktı
+
+        # Output layer for downstream tasks (e.g., classification)
+        self.output_layer = nn.Linear(self.language_hidden_size, 1)  # Single-output example
     
     def forward(self, images, input_ids, attention_mask):
         """
         Args:
-            images (torch.Tensor): [batch_size, 3, height, width] - Görsel girişler
-            input_ids (torch.LongTensor): [batch_size, seq_len] - Metin girişleri
-            attention_mask (torch.Tensor): [batch_size, seq_len] - Metin için dikkat maskesi
+            images (torch.Tensor): [batch_size, 3, height, width] - visual inputs
+            input_ids (torch.LongTensor): [batch_size, seq_len] - text inputs
+            attention_mask (torch.Tensor): [batch_size, seq_len] - attention mask for the text
         
         Returns:
-            logits (torch.Tensor): Modelin çıkışları
+            logits (torch.Tensor): model outputs
         """
-        # 1. Görsel bilgileri işle
+        # 1. Encode the visual information
         vision_outputs = self.vision_encoder(images).last_hidden_state  # [batch_size, num_patches, vision_hidden_size]
-        vision_embeds = vision_outputs[:, 0, :]  # CLS token'ını al (varsayılan olarak ViT'de mevcut)
+        vision_embeds = vision_outputs[:, 0, :]  # Use the CLS token (available by default in ViT)
         projected_vision_embeds = self.projection(vision_embeds)  # [batch_size, language_hidden_size]
-        
-        # 2. Metin bilgilerini işle
+
+        # 2. Encode the text information
         language_outputs = self.language_model(input_ids=input_ids, attention_mask=attention_mask)
         language_embeds = language_outputs.last_hidden_state  # [batch_size, seq_len, language_hidden_size]
-        
-        # 3. Görsel ve metin bilgilerini birleştir
+
+        # 3. Combine the visual and textual representations
         combined_embeds = projected_vision_embeds.unsqueeze(1) + language_embeds[:, 0, :].unsqueeze(1)  # [batch_size, 1, language_hidden_size]
-        
-        # 4. Çıkış katmanından geçir
+
+        # 4. Pass through the output layer
         logits = self.output_layer(combined_embeds.squeeze(1))  # [batch_size, 1]
         
         return logits
@@ -53,21 +53,21 @@ def forward(self, images, input_ids, attention_mask):
 from transformers import AutoTokenizer, AutoFeatureExtractor
 import torch
 
-# Modeli yükle
+# Load the model
 model = SimpleVisionLanguageModel()
 
-# Tokenizer ve Feature Extractor
+# Tokenizer and feature extractor
 tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
 feature_extractor = AutoFeatureExtractor.from_pretrained("google/vit-base-patch16-224")
 
-# Görsel veriyi yükle
+# Load the image
 image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/3/3a/Cat03.jpg/1200px-Cat03.jpg"
 image = Image.open(requests.get(image_url, stream=True).raw)
 
-# Metin verisi
+# Text input
 text = "This is a cat."
 
-# Ön işleme
+# Pre-processing
 inputs = feature_extractor(images=image, return_tensors="pt")
 pixel_values = inputs["pixel_values"]  # [1, 3, 224, 224]
 
@@ -75,7 +75,7 @@ def forward(self, images, input_ids, attention_mask):
 input_ids = text_inputs["input_ids"]
 attention_mask = text_inputs["attention_mask"]
 
-# Modeli çalıştır
+# Run the model
 with torch.no_grad():
     outputs = model(pixel_values, input_ids, attention_mask)
-    print("Çıkış:", outputs)
\ No newline at end of file
+    print("Output:", outputs)
diff --git a/Genel-3/LCM.py b/Genel-3/LCM.py
index cad9b41..0fd4873 100644
--- a/Genel-3/LCM.py
+++ b/Genel-3/LCM.py
@@ -8,45 +8,45 @@
 import plotly.express as px
 import plotly.graph_objects as go
 
-# --- Global Ayarlar ve Model Yüklemesi ---
+# --- Global configuration and model loading ---
 encoder = SentenceTransformer('paraphrase-MiniLM-L6-v2')
 
-# Başlangıç referans metin havuzu
+# Initial pool of reference texts
 reference_texts = [
-    "Kedi halının üzerine yattı.",
-    "Güneşli bir gündü.",
-    "Birden mutfaktan gürültülü bir ses geldi.",
-    "Yağmur yağıyordu.",
-    "Telefon çaldı."
+    "The cat lay down on the rug.",
+    "It was a sunny day.",
+    "A loud noise suddenly came from the kitchen.",
+    "It was raining.",
+    "The phone rang."
 ]
 reference_embeddings = encoder.encode(reference_texts)
 
-# En yakın komşu ayarları
+# Nearest-neighbour configuration
 n_neighbors = 3
 nbrs = NearestNeighbors(n_neighbors=n_neighbors, metric='cosine').fit(reference_embeddings)
 
-# Loglama fonksiyonu
+# Logging helper
 def log_query(query, results):
     with open("log.txt", "a", encoding="utf-8") as log_file:
-        log_file.write(f"{datetime.now()} - Sorgu: {query} - Sonuçlar: {results}\n")
+        log_file.write(f"{datetime.now()} - Query: {query} - Results: {results}\n")
 
-# Referans havuzunu güncelleyen fonksiyon
+# Function to update the reference pool
 def update_reference_pool(new_texts):
     global reference_texts, reference_embeddings, nbrs
     reference_texts.extend(new_texts)
     reference_embeddings = encoder.encode(reference_texts)
     nbrs = NearestNeighbors(n_neighbors=n_neighbors, metric='cosine').fit(reference_embeddings)
-    print("Referans havuzu güncellendi.")
+    print("Reference pool updated.")
 
-# K-means kümeleme fonksiyonu
+# K-means clustering helper
 def perform_clustering(n_clusters=2):
     kmeans = KMeans(n_clusters=n_clusters, random_state=42)
     clusters = kmeans.fit_predict(reference_embeddings)
-    # Küme sonuçlarını referans metinlerle eşleştiriyoruz
+    # Pair the cluster assignments with the reference texts
     cluster_info = {text: int(cluster) for text, cluster in zip(reference_texts, clusters)}
     return cluster_info
 
-# Sorgu işleme fonksiyonu (tek veya toplu sorgu desteği)
+# Query processing function (single or batched queries)
 def process_queries(queries, similarity_threshold=0.7):
     if isinstance(queries, str):
         queries = [queries]
@@ -64,13 +64,13 @@ def process_queries(queries, similarity_threshold=0.7):
                 result.append({
                     "text": reference_texts[idx],
                     "similarity": round(sim, 2),
-                    "status": "Benzerlik düşük, eşleşme yapılmadı"
+                    "status": "Similarity too low, no match"
                 })
             else:
                 result.append({
                     "text": reference_texts[idx],
                     "similarity": round(sim, 2),
-                    "status": "Eşleşme yapıldı"
+                    "status": "Match found"
                 })
         elapsed = time.time() - start
         results_all.append({
@@ -78,56 +78,56 @@ def process_queries(queries, similarity_threshold=0.7):
             "results": result,
             "processing_time_sec": round(elapsed, 4)
         })
-        # Loglama işlemi
+        # Write to the log file
         log_query(query_text, result)
     total_time = time.time() - start_total
-    print(f"Toplam işleme süresi: {total_time:.4f} saniye")
+    print(f"Total processing time: {total_time:.4f} seconds")
     return results_all
 
-# İnteraktif görselleştirme (Plotly) fonksiyonu
+# Interactive visualisation (Plotly) function
 def visualize_embeddings(query_text=None):
-    # Gerçek bir boyut indirgeme için PCA kullanılabilir.
-    # Örnek amaçlı rastgele 2D koordinatlar üretilmiştir.
+    # PCA could be applied for real dimensionality reduction.
+    # Random 2D coordinates are generated for demonstration purposes.
     np.random.seed(42)
     coords = np.random.rand(len(reference_texts), 2)
     fig = px.scatter(x=coords[:,0], y=coords[:,1], text=reference_texts,
-                     title="Referans Metinlerin Görselleştirmesi")
+                     title="Reference text visualisation")
     if query_text:
         query_embedding = encoder.encode([query_text])
-        query_coord = np.random.rand(1, 2)  # örnek koordinat
+        query_coord = np.random.rand(1, 2)  # sample coordinate
         fig.add_trace(go.Scatter(x=query_coord[:,0], y=query_coord[:,1],
                                  mode='markers+text', marker=dict(color='red', size=12),
-                                 text=[query_text], name="Sorgu Metni"))
+                                 text=[query_text], name="Query Text"))
     fig.show()
 
-# --- Ana Program Bölümü ---
+# --- Main program block ---
 if __name__ == "__main__":
-    # Referans havuzunu güncelleme (isteğe bağlı)
-    update_choice = input("Yeni referans metin eklemek ister misiniz? (E/H): ").strip().lower()
-    if update_choice == 'e':
-        new_texts_input = input("Eklemek istediğiniz metinleri virgülle ayırarak giriniz: ")
+    # Optionally update the reference pool
+    update_choice = input("Would you like to add new reference texts? (y/n): ").strip().lower()
+    if update_choice == 'y':
+        new_texts_input = input("Enter the texts you want to add, separated by commas: ")
         new_texts = [txt.strip() for txt in new_texts_input.split(",") if txt.strip()]
         if new_texts:
             update_reference_pool(new_texts)
 
-    # K-means kümeleme sonucu gösterilsin mi?
-    cluster_choice = input("Kümeleme sonuçlarını görmek ister misiniz? (E/H): ").strip().lower()
-    if cluster_choice == 'e':
+    # Show the K-means clustering results?
+    cluster_choice = input("Would you like to see the clustering results? (y/n): ").strip().lower()
+    if cluster_choice == 'y':
         clusters = perform_clustering(n_clusters=2)
-        print("Kümeleme Sonuçları:")
+        print("Clustering Results:")
         for text, cluster in clusters.items():
-            print(f"'{text}' -> Küme {cluster}")
+            print(f"'{text}' -> Cluster {cluster}")
 
-    # Toplu sorgu desteği: virgülle ayrılmış birden fazla sorgu girişi
-    queries_input = input("Sorgu cümlelerini giriniz (virgülle ayırınız): ")
+    # Batch query support: supply multiple queries separated by commas
+    queries_input = input("Enter query sentences (separated by commas): ")
     queries = [q.strip() for q in queries_input.split(",") if q.strip()]
     results = process_queries(queries)
     
     for res in results:
-        print("\nSorgu:", res["query"])
+        print("\nQuery:", res["query"])
         for item in res["results"]:
-            print(f"Metin: '{item['text']}' - Cosine Benzerliği: {item['similarity']} - Durum: {item['status']}")
+            print(f"Text: '{item['text']}' - Cosine Similarity: {item['similarity']} - Status: {item['status']}")
 
-    # İnteraktif görselleştirme: ilk sorgu için örnek
+    # Interactive visualisation: display an example for the first query
     if queries:
         visualize_embeddings(query_text=queries[0])
\ No newline at end of file
diff --git a/Genel-3/flash_Attn.ipynb b/Genel-3/flash_Attn.ipynb
index 580ffd2..0edcbe1 100644
--- a/Genel-3/flash_Attn.ipynb
+++ b/Genel-3/flash_Attn.ipynb
@@ -2,61 +2,33 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "nvcc: NVIDIA (R) Cuda compiler driver\n",
-      "Copyright (c) 2005-2024 NVIDIA Corporation\n",
-      "Built on Wed_Oct_30_01:18:48_Pacific_Daylight_Time_2024\n",
-      "Cuda compilation tools, release 12.6, V12.6.85\n",
-      "Build cuda_12.6.r12.6/compiler.35059454_0\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "!nvcc --version"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "True\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import torch\n",
-    "print(torch.cuda.is_available())"
+    "print(torch.cuda.is_available())  # Should be True\n",
+    "print(torch.version.cuda)         # Shows the CUDA version\n"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "True\n",
-      "12.6\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import torch\n",
-    "print(torch.cuda.is_available())  # True olmalı\n",
-    "print(torch.version.cuda)         # CUDA sürümünü gösterir"
+    "print(torch.cuda.is_available())  # Should be True\n",
+    "print(torch.version.cuda)         # Shows the CUDA version"
    ]
   }
  ],
diff --git a/Genel-4/DyT_vs_RMSNorm.ipynb b/Genel-4/DyT_vs_RMSNorm.ipynb
index 70dacf8..3b1432d 100644
--- a/Genel-4/DyT_vs_RMSNorm.ipynb
+++ b/Genel-4/DyT_vs_RMSNorm.ipynb
@@ -1,5936 +1,401 @@
 {
-  "nbformat": 4,
-  "nbformat_minor": 0,
-  "metadata": {
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+  "colab": {
+   "provenance": [],
+   "gpuType": "T4",
+   "authorship_tag": "ABX9TyMvKDhNQ1pgBRmULhDR4kMt",
+   "include_colab_link": true
+  },
+  "kernelspec": {
+   "name": "python3",
+   "display_name": "Python 3"
+  },
+  "language_info": {
+   "name": "python"
+  },
+  "accelerator": "GPU"
+ },
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "view-in-github",
+    "colab_type": "text"
+   },
+   "source": [
+    "<a href=\"https://colab.research.google.com/github/emredeveloper/Transformers--General-AI/blob/main/DyT_vs_RMSNorm.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
     "colab": {
-      "provenance": [],
-      "gpuType": "T4",
-      "authorship_tag": "ABX9TyMvKDhNQ1pgBRmULhDR4kMt",
-      "include_colab_link": true
-    },
-    "kernelspec": {
-      "name": "python3",
-      "display_name": "Python 3"
+     "base_uri": "https://localhost:8080/"
     },
-    "language_info": {
-      "name": "python"
-    },
-    "accelerator": "GPU"
+    "id": "ZZHpfH5HoXIb",
+    "outputId": "b77f76ef-941e-46aa-909e-093fd4eafa72"
+   },
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.optim as optim\n",
+    "import time\n",
+    "from torchvision import datasets, transforms\n",
+    "from torch.utils.data import DataLoader\n",
+    "from tqdm import tqdm  # Adding tqdm for the progress bar\n",
+    "\n",
+    "# 1. RMSNorm Class\n",
+    "class RMSNorm(nn.Module):\n",
+    "    def __init__(self, dim, eps=1e-6):\n",
+    "        super(RMSNorm, self).__init__()\n",
+    "        self.dim = dim\n",
+    "        self.eps = eps\n",
+    "        self.gamma = nn.Parameter(torch.ones(dim))\n",
+    "        self.beta = nn.Parameter(torch.zeros(dim))\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        rms = torch.sqrt(torch.mean(x**2, dim=-1, keepdim=True) + self.eps)\n",
+    "        x_norm = x / rms\n",
+    "        return self.gamma * x_norm + self.beta\n",
+    "\n",
+    "# 2. DyT Class\n",
+    "class DyT(nn.Module):\n",
+    "    def __init__(self, dim, init_alpha=0.5):\n",
+    "        super(DyT, self).__init__()\n",
+    "        self.alpha = nn.Parameter(torch.ones(1) * init_alpha)\n",
+    "        self.gamma = nn.Parameter(torch.ones(dim))\n",
+    "        self.beta = nn.Parameter(torch.zeros(dim))\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        x = torch.tanh(self.alpha * x)\n",
+    "        return self.gamma * x + self.beta\n",
+    "\n",
+    "# 3. TransformerBlock Class\n",
+    "class TransformerBlock(nn.Module):\n",
+    "    def __init__(self, dim, num_heads, norm_layer, init_alpha=0.5):\n",
+    "        super(TransformerBlock, self).__init__()\n",
+    "        if norm_layer == 'RMSNorm':\n",
+    "            self.norm1 = RMSNorm(dim)\n",
+    "            self.norm2 = RMSNorm(dim)\n",
+    "        elif norm_layer == 'DyT':\n",
+    "            self.norm1 = DyT(dim, init_alpha)\n",
+    "            self.norm2 = DyT(dim, init_alpha)\n",
+    "        else:\n",
+    "            raise ValueError(\"Invalid norm_layer. Choose 'RMSNorm' or 'DyT'.\")\n",
+    "        self.attn = nn.MultiheadAttention(embed_dim=dim, num_heads=num_heads)\n",
+    "        self.ffn = nn.Sequential(\n",
+    "            nn.Linear(dim, dim * 4),\n",
+    "            nn.GELU(),\n",
+    "            nn.Linear(dim * 4, dim)\n",
+    "        )\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        attn_output, _ = self.attn(self.norm1(x), self.norm1(x), self.norm1(x))\n",
+    "        x = x + attn_output\n",
+    "        ffn_output = self.ffn(self.norm2(x))\n",
+    "        x = x + ffn_output\n",
+    "        return x\n",
+    "\n",
+    "# 4. SimpleViT Class\n",
+    "class SimpleViT(nn.Module):\n",
+    "    def __init__(self, img_size=224, patch_size=16, num_classes=10, dim=256, depth=3, heads=4, norm_layer='RMSNorm', init_alpha=0.5):\n",
+    "        super(SimpleViT, self).__init__()\n",
+    "        assert img_size % patch_size == 0, \"Image size must be divisible by the patch size\"\n",
+    "        num_patches = (img_size // patch_size) ** 2\n",
+    "\n",
+    "        self.patch_embed = nn.Conv2d(3, dim, kernel_size=patch_size, stride=patch_size)\n",
+    "        self.pos_embed = nn.Parameter(torch.randn(1, num_patches + 1, dim))\n",
+    "        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))\n",
+    "\n",
+    "        self.blocks = nn.ModuleList([\n",
+    "            TransformerBlock(dim=dim, num_heads=heads, norm_layer=norm_layer, init_alpha=init_alpha) for _ in range(depth)\n",
+    "        ])\n",
+    "\n",
+    "        self.head = nn.Linear(dim, num_classes)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        B = x.shape[0]\n",
+    "        x = self.patch_embed(x)\n",
+    "        x = x.flatten(2).transpose(1, 2)\n",
+    "\n",
+    "        cls_tokens = self.cls_token.expand(B, -1, -1)\n",
+    "        x = torch.cat((cls_tokens, x), dim=1)\n",
+    "        x = x + self.pos_embed\n",
+    "\n",
+    "        x = x.transpose(0, 1)\n",
+    "        for block in self.blocks:\n",
+    "            x = block(x)\n",
+    "        x = x.transpose(0, 1)\n",
+    "\n",
+    "        x = x[:, 0]\n",
+    "        x = self.head(x)\n",
+    "        return x\n",
+    "\n",
+    "# 5. Training and evaluation function\n",
+    "def train_model(model, dataloader, criterion, optimizer, num_epochs, device):\n",
+    "    model.to(device)\n",
+    "    start_time = time.time()\n",
+    "\n",
+    "    # tqdm for epochs\n",
+    "    for epoch in tqdm(range(num_epochs), desc=\"Epochs\", unit=\"epoch\"):\n",
+    "        model.train()\n",
+    "        running_loss = 0.0\n",
+    "        correct = 0\n",
+    "        total = 0\n",
+    "\n",
+    "        # tqdm for batches\n",
+    "        for inputs, labels in tqdm(dataloader, desc=f\"Epoch {epoch+1}/{num_epochs}\", unit=\"batch\", leave=False):\n",
+    "            inputs, labels = inputs.to(device), labels.to(device)\n",
+    "            optimizer.zero_grad()\n",
+    "            outputs = model(inputs)\n",
+    "            loss = criterion(outputs, labels)\n",
+    "            loss.backward()\n",
+    "            optimizer.step()\n",
+    "\n",
+    "            running_loss += loss.item()\n",
+    "            _, predicted = torch.max(outputs, 1)\n",
+    "            total += labels.size(0)\n",
+    "            correct += (predicted == labels).sum().item()\n",
+    "\n",
+    "        accuracy = 100 * correct / total\n",
+    "        avg_loss = running_loss / len(dataloader)\n",
+    "        print(f\"Epoch {epoch+1}/{num_epochs} completed. Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%\")\n",
+    "\n",
+    "    end_time = time.time()\n",
+    "    training_time = end_time - start_time\n",
+    "    return training_time, accuracy\n",
+    "\n",
+    "# Dataset and DataLoader (CIFAR-10)\n",
+    "transform = transforms.Compose([\n",
+    "    transforms.Resize((224, 224)),\n",
+    "    transforms.ToTensor(),\n",
+    "    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))\n",
+    "])\n",
+    "\n",
+    "train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)\n",
+    "train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)\n",
+    "\n",
+    "# Device and training parameters\n",
+    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+    "num_epochs = 1\n",
+    "\n",
+    "# RMSNorm model\n",
+    "model_rms = SimpleViT(norm_layer='RMSNorm')\n",
+    "optimizer_rms = optim.Adam(model_rms.parameters(), lr=0.001)\n",
+    "criterion = nn.CrossEntropyLoss()\n",
+    "\n",
+    "# DyT model\n",
+    "model_dyt = SimpleViT(norm_layer='DyT', init_alpha=0.5)\n",
+    "optimizer_dyt = optim.Adam(model_dyt.parameters(), lr=0.001)\n",
+    "\n",
+    "# Training and comparison\n",
+    "print(\"RMSNorm Model training...\")\n",
+    "time_rms, acc_rms = train_model(model_rms, train_loader, criterion, optimizer_rms, num_epochs, device)\n",
+    "print(f\"RMSNorm Training Time: {time_rms:.2f} seconds, Final Accuracy: {acc_rms:.2f}%\")\n",
+    "\n",
+    "print(\"\\nDyT Model training...\")\n",
+    "time_dyt, acc_dyt = train_model(model_dyt, train_loader, criterion, optimizer_dyt, num_epochs, device)\n",
+    "print(f\"DyT Training Time: {time_dyt:.2f} seconds, Final Accuracy: {acc_dyt:.2f}%\")\n",
+    "\n",
+    "# Comparison results\n",
+    "print(\"\\nComparison:\")\n",
+    "print(f\"RMSNorm - Time: {time_rms:.2f}s, Accuracy: {acc_rms:.2f}%\")\n",
+    "print(f\"DyT - Time: {time_dyt:.2f}s, Accuracy: {acc_dyt:.2f}%\")"
+   ]
   },
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "view-in-github",
-        "colab_type": "text"
-      },
-      "source": [
-        "<a href=\"https://colab.research.google.com/github/emredeveloper/Transformers--General-AI/blob/main/DyT_vs_RMSNorm.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 4,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "ZZHpfH5HoXIb",
-        "outputId": "b77f76ef-941e-46aa-909e-093fd4eafa72"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "RMSNorm Modeli Eğitiliyor...\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "Epochs:   0%|          | 0/1 [00:00<?, ?epoch/s]\n",
-            "Epoch 1/1:   0%|          | 0/1563 [00:00<?, ?batch/s]\u001b[A\n",
-            "Epoch 1/1:   0%|          | 1/1563 [00:00<03:23,  7.69batch/s]\u001b[A\n",
-            "Epoch 1/1:   0%|          | 2/1563 [00:00<03:22,  7.71batch/s]\u001b[A\n",
-            "Epoch 1/1:   0%|          | 3/1563 [00:00<03:11,  8.13batch/s]\u001b[A\n",
-            "Epoch 1/1:   0%|          | 4/1563 [00:00<03:02,  8.53batch/s]\u001b[A\n",
-            "Epoch 1/1:   0%|          | 5/1563 [00:00<02:53,  8.99batch/s]\u001b[A\n",
-            "Epoch 1/1:   0%|          | 7/1563 [00:00<02:46,  9.32batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 8/1563 [00:00<02:51,  9.09batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 9/1563 [00:01<02:53,  8.97batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 10/1563 [00:01<02:54,  8.92batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 11/1563 [00:01<02:56,  8.78batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 12/1563 [00:01<02:58,  8.70batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 13/1563 [00:01<02:57,  8.72batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 14/1563 [00:01<03:03,  8.44batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 15/1563 [00:01<03:00,  8.56batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 16/1563 [00:01<02:58,  8.66batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 17/1563 [00:01<02:57,  8.70batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 18/1563 [00:02<02:57,  8.68batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 19/1563 [00:02<02:59,  8.61batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|▏         | 20/1563 [00:02<03:01,  8.50batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|▏         | 21/1563 [00:02<03:01,  8.51batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|▏         | 22/1563 [00:02<03:01,  8.47batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|▏         | 23/1563 [00:02<03:01,  8.49batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 24/1563 [00:02<02:59,  8.56batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 26/1563 [00:02<02:42,  9.49batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 28/1563 [00:03<02:34,  9.94batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 30/1563 [00:03<02:30, 10.20batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 32/1563 [00:03<02:28, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 34/1563 [00:03<02:25, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 36/1563 [00:03<02:23, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 38/1563 [00:04<02:23, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 40/1563 [00:04<02:23, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 42/1563 [00:04<02:22, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 44/1563 [00:04<02:23, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 46/1563 [00:04<02:22, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 48/1563 [00:05<02:24, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 50/1563 [00:05<02:23, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 52/1563 [00:05<02:22, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 54/1563 [00:05<02:22, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▎         | 56/1563 [00:05<02:21, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▎         | 58/1563 [00:05<02:20, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 60/1563 [00:06<02:20, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 62/1563 [00:06<02:20, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 64/1563 [00:06<02:20, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 66/1563 [00:06<02:18, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 68/1563 [00:06<02:18, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 70/1563 [00:07<02:18, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▍         | 72/1563 [00:07<02:18, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▍         | 74/1563 [00:07<02:18, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▍         | 76/1563 [00:07<02:19, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▍         | 78/1563 [00:07<02:18, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▌         | 80/1563 [00:08<02:18, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▌         | 82/1563 [00:08<02:17, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▌         | 84/1563 [00:08<02:17, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▌         | 86/1563 [00:08<02:18, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▌         | 88/1563 [00:08<02:17, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▌         | 90/1563 [00:08<02:16, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▌         | 92/1563 [00:09<02:16, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▌         | 94/1563 [00:09<02:17, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▌         | 96/1563 [00:09<02:17, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▋         | 98/1563 [00:09<02:16, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▋         | 100/1563 [00:09<02:15, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 102/1563 [00:10<02:15, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 104/1563 [00:10<02:15, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 106/1563 [00:10<02:15, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 108/1563 [00:10<02:15, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 110/1563 [00:10<02:14, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 112/1563 [00:10<02:14, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 114/1563 [00:11<02:14, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 116/1563 [00:11<02:14, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 118/1563 [00:11<02:15, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 120/1563 [00:11<02:14, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 122/1563 [00:11<02:13, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 124/1563 [00:12<02:13, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 126/1563 [00:12<02:13, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 128/1563 [00:12<02:12, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 130/1563 [00:12<02:14, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 132/1563 [00:12<02:15, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▊         | 134/1563 [00:13<02:23,  9.96batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▊         | 136/1563 [00:13<02:26,  9.75batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 137/1563 [00:13<02:28,  9.62batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 138/1563 [00:13<02:33,  9.31batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 139/1563 [00:13<02:34,  9.24batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 140/1563 [00:13<02:35,  9.17batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 141/1563 [00:13<02:36,  9.09batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 142/1563 [00:13<02:38,  8.97batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 143/1563 [00:14<02:40,  8.86batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 144/1563 [00:14<02:42,  8.73batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 145/1563 [00:14<02:43,  8.65batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 146/1563 [00:14<02:44,  8.61batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 147/1563 [00:14<02:48,  8.39batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 148/1563 [00:14<02:47,  8.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|▉         | 149/1563 [00:14<02:47,  8.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|▉         | 150/1563 [00:14<02:46,  8.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|▉         | 152/1563 [00:15<02:30,  9.38batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|▉         | 154/1563 [00:15<02:23,  9.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|▉         | 156/1563 [00:15<02:19, 10.11batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|█         | 157/1563 [00:15<02:19, 10.09batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|█         | 159/1563 [00:15<02:16, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|█         | 161/1563 [00:15<02:13, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|█         | 163/1563 [00:16<02:12, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█         | 165/1563 [00:16<02:11, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█         | 167/1563 [00:16<02:11, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█         | 169/1563 [00:16<02:10, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█         | 171/1563 [00:16<02:10, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█         | 173/1563 [00:17<02:10, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█         | 175/1563 [00:17<02:10, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█▏        | 177/1563 [00:17<02:09, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█▏        | 179/1563 [00:17<02:11, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 181/1563 [00:17<02:10, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 183/1563 [00:18<02:09, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 185/1563 [00:18<02:09, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 187/1563 [00:18<02:09, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 189/1563 [00:18<02:08, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 191/1563 [00:18<02:08, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 193/1563 [00:18<02:08, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 195/1563 [00:19<02:08, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 197/1563 [00:19<02:07, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 199/1563 [00:19<02:07, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 201/1563 [00:19<02:06, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 203/1563 [00:19<02:06, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 205/1563 [00:20<02:06, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 207/1563 [00:20<02:06, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 209/1563 [00:20<02:05, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 211/1563 [00:20<02:06, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▎        | 213/1563 [00:20<02:05, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▍        | 215/1563 [00:21<02:05, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▍        | 217/1563 [00:21<02:05, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▍        | 219/1563 [00:21<02:05, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▍        | 221/1563 [00:21<02:05, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▍        | 223/1563 [00:21<02:05, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▍        | 225/1563 [00:21<02:04, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▍        | 227/1563 [00:22<02:03, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▍        | 229/1563 [00:22<02:04, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▍        | 231/1563 [00:22<02:04, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▍        | 233/1563 [00:22<02:03, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▌        | 235/1563 [00:22<02:03, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▌        | 237/1563 [00:23<02:03, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▌        | 239/1563 [00:23<02:03, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▌        | 241/1563 [00:23<02:02, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▌        | 243/1563 [00:23<02:02, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▌        | 245/1563 [00:23<02:01, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▌        | 247/1563 [00:23<02:02, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▌        | 249/1563 [00:24<02:02, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▌        | 251/1563 [00:24<02:03, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▌        | 253/1563 [00:24<02:04, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▋        | 255/1563 [00:24<02:03, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▋        | 257/1563 [00:24<02:03, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 259/1563 [00:25<02:11,  9.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 260/1563 [00:25<02:14,  9.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 261/1563 [00:25<02:18,  9.42batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 262/1563 [00:25<02:21,  9.22batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 263/1563 [00:25<02:23,  9.08batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 264/1563 [00:25<02:25,  8.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 265/1563 [00:25<02:26,  8.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 266/1563 [00:25<02:27,  8.77batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 267/1563 [00:26<02:28,  8.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 268/1563 [00:26<02:29,  8.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 269/1563 [00:26<02:30,  8.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 270/1563 [00:26<02:29,  8.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 271/1563 [00:26<02:32,  8.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 272/1563 [00:26<02:32,  8.44batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 273/1563 [00:26<02:32,  8.46batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 274/1563 [00:26<02:31,  8.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 275/1563 [00:27<02:33,  8.38batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 277/1563 [00:27<02:18,  9.28batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 279/1563 [00:27<02:10,  9.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 280/1563 [00:27<02:12,  9.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 282/1563 [00:27<02:06, 10.16batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 284/1563 [00:27<02:02, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 286/1563 [00:28<02:00, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 288/1563 [00:28<02:01, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▊        | 290/1563 [00:28<02:00, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▊        | 292/1563 [00:28<02:00, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▉        | 294/1563 [00:28<01:59, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▉        | 296/1563 [00:29<01:59, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▉        | 298/1563 [00:29<01:58, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▉        | 300/1563 [00:29<01:58, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▉        | 302/1563 [00:29<01:58, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▉        | 304/1563 [00:29<01:59, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|█▉        | 306/1563 [00:29<01:58, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|█▉        | 308/1563 [00:30<01:59, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|█▉        | 310/1563 [00:30<01:58, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|█▉        | 312/1563 [00:30<01:58, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|██        | 314/1563 [00:30<01:57, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|██        | 316/1563 [00:30<01:56, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|██        | 318/1563 [00:31<01:57, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|██        | 320/1563 [00:31<01:57, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 322/1563 [00:31<01:56, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 324/1563 [00:31<01:56, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 326/1563 [00:31<01:55, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 328/1563 [00:32<01:55, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 330/1563 [00:32<01:57, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 332/1563 [00:32<01:56, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██▏       | 334/1563 [00:32<01:55, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██▏       | 336/1563 [00:32<01:55, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/1:  22%|██▏       | 338/1563 [00:32<01:54, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  22%|██▏       | 340/1563 [00:33<01:55, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  22%|██▏       | 342/1563 [00:33<01:55, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  22%|██▏       | 344/1563 [00:33<01:55, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  22%|██▏       | 346/1563 [00:33<01:54, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  22%|██▏       | 348/1563 [00:33<01:53, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  22%|██▏       | 350/1563 [00:34<01:53, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 352/1563 [00:34<01:55, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 354/1563 [00:34<01:54, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 356/1563 [00:34<01:54, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 358/1563 [00:34<01:53, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 360/1563 [00:35<01:54, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 362/1563 [00:35<01:54, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 364/1563 [00:35<01:53, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 366/1563 [00:35<01:53, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▎       | 368/1563 [00:35<01:52, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▎       | 370/1563 [00:36<01:53, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▍       | 372/1563 [00:36<01:52, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▍       | 374/1563 [00:36<01:51, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▍       | 376/1563 [00:36<01:51, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▍       | 378/1563 [00:36<01:51, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▍       | 380/1563 [00:36<01:50, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▍       | 382/1563 [00:37<01:54, 10.33batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▍       | 384/1563 [00:37<02:04,  9.44batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▍       | 385/1563 [00:37<02:06,  9.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▍       | 386/1563 [00:37<02:08,  9.16batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▍       | 387/1563 [00:37<02:09,  9.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▍       | 388/1563 [00:37<02:10,  8.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▍       | 389/1563 [00:37<02:11,  8.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▍       | 390/1563 [00:38<02:12,  8.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▌       | 391/1563 [00:38<02:14,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▌       | 392/1563 [00:38<02:16,  8.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▌       | 393/1563 [00:38<02:16,  8.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▌       | 394/1563 [00:38<02:23,  8.13batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▌       | 395/1563 [00:38<02:22,  8.21batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▌       | 396/1563 [00:38<02:22,  8.21batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▌       | 397/1563 [00:38<02:22,  8.21batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▌       | 398/1563 [00:39<02:22,  8.18batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▌       | 399/1563 [00:39<02:20,  8.28batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▌       | 401/1563 [00:39<02:08,  9.04batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▌       | 403/1563 [00:39<02:01,  9.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▌       | 405/1563 [00:39<01:56,  9.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▌       | 407/1563 [00:39<01:53, 10.18batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▌       | 409/1563 [00:40<01:51, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▋       | 411/1563 [00:40<01:49, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▋       | 413/1563 [00:40<01:50, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 415/1563 [00:40<01:49, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 417/1563 [00:40<01:48, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 419/1563 [00:41<01:48, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 421/1563 [00:41<01:47, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 423/1563 [00:41<01:48, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 425/1563 [00:41<01:48, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 427/1563 [00:41<01:47, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 429/1563 [00:42<01:46, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 431/1563 [00:42<01:46, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 433/1563 [00:42<01:46, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 435/1563 [00:42<01:45, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 437/1563 [00:42<01:44, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 439/1563 [00:42<01:43, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 441/1563 [00:43<01:43, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 443/1563 [00:43<01:43, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 445/1563 [00:43<01:43, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▊       | 447/1563 [00:43<01:43, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▊       | 449/1563 [00:43<01:43, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 451/1563 [00:44<01:42, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 453/1563 [00:44<01:42, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 455/1563 [00:44<01:43, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 457/1563 [00:44<01:43, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 459/1563 [00:44<01:43, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 461/1563 [00:45<01:42, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  30%|██▉       | 463/1563 [00:45<01:42, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  30%|██▉       | 465/1563 [00:45<01:42, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  30%|██▉       | 467/1563 [00:45<01:42, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  30%|███       | 469/1563 [00:45<01:42, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  30%|███       | 471/1563 [00:45<01:41, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/1:  30%|███       | 473/1563 [00:46<01:41, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:  30%|███       | 475/1563 [00:46<01:42, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███       | 477/1563 [00:46<01:41, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███       | 479/1563 [00:46<01:41, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███       | 481/1563 [00:46<01:41, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███       | 483/1563 [00:47<01:41, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███       | 485/1563 [00:47<01:41, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███       | 487/1563 [00:47<01:41, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███▏      | 489/1563 [00:47<01:41, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███▏      | 491/1563 [00:47<01:41, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 493/1563 [00:48<01:40, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 495/1563 [00:48<01:39, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 497/1563 [00:48<01:39, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 499/1563 [00:48<01:40, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 501/1563 [00:48<01:39, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 503/1563 [00:48<01:38, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 505/1563 [00:49<01:38, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 507/1563 [00:49<01:42, 10.29batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 509/1563 [00:49<01:47,  9.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 510/1563 [00:49<01:50,  9.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 511/1563 [00:49<01:52,  9.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 512/1563 [00:49<01:54,  9.21batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 513/1563 [00:50<01:58,  8.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 514/1563 [00:50<02:00,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 515/1563 [00:50<01:59,  8.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 516/1563 [00:50<01:59,  8.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 517/1563 [00:50<01:59,  8.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 518/1563 [00:50<02:01,  8.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 519/1563 [00:50<02:00,  8.65batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 520/1563 [00:50<01:59,  8.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 521/1563 [00:50<01:58,  8.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 522/1563 [00:51<01:58,  8.77batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 523/1563 [00:51<01:59,  8.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▎      | 524/1563 [00:51<01:59,  8.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▎      | 525/1563 [00:51<01:58,  8.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▎      | 527/1563 [00:51<01:48,  9.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▍      | 528/1563 [00:51<01:49,  9.48batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▍      | 530/1563 [00:51<01:44,  9.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▍      | 532/1563 [00:52<01:41, 10.18batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▍      | 534/1563 [00:52<01:39, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▍      | 536/1563 [00:52<01:38, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▍      | 538/1563 [00:52<01:39, 10.33batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▍      | 540/1563 [00:52<01:38, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▍      | 542/1563 [00:53<01:37, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▍      | 544/1563 [00:53<01:37, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▍      | 546/1563 [00:53<01:37, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▌      | 548/1563 [00:53<01:36, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▌      | 550/1563 [00:53<01:36, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▌      | 552/1563 [00:53<01:35, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▌      | 554/1563 [00:54<01:35, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▌      | 556/1563 [00:54<01:35, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▌      | 558/1563 [00:54<01:35, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▌      | 560/1563 [00:54<01:34, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▌      | 562/1563 [00:54<01:34, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▌      | 564/1563 [00:55<01:34, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▌      | 566/1563 [00:55<01:33, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▋      | 568/1563 [00:55<01:33, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▋      | 570/1563 [00:55<01:33, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 572/1563 [00:55<01:33, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 574/1563 [00:56<01:33, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 576/1563 [00:56<01:33, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 578/1563 [00:56<01:33, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 580/1563 [00:56<01:33, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 582/1563 [00:56<01:33, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 584/1563 [00:57<01:32, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 586/1563 [00:57<01:32, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 588/1563 [00:57<01:31, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 590/1563 [00:57<01:31, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 592/1563 [00:57<01:32, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 594/1563 [00:57<01:33, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 596/1563 [00:58<01:31, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 598/1563 [00:58<01:31, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 600/1563 [00:58<01:31, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▊      | 602/1563 [00:58<01:30, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▊      | 604/1563 [00:58<01:31, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▉      | 606/1563 [00:59<01:31, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▉      | 608/1563 [00:59<01:30, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▉      | 610/1563 [00:59<01:30, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▉      | 612/1563 [00:59<01:30, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▉      | 614/1563 [00:59<01:29, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▉      | 616/1563 [01:00<01:29, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|███▉      | 618/1563 [01:00<01:29, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|███▉      | 620/1563 [01:00<01:28, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|███▉      | 622/1563 [01:00<01:28, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|███▉      | 624/1563 [01:00<01:28, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|████      | 626/1563 [01:00<01:28, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|████      | 628/1563 [01:01<01:28, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|████      | 630/1563 [01:01<01:28, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|████      | 632/1563 [01:01<01:33,  9.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 634/1563 [01:01<01:37,  9.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 635/1563 [01:01<01:39,  9.35batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 636/1563 [01:02<01:41,  9.10batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 637/1563 [01:02<01:42,  9.07batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 638/1563 [01:02<01:44,  8.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 639/1563 [01:02<01:45,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 640/1563 [01:02<01:46,  8.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 641/1563 [01:02<01:47,  8.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 642/1563 [01:02<01:47,  8.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 643/1563 [01:02<01:48,  8.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 644/1563 [01:03<01:50,  8.33batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████▏     | 645/1563 [01:03<01:50,  8.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████▏     | 646/1563 [01:03<01:49,  8.38batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████▏     | 647/1563 [01:03<01:48,  8.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████▏     | 648/1563 [01:03<01:48,  8.43batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 649/1563 [01:03<01:52,  8.15batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 651/1563 [01:03<01:39,  9.15batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 653/1563 [01:04<01:34,  9.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 654/1563 [01:04<01:34,  9.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 656/1563 [01:04<01:30,  9.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 657/1563 [01:04<01:30,  9.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 659/1563 [01:04<01:28, 10.22batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 661/1563 [01:04<01:27, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 663/1563 [01:04<01:25, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 665/1563 [01:05<01:27, 10.22batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 667/1563 [01:05<01:26, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 669/1563 [01:05<01:25, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 671/1563 [01:05<01:25, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 673/1563 [01:05<01:24, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 675/1563 [01:06<01:23, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 677/1563 [01:06<01:24, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 679/1563 [01:06<01:23, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▎     | 681/1563 [01:06<01:23, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▎     | 683/1563 [01:06<01:23, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▍     | 685/1563 [01:07<01:22, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▍     | 687/1563 [01:07<01:22, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▍     | 689/1563 [01:07<01:22, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▍     | 691/1563 [01:07<01:22, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▍     | 693/1563 [01:07<01:21, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▍     | 695/1563 [01:07<01:21, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▍     | 697/1563 [01:08<01:20, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▍     | 699/1563 [01:08<01:20, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▍     | 701/1563 [01:08<01:21, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▍     | 703/1563 [01:08<01:20, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▌     | 705/1563 [01:08<01:20, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▌     | 707/1563 [01:09<01:20, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▌     | 709/1563 [01:09<01:19, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▌     | 711/1563 [01:09<01:19, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▌     | 713/1563 [01:09<01:19, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▌     | 715/1563 [01:09<01:19, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▌     | 717/1563 [01:10<01:19, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▌     | 719/1563 [01:10<01:19, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▌     | 721/1563 [01:10<01:19, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▋     | 723/1563 [01:10<01:19, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▋     | 725/1563 [01:10<01:19, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 727/1563 [01:11<01:19, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 729/1563 [01:11<01:19, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 731/1563 [01:11<01:19, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 733/1563 [01:11<01:19, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 735/1563 [01:11<01:18, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 737/1563 [01:11<01:17, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 739/1563 [01:12<01:17, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 741/1563 [01:12<01:18, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 743/1563 [01:12<01:17, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 745/1563 [01:12<01:16, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 747/1563 [01:12<01:16, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 749/1563 [01:13<01:16, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 751/1563 [01:13<01:16, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 753/1563 [01:13<01:17, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 755/1563 [01:13<01:20, 10.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 757/1563 [01:13<01:23,  9.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 758/1563 [01:14<01:24,  9.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▊     | 759/1563 [01:14<01:25,  9.36batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▊     | 760/1563 [01:14<01:28,  9.09batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▊     | 761/1563 [01:14<01:31,  8.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 762/1563 [01:14<01:32,  8.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 763/1563 [01:14<01:32,  8.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 764/1563 [01:14<01:34,  8.49batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 765/1563 [01:14<01:33,  8.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 766/1563 [01:14<01:34,  8.42batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 767/1563 [01:15<01:37,  8.19batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 768/1563 [01:15<01:36,  8.20batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 769/1563 [01:15<01:37,  8.16batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 770/1563 [01:15<01:39,  8.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 771/1563 [01:15<01:37,  8.10batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 772/1563 [01:15<01:35,  8.25batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|████▉     | 774/1563 [01:15<01:25,  9.19batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|████▉     | 775/1563 [01:16<01:24,  9.35batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|████▉     | 777/1563 [01:16<01:19,  9.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|████▉     | 779/1563 [01:16<01:17, 10.08batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|████▉     | 780/1563 [01:16<01:18,  9.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|█████     | 782/1563 [01:16<01:16, 10.17batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|█████     | 784/1563 [01:16<01:15, 10.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|█████     | 786/1563 [01:17<01:14, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|█████     | 788/1563 [01:17<01:13, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████     | 790/1563 [01:17<01:13, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████     | 792/1563 [01:17<01:14, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████     | 794/1563 [01:17<01:13, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████     | 796/1563 [01:18<01:12, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████     | 798/1563 [01:18<01:12, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████     | 800/1563 [01:18<01:12, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████▏    | 802/1563 [01:18<01:13, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████▏    | 804/1563 [01:18<01:13, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 806/1563 [01:18<01:12, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 808/1563 [01:19<01:12, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 810/1563 [01:19<01:12, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 812/1563 [01:19<01:11, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 814/1563 [01:19<01:11, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 816/1563 [01:19<01:11, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 818/1563 [01:20<01:10, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 820/1563 [01:20<01:10, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 822/1563 [01:20<01:10, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 824/1563 [01:20<01:10, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 826/1563 [01:20<01:10, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 828/1563 [01:21<01:10, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 830/1563 [01:21<01:09, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 832/1563 [01:21<01:09, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 834/1563 [01:21<01:10, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 836/1563 [01:21<01:09, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▎    | 838/1563 [01:22<01:08, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▎    | 840/1563 [01:22<01:08, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▍    | 842/1563 [01:22<01:07, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▍    | 844/1563 [01:22<01:07, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▍    | 846/1563 [01:22<01:08, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▍    | 848/1563 [01:22<01:07, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▍    | 850/1563 [01:23<01:06, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▍    | 852/1563 [01:23<01:06, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▍    | 854/1563 [01:23<01:06, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▍    | 856/1563 [01:23<01:07, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▍    | 858/1563 [01:23<01:06, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▌    | 860/1563 [01:24<01:06, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▌    | 862/1563 [01:24<01:06, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▌    | 864/1563 [01:24<01:05, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▌    | 866/1563 [01:24<01:05, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▌    | 868/1563 [01:24<01:05, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▌    | 870/1563 [01:25<01:05, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▌    | 872/1563 [01:25<01:05, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▌    | 874/1563 [01:25<01:04, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▌    | 876/1563 [01:25<01:04, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▌    | 878/1563 [01:25<01:05, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▋    | 880/1563 [01:26<01:09,  9.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▋    | 881/1563 [01:26<01:11,  9.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▋    | 882/1563 [01:26<01:12,  9.39batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▋    | 883/1563 [01:26<01:13,  9.26batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 884/1563 [01:26<01:15,  9.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 885/1563 [01:26<01:16,  8.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 886/1563 [01:26<01:16,  8.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 887/1563 [01:26<01:16,  8.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 888/1563 [01:26<01:17,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 889/1563 [01:27<01:17,  8.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 890/1563 [01:27<01:17,  8.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 891/1563 [01:27<01:16,  8.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 892/1563 [01:27<01:17,  8.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 893/1563 [01:27<01:16,  8.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 894/1563 [01:27<01:17,  8.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 895/1563 [01:27<01:18,  8.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 896/1563 [01:27<01:20,  8.29batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 898/1563 [01:28<01:12,  9.19batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 900/1563 [01:28<01:08,  9.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 902/1563 [01:28<01:05, 10.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 903/1563 [01:28<01:06,  9.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 905/1563 [01:28<01:04, 10.20batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 907/1563 [01:28<01:03, 10.25batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 909/1563 [01:29<01:02, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 911/1563 [01:29<01:02, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 913/1563 [01:29<01:01, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▊    | 915/1563 [01:29<01:01, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▊    | 917/1563 [01:29<01:00, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▉    | 919/1563 [01:30<01:00, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▉    | 921/1563 [01:30<00:59, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▉    | 923/1563 [01:30<00:59, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▉    | 925/1563 [01:30<01:00, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▉    | 927/1563 [01:30<00:59, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▉    | 929/1563 [01:31<00:59, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|█████▉    | 931/1563 [01:31<00:59, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|█████▉    | 933/1563 [01:31<00:58, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|█████▉    | 935/1563 [01:31<00:58, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|█████▉    | 937/1563 [01:31<00:58, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|██████    | 939/1563 [01:31<00:59, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|██████    | 941/1563 [01:32<00:58, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|██████    | 943/1563 [01:32<00:58, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|██████    | 945/1563 [01:32<00:58, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████    | 947/1563 [01:32<00:58, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████    | 949/1563 [01:32<00:57, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████    | 951/1563 [01:33<00:57, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████    | 953/1563 [01:33<00:57, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████    | 955/1563 [01:33<00:57, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████    | 957/1563 [01:33<00:57, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████▏   | 959/1563 [01:33<00:56, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████▏   | 961/1563 [01:34<00:57, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 963/1563 [01:34<00:57, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 965/1563 [01:34<00:56, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 967/1563 [01:34<00:56, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 969/1563 [01:34<00:55, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 971/1563 [01:34<00:56, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 973/1563 [01:35<00:56, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 975/1563 [01:35<00:55, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 977/1563 [01:35<00:55, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 979/1563 [01:35<00:55, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 981/1563 [01:35<00:54, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 983/1563 [01:36<00:55, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 985/1563 [01:36<00:54, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 987/1563 [01:36<00:54, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 989/1563 [01:36<00:54, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 991/1563 [01:36<00:53, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▎   | 993/1563 [01:37<00:53, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▎   | 995/1563 [01:37<00:53, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▍   | 997/1563 [01:37<00:53, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▍   | 999/1563 [01:37<00:53, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▍   | 1001/1563 [01:37<00:53, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▍   | 1003/1563 [01:38<00:55, 10.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▍   | 1005/1563 [01:38<00:57,  9.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▍   | 1006/1563 [01:38<00:58,  9.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▍   | 1007/1563 [01:38<00:59,  9.38batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▍   | 1008/1563 [01:38<00:59,  9.26batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▍   | 1009/1563 [01:38<01:00,  9.19batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▍   | 1010/1563 [01:38<01:01,  9.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▍   | 1011/1563 [01:38<01:01,  9.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▍   | 1012/1563 [01:39<01:01,  8.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▍   | 1013/1563 [01:39<01:03,  8.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▍   | 1014/1563 [01:39<01:02,  8.72batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▍   | 1015/1563 [01:39<01:02,  8.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▌   | 1016/1563 [01:39<01:04,  8.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▌   | 1017/1563 [01:39<01:04,  8.48batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▌   | 1018/1563 [01:39<01:04,  8.46batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▌   | 1019/1563 [01:39<01:05,  8.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▌   | 1020/1563 [01:40<01:04,  8.37batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▌   | 1021/1563 [01:40<01:04,  8.40batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▌   | 1022/1563 [01:40<01:02,  8.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▌   | 1024/1563 [01:40<00:56,  9.46batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▌   | 1025/1563 [01:40<00:56,  9.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▌   | 1027/1563 [01:40<00:53,  9.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▌   | 1029/1563 [01:40<00:52, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▌   | 1031/1563 [01:41<00:51, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▌   | 1033/1563 [01:41<00:50, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▌   | 1035/1563 [01:41<00:50, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▋   | 1037/1563 [01:41<00:49, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▋   | 1039/1563 [01:41<00:49, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1041/1563 [01:42<00:49, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1043/1563 [01:42<00:48, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1045/1563 [01:42<00:48, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1047/1563 [01:42<00:48, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1049/1563 [01:42<00:48, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1051/1563 [01:42<00:48, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1053/1563 [01:43<00:48, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1055/1563 [01:43<00:48, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1057/1563 [01:43<00:48, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1059/1563 [01:43<00:47, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1061/1563 [01:43<00:47, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1063/1563 [01:44<00:47, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1065/1563 [01:44<00:47, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1067/1563 [01:44<00:47, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1069/1563 [01:44<00:46, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▊   | 1071/1563 [01:44<00:46, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▊   | 1073/1563 [01:45<00:46, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▉   | 1075/1563 [01:45<00:46, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▉   | 1077/1563 [01:45<00:46, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▉   | 1079/1563 [01:45<00:46, 10.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▉   | 1081/1563 [01:45<00:46, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▉   | 1083/1563 [01:46<00:45, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▉   | 1085/1563 [01:46<00:45, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|██████▉   | 1087/1563 [01:46<00:45, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|██████▉   | 1089/1563 [01:46<00:45, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|██████▉   | 1091/1563 [01:46<00:44, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|██████▉   | 1093/1563 [01:46<00:44, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|███████   | 1095/1563 [01:47<00:44, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|███████   | 1097/1563 [01:47<00:44, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|███████   | 1099/1563 [01:47<00:44, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|███████   | 1101/1563 [01:47<00:43, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████   | 1103/1563 [01:47<00:43, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████   | 1105/1563 [01:48<00:43, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████   | 1107/1563 [01:48<00:43, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████   | 1109/1563 [01:48<00:44, 10.29batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████   | 1111/1563 [01:48<00:43, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████   | 1113/1563 [01:48<00:43, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████▏  | 1115/1563 [01:49<00:42, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████▏  | 1117/1563 [01:49<00:42, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1119/1563 [01:49<00:42, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1121/1563 [01:49<00:42, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1123/1563 [01:49<00:42, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1125/1563 [01:50<00:41, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1127/1563 [01:50<00:42, 10.25batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1129/1563 [01:50<00:44,  9.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1130/1563 [01:50<00:45,  9.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1131/1563 [01:50<00:47,  9.12batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1132/1563 [01:50<00:48,  8.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1133/1563 [01:50<00:49,  8.72batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1134/1563 [01:51<00:50,  8.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1135/1563 [01:51<00:50,  8.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1136/1563 [01:51<00:50,  8.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1137/1563 [01:51<00:49,  8.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1138/1563 [01:51<00:51,  8.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1139/1563 [01:51<00:50,  8.39batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1140/1563 [01:51<00:51,  8.24batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1141/1563 [01:51<00:50,  8.30batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1142/1563 [01:52<00:51,  8.18batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1143/1563 [01:52<00:51,  8.14batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1144/1563 [01:52<00:51,  8.13batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1145/1563 [01:52<00:50,  8.20batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1146/1563 [01:52<00:49,  8.42batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1148/1563 [01:52<00:44,  9.27batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▎  | 1150/1563 [01:52<00:42,  9.72batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▎  | 1151/1563 [01:53<00:42,  9.77batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▎  | 1152/1563 [01:53<00:42,  9.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▍  | 1154/1563 [01:53<00:40, 10.09batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▍  | 1155/1563 [01:53<00:40, 10.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▍  | 1156/1563 [01:53<00:40,  9.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▍  | 1157/1563 [01:53<00:41,  9.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▍  | 1159/1563 [01:53<00:40, 10.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▍  | 1161/1563 [01:54<00:39, 10.22batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▍  | 1163/1563 [01:54<00:38, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▍  | 1165/1563 [01:54<00:38, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▍  | 1167/1563 [01:54<00:38, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▍  | 1169/1563 [01:54<00:37, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▍  | 1171/1563 [01:54<00:37, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▌  | 1173/1563 [01:55<00:37, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▌  | 1175/1563 [01:55<00:37, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▌  | 1177/1563 [01:55<00:37, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▌  | 1179/1563 [01:55<00:37, 10.20batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▌  | 1181/1563 [01:55<00:37, 10.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▌  | 1183/1563 [01:56<00:36, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▌  | 1185/1563 [01:56<00:36, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▌  | 1187/1563 [01:56<00:36, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▌  | 1189/1563 [01:56<00:36, 10.27batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▌  | 1191/1563 [01:56<00:35, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▋  | 1193/1563 [01:57<00:35, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▋  | 1195/1563 [01:57<00:35, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1197/1563 [01:57<00:35, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1199/1563 [01:57<00:35, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1201/1563 [01:57<00:34, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1203/1563 [01:58<00:34, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1205/1563 [01:58<00:34, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1207/1563 [01:58<00:33, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1209/1563 [01:58<00:34, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1211/1563 [01:58<00:33, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1213/1563 [01:59<00:33, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1215/1563 [01:59<00:33, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1217/1563 [01:59<00:33, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1219/1563 [01:59<00:32, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1221/1563 [01:59<00:33, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1223/1563 [01:59<00:32, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1225/1563 [02:00<00:32, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▊  | 1227/1563 [02:00<00:32, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▊  | 1229/1563 [02:00<00:32, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▉  | 1231/1563 [02:00<00:31, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▉  | 1233/1563 [02:00<00:31, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▉  | 1235/1563 [02:01<00:31, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▉  | 1237/1563 [02:01<00:31, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▉  | 1239/1563 [02:01<00:31, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▉  | 1241/1563 [02:01<00:31, 10.33batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|███████▉  | 1243/1563 [02:01<00:31, 10.23batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|███████▉  | 1245/1563 [02:02<00:30, 10.29batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|███████▉  | 1247/1563 [02:02<00:30, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|███████▉  | 1249/1563 [02:02<00:30, 10.28batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|████████  | 1251/1563 [02:02<00:31,  9.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|████████  | 1252/1563 [02:02<00:33,  9.42batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|████████  | 1253/1563 [02:02<00:33,  9.15batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|████████  | 1254/1563 [02:03<00:34,  9.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|████████  | 1255/1563 [02:03<00:35,  8.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|████████  | 1256/1563 [02:03<00:35,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|████████  | 1257/1563 [02:03<00:36,  8.44batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|████████  | 1258/1563 [02:03<00:36,  8.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1259/1563 [02:03<00:35,  8.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1260/1563 [02:03<00:35,  8.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1261/1563 [02:03<00:35,  8.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1262/1563 [02:04<00:35,  8.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1263/1563 [02:04<00:35,  8.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1264/1563 [02:04<00:35,  8.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1265/1563 [02:04<00:37,  8.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1266/1563 [02:04<00:36,  8.18batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1267/1563 [02:04<00:35,  8.28batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1268/1563 [02:04<00:35,  8.40batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████▏ | 1270/1563 [02:04<00:32,  9.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████▏ | 1272/1563 [02:05<00:30,  9.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████▏ | 1273/1563 [02:05<00:30,  9.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1274/1563 [02:05<00:30,  9.43batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1276/1563 [02:05<00:29,  9.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1278/1563 [02:05<00:28,  9.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1279/1563 [02:05<00:28,  9.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1280/1563 [02:05<00:28,  9.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1281/1563 [02:06<00:28,  9.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1282/1563 [02:06<00:28,  9.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1283/1563 [02:06<00:28,  9.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1284/1563 [02:06<00:28,  9.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1285/1563 [02:06<00:28,  9.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1286/1563 [02:06<00:28,  9.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1288/1563 [02:06<00:27,  9.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1289/1563 [02:06<00:27,  9.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1291/1563 [02:07<00:26, 10.16batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1293/1563 [02:07<00:26, 10.27batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1295/1563 [02:07<00:26, 10.25batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1297/1563 [02:07<00:26, 10.19batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1299/1563 [02:07<00:25, 10.20batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1301/1563 [02:08<00:25, 10.10batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1303/1563 [02:08<00:25, 10.19batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1305/1563 [02:08<00:25, 10.24batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▎ | 1307/1563 [02:08<00:25, 10.20batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▎ | 1309/1563 [02:08<00:24, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▍ | 1311/1563 [02:09<00:24, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▍ | 1313/1563 [02:09<00:24, 10.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▍ | 1315/1563 [02:09<00:24, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▍ | 1317/1563 [02:09<00:23, 10.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▍ | 1319/1563 [02:09<00:23, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▍ | 1321/1563 [02:09<00:23, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▍ | 1323/1563 [02:10<00:23, 10.29batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▍ | 1325/1563 [02:10<00:23, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▍ | 1327/1563 [02:10<00:22, 10.29batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▌ | 1329/1563 [02:10<00:22, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▌ | 1331/1563 [02:10<00:22, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▌ | 1333/1563 [02:11<00:22, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▌ | 1335/1563 [02:11<00:22, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▌ | 1337/1563 [02:11<00:22, 10.27batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▌ | 1339/1563 [02:11<00:21, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▌ | 1341/1563 [02:11<00:21, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▌ | 1343/1563 [02:12<00:21, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▌ | 1345/1563 [02:12<00:21, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▌ | 1347/1563 [02:12<00:20, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▋ | 1349/1563 [02:12<00:20, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▋ | 1351/1563 [02:12<00:20, 10.33batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1353/1563 [02:13<00:20, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1355/1563 [02:13<00:20, 10.22batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1357/1563 [02:13<00:19, 10.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1359/1563 [02:13<00:19, 10.25batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1361/1563 [02:13<00:19, 10.28batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1363/1563 [02:14<00:19, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1365/1563 [02:14<00:19, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1367/1563 [02:14<00:19, 10.20batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1369/1563 [02:14<00:19, 10.13batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1371/1563 [02:14<00:19,  9.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1372/1563 [02:14<00:19,  9.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1373/1563 [02:15<00:20,  9.39batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1374/1563 [02:15<00:20,  9.24batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1375/1563 [02:15<00:20,  9.08batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1376/1563 [02:15<00:20,  8.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1377/1563 [02:15<00:21,  8.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1378/1563 [02:15<00:21,  8.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1379/1563 [02:15<00:21,  8.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1380/1563 [02:15<00:21,  8.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1381/1563 [02:16<00:21,  8.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1382/1563 [02:16<00:21,  8.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1383/1563 [02:16<00:21,  8.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▊ | 1384/1563 [02:16<00:21,  8.48batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▊ | 1385/1563 [02:16<00:21,  8.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▊ | 1386/1563 [02:16<00:21,  8.35batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▊ | 1387/1563 [02:16<00:21,  8.25batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1388/1563 [02:16<00:21,  8.23batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1389/1563 [02:17<00:21,  8.19batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1390/1563 [02:17<00:20,  8.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1391/1563 [02:17<00:19,  8.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1392/1563 [02:17<00:18,  9.20batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1393/1563 [02:17<00:18,  9.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1394/1563 [02:17<00:17,  9.42batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1396/1563 [02:17<00:16,  9.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1398/1563 [02:17<00:16, 10.15batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|████████▉ | 1400/1563 [02:18<00:15, 10.20batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|████████▉ | 1402/1563 [02:18<00:15, 10.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|████████▉ | 1404/1563 [02:18<00:15, 10.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|████████▉ | 1406/1563 [02:18<00:15, 10.08batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|█████████ | 1408/1563 [02:18<00:15, 10.18batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|█████████ | 1410/1563 [02:19<00:14, 10.21batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|█████████ | 1412/1563 [02:19<00:14, 10.28batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|█████████ | 1414/1563 [02:19<00:14, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████ | 1416/1563 [02:19<00:14, 10.29batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████ | 1418/1563 [02:19<00:14, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████ | 1420/1563 [02:20<00:13, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████ | 1422/1563 [02:20<00:13, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████ | 1424/1563 [02:20<00:13, 10.09batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████ | 1426/1563 [02:20<00:13, 10.14batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████▏| 1428/1563 [02:20<00:13, 10.23batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████▏| 1430/1563 [02:21<00:12, 10.24batch/s]\u001b[A\n",
-            "Epoch 1/1:  92%|█████████▏| 1432/1563 [02:21<00:12, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/1:  92%|█████████▏| 1434/1563 [02:21<00:12, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/1:  92%|█████████▏| 1436/1563 [02:21<00:12, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/1:  92%|█████████▏| 1438/1563 [02:21<00:12, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  92%|█████████▏| 1440/1563 [02:21<00:11, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/1:  92%|█████████▏| 1442/1563 [02:22<00:11, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  92%|█████████▏| 1444/1563 [02:22<00:11, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1446/1563 [02:22<00:11, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1448/1563 [02:22<00:11, 10.33batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1450/1563 [02:22<00:10, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1452/1563 [02:23<00:10, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1454/1563 [02:23<00:10, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1456/1563 [02:23<00:10, 10.19batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1458/1563 [02:23<00:10, 10.27batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1460/1563 [02:23<00:10, 10.28batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▎| 1462/1563 [02:24<00:09, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▎| 1464/1563 [02:24<00:09, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▍| 1466/1563 [02:24<00:09, 10.23batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▍| 1468/1563 [02:24<00:09, 10.21batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▍| 1470/1563 [02:24<00:09, 10.24batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▍| 1472/1563 [02:25<00:08, 10.14batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▍| 1474/1563 [02:25<00:08, 10.18batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▍| 1476/1563 [02:25<00:08, 10.23batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▍| 1478/1563 [02:25<00:08, 10.23batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▍| 1480/1563 [02:25<00:08, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▍| 1482/1563 [02:26<00:07, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▍| 1484/1563 [02:26<00:07, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▌| 1486/1563 [02:26<00:07, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▌| 1488/1563 [02:26<00:07, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▌| 1490/1563 [02:26<00:07, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▌| 1492/1563 [02:27<00:06, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1494/1563 [02:27<00:06,  9.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1495/1563 [02:27<00:07,  9.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1496/1563 [02:27<00:07,  9.30batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1497/1563 [02:27<00:07,  9.11batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1498/1563 [02:27<00:07,  9.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1499/1563 [02:27<00:07,  8.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1500/1563 [02:28<00:07,  8.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1501/1563 [02:28<00:07,  8.39batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1502/1563 [02:28<00:07,  8.25batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1503/1563 [02:28<00:07,  8.12batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1504/1563 [02:28<00:07,  8.12batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▋| 1505/1563 [02:28<00:07,  8.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▋| 1506/1563 [02:28<00:07,  7.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▋| 1507/1563 [02:28<00:06,  8.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▋| 1508/1563 [02:28<00:06,  8.16batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1509/1563 [02:29<00:06,  8.17batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1510/1563 [02:29<00:06,  8.20batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1511/1563 [02:29<00:06,  8.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1512/1563 [02:29<00:05,  8.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1513/1563 [02:29<00:05,  8.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1514/1563 [02:29<00:05,  9.13batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1515/1563 [02:29<00:05,  9.09batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1516/1563 [02:29<00:05,  9.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1517/1563 [02:29<00:04,  9.44batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1519/1563 [02:30<00:04,  9.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1521/1563 [02:30<00:04, 10.13batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1522/1563 [02:30<00:04, 10.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1523/1563 [02:30<00:04,  9.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1524/1563 [02:30<00:03,  9.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1525/1563 [02:30<00:03,  9.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1526/1563 [02:30<00:03,  9.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1527/1563 [02:30<00:03,  9.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1528/1563 [02:31<00:03,  9.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1529/1563 [02:31<00:03,  9.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1530/1563 [02:31<00:03,  9.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1531/1563 [02:31<00:03,  9.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1533/1563 [02:31<00:03,  9.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1535/1563 [02:31<00:02, 10.15batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1537/1563 [02:31<00:02, 10.25batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1539/1563 [02:32<00:02, 10.27batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▊| 1541/1563 [02:32<00:02, 10.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▊| 1543/1563 [02:32<00:01, 10.20batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▉| 1545/1563 [02:32<00:01, 10.18batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▉| 1547/1563 [02:32<00:01, 10.09batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▉| 1549/1563 [02:33<00:01, 10.18batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▉| 1551/1563 [02:33<00:01, 10.25batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▉| 1553/1563 [02:33<00:00, 10.22batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▉| 1555/1563 [02:33<00:00, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/1: 100%|█████████▉| 1557/1563 [02:33<00:00, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/1: 100%|█████████▉| 1559/1563 [02:34<00:00, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/1: 100%|█████████▉| 1561/1563 [02:34<00:00, 10.28batch/s]\u001b[A\n",
-            "Epoch 1/1: 100%|██████████| 1563/1563 [02:34<00:00, 10.58batch/s]\u001b[A\n",
-            "Epochs: 100%|██████████| 1/1 [02:34<00:00, 154.51s/epoch]\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Epoch 1/1 tamamlandı. Kayıp: 1.8101, Doğruluk: 33.27%\n",
-            "RMSNorm Eğitim Süresi: 154.52 saniye, Son Doğruluk: 33.27%\n",
-            "\n",
-            "DyT Modeli Eğitiliyor...\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "Epochs:   0%|          | 0/1 [00:00<?, ?epoch/s]\n",
-            "Epoch 1/1:   0%|          | 0/1563 [00:00<?, ?batch/s]\u001b[A\n",
-            "Epoch 1/1:   0%|          | 1/1563 [00:00<03:43,  6.99batch/s]\u001b[A\n",
-            "Epoch 1/1:   0%|          | 2/1563 [00:00<03:03,  8.49batch/s]\u001b[A\n",
-            "Epoch 1/1:   0%|          | 3/1563 [00:00<02:53,  9.00batch/s]\u001b[A\n",
-            "Epoch 1/1:   0%|          | 5/1563 [00:00<02:35, 10.02batch/s]\u001b[A\n",
-            "Epoch 1/1:   0%|          | 7/1563 [00:00<02:29, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 9/1563 [00:00<02:32, 10.19batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 11/1563 [00:01<02:29, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 13/1563 [00:01<02:25, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 15/1563 [00:01<02:24, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 17/1563 [00:01<02:22, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 19/1563 [00:01<02:21, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|▏         | 21/1563 [00:02<02:23, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|▏         | 23/1563 [00:02<02:21, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 25/1563 [00:02<02:19, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 27/1563 [00:02<02:18, 11.07batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 29/1563 [00:02<02:18, 11.08batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 31/1563 [00:02<02:18, 11.08batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 33/1563 [00:03<02:17, 11.10batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 35/1563 [00:03<02:16, 11.17batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 37/1563 [00:03<02:20, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 39/1563 [00:03<02:19, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 41/1563 [00:03<02:18, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 43/1563 [00:04<02:19, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 45/1563 [00:04<02:17, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 47/1563 [00:04<02:17, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 49/1563 [00:04<02:19, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 51/1563 [00:04<02:19, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 53/1563 [00:04<02:28, 10.14batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▎         | 55/1563 [00:05<02:35,  9.72batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▎         | 56/1563 [00:05<02:37,  9.58batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▎         | 57/1563 [00:05<02:40,  9.39batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▎         | 58/1563 [00:05<02:47,  9.01batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 59/1563 [00:05<02:49,  8.90batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 60/1563 [00:05<02:48,  8.90batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 61/1563 [00:05<02:51,  8.77batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 62/1563 [00:06<02:55,  8.56batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 63/1563 [00:06<02:52,  8.71batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 64/1563 [00:06<02:54,  8.61batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 65/1563 [00:06<02:52,  8.71batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 66/1563 [00:06<02:53,  8.62batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 67/1563 [00:06<02:59,  8.33batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 68/1563 [00:06<03:02,  8.19batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 69/1563 [00:06<02:59,  8.33batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 70/1563 [00:06<02:55,  8.49batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▍         | 71/1563 [00:07<02:49,  8.80batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▍         | 73/1563 [00:07<02:32,  9.75batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▍         | 75/1563 [00:07<02:25, 10.25batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▍         | 77/1563 [00:07<02:24, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▌         | 79/1563 [00:07<02:20, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▌         | 81/1563 [00:07<02:19, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▌         | 83/1563 [00:08<02:17, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▌         | 85/1563 [00:08<02:15, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▌         | 87/1563 [00:08<02:14, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▌         | 89/1563 [00:08<02:16, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▌         | 91/1563 [00:08<02:16, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▌         | 93/1563 [00:09<02:16, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▌         | 95/1563 [00:09<02:14, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▌         | 97/1563 [00:09<02:12, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▋         | 99/1563 [00:09<02:13, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▋         | 101/1563 [00:09<02:12, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 103/1563 [00:09<02:13, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 105/1563 [00:10<02:12, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 107/1563 [00:10<02:11, 11.11batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 109/1563 [00:10<02:11, 11.04batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 111/1563 [00:10<02:12, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 113/1563 [00:10<02:11, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 115/1563 [00:11<02:11, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 117/1563 [00:11<02:11, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 119/1563 [00:11<02:10, 11.08batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 121/1563 [00:11<02:10, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 123/1563 [00:11<02:12, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 125/1563 [00:11<02:12, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 127/1563 [00:12<02:10, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 129/1563 [00:12<02:10, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 131/1563 [00:12<02:10, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▊         | 133/1563 [00:12<02:10, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▊         | 135/1563 [00:12<02:10, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 137/1563 [00:13<02:10, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 139/1563 [00:13<02:09, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 141/1563 [00:13<02:09, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 143/1563 [00:13<02:09, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 145/1563 [00:13<02:08, 11.07batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 147/1563 [00:13<02:08, 11.04batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|▉         | 149/1563 [00:14<02:08, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|▉         | 151/1563 [00:14<02:07, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|▉         | 153/1563 [00:14<02:07, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|▉         | 155/1563 [00:14<02:07, 11.09batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|█         | 157/1563 [00:14<02:09, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|█         | 159/1563 [00:15<02:09, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|█         | 161/1563 [00:15<02:08, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|█         | 163/1563 [00:15<02:07, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█         | 165/1563 [00:15<02:07, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█         | 167/1563 [00:15<02:08, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█         | 169/1563 [00:16<02:09, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█         | 171/1563 [00:16<02:08, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█         | 173/1563 [00:16<02:08, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█         | 175/1563 [00:16<02:07, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█▏        | 177/1563 [00:16<02:07, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█▏        | 179/1563 [00:16<02:08, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 181/1563 [00:17<02:16, 10.14batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 183/1563 [00:17<02:19,  9.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 184/1563 [00:17<02:23,  9.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 185/1563 [00:17<02:23,  9.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 186/1563 [00:17<02:24,  9.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 187/1563 [00:17<02:24,  9.49batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 188/1563 [00:17<02:27,  9.30batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 189/1563 [00:18<02:35,  8.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 190/1563 [00:18<02:34,  8.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 191/1563 [00:18<02:33,  8.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 192/1563 [00:18<02:33,  8.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 193/1563 [00:18<02:35,  8.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 194/1563 [00:18<02:37,  8.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 195/1563 [00:18<02:37,  8.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 196/1563 [00:18<02:38,  8.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 197/1563 [00:18<02:42,  8.43batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 198/1563 [00:19<02:40,  8.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 200/1563 [00:19<02:24,  9.40batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 202/1563 [00:19<02:15, 10.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 204/1563 [00:19<02:10, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 206/1563 [00:19<02:07, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 208/1563 [00:20<02:07, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 210/1563 [00:20<02:05, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▎        | 212/1563 [00:20<02:03, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▎        | 214/1563 [00:20<02:03, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▍        | 216/1563 [00:20<02:04, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▍        | 218/1563 [00:20<02:04, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▍        | 220/1563 [00:21<02:06, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▍        | 222/1563 [00:21<02:04, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▍        | 224/1563 [00:21<02:03, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▍        | 226/1563 [00:21<02:02, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▍        | 228/1563 [00:21<02:02, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▍        | 230/1563 [00:22<02:05, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▍        | 232/1563 [00:22<02:03, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▍        | 234/1563 [00:22<02:02, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▌        | 236/1563 [00:22<02:01, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▌        | 238/1563 [00:22<02:00, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▌        | 240/1563 [00:22<02:01, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▌        | 242/1563 [00:23<02:03, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▌        | 244/1563 [00:23<02:01, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▌        | 246/1563 [00:23<02:00, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▌        | 248/1563 [00:23<02:00, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▌        | 250/1563 [00:23<02:00, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▌        | 252/1563 [00:24<02:00, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▋        | 254/1563 [00:24<01:59, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▋        | 256/1563 [00:24<01:58, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 258/1563 [00:24<01:58, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 260/1563 [00:24<01:58, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 262/1563 [00:24<01:58, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 264/1563 [00:25<01:58, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 266/1563 [00:25<01:57, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 268/1563 [00:25<01:57, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 270/1563 [00:25<01:58, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 272/1563 [00:25<01:58, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 274/1563 [00:26<01:57, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 276/1563 [00:26<01:58, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 278/1563 [00:26<01:57, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 280/1563 [00:26<01:57, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 282/1563 [00:26<01:57, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 284/1563 [00:26<01:56, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 286/1563 [00:27<01:56, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 288/1563 [00:27<01:55, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▊        | 290/1563 [00:27<01:56, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▊        | 292/1563 [00:27<01:58, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▉        | 294/1563 [00:27<01:57, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▉        | 296/1563 [00:28<01:56, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▉        | 298/1563 [00:28<01:55, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▉        | 300/1563 [00:28<01:55, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▉        | 302/1563 [00:28<01:56, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▉        | 304/1563 [00:28<01:55, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|█▉        | 306/1563 [00:28<01:55, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|█▉        | 308/1563 [00:29<01:55, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|█▉        | 310/1563 [00:29<02:01, 10.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|█▉        | 312/1563 [00:29<02:06,  9.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|██        | 313/1563 [00:29<02:08,  9.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|██        | 314/1563 [00:29<02:09,  9.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|██        | 315/1563 [00:29<02:10,  9.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|██        | 316/1563 [00:30<02:14,  9.28batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|██        | 317/1563 [00:30<02:14,  9.25batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|██        | 318/1563 [00:30<02:18,  8.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|██        | 319/1563 [00:30<02:18,  8.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|██        | 320/1563 [00:30<02:17,  9.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 321/1563 [00:30<02:18,  8.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 322/1563 [00:30<02:20,  8.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 323/1563 [00:30<02:19,  8.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 324/1563 [00:30<02:24,  8.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 325/1563 [00:31<02:23,  8.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 326/1563 [00:31<02:24,  8.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 327/1563 [00:31<02:28,  8.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 328/1563 [00:31<02:30,  8.21batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 330/1563 [00:31<02:12,  9.30batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 332/1563 [00:31<02:03,  9.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██▏       | 334/1563 [00:32<01:59, 10.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██▏       | 336/1563 [00:32<01:56, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  22%|██▏       | 338/1563 [00:32<01:56, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  22%|██▏       | 340/1563 [00:32<01:54, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  22%|██▏       | 342/1563 [00:32<01:52, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  22%|██▏       | 344/1563 [00:32<01:51, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  22%|██▏       | 346/1563 [00:33<01:51, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  22%|██▏       | 348/1563 [00:33<01:50, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  22%|██▏       | 350/1563 [00:33<01:52, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 352/1563 [00:33<01:51, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 354/1563 [00:33<01:49, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 356/1563 [00:34<01:50, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 358/1563 [00:34<01:50, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 360/1563 [00:34<01:49, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 362/1563 [00:34<01:49, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 364/1563 [00:34<01:48, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 366/1563 [00:34<01:47, 11.11batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▎       | 368/1563 [00:35<01:48, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▎       | 370/1563 [00:35<01:47, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▍       | 372/1563 [00:35<01:47, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▍       | 374/1563 [00:35<01:47, 11.09batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▍       | 376/1563 [00:35<01:46, 11.13batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▍       | 378/1563 [00:36<01:49, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▍       | 380/1563 [00:36<01:49, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▍       | 382/1563 [00:36<01:48, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▍       | 384/1563 [00:36<01:49, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▍       | 386/1563 [00:36<01:48, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▍       | 388/1563 [00:36<01:47, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▍       | 390/1563 [00:37<01:48, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▌       | 392/1563 [00:37<01:48, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▌       | 394/1563 [00:37<01:48, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▌       | 396/1563 [00:37<01:47, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▌       | 398/1563 [00:37<01:45, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▌       | 400/1563 [00:38<01:46, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▌       | 402/1563 [00:38<01:46, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▌       | 404/1563 [00:38<01:45, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▌       | 406/1563 [00:38<01:44, 11.04batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▌       | 408/1563 [00:38<01:44, 11.07batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▌       | 410/1563 [00:38<01:44, 11.08batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▋       | 412/1563 [00:39<01:45, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▋       | 414/1563 [00:39<01:44, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 416/1563 [00:39<01:44, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 418/1563 [00:39<01:43, 11.04batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 420/1563 [00:39<01:43, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 422/1563 [00:40<01:45, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 424/1563 [00:40<01:43, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 426/1563 [00:40<01:43, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 428/1563 [00:40<01:44, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 430/1563 [00:40<01:44, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 432/1563 [00:40<01:44, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 434/1563 [00:41<01:44, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 436/1563 [00:41<01:43, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 438/1563 [00:41<01:45, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 440/1563 [00:41<01:50, 10.17batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 442/1563 [00:41<01:52,  9.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 443/1563 [00:42<01:54,  9.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 444/1563 [00:42<01:57,  9.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 445/1563 [00:42<01:58,  9.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▊       | 446/1563 [00:42<01:59,  9.37batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▊       | 447/1563 [00:42<02:00,  9.24batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▊       | 448/1563 [00:42<02:06,  8.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▊       | 449/1563 [00:42<02:07,  8.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 450/1563 [00:42<02:08,  8.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 451/1563 [00:43<02:10,  8.49batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 452/1563 [00:43<02:11,  8.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 453/1563 [00:43<02:07,  8.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 454/1563 [00:43<02:08,  8.65batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 455/1563 [00:43<02:06,  8.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 456/1563 [00:43<02:06,  8.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 457/1563 [00:43<02:08,  8.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 459/1563 [00:43<01:56,  9.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 461/1563 [00:44<01:50,  9.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  30%|██▉       | 463/1563 [00:44<01:45, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/1:  30%|██▉       | 465/1563 [00:44<01:43, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/1:  30%|██▉       | 467/1563 [00:44<01:42, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  30%|███       | 469/1563 [00:44<01:40, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  30%|███       | 471/1563 [00:44<01:39, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  30%|███       | 473/1563 [00:45<01:39, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  30%|███       | 475/1563 [00:45<01:41, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███       | 477/1563 [00:45<01:40, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███       | 479/1563 [00:45<01:39, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███       | 481/1563 [00:45<01:39, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███       | 483/1563 [00:46<01:38, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███       | 485/1563 [00:46<01:37, 11.07batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███       | 487/1563 [00:46<01:36, 11.11batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███▏      | 489/1563 [00:46<01:37, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███▏      | 491/1563 [00:46<01:37, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 493/1563 [00:46<01:37, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 495/1563 [00:47<01:36, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 497/1563 [00:47<01:36, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 499/1563 [00:47<01:36, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 501/1563 [00:47<01:35, 11.11batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 503/1563 [00:47<01:34, 11.17batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 505/1563 [00:48<01:35, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 507/1563 [00:48<01:35, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 509/1563 [00:48<01:35, 11.04batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 511/1563 [00:48<01:34, 11.10batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 513/1563 [00:48<01:34, 11.11batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 515/1563 [00:48<01:36, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 517/1563 [00:49<01:36, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 519/1563 [00:49<01:34, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 521/1563 [00:49<01:34, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 523/1563 [00:49<01:33, 11.12batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▎      | 525/1563 [00:49<01:35, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▎      | 527/1563 [00:50<01:35, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▍      | 529/1563 [00:50<01:34, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▍      | 531/1563 [00:50<01:33, 11.04batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▍      | 533/1563 [00:50<01:33, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▍      | 535/1563 [00:50<01:33, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▍      | 537/1563 [00:50<01:33, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▍      | 539/1563 [00:51<01:33, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▍      | 541/1563 [00:51<01:32, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▍      | 543/1563 [00:51<01:31, 11.10batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▍      | 545/1563 [00:51<01:31, 11.07batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▍      | 547/1563 [00:51<01:31, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▌      | 549/1563 [00:52<01:32, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▌      | 551/1563 [00:52<01:31, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▌      | 553/1563 [00:52<01:31, 11.08batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▌      | 555/1563 [00:52<01:31, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▌      | 557/1563 [00:52<01:31, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▌      | 559/1563 [00:52<01:32, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▌      | 561/1563 [00:53<01:31, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▌      | 563/1563 [00:53<01:30, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▌      | 565/1563 [00:53<01:30, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▋      | 567/1563 [00:53<01:30, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▋      | 569/1563 [00:53<01:34, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 571/1563 [00:54<01:39,  9.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 573/1563 [00:54<01:41,  9.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 574/1563 [00:54<01:42,  9.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 575/1563 [00:54<01:43,  9.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 576/1563 [00:54<01:43,  9.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 577/1563 [00:54<01:44,  9.44batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 578/1563 [00:54<01:44,  9.42batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 579/1563 [00:54<01:45,  9.30batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 580/1563 [00:55<01:48,  9.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 581/1563 [00:55<01:52,  8.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 582/1563 [00:55<01:51,  8.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 583/1563 [00:55<01:51,  8.77batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 584/1563 [00:55<01:54,  8.52batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 585/1563 [00:55<01:53,  8.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 586/1563 [00:55<01:55,  8.48batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 588/1563 [00:55<01:42,  9.49batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 589/1563 [00:56<01:42,  9.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 591/1563 [00:56<01:35, 10.15batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 593/1563 [00:56<01:31, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 595/1563 [00:56<01:30, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 597/1563 [00:56<01:30, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 599/1563 [00:57<01:29, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 601/1563 [00:57<01:27, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▊      | 603/1563 [00:57<01:27, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▊      | 605/1563 [00:57<01:26, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▉      | 607/1563 [00:57<01:26, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▉      | 609/1563 [00:57<01:25, 11.11batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▉      | 611/1563 [00:58<01:25, 11.12batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▉      | 613/1563 [00:58<01:27, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▉      | 615/1563 [00:58<01:26, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▉      | 617/1563 [00:58<01:26, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|███▉      | 619/1563 [00:58<01:25, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|███▉      | 621/1563 [00:58<01:25, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|███▉      | 623/1563 [00:59<01:25, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|███▉      | 625/1563 [00:59<01:25, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|████      | 627/1563 [00:59<01:24, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|████      | 629/1563 [00:59<01:23, 11.15batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|████      | 631/1563 [00:59<01:23, 11.17batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|████      | 633/1563 [01:00<01:23, 11.12batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 635/1563 [01:00<01:23, 11.10batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 637/1563 [01:00<01:23, 11.13batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 639/1563 [01:00<01:23, 11.11batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 641/1563 [01:00<01:22, 11.12batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 643/1563 [01:00<01:23, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████▏     | 645/1563 [01:01<01:25, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████▏     | 647/1563 [01:01<01:25, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 649/1563 [01:01<01:23, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 651/1563 [01:01<01:23, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 653/1563 [01:01<01:22, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 655/1563 [01:02<01:22, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 657/1563 [01:02<01:22, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 659/1563 [01:02<01:21, 11.08batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 661/1563 [01:02<01:21, 11.04batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 663/1563 [01:02<01:21, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 665/1563 [01:02<01:21, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 667/1563 [01:03<01:20, 11.07batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 669/1563 [01:03<01:20, 11.09batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 671/1563 [01:03<01:20, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 673/1563 [01:03<01:20, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 675/1563 [01:03<01:20, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 677/1563 [01:04<01:19, 11.09batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 679/1563 [01:04<01:19, 11.12batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▎     | 681/1563 [01:04<01:20, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▎     | 683/1563 [01:04<01:20, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▍     | 685/1563 [01:04<01:19, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▍     | 687/1563 [01:04<01:19, 11.09batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▍     | 689/1563 [01:05<01:20, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▍     | 691/1563 [01:05<01:20, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▍     | 693/1563 [01:05<01:20, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▍     | 695/1563 [01:05<01:19, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▍     | 697/1563 [01:05<01:21, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▍     | 699/1563 [01:06<01:26,  9.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▍     | 701/1563 [01:06<01:28,  9.77batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▍     | 702/1563 [01:06<01:30,  9.49batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▍     | 703/1563 [01:06<01:32,  9.25batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▌     | 704/1563 [01:06<01:35,  9.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▌     | 705/1563 [01:06<01:37,  8.77batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▌     | 706/1563 [01:06<01:36,  8.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▌     | 707/1563 [01:07<01:36,  8.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▌     | 708/1563 [01:07<01:35,  8.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▌     | 709/1563 [01:07<01:35,  8.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▌     | 710/1563 [01:07<01:37,  8.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▌     | 711/1563 [01:07<01:37,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▌     | 712/1563 [01:07<01:36,  8.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▌     | 713/1563 [01:07<01:38,  8.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▌     | 714/1563 [01:07<01:38,  8.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▌     | 715/1563 [01:07<01:37,  8.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▌     | 717/1563 [01:08<01:28,  9.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▌     | 719/1563 [01:08<01:23, 10.13batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▌     | 720/1563 [01:08<01:23, 10.10batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▌     | 722/1563 [01:08<01:20, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▋     | 724/1563 [01:08<01:18, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▋     | 726/1563 [01:09<01:18, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 728/1563 [01:09<01:16, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 730/1563 [01:09<01:16, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 732/1563 [01:09<01:15, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 734/1563 [01:09<01:15, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 736/1563 [01:09<01:15, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 738/1563 [01:10<01:15, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 740/1563 [01:10<01:14, 11.10batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 742/1563 [01:10<01:13, 11.12batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 744/1563 [01:10<01:15, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 746/1563 [01:10<01:14, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 748/1563 [01:10<01:14, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 750/1563 [01:11<01:13, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 752/1563 [01:11<01:13, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 754/1563 [01:11<01:13, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 756/1563 [01:11<01:13, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 758/1563 [01:11<01:13, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▊     | 760/1563 [01:12<01:13, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 762/1563 [01:12<01:12, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 764/1563 [01:12<01:13, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 766/1563 [01:12<01:12, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 768/1563 [01:12<01:12, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 770/1563 [01:13<01:12, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 772/1563 [01:13<01:11, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|████▉     | 774/1563 [01:13<01:11, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|████▉     | 776/1563 [01:13<01:11, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|████▉     | 778/1563 [01:13<01:12, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|████▉     | 780/1563 [01:13<01:12, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|█████     | 782/1563 [01:14<01:11, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|█████     | 784/1563 [01:14<01:11, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|█████     | 786/1563 [01:14<01:11, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|█████     | 788/1563 [01:14<01:11, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████     | 790/1563 [01:14<01:11, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████     | 792/1563 [01:15<01:11, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████     | 794/1563 [01:15<01:10, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████     | 796/1563 [01:15<01:10, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████     | 798/1563 [01:15<01:10, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████     | 800/1563 [01:15<01:10, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████▏    | 802/1563 [01:15<01:09, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████▏    | 804/1563 [01:16<01:09, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 806/1563 [01:16<01:09, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 808/1563 [01:16<01:08, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 810/1563 [01:16<01:08, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 812/1563 [01:16<01:08, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 814/1563 [01:17<01:08, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 816/1563 [01:17<01:08, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 818/1563 [01:17<01:07, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 820/1563 [01:17<01:07, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 822/1563 [01:17<01:07, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 824/1563 [01:17<01:07, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 826/1563 [01:18<01:11, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 828/1563 [01:18<01:14,  9.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 829/1563 [01:18<01:16,  9.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 830/1563 [01:18<01:18,  9.35batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 831/1563 [01:18<01:21,  8.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 832/1563 [01:18<01:23,  8.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 833/1563 [01:18<01:22,  8.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 834/1563 [01:19<01:21,  8.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 835/1563 [01:19<01:21,  8.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 836/1563 [01:19<01:21,  8.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▎    | 837/1563 [01:19<01:22,  8.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▎    | 838/1563 [01:19<01:22,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▎    | 839/1563 [01:19<01:22,  8.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▎    | 840/1563 [01:19<01:26,  8.40batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▍    | 841/1563 [01:19<01:26,  8.39batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▍    | 842/1563 [01:20<01:24,  8.49batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▍    | 843/1563 [01:20<01:24,  8.52batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▍    | 845/1563 [01:20<01:15,  9.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▍    | 847/1563 [01:20<01:12,  9.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▍    | 849/1563 [01:20<01:08, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▍    | 851/1563 [01:20<01:07, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▍    | 853/1563 [01:21<01:06, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▍    | 855/1563 [01:21<01:05, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▍    | 857/1563 [01:21<01:05, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▍    | 859/1563 [01:21<01:04, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▌    | 861/1563 [01:21<01:03, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▌    | 863/1563 [01:21<01:03, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▌    | 865/1563 [01:22<01:04, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▌    | 867/1563 [01:22<01:04, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▌    | 869/1563 [01:22<01:03, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▌    | 871/1563 [01:22<01:02, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▌    | 873/1563 [01:22<01:03, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▌    | 875/1563 [01:23<01:03, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▌    | 877/1563 [01:23<01:02, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▌    | 879/1563 [01:23<01:01, 11.04batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▋    | 881/1563 [01:23<01:02, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▋    | 883/1563 [01:23<01:01, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 885/1563 [01:23<01:02, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 887/1563 [01:24<01:01, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 889/1563 [01:24<01:01, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 891/1563 [01:24<01:01, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 893/1563 [01:24<01:00, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 895/1563 [01:24<01:00, 11.04batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 897/1563 [01:25<01:00, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 899/1563 [01:25<01:00, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 901/1563 [01:25<00:59, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 903/1563 [01:25<01:00, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 905/1563 [01:25<01:00, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 907/1563 [01:25<01:00, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 909/1563 [01:26<00:59, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 911/1563 [01:26<00:59, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 913/1563 [01:26<00:59, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▊    | 915/1563 [01:26<00:59, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▊    | 917/1563 [01:26<00:59, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▉    | 919/1563 [01:27<00:59, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▉    | 921/1563 [01:27<00:59, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▉    | 923/1563 [01:27<00:58, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▉    | 925/1563 [01:27<00:58, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▉    | 927/1563 [01:27<00:57, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▉    | 929/1563 [01:28<00:59, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|█████▉    | 931/1563 [01:28<00:58, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|█████▉    | 933/1563 [01:28<00:58, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|█████▉    | 935/1563 [01:28<00:57, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|█████▉    | 937/1563 [01:28<00:57, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|██████    | 939/1563 [01:28<00:57, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|██████    | 941/1563 [01:29<00:58, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|██████    | 943/1563 [01:29<00:57, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|██████    | 945/1563 [01:29<00:57, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████    | 947/1563 [01:29<00:56, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████    | 949/1563 [01:29<00:56, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████    | 951/1563 [01:30<00:56, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████    | 953/1563 [01:30<00:58, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████    | 955/1563 [01:30<01:01,  9.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████    | 956/1563 [01:30<01:01,  9.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████    | 957/1563 [01:30<01:04,  9.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████▏   | 958/1563 [01:30<01:05,  9.23batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████▏   | 959/1563 [01:30<01:06,  9.07batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████▏   | 960/1563 [01:31<01:08,  8.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████▏   | 961/1563 [01:31<01:07,  8.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 962/1563 [01:31<01:06,  9.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 963/1563 [01:31<01:08,  8.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 964/1563 [01:31<01:07,  8.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 965/1563 [01:31<01:07,  8.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 966/1563 [01:31<01:08,  8.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 967/1563 [01:31<01:07,  8.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 968/1563 [01:31<01:06,  9.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 969/1563 [01:32<01:08,  8.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 970/1563 [01:32<01:08,  8.72batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 971/1563 [01:32<01:08,  8.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 972/1563 [01:32<01:09,  8.48batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 974/1563 [01:32<01:02,  9.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 976/1563 [01:32<00:58, 10.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 978/1563 [01:32<00:56, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 980/1563 [01:33<00:56, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 982/1563 [01:33<00:54, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 984/1563 [01:33<00:54, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 986/1563 [01:33<00:53, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 988/1563 [01:33<00:52, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 990/1563 [01:34<00:52, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 992/1563 [01:34<00:52, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▎   | 994/1563 [01:34<00:52, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▎   | 996/1563 [01:34<00:51, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▍   | 998/1563 [01:34<00:51, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▍   | 1000/1563 [01:34<00:51, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▍   | 1002/1563 [01:35<00:50, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▍   | 1004/1563 [01:35<00:50, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▍   | 1006/1563 [01:35<00:50, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▍   | 1008/1563 [01:35<00:50, 11.07batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▍   | 1010/1563 [01:35<00:50, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▍   | 1012/1563 [01:36<00:50, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▍   | 1014/1563 [01:36<00:50, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▌   | 1016/1563 [01:36<00:49, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▌   | 1018/1563 [01:36<00:49, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▌   | 1020/1563 [01:36<00:49, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▌   | 1022/1563 [01:37<00:49, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▌   | 1024/1563 [01:37<00:49, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▌   | 1026/1563 [01:37<00:50, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▌   | 1028/1563 [01:37<00:49, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▌   | 1030/1563 [01:37<00:48, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▌   | 1032/1563 [01:37<00:48, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▌   | 1034/1563 [01:38<00:49, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▋   | 1036/1563 [01:38<00:49, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▋   | 1038/1563 [01:38<00:48, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1040/1563 [01:38<00:48, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1042/1563 [01:38<00:47, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1044/1563 [01:39<00:47, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1046/1563 [01:39<00:47, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1048/1563 [01:39<00:47, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1050/1563 [01:39<00:46, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1052/1563 [01:39<00:46, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1054/1563 [01:39<00:46, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1056/1563 [01:40<00:46, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1058/1563 [01:40<00:46, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1060/1563 [01:40<00:45, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1062/1563 [01:40<00:45, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1064/1563 [01:40<00:45, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1066/1563 [01:41<00:45, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1068/1563 [01:41<00:45, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1070/1563 [01:41<00:45, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▊   | 1072/1563 [01:41<00:44, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▊   | 1074/1563 [01:41<00:44, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▉   | 1076/1563 [01:41<00:44, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▉   | 1078/1563 [01:42<00:44, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▉   | 1080/1563 [01:42<00:44, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▉   | 1082/1563 [01:42<00:46, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▉   | 1084/1563 [01:42<00:48,  9.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▉   | 1086/1563 [01:42<00:48,  9.76batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|██████▉   | 1087/1563 [01:43<00:48,  9.72batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|██████▉   | 1088/1563 [01:43<00:49,  9.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|██████▉   | 1089/1563 [01:43<00:49,  9.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|██████▉   | 1090/1563 [01:43<00:51,  9.21batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|██████▉   | 1091/1563 [01:43<00:51,  9.24batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|██████▉   | 1092/1563 [01:43<00:51,  9.09batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|██████▉   | 1093/1563 [01:43<00:51,  9.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|██████▉   | 1094/1563 [01:43<00:51,  9.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|███████   | 1095/1563 [01:43<00:52,  8.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|███████   | 1096/1563 [01:44<00:53,  8.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|███████   | 1097/1563 [01:44<00:52,  8.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|███████   | 1098/1563 [01:44<00:52,  8.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|███████   | 1099/1563 [01:44<00:52,  8.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|███████   | 1100/1563 [01:44<00:53,  8.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|███████   | 1101/1563 [01:44<00:53,  8.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████   | 1102/1563 [01:44<00:52,  8.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████   | 1104/1563 [01:44<00:47,  9.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████   | 1106/1563 [01:45<00:45, 10.07batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████   | 1108/1563 [01:45<00:43, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████   | 1110/1563 [01:45<00:42, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████   | 1112/1563 [01:45<00:42, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████▏  | 1114/1563 [01:45<00:41, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████▏  | 1116/1563 [01:46<00:41, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1118/1563 [01:46<00:40, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1120/1563 [01:46<00:40, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1122/1563 [01:46<00:40, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1124/1563 [01:46<00:39, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1126/1563 [01:46<00:39, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1128/1563 [01:47<00:40, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1130/1563 [01:47<00:39, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1132/1563 [01:47<00:39, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1134/1563 [01:47<00:39, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1136/1563 [01:47<00:39, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1138/1563 [01:48<00:39, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1140/1563 [01:48<00:38, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1142/1563 [01:48<00:38, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1144/1563 [01:48<00:38, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1146/1563 [01:48<00:38, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1148/1563 [01:48<00:38, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▎  | 1150/1563 [01:49<00:37, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▎  | 1152/1563 [01:49<00:37, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▍  | 1154/1563 [01:49<00:37, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▍  | 1156/1563 [01:49<00:37, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▍  | 1158/1563 [01:49<00:37, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▍  | 1160/1563 [01:50<00:37, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▍  | 1162/1563 [01:50<00:36, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▍  | 1164/1563 [01:50<00:36, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▍  | 1166/1563 [01:50<00:36, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▍  | 1168/1563 [01:50<00:36, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▍  | 1170/1563 [01:51<00:36, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▍  | 1172/1563 [01:51<00:35, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▌  | 1174/1563 [01:51<00:35, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▌  | 1176/1563 [01:51<00:35, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▌  | 1178/1563 [01:51<00:35, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▌  | 1180/1563 [01:51<00:34, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▌  | 1182/1563 [01:52<00:34, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▌  | 1184/1563 [01:52<00:34, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▌  | 1186/1563 [01:52<00:34, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▌  | 1188/1563 [01:52<00:34, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▌  | 1190/1563 [01:52<00:34, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▋  | 1192/1563 [01:53<00:34, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▋  | 1194/1563 [01:53<00:33, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1196/1563 [01:53<00:33, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1198/1563 [01:53<00:33, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1200/1563 [01:53<00:33, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1202/1563 [01:53<00:33, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1204/1563 [01:54<00:32, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1206/1563 [01:54<00:32, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1208/1563 [01:54<00:32, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1210/1563 [01:54<00:32, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1212/1563 [01:54<00:34, 10.18batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1214/1563 [01:55<00:35,  9.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1215/1563 [01:55<00:36,  9.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1216/1563 [01:55<00:37,  9.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1217/1563 [01:55<00:37,  9.12batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1218/1563 [01:55<00:37,  9.18batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1219/1563 [01:55<00:37,  9.08batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1220/1563 [01:55<00:38,  8.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1221/1563 [01:55<00:37,  9.07batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1222/1563 [01:56<00:37,  8.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1223/1563 [01:56<00:37,  9.13batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1224/1563 [01:56<00:36,  9.23batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1225/1563 [01:56<00:36,  9.25batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1226/1563 [01:56<00:36,  9.23batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▊  | 1227/1563 [01:56<00:36,  9.28batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▊  | 1228/1563 [01:56<00:36,  9.08batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▊  | 1229/1563 [01:56<00:37,  8.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▊  | 1230/1563 [01:56<00:38,  8.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▉  | 1231/1563 [01:57<00:39,  8.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▉  | 1232/1563 [01:57<00:37,  8.72batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▉  | 1234/1563 [01:57<00:34,  9.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▉  | 1235/1563 [01:57<00:33,  9.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▉  | 1236/1563 [01:57<00:33,  9.72batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▉  | 1238/1563 [01:57<00:31, 10.18batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▉  | 1240/1563 [01:57<00:31, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▉  | 1242/1563 [01:58<00:30, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|███████▉  | 1244/1563 [01:58<00:29, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|███████▉  | 1246/1563 [01:58<00:29, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|███████▉  | 1248/1563 [01:58<00:29, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|███████▉  | 1250/1563 [01:58<00:28, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|████████  | 1252/1563 [01:59<00:28, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|████████  | 1254/1563 [01:59<00:28, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|████████  | 1256/1563 [01:59<00:28, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|████████  | 1258/1563 [01:59<00:28, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1260/1563 [01:59<00:28, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1262/1563 [01:59<00:28, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1264/1563 [02:00<00:28, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1266/1563 [02:00<00:27, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1268/1563 [02:00<00:27, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████▏ | 1270/1563 [02:00<00:27, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████▏ | 1272/1563 [02:00<00:26, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1274/1563 [02:01<00:26, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1276/1563 [02:01<00:26, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1278/1563 [02:01<00:25, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1280/1563 [02:01<00:26, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1282/1563 [02:01<00:25, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1284/1563 [02:01<00:25, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1286/1563 [02:02<00:25, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1288/1563 [02:02<00:25, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1290/1563 [02:02<00:25, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1292/1563 [02:02<00:25, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1294/1563 [02:02<00:25, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1296/1563 [02:03<00:24, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1298/1563 [02:03<00:24, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1300/1563 [02:03<00:24, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1302/1563 [02:03<00:23, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1304/1563 [02:03<00:23, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▎ | 1306/1563 [02:04<00:23, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▎ | 1308/1563 [02:04<00:23, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▍ | 1310/1563 [02:04<00:23, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▍ | 1312/1563 [02:04<00:23, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▍ | 1314/1563 [02:04<00:22, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▍ | 1316/1563 [02:04<00:22, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▍ | 1318/1563 [02:05<00:23, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▍ | 1320/1563 [02:05<00:22, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▍ | 1322/1563 [02:05<00:22, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▍ | 1324/1563 [02:05<00:22, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▍ | 1326/1563 [02:05<00:21, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▍ | 1328/1563 [02:06<00:21, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▌ | 1330/1563 [02:06<00:21, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▌ | 1332/1563 [02:06<00:21, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▌ | 1334/1563 [02:06<00:21, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▌ | 1336/1563 [02:06<00:20, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▌ | 1338/1563 [02:06<00:20, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▌ | 1340/1563 [02:07<00:21, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▌ | 1342/1563 [02:07<00:21, 10.11batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▌ | 1344/1563 [02:07<00:22,  9.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▌ | 1345/1563 [02:07<00:22,  9.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▌ | 1346/1563 [02:07<00:22,  9.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▌ | 1347/1563 [02:07<00:23,  9.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▌ | 1348/1563 [02:08<00:24,  8.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▋ | 1349/1563 [02:08<00:23,  8.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▋ | 1350/1563 [02:08<00:23,  9.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▋ | 1351/1563 [02:08<00:23,  9.04batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1352/1563 [02:08<00:23,  9.10batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1353/1563 [02:08<00:22,  9.16batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1354/1563 [02:08<00:23,  9.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1355/1563 [02:08<00:23,  8.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1356/1563 [02:08<00:23,  8.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1357/1563 [02:09<00:23,  8.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1358/1563 [02:09<00:23,  8.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1359/1563 [02:09<00:23,  8.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1360/1563 [02:09<00:23,  8.46batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1362/1563 [02:09<00:21,  9.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1364/1563 [02:09<00:19,  9.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1366/1563 [02:10<00:19, 10.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1368/1563 [02:10<00:18, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1370/1563 [02:10<00:18, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1372/1563 [02:10<00:18, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1374/1563 [02:10<00:17, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1376/1563 [02:10<00:17, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1378/1563 [02:11<00:17, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1380/1563 [02:11<00:16, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1382/1563 [02:11<00:16, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▊ | 1384/1563 [02:11<00:16, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▊ | 1386/1563 [02:11<00:16, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1388/1563 [02:12<00:16, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1390/1563 [02:12<00:16, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1392/1563 [02:12<00:15, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1394/1563 [02:12<00:15, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1396/1563 [02:12<00:15, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1398/1563 [02:12<00:14, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|████████▉ | 1400/1563 [02:13<00:14, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|████████▉ | 1402/1563 [02:13<00:14, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|████████▉ | 1404/1563 [02:13<00:14, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|████████▉ | 1406/1563 [02:13<00:14, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|█████████ | 1408/1563 [02:13<00:14, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|█████████ | 1410/1563 [02:14<00:14, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|█████████ | 1412/1563 [02:14<00:13, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|█████████ | 1414/1563 [02:14<00:13, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████ | 1416/1563 [02:14<00:13, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████ | 1418/1563 [02:14<00:13, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████ | 1420/1563 [02:14<00:13, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████ | 1422/1563 [02:15<00:12, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████ | 1424/1563 [02:15<00:12, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████ | 1426/1563 [02:15<00:12, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████▏| 1428/1563 [02:15<00:12, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████▏| 1430/1563 [02:15<00:12, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  92%|█████████▏| 1432/1563 [02:16<00:12, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  92%|█████████▏| 1434/1563 [02:16<00:11, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  92%|█████████▏| 1436/1563 [02:16<00:11, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  92%|█████████▏| 1438/1563 [02:16<00:11, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  92%|█████████▏| 1440/1563 [02:16<00:11, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  92%|█████████▏| 1442/1563 [02:17<00:11, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/1:  92%|█████████▏| 1444/1563 [02:17<00:10, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1446/1563 [02:17<00:10, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1448/1563 [02:17<00:10, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1450/1563 [02:17<00:10, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1452/1563 [02:17<00:10, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1454/1563 [02:18<00:10, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1456/1563 [02:18<00:09, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1458/1563 [02:18<00:09, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1460/1563 [02:18<00:09, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▎| 1462/1563 [02:18<00:09, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▎| 1464/1563 [02:19<00:09, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▍| 1466/1563 [02:19<00:08, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▍| 1468/1563 [02:19<00:08, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▍| 1470/1563 [02:19<00:09, 10.27batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▍| 1472/1563 [02:19<00:09,  9.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▍| 1474/1563 [02:20<00:09,  9.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▍| 1475/1563 [02:20<00:09,  9.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▍| 1476/1563 [02:20<00:09,  9.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▍| 1477/1563 [02:20<00:09,  9.18batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▍| 1478/1563 [02:20<00:09,  9.15batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▍| 1479/1563 [02:20<00:09,  8.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▍| 1480/1563 [02:20<00:09,  8.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▍| 1481/1563 [02:20<00:09,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▍| 1482/1563 [02:20<00:09,  8.52batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▍| 1483/1563 [02:21<00:09,  8.28batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▍| 1484/1563 [02:21<00:09,  8.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▌| 1485/1563 [02:21<00:09,  8.26batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▌| 1486/1563 [02:21<00:09,  8.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▌| 1487/1563 [02:21<00:09,  8.27batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▌| 1488/1563 [02:21<00:09,  8.29batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▌| 1489/1563 [02:21<00:08,  8.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▌| 1491/1563 [02:22<00:07,  9.38batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1493/1563 [02:22<00:07,  9.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1495/1563 [02:22<00:06, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1497/1563 [02:22<00:06, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1499/1563 [02:22<00:06, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1501/1563 [02:22<00:05, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1503/1563 [02:23<00:05, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▋| 1505/1563 [02:23<00:05, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▋| 1507/1563 [02:23<00:05, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1509/1563 [02:23<00:04, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1511/1563 [02:23<00:04, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1513/1563 [02:24<00:04, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1515/1563 [02:24<00:04, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1517/1563 [02:24<00:04, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1519/1563 [02:24<00:04, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1521/1563 [02:24<00:03, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1523/1563 [02:24<00:03, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1525/1563 [02:25<00:03, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1527/1563 [02:25<00:03, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1529/1563 [02:25<00:03, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1531/1563 [02:25<00:02, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1533/1563 [02:25<00:02, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1535/1563 [02:26<00:02, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1537/1563 [02:26<00:02, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1539/1563 [02:26<00:02, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▊| 1541/1563 [02:26<00:02, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▊| 1543/1563 [02:26<00:01, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▉| 1545/1563 [02:27<00:01, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▉| 1547/1563 [02:27<00:01, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▉| 1549/1563 [02:27<00:01, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▉| 1551/1563 [02:27<00:01, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▉| 1553/1563 [02:27<00:00, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▉| 1555/1563 [02:27<00:00, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/1: 100%|█████████▉| 1557/1563 [02:28<00:00, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1: 100%|█████████▉| 1559/1563 [02:28<00:00, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1: 100%|█████████▉| 1561/1563 [02:28<00:00, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1: 100%|██████████| 1563/1563 [02:28<00:00, 11.54batch/s]\u001b[A\n",
-            "Epochs: 100%|██████████| 1/1 [02:28<00:00, 148.68s/epoch]"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Epoch 1/1 tamamlandı. Kayıp: 1.6889, Doğruluk: 38.04%\n",
-            "DyT Eğitim Süresi: 148.68 saniye, Son Doğruluk: 38.04%\n",
-            "\n",
-            "Karşılaştırma:\n",
-            "RMSNorm - Süre: 154.52s, Doğruluk: 33.27%\n",
-            "DyT - Süre: 148.68s, Doğruluk: 38.04%\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "\n"
-          ]
-        }
-      ],
-      "source": [
-        "import torch\n",
-        "import torch.nn as nn\n",
-        "import torch.optim as optim\n",
-        "import time\n",
-        "from torchvision import datasets, transforms\n",
-        "from torch.utils.data import DataLoader\n",
-        "from tqdm import tqdm  # İlerleme çubuğu için tqdm ekleniyor\n",
-        "\n",
-        "# 1. RMSNorm Sınıfı\n",
-        "class RMSNorm(nn.Module):\n",
-        "    def __init__(self, dim, eps=1e-6):\n",
-        "        super(RMSNorm, self).__init__()\n",
-        "        self.dim = dim\n",
-        "        self.eps = eps\n",
-        "        self.gamma = nn.Parameter(torch.ones(dim))\n",
-        "        self.beta = nn.Parameter(torch.zeros(dim))\n",
-        "\n",
-        "    def forward(self, x):\n",
-        "        rms = torch.sqrt(torch.mean(x**2, dim=-1, keepdim=True) + self.eps)\n",
-        "        x_norm = x / rms\n",
-        "        return self.gamma * x_norm + self.beta\n",
-        "\n",
-        "# 2. DyT Sınıfı\n",
-        "class DyT(nn.Module):\n",
-        "    def __init__(self, dim, init_alpha=0.5):\n",
-        "        super(DyT, self).__init__()\n",
-        "        self.alpha = nn.Parameter(torch.ones(1) * init_alpha)\n",
-        "        self.gamma = nn.Parameter(torch.ones(dim))\n",
-        "        self.beta = nn.Parameter(torch.zeros(dim))\n",
-        "\n",
-        "    def forward(self, x):\n",
-        "        x = torch.tanh(self.alpha * x)\n",
-        "        return self.gamma * x + self.beta\n",
-        "\n",
-        "# 3. TransformerBlock Sınıfı\n",
-        "class TransformerBlock(nn.Module):\n",
-        "    def __init__(self, dim, num_heads, norm_layer, init_alpha=0.5):\n",
-        "        super(TransformerBlock, self).__init__()\n",
-        "        if norm_layer == 'RMSNorm':\n",
-        "            self.norm1 = RMSNorm(dim)\n",
-        "            self.norm2 = RMSNorm(dim)\n",
-        "        elif norm_layer == 'DyT':\n",
-        "            self.norm1 = DyT(dim, init_alpha)\n",
-        "            self.norm2 = DyT(dim, init_alpha)\n",
-        "        else:\n",
-        "            raise ValueError(\"Geçersiz norm_layer. 'RMSNorm' veya 'DyT' seçin.\")\n",
-        "        self.attn = nn.MultiheadAttention(embed_dim=dim, num_heads=num_heads)\n",
-        "        self.ffn = nn.Sequential(\n",
-        "            nn.Linear(dim, dim * 4),\n",
-        "            nn.GELU(),\n",
-        "            nn.Linear(dim * 4, dim)\n",
-        "        )\n",
-        "\n",
-        "    def forward(self, x):\n",
-        "        attn_output, _ = self.attn(self.norm1(x), self.norm1(x), self.norm1(x))\n",
-        "        x = x + attn_output\n",
-        "        ffn_output = self.ffn(self.norm2(x))\n",
-        "        x = x + ffn_output\n",
-        "        return x\n",
-        "\n",
-        "# 4. SimpleViT Sınıfı\n",
-        "class SimpleViT(nn.Module):\n",
-        "    def __init__(self, img_size=224, patch_size=16, num_classes=10, dim=256, depth=3, heads=4, norm_layer='RMSNorm', init_alpha=0.5):\n",
-        "        super(SimpleViT, self).__init__()\n",
-        "        assert img_size % patch_size == 0, \"Görüntü boyutu yama boyutuna bölünebilir olmalı\"\n",
-        "        num_patches = (img_size // patch_size) ** 2\n",
-        "\n",
-        "        self.patch_embed = nn.Conv2d(3, dim, kernel_size=patch_size, stride=patch_size)\n",
-        "        self.pos_embed = nn.Parameter(torch.randn(1, num_patches + 1, dim))\n",
-        "        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))\n",
-        "\n",
-        "        self.blocks = nn.ModuleList([\n",
-        "            TransformerBlock(dim=dim, num_heads=heads, norm_layer=norm_layer, init_alpha=init_alpha) for _ in range(depth)\n",
-        "        ])\n",
-        "\n",
-        "        self.head = nn.Linear(dim, num_classes)\n",
-        "\n",
-        "    def forward(self, x):\n",
-        "        B = x.shape[0]\n",
-        "        x = self.patch_embed(x)\n",
-        "        x = x.flatten(2).transpose(1, 2)\n",
-        "\n",
-        "        cls_tokens = self.cls_token.expand(B, -1, -1)\n",
-        "        x = torch.cat((cls_tokens, x), dim=1)\n",
-        "        x = x + self.pos_embed\n",
-        "\n",
-        "        x = x.transpose(0, 1)\n",
-        "        for block in self.blocks:\n",
-        "            x = block(x)\n",
-        "        x = x.transpose(0, 1)\n",
-        "\n",
-        "        x = x[:, 0]\n",
-        "        x = self.head(x)\n",
-        "        return x\n",
-        "\n",
-        "# 5. Eğitim ve Değerlendirme Fonksiyonu\n",
-        "def train_model(model, dataloader, criterion, optimizer, num_epochs, device):\n",
-        "    model.to(device)\n",
-        "    start_time = time.time()\n",
-        "\n",
-        "    # Epoch'lar için tqdm\n",
-        "    for epoch in tqdm(range(num_epochs), desc=\"Epochs\", unit=\"epoch\"):\n",
-        "        model.train()\n",
-        "        running_loss = 0.0\n",
-        "        correct = 0\n",
-        "        total = 0\n",
-        "\n",
-        "        # Batch'ler için tqdm\n",
-        "        for inputs, labels in tqdm(dataloader, desc=f\"Epoch {epoch+1}/{num_epochs}\", unit=\"batch\", leave=False):\n",
-        "            inputs, labels = inputs.to(device), labels.to(device)\n",
-        "            optimizer.zero_grad()\n",
-        "            outputs = model(inputs)\n",
-        "            loss = criterion(outputs, labels)\n",
-        "            loss.backward()\n",
-        "            optimizer.step()\n",
-        "\n",
-        "            running_loss += loss.item()\n",
-        "            _, predicted = torch.max(outputs, 1)\n",
-        "            total += labels.size(0)\n",
-        "            correct += (predicted == labels).sum().item()\n",
-        "\n",
-        "        accuracy = 100 * correct / total\n",
-        "        avg_loss = running_loss / len(dataloader)\n",
-        "        print(f\"Epoch {epoch+1}/{num_epochs} tamamlandı. Kayıp: {avg_loss:.4f}, Doğruluk: {accuracy:.2f}%\")\n",
-        "\n",
-        "    end_time = time.time()\n",
-        "    training_time = end_time - start_time\n",
-        "    return training_time, accuracy\n",
-        "\n",
-        "# Veri Seti ve DataLoader (CIFAR-10)\n",
-        "transform = transforms.Compose([\n",
-        "    transforms.Resize((224, 224)),\n",
-        "    transforms.ToTensor(),\n",
-        "    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))\n",
-        "])\n",
-        "\n",
-        "train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)\n",
-        "train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)\n",
-        "\n",
-        "# Cihaz ve Eğitim Parametreleri\n",
-        "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
-        "num_epochs = 1\n",
-        "\n",
-        "# RMSNorm Modeli\n",
-        "model_rms = SimpleViT(norm_layer='RMSNorm')\n",
-        "optimizer_rms = optim.Adam(model_rms.parameters(), lr=0.001)\n",
-        "criterion = nn.CrossEntropyLoss()\n",
-        "\n",
-        "# DyT Modeli\n",
-        "model_dyt = SimpleViT(norm_layer='DyT', init_alpha=0.5)\n",
-        "optimizer_dyt = optim.Adam(model_dyt.parameters(), lr=0.001)\n",
-        "\n",
-        "# Eğitim ve Karşılaştırma\n",
-        "print(\"RMSNorm Modeli Eğitiliyor...\")\n",
-        "time_rms, acc_rms = train_model(model_rms, train_loader, criterion, optimizer_rms, num_epochs, device)\n",
-        "print(f\"RMSNorm Eğitim Süresi: {time_rms:.2f} saniye, Son Doğruluk: {acc_rms:.2f}%\")\n",
-        "\n",
-        "print(\"\\nDyT Modeli Eğitiliyor...\")\n",
-        "time_dyt, acc_dyt = train_model(model_dyt, train_loader, criterion, optimizer_dyt, num_epochs, device)\n",
-        "print(f\"DyT Eğitim Süresi: {time_dyt:.2f} saniye, Son Doğruluk: {acc_dyt:.2f}%\")\n",
-        "\n",
-        "# Karşılaştırma Sonuçları\n",
-        "print(\"\\nKarşılaştırma:\")\n",
-        "print(f\"RMSNorm - Süre: {time_rms:.2f}s, Doğruluk: {acc_rms:.2f}%\")\n",
-        "print(f\"DyT - Süre: {time_dyt:.2f}s, Doğruluk: {acc_dyt:.2f}%\")"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "!pip install tqdm tabulate reportlab"
-      ],
-      "metadata": {
-        "id": "57qK6QKCr3_8"
-      },
-      "execution_count": null,
-      "outputs": []
+  {
+   "cell_type": "code",
+   "source": [
+    "!pip install tqdm tabulate reportlab"
+   ],
+   "metadata": {
+    "id": "57qK6QKCr3_8"
+   },
+   "execution_count": null,
+   "outputs": []
+  },
+  {
+   "cell_type": "code",
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.optim as optim\n",
+    "import time\n",
+    "from torchvision import datasets, transforms\n",
+    "from torch.utils.data import DataLoader\n",
+    "from tqdm import tqdm  # Adding tqdm for the progress bar\n",
+    "\n",
+    "# 1. RMSNorm Class\n",
+    "class RMSNorm(nn.Module):\n",
+    "    def __init__(self, dim, eps=1e-6):\n",
+    "        super(RMSNorm, self).__init__()\n",
+    "        self.dim = dim\n",
+    "        self.eps = eps\n",
+    "        self.gamma = nn.Parameter(torch.ones(dim))\n",
+    "        self.beta = nn.Parameter(torch.zeros(dim))\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        rms = torch.sqrt(torch.mean(x**2, dim=-1, keepdim=True) + self.eps)\n",
+    "        x_norm = x / rms\n",
+    "        return self.gamma * x_norm + self.beta\n",
+    "\n",
+    "# 2. DyT Class\n",
+    "class DyT(nn.Module):\n",
+    "    def __init__(self, dim, init_alpha=0.5):\n",
+    "        super(DyT, self).__init__()\n",
+    "        self.alpha = nn.Parameter(torch.ones(1) * init_alpha)\n",
+    "        self.gamma = nn.Parameter(torch.ones(dim))\n",
+    "        self.beta = nn.Parameter(torch.zeros(dim))\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        x = torch.tanh(self.alpha * x)\n",
+    "        return self.gamma * x + self.beta\n",
+    "\n",
+    "# 3. TransformerBlock Class\n",
+    "class TransformerBlock(nn.Module):\n",
+    "    def __init__(self, dim, num_heads, norm_layer, init_alpha=0.5):\n",
+    "        super(TransformerBlock, self).__init__()\n",
+    "        if norm_layer == 'RMSNorm':\n",
+    "            self.norm1 = RMSNorm(dim)\n",
+    "            self.norm2 = RMSNorm(dim)\n",
+    "        elif norm_layer == 'DyT':\n",
+    "            self.norm1 = DyT(dim, init_alpha)\n",
+    "            self.norm2 = DyT(dim, init_alpha)\n",
+    "        else:\n",
+    "            raise ValueError(\"Invalid norm_layer. Choose 'RMSNorm' or 'DyT'.\")\n",
+    "        self.attn = nn.MultiheadAttention(embed_dim=dim, num_heads=num_heads)\n",
+    "        self.ffn = nn.Sequential(\n",
+    "            nn.Linear(dim, dim * 4),\n",
+    "            nn.GELU(),\n",
+    "            nn.Linear(dim * 4, dim)\n",
+    "        )\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        attn_output, _ = self.attn(self.norm1(x), self.norm1(x), self.norm1(x))\n",
+    "        x = x + attn_output\n",
+    "        ffn_output = self.ffn(self.norm2(x))\n",
+    "        x = x + ffn_output\n",
+    "        return x\n",
+    "\n",
+    "# 4. SimpleViT Class\n",
+    "class SimpleViT(nn.Module):\n",
+    "    def __init__(self, img_size=224, patch_size=16, num_classes=10, dim=256, depth=3, heads=4, norm_layer='RMSNorm', init_alpha=0.5):\n",
+    "        super(SimpleViT, self).__init__()\n",
+    "        assert img_size % patch_size == 0, \"Image size must be divisible by the patch size\"\n",
+    "        num_patches = (img_size // patch_size) ** 2\n",
+    "\n",
+    "        self.patch_embed = nn.Conv2d(3, dim, kernel_size=patch_size, stride=patch_size)\n",
+    "        self.pos_embed = nn.Parameter(torch.randn(1, num_patches + 1, dim))\n",
+    "        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))\n",
+    "\n",
+    "        self.blocks = nn.ModuleList([\n",
+    "            TransformerBlock(dim=dim, num_heads=heads, norm_layer=norm_layer, init_alpha=init_alpha) for _ in range(depth)\n",
+    "        ])\n",
+    "\n",
+    "        self.head = nn.Linear(dim, num_classes)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        B = x.shape[0]\n",
+    "        x = self.patch_embed(x)\n",
+    "        x = x.flatten(2).transpose(1, 2)\n",
+    "\n",
+    "        cls_tokens = self.cls_token.expand(B, -1, -1)\n",
+    "        x = torch.cat((cls_tokens, x), dim=1)\n",
+    "        x = x + self.pos_embed\n",
+    "\n",
+    "        x = x.transpose(0, 1)\n",
+    "        for block in self.blocks:\n",
+    "            x = block(x)\n",
+    "        x = x.transpose(0, 1)\n",
+    "\n",
+    "        x = x[:, 0]\n",
+    "        x = self.head(x)\n",
+    "        return x\n",
+    "\n",
+    "# 5. Training and evaluation function\n",
+    "def train_model(model, dataloader, criterion, optimizer, num_epochs, device):\n",
+    "    model.to(device)\n",
+    "    start_time = time.time()\n",
+    "\n",
+    "    # tqdm for epochs\n",
+    "    for epoch in tqdm(range(num_epochs), desc=\"Epochs\", unit=\"epoch\"):\n",
+    "        model.train()\n",
+    "        running_loss = 0.0\n",
+    "        correct = 0\n",
+    "        total = 0\n",
+    "\n",
+    "        # tqdm for batches\n",
+    "        for inputs, labels in tqdm(dataloader, desc=f\"Epoch {epoch+1}/{num_epochs}\", unit=\"batch\", leave=False):\n",
+    "            inputs, labels = inputs.to(device), labels.to(device)\n",
+    "            optimizer.zero_grad()\n",
+    "            outputs = model(inputs)\n",
+    "            loss = criterion(outputs, labels)\n",
+    "            loss.backward()\n",
+    "            optimizer.step()\n",
+    "\n",
+    "            running_loss += loss.item()\n",
+    "            _, predicted = torch.max(outputs, 1)\n",
+    "            total += labels.size(0)\n",
+    "            correct += (predicted == labels).sum().item()\n",
+    "\n",
+    "        accuracy = 100 * correct / total\n",
+    "        avg_loss = running_loss / len(dataloader)\n",
+    "        print(f\"Epoch {epoch+1}/{num_epochs} completed. Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%\")\n",
+    "\n",
+    "    end_time = time.time()\n",
+    "    training_time = end_time - start_time\n",
+    "    return training_time, accuracy\n",
+    "\n",
+    "# Dataset and DataLoader (CIFAR-10)\n",
+    "transform = transforms.Compose([\n",
+    "    transforms.Resize((224, 224)),\n",
+    "    transforms.ToTensor(),\n",
+    "    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))\n",
+    "])\n",
+    "\n",
+    "train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)\n",
+    "train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)\n",
+    "\n",
+    "# Device and training parameters\n",
+    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+    "num_epochs = 2\n",
+    "\n",
+    "# RMSNorm Model\n",
+    "model_rms = SimpleViT(norm_layer='RMSNorm')\n",
+    "optimizer_rms = optim.Adam(model_rms.parameters(), lr=0.001)\n",
+    "criterion = nn.CrossEntropyLoss()\n",
+    "\n",
+    "# DyT Model\n",
+    "model_dyt = SimpleViT(norm_layer='DyT', init_alpha=0.5)\n",
+    "optimizer_dyt = optim.Adam(model_dyt.parameters(), lr=0.001)\n",
+    "\n",
+    "# Training and comparison\n",
+    "print(\"RMSNorm Model training...\")\n",
+    "time_rms, acc_rms = train_model(model_rms, train_loader, criterion, optimizer_rms, num_epochs, device)\n",
+    "print(f\"RMSNorm Training Time: {time_rms:.2f} seconds, Final Accuracy: {acc_rms:.2f}%\")\n",
+    "\n",
+    "print(\"\\nDyT Model training...\")\n",
+    "time_dyt, acc_dyt = train_model(model_dyt, train_loader, criterion, optimizer_dyt, num_epochs, device)\n",
+    "print(f\"DyT Training Time: {time_dyt:.2f} seconds, Final Accuracy: {acc_dyt:.2f}%\")\n",
+    "\n",
+    "# Comparison results\n",
+    "print(\"\\nComparison:\")\n",
+    "print(f\"RMSNorm - Time: {time_rms:.2f}s, Accuracy: {acc_rms:.2f}%\")\n",
+    "print(f\"DyT - Time: {time_dyt:.2f}s, Accuracy: {acc_dyt:.2f}%\")"
+   ],
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
-    {
-      "cell_type": "code",
-      "source": [
-        "import torch\n",
-        "import torch.nn as nn\n",
-        "import torch.optim as optim\n",
-        "import time\n",
-        "from torchvision import datasets, transforms\n",
-        "from torch.utils.data import DataLoader\n",
-        "from tqdm import tqdm  # İlerleme çubuğu için tqdm ekleniyor\n",
-        "\n",
-        "# 1. RMSNorm Sınıfı\n",
-        "class RMSNorm(nn.Module):\n",
-        "    def __init__(self, dim, eps=1e-6):\n",
-        "        super(RMSNorm, self).__init__()\n",
-        "        self.dim = dim\n",
-        "        self.eps = eps\n",
-        "        self.gamma = nn.Parameter(torch.ones(dim))\n",
-        "        self.beta = nn.Parameter(torch.zeros(dim))\n",
-        "\n",
-        "    def forward(self, x):\n",
-        "        rms = torch.sqrt(torch.mean(x**2, dim=-1, keepdim=True) + self.eps)\n",
-        "        x_norm = x / rms\n",
-        "        return self.gamma * x_norm + self.beta\n",
-        "\n",
-        "# 2. DyT Sınıfı\n",
-        "class DyT(nn.Module):\n",
-        "    def __init__(self, dim, init_alpha=0.5):\n",
-        "        super(DyT, self).__init__()\n",
-        "        self.alpha = nn.Parameter(torch.ones(1) * init_alpha)\n",
-        "        self.gamma = nn.Parameter(torch.ones(dim))\n",
-        "        self.beta = nn.Parameter(torch.zeros(dim))\n",
-        "\n",
-        "    def forward(self, x):\n",
-        "        x = torch.tanh(self.alpha * x)\n",
-        "        return self.gamma * x + self.beta\n",
-        "\n",
-        "# 3. TransformerBlock Sınıfı\n",
-        "class TransformerBlock(nn.Module):\n",
-        "    def __init__(self, dim, num_heads, norm_layer, init_alpha=0.5):\n",
-        "        super(TransformerBlock, self).__init__()\n",
-        "        if norm_layer == 'RMSNorm':\n",
-        "            self.norm1 = RMSNorm(dim)\n",
-        "            self.norm2 = RMSNorm(dim)\n",
-        "        elif norm_layer == 'DyT':\n",
-        "            self.norm1 = DyT(dim, init_alpha)\n",
-        "            self.norm2 = DyT(dim, init_alpha)\n",
-        "        else:\n",
-        "            raise ValueError(\"Geçersiz norm_layer. 'RMSNorm' veya 'DyT' seçin.\")\n",
-        "        self.attn = nn.MultiheadAttention(embed_dim=dim, num_heads=num_heads)\n",
-        "        self.ffn = nn.Sequential(\n",
-        "            nn.Linear(dim, dim * 4),\n",
-        "            nn.GELU(),\n",
-        "            nn.Linear(dim * 4, dim)\n",
-        "        )\n",
-        "\n",
-        "    def forward(self, x):\n",
-        "        attn_output, _ = self.attn(self.norm1(x), self.norm1(x), self.norm1(x))\n",
-        "        x = x + attn_output\n",
-        "        ffn_output = self.ffn(self.norm2(x))\n",
-        "        x = x + ffn_output\n",
-        "        return x\n",
-        "\n",
-        "# 4. SimpleViT Sınıfı\n",
-        "class SimpleViT(nn.Module):\n",
-        "    def __init__(self, img_size=224, patch_size=16, num_classes=10, dim=256, depth=3, heads=4, norm_layer='RMSNorm', init_alpha=0.5):\n",
-        "        super(SimpleViT, self).__init__()\n",
-        "        assert img_size % patch_size == 0, \"Görüntü boyutu yama boyutuna bölünebilir olmalı\"\n",
-        "        num_patches = (img_size // patch_size) ** 2\n",
-        "\n",
-        "        self.patch_embed = nn.Conv2d(3, dim, kernel_size=patch_size, stride=patch_size)\n",
-        "        self.pos_embed = nn.Parameter(torch.randn(1, num_patches + 1, dim))\n",
-        "        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))\n",
-        "\n",
-        "        self.blocks = nn.ModuleList([\n",
-        "            TransformerBlock(dim=dim, num_heads=heads, norm_layer=norm_layer, init_alpha=init_alpha) for _ in range(depth)\n",
-        "        ])\n",
-        "\n",
-        "        self.head = nn.Linear(dim, num_classes)\n",
-        "\n",
-        "    def forward(self, x):\n",
-        "        B = x.shape[0]\n",
-        "        x = self.patch_embed(x)\n",
-        "        x = x.flatten(2).transpose(1, 2)\n",
-        "\n",
-        "        cls_tokens = self.cls_token.expand(B, -1, -1)\n",
-        "        x = torch.cat((cls_tokens, x), dim=1)\n",
-        "        x = x + self.pos_embed\n",
-        "\n",
-        "        x = x.transpose(0, 1)\n",
-        "        for block in self.blocks:\n",
-        "            x = block(x)\n",
-        "        x = x.transpose(0, 1)\n",
-        "\n",
-        "        x = x[:, 0]\n",
-        "        x = self.head(x)\n",
-        "        return x\n",
-        "\n",
-        "# 5. Eğitim ve Değerlendirme Fonksiyonu\n",
-        "def train_model(model, dataloader, criterion, optimizer, num_epochs, device):\n",
-        "    model.to(device)\n",
-        "    start_time = time.time()\n",
-        "\n",
-        "    # Epoch'lar için tqdm\n",
-        "    for epoch in tqdm(range(num_epochs), desc=\"Epochs\", unit=\"epoch\"):\n",
-        "        model.train()\n",
-        "        running_loss = 0.0\n",
-        "        correct = 0\n",
-        "        total = 0\n",
-        "\n",
-        "        # Batch'ler için tqdm\n",
-        "        for inputs, labels in tqdm(dataloader, desc=f\"Epoch {epoch+1}/{num_epochs}\", unit=\"batch\", leave=False):\n",
-        "            inputs, labels = inputs.to(device), labels.to(device)\n",
-        "            optimizer.zero_grad()\n",
-        "            outputs = model(inputs)\n",
-        "            loss = criterion(outputs, labels)\n",
-        "            loss.backward()\n",
-        "            optimizer.step()\n",
-        "\n",
-        "            running_loss += loss.item()\n",
-        "            _, predicted = torch.max(outputs, 1)\n",
-        "            total += labels.size(0)\n",
-        "            correct += (predicted == labels).sum().item()\n",
-        "\n",
-        "        accuracy = 100 * correct / total\n",
-        "        avg_loss = running_loss / len(dataloader)\n",
-        "        print(f\"Epoch {epoch+1}/{num_epochs} tamamlandı. Kayıp: {avg_loss:.4f}, Doğruluk: {accuracy:.2f}%\")\n",
-        "\n",
-        "    end_time = time.time()\n",
-        "    training_time = end_time - start_time\n",
-        "    return training_time, accuracy\n",
-        "\n",
-        "# Veri Seti ve DataLoader (CIFAR-10)\n",
-        "transform = transforms.Compose([\n",
-        "    transforms.Resize((224, 224)),\n",
-        "    transforms.ToTensor(),\n",
-        "    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))\n",
-        "])\n",
-        "\n",
-        "train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)\n",
-        "train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)\n",
-        "\n",
-        "# Cihaz ve Eğitim Parametreleri\n",
-        "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
-        "num_epochs = 2\n",
-        "\n",
-        "# RMSNorm Modeli\n",
-        "model_rms = SimpleViT(norm_layer='RMSNorm')\n",
-        "optimizer_rms = optim.Adam(model_rms.parameters(), lr=0.001)\n",
-        "criterion = nn.CrossEntropyLoss()\n",
-        "\n",
-        "# DyT Modeli\n",
-        "model_dyt = SimpleViT(norm_layer='DyT', init_alpha=0.5)\n",
-        "optimizer_dyt = optim.Adam(model_dyt.parameters(), lr=0.001)\n",
-        "\n",
-        "# Eğitim ve Karşılaştırma\n",
-        "print(\"RMSNorm Modeli Eğitiliyor...\")\n",
-        "time_rms, acc_rms = train_model(model_rms, train_loader, criterion, optimizer_rms, num_epochs, device)\n",
-        "print(f\"RMSNorm Eğitim Süresi: {time_rms:.2f} saniye, Son Doğruluk: {acc_rms:.2f}%\")\n",
-        "\n",
-        "print(\"\\nDyT Modeli Eğitiliyor...\")\n",
-        "time_dyt, acc_dyt = train_model(model_dyt, train_loader, criterion, optimizer_dyt, num_epochs, device)\n",
-        "print(f\"DyT Eğitim Süresi: {time_dyt:.2f} saniye, Son Doğruluk: {acc_dyt:.2f}%\")\n",
-        "\n",
-        "# Karşılaştırma Sonuçları\n",
-        "print(\"\\nKarşılaştırma:\")\n",
-        "print(f\"RMSNorm - Süre: {time_rms:.2f}s, Doğruluk: {acc_rms:.2f}%\")\n",
-        "print(f\"DyT - Süre: {time_dyt:.2f}s, Doğruluk: {acc_dyt:.2f}%\")"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "Ligm3e2erYq6",
-        "outputId": "4e4a2bee-0bd8-40ae-ee7d-c28bed94edec"
-      },
-      "execution_count": 6,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "RMSNorm Modeli Eğitiliyor...\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "Epochs:   0%|          | 0/2 [00:00<?, ?epoch/s]\n",
-            "Epoch 1/2:   0%|          | 0/1563 [00:00<?, ?batch/s]\u001b[A\n",
-            "Epoch 1/2:   0%|          | 1/1563 [00:00<03:24,  7.64batch/s]\u001b[A\n",
-            "Epoch 1/2:   0%|          | 2/1563 [00:00<03:22,  7.72batch/s]\u001b[A\n",
-            "Epoch 1/2:   0%|          | 3/1563 [00:00<03:08,  8.29batch/s]\u001b[A\n",
-            "Epoch 1/2:   0%|          | 4/1563 [00:00<03:00,  8.65batch/s]\u001b[A\n",
-            "Epoch 1/2:   0%|          | 5/1563 [00:00<02:59,  8.70batch/s]\u001b[A\n",
-            "Epoch 1/2:   0%|          | 7/1563 [00:00<02:42,  9.57batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|          | 9/1563 [00:00<02:33, 10.10batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|          | 11/1563 [00:01<02:29, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|          | 13/1563 [00:01<02:28, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|          | 15/1563 [00:01<02:28, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|          | 17/1563 [00:01<02:26, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|          | 19/1563 [00:01<02:24, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|▏         | 21/1563 [00:02<02:23, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|▏         | 23/1563 [00:02<02:22, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 25/1563 [00:02<02:23, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 27/1563 [00:02<02:22, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 29/1563 [00:02<02:22, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 31/1563 [00:03<02:22, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 33/1563 [00:03<02:21, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 35/1563 [00:03<02:21, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 37/1563 [00:03<02:21, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 39/1563 [00:03<02:20, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 41/1563 [00:03<02:20, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 43/1563 [00:04<02:20, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 45/1563 [00:04<02:20, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 47/1563 [00:04<02:19, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 49/1563 [00:04<02:19, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 51/1563 [00:04<02:19, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 53/1563 [00:05<02:19, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▎         | 55/1563 [00:05<02:19, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▎         | 57/1563 [00:05<02:21, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▍         | 59/1563 [00:05<02:20, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▍         | 61/1563 [00:05<02:20, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▍         | 63/1563 [00:05<02:20, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▍         | 65/1563 [00:06<02:19, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▍         | 67/1563 [00:06<02:19, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▍         | 69/1563 [00:06<02:20, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▍         | 71/1563 [00:06<02:18, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▍         | 73/1563 [00:06<02:18, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▍         | 75/1563 [00:07<02:17, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▍         | 77/1563 [00:07<02:16, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▌         | 79/1563 [00:07<02:20, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▌         | 81/1563 [00:07<02:27, 10.04batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▌         | 83/1563 [00:07<02:32,  9.70batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▌         | 84/1563 [00:08<02:36,  9.48batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▌         | 85/1563 [00:08<02:38,  9.35batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 86/1563 [00:08<02:40,  9.21batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 87/1563 [00:08<02:42,  9.09batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 88/1563 [00:08<02:42,  9.06batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 89/1563 [00:08<02:43,  9.04batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 90/1563 [00:08<02:42,  9.04batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 91/1563 [00:08<02:45,  8.88batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 92/1563 [00:08<02:51,  8.56batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 93/1563 [00:09<02:50,  8.64batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 94/1563 [00:09<02:50,  8.64batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 95/1563 [00:09<02:48,  8.72batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 96/1563 [00:09<02:48,  8.69batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 97/1563 [00:09<02:48,  8.70batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▋         | 99/1563 [00:09<02:34,  9.50batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▋         | 101/1563 [00:09<02:28,  9.86batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 103/1563 [00:10<02:24, 10.13batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 105/1563 [00:10<02:20, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 107/1563 [00:10<02:19, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 109/1563 [00:10<02:17, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 111/1563 [00:10<02:18, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 113/1563 [00:11<02:18, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 115/1563 [00:11<02:16, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 117/1563 [00:11<02:16, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:   8%|▊         | 119/1563 [00:11<02:15, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/2:   8%|▊         | 121/1563 [00:11<02:16, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:   8%|▊         | 123/1563 [00:11<02:15, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/2:   8%|▊         | 125/1563 [00:12<02:14, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:   8%|▊         | 127/1563 [00:12<02:13, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:   8%|▊         | 129/1563 [00:12<02:15, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/2:   8%|▊         | 131/1563 [00:12<02:14, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▊         | 133/1563 [00:12<02:13, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▊         | 135/1563 [00:13<02:12, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▉         | 137/1563 [00:13<02:12, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▉         | 139/1563 [00:13<02:12, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▉         | 141/1563 [00:13<02:11, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▉         | 143/1563 [00:13<02:11, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▉         | 145/1563 [00:14<02:10, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▉         | 147/1563 [00:14<02:10, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|▉         | 149/1563 [00:14<02:11, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|▉         | 151/1563 [00:14<02:10, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|▉         | 153/1563 [00:14<02:09, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|▉         | 155/1563 [00:14<02:10, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|█         | 157/1563 [00:15<02:10, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|█         | 159/1563 [00:15<02:10, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|█         | 161/1563 [00:15<02:10, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|█         | 163/1563 [00:15<02:10, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█         | 165/1563 [00:15<02:10, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█         | 167/1563 [00:16<02:10, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█         | 169/1563 [00:16<02:09, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█         | 171/1563 [00:16<02:10, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█         | 173/1563 [00:16<02:09, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█         | 175/1563 [00:16<02:08, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█▏        | 177/1563 [00:16<02:08, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█▏        | 179/1563 [00:17<02:08, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 181/1563 [00:17<02:08, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 183/1563 [00:17<02:08, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 185/1563 [00:17<02:07, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 187/1563 [00:17<02:07, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 189/1563 [00:18<02:07, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 191/1563 [00:18<02:08, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 193/1563 [00:18<02:08, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 195/1563 [00:18<02:08, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 197/1563 [00:18<02:07, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 199/1563 [00:19<02:06, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 201/1563 [00:19<02:06, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 203/1563 [00:19<02:07, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 205/1563 [00:19<02:10, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 207/1563 [00:19<02:15,  9.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 209/1563 [00:20<02:19,  9.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 210/1563 [00:20<02:22,  9.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 211/1563 [00:20<02:24,  9.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▎        | 212/1563 [00:20<02:27,  9.17batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▎        | 213/1563 [00:20<02:27,  9.13batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▎        | 214/1563 [00:20<02:28,  9.11batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 215/1563 [00:20<02:27,  9.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 216/1563 [00:20<02:27,  9.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 217/1563 [00:20<02:30,  8.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 218/1563 [00:21<02:34,  8.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 219/1563 [00:21<02:34,  8.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 220/1563 [00:21<02:34,  8.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 221/1563 [00:21<02:42,  8.27batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 222/1563 [00:21<02:39,  8.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 223/1563 [00:21<02:38,  8.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 225/1563 [00:21<02:22,  9.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▍        | 227/1563 [00:22<02:15,  9.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▍        | 229/1563 [00:22<02:13, 10.01batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▍        | 231/1563 [00:22<02:11, 10.10batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▍        | 233/1563 [00:22<02:09, 10.31batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▌        | 235/1563 [00:22<02:07, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▌        | 237/1563 [00:22<02:05, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▌        | 239/1563 [00:23<02:04, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▌        | 241/1563 [00:23<02:05, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▌        | 243/1563 [00:23<02:04, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▌        | 245/1563 [00:23<02:02, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▌        | 247/1563 [00:23<02:02, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▌        | 249/1563 [00:24<02:01, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▌        | 251/1563 [00:24<02:04, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▌        | 253/1563 [00:24<02:04, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▋        | 255/1563 [00:24<02:02, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▋        | 257/1563 [00:24<02:01, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 259/1563 [00:25<02:01, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 261/1563 [00:25<02:00, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 263/1563 [00:25<02:03, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 265/1563 [00:25<02:02, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 267/1563 [00:25<02:01, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 269/1563 [00:25<02:00, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 271/1563 [00:26<01:59, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 273/1563 [00:26<02:01, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 275/1563 [00:26<02:01, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 277/1563 [00:26<02:00, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 279/1563 [00:26<02:00, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 281/1563 [00:27<01:59, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 283/1563 [00:27<01:59, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 285/1563 [00:27<02:00, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 287/1563 [00:27<01:59, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 289/1563 [00:27<01:58, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▊        | 291/1563 [00:28<01:57, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▊        | 293/1563 [00:28<01:57, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▉        | 295/1563 [00:28<01:59, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▉        | 297/1563 [00:28<01:58, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▉        | 299/1563 [00:28<01:58, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▉        | 301/1563 [00:28<01:57, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▉        | 303/1563 [00:29<01:56, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|█▉        | 305/1563 [00:29<01:56, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|█▉        | 307/1563 [00:29<01:59, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|█▉        | 309/1563 [00:29<01:58, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|█▉        | 311/1563 [00:29<01:57, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|██        | 313/1563 [00:30<01:56, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|██        | 315/1563 [00:30<01:56, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|██        | 317/1563 [00:30<01:56, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|██        | 319/1563 [00:30<01:55, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██        | 321/1563 [00:30<01:54, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██        | 323/1563 [00:31<01:54, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██        | 325/1563 [00:31<01:54, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██        | 327/1563 [00:31<01:55, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██        | 329/1563 [00:31<01:56, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██        | 331/1563 [00:31<01:59, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██▏       | 333/1563 [00:32<02:04,  9.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██▏       | 334/1563 [00:32<02:06,  9.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██▏       | 335/1563 [00:32<02:08,  9.57batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██▏       | 336/1563 [00:32<02:10,  9.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 337/1563 [00:32<02:15,  9.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 338/1563 [00:32<02:14,  9.11batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 339/1563 [00:32<02:16,  8.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 340/1563 [00:32<02:17,  8.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 341/1563 [00:32<02:19,  8.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 342/1563 [00:33<02:19,  8.73batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 343/1563 [00:33<02:22,  8.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 344/1563 [00:33<02:21,  8.60batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 345/1563 [00:33<02:21,  8.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 346/1563 [00:33<02:21,  8.60batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 347/1563 [00:33<02:19,  8.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 348/1563 [00:33<02:19,  8.73batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 349/1563 [00:33<02:17,  8.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 351/1563 [00:34<02:06,  9.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 353/1563 [00:34<02:00, 10.02batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 355/1563 [00:34<01:58, 10.15batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 357/1563 [00:34<01:58, 10.18batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 359/1563 [00:34<01:56, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 361/1563 [00:34<01:54, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 363/1563 [00:35<01:53, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 365/1563 [00:35<01:53, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 367/1563 [00:35<01:52, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▎       | 369/1563 [00:35<01:52, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▎       | 371/1563 [00:35<01:52, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▍       | 373/1563 [00:36<01:51, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▍       | 375/1563 [00:36<01:50, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▍       | 377/1563 [00:36<01:50, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▍       | 379/1563 [00:36<01:50, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▍       | 381/1563 [00:36<01:51, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▍       | 383/1563 [00:37<01:50, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▍       | 385/1563 [00:37<01:49, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▍       | 387/1563 [00:37<01:50, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▍       | 389/1563 [00:37<01:50, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▌       | 391/1563 [00:37<01:50, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▌       | 393/1563 [00:37<01:50, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▌       | 395/1563 [00:38<01:50, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▌       | 397/1563 [00:38<01:49, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▌       | 399/1563 [00:38<01:49, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▌       | 401/1563 [00:38<01:49, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▌       | 403/1563 [00:38<01:48, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▌       | 405/1563 [00:39<01:48, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▌       | 407/1563 [00:39<01:48, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▌       | 409/1563 [00:39<01:47, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▋       | 411/1563 [00:39<01:48, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▋       | 413/1563 [00:39<01:49, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 415/1563 [00:40<01:48, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 417/1563 [00:40<01:49, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 419/1563 [00:40<01:50, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 421/1563 [00:40<01:48, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 423/1563 [00:40<01:49, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 425/1563 [00:41<01:48, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 427/1563 [00:41<01:48, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 429/1563 [00:41<01:47, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 431/1563 [00:41<01:46, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 433/1563 [00:41<01:47, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 435/1563 [00:41<01:46, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 437/1563 [00:42<01:45, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 439/1563 [00:42<01:44, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 441/1563 [00:42<01:45, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 443/1563 [00:42<01:44, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 445/1563 [00:42<01:45, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▊       | 447/1563 [00:43<01:44, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▊       | 449/1563 [00:43<01:44, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▉       | 451/1563 [00:43<01:43, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▉       | 453/1563 [00:43<01:43, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▉       | 455/1563 [00:43<01:44, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▉       | 457/1563 [00:44<01:49, 10.06batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▉       | 459/1563 [00:44<01:54,  9.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▉       | 460/1563 [00:44<01:56,  9.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▉       | 461/1563 [00:44<01:57,  9.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|██▉       | 462/1563 [00:44<01:59,  9.19batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|██▉       | 463/1563 [00:44<02:01,  9.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|██▉       | 464/1563 [00:44<02:04,  8.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|██▉       | 465/1563 [00:44<02:02,  8.95batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|██▉       | 466/1563 [00:45<02:03,  8.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|██▉       | 467/1563 [00:45<02:05,  8.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|██▉       | 468/1563 [00:45<02:05,  8.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|███       | 469/1563 [00:45<02:05,  8.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|███       | 470/1563 [00:45<02:06,  8.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|███       | 471/1563 [00:45<02:06,  8.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|███       | 472/1563 [00:45<02:06,  8.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|███       | 473/1563 [00:45<02:08,  8.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|███       | 474/1563 [00:46<02:03,  8.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|███       | 476/1563 [00:46<01:52,  9.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███       | 478/1563 [00:46<01:48, 10.00batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███       | 480/1563 [00:46<01:45, 10.22batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███       | 482/1563 [00:46<01:44, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███       | 484/1563 [00:46<01:43, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███       | 486/1563 [00:47<01:48,  9.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███       | 487/1563 [00:47<01:48,  9.89batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███▏      | 489/1563 [00:47<01:45, 10.15batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███▏      | 491/1563 [00:47<01:44, 10.25batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 493/1563 [00:47<01:46, 10.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 495/1563 [00:48<01:45, 10.12batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 497/1563 [00:48<01:43, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 499/1563 [00:48<01:42, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 501/1563 [00:48<01:41, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 503/1563 [00:48<01:41, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 505/1563 [00:49<01:41, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 507/1563 [00:49<01:40, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 509/1563 [00:49<01:39, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 511/1563 [00:49<01:38, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 513/1563 [00:49<01:38, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 515/1563 [00:49<01:38, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 517/1563 [00:50<01:38, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 519/1563 [00:50<01:38, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 521/1563 [00:50<01:38, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 523/1563 [00:50<01:38, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▎      | 525/1563 [00:50<01:37, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▎      | 527/1563 [00:51<01:37, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▍      | 529/1563 [00:51<01:37, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▍      | 531/1563 [00:51<01:37, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▍      | 533/1563 [00:51<01:39, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▍      | 535/1563 [00:51<01:38, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▍      | 537/1563 [00:52<01:39, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▍      | 539/1563 [00:52<01:37, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▍      | 541/1563 [00:52<01:38, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▍      | 543/1563 [00:52<01:37, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▍      | 545/1563 [00:52<01:36, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▍      | 547/1563 [00:52<01:36, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▌      | 549/1563 [00:53<01:37, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▌      | 551/1563 [00:53<01:35, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▌      | 553/1563 [00:53<01:35, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 555/1563 [00:53<01:34, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 557/1563 [00:53<01:33, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 559/1563 [00:54<01:34, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 561/1563 [00:54<01:34, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 563/1563 [00:54<01:33, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 565/1563 [00:54<01:33, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▋      | 567/1563 [00:54<01:32, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▋      | 569/1563 [00:55<01:32, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 571/1563 [00:55<01:34, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 573/1563 [00:55<01:34, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 575/1563 [00:55<01:34, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 577/1563 [00:55<01:33, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 579/1563 [00:56<01:35, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 581/1563 [00:56<01:40,  9.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 582/1563 [00:56<01:42,  9.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 583/1563 [00:56<01:45,  9.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 584/1563 [00:56<01:46,  9.22batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 585/1563 [00:56<01:46,  9.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 586/1563 [00:56<01:49,  8.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 587/1563 [00:56<01:50,  8.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 588/1563 [00:57<01:51,  8.73batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 589/1563 [00:57<01:52,  8.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 590/1563 [00:57<01:52,  8.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 591/1563 [00:57<01:53,  8.55batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 592/1563 [00:57<01:53,  8.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 593/1563 [00:57<01:53,  8.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 594/1563 [00:57<01:52,  8.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 595/1563 [00:57<01:52,  8.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 596/1563 [00:57<01:52,  8.59batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 597/1563 [00:58<01:50,  8.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 599/1563 [00:58<01:41,  9.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 601/1563 [00:58<01:36,  9.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▊      | 603/1563 [00:58<01:34, 10.21batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▊      | 605/1563 [00:58<01:32, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▉      | 607/1563 [00:59<01:31, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▉      | 609/1563 [00:59<01:31, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▉      | 611/1563 [00:59<01:31, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▉      | 613/1563 [00:59<01:30, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▉      | 615/1563 [00:59<01:30, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▉      | 617/1563 [00:59<01:29, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|███▉      | 619/1563 [01:00<01:29, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|███▉      | 621/1563 [01:00<01:30, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|███▉      | 623/1563 [01:00<01:29, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|███▉      | 625/1563 [01:00<01:29, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|████      | 627/1563 [01:00<01:29, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|████      | 629/1563 [01:01<01:28, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|████      | 631/1563 [01:01<01:29, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|████      | 633/1563 [01:01<01:29, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████      | 635/1563 [01:01<01:29, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████      | 637/1563 [01:01<01:28, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████      | 639/1563 [01:02<01:28, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████      | 641/1563 [01:02<01:28, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████      | 643/1563 [01:02<01:27, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████▏     | 645/1563 [01:02<01:26, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████▏     | 647/1563 [01:02<01:26, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 649/1563 [01:03<01:26, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 651/1563 [01:03<01:25, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 653/1563 [01:03<01:27, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 655/1563 [01:03<01:26, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 657/1563 [01:03<01:26, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 659/1563 [01:04<01:25, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 661/1563 [01:04<01:25, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 663/1563 [01:04<01:26, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 665/1563 [01:04<01:25, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 667/1563 [01:04<01:25, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 669/1563 [01:04<01:25, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 671/1563 [01:05<01:24, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 673/1563 [01:05<01:24, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 675/1563 [01:05<01:26, 10.28batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 677/1563 [01:05<01:25, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 679/1563 [01:05<01:24, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▎     | 681/1563 [01:06<01:24, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▎     | 683/1563 [01:06<01:24, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 685/1563 [01:06<01:24, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 687/1563 [01:06<01:24, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 689/1563 [01:06<01:23, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 691/1563 [01:07<01:23, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 693/1563 [01:07<01:23, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 695/1563 [01:07<01:23, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▍     | 697/1563 [01:07<01:23, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▍     | 699/1563 [01:07<01:22, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▍     | 701/1563 [01:08<01:22, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▍     | 703/1563 [01:08<01:24, 10.15batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▌     | 705/1563 [01:08<01:28,  9.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▌     | 706/1563 [01:08<01:30,  9.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▌     | 707/1563 [01:08<01:31,  9.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▌     | 708/1563 [01:08<01:32,  9.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▌     | 709/1563 [01:08<01:34,  9.03batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▌     | 710/1563 [01:09<01:34,  9.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▌     | 711/1563 [01:09<01:34,  8.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 712/1563 [01:09<01:34,  8.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 713/1563 [01:09<01:34,  8.95batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 714/1563 [01:09<01:35,  8.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 715/1563 [01:09<01:39,  8.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 716/1563 [01:09<01:40,  8.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 717/1563 [01:09<01:41,  8.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 718/1563 [01:09<01:41,  8.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 719/1563 [01:10<01:41,  8.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 720/1563 [01:10<01:41,  8.27batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 721/1563 [01:10<01:41,  8.33batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 722/1563 [01:10<01:36,  8.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▋     | 724/1563 [01:10<01:28,  9.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▋     | 726/1563 [01:10<01:24,  9.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 728/1563 [01:11<01:22, 10.10batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 730/1563 [01:11<01:21, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 732/1563 [01:11<01:20, 10.27batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 734/1563 [01:11<01:22, 10.09batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 736/1563 [01:11<01:34,  8.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 738/1563 [01:12<01:29,  9.18batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 740/1563 [01:12<01:26,  9.55batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 741/1563 [01:12<01:26,  9.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 742/1563 [01:12<01:45,  7.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 743/1563 [01:12<01:41,  8.06batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 745/1563 [01:12<01:32,  8.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 747/1563 [01:13<01:26,  9.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 749/1563 [01:13<01:33,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 750/1563 [01:13<01:37,  8.31batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 752/1563 [01:13<01:31,  8.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 754/1563 [01:13<01:26,  9.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 756/1563 [01:14<01:22,  9.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 758/1563 [01:14<01:20, 10.03batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▊     | 760/1563 [01:14<01:19, 10.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▉     | 762/1563 [01:14<01:17, 10.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▉     | 764/1563 [01:14<01:17, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▉     | 766/1563 [01:15<01:16, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▉     | 768/1563 [01:15<01:15, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▉     | 770/1563 [01:15<01:16, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▉     | 772/1563 [01:15<01:15, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|████▉     | 774/1563 [01:15<01:15, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|████▉     | 776/1563 [01:15<01:15, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|████▉     | 778/1563 [01:16<01:14, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|████▉     | 780/1563 [01:16<01:14, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|█████     | 782/1563 [01:16<01:13, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|█████     | 784/1563 [01:16<01:14, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|█████     | 786/1563 [01:16<01:14, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|█████     | 788/1563 [01:17<01:13, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████     | 790/1563 [01:17<01:13, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████     | 792/1563 [01:17<01:14, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████     | 794/1563 [01:17<01:13, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████     | 796/1563 [01:17<01:14, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████     | 798/1563 [01:18<01:13, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████     | 800/1563 [01:18<01:13, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████▏    | 802/1563 [01:18<01:13, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████▏    | 804/1563 [01:18<01:12, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 806/1563 [01:18<01:12, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 808/1563 [01:19<01:12, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 810/1563 [01:19<01:11, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 812/1563 [01:19<01:11, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 814/1563 [01:19<01:11, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 816/1563 [01:19<01:11, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 818/1563 [01:20<01:11, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 820/1563 [01:20<01:10, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 822/1563 [01:20<01:10, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 824/1563 [01:20<01:14,  9.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 825/1563 [01:20<01:16,  9.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 826/1563 [01:20<01:18,  9.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 827/1563 [01:20<01:19,  9.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 828/1563 [01:21<01:20,  9.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 829/1563 [01:21<01:20,  9.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 830/1563 [01:21<01:20,  9.07batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 831/1563 [01:21<01:21,  9.02batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 832/1563 [01:21<01:21,  8.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 833/1563 [01:21<01:23,  8.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 834/1563 [01:21<01:22,  8.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 835/1563 [01:21<01:22,  8.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 836/1563 [01:21<01:22,  8.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▎    | 837/1563 [01:22<01:24,  8.59batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▎    | 838/1563 [01:22<01:23,  8.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▎    | 839/1563 [01:22<01:25,  8.51batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▎    | 840/1563 [01:22<01:25,  8.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▍    | 841/1563 [01:22<01:25,  8.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▍    | 842/1563 [01:22<01:25,  8.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▍    | 844/1563 [01:22<01:18,  9.21batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▍    | 845/1563 [01:23<01:16,  9.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▍    | 847/1563 [01:23<01:13,  9.81batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▍    | 849/1563 [01:23<01:10, 10.09batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▍    | 851/1563 [01:23<01:09, 10.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▍    | 853/1563 [01:23<01:07, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▍    | 855/1563 [01:23<01:07, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▍    | 857/1563 [01:24<01:07, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▍    | 859/1563 [01:24<01:07, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▌    | 861/1563 [01:24<01:08, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▌    | 863/1563 [01:24<01:07, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▌    | 865/1563 [01:24<01:07, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▌    | 867/1563 [01:25<01:07, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▌    | 869/1563 [01:25<01:06, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▌    | 871/1563 [01:25<01:06, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▌    | 873/1563 [01:25<01:06, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▌    | 875/1563 [01:25<01:05, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▌    | 877/1563 [01:26<01:06, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▌    | 879/1563 [01:26<01:05, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▋    | 881/1563 [01:26<01:05, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▋    | 883/1563 [01:26<01:04, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 885/1563 [01:26<01:04, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 887/1563 [01:27<01:04, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 889/1563 [01:27<01:04, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 891/1563 [01:27<01:04, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 893/1563 [01:27<01:03, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 895/1563 [01:27<01:03, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 897/1563 [01:27<01:02, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 899/1563 [01:28<01:03, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 901/1563 [01:28<01:02, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 903/1563 [01:28<01:03, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 905/1563 [01:28<01:02, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 907/1563 [01:28<01:02, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 909/1563 [01:29<01:02, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 911/1563 [01:29<01:02, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 913/1563 [01:29<01:02, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▊    | 915/1563 [01:29<01:01, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▊    | 917/1563 [01:29<01:01, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▉    | 919/1563 [01:30<01:01, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▉    | 921/1563 [01:30<01:01, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▉    | 923/1563 [01:30<01:01, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▉    | 925/1563 [01:30<01:00, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▉    | 927/1563 [01:30<01:00, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▉    | 929/1563 [01:31<01:00, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|█████▉    | 931/1563 [01:31<01:01, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|█████▉    | 933/1563 [01:31<01:01, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|█████▉    | 935/1563 [01:31<01:00, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|█████▉    | 937/1563 [01:31<01:00, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|██████    | 939/1563 [01:31<00:59, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|██████    | 941/1563 [01:32<00:59, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|██████    | 943/1563 [01:32<00:59, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|██████    | 945/1563 [01:32<00:58, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 947/1563 [01:32<01:00, 10.20batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 949/1563 [01:32<01:03,  9.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 950/1563 [01:33<01:04,  9.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 951/1563 [01:33<01:05,  9.31batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 952/1563 [01:33<01:06,  9.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 953/1563 [01:33<01:07,  9.00batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 954/1563 [01:33<01:07,  8.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 955/1563 [01:33<01:08,  8.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 956/1563 [01:33<01:09,  8.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 957/1563 [01:33<01:11,  8.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████▏   | 958/1563 [01:34<01:10,  8.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████▏   | 959/1563 [01:34<01:10,  8.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████▏   | 960/1563 [01:34<01:12,  8.33batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████▏   | 961/1563 [01:34<01:13,  8.19batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 962/1563 [01:34<01:12,  8.33batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 963/1563 [01:34<01:12,  8.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 964/1563 [01:34<01:12,  8.31batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 965/1563 [01:34<01:12,  8.30batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 967/1563 [01:35<01:04,  9.20batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 969/1563 [01:35<01:01,  9.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 970/1563 [01:35<01:01,  9.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 971/1563 [01:35<01:00,  9.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 972/1563 [01:35<01:00,  9.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 974/1563 [01:35<00:58, 10.06batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 975/1563 [01:35<00:58, 10.03batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 977/1563 [01:36<00:57, 10.25batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 979/1563 [01:36<00:56, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 981/1563 [01:36<00:57, 10.17batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 983/1563 [01:36<00:56, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 985/1563 [01:36<00:55, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 987/1563 [01:37<00:55, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 989/1563 [01:37<00:54, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 991/1563 [01:37<00:55, 10.22batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▎   | 993/1563 [01:37<00:54, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▎   | 995/1563 [01:37<00:54, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▍   | 997/1563 [01:37<00:53, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▍   | 999/1563 [01:38<00:53, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▍   | 1001/1563 [01:38<00:53, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▍   | 1003/1563 [01:38<00:53, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▍   | 1005/1563 [01:38<00:53, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▍   | 1007/1563 [01:38<00:53, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▍   | 1009/1563 [01:39<00:53, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▍   | 1011/1563 [01:39<00:52, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▍   | 1013/1563 [01:39<00:52, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▍   | 1015/1563 [01:39<00:52, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▌   | 1017/1563 [01:39<00:52, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▌   | 1019/1563 [01:40<00:51, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▌   | 1021/1563 [01:40<00:51, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▌   | 1023/1563 [01:40<00:52, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▌   | 1025/1563 [01:40<00:51, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▌   | 1027/1563 [01:40<00:51, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▌   | 1029/1563 [01:41<00:51, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▌   | 1031/1563 [01:41<00:50, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▌   | 1033/1563 [01:41<00:51, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▌   | 1035/1563 [01:41<00:51, 10.24batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▋   | 1037/1563 [01:41<00:50, 10.33batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▋   | 1039/1563 [01:42<00:50, 10.33batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1041/1563 [01:42<00:50, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1043/1563 [01:42<00:50, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1045/1563 [01:42<00:49, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1047/1563 [01:42<00:49, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1049/1563 [01:42<00:49, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1051/1563 [01:43<00:48, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1053/1563 [01:43<00:48, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1055/1563 [01:43<00:48, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1057/1563 [01:43<00:48, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1059/1563 [01:43<00:47, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1061/1563 [01:44<00:47, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1063/1563 [01:44<00:47, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1065/1563 [01:44<00:47, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1067/1563 [01:44<00:47, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1069/1563 [01:44<00:47, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▊   | 1071/1563 [01:45<00:49,  9.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▊   | 1072/1563 [01:45<00:51,  9.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▊   | 1073/1563 [01:45<00:52,  9.28batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▊   | 1074/1563 [01:45<00:54,  9.04batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1075/1563 [01:45<00:54,  8.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1076/1563 [01:45<00:54,  8.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1077/1563 [01:45<00:54,  8.89batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1078/1563 [01:45<00:54,  8.89batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1079/1563 [01:46<00:55,  8.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1080/1563 [01:46<00:55,  8.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1081/1563 [01:46<00:54,  8.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1082/1563 [01:46<00:55,  8.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1083/1563 [01:46<00:56,  8.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1084/1563 [01:46<00:56,  8.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1085/1563 [01:46<00:56,  8.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1086/1563 [01:46<00:56,  8.40batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|██████▉   | 1087/1563 [01:46<00:56,  8.40batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|██████▉   | 1088/1563 [01:47<00:57,  8.30batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|██████▉   | 1089/1563 [01:47<00:56,  8.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|██████▉   | 1090/1563 [01:47<00:53,  8.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|██████▉   | 1091/1563 [01:47<00:53,  8.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|██████▉   | 1093/1563 [01:47<00:49,  9.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|██████▉   | 1094/1563 [01:47<00:49,  9.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|███████   | 1096/1563 [01:47<00:47,  9.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|███████   | 1098/1563 [01:48<00:45, 10.12batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|███████   | 1099/1563 [01:48<00:46, 10.08batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|███████   | 1101/1563 [01:48<00:45, 10.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████   | 1103/1563 [01:48<00:44, 10.27batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████   | 1105/1563 [01:48<00:44, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████   | 1107/1563 [01:48<00:43, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████   | 1109/1563 [01:49<00:43, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████   | 1111/1563 [01:49<00:43, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████   | 1113/1563 [01:49<00:43, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████▏  | 1115/1563 [01:49<00:42, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████▏  | 1117/1563 [01:49<00:42, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1119/1563 [01:50<00:42, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1121/1563 [01:50<00:42, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1123/1563 [01:50<00:42, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1125/1563 [01:50<00:41, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1127/1563 [01:50<00:42, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1129/1563 [01:51<00:42, 10.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1131/1563 [01:51<00:41, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1133/1563 [01:51<00:41, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1135/1563 [01:51<00:41, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1137/1563 [01:51<00:42, 10.07batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1139/1563 [01:52<00:41, 10.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1141/1563 [01:52<00:41, 10.23batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1143/1563 [01:52<00:40, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1145/1563 [01:52<00:40, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1147/1563 [01:52<00:39, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▎  | 1149/1563 [01:53<00:39, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▎  | 1151/1563 [01:53<00:39, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▍  | 1153/1563 [01:53<00:39, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▍  | 1155/1563 [01:53<00:39, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▍  | 1157/1563 [01:53<00:38, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▍  | 1159/1563 [01:54<00:39, 10.22batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▍  | 1161/1563 [01:54<00:38, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▍  | 1163/1563 [01:54<00:38, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▍  | 1165/1563 [01:54<00:38, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▍  | 1167/1563 [01:54<00:37, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▍  | 1169/1563 [01:54<00:37, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▍  | 1171/1563 [01:55<00:37, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▌  | 1173/1563 [01:55<00:37, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▌  | 1175/1563 [01:55<00:37, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▌  | 1177/1563 [01:55<00:36, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▌  | 1179/1563 [01:55<00:36, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▌  | 1181/1563 [01:56<00:36, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▌  | 1183/1563 [01:56<00:36, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▌  | 1185/1563 [01:56<00:36, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▌  | 1187/1563 [01:56<00:36, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▌  | 1189/1563 [01:56<00:35, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▌  | 1191/1563 [01:57<00:36, 10.33batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▋  | 1193/1563 [01:57<00:36, 10.09batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▋  | 1195/1563 [01:57<00:38,  9.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1196/1563 [01:57<00:38,  9.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1197/1563 [01:57<00:39,  9.33batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1198/1563 [01:57<00:39,  9.23batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1199/1563 [01:57<00:39,  9.11batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1200/1563 [01:58<00:39,  9.08batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1201/1563 [01:58<00:40,  9.03batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1202/1563 [01:58<00:41,  8.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1203/1563 [01:58<00:41,  8.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1204/1563 [01:58<00:42,  8.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1205/1563 [01:58<00:41,  8.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1206/1563 [01:58<00:41,  8.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1207/1563 [01:58<00:41,  8.57batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1208/1563 [01:59<00:41,  8.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1209/1563 [01:59<00:42,  8.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1210/1563 [01:59<00:42,  8.30batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1211/1563 [01:59<00:45,  7.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1212/1563 [01:59<00:42,  8.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1214/1563 [01:59<00:38,  9.18batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1215/1563 [01:59<00:37,  9.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1217/1563 [02:00<00:35,  9.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1219/1563 [02:00<00:34, 10.02batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1221/1563 [02:00<00:33, 10.18batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1223/1563 [02:00<00:32, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1225/1563 [02:00<00:32, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▊  | 1227/1563 [02:00<00:31, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▊  | 1229/1563 [02:01<00:31, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▉  | 1231/1563 [02:01<00:31, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▉  | 1233/1563 [02:01<00:31, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▉  | 1235/1563 [02:01<00:31, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▉  | 1237/1563 [02:01<00:30, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▉  | 1239/1563 [02:02<00:30, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▉  | 1241/1563 [02:02<00:31, 10.28batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|███████▉  | 1243/1563 [02:02<00:30, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|███████▉  | 1245/1563 [02:02<00:30, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|███████▉  | 1247/1563 [02:02<00:30, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|███████▉  | 1249/1563 [02:03<00:29, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|████████  | 1251/1563 [02:03<00:29, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|████████  | 1253/1563 [02:03<00:29, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|████████  | 1255/1563 [02:03<00:29, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|████████  | 1257/1563 [02:03<00:29, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████  | 1259/1563 [02:04<00:29, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████  | 1261/1563 [02:04<00:28, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████  | 1263/1563 [02:04<00:28, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████  | 1265/1563 [02:04<00:28, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████  | 1267/1563 [02:04<00:28, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████  | 1269/1563 [02:04<00:27, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████▏ | 1271/1563 [02:05<00:27, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████▏ | 1273/1563 [02:05<00:27, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1275/1563 [02:05<00:27, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1277/1563 [02:05<00:27, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1279/1563 [02:05<00:26, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1281/1563 [02:06<00:26, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1283/1563 [02:06<00:27, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1285/1563 [02:06<00:26, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1287/1563 [02:06<00:26, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1289/1563 [02:06<00:26, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1291/1563 [02:07<00:25, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1293/1563 [02:07<00:25, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1295/1563 [02:07<00:25, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1297/1563 [02:07<00:25, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1299/1563 [02:07<00:25, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1301/1563 [02:08<00:24, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1303/1563 [02:08<00:24, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1305/1563 [02:08<00:24, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▎ | 1307/1563 [02:08<00:24, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▎ | 1309/1563 [02:08<00:24, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▍ | 1311/1563 [02:08<00:24, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▍ | 1313/1563 [02:09<00:24, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▍ | 1315/1563 [02:09<00:24, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▍ | 1317/1563 [02:09<00:24, 10.08batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▍ | 1319/1563 [02:09<00:25,  9.73batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▍ | 1320/1563 [02:09<00:25,  9.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▍ | 1321/1563 [02:10<00:25,  9.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▍ | 1322/1563 [02:10<00:26,  9.22batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▍ | 1323/1563 [02:10<00:27,  8.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▍ | 1324/1563 [02:10<00:28,  8.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▍ | 1325/1563 [02:10<00:29,  8.19batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▍ | 1326/1563 [02:10<00:28,  8.25batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▍ | 1327/1563 [02:10<00:28,  8.22batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▍ | 1328/1563 [02:10<00:28,  8.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1329/1563 [02:11<00:28,  8.23batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1330/1563 [02:11<00:27,  8.40batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1331/1563 [02:11<00:27,  8.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1332/1563 [02:11<00:28,  8.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1333/1563 [02:11<00:28,  8.09batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1334/1563 [02:11<00:28,  8.15batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1335/1563 [02:11<00:27,  8.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1336/1563 [02:11<00:25,  8.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1337/1563 [02:11<00:25,  9.00batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1339/1563 [02:12<00:23,  9.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1341/1563 [02:12<00:22,  9.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1342/1563 [02:12<00:22,  9.83batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1343/1563 [02:12<00:22,  9.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1345/1563 [02:12<00:21,  9.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1346/1563 [02:12<00:21,  9.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1347/1563 [02:12<00:21,  9.96batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1348/1563 [02:13<00:21,  9.96batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▋ | 1350/1563 [02:13<00:20, 10.21batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1352/1563 [02:13<00:20, 10.23batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1354/1563 [02:13<00:20, 10.24batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1356/1563 [02:13<00:20, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1358/1563 [02:14<00:19, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1360/1563 [02:14<00:19, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1362/1563 [02:14<00:19, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1364/1563 [02:14<00:19, 10.24batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1366/1563 [02:14<00:19, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1368/1563 [02:14<00:18, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1370/1563 [02:15<00:18, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1372/1563 [02:15<00:18, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1374/1563 [02:15<00:18, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1376/1563 [02:15<00:18, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1378/1563 [02:15<00:17, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1380/1563 [02:16<00:17, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1382/1563 [02:16<00:17, 10.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▊ | 1384/1563 [02:16<00:17, 10.27batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▊ | 1386/1563 [02:16<00:17, 10.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▉ | 1388/1563 [02:16<00:17, 10.28batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▉ | 1390/1563 [02:17<00:16, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▉ | 1392/1563 [02:17<00:16, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▉ | 1394/1563 [02:17<00:16, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▉ | 1396/1563 [02:17<00:16, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▉ | 1398/1563 [02:17<00:15, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|████████▉ | 1400/1563 [02:18<00:15, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|████████▉ | 1402/1563 [02:18<00:15, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|████████▉ | 1404/1563 [02:18<00:15, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|████████▉ | 1406/1563 [02:18<00:15, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|█████████ | 1408/1563 [02:18<00:15, 10.33batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|█████████ | 1410/1563 [02:19<00:14, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|█████████ | 1412/1563 [02:19<00:14, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|█████████ | 1414/1563 [02:19<00:14, 10.33batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████ | 1416/1563 [02:19<00:14, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████ | 1418/1563 [02:19<00:14, 10.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████ | 1420/1563 [02:20<00:13, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████ | 1422/1563 [02:20<00:13, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████ | 1424/1563 [02:20<00:13, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████ | 1426/1563 [02:20<00:13, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████▏| 1428/1563 [02:20<00:12, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████▏| 1430/1563 [02:20<00:12, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1432/1563 [02:21<00:12, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1434/1563 [02:21<00:12, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1436/1563 [02:21<00:12, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1438/1563 [02:21<00:12, 10.17batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1440/1563 [02:21<00:12,  9.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1441/1563 [02:22<00:13,  9.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1442/1563 [02:22<00:13,  9.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1443/1563 [02:22<00:13,  9.02batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1444/1563 [02:22<00:13,  8.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1445/1563 [02:22<00:13,  8.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1446/1563 [02:22<00:13,  8.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1447/1563 [02:22<00:14,  8.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1448/1563 [02:22<00:13,  8.31batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1449/1563 [02:23<00:13,  8.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1450/1563 [02:23<00:13,  8.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1451/1563 [02:23<00:13,  8.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1452/1563 [02:23<00:12,  8.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1453/1563 [02:23<00:12,  8.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1454/1563 [02:23<00:13,  8.31batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1455/1563 [02:23<00:13,  8.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1456/1563 [02:23<00:13,  8.15batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1457/1563 [02:24<00:13,  8.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1459/1563 [02:24<00:11,  9.09batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1460/1563 [02:24<00:11,  9.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1461/1563 [02:24<00:10,  9.28batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▎| 1462/1563 [02:24<00:10,  9.28batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▎| 1464/1563 [02:24<00:10,  9.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1466/1563 [02:24<00:09,  9.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1468/1563 [02:25<00:09, 10.13batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1470/1563 [02:25<00:09, 10.19batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1472/1563 [02:25<00:08, 10.22batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1474/1563 [02:25<00:08, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1476/1563 [02:25<00:08, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▍| 1478/1563 [02:26<00:08, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▍| 1480/1563 [02:26<00:07, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▍| 1482/1563 [02:26<00:07, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▍| 1484/1563 [02:26<00:07, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▌| 1486/1563 [02:26<00:07, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▌| 1488/1563 [02:27<00:07, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▌| 1490/1563 [02:27<00:06, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▌| 1492/1563 [02:27<00:06, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▌| 1494/1563 [02:27<00:06, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▌| 1496/1563 [02:27<00:06, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▌| 1498/1563 [02:27<00:06, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▌| 1500/1563 [02:28<00:06, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▌| 1502/1563 [02:28<00:05, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▌| 1504/1563 [02:28<00:05, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▋| 1506/1563 [02:28<00:05, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▋| 1508/1563 [02:28<00:05, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  97%|█████████▋| 1510/1563 [02:29<00:05, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  97%|█████████▋| 1512/1563 [02:29<00:04, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  97%|█████████▋| 1514/1563 [02:29<00:04, 10.33batch/s]\u001b[A\n",
-            "Epoch 1/2:  97%|█████████▋| 1516/1563 [02:29<00:04, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  97%|█████████▋| 1518/1563 [02:29<00:04, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  97%|█████████▋| 1520/1563 [02:30<00:04, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  97%|█████████▋| 1522/1563 [02:30<00:03, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1524/1563 [02:30<00:03, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1526/1563 [02:30<00:03, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1528/1563 [02:30<00:03, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1530/1563 [02:31<00:03, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1532/1563 [02:31<00:02, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1534/1563 [02:31<00:02, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1536/1563 [02:31<00:02, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1538/1563 [02:31<00:02, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▊| 1540/1563 [02:32<00:02, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▊| 1542/1563 [02:32<00:02, 10.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▉| 1544/1563 [02:32<00:01, 10.28batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▉| 1546/1563 [02:32<00:01, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▉| 1548/1563 [02:32<00:01, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▉| 1550/1563 [02:32<00:01, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▉| 1552/1563 [02:33<00:01, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▉| 1554/1563 [02:33<00:00, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/2: 100%|█████████▉| 1556/1563 [02:33<00:00, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2: 100%|█████████▉| 1558/1563 [02:33<00:00, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2: 100%|█████████▉| 1560/1563 [02:33<00:00, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2: 100%|█████████▉| 1562/1563 [02:34<00:00, 10.06batch/s]\u001b[A\n",
-            "Epochs:  50%|█████     | 1/2 [02:34<02:34, 154.24s/epoch]"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Epoch 1/2 tamamlandı. Kayıp: 1.7931, Doğruluk: 33.94%\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "\n",
-            "Epoch 2/2:   0%|          | 0/1563 [00:00<?, ?batch/s]\u001b[A\n",
-            "Epoch 2/2:   0%|          | 1/1563 [00:00<03:19,  7.83batch/s]\u001b[A\n",
-            "Epoch 2/2:   0%|          | 2/1563 [00:00<03:19,  7.83batch/s]\u001b[A\n",
-            "Epoch 2/2:   0%|          | 3/1563 [00:00<03:06,  8.38batch/s]\u001b[A\n",
-            "Epoch 2/2:   0%|          | 4/1563 [00:00<03:00,  8.62batch/s]\u001b[A\n",
-            "Epoch 2/2:   0%|          | 5/1563 [00:00<03:02,  8.52batch/s]\u001b[A\n",
-            "Epoch 2/2:   0%|          | 6/1563 [00:00<03:04,  8.45batch/s]\u001b[A\n",
-            "Epoch 2/2:   0%|          | 7/1563 [00:00<03:04,  8.41batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 8/1563 [00:00<03:05,  8.37batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 9/1563 [00:01<03:06,  8.35batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 10/1563 [00:01<03:12,  8.06batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 11/1563 [00:01<03:09,  8.18batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 12/1563 [00:01<03:05,  8.38batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 13/1563 [00:01<03:02,  8.48batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 14/1563 [00:01<03:01,  8.53batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 15/1563 [00:01<03:00,  8.59batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 16/1563 [00:01<02:59,  8.64batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 17/1563 [00:02<03:06,  8.30batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 19/1563 [00:02<02:48,  9.17batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|▏         | 21/1563 [00:02<02:38,  9.73batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|▏         | 23/1563 [00:02<02:35,  9.93batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 25/1563 [00:02<02:31, 10.17batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 27/1563 [00:02<02:29, 10.29batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 29/1563 [00:03<02:31, 10.11batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 31/1563 [00:03<02:28, 10.29batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 33/1563 [00:03<02:27, 10.36batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 35/1563 [00:03<02:26, 10.41batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 37/1563 [00:03<02:26, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 39/1563 [00:04<02:25, 10.45batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 41/1563 [00:04<02:24, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 43/1563 [00:04<02:23, 10.63batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 45/1563 [00:04<02:23, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 47/1563 [00:04<02:23, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 49/1563 [00:05<02:23, 10.53batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 51/1563 [00:05<02:23, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 53/1563 [00:05<02:22, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▎         | 55/1563 [00:05<02:22, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▎         | 57/1563 [00:05<02:21, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▍         | 59/1563 [00:06<02:21, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▍         | 61/1563 [00:06<02:22, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▍         | 63/1563 [00:06<02:21, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▍         | 65/1563 [00:06<02:20, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▍         | 67/1563 [00:06<02:20, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▍         | 69/1563 [00:06<02:20, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▍         | 71/1563 [00:07<02:22, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▍         | 73/1563 [00:07<02:21, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▍         | 75/1563 [00:07<02:20, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▍         | 77/1563 [00:07<02:19, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▌         | 79/1563 [00:07<02:19, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▌         | 81/1563 [00:08<02:19, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▌         | 83/1563 [00:08<02:21, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▌         | 85/1563 [00:08<02:20, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▌         | 87/1563 [00:08<02:19, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▌         | 89/1563 [00:08<02:18, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▌         | 91/1563 [00:09<02:17, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▌         | 93/1563 [00:09<02:19, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▌         | 95/1563 [00:09<02:18, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▌         | 97/1563 [00:09<02:17, 10.63batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▋         | 99/1563 [00:09<02:17, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▋         | 101/1563 [00:09<02:18, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 103/1563 [00:10<02:19, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 105/1563 [00:10<02:19, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 107/1563 [00:10<02:17, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 109/1563 [00:10<02:17, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 111/1563 [00:10<02:16, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 113/1563 [00:11<02:16, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 115/1563 [00:11<02:17, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 117/1563 [00:11<02:16, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 119/1563 [00:11<02:16, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 121/1563 [00:11<02:17, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 123/1563 [00:12<02:23, 10.05batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 125/1563 [00:12<02:34,  9.28batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 126/1563 [00:12<02:36,  9.20batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 127/1563 [00:12<02:40,  8.96batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 128/1563 [00:12<02:39,  8.98batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 129/1563 [00:12<02:40,  8.94batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 130/1563 [00:12<02:41,  8.88batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 131/1563 [00:13<02:44,  8.71batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 132/1563 [00:13<02:46,  8.58batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▊         | 133/1563 [00:13<02:45,  8.64batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▊         | 134/1563 [00:13<02:47,  8.54batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▊         | 135/1563 [00:13<02:45,  8.61batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▊         | 136/1563 [00:13<02:46,  8.55batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 137/1563 [00:13<02:47,  8.51batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 138/1563 [00:13<02:47,  8.53batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 139/1563 [00:14<02:48,  8.44batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 140/1563 [00:14<02:50,  8.33batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 141/1563 [00:14<02:49,  8.37batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 143/1563 [00:14<02:36,  9.07batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 145/1563 [00:14<02:27,  9.64batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 147/1563 [00:14<02:21, 10.01batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 148/1563 [00:14<02:21,  9.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|▉         | 150/1563 [00:15<02:19, 10.11batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|▉         | 151/1563 [00:15<02:20, 10.05batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|▉         | 153/1563 [00:15<02:17, 10.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|▉         | 155/1563 [00:15<02:15, 10.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|█         | 157/1563 [00:15<02:14, 10.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|█         | 159/1563 [00:15<02:13, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|█         | 161/1563 [00:16<02:13, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|█         | 163/1563 [00:16<02:12, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█         | 165/1563 [00:16<02:12, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█         | 167/1563 [00:16<02:11, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█         | 169/1563 [00:16<02:12, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█         | 171/1563 [00:17<02:12, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█         | 173/1563 [00:17<02:13, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█         | 175/1563 [00:17<02:12, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█▏        | 177/1563 [00:17<02:11, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█▏        | 179/1563 [00:17<02:12, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 181/1563 [00:18<02:12, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 183/1563 [00:18<02:11, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 185/1563 [00:18<02:11, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 187/1563 [00:18<02:11, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 189/1563 [00:18<02:10, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 191/1563 [00:19<02:10, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 193/1563 [00:19<02:10, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 195/1563 [00:19<02:09, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 197/1563 [00:19<02:09, 10.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 199/1563 [00:19<02:08, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 201/1563 [00:19<02:07, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 203/1563 [00:20<02:07, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 205/1563 [00:20<02:06, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 207/1563 [00:20<02:05, 10.77batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 209/1563 [00:20<02:07, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 211/1563 [00:20<02:06, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  14%|█▎        | 213/1563 [00:21<02:06, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  14%|█▍        | 215/1563 [00:21<02:07, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  14%|█▍        | 217/1563 [00:21<02:06, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  14%|█▍        | 219/1563 [00:21<02:07, 10.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  14%|█▍        | 221/1563 [00:21<02:06, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  14%|█▍        | 223/1563 [00:22<02:05, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  14%|█▍        | 225/1563 [00:22<02:06, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▍        | 227/1563 [00:22<02:05, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▍        | 229/1563 [00:22<02:05, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▍        | 231/1563 [00:22<02:05, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▍        | 233/1563 [00:22<02:05, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▌        | 235/1563 [00:23<02:05, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▌        | 237/1563 [00:23<02:04, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▌        | 239/1563 [00:23<02:04, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▌        | 241/1563 [00:23<02:03, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▌        | 243/1563 [00:23<02:03, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▌        | 245/1563 [00:24<02:02, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▌        | 247/1563 [00:24<02:07, 10.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▌        | 249/1563 [00:24<02:13,  9.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▌        | 250/1563 [00:24<02:16,  9.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▌        | 251/1563 [00:24<02:19,  9.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▌        | 252/1563 [00:24<02:20,  9.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▌        | 253/1563 [00:24<02:21,  9.29batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▋        | 254/1563 [00:25<02:22,  9.21batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▋        | 255/1563 [00:25<02:26,  8.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▋        | 256/1563 [00:25<02:27,  8.87batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▋        | 257/1563 [00:25<02:28,  8.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 258/1563 [00:25<02:30,  8.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 259/1563 [00:25<02:32,  8.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 260/1563 [00:25<02:33,  8.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 261/1563 [00:25<02:36,  8.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 262/1563 [00:26<02:37,  8.27batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 263/1563 [00:26<02:35,  8.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 264/1563 [00:26<02:34,  8.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 265/1563 [00:26<02:33,  8.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 267/1563 [00:26<02:19,  9.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 269/1563 [00:26<02:13,  9.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 271/1563 [00:26<02:08, 10.02batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 273/1563 [00:27<02:06, 10.20batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 275/1563 [00:27<02:04, 10.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 277/1563 [00:27<02:03, 10.41batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 279/1563 [00:27<02:02, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 281/1563 [00:27<02:03, 10.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 283/1563 [00:28<02:02, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 285/1563 [00:28<02:03, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 287/1563 [00:28<02:02, 10.41batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 289/1563 [00:28<02:01, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▊        | 291/1563 [00:28<02:01, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▊        | 293/1563 [00:29<02:01, 10.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▉        | 295/1563 [00:29<02:01, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▉        | 297/1563 [00:29<02:00, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▉        | 299/1563 [00:29<02:00, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▉        | 301/1563 [00:29<01:59, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▉        | 303/1563 [00:30<01:58, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|█▉        | 305/1563 [00:30<01:59, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|█▉        | 307/1563 [00:30<01:58, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|█▉        | 309/1563 [00:30<01:57, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|█▉        | 311/1563 [00:30<01:57, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|██        | 313/1563 [00:30<01:57, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|██        | 315/1563 [00:31<01:56, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|██        | 317/1563 [00:31<01:57, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|██        | 319/1563 [00:31<01:56, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██        | 321/1563 [00:31<01:56, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██        | 323/1563 [00:31<01:56, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██        | 325/1563 [00:32<01:56, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██        | 327/1563 [00:32<01:55, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██        | 329/1563 [00:32<01:55, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██        | 331/1563 [00:32<01:54, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██▏       | 333/1563 [00:32<01:54, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██▏       | 335/1563 [00:32<01:54, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 337/1563 [00:33<01:57, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 339/1563 [00:33<01:56, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 341/1563 [00:33<01:55, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 343/1563 [00:33<01:54, 10.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 345/1563 [00:33<01:55, 10.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 347/1563 [00:34<01:55, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 349/1563 [00:34<01:54, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 351/1563 [00:34<01:53, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 353/1563 [00:34<01:53, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 355/1563 [00:34<01:53, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 357/1563 [00:35<01:55, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 359/1563 [00:35<01:55, 10.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 361/1563 [00:35<01:54, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 363/1563 [00:35<01:53, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 365/1563 [00:35<01:53, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 367/1563 [00:36<01:53, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▎       | 369/1563 [00:36<01:53, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▎       | 371/1563 [00:36<01:54, 10.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 373/1563 [00:36<02:00,  9.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 374/1563 [00:36<02:03,  9.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 375/1563 [00:36<02:05,  9.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 376/1563 [00:36<02:07,  9.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 377/1563 [00:37<02:09,  9.17batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 378/1563 [00:37<02:15,  8.77batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 379/1563 [00:37<02:14,  8.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 380/1563 [00:37<02:13,  8.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 381/1563 [00:37<02:13,  8.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 382/1563 [00:37<02:12,  8.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▍       | 383/1563 [00:37<02:12,  8.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▍       | 384/1563 [00:37<02:13,  8.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▍       | 385/1563 [00:38<02:14,  8.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▍       | 386/1563 [00:38<02:14,  8.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▍       | 387/1563 [00:38<02:17,  8.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▍       | 388/1563 [00:38<02:16,  8.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▍       | 389/1563 [00:38<02:15,  8.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▍       | 390/1563 [00:38<02:16,  8.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▌       | 392/1563 [00:38<02:04,  9.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▌       | 394/1563 [00:38<01:58,  9.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▌       | 396/1563 [00:39<01:56, 10.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▌       | 398/1563 [00:39<01:54, 10.19batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▌       | 400/1563 [00:39<01:52, 10.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▌       | 402/1563 [00:39<01:51, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▌       | 404/1563 [00:39<01:50, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▌       | 406/1563 [00:40<01:51, 10.41batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▌       | 408/1563 [00:40<01:50, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▌       | 410/1563 [00:40<01:49, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▋       | 412/1563 [00:40<01:49, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▋       | 414/1563 [00:40<01:48, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 416/1563 [00:41<01:48, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 418/1563 [00:41<01:51, 10.29batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 420/1563 [00:41<01:50, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 422/1563 [00:41<01:49, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 424/1563 [00:41<01:48, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 426/1563 [00:42<01:47, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 428/1563 [00:42<01:47, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 430/1563 [00:42<01:46, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 432/1563 [00:42<01:46, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 434/1563 [00:42<01:46, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 436/1563 [00:42<01:45, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 438/1563 [00:43<01:46, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 440/1563 [00:43<01:45, 10.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 442/1563 [00:43<01:44, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 444/1563 [00:43<01:44, 10.71batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▊       | 446/1563 [00:43<01:44, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▊       | 448/1563 [00:44<01:44, 10.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▉       | 450/1563 [00:44<01:46, 10.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▉       | 452/1563 [00:44<01:45, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▉       | 454/1563 [00:44<01:44, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▉       | 456/1563 [00:44<01:44, 10.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▉       | 458/1563 [00:45<01:44, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▉       | 460/1563 [00:45<01:43, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|██▉       | 462/1563 [00:45<01:43, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|██▉       | 464/1563 [00:45<01:42, 10.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|██▉       | 466/1563 [00:45<01:42, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|██▉       | 468/1563 [00:45<01:42, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|███       | 470/1563 [00:46<01:43, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|███       | 472/1563 [00:46<01:42, 10.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|███       | 474/1563 [00:46<01:42, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|███       | 476/1563 [00:46<01:42, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███       | 478/1563 [00:46<01:42, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███       | 480/1563 [00:47<01:42, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███       | 482/1563 [00:47<01:43, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███       | 484/1563 [00:47<01:44, 10.31batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███       | 486/1563 [00:47<01:45, 10.23batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███       | 488/1563 [00:47<01:49,  9.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███▏      | 489/1563 [00:48<01:49,  9.77batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███▏      | 490/1563 [00:48<01:49,  9.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███▏      | 492/1563 [00:48<01:46, 10.02batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 494/1563 [00:48<01:44, 10.26batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 496/1563 [00:48<01:46,  9.98batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 497/1563 [00:48<01:49,  9.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 498/1563 [00:48<01:52,  9.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 499/1563 [00:49<01:56,  9.15batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 500/1563 [00:49<01:59,  8.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 501/1563 [00:49<01:59,  8.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 502/1563 [00:49<02:02,  8.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 503/1563 [00:49<02:03,  8.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 504/1563 [00:49<02:01,  8.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 505/1563 [00:49<01:59,  8.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 506/1563 [00:49<01:59,  8.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 507/1563 [00:50<02:04,  8.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 508/1563 [00:50<02:05,  8.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 509/1563 [00:50<02:06,  8.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 510/1563 [00:50<02:05,  8.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 511/1563 [00:50<02:09,  8.14batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 512/1563 [00:50<02:08,  8.16batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 513/1563 [00:50<02:07,  8.24batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 514/1563 [00:50<02:06,  8.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 516/1563 [00:51<01:54,  9.18batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 518/1563 [00:51<01:48,  9.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 519/1563 [00:51<01:47,  9.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 521/1563 [00:51<01:44,  9.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 523/1563 [00:51<01:42, 10.18batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▎      | 525/1563 [00:51<01:40, 10.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▎      | 527/1563 [00:52<01:38, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▍      | 529/1563 [00:52<01:39, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▍      | 531/1563 [00:52<01:38, 10.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▍      | 533/1563 [00:52<01:37, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▍      | 535/1563 [00:52<01:36, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▍      | 537/1563 [00:53<01:36, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▍      | 539/1563 [00:53<01:36, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▍      | 541/1563 [00:53<01:36, 10.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▍      | 543/1563 [00:53<01:35, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▍      | 545/1563 [00:53<01:35, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▍      | 547/1563 [00:53<01:35, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▌      | 549/1563 [00:54<01:36, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▌      | 551/1563 [00:54<01:36, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▌      | 553/1563 [00:54<01:35, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▌      | 555/1563 [00:54<01:35, 10.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▌      | 557/1563 [00:54<01:35, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▌      | 559/1563 [00:55<01:35, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▌      | 561/1563 [00:55<01:34, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▌      | 563/1563 [00:55<01:34, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▌      | 565/1563 [00:55<01:35, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▋      | 567/1563 [00:55<01:34, 10.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▋      | 569/1563 [00:56<01:34, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 571/1563 [00:56<01:34, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 573/1563 [00:56<01:33, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 575/1563 [00:56<01:34, 10.41batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 577/1563 [00:56<01:34, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 579/1563 [00:57<01:33, 10.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 581/1563 [00:57<01:33, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 583/1563 [00:57<01:32, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 585/1563 [00:57<01:31, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 587/1563 [00:57<01:32, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 589/1563 [00:57<01:32, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 591/1563 [00:58<01:31, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 593/1563 [00:58<01:31, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 595/1563 [00:58<01:30, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 597/1563 [00:58<01:30, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 599/1563 [00:58<01:30, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 601/1563 [00:59<01:29, 10.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▊      | 603/1563 [00:59<01:31, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▊      | 605/1563 [00:59<01:30, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▉      | 607/1563 [00:59<01:30, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▉      | 609/1563 [00:59<01:29, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▉      | 611/1563 [01:00<01:29, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▉      | 613/1563 [01:00<01:29, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▉      | 615/1563 [01:00<01:29, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▉      | 617/1563 [01:00<01:29, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|███▉      | 619/1563 [01:00<01:29, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|███▉      | 621/1563 [01:01<01:34, 10.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|███▉      | 623/1563 [01:01<01:38,  9.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|███▉      | 624/1563 [01:01<01:42,  9.18batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|███▉      | 625/1563 [01:01<01:44,  8.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|████      | 626/1563 [01:01<01:46,  8.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|████      | 627/1563 [01:01<01:50,  8.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|████      | 628/1563 [01:01<01:51,  8.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|████      | 629/1563 [01:02<01:51,  8.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|████      | 630/1563 [01:02<01:54,  8.18batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|████      | 631/1563 [01:02<01:54,  8.12batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|████      | 632/1563 [01:02<01:55,  8.05batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|████      | 633/1563 [01:02<01:53,  8.22batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████      | 634/1563 [01:02<01:51,  8.33batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████      | 635/1563 [01:02<01:51,  8.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████      | 636/1563 [01:02<01:52,  8.25batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████      | 637/1563 [01:02<01:51,  8.27batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████      | 639/1563 [01:03<01:41,  9.13batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████      | 641/1563 [01:03<01:35,  9.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████      | 643/1563 [01:03<01:31, 10.06batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████▏     | 645/1563 [01:03<01:29, 10.26batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████▏     | 647/1563 [01:03<01:27, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 649/1563 [01:04<01:27, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 651/1563 [01:04<01:27, 10.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 653/1563 [01:04<01:26, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 655/1563 [01:04<01:26, 10.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 657/1563 [01:04<01:26, 10.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 659/1563 [01:05<01:26, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 661/1563 [01:05<01:25, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 663/1563 [01:05<01:25, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 665/1563 [01:05<01:24, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 667/1563 [01:05<01:24, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 669/1563 [01:06<01:25, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 671/1563 [01:06<01:25, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 673/1563 [01:06<01:24, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 675/1563 [01:06<01:24, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 677/1563 [01:06<01:24, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 679/1563 [01:06<01:24, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▎     | 681/1563 [01:07<01:25, 10.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▎     | 683/1563 [01:07<01:23, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▍     | 685/1563 [01:07<01:23, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▍     | 687/1563 [01:07<01:23, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▍     | 689/1563 [01:07<01:23, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▍     | 691/1563 [01:08<01:22, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▍     | 693/1563 [01:08<01:22, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▍     | 695/1563 [01:08<01:23, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▍     | 697/1563 [01:08<01:22, 10.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▍     | 699/1563 [01:08<01:22, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▍     | 701/1563 [01:09<01:23, 10.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▍     | 703/1563 [01:09<01:22, 10.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▌     | 705/1563 [01:09<01:22, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▌     | 707/1563 [01:09<01:21, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▌     | 709/1563 [01:09<01:20, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▌     | 711/1563 [01:10<01:21, 10.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▌     | 713/1563 [01:10<01:21, 10.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▌     | 715/1563 [01:10<01:20, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▌     | 717/1563 [01:10<01:19, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▌     | 719/1563 [01:10<01:19, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▌     | 721/1563 [01:10<01:19, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▋     | 723/1563 [01:11<01:21, 10.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▋     | 725/1563 [01:11<01:20, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 727/1563 [01:11<01:19, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 729/1563 [01:11<01:19, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 731/1563 [01:11<01:18, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 733/1563 [01:12<01:19, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 735/1563 [01:12<01:19, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 737/1563 [01:12<01:18, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 739/1563 [01:12<01:18, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 741/1563 [01:12<01:18, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 743/1563 [01:13<01:20, 10.12batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 745/1563 [01:13<01:24,  9.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 746/1563 [01:13<01:26,  9.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 747/1563 [01:13<01:29,  9.16batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 748/1563 [01:13<01:29,  9.06batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 749/1563 [01:13<01:30,  9.02batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 750/1563 [01:13<01:30,  8.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 751/1563 [01:14<01:30,  8.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 752/1563 [01:14<01:32,  8.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 753/1563 [01:14<01:37,  8.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 754/1563 [01:14<01:34,  8.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 755/1563 [01:14<01:33,  8.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 756/1563 [01:14<01:33,  8.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 757/1563 [01:14<01:33,  8.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 758/1563 [01:14<01:36,  8.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▊     | 759/1563 [01:14<01:35,  8.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▊     | 760/1563 [01:15<01:36,  8.31batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▊     | 761/1563 [01:15<01:39,  8.09batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▉     | 763/1563 [01:15<01:27,  9.11batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▉     | 765/1563 [01:15<01:22,  9.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▉     | 767/1563 [01:15<01:19,  9.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▉     | 769/1563 [01:15<01:17, 10.25batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▉     | 771/1563 [01:16<01:16, 10.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▉     | 773/1563 [01:16<01:16, 10.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|████▉     | 775/1563 [01:16<01:15, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|████▉     | 777/1563 [01:16<01:15, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|████▉     | 779/1563 [01:16<01:14, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|████▉     | 781/1563 [01:17<01:14, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|█████     | 783/1563 [01:17<01:15, 10.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|█████     | 785/1563 [01:17<01:14, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|█████     | 787/1563 [01:17<01:13, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|█████     | 789/1563 [01:17<01:13, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 791/1563 [01:18<01:14, 10.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 793/1563 [01:18<01:14, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 795/1563 [01:18<01:13, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 797/1563 [01:18<01:13, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 799/1563 [01:18<01:12, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 801/1563 [01:19<01:11, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████▏    | 803/1563 [01:19<01:12, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 805/1563 [01:19<01:12, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 807/1563 [01:19<01:12, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 809/1563 [01:19<01:12, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 811/1563 [01:19<01:11, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 813/1563 [01:20<01:11, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 815/1563 [01:20<01:11, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 817/1563 [01:20<01:11, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 819/1563 [01:20<01:10, 10.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 821/1563 [01:20<01:11, 10.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 823/1563 [01:21<01:10, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 825/1563 [01:21<01:10, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 827/1563 [01:21<01:10, 10.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 829/1563 [01:21<01:09, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 831/1563 [01:21<01:09, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 833/1563 [01:22<01:08, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 835/1563 [01:22<01:09, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▎    | 837/1563 [01:22<01:09, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▎    | 839/1563 [01:22<01:08, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▍    | 841/1563 [01:22<01:08, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▍    | 843/1563 [01:23<01:08, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▍    | 845/1563 [01:23<01:08, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▍    | 847/1563 [01:23<01:08, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▍    | 849/1563 [01:23<01:07, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▍    | 851/1563 [01:23<01:07, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▍    | 853/1563 [01:23<01:07, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▍    | 855/1563 [01:24<01:07, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▍    | 857/1563 [01:24<01:06, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▍    | 859/1563 [01:24<01:07, 10.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▌    | 861/1563 [01:24<01:07, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▌    | 863/1563 [01:24<01:07, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▌    | 865/1563 [01:25<01:07, 10.41batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▌    | 867/1563 [01:25<01:09, 10.07batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 869/1563 [01:25<01:12,  9.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 870/1563 [01:25<01:14,  9.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 871/1563 [01:25<01:14,  9.27batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 872/1563 [01:25<01:14,  9.25batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 873/1563 [01:26<01:14,  9.23batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 874/1563 [01:26<01:16,  9.03batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 875/1563 [01:26<01:17,  8.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 876/1563 [01:26<01:16,  8.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 877/1563 [01:26<01:17,  8.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 878/1563 [01:26<01:17,  8.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 879/1563 [01:26<01:17,  8.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▋    | 880/1563 [01:26<01:17,  8.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▋    | 881/1563 [01:26<01:17,  8.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▋    | 882/1563 [01:27<01:19,  8.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▋    | 883/1563 [01:27<01:20,  8.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 884/1563 [01:27<01:19,  8.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 885/1563 [01:27<01:19,  8.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 886/1563 [01:27<01:18,  8.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 888/1563 [01:27<01:11,  9.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 890/1563 [01:27<01:08,  9.89batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 892/1563 [01:28<01:06, 10.14batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 893/1563 [01:28<01:07,  9.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 895/1563 [01:28<01:05, 10.17batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 897/1563 [01:28<01:05, 10.21batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 899/1563 [01:28<01:04, 10.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 901/1563 [01:28<01:03, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 903/1563 [01:29<01:03, 10.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 905/1563 [01:29<01:02, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 907/1563 [01:29<01:02, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 909/1563 [01:29<01:02, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 911/1563 [01:29<01:02, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 913/1563 [01:30<01:01, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▊    | 915/1563 [01:30<01:01, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▊    | 917/1563 [01:30<01:01, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▉    | 919/1563 [01:30<01:01, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▉    | 921/1563 [01:30<01:01, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▉    | 923/1563 [01:31<01:00, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▉    | 925/1563 [01:31<01:00, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▉    | 927/1563 [01:31<01:00, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▉    | 929/1563 [01:31<00:59, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|█████▉    | 931/1563 [01:31<00:59, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|█████▉    | 933/1563 [01:31<00:59, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|█████▉    | 935/1563 [01:32<00:59, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|█████▉    | 937/1563 [01:32<00:59, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|██████    | 939/1563 [01:32<00:59, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|██████    | 941/1563 [01:32<00:59, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|██████    | 943/1563 [01:32<00:59, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|██████    | 945/1563 [01:33<00:59, 10.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████    | 947/1563 [01:33<00:58, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████    | 949/1563 [01:33<00:58, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████    | 951/1563 [01:33<00:58, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████    | 953/1563 [01:33<00:58, 10.41batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████    | 955/1563 [01:34<00:58, 10.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████    | 957/1563 [01:34<00:58, 10.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████▏   | 959/1563 [01:34<00:58, 10.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████▏   | 961/1563 [01:34<00:58, 10.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  62%|██████▏   | 963/1563 [01:34<00:58, 10.27batch/s]\u001b[A\n",
-            "Epoch 2/2:  62%|██████▏   | 965/1563 [01:35<00:57, 10.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  62%|██████▏   | 967/1563 [01:35<00:57, 10.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  62%|██████▏   | 969/1563 [01:35<00:57, 10.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  62%|██████▏   | 971/1563 [01:35<00:56, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  62%|██████▏   | 973/1563 [01:35<00:56, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  62%|██████▏   | 975/1563 [01:36<00:56, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 977/1563 [01:36<00:56, 10.41batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 979/1563 [01:36<00:56, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 981/1563 [01:36<00:56, 10.33batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 983/1563 [01:36<00:57, 10.17batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 985/1563 [01:37<00:56, 10.26batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 987/1563 [01:37<00:56, 10.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 989/1563 [01:37<00:55, 10.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 991/1563 [01:37<00:56, 10.07batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▎   | 993/1563 [01:37<01:00,  9.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▎   | 994/1563 [01:37<01:01,  9.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▎   | 995/1563 [01:38<01:01,  9.22batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▎   | 996/1563 [01:38<01:03,  8.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 997/1563 [01:38<01:03,  8.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 998/1563 [01:38<01:05,  8.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 999/1563 [01:38<01:05,  8.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 1000/1563 [01:38<01:06,  8.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 1001/1563 [01:38<01:06,  8.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 1002/1563 [01:38<01:08,  8.23batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 1003/1563 [01:39<01:07,  8.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 1004/1563 [01:39<01:07,  8.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 1005/1563 [01:39<01:07,  8.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 1006/1563 [01:39<01:06,  8.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 1007/1563 [01:39<01:05,  8.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 1008/1563 [01:39<01:05,  8.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▍   | 1009/1563 [01:39<01:06,  8.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▍   | 1011/1563 [01:39<01:00,  9.07batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▍   | 1012/1563 [01:40<00:59,  9.24batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▍   | 1013/1563 [01:40<00:58,  9.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▍   | 1015/1563 [01:40<00:55,  9.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▌   | 1016/1563 [01:40<00:55,  9.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▌   | 1017/1563 [01:40<00:55,  9.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▌   | 1018/1563 [01:40<00:55,  9.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▌   | 1019/1563 [01:40<00:54,  9.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▌   | 1021/1563 [01:40<00:53, 10.22batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▌   | 1023/1563 [01:41<00:52, 10.31batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▌   | 1025/1563 [01:41<00:52, 10.29batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▌   | 1027/1563 [01:41<00:51, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▌   | 1029/1563 [01:41<00:51, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▌   | 1031/1563 [01:41<00:51, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▌   | 1033/1563 [01:42<00:51, 10.25batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▌   | 1035/1563 [01:42<00:51, 10.29batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▋   | 1037/1563 [01:42<00:50, 10.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▋   | 1039/1563 [01:42<00:50, 10.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1041/1563 [01:42<00:50, 10.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1043/1563 [01:43<00:50, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1045/1563 [01:43<00:49, 10.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1047/1563 [01:43<00:49, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1049/1563 [01:43<00:48, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1051/1563 [01:43<00:49, 10.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1053/1563 [01:44<00:48, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1055/1563 [01:44<00:49, 10.33batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1057/1563 [01:44<00:48, 10.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1059/1563 [01:44<00:48, 10.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1061/1563 [01:44<00:47, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1063/1563 [01:44<00:47, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1065/1563 [01:45<00:48, 10.27batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1067/1563 [01:45<00:47, 10.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1069/1563 [01:45<00:47, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▊   | 1071/1563 [01:45<00:47, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▊   | 1073/1563 [01:45<00:46, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▉   | 1075/1563 [01:46<00:46, 10.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▉   | 1077/1563 [01:46<00:46, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▉   | 1079/1563 [01:46<00:46, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▉   | 1081/1563 [01:46<00:46, 10.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▉   | 1083/1563 [01:46<00:46, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▉   | 1085/1563 [01:47<00:46, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|██████▉   | 1087/1563 [01:47<00:46, 10.25batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|██████▉   | 1089/1563 [01:47<00:46, 10.24batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|██████▉   | 1091/1563 [01:47<00:45, 10.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|██████▉   | 1093/1563 [01:47<00:45, 10.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|███████   | 1095/1563 [01:48<00:45, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|███████   | 1097/1563 [01:48<00:46,  9.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|███████   | 1099/1563 [01:48<00:45, 10.13batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|███████   | 1101/1563 [01:48<00:45, 10.16batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████   | 1103/1563 [01:48<00:45, 10.21batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████   | 1105/1563 [01:49<00:44, 10.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████   | 1107/1563 [01:49<00:44, 10.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████   | 1109/1563 [01:49<00:44, 10.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████   | 1111/1563 [01:49<00:43, 10.33batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████   | 1113/1563 [01:49<00:45, 10.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████▏  | 1115/1563 [01:50<00:46,  9.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████▏  | 1116/1563 [01:50<00:47,  9.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████▏  | 1117/1563 [01:50<00:48,  9.20batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1118/1563 [01:50<00:48,  9.17batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1119/1563 [01:50<00:49,  9.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1120/1563 [01:50<00:49,  9.01batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1121/1563 [01:50<00:49,  8.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1122/1563 [01:50<00:49,  8.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1123/1563 [01:50<00:49,  8.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1124/1563 [01:51<00:51,  8.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1125/1563 [01:51<00:52,  8.33batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1126/1563 [01:51<00:53,  8.16batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1127/1563 [01:51<00:53,  8.18batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1128/1563 [01:51<00:53,  8.06batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1129/1563 [01:51<00:53,  8.11batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1130/1563 [01:51<00:53,  8.12batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1131/1563 [01:51<00:52,  8.19batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1132/1563 [01:52<00:52,  8.24batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1133/1563 [01:52<00:50,  8.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1134/1563 [01:52<00:48,  8.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1135/1563 [01:52<00:46,  9.15batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1136/1563 [01:52<00:45,  9.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1138/1563 [01:52<00:43,  9.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1140/1563 [01:52<00:41, 10.09batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1141/1563 [01:52<00:41, 10.06batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1142/1563 [01:53<00:42, 10.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1143/1563 [01:53<00:42,  9.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1144/1563 [01:53<00:42,  9.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1145/1563 [01:53<00:42,  9.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1147/1563 [01:53<00:41, 10.08batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1148/1563 [01:53<00:41, 10.03batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▎  | 1149/1563 [01:53<00:41, 10.01batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▎  | 1150/1563 [01:53<00:41,  9.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▎  | 1152/1563 [01:54<00:40, 10.19batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▍  | 1154/1563 [01:54<00:40, 10.10batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▍  | 1156/1563 [01:54<00:40,  9.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▍  | 1158/1563 [01:54<00:39, 10.13batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▍  | 1160/1563 [01:54<00:39, 10.24batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▍  | 1162/1563 [01:55<00:38, 10.33batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▍  | 1164/1563 [01:55<00:38, 10.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▍  | 1166/1563 [01:55<00:38, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▍  | 1168/1563 [01:55<00:37, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▍  | 1170/1563 [01:55<00:37, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▍  | 1172/1563 [01:56<00:37, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▌  | 1174/1563 [01:56<00:37, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▌  | 1176/1563 [01:56<00:36, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▌  | 1178/1563 [01:56<00:36, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▌  | 1180/1563 [01:56<00:36, 10.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1182/1563 [01:56<00:36, 10.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1184/1563 [01:57<00:36, 10.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1186/1563 [01:57<00:35, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1188/1563 [01:57<00:36, 10.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1190/1563 [01:57<00:36, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▋  | 1192/1563 [01:57<00:35, 10.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▋  | 1194/1563 [01:58<00:35, 10.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1196/1563 [01:58<00:35, 10.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1198/1563 [01:58<00:35, 10.26batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1200/1563 [01:58<00:35, 10.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1202/1563 [01:58<00:34, 10.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1204/1563 [01:59<00:34, 10.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1206/1563 [01:59<00:34, 10.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1208/1563 [01:59<00:34, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1210/1563 [01:59<00:33, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1212/1563 [01:59<00:33, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1214/1563 [02:00<00:33, 10.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1216/1563 [02:00<00:33, 10.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1218/1563 [02:00<00:33, 10.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1220/1563 [02:00<00:33, 10.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1222/1563 [02:00<00:33, 10.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1224/1563 [02:01<00:32, 10.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1226/1563 [02:01<00:32, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▊  | 1228/1563 [02:01<00:32, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▊  | 1230/1563 [02:01<00:32, 10.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▉  | 1232/1563 [02:01<00:31, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▉  | 1234/1563 [02:01<00:31, 10.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▉  | 1236/1563 [02:02<00:32, 10.07batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▉  | 1238/1563 [02:02<00:33,  9.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▉  | 1239/1563 [02:02<00:34,  9.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▉  | 1240/1563 [02:02<00:34,  9.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▉  | 1241/1563 [02:02<00:35,  9.09batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▉  | 1242/1563 [02:02<00:35,  9.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|███████▉  | 1243/1563 [02:03<00:35,  9.01batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|███████▉  | 1244/1563 [02:03<00:35,  9.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|███████▉  | 1245/1563 [02:03<00:36,  8.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|███████▉  | 1246/1563 [02:03<00:36,  8.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|███████▉  | 1247/1563 [02:03<00:37,  8.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|███████▉  | 1248/1563 [02:03<00:36,  8.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|███████▉  | 1249/1563 [02:03<00:37,  8.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|███████▉  | 1250/1563 [02:03<00:37,  8.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|████████  | 1251/1563 [02:03<00:36,  8.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|████████  | 1252/1563 [02:04<00:37,  8.26batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|████████  | 1253/1563 [02:04<00:37,  8.20batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|████████  | 1254/1563 [02:04<00:37,  8.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|████████  | 1255/1563 [02:04<00:36,  8.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|████████  | 1256/1563 [02:04<00:34,  8.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|████████  | 1257/1563 [02:04<00:33,  9.11batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|████████  | 1258/1563 [02:04<00:33,  9.07batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████  | 1259/1563 [02:04<00:32,  9.27batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████  | 1261/1563 [02:05<00:30,  9.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████  | 1262/1563 [02:05<00:30,  9.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████  | 1263/1563 [02:05<00:30,  9.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████  | 1265/1563 [02:05<00:29, 10.10batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████  | 1266/1563 [02:05<00:29,  9.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████  | 1267/1563 [02:05<00:29,  9.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████  | 1268/1563 [02:05<00:30,  9.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████  | 1269/1563 [02:05<00:30,  9.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████▏ | 1271/1563 [02:06<00:28, 10.07batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████▏ | 1272/1563 [02:06<00:29, 10.02batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████▏ | 1273/1563 [02:06<00:29,  9.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1274/1563 [02:06<00:29,  9.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1275/1563 [02:06<00:29,  9.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1276/1563 [02:06<00:28,  9.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1278/1563 [02:06<00:27, 10.19batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1280/1563 [02:06<00:27, 10.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1282/1563 [02:07<00:27, 10.31batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1284/1563 [02:07<00:27, 10.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1286/1563 [02:07<00:26, 10.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1288/1563 [02:07<00:26, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1290/1563 [02:07<00:26, 10.26batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1292/1563 [02:08<00:26, 10.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1294/1563 [02:08<00:26, 10.26batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1296/1563 [02:08<00:26, 10.21batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1298/1563 [02:08<00:25, 10.24batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1300/1563 [02:08<00:25, 10.23batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1302/1563 [02:09<00:25, 10.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1304/1563 [02:09<00:25, 10.24batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▎ | 1306/1563 [02:09<00:25, 10.25batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▎ | 1308/1563 [02:09<00:24, 10.29batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1310/1563 [02:09<00:25, 10.11batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1312/1563 [02:10<00:24, 10.20batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1314/1563 [02:10<00:24, 10.27batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1316/1563 [02:10<00:23, 10.31batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1318/1563 [02:10<00:23, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1320/1563 [02:10<00:23, 10.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▍ | 1322/1563 [02:11<00:23, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▍ | 1324/1563 [02:11<00:23, 10.27batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▍ | 1326/1563 [02:11<00:22, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▍ | 1328/1563 [02:11<00:22, 10.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▌ | 1330/1563 [02:11<00:22, 10.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▌ | 1332/1563 [02:11<00:22, 10.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▌ | 1334/1563 [02:12<00:22, 10.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▌ | 1336/1563 [02:12<00:21, 10.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  86%|████████▌ | 1338/1563 [02:12<00:21, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  86%|████████▌ | 1340/1563 [02:12<00:21, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  86%|████████▌ | 1342/1563 [02:12<00:21, 10.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  86%|████████▌ | 1344/1563 [02:13<00:21, 10.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  86%|████████▌ | 1346/1563 [02:13<00:21, 10.24batch/s]\u001b[A\n",
-            "Epoch 2/2:  86%|████████▌ | 1348/1563 [02:13<00:20, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  86%|████████▋ | 1350/1563 [02:13<00:20, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1352/1563 [02:13<00:20, 10.41batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1354/1563 [02:14<00:20, 10.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1356/1563 [02:14<00:20, 10.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1358/1563 [02:14<00:20, 10.05batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1360/1563 [02:14<00:21,  9.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1361/1563 [02:14<00:21,  9.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1362/1563 [02:15<00:22,  8.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1363/1563 [02:15<00:23,  8.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1364/1563 [02:15<00:23,  8.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1365/1563 [02:15<00:23,  8.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1366/1563 [02:15<00:22,  8.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1367/1563 [02:15<00:22,  8.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1368/1563 [02:15<00:22,  8.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1369/1563 [02:15<00:22,  8.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1370/1563 [02:15<00:22,  8.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1371/1563 [02:16<00:23,  8.24batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1372/1563 [02:16<00:23,  8.18batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1373/1563 [02:16<00:22,  8.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1374/1563 [02:16<00:23,  8.14batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1375/1563 [02:16<00:23,  8.05batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1376/1563 [02:16<00:23,  8.02batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1377/1563 [02:16<00:21,  8.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1379/1563 [02:16<00:19,  9.31batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1380/1563 [02:17<00:19,  9.16batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1381/1563 [02:17<00:19,  9.19batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1382/1563 [02:17<00:19,  9.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1383/1563 [02:17<00:19,  9.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▊ | 1384/1563 [02:17<00:18,  9.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▊ | 1385/1563 [02:17<00:18,  9.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▊ | 1386/1563 [02:17<00:18,  9.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▊ | 1387/1563 [02:17<00:18,  9.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1388/1563 [02:17<00:18,  9.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1389/1563 [02:18<00:18,  9.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1390/1563 [02:18<00:18,  9.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1391/1563 [02:18<00:19,  9.03batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1392/1563 [02:18<00:18,  9.24batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1393/1563 [02:18<00:18,  9.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1394/1563 [02:18<00:17,  9.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1395/1563 [02:18<00:17,  9.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1396/1563 [02:18<00:17,  9.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1397/1563 [02:18<00:17,  9.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1398/1563 [02:18<00:17,  9.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|████████▉ | 1399/1563 [02:19<00:16,  9.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|████████▉ | 1400/1563 [02:19<00:17,  9.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|████████▉ | 1401/1563 [02:19<00:17,  9.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|████████▉ | 1402/1563 [02:19<00:16,  9.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|████████▉ | 1404/1563 [02:19<00:15,  9.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|████████▉ | 1405/1563 [02:19<00:15,  9.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|████████▉ | 1406/1563 [02:19<00:15,  9.89batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|█████████ | 1407/1563 [02:19<00:15,  9.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|█████████ | 1408/1563 [02:20<00:15,  9.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|█████████ | 1409/1563 [02:20<00:15,  9.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|█████████ | 1410/1563 [02:20<00:16,  9.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|█████████ | 1411/1563 [02:20<00:15,  9.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|█████████ | 1412/1563 [02:20<00:15,  9.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|█████████ | 1413/1563 [02:20<00:15,  9.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|█████████ | 1414/1563 [02:20<00:15,  9.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████ | 1415/1563 [02:20<00:15,  9.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████ | 1416/1563 [02:20<00:15,  9.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████ | 1417/1563 [02:20<00:14,  9.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████ | 1418/1563 [02:21<00:14,  9.76batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████ | 1420/1563 [02:21<00:14,  9.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████ | 1421/1563 [02:21<00:14,  9.89batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████ | 1423/1563 [02:21<00:13, 10.14batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████ | 1425/1563 [02:21<00:13, 10.23batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████▏| 1427/1563 [02:21<00:13, 10.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████▏| 1429/1563 [02:22<00:12, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1431/1563 [02:22<00:12, 10.26batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1433/1563 [02:22<00:12, 10.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1435/1563 [02:22<00:12, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1437/1563 [02:22<00:12, 10.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1439/1563 [02:23<00:11, 10.33batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1441/1563 [02:23<00:12, 10.12batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1443/1563 [02:23<00:11, 10.20batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1445/1563 [02:23<00:11, 10.23batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1447/1563 [02:23<00:11, 10.31batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1449/1563 [02:24<00:11, 10.27batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1451/1563 [02:24<00:11, 10.17batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1453/1563 [02:24<00:10, 10.26batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1455/1563 [02:24<00:10, 10.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1457/1563 [02:24<00:10, 10.33batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1459/1563 [02:25<00:10, 10.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1461/1563 [02:25<00:09, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▎| 1463/1563 [02:25<00:09, 10.25batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▎| 1465/1563 [02:25<00:09, 10.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▍| 1467/1563 [02:25<00:09, 10.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▍| 1469/1563 [02:26<00:09, 10.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▍| 1471/1563 [02:26<00:08, 10.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▍| 1473/1563 [02:26<00:08, 10.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▍| 1475/1563 [02:26<00:08, 10.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▍| 1477/1563 [02:26<00:08, 10.08batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▍| 1479/1563 [02:27<00:08,  9.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▍| 1480/1563 [02:27<00:08,  9.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▍| 1481/1563 [02:27<00:09,  8.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▍| 1482/1563 [02:27<00:09,  8.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▍| 1483/1563 [02:27<00:09,  8.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▍| 1484/1563 [02:27<00:09,  8.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▌| 1485/1563 [02:27<00:09,  8.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▌| 1486/1563 [02:27<00:08,  8.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▌| 1487/1563 [02:27<00:08,  8.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▌| 1488/1563 [02:28<00:08,  8.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▌| 1489/1563 [02:28<00:08,  8.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▌| 1490/1563 [02:28<00:08,  8.31batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▌| 1491/1563 [02:28<00:08,  8.17batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▌| 1492/1563 [02:28<00:08,  8.16batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1493/1563 [02:28<00:08,  8.21batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1494/1563 [02:28<00:08,  8.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1495/1563 [02:28<00:08,  8.24batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1496/1563 [02:29<00:08,  8.21batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1497/1563 [02:29<00:07,  8.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1498/1563 [02:29<00:07,  8.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1499/1563 [02:29<00:07,  8.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1500/1563 [02:29<00:07,  8.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1501/1563 [02:29<00:06,  9.20batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1502/1563 [02:29<00:06,  9.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1503/1563 [02:29<00:06,  9.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1504/1563 [02:29<00:06,  9.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▋| 1505/1563 [02:30<00:06,  9.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▋| 1506/1563 [02:30<00:05,  9.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▋| 1507/1563 [02:30<00:05,  9.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▋| 1508/1563 [02:30<00:05,  9.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1509/1563 [02:30<00:05,  9.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1510/1563 [02:30<00:05,  9.41batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1511/1563 [02:30<00:05,  9.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1512/1563 [02:30<00:05,  9.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1513/1563 [02:30<00:05,  9.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1514/1563 [02:30<00:05,  9.76batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1515/1563 [02:31<00:04,  9.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1516/1563 [02:31<00:04,  9.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1517/1563 [02:31<00:04,  9.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1518/1563 [02:31<00:04,  9.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1519/1563 [02:31<00:04,  9.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1520/1563 [02:31<00:04,  9.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1521/1563 [02:31<00:04,  9.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1522/1563 [02:31<00:04,  9.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1523/1563 [02:31<00:04,  9.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1525/1563 [02:32<00:03, 10.03batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1526/1563 [02:32<00:03,  9.80batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1527/1563 [02:32<00:03,  9.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1529/1563 [02:32<00:03, 10.16batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1531/1563 [02:32<00:03, 10.10batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1533/1563 [02:32<00:02, 10.26batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1535/1563 [02:33<00:02, 10.33batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1537/1563 [02:33<00:02, 10.26batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1539/1563 [02:33<00:02, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▊| 1541/1563 [02:33<00:02, 10.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▊| 1543/1563 [02:33<00:01, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▉| 1545/1563 [02:34<00:01, 10.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▉| 1547/1563 [02:34<00:01, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▉| 1549/1563 [02:34<00:01, 10.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▉| 1551/1563 [02:34<00:01, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▉| 1553/1563 [02:34<00:00, 10.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▉| 1555/1563 [02:34<00:00, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2: 100%|█████████▉| 1557/1563 [02:35<00:00, 10.32batch/s]\u001b[A\n",
-            "Epoch 2/2: 100%|█████████▉| 1559/1563 [02:35<00:00, 10.29batch/s]\u001b[A\n",
-            "Epoch 2/2: 100%|█████████▉| 1561/1563 [02:35<00:00, 10.31batch/s]\u001b[A\n",
-            "Epoch 2/2: 100%|██████████| 1563/1563 [02:35<00:00, 10.95batch/s]\u001b[A\n",
-            "Epochs: 100%|██████████| 2/2 [05:09<00:00, 155.00s/epoch]\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Epoch 2/2 tamamlandı. Kayıp: 1.5690, Doğruluk: 42.15%\n",
-            "RMSNorm Eğitim Süresi: 310.00 saniye, Son Doğruluk: 42.15%\n",
-            "\n",
-            "DyT Modeli Eğitiliyor...\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "Epochs:   0%|          | 0/2 [00:00<?, ?epoch/s]\n",
-            "Epoch 1/2:   0%|          | 0/1563 [00:00<?, ?batch/s]\u001b[A\n",
-            "Epoch 1/2:   0%|          | 1/1563 [00:00<02:59,  8.69batch/s]\u001b[A\n",
-            "Epoch 1/2:   0%|          | 3/1563 [00:00<02:39,  9.79batch/s]\u001b[A\n",
-            "Epoch 1/2:   0%|          | 4/1563 [00:00<02:39,  9.78batch/s]\u001b[A\n",
-            "Epoch 1/2:   0%|          | 6/1563 [00:00<02:27, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|          | 8/1563 [00:00<02:23, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|          | 10/1563 [00:00<02:22, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|          | 12/1563 [00:01<02:21, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|          | 14/1563 [00:01<02:21, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|          | 16/1563 [00:01<02:20, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|          | 18/1563 [00:01<02:20, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|▏         | 20/1563 [00:01<02:18, 11.10batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|▏         | 22/1563 [00:02<02:18, 11.11batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 24/1563 [00:02<02:17, 11.17batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 26/1563 [00:02<02:18, 11.08batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 28/1563 [00:02<02:17, 11.19batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 30/1563 [00:02<02:16, 11.26batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 32/1563 [00:02<02:15, 11.26batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 34/1563 [00:03<02:16, 11.19batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 36/1563 [00:03<02:16, 11.19batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 38/1563 [00:03<02:25, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 40/1563 [00:03<02:32, 10.00batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 42/1563 [00:03<02:35,  9.76batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 43/1563 [00:04<02:42,  9.37batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 44/1563 [00:04<02:45,  9.16batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 45/1563 [00:04<02:49,  8.95batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 46/1563 [00:04<02:55,  8.62batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 47/1563 [00:04<02:52,  8.80batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 48/1563 [00:04<02:52,  8.80batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 49/1563 [00:04<02:49,  8.92batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 50/1563 [00:04<02:49,  8.93batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 51/1563 [00:04<02:52,  8.78batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 52/1563 [00:05<02:56,  8.57batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 53/1563 [00:05<03:00,  8.38batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 54/1563 [00:05<03:01,  8.32batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▎         | 55/1563 [00:05<02:58,  8.43batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▎         | 57/1563 [00:05<02:39,  9.42batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▍         | 59/1563 [00:05<02:28, 10.13batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▍         | 61/1563 [00:06<02:22, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▍         | 63/1563 [00:06<02:19, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▍         | 65/1563 [00:06<02:18, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▍         | 67/1563 [00:06<02:16, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▍         | 69/1563 [00:06<02:14, 11.09batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▍         | 71/1563 [00:06<02:13, 11.17batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▍         | 73/1563 [00:07<02:13, 11.17batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▍         | 75/1563 [00:07<02:15, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▍         | 77/1563 [00:07<02:14, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▌         | 79/1563 [00:07<02:13, 11.13batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▌         | 81/1563 [00:07<02:12, 11.20batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▌         | 83/1563 [00:07<02:11, 11.22batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▌         | 85/1563 [00:08<02:11, 11.24batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 87/1563 [00:08<02:12, 11.12batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 89/1563 [00:08<02:13, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 91/1563 [00:08<02:13, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 93/1563 [00:08<02:12, 11.10batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 95/1563 [00:09<02:11, 11.17batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 97/1563 [00:09<02:12, 11.07batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▋         | 99/1563 [00:09<02:13, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▋         | 101/1563 [00:09<02:12, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 103/1563 [00:09<02:12, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 105/1563 [00:09<02:10, 11.15batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 107/1563 [00:10<02:10, 11.19batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 109/1563 [00:10<02:11, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 111/1563 [00:10<02:12, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 113/1563 [00:10<02:10, 11.10batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 115/1563 [00:10<02:09, 11.16batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 117/1563 [00:11<02:09, 11.17batch/s]\u001b[A\n",
-            "Epoch 1/2:   8%|▊         | 119/1563 [00:11<02:10, 11.10batch/s]\u001b[A\n",
-            "Epoch 1/2:   8%|▊         | 121/1563 [00:11<02:10, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/2:   8%|▊         | 123/1563 [00:11<02:09, 11.11batch/s]\u001b[A\n",
-            "Epoch 1/2:   8%|▊         | 125/1563 [00:11<02:09, 11.13batch/s]\u001b[A\n",
-            "Epoch 1/2:   8%|▊         | 127/1563 [00:11<02:09, 11.13batch/s]\u001b[A\n",
-            "Epoch 1/2:   8%|▊         | 129/1563 [00:12<02:08, 11.12batch/s]\u001b[A\n",
-            "Epoch 1/2:   8%|▊         | 131/1563 [00:12<02:10, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▊         | 133/1563 [00:12<02:09, 11.07batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▊         | 135/1563 [00:12<02:12, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▉         | 137/1563 [00:12<02:10, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▉         | 139/1563 [00:13<02:10, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▉         | 141/1563 [00:13<02:08, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▉         | 143/1563 [00:13<02:10, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▉         | 145/1563 [00:13<02:09, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▉         | 147/1563 [00:13<02:08, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|▉         | 149/1563 [00:13<02:07, 11.07batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|▉         | 151/1563 [00:14<02:06, 11.12batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|▉         | 153/1563 [00:14<02:06, 11.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|▉         | 155/1563 [00:14<02:07, 11.08batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|█         | 157/1563 [00:14<02:06, 11.10batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|█         | 159/1563 [00:14<02:08, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|█         | 161/1563 [00:15<02:06, 11.07batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|█         | 163/1563 [00:15<02:05, 11.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█         | 165/1563 [00:15<02:09, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█         | 167/1563 [00:15<02:15, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█         | 169/1563 [00:15<02:18, 10.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█         | 171/1563 [00:16<02:20,  9.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█         | 172/1563 [00:16<02:21,  9.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█         | 173/1563 [00:16<02:22,  9.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█         | 174/1563 [00:16<02:24,  9.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█         | 175/1563 [00:16<02:29,  9.30batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█▏        | 176/1563 [00:16<02:31,  9.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█▏        | 177/1563 [00:16<02:33,  9.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█▏        | 178/1563 [00:16<02:35,  8.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█▏        | 179/1563 [00:16<02:37,  8.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 180/1563 [00:17<02:37,  8.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 181/1563 [00:17<02:38,  8.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 182/1563 [00:17<02:39,  8.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 183/1563 [00:17<02:44,  8.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 184/1563 [00:17<02:41,  8.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 185/1563 [00:17<02:42,  8.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 187/1563 [00:17<02:24,  9.55batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 189/1563 [00:18<02:16, 10.07batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 191/1563 [00:18<02:12, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 193/1563 [00:18<02:09, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 195/1563 [00:18<02:07, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 197/1563 [00:18<02:05, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 199/1563 [00:18<02:03, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 201/1563 [00:19<02:02, 11.08batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 203/1563 [00:19<02:01, 11.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 205/1563 [00:19<02:01, 11.15batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 207/1563 [00:19<02:03, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 209/1563 [00:19<02:02, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 211/1563 [00:19<02:01, 11.11batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▎        | 213/1563 [00:20<02:01, 11.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 215/1563 [00:20<02:00, 11.19batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 217/1563 [00:20<02:02, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 219/1563 [00:20<02:01, 11.04batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 221/1563 [00:20<01:59, 11.21batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 223/1563 [00:21<01:58, 11.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 225/1563 [00:21<02:00, 11.15batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▍        | 227/1563 [00:21<02:01, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▍        | 229/1563 [00:21<02:00, 11.11batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▍        | 231/1563 [00:21<01:59, 11.17batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▍        | 233/1563 [00:21<01:58, 11.21batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▌        | 235/1563 [00:22<01:58, 11.23batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▌        | 237/1563 [00:22<01:57, 11.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▌        | 239/1563 [00:22<01:58, 11.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▌        | 241/1563 [00:22<01:59, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▌        | 243/1563 [00:22<01:58, 11.13batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▌        | 245/1563 [00:23<01:58, 11.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▌        | 247/1563 [00:23<01:57, 11.15batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▌        | 249/1563 [00:23<01:58, 11.13batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▌        | 251/1563 [00:23<01:57, 11.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▌        | 253/1563 [00:23<01:57, 11.18batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▋        | 255/1563 [00:23<01:56, 11.23batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▋        | 257/1563 [00:24<01:55, 11.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 259/1563 [00:24<01:56, 11.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 261/1563 [00:24<01:58, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 263/1563 [00:24<01:58, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 265/1563 [00:24<01:57, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 267/1563 [00:25<01:57, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 269/1563 [00:25<01:56, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 271/1563 [00:25<01:57, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 273/1563 [00:25<01:58, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 275/1563 [00:25<01:58, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 277/1563 [00:25<01:57, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 279/1563 [00:26<01:55, 11.08batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 281/1563 [00:26<01:55, 11.08batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 283/1563 [00:26<01:55, 11.08batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 285/1563 [00:26<01:54, 11.15batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 287/1563 [00:26<01:54, 11.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 289/1563 [00:27<01:54, 11.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▊        | 291/1563 [00:27<01:54, 11.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▊        | 293/1563 [00:27<01:53, 11.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▉        | 295/1563 [00:27<01:53, 11.21batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▉        | 297/1563 [00:27<01:57, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▉        | 299/1563 [00:27<02:03, 10.21batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▉        | 301/1563 [00:28<02:08,  9.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▉        | 302/1563 [00:28<02:10,  9.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▉        | 303/1563 [00:28<02:17,  9.18batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▉        | 304/1563 [00:28<02:17,  9.19batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|█▉        | 305/1563 [00:28<02:18,  9.06batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|█▉        | 306/1563 [00:28<02:21,  8.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|█▉        | 307/1563 [00:28<02:20,  8.95batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|█▉        | 308/1563 [00:28<02:19,  8.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|█▉        | 309/1563 [00:29<02:18,  9.07batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|█▉        | 310/1563 [00:29<02:21,  8.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|█▉        | 311/1563 [00:29<02:23,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|█▉        | 312/1563 [00:29<02:23,  8.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|██        | 313/1563 [00:29<02:23,  8.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|██        | 314/1563 [00:29<02:25,  8.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|██        | 315/1563 [00:29<02:27,  8.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|██        | 316/1563 [00:29<02:25,  8.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|██        | 318/1563 [00:30<02:11,  9.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|██        | 320/1563 [00:30<02:03, 10.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██        | 322/1563 [00:30<01:59, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██        | 324/1563 [00:30<01:56, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██        | 326/1563 [00:30<01:55, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██        | 328/1563 [00:31<01:54, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██        | 330/1563 [00:31<01:52, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██        | 332/1563 [00:31<01:53, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██▏       | 334/1563 [00:31<01:52, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██▏       | 336/1563 [00:31<01:51, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 338/1563 [00:31<01:50, 11.09batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 340/1563 [00:32<01:49, 11.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 342/1563 [00:32<01:50, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 344/1563 [00:32<01:50, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 346/1563 [00:32<01:50, 11.04batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 348/1563 [00:32<01:49, 11.11batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 350/1563 [00:33<01:50, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 352/1563 [00:33<01:50, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 354/1563 [00:33<01:49, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 356/1563 [00:33<01:49, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 358/1563 [00:33<01:47, 11.18batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 360/1563 [00:33<01:48, 11.11batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 362/1563 [00:34<01:47, 11.13batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 364/1563 [00:34<01:47, 11.13batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 366/1563 [00:34<01:47, 11.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▎       | 368/1563 [00:34<01:47, 11.10batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▎       | 370/1563 [00:34<01:47, 11.10batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▍       | 372/1563 [00:34<01:47, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▍       | 374/1563 [00:35<01:46, 11.13batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▍       | 376/1563 [00:35<01:46, 11.17batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▍       | 378/1563 [00:35<01:46, 11.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▍       | 380/1563 [00:35<01:46, 11.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▍       | 382/1563 [00:35<01:45, 11.15batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▍       | 384/1563 [00:36<01:46, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▍       | 386/1563 [00:36<01:46, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▍       | 388/1563 [00:36<01:47, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▍       | 390/1563 [00:36<01:47, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▌       | 392/1563 [00:36<01:46, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▌       | 394/1563 [00:36<01:44, 11.15batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▌       | 396/1563 [00:37<01:44, 11.19batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▌       | 398/1563 [00:37<01:44, 11.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▌       | 400/1563 [00:37<01:45, 11.07batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▌       | 402/1563 [00:37<01:44, 11.15batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▌       | 404/1563 [00:37<01:43, 11.20batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▌       | 406/1563 [00:38<01:44, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▌       | 408/1563 [00:38<01:44, 11.08batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▌       | 410/1563 [00:38<01:44, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▋       | 412/1563 [00:38<01:44, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▋       | 414/1563 [00:38<01:43, 11.12batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 416/1563 [00:38<01:42, 11.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 418/1563 [00:39<01:43, 11.04batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 420/1563 [00:39<01:42, 11.12batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 422/1563 [00:39<01:44, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 424/1563 [00:39<01:43, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 426/1563 [00:39<01:43, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 428/1563 [00:40<01:50, 10.27batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 430/1563 [00:40<01:54,  9.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 431/1563 [00:40<02:01,  9.33batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 432/1563 [00:40<02:04,  9.07batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 433/1563 [00:40<02:07,  8.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 434/1563 [00:40<02:07,  8.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 435/1563 [00:40<02:06,  8.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 436/1563 [00:41<02:04,  9.08batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 437/1563 [00:41<02:05,  8.95batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 438/1563 [00:41<02:04,  9.00batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 439/1563 [00:41<02:03,  9.07batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 440/1563 [00:41<02:07,  8.81batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 441/1563 [00:41<02:06,  8.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 442/1563 [00:41<02:06,  8.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 443/1563 [00:41<02:08,  8.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 444/1563 [00:41<02:09,  8.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 445/1563 [00:42<02:08,  8.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▊       | 446/1563 [00:42<02:12,  8.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▊       | 447/1563 [00:42<02:07,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▊       | 449/1563 [00:42<01:55,  9.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▉       | 450/1563 [00:42<01:54,  9.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▉       | 452/1563 [00:42<01:52,  9.89batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▉       | 454/1563 [00:42<01:47, 10.33batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▉       | 456/1563 [00:43<01:44, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▉       | 458/1563 [00:43<01:44, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▉       | 460/1563 [00:43<01:45, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|██▉       | 462/1563 [00:43<01:43, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|██▉       | 464/1563 [00:43<01:41, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|██▉       | 466/1563 [00:44<01:40, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|██▉       | 468/1563 [00:44<01:40, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|███       | 470/1563 [00:44<01:40, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|███       | 472/1563 [00:44<01:40, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|███       | 474/1563 [00:44<01:39, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|███       | 476/1563 [00:44<01:38, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███       | 478/1563 [00:45<01:38, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███       | 480/1563 [00:45<01:39, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███       | 482/1563 [00:45<01:40, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███       | 484/1563 [00:45<01:39, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███       | 486/1563 [00:45<01:38, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███       | 488/1563 [00:46<01:37, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███▏      | 490/1563 [00:46<01:37, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███▏      | 492/1563 [00:46<01:39, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 494/1563 [00:46<01:38, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 496/1563 [00:46<01:37, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 498/1563 [00:46<01:36, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 500/1563 [00:47<01:36, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 502/1563 [00:47<01:37, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 504/1563 [00:47<01:37, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 506/1563 [00:47<01:37, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 508/1563 [00:47<01:36, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 510/1563 [00:48<01:35, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 512/1563 [00:48<01:36, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 514/1563 [00:48<01:38, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 516/1563 [00:48<01:37, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 518/1563 [00:48<01:36, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 520/1563 [00:49<01:35, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 522/1563 [00:49<01:35, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▎      | 524/1563 [00:49<01:37, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▎      | 526/1563 [00:49<01:36, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▍      | 528/1563 [00:49<01:35, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▍      | 530/1563 [00:49<01:34, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▍      | 532/1563 [00:50<01:33, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▍      | 534/1563 [00:50<01:34, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▍      | 536/1563 [00:50<01:34, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▍      | 538/1563 [00:50<01:33, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▍      | 540/1563 [00:50<01:32, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▍      | 542/1563 [00:51<01:32, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▍      | 544/1563 [00:51<01:32, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▍      | 546/1563 [00:51<01:32, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▌      | 548/1563 [00:51<01:32, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▌      | 550/1563 [00:51<01:32, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▌      | 552/1563 [00:51<01:32, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▌      | 554/1563 [00:52<01:32, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 556/1563 [00:52<01:35, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 558/1563 [00:52<01:41,  9.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 559/1563 [00:52<01:43,  9.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 560/1563 [00:52<01:44,  9.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 561/1563 [00:52<01:45,  9.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 562/1563 [00:52<01:45,  9.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 563/1563 [00:53<01:45,  9.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 564/1563 [00:53<01:46,  9.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 565/1563 [00:53<01:50,  9.06batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 566/1563 [00:53<01:51,  8.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▋      | 567/1563 [00:53<01:53,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▋      | 568/1563 [00:53<01:52,  8.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▋      | 569/1563 [00:53<01:53,  8.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▋      | 570/1563 [00:53<01:52,  8.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 571/1563 [00:54<01:52,  8.81batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 572/1563 [00:54<01:52,  8.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 573/1563 [00:54<01:55,  8.57batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 574/1563 [00:54<01:55,  8.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 575/1563 [00:54<01:58,  8.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 576/1563 [00:54<01:59,  8.28batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 578/1563 [00:54<01:45,  9.31batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 580/1563 [00:54<01:39,  9.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 582/1563 [00:55<01:34, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 584/1563 [00:55<01:33, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 586/1563 [00:55<01:32, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 588/1563 [00:55<01:31, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 590/1563 [00:55<01:30, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 592/1563 [00:56<01:29, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 594/1563 [00:56<01:28, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 596/1563 [00:56<01:29, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 598/1563 [00:56<01:28, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 600/1563 [00:56<01:27, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▊      | 602/1563 [00:56<01:27, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▊      | 604/1563 [00:57<01:27, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▉      | 606/1563 [00:57<01:28, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▉      | 608/1563 [00:57<01:28, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▉      | 610/1563 [00:57<01:27, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▉      | 612/1563 [00:57<01:26, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▉      | 614/1563 [00:58<01:25, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▉      | 616/1563 [00:58<01:26, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|███▉      | 618/1563 [00:58<01:26, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|███▉      | 620/1563 [00:58<01:26, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|███▉      | 622/1563 [00:58<01:26, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|███▉      | 624/1563 [00:59<01:26, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|████      | 626/1563 [00:59<01:25, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|████      | 628/1563 [00:59<01:25, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|████      | 630/1563 [00:59<01:25, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|████      | 632/1563 [00:59<01:26, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████      | 634/1563 [00:59<01:25, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████      | 636/1563 [01:00<01:25, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████      | 638/1563 [01:00<01:25, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████      | 640/1563 [01:00<01:24, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████      | 642/1563 [01:00<01:24, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████      | 644/1563 [01:00<01:24, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████▏     | 646/1563 [01:01<01:24, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████▏     | 648/1563 [01:01<01:23, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 650/1563 [01:01<01:24, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 652/1563 [01:01<01:23, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 654/1563 [01:01<01:22, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 656/1563 [01:01<01:22, 11.04batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 658/1563 [01:02<01:22, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 660/1563 [01:02<01:22, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 662/1563 [01:02<01:22, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 664/1563 [01:02<01:22, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 666/1563 [01:02<01:22, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 668/1563 [01:03<01:21, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 670/1563 [01:03<01:22, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 672/1563 [01:03<01:22, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 674/1563 [01:03<01:21, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 676/1563 [01:03<01:20, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 678/1563 [01:03<01:21, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▎     | 680/1563 [01:04<01:20, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▎     | 682/1563 [01:04<01:21, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 684/1563 [01:04<01:21, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 686/1563 [01:04<01:23, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 688/1563 [01:04<01:27, 10.01batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 690/1563 [01:05<01:28,  9.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 691/1563 [01:05<01:29,  9.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 692/1563 [01:05<01:30,  9.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 693/1563 [01:05<01:30,  9.60batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 694/1563 [01:05<01:31,  9.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 695/1563 [01:05<01:32,  9.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▍     | 696/1563 [01:05<01:31,  9.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▍     | 697/1563 [01:05<01:31,  9.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▍     | 698/1563 [01:06<01:34,  9.12batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▍     | 699/1563 [01:06<01:35,  9.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▍     | 700/1563 [01:06<01:35,  8.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▍     | 701/1563 [01:06<01:37,  8.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▍     | 702/1563 [01:06<01:37,  8.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▍     | 703/1563 [01:06<01:38,  8.73batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▌     | 704/1563 [01:06<01:40,  8.59batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▌     | 705/1563 [01:06<01:38,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▌     | 706/1563 [01:06<01:38,  8.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▌     | 707/1563 [01:07<01:35,  8.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▌     | 709/1563 [01:07<01:26,  9.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▌     | 711/1563 [01:07<01:23, 10.15batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 713/1563 [01:07<01:21, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 715/1563 [01:07<01:20, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 717/1563 [01:07<01:19, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 719/1563 [01:08<01:19, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 721/1563 [01:08<01:19, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▋     | 723/1563 [01:08<01:20, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▋     | 725/1563 [01:08<01:18, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 727/1563 [01:08<01:17, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 729/1563 [01:09<01:18, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 731/1563 [01:09<01:17, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 733/1563 [01:09<01:16, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 735/1563 [01:09<01:15, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 737/1563 [01:09<01:15, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 739/1563 [01:10<01:15, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 741/1563 [01:10<01:16, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 743/1563 [01:10<01:16, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 745/1563 [01:10<01:15, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 747/1563 [01:10<01:15, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 749/1563 [01:10<01:14, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 751/1563 [01:11<01:16, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 753/1563 [01:11<01:15, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 755/1563 [01:11<01:15, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 757/1563 [01:11<01:14, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▊     | 759/1563 [01:11<01:13, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▊     | 761/1563 [01:12<01:13, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▉     | 763/1563 [01:12<01:14, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▉     | 765/1563 [01:12<01:14, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▉     | 767/1563 [01:12<01:14, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▉     | 769/1563 [01:12<01:13, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▉     | 771/1563 [01:12<01:15, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▉     | 773/1563 [01:13<01:15, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|████▉     | 775/1563 [01:13<01:13, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|████▉     | 777/1563 [01:13<01:12, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|████▉     | 779/1563 [01:13<01:12, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|████▉     | 781/1563 [01:13<01:11, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|█████     | 783/1563 [01:14<01:11, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|█████     | 785/1563 [01:14<01:11, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|█████     | 787/1563 [01:14<01:11, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|█████     | 789/1563 [01:14<01:11, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████     | 791/1563 [01:14<01:10, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████     | 793/1563 [01:15<01:10, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████     | 795/1563 [01:15<01:10, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████     | 797/1563 [01:15<01:10, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████     | 799/1563 [01:15<01:09, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████     | 801/1563 [01:15<01:09, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████▏    | 803/1563 [01:15<01:08, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 805/1563 [01:16<01:08, 11.07batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 807/1563 [01:16<01:09, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 809/1563 [01:16<01:09, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 811/1563 [01:16<01:09, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 813/1563 [01:16<01:08, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 815/1563 [01:17<01:10, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 817/1563 [01:17<01:13, 10.10batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 819/1563 [01:17<01:15,  9.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 820/1563 [01:17<01:17,  9.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 821/1563 [01:17<01:18,  9.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 822/1563 [01:17<01:19,  9.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 823/1563 [01:17<01:21,  9.13batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 824/1563 [01:18<01:23,  8.89batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 825/1563 [01:18<01:25,  8.60batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 826/1563 [01:18<01:27,  8.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 827/1563 [01:18<01:26,  8.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 828/1563 [01:18<01:26,  8.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 829/1563 [01:18<01:24,  8.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 830/1563 [01:18<01:24,  8.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 831/1563 [01:18<01:23,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 832/1563 [01:18<01:24,  8.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 833/1563 [01:19<01:24,  8.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 834/1563 [01:19<01:26,  8.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 836/1563 [01:19<01:19,  9.12batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▎    | 838/1563 [01:19<01:13,  9.81batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▎    | 840/1563 [01:19<01:10, 10.23batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▍    | 842/1563 [01:19<01:08, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▍    | 844/1563 [01:20<01:06, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▍    | 846/1563 [01:20<01:05, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▍    | 848/1563 [01:20<01:07, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▍    | 850/1563 [01:20<01:06, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▍    | 852/1563 [01:20<01:06, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▍    | 854/1563 [01:21<01:05, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▍    | 856/1563 [01:21<01:04, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▍    | 858/1563 [01:21<01:04, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▌    | 860/1563 [01:21<01:04, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▌    | 862/1563 [01:21<01:03, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▌    | 864/1563 [01:21<01:03, 11.07batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▌    | 866/1563 [01:22<01:02, 11.10batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▌    | 868/1563 [01:22<01:03, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▌    | 870/1563 [01:22<01:03, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▌    | 872/1563 [01:22<01:03, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▌    | 874/1563 [01:22<01:02, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▌    | 876/1563 [01:23<01:02, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▌    | 878/1563 [01:23<01:02, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▋    | 880/1563 [01:23<01:02, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▋    | 882/1563 [01:23<01:03, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 884/1563 [01:23<01:03, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 886/1563 [01:24<01:02, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 888/1563 [01:24<01:02, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 890/1563 [01:24<01:02, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 892/1563 [01:24<01:02, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 894/1563 [01:24<01:02, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 896/1563 [01:24<01:01, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 898/1563 [01:25<01:01, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 900/1563 [01:25<01:02, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 902/1563 [01:25<01:02, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 904/1563 [01:25<01:02, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 906/1563 [01:25<01:02, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 908/1563 [01:26<01:01, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 910/1563 [01:26<01:01, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 912/1563 [01:26<01:01, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 914/1563 [01:26<01:02, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▊    | 916/1563 [01:26<01:01, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▊    | 918/1563 [01:27<01:00, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▉    | 920/1563 [01:27<01:00, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▉    | 922/1563 [01:27<01:00, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▉    | 924/1563 [01:27<00:59, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▉    | 926/1563 [01:27<01:00, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▉    | 928/1563 [01:27<00:59, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|█████▉    | 930/1563 [01:28<00:59, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|█████▉    | 932/1563 [01:28<00:59, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|█████▉    | 934/1563 [01:28<00:59, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|█████▉    | 936/1563 [01:28<00:59, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|██████    | 938/1563 [01:28<00:59, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|██████    | 940/1563 [01:29<00:58, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|██████    | 942/1563 [01:29<00:59, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|██████    | 944/1563 [01:29<01:03,  9.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|██████    | 945/1563 [01:29<01:06,  9.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 946/1563 [01:29<01:07,  9.10batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 947/1563 [01:29<01:09,  8.89batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 948/1563 [01:30<01:10,  8.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 949/1563 [01:30<01:10,  8.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 950/1563 [01:30<01:10,  8.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 951/1563 [01:30<01:11,  8.59batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 952/1563 [01:30<01:10,  8.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 953/1563 [01:30<01:10,  8.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 954/1563 [01:30<01:10,  8.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 955/1563 [01:30<01:11,  8.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 956/1563 [01:30<01:11,  8.51batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 957/1563 [01:31<01:11,  8.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████▏   | 958/1563 [01:31<01:11,  8.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████▏   | 959/1563 [01:31<01:13,  8.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████▏   | 960/1563 [01:31<01:13,  8.22batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████▏   | 961/1563 [01:31<01:12,  8.31batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 963/1563 [01:31<01:04,  9.31batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 964/1563 [01:31<01:04,  9.30batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 966/1563 [01:32<01:00,  9.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 968/1563 [01:32<00:58, 10.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 970/1563 [01:32<00:57, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 972/1563 [01:32<00:56, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 974/1563 [01:32<00:55, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 976/1563 [01:32<00:55, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 978/1563 [01:33<00:55, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 980/1563 [01:33<00:54, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 982/1563 [01:33<00:54, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 984/1563 [01:33<00:53, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 986/1563 [01:33<00:53, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 988/1563 [01:34<00:53, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 990/1563 [01:34<00:52, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 992/1563 [01:34<00:52, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▎   | 994/1563 [01:34<00:52, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▎   | 996/1563 [01:34<00:51, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▍   | 998/1563 [01:34<00:51, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▍   | 1000/1563 [01:35<00:51, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▍   | 1002/1563 [01:35<00:51, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▍   | 1004/1563 [01:35<00:51, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▍   | 1006/1563 [01:35<00:50, 11.04batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▍   | 1008/1563 [01:35<00:50, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▍   | 1010/1563 [01:36<00:50, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▍   | 1012/1563 [01:36<00:50, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▍   | 1014/1563 [01:36<00:50, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▌   | 1016/1563 [01:36<00:50, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▌   | 1018/1563 [01:36<00:49, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▌   | 1020/1563 [01:37<00:50, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▌   | 1022/1563 [01:37<00:49, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▌   | 1024/1563 [01:37<00:50, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▌   | 1026/1563 [01:37<00:49, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▌   | 1028/1563 [01:37<00:49, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▌   | 1030/1563 [01:37<00:49, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▌   | 1032/1563 [01:38<00:49, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▌   | 1034/1563 [01:38<00:49, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▋   | 1036/1563 [01:38<00:49, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▋   | 1038/1563 [01:38<00:49, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1040/1563 [01:38<00:49, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1042/1563 [01:39<00:49, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1044/1563 [01:39<00:48, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1046/1563 [01:39<00:48, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1048/1563 [01:39<00:47, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1050/1563 [01:39<00:47, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1052/1563 [01:40<00:47, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1054/1563 [01:40<00:47, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1056/1563 [01:40<00:46, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1058/1563 [01:40<00:46, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1060/1563 [01:40<00:46, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1062/1563 [01:40<00:46, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1064/1563 [01:41<00:46, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1066/1563 [01:41<00:46, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1068/1563 [01:41<00:46, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1070/1563 [01:41<00:47, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▊   | 1072/1563 [01:41<00:49,  9.96batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▊   | 1074/1563 [01:42<00:51,  9.51batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1075/1563 [01:42<00:52,  9.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1076/1563 [01:42<00:54,  8.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1077/1563 [01:42<00:55,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1078/1563 [01:42<00:54,  8.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1079/1563 [01:42<00:55,  8.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1080/1563 [01:42<00:55,  8.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1081/1563 [01:43<00:57,  8.31batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1082/1563 [01:43<00:56,  8.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1083/1563 [01:43<00:57,  8.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1084/1563 [01:43<00:56,  8.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1085/1563 [01:43<00:58,  8.21batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1086/1563 [01:43<00:57,  8.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|██████▉   | 1087/1563 [01:43<00:56,  8.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|██████▉   | 1088/1563 [01:43<00:56,  8.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|██████▉   | 1089/1563 [01:43<00:56,  8.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|██████▉   | 1091/1563 [01:44<00:50,  9.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|██████▉   | 1093/1563 [01:44<00:47, 10.00batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|██████▉   | 1094/1563 [01:44<00:47,  9.96batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|███████   | 1096/1563 [01:44<00:44, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|███████   | 1098/1563 [01:44<00:43, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|███████   | 1100/1563 [01:44<00:43, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████   | 1102/1563 [01:45<00:42, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████   | 1104/1563 [01:45<00:41, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████   | 1106/1563 [01:45<00:42, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████   | 1108/1563 [01:45<00:41, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████   | 1110/1563 [01:45<00:41, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████   | 1112/1563 [01:46<00:41, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████▏  | 1114/1563 [01:46<00:40, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████▏  | 1116/1563 [01:46<00:41, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1118/1563 [01:46<00:41, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1120/1563 [01:46<00:40, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1122/1563 [01:46<00:40, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1124/1563 [01:47<00:40, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1126/1563 [01:47<00:40, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1128/1563 [01:47<00:40, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1130/1563 [01:47<00:40, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1132/1563 [01:47<00:39, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1134/1563 [01:48<00:39, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1136/1563 [01:48<00:39, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1138/1563 [01:48<00:39, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1140/1563 [01:48<00:39, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1142/1563 [01:48<00:39, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1144/1563 [01:49<00:39, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1146/1563 [01:49<00:38, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1148/1563 [01:49<00:39, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▎  | 1150/1563 [01:49<00:38, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▎  | 1152/1563 [01:49<00:37, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▍  | 1154/1563 [01:49<00:37, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▍  | 1156/1563 [01:50<00:37, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▍  | 1158/1563 [01:50<00:37, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▍  | 1160/1563 [01:50<00:37, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▍  | 1162/1563 [01:50<00:37, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▍  | 1164/1563 [01:50<00:37, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▍  | 1166/1563 [01:51<00:36, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▍  | 1168/1563 [01:51<00:36, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▍  | 1170/1563 [01:51<00:36, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▍  | 1172/1563 [01:51<00:36, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▌  | 1174/1563 [01:51<00:35, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▌  | 1176/1563 [01:52<00:35, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▌  | 1178/1563 [01:52<00:35, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▌  | 1180/1563 [01:52<00:35, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▌  | 1182/1563 [01:52<00:35, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▌  | 1184/1563 [01:52<00:34, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▌  | 1186/1563 [01:52<00:34, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▌  | 1188/1563 [01:53<00:34, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▌  | 1190/1563 [01:53<00:34, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▋  | 1192/1563 [01:53<00:34, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▋  | 1194/1563 [01:53<00:34, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1196/1563 [01:53<00:33, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1198/1563 [01:54<00:33, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1200/1563 [01:54<00:35, 10.31batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1202/1563 [01:54<00:36,  9.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1204/1563 [01:54<00:37,  9.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1205/1563 [01:54<00:37,  9.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1206/1563 [01:54<00:38,  9.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1207/1563 [01:55<00:38,  9.19batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1208/1563 [01:55<00:38,  9.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1209/1563 [01:55<00:37,  9.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1210/1563 [01:55<00:37,  9.30batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1211/1563 [01:55<00:38,  9.22batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1212/1563 [01:55<00:38,  9.20batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1213/1563 [01:55<00:38,  9.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1214/1563 [01:55<00:38,  9.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1215/1563 [01:55<00:38,  9.02batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1216/1563 [01:56<00:39,  8.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1217/1563 [01:56<00:40,  8.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1218/1563 [01:56<00:41,  8.31batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1219/1563 [01:56<00:41,  8.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1221/1563 [01:56<00:36,  9.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1223/1563 [01:56<00:34,  9.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1225/1563 [01:56<00:33, 10.23batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▊  | 1227/1563 [01:57<00:32, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▊  | 1229/1563 [01:57<00:31, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▉  | 1231/1563 [01:57<00:31, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▉  | 1233/1563 [01:57<00:30, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▉  | 1235/1563 [01:57<00:30, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▉  | 1237/1563 [01:58<00:29, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▉  | 1239/1563 [01:58<00:29, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▉  | 1241/1563 [01:58<00:29, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|███████▉  | 1243/1563 [01:58<00:29, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|███████▉  | 1245/1563 [01:58<00:29, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|███████▉  | 1247/1563 [01:58<00:29, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|███████▉  | 1249/1563 [01:59<00:29, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|████████  | 1251/1563 [01:59<00:28, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|████████  | 1253/1563 [01:59<00:28, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|████████  | 1255/1563 [01:59<00:28, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|████████  | 1257/1563 [01:59<00:28, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████  | 1259/1563 [02:00<00:28, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████  | 1261/1563 [02:00<00:27, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████  | 1263/1563 [02:00<00:27, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████  | 1265/1563 [02:00<00:27, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████  | 1267/1563 [02:00<00:27, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████  | 1269/1563 [02:01<00:27, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████▏ | 1271/1563 [02:01<00:27, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████▏ | 1273/1563 [02:01<00:26, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1275/1563 [02:01<00:26, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1277/1563 [02:01<00:26, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1279/1563 [02:01<00:26, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1281/1563 [02:02<00:26, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1283/1563 [02:02<00:25, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1285/1563 [02:02<00:25, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1287/1563 [02:02<00:25, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1289/1563 [02:02<00:25, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1291/1563 [02:03<00:25, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1293/1563 [02:03<00:24, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1295/1563 [02:03<00:24, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1297/1563 [02:03<00:24, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1299/1563 [02:03<00:25, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1301/1563 [02:03<00:24, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1303/1563 [02:04<00:24, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1305/1563 [02:04<00:24, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▎ | 1307/1563 [02:04<00:23, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▎ | 1309/1563 [02:04<00:23, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▍ | 1311/1563 [02:04<00:23, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▍ | 1313/1563 [02:05<00:23, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▍ | 1315/1563 [02:05<00:23, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▍ | 1317/1563 [02:05<00:23, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▍ | 1319/1563 [02:05<00:22, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▍ | 1321/1563 [02:05<00:22, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▍ | 1323/1563 [02:06<00:22, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▍ | 1325/1563 [02:06<00:22, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▍ | 1327/1563 [02:06<00:22, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1329/1563 [02:06<00:23, 10.01batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1331/1563 [02:06<00:23,  9.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1332/1563 [02:06<00:24,  9.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1333/1563 [02:07<00:24,  9.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1334/1563 [02:07<00:24,  9.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1335/1563 [02:07<00:24,  9.12batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1336/1563 [02:07<00:25,  8.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1337/1563 [02:07<00:26,  8.59batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1338/1563 [02:07<00:25,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1339/1563 [02:07<00:26,  8.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1340/1563 [02:07<00:25,  8.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1341/1563 [02:08<00:25,  8.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1342/1563 [02:08<00:25,  8.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1343/1563 [02:08<00:26,  8.40batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1344/1563 [02:08<00:26,  8.27batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1345/1563 [02:08<00:26,  8.18batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1346/1563 [02:08<00:25,  8.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1347/1563 [02:08<00:25,  8.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1348/1563 [02:08<00:24,  8.81batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▋ | 1350/1563 [02:09<00:22,  9.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1352/1563 [02:09<00:20, 10.13batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1354/1563 [02:09<00:20, 10.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1356/1563 [02:09<00:19, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1358/1563 [02:09<00:19, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1360/1563 [02:09<00:18, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1362/1563 [02:10<00:18, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1364/1563 [02:10<00:18, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1366/1563 [02:10<00:18, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1368/1563 [02:10<00:18, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1370/1563 [02:10<00:17, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1372/1563 [02:11<00:17, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1374/1563 [02:11<00:17, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1376/1563 [02:11<00:17, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1378/1563 [02:11<00:17, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1380/1563 [02:11<00:16, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1382/1563 [02:11<00:16, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▊ | 1384/1563 [02:12<00:16, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▊ | 1386/1563 [02:12<00:16, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▉ | 1388/1563 [02:12<00:16, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▉ | 1390/1563 [02:12<00:15, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▉ | 1392/1563 [02:12<00:15, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▉ | 1394/1563 [02:13<00:15, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▉ | 1396/1563 [02:13<00:15, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▉ | 1398/1563 [02:13<00:15, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|████████▉ | 1400/1563 [02:13<00:15, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|████████▉ | 1402/1563 [02:13<00:15, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|████████▉ | 1404/1563 [02:14<00:14, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|████████▉ | 1406/1563 [02:14<00:14, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|█████████ | 1408/1563 [02:14<00:14, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|█████████ | 1410/1563 [02:14<00:14, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|█████████ | 1412/1563 [02:14<00:13, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|█████████ | 1414/1563 [02:14<00:13, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████ | 1416/1563 [02:15<00:13, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████ | 1418/1563 [02:15<00:13, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████ | 1420/1563 [02:15<00:13, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████ | 1422/1563 [02:15<00:13, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████ | 1424/1563 [02:15<00:12, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████ | 1426/1563 [02:16<00:12, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████▏| 1428/1563 [02:16<00:12, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████▏| 1430/1563 [02:16<00:12, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1432/1563 [02:16<00:12, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1434/1563 [02:16<00:12, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1436/1563 [02:17<00:11, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1438/1563 [02:17<00:11, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1440/1563 [02:17<00:11, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1442/1563 [02:17<00:11, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1444/1563 [02:17<00:10, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1446/1563 [02:17<00:10, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1448/1563 [02:18<00:10, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1450/1563 [02:18<00:10, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1452/1563 [02:18<00:10, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1454/1563 [02:18<00:10, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1456/1563 [02:18<00:10, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1458/1563 [02:19<00:10, 10.10batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1460/1563 [02:19<00:10,  9.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1461/1563 [02:19<00:10,  9.33batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▎| 1462/1563 [02:19<00:11,  9.07batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▎| 1463/1563 [02:19<00:10,  9.12batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▎| 1464/1563 [02:19<00:10,  9.03batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▎| 1465/1563 [02:19<00:10,  9.11batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1466/1563 [02:20<00:10,  9.11batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1467/1563 [02:20<00:10,  9.19batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1468/1563 [02:20<00:10,  9.23batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1469/1563 [02:20<00:10,  9.19batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1470/1563 [02:20<00:10,  9.09batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1471/1563 [02:20<00:10,  8.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1472/1563 [02:20<00:10,  8.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1473/1563 [02:20<00:10,  8.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1474/1563 [02:20<00:10,  8.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1475/1563 [02:21<00:10,  8.40batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1476/1563 [02:21<00:10,  8.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1477/1563 [02:21<00:09,  8.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▍| 1478/1563 [02:21<00:09,  8.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▍| 1480/1563 [02:21<00:08,  9.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▍| 1482/1563 [02:21<00:08, 10.06batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▍| 1484/1563 [02:21<00:07, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▌| 1486/1563 [02:22<00:07, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▌| 1488/1563 [02:22<00:07, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▌| 1490/1563 [02:22<00:06, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▌| 1492/1563 [02:22<00:06, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▌| 1494/1563 [02:22<00:06, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▌| 1496/1563 [02:23<00:06, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▌| 1498/1563 [02:23<00:05, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▌| 1500/1563 [02:23<00:05, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▌| 1502/1563 [02:23<00:05, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▌| 1504/1563 [02:23<00:05, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▋| 1506/1563 [02:23<00:05, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▋| 1508/1563 [02:24<00:05, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  97%|█████████▋| 1510/1563 [02:24<00:05, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  97%|█████████▋| 1512/1563 [02:24<00:04, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/2:  97%|█████████▋| 1514/1563 [02:24<00:04, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  97%|█████████▋| 1516/1563 [02:24<00:04, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  97%|█████████▋| 1518/1563 [02:25<00:04, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  97%|█████████▋| 1520/1563 [02:25<00:04, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  97%|█████████▋| 1522/1563 [02:25<00:03, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1524/1563 [02:25<00:03, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1526/1563 [02:25<00:03, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1528/1563 [02:26<00:03, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1530/1563 [02:26<00:03, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1532/1563 [02:26<00:02, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1534/1563 [02:26<00:02, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1536/1563 [02:26<00:02, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1538/1563 [02:27<00:02, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▊| 1540/1563 [02:27<00:02, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▊| 1542/1563 [02:27<00:01, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▉| 1544/1563 [02:27<00:01, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▉| 1546/1563 [02:27<00:01, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▉| 1548/1563 [02:27<00:01, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▉| 1550/1563 [02:28<00:01, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▉| 1552/1563 [02:28<00:01, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▉| 1554/1563 [02:28<00:00, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2: 100%|█████████▉| 1556/1563 [02:28<00:00, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2: 100%|█████████▉| 1558/1563 [02:28<00:00, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2: 100%|█████████▉| 1560/1563 [02:29<00:00, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/2: 100%|█████████▉| 1562/1563 [02:29<00:00, 10.79batch/s]\u001b[A\n",
-            "Epochs:  50%|█████     | 1/2 [02:29<02:29, 149.31s/epoch]"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Epoch 1/2 tamamlandı. Kayıp: 1.7113, Doğruluk: 36.97%\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "\n",
-            "Epoch 2/2:   0%|          | 0/1563 [00:00<?, ?batch/s]\u001b[A\n",
-            "Epoch 2/2:   0%|          | 1/1563 [00:00<03:08,  8.30batch/s]\u001b[A\n",
-            "Epoch 2/2:   0%|          | 2/1563 [00:00<02:51,  9.11batch/s]\u001b[A\n",
-            "Epoch 2/2:   0%|          | 3/1563 [00:00<02:49,  9.19batch/s]\u001b[A\n",
-            "Epoch 2/2:   0%|          | 5/1563 [00:00<02:32, 10.19batch/s]\u001b[A\n",
-            "Epoch 2/2:   0%|          | 7/1563 [00:00<02:30, 10.33batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 9/1563 [00:00<02:26, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 11/1563 [00:01<02:25, 10.70batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 13/1563 [00:01<02:22, 10.86batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 15/1563 [00:01<02:24, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 17/1563 [00:01<02:22, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 19/1563 [00:01<02:22, 10.86batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|▏         | 21/1563 [00:02<02:27, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|▏         | 23/1563 [00:02<02:34,  9.99batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 25/1563 [00:02<02:40,  9.61batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 26/1563 [00:02<02:44,  9.33batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 27/1563 [00:02<02:47,  9.19batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 28/1563 [00:02<02:48,  9.09batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 29/1563 [00:02<02:47,  9.14batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 30/1563 [00:03<02:47,  9.13batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 31/1563 [00:03<02:51,  8.92batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 32/1563 [00:03<02:49,  9.04batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 33/1563 [00:03<02:48,  9.09batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 34/1563 [00:03<02:51,  8.93batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 35/1563 [00:03<02:50,  8.94batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 36/1563 [00:03<02:51,  8.89batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 37/1563 [00:03<02:49,  8.98batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 38/1563 [00:03<02:50,  8.92batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 39/1563 [00:04<02:53,  8.79batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 40/1563 [00:04<02:54,  8.72batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 41/1563 [00:04<02:53,  8.79batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 43/1563 [00:04<02:39,  9.54batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 45/1563 [00:04<02:30, 10.08batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 47/1563 [00:04<02:25, 10.40batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 49/1563 [00:04<02:22, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 51/1563 [00:05<02:20, 10.77batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 53/1563 [00:05<02:19, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▎         | 55/1563 [00:05<02:18, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▎         | 57/1563 [00:05<02:17, 10.93batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▍         | 59/1563 [00:05<02:16, 10.98batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▍         | 61/1563 [00:06<02:18, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▍         | 63/1563 [00:06<02:17, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▍         | 65/1563 [00:06<02:15, 11.02batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▍         | 67/1563 [00:06<02:17, 10.86batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▍         | 69/1563 [00:06<02:16, 10.93batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▍         | 71/1563 [00:07<02:16, 10.90batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▍         | 73/1563 [00:07<02:17, 10.85batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▍         | 75/1563 [00:07<02:15, 11.01batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▍         | 77/1563 [00:07<02:15, 11.00batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▌         | 79/1563 [00:07<02:14, 10.99batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▌         | 81/1563 [00:07<02:14, 11.03batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▌         | 83/1563 [00:08<02:14, 11.02batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▌         | 85/1563 [00:08<02:13, 11.05batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▌         | 87/1563 [00:08<02:13, 11.05batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▌         | 89/1563 [00:08<02:13, 11.07batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▌         | 91/1563 [00:08<02:11, 11.16batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▌         | 93/1563 [00:08<02:12, 11.12batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▌         | 95/1563 [00:09<02:12, 11.04batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▌         | 97/1563 [00:09<02:12, 11.04batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▋         | 99/1563 [00:09<02:12, 11.08batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▋         | 101/1563 [00:09<02:13, 10.96batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 103/1563 [00:09<02:12, 11.03batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 105/1563 [00:10<02:15, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 107/1563 [00:10<02:16, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 109/1563 [00:10<02:17, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 111/1563 [00:10<02:20, 10.33batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 113/1563 [00:10<02:19, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 115/1563 [00:11<02:19, 10.40batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 117/1563 [00:11<02:18, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 119/1563 [00:11<02:17, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 121/1563 [00:11<02:15, 10.63batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 123/1563 [00:11<02:15, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 125/1563 [00:11<02:13, 10.76batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 127/1563 [00:12<02:16, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 129/1563 [00:12<02:15, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 131/1563 [00:12<02:16, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▊         | 133/1563 [00:12<02:16, 10.44batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▊         | 135/1563 [00:12<02:13, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 137/1563 [00:13<02:12, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 139/1563 [00:13<02:10, 10.87batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 141/1563 [00:13<02:09, 10.99batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 143/1563 [00:13<02:09, 11.01batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 145/1563 [00:13<02:10, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 147/1563 [00:14<02:13, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|▉         | 149/1563 [00:14<02:10, 10.80batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|▉         | 151/1563 [00:14<02:18, 10.23batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|▉         | 153/1563 [00:14<02:23,  9.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|▉         | 154/1563 [00:14<02:27,  9.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|▉         | 155/1563 [00:14<02:27,  9.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|▉         | 156/1563 [00:15<02:28,  9.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|█         | 157/1563 [00:15<02:33,  9.15batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|█         | 158/1563 [00:15<02:34,  9.09batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|█         | 159/1563 [00:15<02:39,  8.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|█         | 160/1563 [00:15<02:41,  8.71batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|█         | 161/1563 [00:15<02:40,  8.76batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|█         | 162/1563 [00:15<02:43,  8.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|█         | 163/1563 [00:15<02:42,  8.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|█         | 164/1563 [00:15<02:42,  8.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█         | 165/1563 [00:16<02:41,  8.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█         | 166/1563 [00:16<02:39,  8.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█         | 167/1563 [00:16<02:39,  8.76batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█         | 168/1563 [00:16<02:38,  8.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█         | 169/1563 [00:16<02:40,  8.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█         | 171/1563 [00:16<02:24,  9.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█         | 173/1563 [00:16<02:19,  9.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█         | 175/1563 [00:17<02:15, 10.26batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█▏        | 177/1563 [00:17<02:11, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█▏        | 179/1563 [00:17<02:09, 10.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 181/1563 [00:17<02:07, 10.80batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 183/1563 [00:17<02:07, 10.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 185/1563 [00:17<02:09, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 187/1563 [00:18<02:10, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 189/1563 [00:18<02:09, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 191/1563 [00:18<02:08, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 193/1563 [00:18<02:08, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 195/1563 [00:18<02:09, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 197/1563 [00:19<02:08, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 199/1563 [00:19<02:07, 10.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 201/1563 [00:19<02:06, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 203/1563 [00:19<02:05, 10.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 205/1563 [00:19<02:04, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 207/1563 [00:20<02:05, 10.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 209/1563 [00:20<02:04, 10.89batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 211/1563 [00:20<02:03, 10.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  14%|█▎        | 213/1563 [00:20<02:03, 10.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  14%|█▍        | 215/1563 [00:20<02:02, 11.03batch/s]\u001b[A\n",
-            "Epoch 2/2:  14%|█▍        | 217/1563 [00:20<02:02, 11.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  14%|█▍        | 219/1563 [00:21<02:03, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  14%|█▍        | 221/1563 [00:21<02:02, 10.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  14%|█▍        | 223/1563 [00:21<02:01, 11.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  14%|█▍        | 225/1563 [00:21<02:02, 10.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▍        | 227/1563 [00:21<02:02, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▍        | 229/1563 [00:22<02:01, 10.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▍        | 231/1563 [00:22<02:01, 10.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▍        | 233/1563 [00:22<02:00, 11.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▌        | 235/1563 [00:22<02:01, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▌        | 237/1563 [00:22<02:01, 10.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▌        | 239/1563 [00:22<02:02, 10.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▌        | 241/1563 [00:23<02:06, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▌        | 243/1563 [00:23<02:05, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▌        | 245/1563 [00:23<02:03, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▌        | 247/1563 [00:23<02:05, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▌        | 249/1563 [00:23<02:04, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▌        | 251/1563 [00:24<02:02, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▌        | 253/1563 [00:24<02:01, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▋        | 255/1563 [00:24<02:02, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▋        | 257/1563 [00:24<02:01, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 259/1563 [00:24<02:12,  9.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 261/1563 [00:25<02:13,  9.77batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 263/1563 [00:25<02:08, 10.13batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 265/1563 [00:25<02:05, 10.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 267/1563 [00:25<02:04, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 269/1563 [00:25<02:02, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 271/1563 [00:26<02:02, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 273/1563 [00:26<02:03, 10.41batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 275/1563 [00:26<02:01, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 277/1563 [00:26<02:03, 10.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 279/1563 [00:26<02:10,  9.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 280/1563 [00:26<02:14,  9.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 281/1563 [00:27<02:19,  9.21batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 282/1563 [00:27<02:21,  9.03batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 283/1563 [00:27<02:22,  9.01batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 284/1563 [00:27<02:23,  8.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 285/1563 [00:27<02:21,  9.05batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 286/1563 [00:27<02:20,  9.09batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 287/1563 [00:27<02:22,  8.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 288/1563 [00:27<02:19,  9.11batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 289/1563 [00:27<02:23,  8.89batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▊        | 290/1563 [00:28<02:25,  8.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▊        | 291/1563 [00:28<02:24,  8.80batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▊        | 292/1563 [00:28<02:26,  8.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▊        | 293/1563 [00:28<02:25,  8.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▉        | 294/1563 [00:28<02:28,  8.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▉        | 295/1563 [00:28<02:33,  8.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▉        | 296/1563 [00:28<02:31,  8.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▉        | 297/1563 [00:28<02:32,  8.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▉        | 299/1563 [00:29<02:16,  9.27batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▉        | 300/1563 [00:29<02:13,  9.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▉        | 302/1563 [00:29<02:05, 10.01batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▉        | 303/1563 [00:29<02:06,  9.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|█▉        | 305/1563 [00:29<02:01, 10.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|█▉        | 307/1563 [00:29<01:59, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|█▉        | 309/1563 [00:30<01:57, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|█▉        | 311/1563 [00:30<01:55, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|██        | 313/1563 [00:30<01:54, 10.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|██        | 315/1563 [00:30<01:54, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|██        | 317/1563 [00:30<01:53, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|██        | 319/1563 [00:30<01:53, 10.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██        | 321/1563 [00:31<01:55, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██        | 323/1563 [00:31<01:55, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██        | 325/1563 [00:31<01:54, 10.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██        | 327/1563 [00:31<01:54, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██        | 329/1563 [00:31<01:52, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██        | 331/1563 [00:32<01:53, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██▏       | 333/1563 [00:32<01:52, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██▏       | 335/1563 [00:32<01:53, 10.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 337/1563 [00:32<01:52, 10.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 339/1563 [00:32<01:52, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 341/1563 [00:32<01:50, 11.01batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 343/1563 [00:33<01:51, 10.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 345/1563 [00:33<01:51, 10.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 347/1563 [00:33<01:51, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 349/1563 [00:33<01:50, 10.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 351/1563 [00:33<01:50, 10.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 353/1563 [00:34<01:50, 10.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 355/1563 [00:34<01:50, 10.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 357/1563 [00:34<01:50, 10.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 359/1563 [00:34<01:49, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 361/1563 [00:34<01:48, 11.05batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 363/1563 [00:35<01:49, 10.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 365/1563 [00:35<01:49, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 367/1563 [00:35<01:49, 10.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▎       | 369/1563 [00:35<01:50, 10.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▎       | 371/1563 [00:35<01:48, 10.98batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 373/1563 [00:35<01:47, 11.04batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 375/1563 [00:36<01:48, 10.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 377/1563 [00:36<01:48, 10.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 379/1563 [00:36<01:49, 10.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 381/1563 [00:36<01:48, 10.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▍       | 383/1563 [00:36<01:48, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▍       | 385/1563 [00:37<01:47, 10.98batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▍       | 387/1563 [00:37<01:49, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▍       | 389/1563 [00:37<01:49, 10.76batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▌       | 391/1563 [00:37<01:48, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▌       | 393/1563 [00:37<01:48, 10.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▌       | 395/1563 [00:37<01:46, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▌       | 397/1563 [00:38<01:46, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▌       | 399/1563 [00:38<01:46, 10.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▌       | 401/1563 [00:38<01:46, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▌       | 403/1563 [00:38<01:45, 11.02batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▌       | 405/1563 [00:38<01:44, 11.06batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▌       | 407/1563 [00:39<01:47, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▌       | 409/1563 [00:39<01:54, 10.07batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▋       | 411/1563 [00:39<01:58,  9.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▋       | 412/1563 [00:39<02:01,  9.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▋       | 413/1563 [00:39<02:02,  9.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▋       | 414/1563 [00:39<02:03,  9.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 415/1563 [00:39<02:06,  9.08batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 416/1563 [00:40<02:07,  9.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 417/1563 [00:40<02:08,  8.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 418/1563 [00:40<02:07,  9.01batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 419/1563 [00:40<02:06,  9.05batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 420/1563 [00:40<02:05,  9.14batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 421/1563 [00:40<02:13,  8.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 422/1563 [00:40<02:13,  8.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 423/1563 [00:40<02:14,  8.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 424/1563 [00:41<02:19,  8.15batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 425/1563 [00:41<02:20,  8.12batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 426/1563 [00:41<02:15,  8.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 428/1563 [00:41<02:00,  9.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 430/1563 [00:41<01:53,  9.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 431/1563 [00:41<01:54,  9.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 433/1563 [00:41<01:48, 10.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 435/1563 [00:42<01:46, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 437/1563 [00:42<01:44, 10.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 439/1563 [00:42<01:43, 10.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 441/1563 [00:42<01:42, 10.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 443/1563 [00:42<01:43, 10.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 445/1563 [00:42<01:43, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▊       | 447/1563 [00:43<01:42, 10.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▊       | 449/1563 [00:43<01:42, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▉       | 451/1563 [00:43<01:41, 10.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▉       | 453/1563 [00:43<01:40, 11.02batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▉       | 455/1563 [00:43<01:41, 10.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▉       | 457/1563 [00:44<01:43, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▉       | 459/1563 [00:44<01:42, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▉       | 461/1563 [00:44<01:42, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|██▉       | 463/1563 [00:44<01:42, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|██▉       | 465/1563 [00:44<01:43, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|██▉       | 467/1563 [00:45<01:41, 10.76batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|███       | 469/1563 [00:45<01:41, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|███       | 471/1563 [00:45<01:39, 10.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|███       | 473/1563 [00:45<01:39, 10.98batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|███       | 475/1563 [00:45<01:39, 10.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███       | 477/1563 [00:45<01:39, 10.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███       | 479/1563 [00:46<01:39, 10.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███       | 481/1563 [00:46<01:38, 10.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███       | 483/1563 [00:46<01:38, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███       | 485/1563 [00:46<01:38, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███       | 487/1563 [00:46<01:38, 10.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███▏      | 489/1563 [00:47<01:38, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███▏      | 491/1563 [00:47<01:39, 10.77batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 493/1563 [00:47<01:38, 10.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 495/1563 [00:47<01:38, 10.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 497/1563 [00:47<01:38, 10.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 499/1563 [00:47<01:38, 10.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 501/1563 [00:48<01:37, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 503/1563 [00:48<01:36, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 505/1563 [00:48<01:36, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 507/1563 [00:48<01:36, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 509/1563 [00:48<01:36, 10.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 511/1563 [00:49<01:36, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 513/1563 [00:49<01:34, 11.06batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 515/1563 [00:49<01:35, 11.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 517/1563 [00:49<01:34, 11.02batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 519/1563 [00:49<01:34, 11.09batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 521/1563 [00:49<01:33, 11.13batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 523/1563 [00:50<01:33, 11.08batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▎      | 525/1563 [00:50<01:34, 11.04batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▎      | 527/1563 [00:50<01:33, 11.10batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▍      | 529/1563 [00:50<01:33, 11.09batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▍      | 531/1563 [00:50<01:32, 11.15batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▍      | 533/1563 [00:51<01:34, 10.89batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▍      | 535/1563 [00:51<01:33, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▍      | 537/1563 [00:51<01:38, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▍      | 539/1563 [00:51<01:43,  9.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▍      | 540/1563 [00:51<01:45,  9.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▍      | 541/1563 [00:51<01:46,  9.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▍      | 542/1563 [00:52<01:50,  9.22batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▍      | 543/1563 [00:52<01:53,  9.01batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▍      | 544/1563 [00:52<01:52,  9.07batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▍      | 545/1563 [00:52<01:53,  8.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▍      | 546/1563 [00:52<01:54,  8.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▍      | 547/1563 [00:52<01:52,  9.01batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▌      | 548/1563 [00:52<01:52,  9.04batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▌      | 549/1563 [00:52<01:50,  9.16batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▌      | 550/1563 [00:52<01:51,  9.06batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▌      | 551/1563 [00:53<01:56,  8.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▌      | 552/1563 [00:53<01:59,  8.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▌      | 553/1563 [00:53<01:56,  8.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▌      | 554/1563 [00:53<01:58,  8.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▌      | 555/1563 [00:53<02:00,  8.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▌      | 556/1563 [00:53<01:59,  8.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▌      | 558/1563 [00:53<01:46,  9.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▌      | 560/1563 [00:54<01:41,  9.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▌      | 561/1563 [00:54<01:44,  9.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▌      | 563/1563 [00:54<01:39, 10.06batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▌      | 565/1563 [00:54<01:37, 10.23batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▋      | 567/1563 [00:54<01:36, 10.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▋      | 569/1563 [00:54<01:36, 10.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 571/1563 [00:55<01:34, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 573/1563 [00:55<01:33, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 575/1563 [00:55<01:31, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 577/1563 [00:55<01:30, 10.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 579/1563 [00:55<01:30, 10.87batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 581/1563 [00:55<01:30, 10.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 583/1563 [00:56<01:31, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 585/1563 [00:56<01:29, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 587/1563 [00:56<01:29, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 589/1563 [00:56<01:29, 10.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 591/1563 [00:56<01:29, 10.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 593/1563 [00:57<01:30, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 595/1563 [00:57<01:30, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 597/1563 [00:57<01:29, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 599/1563 [00:57<01:28, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 601/1563 [00:57<01:27, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▊      | 603/1563 [00:57<01:27, 10.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▊      | 605/1563 [00:58<01:29, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▉      | 607/1563 [00:58<01:28, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▉      | 609/1563 [00:58<01:27, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▉      | 611/1563 [00:58<01:26, 11.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▉      | 613/1563 [00:58<01:27, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▉      | 615/1563 [00:59<01:27, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▉      | 617/1563 [00:59<01:27, 10.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|███▉      | 619/1563 [00:59<01:26, 10.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|███▉      | 621/1563 [00:59<01:26, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|███▉      | 623/1563 [00:59<01:26, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|███▉      | 625/1563 [01:00<01:25, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|████      | 627/1563 [01:00<01:26, 10.87batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|████      | 629/1563 [01:00<01:25, 10.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|████      | 631/1563 [01:00<01:24, 11.03batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|████      | 633/1563 [01:00<01:24, 11.04batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████      | 635/1563 [01:00<01:24, 10.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████      | 637/1563 [01:01<01:24, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████      | 639/1563 [01:01<01:24, 10.87batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████      | 641/1563 [01:01<01:23, 10.98batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████      | 643/1563 [01:01<01:23, 10.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████▏     | 645/1563 [01:01<01:24, 10.87batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████▏     | 647/1563 [01:02<01:24, 10.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 649/1563 [01:02<01:24, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 651/1563 [01:02<01:24, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 653/1563 [01:02<01:24, 10.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 655/1563 [01:02<01:23, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 657/1563 [01:02<01:24, 10.77batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 659/1563 [01:03<01:23, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 661/1563 [01:03<01:24, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 663/1563 [01:03<01:23, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 665/1563 [01:03<01:25, 10.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 667/1563 [01:03<01:29,  9.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 669/1563 [01:04<01:32,  9.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 670/1563 [01:04<01:34,  9.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 671/1563 [01:04<01:37,  9.18batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 672/1563 [01:04<01:39,  8.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 673/1563 [01:04<01:40,  8.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 674/1563 [01:04<01:40,  8.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 675/1563 [01:04<01:42,  8.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 676/1563 [01:05<01:43,  8.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 677/1563 [01:05<01:45,  8.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 678/1563 [01:05<01:44,  8.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 679/1563 [01:05<01:43,  8.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▎     | 680/1563 [01:05<01:44,  8.41batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▎     | 681/1563 [01:05<01:44,  8.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▎     | 682/1563 [01:05<01:43,  8.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▎     | 683/1563 [01:05<01:45,  8.31batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▍     | 684/1563 [01:05<01:42,  8.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▍     | 686/1563 [01:06<01:32,  9.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▍     | 688/1563 [01:06<01:27, 10.05batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▍     | 690/1563 [01:06<01:25, 10.18batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▍     | 692/1563 [01:06<01:23, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▍     | 694/1563 [01:06<01:21, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▍     | 696/1563 [01:07<01:20, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▍     | 698/1563 [01:07<01:19, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▍     | 700/1563 [01:07<01:19, 10.89batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▍     | 702/1563 [01:07<01:20, 10.71batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▌     | 704/1563 [01:07<01:19, 10.80batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▌     | 706/1563 [01:07<01:18, 10.89batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▌     | 708/1563 [01:08<01:18, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▌     | 710/1563 [01:08<01:17, 11.04batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▌     | 712/1563 [01:08<01:18, 10.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▌     | 714/1563 [01:08<01:17, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▌     | 716/1563 [01:08<01:17, 10.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▌     | 718/1563 [01:09<01:16, 10.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▌     | 720/1563 [01:09<01:16, 10.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▌     | 722/1563 [01:09<01:16, 10.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▋     | 724/1563 [01:09<01:17, 10.89batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▋     | 726/1563 [01:09<01:16, 10.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 728/1563 [01:09<01:16, 10.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 730/1563 [01:10<01:16, 10.87batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 732/1563 [01:10<01:16, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 734/1563 [01:10<01:16, 10.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 736/1563 [01:10<01:17, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 738/1563 [01:10<01:16, 10.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 740/1563 [01:11<01:15, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 742/1563 [01:11<01:15, 10.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 744/1563 [01:11<01:15, 10.89batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 746/1563 [01:11<01:16, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 748/1563 [01:11<01:15, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 750/1563 [01:12<01:15, 10.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 752/1563 [01:12<01:15, 10.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 754/1563 [01:12<01:14, 10.87batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 756/1563 [01:12<01:14, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 758/1563 [01:12<01:15, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▊     | 760/1563 [01:12<01:15, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▉     | 762/1563 [01:13<01:14, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▉     | 764/1563 [01:13<01:13, 10.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▉     | 766/1563 [01:13<01:12, 10.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▉     | 768/1563 [01:13<01:13, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▉     | 770/1563 [01:13<01:13, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▉     | 772/1563 [01:14<01:12, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|████▉     | 774/1563 [01:14<01:14, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|████▉     | 776/1563 [01:14<01:13, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|████▉     | 778/1563 [01:14<01:12, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|████▉     | 780/1563 [01:14<01:13, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|█████     | 782/1563 [01:14<01:13, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|█████     | 784/1563 [01:15<01:13, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|█████     | 786/1563 [01:15<01:12, 10.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|█████     | 788/1563 [01:15<01:12, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 790/1563 [01:15<01:12, 10.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 792/1563 [01:15<01:11, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 794/1563 [01:16<01:15, 10.13batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 796/1563 [01:16<01:17,  9.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 797/1563 [01:16<01:19,  9.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 798/1563 [01:16<01:21,  9.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 799/1563 [01:16<01:22,  9.31batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 800/1563 [01:16<01:24,  9.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 801/1563 [01:16<01:26,  8.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████▏    | 802/1563 [01:17<01:25,  8.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████▏    | 803/1563 [01:17<01:26,  8.80batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████▏    | 804/1563 [01:17<01:25,  8.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 805/1563 [01:17<01:26,  8.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 806/1563 [01:17<01:26,  8.71batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 807/1563 [01:17<01:26,  8.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 808/1563 [01:17<01:25,  8.87batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 809/1563 [01:17<01:26,  8.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 810/1563 [01:17<01:27,  8.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 811/1563 [01:18<01:27,  8.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 812/1563 [01:18<01:26,  8.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 813/1563 [01:18<01:28,  8.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 815/1563 [01:18<01:18,  9.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 817/1563 [01:18<01:14, 10.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 819/1563 [01:18<01:11, 10.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 821/1563 [01:19<01:10, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 823/1563 [01:19<01:09, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 825/1563 [01:19<01:08, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 827/1563 [01:19<01:08, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 829/1563 [01:19<01:07, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 831/1563 [01:19<01:06, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 833/1563 [01:20<01:06, 10.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 835/1563 [01:20<01:06, 10.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▎    | 837/1563 [01:20<01:06, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▎    | 839/1563 [01:20<01:06, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▍    | 841/1563 [01:20<01:05, 10.98batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▍    | 843/1563 [01:21<01:07, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▍    | 845/1563 [01:21<01:06, 10.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▍    | 847/1563 [01:21<01:05, 10.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▍    | 849/1563 [01:21<01:05, 10.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▍    | 851/1563 [01:21<01:04, 11.03batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▍    | 853/1563 [01:22<01:04, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▍    | 855/1563 [01:22<01:05, 10.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▍    | 857/1563 [01:22<01:05, 10.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▍    | 859/1563 [01:22<01:05, 10.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▌    | 861/1563 [01:22<01:04, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▌    | 863/1563 [01:22<01:04, 10.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▌    | 865/1563 [01:23<01:05, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▌    | 867/1563 [01:23<01:04, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 869/1563 [01:23<01:04, 10.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 871/1563 [01:23<01:04, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 873/1563 [01:23<01:03, 10.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 875/1563 [01:24<01:03, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 877/1563 [01:24<01:03, 10.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 879/1563 [01:24<01:02, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▋    | 881/1563 [01:24<01:02, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▋    | 883/1563 [01:24<01:02, 10.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 885/1563 [01:24<01:01, 10.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 887/1563 [01:25<01:01, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 889/1563 [01:25<01:01, 10.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 891/1563 [01:25<01:01, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 893/1563 [01:25<01:01, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 895/1563 [01:25<01:01, 10.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 897/1563 [01:26<01:01, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 899/1563 [01:26<01:01, 10.76batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 901/1563 [01:26<01:01, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 903/1563 [01:26<01:01, 10.80batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 905/1563 [01:26<01:00, 10.80batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 907/1563 [01:26<01:00, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 909/1563 [01:27<01:01, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 911/1563 [01:27<01:01, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 913/1563 [01:27<01:00, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▊    | 915/1563 [01:27<01:00, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▊    | 917/1563 [01:27<01:00, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▉    | 919/1563 [01:28<01:00, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▉    | 921/1563 [01:28<01:00, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▉    | 923/1563 [01:28<01:02, 10.19batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▉    | 925/1563 [01:28<01:03,  9.98batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▉    | 927/1563 [01:28<01:07,  9.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▉    | 928/1563 [01:29<01:08,  9.31batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▉    | 929/1563 [01:29<01:08,  9.24batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|█████▉    | 930/1563 [01:29<01:09,  9.07batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|█████▉    | 931/1563 [01:29<01:10,  8.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|█████▉    | 932/1563 [01:29<01:11,  8.89batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|█████▉    | 933/1563 [01:29<01:10,  8.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|█████▉    | 934/1563 [01:29<01:09,  9.04batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|█████▉    | 935/1563 [01:29<01:09,  8.98batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|█████▉    | 936/1563 [01:30<01:11,  8.71batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|█████▉    | 937/1563 [01:30<01:13,  8.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|██████    | 938/1563 [01:30<01:12,  8.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|██████    | 939/1563 [01:30<01:14,  8.41batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|██████    | 940/1563 [01:30<01:13,  8.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|██████    | 941/1563 [01:30<01:12,  8.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|██████    | 942/1563 [01:30<01:14,  8.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|██████    | 944/1563 [01:30<01:06,  9.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|██████    | 945/1563 [01:31<01:05,  9.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████    | 946/1563 [01:31<01:05,  9.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████    | 947/1563 [01:31<01:04,  9.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████    | 949/1563 [01:31<01:01,  9.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████    | 951/1563 [01:31<00:59, 10.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████    | 953/1563 [01:31<00:57, 10.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████    | 955/1563 [01:31<00:57, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████    | 957/1563 [01:32<00:56, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████▏   | 959/1563 [01:32<00:55, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████▏   | 961/1563 [01:32<00:56, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  62%|██████▏   | 963/1563 [01:32<00:56, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  62%|██████▏   | 965/1563 [01:32<00:55, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  62%|██████▏   | 967/1563 [01:33<00:55, 10.76batch/s]\u001b[A\n",
-            "Epoch 2/2:  62%|██████▏   | 969/1563 [01:33<00:54, 10.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  62%|██████▏   | 971/1563 [01:33<00:55, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  62%|██████▏   | 973/1563 [01:33<00:54, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  62%|██████▏   | 975/1563 [01:33<00:54, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 977/1563 [01:33<00:54, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 979/1563 [01:34<00:54, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 981/1563 [01:34<00:53, 10.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 983/1563 [01:34<00:54, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 985/1563 [01:34<00:54, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 987/1563 [01:34<00:53, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 989/1563 [01:35<00:53, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 991/1563 [01:35<00:53, 10.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▎   | 993/1563 [01:35<00:53, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▎   | 995/1563 [01:35<00:52, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 997/1563 [01:35<00:52, 10.77batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 999/1563 [01:36<00:52, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 1001/1563 [01:36<00:52, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 1003/1563 [01:36<00:52, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 1005/1563 [01:36<00:52, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 1007/1563 [01:36<00:52, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▍   | 1009/1563 [01:36<00:51, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▍   | 1011/1563 [01:37<00:51, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▍   | 1013/1563 [01:37<00:51, 10.71batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▍   | 1015/1563 [01:37<00:52, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▌   | 1017/1563 [01:37<00:51, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▌   | 1019/1563 [01:37<00:51, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▌   | 1021/1563 [01:38<00:50, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▌   | 1023/1563 [01:38<00:49, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▌   | 1025/1563 [01:38<00:49, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▌   | 1027/1563 [01:38<00:50, 10.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▌   | 1029/1563 [01:38<00:49, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▌   | 1031/1563 [01:39<00:49, 10.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▌   | 1033/1563 [01:39<00:48, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▌   | 1035/1563 [01:39<00:48, 10.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▋   | 1037/1563 [01:39<00:48, 10.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▋   | 1039/1563 [01:39<00:48, 10.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1041/1563 [01:39<00:47, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1043/1563 [01:40<00:47, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1045/1563 [01:40<00:47, 11.02batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1047/1563 [01:40<00:46, 11.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1049/1563 [01:40<00:46, 10.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1051/1563 [01:40<00:49, 10.27batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1053/1563 [01:41<00:51,  9.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1054/1563 [01:41<00:52,  9.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1055/1563 [01:41<00:53,  9.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1056/1563 [01:41<00:55,  9.13batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1057/1563 [01:41<00:56,  8.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1058/1563 [01:41<00:57,  8.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1059/1563 [01:41<00:56,  8.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1060/1563 [01:41<00:55,  9.06batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1061/1563 [01:42<00:54,  9.18batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1062/1563 [01:42<00:55,  9.03batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1063/1563 [01:42<00:54,  9.23batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1064/1563 [01:42<00:55,  9.06batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1065/1563 [01:42<00:55,  8.98batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1066/1563 [01:42<00:54,  9.14batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1067/1563 [01:42<00:53,  9.24batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1068/1563 [01:42<00:55,  8.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1069/1563 [01:42<00:56,  8.76batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1070/1563 [01:43<00:57,  8.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▊   | 1071/1563 [01:43<00:58,  8.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▊   | 1073/1563 [01:43<00:52,  9.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▉   | 1075/1563 [01:43<00:48,  9.98batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▉   | 1077/1563 [01:43<00:47, 10.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▉   | 1079/1563 [01:43<00:46, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▉   | 1081/1563 [01:44<00:45, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▉   | 1083/1563 [01:44<00:45, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▉   | 1085/1563 [01:44<00:44, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|██████▉   | 1087/1563 [01:44<00:44, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|██████▉   | 1089/1563 [01:44<00:44, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|██████▉   | 1091/1563 [01:45<00:44, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|██████▉   | 1093/1563 [01:45<00:44, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|███████   | 1095/1563 [01:45<00:43, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|███████   | 1097/1563 [01:45<00:43, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|███████   | 1099/1563 [01:45<00:43, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|███████   | 1101/1563 [01:45<00:42, 10.77batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████   | 1103/1563 [01:46<00:43, 10.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████   | 1105/1563 [01:46<00:42, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████   | 1107/1563 [01:46<00:42, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████   | 1109/1563 [01:46<00:42, 10.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████   | 1111/1563 [01:46<00:41, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████   | 1113/1563 [01:47<00:42, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████▏  | 1115/1563 [01:47<00:41, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████▏  | 1117/1563 [01:47<00:41, 10.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1119/1563 [01:47<00:40, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1121/1563 [01:47<00:40, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1123/1563 [01:47<00:40, 10.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1125/1563 [01:48<00:40, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1127/1563 [01:48<00:40, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1129/1563 [01:48<00:40, 10.77batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1131/1563 [01:48<00:39, 10.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1133/1563 [01:48<00:39, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1135/1563 [01:49<00:39, 10.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1137/1563 [01:49<00:39, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1139/1563 [01:49<00:38, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1141/1563 [01:49<00:38, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1143/1563 [01:49<00:38, 10.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1145/1563 [01:50<00:38, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1147/1563 [01:50<00:38, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▎  | 1149/1563 [01:50<00:38, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▎  | 1151/1563 [01:50<00:38, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▍  | 1153/1563 [01:50<00:37, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▍  | 1155/1563 [01:50<00:37, 10.76batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▍  | 1157/1563 [01:51<00:37, 10.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▍  | 1159/1563 [01:51<00:37, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▍  | 1161/1563 [01:51<00:36, 10.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▍  | 1163/1563 [01:51<00:36, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▍  | 1165/1563 [01:51<00:36, 11.01batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▍  | 1167/1563 [01:52<00:36, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▍  | 1169/1563 [01:52<00:36, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▍  | 1171/1563 [01:52<00:36, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▌  | 1173/1563 [01:52<00:36, 10.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▌  | 1175/1563 [01:52<00:35, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▌  | 1177/1563 [01:52<00:35, 10.80batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▌  | 1179/1563 [01:53<00:35, 10.80batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1181/1563 [01:53<00:37, 10.26batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1183/1563 [01:53<00:38,  9.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1184/1563 [01:53<00:40,  9.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1185/1563 [01:53<00:42,  8.98batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1186/1563 [01:53<00:42,  8.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1187/1563 [01:54<00:43,  8.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1188/1563 [01:54<00:42,  8.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1189/1563 [01:54<00:41,  8.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1190/1563 [01:54<00:41,  9.03batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1191/1563 [01:54<00:40,  9.10batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▋  | 1192/1563 [01:54<00:40,  9.11batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▋  | 1193/1563 [01:54<00:41,  9.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▋  | 1194/1563 [01:54<00:41,  8.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▋  | 1195/1563 [01:55<00:42,  8.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1196/1563 [01:55<00:43,  8.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1197/1563 [01:55<00:44,  8.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1198/1563 [01:55<00:42,  8.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1199/1563 [01:55<00:44,  8.21batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1200/1563 [01:55<00:42,  8.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1202/1563 [01:55<00:37,  9.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1203/1563 [01:55<00:37,  9.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1205/1563 [01:56<00:35,  9.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1206/1563 [01:56<00:36,  9.76batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1208/1563 [01:56<00:34, 10.14batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1210/1563 [01:56<00:33, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1212/1563 [01:56<00:33, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1214/1563 [01:56<00:32, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1216/1563 [01:57<00:32, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1218/1563 [01:57<00:32, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1220/1563 [01:57<00:32, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1222/1563 [01:57<00:32, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1224/1563 [01:57<00:31, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1226/1563 [01:58<00:31, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▊  | 1228/1563 [01:58<00:31, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▊  | 1230/1563 [01:58<00:31, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▉  | 1232/1563 [01:58<00:30, 10.71batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▉  | 1234/1563 [01:58<00:30, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▉  | 1236/1563 [01:58<00:29, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▉  | 1238/1563 [01:59<00:30, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▉  | 1240/1563 [01:59<00:30, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▉  | 1242/1563 [01:59<00:29, 10.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|███████▉  | 1244/1563 [01:59<00:29, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|███████▉  | 1246/1563 [01:59<00:29, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|███████▉  | 1248/1563 [02:00<00:29, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|███████▉  | 1250/1563 [02:00<00:30, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|████████  | 1252/1563 [02:00<00:29, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|████████  | 1254/1563 [02:00<00:29, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|████████  | 1256/1563 [02:00<00:29, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|████████  | 1258/1563 [02:01<00:28, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████  | 1260/1563 [02:01<00:28, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████  | 1262/1563 [02:01<00:28, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████  | 1264/1563 [02:01<00:28, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████  | 1266/1563 [02:01<00:27, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████  | 1268/1563 [02:01<00:27, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████▏ | 1270/1563 [02:02<00:27, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████▏ | 1272/1563 [02:02<00:27, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1274/1563 [02:02<00:26, 10.76batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1276/1563 [02:02<00:26, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1278/1563 [02:02<00:26, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1280/1563 [02:03<00:26, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1282/1563 [02:03<00:26, 10.80batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1284/1563 [02:03<00:25, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1286/1563 [02:03<00:25, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1288/1563 [02:03<00:25, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1290/1563 [02:04<00:25, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1292/1563 [02:04<00:25, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1294/1563 [02:04<00:25, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1296/1563 [02:04<00:25, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1298/1563 [02:04<00:24, 10.80batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1300/1563 [02:04<00:24, 10.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1302/1563 [02:05<00:24, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1304/1563 [02:05<00:23, 10.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▎ | 1306/1563 [02:05<00:24, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▎ | 1308/1563 [02:05<00:25, 10.11batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1310/1563 [02:05<00:26,  9.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1311/1563 [02:06<00:26,  9.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1312/1563 [02:06<00:26,  9.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1313/1563 [02:06<00:27,  9.18batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1314/1563 [02:06<00:26,  9.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1315/1563 [02:06<00:26,  9.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1316/1563 [02:06<00:26,  9.27batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1317/1563 [02:06<00:26,  9.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1318/1563 [02:06<00:26,  9.41batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1319/1563 [02:06<00:26,  9.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1320/1563 [02:07<00:26,  9.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▍ | 1321/1563 [02:07<00:26,  9.22batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▍ | 1322/1563 [02:07<00:26,  9.12batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▍ | 1323/1563 [02:07<00:26,  9.06batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▍ | 1324/1563 [02:07<00:26,  8.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▍ | 1325/1563 [02:07<00:27,  8.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▍ | 1326/1563 [02:07<00:27,  8.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▍ | 1327/1563 [02:07<00:28,  8.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▍ | 1328/1563 [02:08<00:28,  8.20batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▌ | 1329/1563 [02:08<00:27,  8.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▌ | 1331/1563 [02:08<00:24,  9.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▌ | 1333/1563 [02:08<00:23,  9.89batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▌ | 1334/1563 [02:08<00:23,  9.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▌ | 1336/1563 [02:08<00:22, 10.10batch/s]\u001b[A\n",
-            "Epoch 2/2:  86%|████████▌ | 1338/1563 [02:08<00:21, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  86%|████████▌ | 1340/1563 [02:09<00:21, 10.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  86%|████████▌ | 1342/1563 [02:09<00:20, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  86%|████████▌ | 1344/1563 [02:09<00:20, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  86%|████████▌ | 1346/1563 [02:09<00:20, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  86%|████████▌ | 1348/1563 [02:09<00:20, 10.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  86%|████████▋ | 1350/1563 [02:10<00:20, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1352/1563 [02:10<00:19, 10.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1354/1563 [02:10<00:19, 10.71batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1356/1563 [02:10<00:19, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1358/1563 [02:10<00:19, 10.77batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1360/1563 [02:11<00:18, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1362/1563 [02:11<00:19, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1364/1563 [02:11<00:18, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1366/1563 [02:11<00:18, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1368/1563 [02:11<00:18, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1370/1563 [02:11<00:18, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1372/1563 [02:12<00:18, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1374/1563 [02:12<00:17, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1376/1563 [02:12<00:17, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1378/1563 [02:12<00:17, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1380/1563 [02:12<00:17, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1382/1563 [02:13<00:16, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▊ | 1384/1563 [02:13<00:16, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▊ | 1386/1563 [02:13<00:16, 10.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1388/1563 [02:13<00:16, 10.89batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1390/1563 [02:13<00:16, 10.71batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1392/1563 [02:14<00:15, 10.71batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1394/1563 [02:14<00:15, 10.71batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1396/1563 [02:14<00:16, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1398/1563 [02:14<00:15, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|████████▉ | 1400/1563 [02:14<00:15, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|████████▉ | 1402/1563 [02:14<00:15, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|████████▉ | 1404/1563 [02:15<00:14, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|████████▉ | 1406/1563 [02:15<00:14, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|█████████ | 1408/1563 [02:15<00:14, 10.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|█████████ | 1410/1563 [02:15<00:14, 10.89batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|█████████ | 1412/1563 [02:15<00:14, 10.76batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|█████████ | 1414/1563 [02:16<00:13, 10.71batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████ | 1416/1563 [02:16<00:13, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████ | 1418/1563 [02:16<00:13, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████ | 1420/1563 [02:16<00:13, 10.77batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████ | 1422/1563 [02:16<00:13, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████ | 1424/1563 [02:17<00:12, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████ | 1426/1563 [02:17<00:12, 10.80batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████▏| 1428/1563 [02:17<00:12, 10.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████▏| 1430/1563 [02:17<00:12, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1432/1563 [02:17<00:12, 10.87batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1434/1563 [02:17<00:11, 10.76batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1436/1563 [02:18<00:12, 10.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1438/1563 [02:18<00:12, 10.03batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1440/1563 [02:18<00:12,  9.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1441/1563 [02:18<00:12,  9.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1442/1563 [02:18<00:12,  9.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1443/1563 [02:18<00:12,  9.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1444/1563 [02:19<00:12,  9.26batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1445/1563 [02:19<00:13,  8.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1446/1563 [02:19<00:13,  8.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1447/1563 [02:19<00:13,  8.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1448/1563 [02:19<00:13,  8.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1449/1563 [02:19<00:13,  8.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1450/1563 [02:19<00:12,  8.71batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1451/1563 [02:19<00:12,  8.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1452/1563 [02:19<00:13,  8.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1453/1563 [02:20<00:13,  8.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1454/1563 [02:20<00:13,  8.33batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1455/1563 [02:20<00:12,  8.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1456/1563 [02:20<00:12,  8.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1457/1563 [02:20<00:11,  8.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1459/1563 [02:20<00:10,  9.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1461/1563 [02:20<00:10, 10.16batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▎| 1462/1563 [02:21<00:10,  9.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▎| 1463/1563 [02:21<00:10,  9.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▎| 1465/1563 [02:21<00:09, 10.20batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▍| 1467/1563 [02:21<00:09, 10.41batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▍| 1469/1563 [02:21<00:08, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▍| 1471/1563 [02:21<00:08, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▍| 1473/1563 [02:22<00:08, 10.33batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▍| 1475/1563 [02:22<00:08, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▍| 1477/1563 [02:22<00:08, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▍| 1479/1563 [02:22<00:07, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▍| 1481/1563 [02:22<00:07, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▍| 1483/1563 [02:23<00:07, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▌| 1485/1563 [02:23<00:07, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▌| 1487/1563 [02:23<00:07, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▌| 1489/1563 [02:23<00:06, 10.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▌| 1491/1563 [02:23<00:06, 10.71batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1493/1563 [02:23<00:06, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1495/1563 [02:24<00:06, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1497/1563 [02:24<00:06, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1499/1563 [02:24<00:05, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1501/1563 [02:24<00:05, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1503/1563 [02:24<00:05, 10.77batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▋| 1505/1563 [02:25<00:05, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▋| 1507/1563 [02:25<00:05, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1509/1563 [02:25<00:05, 10.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1511/1563 [02:25<00:04, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1513/1563 [02:25<00:04, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1515/1563 [02:26<00:04, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1517/1563 [02:26<00:04, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1519/1563 [02:26<00:04, 10.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1521/1563 [02:26<00:03, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1523/1563 [02:26<00:03, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1525/1563 [02:26<00:03, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1527/1563 [02:27<00:03, 10.77batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1529/1563 [02:27<00:03, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1531/1563 [02:27<00:02, 10.87batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1533/1563 [02:27<00:02, 10.80batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1535/1563 [02:27<00:02, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1537/1563 [02:28<00:02, 10.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1539/1563 [02:28<00:02, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▊| 1541/1563 [02:28<00:02, 10.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▊| 1543/1563 [02:28<00:01, 10.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▉| 1545/1563 [02:28<00:01, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▉| 1547/1563 [02:28<00:01, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▉| 1549/1563 [02:29<00:01, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▉| 1551/1563 [02:29<00:01, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▉| 1553/1563 [02:29<00:00, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▉| 1555/1563 [02:29<00:00, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2: 100%|█████████▉| 1557/1563 [02:29<00:00, 10.63batch/s]\u001b[A\n",
-            "Epoch 2/2: 100%|█████████▉| 1559/1563 [02:30<00:00, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2: 100%|█████████▉| 1561/1563 [02:30<00:00, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2: 100%|██████████| 1563/1563 [02:30<00:00, 11.31batch/s]\u001b[A\n",
-            "Epochs: 100%|██████████| 2/2 [04:59<00:00, 149.91s/epoch]"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Epoch 2/2 tamamlandı. Kayıp: 1.3678, Doğruluk: 50.29%\n",
-            "DyT Eğitim Süresi: 299.82 saniye, Son Doğruluk: 50.29%\n",
-            "\n",
-            "Karşılaştırma:\n",
-            "RMSNorm - Süre: 310.00s, Doğruluk: 42.15%\n",
-            "DyT - Süre: 299.82s, Doğruluk: 50.29%\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "\n"
-          ]
-        }
-      ]
-    }
-  ]
-}
\ No newline at end of file
+    "id": "Ligm3e2erYq6",
+    "outputId": "4e4a2bee-0bd8-40ae-ee7d-c28bed94edec"
+   },
+   "execution_count": null,
+   "outputs": []
+  }
+ ]
+}
diff --git a/Genel-4/Mixture_of_Experts.ipynb b/Genel-4/Mixture_of_Experts.ipynb
index 992545e..98c4afe 100644
--- a/Genel-4/Mixture_of_Experts.ipynb
+++ b/Genel-4/Mixture_of_Experts.ipynb
@@ -1,147 +1,139 @@
 {
-  "nbformat": 4,
-  "nbformat_minor": 0,
-  "metadata": {
-    "colab": {
-      "provenance": [],
-      "authorship_tag": "ABX9TyNHvixUmBk+UdujKeVm4FmB",
-      "include_colab_link": true
-    },
-    "kernelspec": {
-      "name": "python3",
-      "display_name": "Python 3"
-    },
-    "language_info": {
-      "name": "python"
-    }
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+  "colab": {
+   "provenance": [],
+   "authorship_tag": "ABX9TyNHvixUmBk+UdujKeVm4FmB",
+   "include_colab_link": true
   },
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "view-in-github",
-        "colab_type": "text"
-      },
-      "source": [
-        "<a href=\"https://colab.research.google.com/github/emredeveloper/Transformers--General-AI/blob/main/Mixture_of_Experts.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
-      ]
+  "kernelspec": {
+   "name": "python3",
+   "display_name": "Python 3"
+  },
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "view-in-github",
+    "colab_type": "text"
+   },
+   "source": [
+    "<a href=\"https://colab.research.google.com/github/emredeveloper/Transformers--General-AI/blob/main/Mixture_of_Experts.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
-    {
-      "cell_type": "code",
-      "execution_count": 5,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "Y4Cmr0MnCzgt",
-        "outputId": "004036a3-bbff-439b-8dd7-71aa9b1258e3"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Çıkış boyutu: torch.Size([5, 10, 128])\n"
-          ]
-        }
-      ],
-      "source": [
-        "import torch\n",
-        "import torch.nn as nn\n",
-        "import torch.nn.functional as F\n",
-        "\n",
-        "class Expert(nn.Module):\n",
-        "    \"\"\"Tek bir uzmanın basit ileri beslemeli ağı\"\"\"\n",
-        "    def __init__(self, input_dim, hidden_dim):\n",
-        "        super(Expert, self).__init__()\n",
-        "        self.ffn = nn.Sequential(\n",
-        "            nn.Linear(input_dim, hidden_dim),\n",
-        "            nn.ReLU(),\n",
-        "            nn.Linear(hidden_dim, input_dim)\n",
-        "        )\n",
-        "\n",
-        "    def forward(self, x):\n",
-        "        return self.ffn(x)\n",
-        "\n",
-        "class Router(nn.Module):\n",
-        "    \"\"\"Yönlendirici: Hangi uzmanın etkinleştirileceğine karar verir.\"\"\"\n",
-        "    def __init__(self, input_dim, num_experts):\n",
-        "        super(Router, self).__init__()\n",
-        "        self.gate = nn.Linear(input_dim, num_experts)\n",
-        "\n",
-        "    def forward(self, x):\n",
-        "        # Uzmanlar için olasılık hesaplama\n",
-        "        return F.softmax(self.gate(x), dim=-1)\n",
-        "\n",
-        "class MoELayer(nn.Module):\n",
-        "    \"\"\"Mixture of Experts katmanı\"\"\"\n",
-        "    def __init__(self, input_dim, hidden_dim, num_experts, top_k=2):\n",
-        "        super(MoELayer, self).__init__()\n",
-        "        self.num_experts = num_experts\n",
-        "        self.top_k = top_k\n",
-        "        self.experts = nn.ModuleList([Expert(input_dim, hidden_dim) for _ in range(num_experts)])\n",
-        "        self.router = Router(input_dim, num_experts)\n",
-        "\n",
-        "    def forward(self, x):\n",
-        "        batch_size, seq_len, _ = x.size()  # Boyutları ayıkla\n",
-        "        x_flat = x.view(-1, x.size(-1))  # Batch ve seq_len birleştir\n",
-        "\n",
-        "        # Yönlendirici tarafından uzman seçimi\n",
-        "        route_weights = self.router(x_flat)\n",
-        "        topk_weights, topk_indices = torch.topk(route_weights, self.top_k, dim=-1)\n",
-        "\n",
-        "        # Uzmanların çıktılarının birleştirilmesi\n",
-        "        outputs = torch.zeros_like(x_flat)\n",
-        "        for i in range(self.top_k):\n",
-        "            weight = topk_weights[:, i].unsqueeze(-1)\n",
-        "            expert_idx = topk_indices[:, i]\n",
-        "            outputs += weight * torch.cat(\n",
-        "                [self.experts[expert](x_flat[j].unsqueeze(0)) for j, expert in enumerate(expert_idx)], dim=0\n",
-        "            )\n",
-        "\n",
-        "        # Orijinal boyuta geri dön\n",
-        "        outputs = outputs.view(batch_size, seq_len, -1)\n",
-        "        return outputs\n",
-        "\n",
-        "class MoETransformer(nn.Module):\n",
-        "    \"\"\"MoE içeren basit bir Transformer\"\"\"\n",
-        "    def __init__(self, input_dim, hidden_dim, num_heads, num_experts, top_k):\n",
-        "        super(MoETransformer, self).__init__()\n",
-        "        self.attention = nn.MultiheadAttention(embed_dim=input_dim, num_heads=num_heads, batch_first=True)\n",
-        "        self.moe_layer = MoELayer(input_dim, hidden_dim, num_experts, top_k)\n",
-        "        self.norm1 = nn.LayerNorm(input_dim)\n",
-        "        self.norm2 = nn.LayerNorm(input_dim)\n",
-        "\n",
-        "    def forward(self, x):\n",
-        "        # Multi-head attention\n",
-        "        attn_output, _ = self.attention(x, x, x)\n",
-        "        x = self.norm1(x + attn_output)\n",
-        "\n",
-        "        # Mixture of Experts katmanı\n",
-        "        moe_output = self.moe_layer(x)\n",
-        "        x = self.norm2(x + moe_output)\n",
-        "\n",
-        "        return x\n",
-        "\n",
-        "# Örnek kullanım\n",
-        "input_dim = 128\n",
-        "hidden_dim = 256\n",
-        "num_heads = 4\n",
-        "num_experts = 3\n",
-        "top_k = 2\n",
-        "seq_len = 10\n",
-        "batch_size = 5\n",
-        "\n",
-        "# Model oluşturma\n",
-        "model = MoETransformer(input_dim, hidden_dim, num_heads, num_experts, top_k)\n",
-        "\n",
-        "# Rastgele giriş verisi\n",
-        "x = torch.rand(batch_size, seq_len, input_dim)\n",
-        "\n",
-        "# Çıktı\n",
-        "output = model(x)\n",
-        "print(\"Çıkış boyutu:\", output.shape)"
-      ]
-    }
-  ]
-}
\ No newline at end of file
+    "id": "Y4Cmr0MnCzgt",
+    "outputId": "004036a3-bbff-439b-8dd7-71aa9b1258e3"
+   },
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.nn.functional as F\n",
+    "\n",
+    "class Expert(nn.Module):\n",
+    "    \"\"\"Simple feed-forward network for a single expert\"\"\"\n",
+    "    def __init__(self, input_dim, hidden_dim):\n",
+    "        super(Expert, self).__init__()\n",
+    "        self.ffn = nn.Sequential(\n",
+    "            nn.Linear(input_dim, hidden_dim),\n",
+    "            nn.ReLU(),\n",
+    "            nn.Linear(hidden_dim, input_dim)\n",
+    "        )\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        return self.ffn(x)\n",
+    "\n",
+    "class Router(nn.Module):\n",
+    "    \"\"\"Router: decides which expert to activate.\"\"\"\n",
+    "    def __init__(self, input_dim, num_experts):\n",
+    "        super(Router, self).__init__()\n",
+    "        self.gate = nn.Linear(input_dim, num_experts)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        # Compute probabilities for each expert\n",
+    "        return F.softmax(self.gate(x), dim=-1)\n",
+    "\n",
+    "class MoELayer(nn.Module):\n",
+    "    \"\"\"Mixture of Experts layer\"\"\"\n",
+    "    def __init__(self, input_dim, hidden_dim, num_experts, top_k=2):\n",
+    "        super(MoELayer, self).__init__()\n",
+    "        self.num_experts = num_experts\n",
+    "        self.top_k = top_k\n",
+    "        self.experts = nn.ModuleList([Expert(input_dim, hidden_dim) for _ in range(num_experts)])\n",
+    "        self.router = Router(input_dim, num_experts)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        batch_size, seq_len, _ = x.size()  # Extract dimensions\n",
+    "        x_flat = x.view(-1, x.size(-1))  # Merge batch and sequence dimensions\n",
+    "\n",
+    "        # Select experts via the router\n",
+    "        route_weights = self.router(x_flat)\n",
+    "        topk_weights, topk_indices = torch.topk(route_weights, self.top_k, dim=-1)\n",
+    "\n",
+    "        # Combine the outputs of the selected experts\n",
+    "        outputs = torch.zeros_like(x_flat)\n",
+    "        for i in range(self.top_k):\n",
+    "            weight = topk_weights[:, i].unsqueeze(-1)\n",
+    "            expert_idx = topk_indices[:, i]\n",
+    "            outputs += weight * torch.cat(\n",
+    "                [self.experts[expert](x_flat[j].unsqueeze(0)) for j, expert in enumerate(expert_idx)], dim=0\n",
+    "            )\n",
+    "\n",
+    "        # Restore the original shape\n",
+    "        outputs = outputs.view(batch_size, seq_len, -1)\n",
+    "        return outputs\n",
+    "\n",
+    "class MoETransformer(nn.Module):\n",
+    "    \"\"\"Simple Transformer with MoE\"\"\"\n",
+    "    def __init__(self, input_dim, hidden_dim, num_heads, num_experts, top_k):\n",
+    "        super(MoETransformer, self).__init__()\n",
+    "        self.attention = nn.MultiheadAttention(embed_dim=input_dim, num_heads=num_heads, batch_first=True)\n",
+    "        self.moe_layer = MoELayer(input_dim, hidden_dim, num_experts, top_k)\n",
+    "        self.norm1 = nn.LayerNorm(input_dim)\n",
+    "        self.norm2 = nn.LayerNorm(input_dim)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        # Multi-head attention\n",
+    "        attn_output, _ = self.attention(x, x, x)\n",
+    "        x = self.norm1(x + attn_output)\n",
+    "\n",
+    "        # Mixture of Experts layer\n",
+    "        moe_output = self.moe_layer(x)\n",
+    "        x = self.norm2(x + moe_output)\n",
+    "\n",
+    "        return x\n",
+    "\n",
+    "# Example usage\n",
+    "input_dim = 128\n",
+    "hidden_dim = 256\n",
+    "num_heads = 4\n",
+    "num_experts = 3\n",
+    "top_k = 2\n",
+    "seq_len = 10\n",
+    "batch_size = 5\n",
+    "\n",
+    "# Build the model\n",
+    "model = MoETransformer(input_dim, hidden_dim, num_heads, num_experts, top_k)\n",
+    "\n",
+    "# Random input data\n",
+    "x = torch.rand(batch_size, seq_len, input_dim)\n",
+    "\n",
+    "# Output\n",
+    "output = model(x)\n",
+    "print(\"Output shape:\", output.shape)"
+   ]
+  }
+ ]
+}
diff --git a/Genel-4/data.jsonl b/Genel-4/data.jsonl
index 52b8747..3d2dede 100644
--- a/Genel-4/data.jsonl
+++ b/Genel-4/data.jsonl
@@ -1,5 +1,5 @@
-{"title": "2025’e Girerken Yapay Zekâya Dair Bilinmesi Gerekenler: Son Gelişmeler", "url": "https://medium.com/@ozancanozdemir/2025e-girerken-yapay-zek%C3%A2ya-dair-bilinmesi-gerekenler-son-geli%C5%9Fmeler-2486d4e8955c", "year": 2025, "summary": "2025 yılına girerken yapay zekâ alanındaki en güncel gelişmeler, iş dünyasındaki etkiler ve yeni trendler ele alınıyor.", "content": "Yapay zekâ iş ilanları azalırken, yeni şirket sayısı %40,6 artış gösterdi. İşletmeler yapay zekâyı maliyetleri düşürmek ve gelirleri artırmak için kullanıyor. 2025’e girerken, yapay zekâ alanında öne çıkan gelişmeler ve trendler detaylıca inceleniyor.", "tags": ["yapay zeka", "2025", "trend", "iş dünyası", "teknoloji"]}
-{"title": "Teknoloji ve Yapay Zeka Bülteni | Nisan 2025", "url": "https://medium.com/kariyertech/teknoloji-ve-yapay-zeka-b%C3%BClteni-nisan-2025-d59294bbf6b1", "year": 2025, "summary": "OpenAI’ın yeni modelleri, Anthropic’in eğitimdeki AI ürünü, Google’ın yeni çipi ve yapay zekâ destekli iş dünyası trendleri öne çıkıyor.", "content": "OpenAI’ın görsellerle akıl yürütebilen ve araçları bağımsız olarak kullanabilen yeni modelleri, Anthropic’in eğitimde devrim yaratan Claude for Education ürünü, Google’ın yeni Ironwood çipi ve yapay zekâ destekli iş dünyası trendleri bu bültende öne çıkıyor. Ayrıca, yapay zekâ ile ilgili etik ve iş dünyası tartışmaları da yer alıyor.", "tags": ["yapay zeka", "OpenAI", "Anthropic", "Google", "2025", "teknoloji", "eğitim"]}
-{"title": "Yapay Zekâ Öğrenmek Neden 2025'in En Akıllı Kararı?", "url": "https://medium.com/@iammutluyigit/yapay-zek%C3%A2-%C3%B6%C4%9Frenmek-neden-2025in-en-ak%C4%B1ll%C4%B1-karar%C4%B1-2b9b6e2d4165", "year": 2025, "summary": "2025 yılında yapay zekâ öğrenmenin ve bu alanda uzmanlaşmanın neden geleceğin en akıllı yatırımı olduğu anlatılıyor.", "content": "2025 yılı artık “takip etme” değil, şekillendirme yılı olarak görülüyor. Yapay zekâ öğrenmenin ve bu alanda uzmanlaşmanın neden geleceğin en akıllı yatırımı olduğu, kişisel ve sektörel örneklerle anlatılıyor.", "tags": ["yapay zeka", "eğitim", "gelecek", "2025", "kariyer"]}
-{"title": "Yazılım Dünyasında 2025 Trendleri", "url": "https://mhkoca.medium.com/yaz%C4%B1l%C4%B1m-d%C3%BCnyas%C4%B1nda-2025-trendleri-fd901299e6aa", "year": 2025, "summary": "2025’te yapay zekâ artık sadece bir özellik değil, yazılım dünyasının temel bileşeni olacak. Uzman görüşleriyle trendler inceleniyor.", "content": "2025’te yapay zekâ artık sadece bir özellik değil, yazılım dünyasının temel bileşeni olacak. Makalede, yapay zekânın yazılım geliştirme süreçlerine, iş dünyasına ve günlük yaşama etkileri, uzman görüşleriyle birlikte detaylıca inceleniyor.", "tags": ["yapay zeka", "yazılım", "trend", "2025", "teknoloji"]}
-{"title": "Yapay Zeka Çağında İnsan Kalmak Üzerine", "url": "https://medium.com/@keskinserdar/yapay-zeka-%C3%A7a%C4%9F%C4%B1nda-i%CC%87nsan-kalmak-%C3%BCzerine-90301d0a2766", "year": 2025, "summary": "Yapay zekâ çağında insan olmanın anlamı, etik tartışmalar ve teknolojinin toplumsal etkileri ele alınıyor.", "content": "OpenAI ve Google DeepMind gibi devlerin liderliğinde, yapay zekâ çağında insan olmanın anlamı, etik tartışmalar ve teknolojinin toplumsal etkileri ele alınıyor. 2025 ve sonrası için insan-makine ilişkisine dair önemli perspektifler sunuluyor.", "tags": ["yapay zeka", "etik", "insan", "toplum", "2025"]}
\ No newline at end of file
+{"title": "Key AI Developments to Know Going into 2025", "url": "https://medium.com/@ozancanozdemir/2025e-girerken-yapay-zek%C3%A2ya-dair-bilinmesi-gerekenler-son-geli%C5%9Fmeler-2486d4e8955c", "year": 2025, "summary": "A survey of the latest advances in artificial intelligence, their business impact, and emerging trends at the start of 2025.", "content": "While AI job postings are slowing, the number of new companies grew by 40.6%. Organisations are leaning on AI to reduce costs and boost revenue. The article reviews the stand-out innovations and market trends that will shape 2025.", "tags": ["artificial intelligence", "2025", "trend", "business", "technology"]}
+{"title": "Technology and AI Bulletin | April 2025", "url": "https://medium.com/kariyertech/teknoloji-ve-yapay-zeka-b%C3%BClteni-nisan-2025-d59294bbf6b1", "year": 2025, "summary": "Highlights include OpenAI's new multi-modal models, Anthropic's education product, Google's Ironwood chip, and business trends powered by AI.", "content": "The bulletin covers OpenAI's models that reason over visuals and operate tools autonomously, Anthropic's Claude for Education platform, Google's Ironwood accelerator, and AI-driven shifts in the workplace. Ethical and organisational discussions around AI are also featured.", "tags": ["artificial intelligence", "OpenAI", "Anthropic", "Google", "2025", "technology", "education"]}
+{"title": "Why Learning AI Is the Smartest Decision for 2025", "url": "https://medium.com/@iammutluyigit/yapay-zek%C3%A2-%C3%B6%C4%9Frenmek-neden-2025in-en-ak%C4%B1ll%C4%B1-karar%C4%B1-2b9b6e2d4165", "year": 2025, "summary": "Explains why mastering AI skills in 2025 is one of the most valuable investments for the future.", "content": "2025 is framed as the year to lead rather than follow. The article discusses why learning AI and specialising in the field offers the best return, supported by personal anecdotes and industry examples.", "tags": ["artificial intelligence", "education", "future", "2025", "career"]}
+{"title": "Software Industry Trends for 2025", "url": "https://mhkoca.medium.com/yaz%C4%B1l%C4%B1m-d%C3%BCnyas%C4%B1nda-2025-trendleri-fd901299e6aa", "year": 2025, "summary": "Experts argue that AI will become a foundational component of software by 2025.", "content": "The piece analyses how AI will underpin software development, business operations, and everyday life. Expert commentary explores the major trends expected to reshape the software landscape.", "tags": ["artificial intelligence", "software", "trend", "2025", "technology"]}
+{"title": "Staying Human in the Age of AI", "url": "https://medium.com/@keskinserdar/yapay-zeka-%C3%A7a%C4%9F%C4%B1nda-i%CC%87nsan-kalmak-%C3%BCzerine-90301d0a2766", "year": 2025, "summary": "A reflection on what it means to remain human amid rapid AI progress, covering ethics and social impact.", "content": "With OpenAI and Google DeepMind setting the pace, the article considers the meaning of humanity in the AI era, the ethical debates, and the societal consequences. It offers key perspectives on human-machine relationships for 2025 and beyond.", "tags": ["artificial intelligence", "ethics", "humanity", "society", "2025"]}
diff --git a/Genel-4/finetune_llm.py b/Genel-4/finetune_llm.py
index 2f0e687..688103d 100644
--- a/Genel-4/finetune_llm.py
+++ b/Genel-4/finetune_llm.py
@@ -3,39 +3,39 @@
 from datasets import Dataset
 from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, pipeline
 
-# Ayarlar
+# Settings
 MODEL_NAME = "HuggingFaceTB/SmolLM-135M"
-DATA_PATH = "data.jsonl"  # JSONL dosya yolu
+DATA_PATH = "data.jsonl"  # Path to the JSONL file
 OUTPUT_DIR = "finetuned-llm"
 
-# 1. JSONL verisini yükle
+# 1. Load the JSONL data
 with open(DATA_PATH, "r", encoding="utf-8") as f:
     lines = [json.loads(line) for line in f]
 
-# 2. Dataset'e çevir
-# Prompt formatını daha belirgin ve modelin öğrenebileceği şekilde ayarlıyoruz.
+# 2. Convert to a Hugging Face Dataset
+# Format each prompt clearly so the model can learn the structure.
 def to_prompt(example):
     prompt = (
-        f"[BAŞLIK] {example['title']}\n"
-        f"[ÖZET] {example['summary']}\n"
-        f"[İÇERİK] {example['content']}\n"
-        f"[ETİKETLER] {', '.join(example['tags'])}"
+        f"[TITLE] {example['title']}\n"
+        f"[SUMMARY] {example['summary']}\n"
+        f"[CONTENT] {example['content']}\n"
+        f"[TAGS] {', '.join(example['tags'])}"
     )
     return {"text": prompt}
 
 dataset = Dataset.from_list([to_prompt(e) for e in lines])
 
-# 3. Tokenizer ve model yükle
-# pad_token ayarını koru
+# 3. Load the tokenizer and model
+# Preserve the pad_token configuration
 
-# Tokenizer yükle
+# Load the tokenizer
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
-# Model yükle
+# Load the model
 model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
 
-# 4. Tokenize fonksiyonu
+# 4. Tokenization function
 def tokenize_function(examples):
     result = tokenizer(
         examples["text"],
@@ -48,10 +48,10 @@ def tokenize_function(examples):
 
 tokenized_dataset = dataset.map(tokenize_function, batched=True)
 
-# 5. Eğitim argümanları
+# 5. Training arguments
 training_args = TrainingArguments(
     output_dir=OUTPUT_DIR,
-    num_train_epochs=5,  # Daha fazla epoch ile küçük veri için daha iyi öğrenme
+    num_train_epochs=5,  # Additional epochs help smaller datasets learn better
     per_device_train_batch_size=2,
     save_steps=10,
     save_total_limit=2,
@@ -68,33 +68,33 @@ def tokenize_function(examples):
     train_dataset=tokenized_dataset,
 )
 
-# 7. Eğitimi başlat
+# 7. Start training
 trainer.train()
 
-# 8. Modeli kaydet
+# 8. Save the model
 trainer.save_model(OUTPUT_DIR)
 tokenizer.save_pretrained(OUTPUT_DIR)
-print(f"Model {OUTPUT_DIR} klasörüne kaydedildi.")
+print(f"Model saved to {OUTPUT_DIR}.")
 
-# === TEST KODU ===
+# === TEST CODE ===
 def test_model():
-    print("Test başlatılıyor...")
-    # Eğitilmiş modeli ve tokenizer'ı yükle
+    print("Starting evaluation test...")
+    # Load the fine-tuned model and tokenizer
     model = AutoModelForCausalLM.from_pretrained(OUTPUT_DIR)
     tokenizer = AutoTokenizer.from_pretrained(OUTPUT_DIR)
     generator = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)
-    # Test promptunu yeni formatla oluştur
+    # Build the prompt using the new format
     prompt = (
-        "[BAŞLIK] Yapay Zeka ve 2025\n"
-        "[ÖZET] 2025 yılında yapay zeka alanında beklenen gelişmeler\n"
-        "[İÇERİK]"
+        "[TITLE] Artificial Intelligence and 2025\n"
+        "[SUMMARY] Expected developments in artificial intelligence in 2025\n"
+        "[CONTENT]"
     )
     output = generator(prompt, max_length=100, num_return_sequences=1, truncation=True)
-    print("\n--- Model Çıktısı ---")
-    # Sadece [İÇERİK] kısmından sonrasını al
+    print("\n--- Model Output ---")
+    # Extract only the portion after [CONTENT]
     generated = output[0]['generated_text']
-    if "[İÇERİK]" in generated:
-        generated = generated.split("[İÇERİK]")[1]
+    if "[CONTENT]" in generated:
+        generated = generated.split("[CONTENT]")[1]
     print(generated.strip())
 
 if __name__ == "__main__":
diff --git a/Genel-4/llada.py b/Genel-4/llada.py
index 5162c4f..d20141e 100644
--- a/Genel-4/llada.py
+++ b/Genel-4/llada.py
@@ -8,20 +8,20 @@
 from tqdm import tqdm
 from collections import Counter
 
-# HuggingFace veri setini yükle
+# Load the Hugging Face dataset
 dataset = load_dataset('salihturkoglu/se_data_set', split='train')
 instructions = [ex['instruction'] for ex in dataset]
 responses = [ex['response'] for ex in dataset]
 
-# Gelişmiş Türkçe tokenizer
+# Enhanced Turkish tokenizer
 def turkish_tokenize(text):
-    # Noktalama, sayılar, Türkçe karakterler ve kelime kökleri için daha iyi ayrıştırma
+    # Pad punctuation and digit runs with spaces, collapse whitespace, then lowercase and split
     text = re.sub(r"([.,!?;:()\"'])", r" \1 ", text)
     text = re.sub(r"([0-9]+)", r" \1 ", text)
     text = re.sub(r"\s+", " ", text)
     return text.lower().strip().split()
 
-# Vocab oluştur (daha büyük ve çeşitli)
+# Build a broader, more diverse vocabulary
 PAD_TOKEN = "<PAD>"
 UNK_TOKEN = "<UNK>"
 all_texts = instructions + responses
@@ -36,7 +36,7 @@ def encode(text):
     return [vocab.get(tok, vocab[UNK_TOKEN]) for tok in turkish_tokenize(text)]
 
 def decode(token_ids):
-    # <UNK> oranını azaltmak için tekrarları ve padleri temizle
+    # Remove repeats and PAD tokens to reduce <UNK> usage
     words = []
     for idx in token_ids:
         if idx == vocab[PAD_TOKEN]:
@@ -47,7 +47,7 @@ def decode(token_ids):
     return " ".join(words)
 
 def build_prompt(instruction, response=None):
-    # Prompt formatı
+    # Prompt format helper
     if response is not None:
         return f"Instruction: {instruction} Response: {response}"
     else:
@@ -67,13 +67,13 @@ def __init__(self, instructions, responses, vocab, max_len=128, prompt_len=64):
             resp_ids = encode(resp)[:(max_len - prompt_len)]
             resp_ids += [vocab[PAD_TOKEN]] * ((max_len - prompt_len) - len(resp_ids))
             self.inputs.append(torch.tensor(prompt_ids, dtype=torch.long))
-            self.targets.append(torch.tensor(resp_ids, dtype=torch.long))  # Sadece response target!
+            self.targets.append(torch.tensor(resp_ids, dtype=torch.long))  # Only the response is the target
 
     def __len__(self):
         return len(self.inputs)
 
     def __getitem__(self, idx):
-        # input: prompt, target: response
+        # Returns input prompt tensor and response target tensor
         inp = self.inputs[idx]
         tgt = self.targets[idx]
         return inp, tgt
@@ -91,7 +91,7 @@ def add_noise(batch, noise_level=0.5):
     noisy[mask] = random_tokens[mask]
     return noisy
 
-# Cosine noise schedule (daha iyi diffusion için)
+# Cosine noise schedule (improves diffusion)
 def cosine_noise_schedule(step, total_steps):
     import math
     return math.cos((step / total_steps) * math.pi / 2)
@@ -115,10 +115,14 @@ def forward(self, prompt, x, timestep, prompt_emb, src_key_padding_mask=None):
         t_emb = self.timestep_embed(timestep).unsqueeze(1)
         prompt_cond = self.prompt_proj(prompt_emb).unsqueeze(1)
         emb = torch.cat([prompt_embs, x_embs], dim=1) + t_emb + prompt_cond
-        # src_key_padding_mask shape düzeltme
+        # Adjust src_key_padding_mask shape
         if src_key_padding_mask is not None:
             # src_key_padding_mask: (batch, response_len) -> (batch, prompt_len + response_len)
-            pad = torch.zeros((src_key_padding_mask.shape[0], prompt_embs.shape[1]), dtype=torch.bool, device=src_key_padding_mask.device)
+            pad = torch.zeros(
+                (src_key_padding_mask.shape[0], prompt_embs.shape[1]),
+                dtype=torch.bool,
+                device=src_key_padding_mask.device,
+            )
             src_key_padding_mask = torch.cat([pad, src_key_padding_mask], dim=1)
         out = self.transformer(emb, src_key_padding_mask=src_key_padding_mask)
         out = self.fc(out)
@@ -157,7 +161,7 @@ def train_diffusion_model(model, dataloader, epochs=10, steps=16):
             mask = (batch_targets == vocab[PAD_TOKEN])
             optimizer.zero_grad()
             outputs = model(batch_prompts, noisy_targets, timestep, prompt_emb, src_key_padding_mask=mask)
-            # .view yerine .reshape kullan
+            # Prefer .reshape to avoid issues with non-contiguous tensors
             loss = criterion(outputs.reshape(-1, outputs.size(-1)), batch_targets.reshape(-1))
             loss.backward()
             optimizer.step()
@@ -174,7 +178,7 @@ def generate_response(model, instruction, steps=16, max_len=256, prompt_len=64):
     prompt_ids += [vocab[PAD_TOKEN]] * (prompt_len - len(prompt_ids))
     prompt_tensor = torch.tensor([prompt_ids], dtype=torch.long, device=device)
     prompt_emb = get_prompt_embedding([prompt], vocab, model, prompt_len=prompt_len)
-    # Response kısmı random başlatılır
+    # Initialise the response portion randomly
     response_len = max_len - prompt_len
     response_part = torch.randint(2, len(vocab), (1, response_len), device=device)
     generated = response_part.clone()
@@ -193,12 +197,12 @@ def generate_response(model, instruction, steps=16, max_len=256, prompt_len=64):
 
 test_instruction = instructions[0]
 print('Instruction:', test_instruction)
-print('Gerçek Response:', responses[0])
+print('Ground Truth Response:', responses[0])
 print('Model Response:', generate_response(model, test_instruction, steps=16, max_len=max_len, prompt_len=prompt_len))
 
-test_instruction = "Çift anadal veya yandal yapmak istiyorum. Hangi bölümlerle yapabilirim?"
+test_instruction = "I want to pursue a double major or a minor. Which departments are available?"
 print('Instruction:', test_instruction)
-print('Gerçek Response:', responses[instructions.index(test_instruction)] if test_instruction in instructions else "Yok")
+print('Ground Truth Response:', responses[instructions.index(test_instruction)] if test_instruction in instructions else "None")
 print('Model Response:', generate_response(model, test_instruction, steps=16, max_len=max_len, prompt_len=prompt_len))
 
 def evaluate_diffusion_model(model, dataset, n_samples=100, steps=16, max_len=256, prompt_len=64):
@@ -228,6 +232,6 @@ def evaluate_diffusion_model(model, dataset, n_samples=100, steps=16, max_len=25
         correct += ((generated == tgt) & mask).sum().item()
         loop.set_postfix(acc=(correct/total if total > 0 else 0.0))
     accuracy = correct / total if total > 0 else 0.0
-    print(f"Test doğruluğu: {accuracy:.2%} ({correct}/{total})")
+    print(f"Test accuracy: {accuracy:.2%} ({correct}/{total})")
 
 evaluate_diffusion_model(model, dataset, n_samples=100, steps=16, max_len=max_len, prompt_len=prompt_len)
\ No newline at end of file
diff --git a/Genel-4/mmlu-llm-stratch.py b/Genel-4/mmlu-llm-stratch.py
index efb194f..9969739 100644
--- a/Genel-4/mmlu-llm-stratch.py
+++ b/Genel-4/mmlu-llm-stratch.py
@@ -12,11 +12,11 @@
 from typing import List, Dict, Tuple, Optional, Union
 
 # =============================================================================
-# 1. TOKENIZER - Metni sayısal verilere dönüştürür
+# 1. TOKENIZER - Convert text into numerical features
 # =============================================================================
 
 class SimpleTokenizer:
-    """Geliştirilmiş tokenizer"""
+    """Improved tokenizer implementation"""
     
     def __init__(self):
         self.char_to_id = {}
@@ -26,88 +26,88 @@ def __init__(self):
         self.unk_token = '<UNK>'
         self.bos_token = '<BOS>'
         self.eos_token = '<EOS>'
-        self.pad_token_id = 0  # PAD token ID'sini 0 olarak ayarla
-        self.unk_token_id = 1  # UNK token ID'si
-        self.bos_token_id = 2  # BOS token ID'si
-        self.eos_token_id = 3  # EOS token ID'si
-        
+        self.pad_token_id = 0  # Assign 0 to the PAD token ID
+        self.unk_token_id = 1  # ID value reserved for UNK
+        self.bos_token_id = 2  # ID for the BOS token
+        self.eos_token_id = 3  # ID for the EOS token
+
     def fit(self, texts: List[str]):
-        """Metinlerden vocab oluştur"""
-        # Tüm karakterleri topla ve frekanslarını hesapla
+        """Build the vocabulary from raw text"""
+        # Collect every character and compute frequencies
         char_freq = {}
         for text in texts:
             for char in text:
-                if char not in ['\n', ' ']:  # Boşluk ve yeni satırı özel karakterlerden ayır
+                if char not in ['\n', ' ']:  # Skip spaces and newlines here; they are added with the special tokens below
                     char_freq[char] = char_freq.get(char, 0) + 1
-        
-        # Özel tokenlar ve sık kullanılan karakterler
+
+        # Special tokens and the most common characters
         special_tokens = [self.pad_token, self.unk_token, self.bos_token, self.eos_token, '\n', ' ']
-        
-        # En sık kullanılan 200 karakteri al (özel tokenlar hariç)
+
+        # Select the 200 most frequent characters (excluding the special tokens)
         common_chars = [char for char, _ in sorted(char_freq.items(), key=lambda x: -x[1])[:200]]
-        
-        # Özel token'ları ve yaygın karakterleri birleştir
+
+        # Combine the special tokens and the frequent characters
         all_chars = special_tokens + common_chars
-        
-        # Benzersiz karakterlerin listesini oluştur
+
+        # Build a list of unique characters
         unique_chars = []
         for char in all_chars:
             if char not in unique_chars:
                 unique_chars.append(char)
-        
-        # Sözlükleri oluştur
+
+        # Create lookup tables
         self.char_to_id = {char: i for i, char in enumerate(unique_chars)}
         self.id_to_char = {i: char for i, char in enumerate(unique_chars)}
         self.vocab_size = len(unique_chars)
-        
-        # Özel token ID'lerini güncelle
+
+        # Update the special token IDs
         self.pad_token_id = self.char_to_id[self.pad_token]
         self.unk_token_id = self.char_to_id[self.unk_token]
         self.bos_token_id = self.char_to_id[self.bos_token]
         self.eos_token_id = self.char_to_id[self.eos_token]
-        
-        # Vocab'ı oluştur (özel tokenlar + en sık kullanılan karakterler)
+
+        # Build the final vocabulary list (special tokens + frequent characters)
         vocab = special_tokens + common_chars
-        
-        # ID mapping oluştur
+
+        # Re-create the mappings so indices align with the final list
         self.char_to_id = {char: i for i, char in enumerate(vocab)}
         self.id_to_char = {i: char for i, char in enumerate(vocab)}
         self.vocab_size = len(vocab)
-        
-        # Özel token ID'lerini sakla
+
+        # Persist the special token IDs
         self.pad_token_id = self.char_to_id.get('<PAD>', 0)
         self.unk_token_id = self.char_to_id.get('<UNK>', 1)
         self.bos_token_id = self.char_to_id.get('<BOS>', 2)
         self.eos_token_id = self.char_to_id.get('<EOS>', 3)
-        
-        print(f"Vocab boyutu: {self.vocab_size}")
-        print(f"İlk 20 token: {vocab[:20]}")
-        
+
+        print(f"Vocabulary size: {self.vocab_size}")
+        print(f"First 20 tokens: {vocab[:20]}")
+
     def encode(self, text: str, max_length: int = 512, add_bos: bool = True, add_eos: bool = True) -> List[int]:
-        """Metni token ID'lerine çevir"""
-        # Özel tokenları ekle
+        """Convert raw text into token IDs"""
+        # Add the optional special tokens
         tokens = []
         if add_bos:
             tokens.append(self.bos_token_id)
-            
-        # Metni tokenlara çevir
+
+        # Map characters to token IDs
         for char in text:
             tokens.append(self.char_to_id.get(char, self.unk_token_id))
-            
+
         if add_eos:
             tokens.append(self.eos_token_id)
-            
-        # Uzunluğu max_length'e göre ayarla
+
+        # Constrain the sequence length
         if len(tokens) > max_length:
-            tokens = tokens[:max_length-1] + [tokens[-1]]  # Son token'ı koru
+            tokens = tokens[:max_length-1] + [tokens[-1]]  # Preserve the last token
         elif len(tokens) < max_length:
-            # Padding ekle
+            # Apply padding
             tokens = tokens + [self.pad_token_id] * (max_length - len(tokens))
-            
+
         return tokens
-    
+
     def decode(self, token_ids: List[int]) -> str:
-        """Token ID'leri metne dönüştür"""
+        """Convert token IDs back into readable text"""
         chars = []
         for token_id in token_ids:
             if token_id == self.char_to_id['<EOS>']:
@@ -117,11 +117,11 @@ def decode(self, token_ids: List[int]) -> str:
         return ''.join(chars)
 
 # =============================================================================
-# 2. TRANSFORMER COMPONENTS - Attention ve FFN katmanları
+# 2. TRANSFORMER COMPONENTS - Attention and FFN layers
 # =============================================================================
 
 class MultiHeadAttention(nn.Module):
-    """Multi-Head Self-Attention katmanı"""
+    """Multi-head self-attention layer"""
     
     def __init__(self, d_model: int, n_heads: int, dropout: float = 0.1):
         super().__init__()
@@ -142,25 +142,25 @@ def __init__(self, d_model: int, n_heads: int, dropout: float = 0.1):
     def forward(self, x: torch.Tensor, mask: Optional[torch.Tensor] = None) -> torch.Tensor:
         batch_size, seq_len, d_model = x.shape
         
-        # Q, K, V hesapla
+        # Compute Q, K, V projections
         Q = self.w_q(x).view(batch_size, seq_len, self.n_heads, self.d_k).transpose(1, 2)
         K = self.w_k(x).view(batch_size, seq_len, self.n_heads, self.d_k).transpose(1, 2)
         V = self.w_v(x).view(batch_size, seq_len, self.n_heads, self.d_k).transpose(1, 2)
         
-        # Attention hesapla
+        # Calculate attention weights
         scores = torch.matmul(Q, K.transpose(-2, -1)) / self.scale
         
-        # Causal mask uygula (gelecekteki tokenlara bakmasın)
+        # Apply a causal mask so the model cannot peek ahead
         if mask is not None:
             scores = scores.masked_fill(mask == 0, -1e9)
         
         attention_weights = F.softmax(scores, dim=-1)
         attention_weights = self.dropout(attention_weights)
         
-        # Attention uygula
+        # Apply the attention weights
         context = torch.matmul(attention_weights, V)
         
-        # Reshape ve output projection
+        # Reshape back and project to the model dimension
         context = context.transpose(1, 2).contiguous().view(batch_size, seq_len, d_model)
         output = self.w_o(context)
         
@@ -179,7 +179,7 @@ def forward(self, x):
         return self.linear2(self.dropout(F.relu(self.linear1(x))))
 
 class TransformerBlock(nn.Module):
-    """Transformer decoder bloğu"""
+    """Transformer decoder block"""
     
     def __init__(self, d_model: int, n_heads: int, d_ff: int, dropout: float = 0.1):
         super().__init__()
@@ -201,11 +201,11 @@ def forward(self, x: torch.Tensor, mask: Optional[torch.Tensor] = None) -> torch
         return x
 
 # =============================================================================
-# 3. LLM MODEL - Ana transformer modeli
+# 3. LLM MODEL - Core transformer architecture
 # =============================================================================
 
 class SimpleLLM(nn.Module):
-    """Basit Large Language Model"""
+    """Lightweight large language model"""
     
     def __init__(self, vocab_size: int, d_model: int = 512, n_heads: int = 8, 
                  n_layers: int = 6, d_ff: int = 2048, max_seq_len: int = 512, 
@@ -214,23 +214,23 @@ def __init__(self, vocab_size: int, d_model: int = 512, n_heads: int = 8,
         self.d_model = d_model
         self.max_seq_len = max_seq_len
         
-        # Embedding katmanları
+        # Embedding layers
         self.token_embedding = nn.Embedding(vocab_size, d_model)
         self.position_embedding = nn.Embedding(max_seq_len, d_model)
         
-        # Transformer katmanları
+        # Transformer layers
         self.transformer_blocks = nn.ModuleList([
             TransformerBlock(d_model, n_heads, d_ff, dropout) 
             for _ in range(n_layers)
         ])
         
-        # Output katmanı
+        # Output layer
         self.ln_final = nn.LayerNorm(d_model)
         self.lm_head = nn.Linear(d_model, vocab_size)
         
         self.dropout = nn.Dropout(dropout)
         
-        # Parametreleri initialize et
+        # Initialise parameters
         self.apply(self._init_weights)
         
     def _init_weights(self, module):
@@ -242,28 +242,28 @@ def _init_weights(self, module):
             torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
             
     def create_causal_mask(self, seq_len: int) -> torch.Tensor:
-        """Causal mask oluştur (gelecekteki tokenlara bakmasın)"""
+        """Create a causal mask so the model cannot look ahead"""
         mask = torch.tril(torch.ones(seq_len, seq_len))
         return mask.unsqueeze(0).unsqueeze(0)  # [1, 1, seq_len, seq_len]
     
     def forward(self, input_ids: torch.Tensor) -> torch.Tensor:
         batch_size, seq_len = input_ids.shape
         
-        # Position IDs oluştur
+        # Build position IDs
         position_ids = torch.arange(0, seq_len, device=input_ids.device).unsqueeze(0).expand(batch_size, -1)
         
         # Embeddings
         
-        # Attention mask'i oluştur
-        # Shape: (batch_size, 1, 1, seq_len) olmalı
+        # Build the attention mask
+        # Shape should be (batch_size, 1, 1, seq_len)
         if mask.dim() == 2:
             mask = mask.unsqueeze(1).unsqueeze(2)  # (batch_size, 1, 1, seq_len)
         
-        # Mask değerlerini float'a çevir ve çok küçük bir sayı yap
-        # 1 olan yerler gözükür, 0 olanlar maskelenir
+        # Convert mask values to float and suppress masked positions
+        # Ones stay visible while zeros are masked out
         mask = (1.0 - mask.float()) * -1e9
         
-        # Transformer katmanları
+        # Transformer layers
         for transformer_block in self.transformer_blocks:
             x = transformer_block(x, mask)
             
@@ -274,115 +274,115 @@ def forward(self, input_ids: torch.Tensor) -> torch.Tensor:
     
     def generate(self, input_ids, max_length=100, temperature=1.0, top_k=50, top_p=0.9, pad_token_id=None):
         """
-        Metin üretme metodu
-        
+        Text generation helper.
+
         Args:
-            input_ids: Giriş token ID'leri (batch_size, seq_len)
-            max_length: Maksimum üretilecek token sayısı
-            temperature: Düşük değerler daha tahmin edilebilir çıktılar üretir
-            top_k: Top-k sampling için k değeri
-            top_p: Nucleus sampling için p değeri
-            pad_token_id: Padding token ID'si
-            
+            input_ids: Input token IDs (batch_size, seq_len)
+            max_length: Maximum number of new tokens to append to the sequence
+            temperature: Lower values produce more predictable outputs
+            top_k: Top-k sampling threshold
+            top_p: Nucleus sampling probability threshold
+            pad_token_id: Padding token ID
+
         Returns:
-            Üretilen token ID'leri (batch_size, seq_len + max_length)
+            Generated token IDs with the continuation appended (batch_size, seq_len + max_length)
         """
         device = next(self.parameters()).device
         batch_size = input_ids.size(0)
         
-        # Girişi cihaza taşı
+        # Move the inputs to the target device
         input_ids = input_ids.to(device)
         
-        # Çıktıyı girişle başlat
+        # Start the output with the provided prompt
         generated = input_ids
         
-        # Eğitim modunu kapat
+        # Disable training mode
         self.eval()
         
         with torch.no_grad():
             for _ in range(max_length):
-                # Mevcut çıktı için maske oluştur
+                # Build a mask for the current sequence
                 seq_len = generated.size(1)
                 attn_mask = (generated != pad_token_id).unsqueeze(1).unsqueeze(2)  # (batch_size, 1, 1, seq_len)
                 
-                # Modelden çıktı al
+                # Run the model
                 outputs = self(generated, mask=attn_mask)
                 
-                # Sadece son token için logitleri al
+                # Focus on the logits for the last token
                 next_token_logits = outputs[:, -1, :] / temperature
                 
-                # Top-k sampling uygula
+                # Apply top-k sampling
                 if top_k > 0:
-                    # En yüksek olasılıklı k token dışındakileri -inf yap
+                    # Remove tokens outside the top-k candidates
                     indices_to_remove = next_token_logits < torch.topk(next_token_logits, top_k)[0][..., -1, None]
                     next_token_logits[indices_to_remove] = -float('Inf')
                 
-                # Nucleus (top-p) sampling
+                # Apply nucleus (top-p) sampling
                 if top_p < 1.0:
                     sorted_logits, sorted_indices = torch.sort(next_token_logits, descending=True)
                     cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
                     
-                    # Kümülatif olasılığı p'den büyük olan en küçük indeksleri bul
+                    # Mark the sorted positions whose cumulative probability exceeds top_p
                     sorted_indices_to_remove = cumulative_probs > top_p
-                    # İlk indeksi koru (en yüksek olasılıklı token)
+                    # Shift the mask right so the token that crosses top_p is kept; always keep the top token
                     sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone()
                     sorted_indices_to_remove[..., 0] = 0
                     
-                    # Sıralanmış logitlerden kaldırılacak olanları -inf yap
+                    # Suppress logits that fall outside the nucleus
                     sorted_logits[sorted_indices_to_remove] = -float('Inf')
                     
-                    # Orijinal sıraya geri dön
+                    # Restore the original order
                     next_token_logits = torch.zeros_like(next_token_logits).scatter_(
                         dim=1, index=sorted_indices, src=sorted_logits
                     )
                 
-                # Sonraki tokenı seç
+                # Sample the next token
                 probs = F.softmax(next_token_logits, dim=-1)
                 next_tokens = torch.multinomial(probs, num_samples=1)
                 
-                # Eğer pad_token_id verildiyse ve tüm olasılıklar -inf ise pad_token_id kullan
+                # If pad_token_id was provided and every probability is infinite, use pad_token_id instead
                 if pad_token_id is not None and torch.all(torch.isinf(probs)):
                     next_tokens = torch.full_like(next_tokens, pad_token_id)
                 
-                # Üretilen token'ı çıktıya ekle
+                # Append the generated token to the output
                 generated = torch.cat((generated, next_tokens), dim=1)
                 
-                # Eğer tüm örnekler sonlandırıldıysa döngüden çık
+                # Exit early if every sequence has finished
                 if pad_token_id is not None and torch.all(next_tokens == pad_token_id):
                     break
         
         return generated
 
 # =============================================================================
-# 4. DATASET - Eğitim verisi hazırlama
+# 4. DATASET - Prepare training data
 # =============================================================================
 
 def build_vocab_from_csv(csv_path: str) -> List[str]:
-    """CSV dosyasından karakter bazında vocabulary oluştur"""
+    """Create a character-level vocabulary from the CSV file"""
     df = pd.read_csv(csv_path)
     all_text = ""
     
-    # Tüm metinleri birleştir
+    # Concatenate all text
     for _, row in df.iterrows():
         all_text += row['Question'] + " "
         all_text += row['A'] + " " + row['B'] + " " + row['C'] + " " + row['D'] + " "
     
-    # Benzersiz karakterleri al ve sırala
+    # Collect and sort unique characters
     unique_chars = sorted(list(set(all_text)))
     return unique_chars
 
 class MMLUDataset(Dataset):
-    """MMLU dataset sınıfı"""
+    """MMLU dataset wrapper"""
     
     def __init__(self, csv_path: str, tokenizer: SimpleTokenizer, max_length: int = 512):
         self.tokenizer = tokenizer
         self.max_length = max_length
         self.data = []
         
-        # CSV'yi yükle
+        # Load the CSV file
         df = pd.read_csv(csv_path)
         
-        # Tüm metinleri topla
+        # Gather every prompt text
         all_texts = []
         for _, row in df.iterrows():
             question = row['Question']
@@ -390,27 +390,27 @@ def __init__(self, csv_path: str, tokenizer: SimpleTokenizer, max_length: int =
             prompt = f"Question: {question}\nA) {options[0]}\nB) {options[1]}\nC) {options[2]}\nD) {options[3]}\nAnswer:"
             all_texts.append(prompt)
         
-        # Tokenizer'ı eğit
+        # Train the tokenizer
         self.tokenizer.fit(all_texts)
         
-        # Veri setini oluştur
+        # Build the dataset entries
         for i, row in df.iterrows():
             question = row['Question']
             options = [str(row['A']), str(row['B']), str(row['C']), str(row['D'])]
             answer = row['Answer']
             
-            # Prompt formatı: "Question: [soru]\nA) [A]\nB) [B]\nC) [C]\nD) [D]\nAnswer:"
+            # Prompt format: "Question: [question]\nA) [A]\nB) [B]\nC) [C]\nD) [D]\nAnswer:"
             prompt = f"Question: {question}\n"
             for i, opt in enumerate(['A', 'B', 'C', 'D']):
                 prompt += f"{opt}) {options[i]}\n"
             prompt += "Answer:"
             
-            # Cevabı token olarak kodla (A->0, B->1, C->2, D->3)
+            # Encode the answer choice as a token (A->0, B->1, C->2, D->3)
             target = ord(str(answer).strip().upper()[0]) - ord('A')
             
-            # Tokenize et ve kaydet
+            # Tokenize and store
             tokens = self.tokenizer.encode(prompt, max_length)
-            if len(tokens) > 0 and 0 <= target <= 3:  # Sadece geçerli hedefleri kabul et
+            if len(tokens) > 0 and 0 <= target <= 3:  # Only accept valid targets
                 self.data.append((tokens, target))
     
     def __len__(self):
@@ -423,11 +423,11 @@ def __getitem__(self, idx):
         return x, y
 
 # =============================================================================
-# 5. TRAINING - Model eğitimi
+# 5. TRAINING - Model training
 # =============================================================================
 
 class LLMTrainer:
-    """LLM eğitim sınıfı"""
+    """LLM training helper"""
     
     def __init__(self, model: SimpleLLM, tokenizer: SimpleTokenizer, device: str = 'cpu',
                  learning_rate: float = 3e-4, weight_decay: float = 0.01,
@@ -452,11 +452,11 @@ def __init__(self, model: SimpleLLM, tokenizer: SimpleTokenizer, device: str = '
             eta_min=learning_rate * 0.1  # Minimum learning rate
         )
         
-        # Warmup için
+        # Warmup configuration
         self.warmup_steps = warmup_steps
         self.current_step = 0
         
-        # Loss fonksiyonu
+        # Loss functions
         self.criterion = nn.CrossEntropyLoss(ignore_index=tokenizer.pad_token_id)
         self.kl_loss = nn.KLDivLoss(reduction='batchmean')
         
@@ -464,7 +464,7 @@ def __init__(self, model: SimpleLLM, tokenizer: SimpleTokenizer, device: str = '
         self.max_grad_norm = 1.0
         
     def train_epoch(self, dataloader: DataLoader) -> float:
-        """Bir epoch eğitim"""
+        """Train for a single epoch"""
         self.model.train()
         total_loss = 0
         correct = 0
@@ -476,28 +476,28 @@ def train_epoch(self, dataloader: DataLoader) -> float:
             # Forward pass
             outputs = self.model(inputs)
             
-            # Sadece son token'ın çıktısını al (cevap pozisyonu)
+            # Consider only the final token (answer position)
             last_token_logits = outputs[:, -1, :]  # [batch_size, vocab_size]
             
-            # Loss hesapla
+            # Compute the loss
             loss = self.criterion(last_token_logits, targets)
             
-            # Doğru tahminleri say
+            # Count correct predictions
             _, predicted = torch.max(last_token_logits, 1)
             correct += (predicted == targets).sum().item()
             total += targets.size(0)
             
-            # Backward pass ve optimize
+            # Backpropagation and optimisation
             self.optimizer.zero_grad()
             loss.backward()
             
             # Gradient clipping
             torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.max_grad_norm)
             
-            # Optimizer adımı
+            # Optimiser step
             self.optimizer.step()
             
-            # Learning rate warmup ve schedule
+            # Learning-rate warmup and scheduling
             self.current_step += 1
             if self.current_step < self.warmup_steps:
                 # Linear warmup
@@ -509,7 +509,7 @@ def train_epoch(self, dataloader: DataLoader) -> float:
             
             total_loss += loss.item()
             
-            # Her 10 batch'te bir log göster
+            # Log progress every 10 batches
             if (batch_idx + 1) % 10 == 0:
                 batch_acc = (predicted == targets).float().mean().item() * 100
                 print(f"  Batch {batch_idx+1}/{len(dataloader)} - Loss: {loss.item():.4f}, Acc: {batch_acc:.2f}%")
@@ -518,10 +518,10 @@ def train_epoch(self, dataloader: DataLoader) -> float:
         return total_loss / len(dataloader), accuracy
     
     def generate(self, prompt: str, max_length: int = 100, temperature: float = 0.8) -> str:
-        """Metin üretimi"""
+        """Generate text"""
         self.model.eval()
         
-        # Prompt'u tokenize et
+        # Tokenise the prompt
         tokens = self.tokenizer.encode(prompt)
         input_ids = torch.tensor([tokens], dtype=torch.long).to(self.device)
         
@@ -537,75 +537,75 @@ def generate(self, prompt: str, max_length: int = 100, temperature: float = 0.8)
                 probabilities = F.softmax(next_token_logits, dim=-1)
                 next_token = torch.multinomial(probabilities, 1).item()
                 
-                # EOS token kontrolü
+                # Stop if the EOS token is produced
                 if next_token == self.tokenizer.char_to_id['<EOS>']:
                     break
                 
-                # Yeni token ekle
+                # Append the new token
                 generated_tokens.append(next_token)
                 next_token_tensor = torch.tensor([[next_token]], dtype=torch.long).to(self.device)
                 input_ids = torch.cat([input_ids, next_token_tensor], dim=1)
                 
-                # Maksimum sequence length kontrolü
+                # Enforce the maximum sequence length
                 if input_ids.size(1) >= self.model.max_seq_len:
-                    input_ids = input_ids[:, 1:]  # İlk tokenı çıkar
+                    input_ids = input_ids[:, 1:]  # Drop the first token to stay within limits
         
         return self.tokenizer.decode(generated_tokens)
 
 # =============================================================================
-# 6. MAIN - Ana çalıştırma kodu
+# 6. MAIN - Entry point
 # =============================================================================
 
 def main():
-    # Parametreler
+    # Hyper-parameters
     batch_size = 16
-    max_length = 512  # Daha uzun sequence'ler için
-    d_model = 512  # Daha büyük model boyutu
-    n_heads = 8  # Daha fazla head
-    n_layers = 6  # Daha fazla layer
-    d_ff = 2048  # Daha büyük feed forward
+    max_length = 512  # Allow longer sequences
+    d_model = 512  # Larger model width
+    n_heads = 8  # Increased number of heads
+    n_layers = 6  # Deeper stack
+    d_ff = 2048  # Wider feed-forward network
     dropout = 0.1  
-    num_epochs = 1 # 5 epoch için ayarlandı  # Daha fazla epoch
+    num_epochs = 1  # Set to 1 for quick experimentation
     max_examples = 1000
     learning_rate = 3e-4
     weight_decay = 0.01
-    warmup_steps = 1000  # Learning rate warmup için adım sayısı
+    warmup_steps = 1000  # Warmup steps for the learning rate
     
-    # Cihaz
+    # Device
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
     print(f"Using device: {device}")
     
-    # CSV dosya yolu
+    # CSV file path
     csv_path = r"c:\Users\emreq\Downloads\archive\mmlu.csv"
     
-    # Tokenizer'ı oluştur
+    # Build the tokenizer
     tokenizer = SimpleTokenizer()
     
-    # MMLU veri setini yükle
-    print("Veri seti yükleniyor...")
+    # Load the MMLU dataset
+    print("Loading dataset...")
     dataset = MMLUDataset(csv_path, tokenizer, max_length)
     
-    # Eğer veri kümesi boşsa hata ver
+    # Guard against an empty dataset
     if len(dataset) == 0:
-        raise ValueError("Veri kümesi boş. CSV dosyasını ve veri yapısını kontrol edin.")
+        raise ValueError("Dataset is empty. Please check the CSV file and structure.")
     
-    # Veri setini 1000 örnekle sınırla
+    # Limit the dataset to 1000 examples
     if len(dataset) > max_examples:
-        print(f"Veri seti {len(dataset)} örnekten {max_examples} örneğe indiriliyor...")
+        print(f"Truncating dataset from {len(dataset)} to {max_examples} examples...")
         indices = torch.randperm(len(dataset))[:max_examples]
         dataset = torch.utils.data.Subset(dataset, indices)
     
-    # Eğitim ve test setlerine ayır (%80 eğitim, %20 test)
+    # Split into train and test sets (80/20)
     train_size = int(0.8 * len(dataset))
     test_size = len(dataset) - train_size
     train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])
     
-    # DataLoader'ları oluştur
+    # Build the data loaders
     train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
     test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
     
-    # Modeli oluştur
-    print("Model oluşturuluyor...")
+    # Instantiate the model
+    print("Building model...")
     model = SimpleLLM(
         vocab_size=tokenizer.vocab_size,
         d_model=d_model,
@@ -616,10 +616,10 @@ def main():
         dropout=dropout
     ).to(device)
     
-    # Learning rate scheduler için toplam adım sayısı
+    # Total number of steps for the learning-rate scheduler
     total_steps = len(train_dataloader) * num_epochs
     
-    # Trainer oluştur
+    # Instantiate the trainer
     trainer = LLMTrainer(
         model=model,
         tokenizer=tokenizer,
@@ -630,15 +630,15 @@ def main():
         total_steps=total_steps
     )
     
-    # Eğitim döngüsü
-    print(f"Eğitim başlıyor... Toplam {len(train_dataset)} eğitim, {len(test_dataset)} test örneği")
+    # Training loop
+    print(f"Training starting... {len(train_dataset)} train samples, {len(test_dataset)} test samples")
     
     best_test_acc = 0
     for epoch in range(num_epochs):
-        # Eğitim
+        # Training
         train_loss, train_acc = trainer.train_epoch(train_dataloader)
         
-        # Test
+        # Evaluation
         model.eval()
         test_loss = 0
         correct = 0
@@ -650,11 +650,11 @@ def main():
                 outputs = model(inputs)
                 last_token_logits = outputs[:, -1, :]
                 
-                # Loss hesapla
+                # Compute the loss
                 loss = F.cross_entropy(last_token_logits, targets)
                 test_loss += loss.item()
                 
-                # Doğruluk hesapla
+                # Compute accuracy
                 _, predicted = torch.max(last_token_logits, 1)
                 correct += (predicted == targets).sum().item()
                 total += targets.size(0)
@@ -662,50 +662,50 @@ def main():
         test_loss = test_loss / len(test_dataloader)
         test_acc = 100 * correct / total if total > 0 else 0
         
-        # En iyi modeli kaydet
+        # Save the best model
         if test_acc > best_test_acc:
             best_test_acc = test_acc
-            # Sadece gerekli bilgileri kaydet
+            # Persist only the required information
             torch.save({
                 'epoch': epoch,
                 'model_state_dict': model.state_dict(),
                 'optimizer_state_dict': trainer.optimizer.state_dict(),
                 'loss': train_loss,
                 'accuracy': test_acc,
-                'tokenizer_chars': tokenizer.char_to_id,  # Sadece karakter-ID eşlemesini kaydet
+                'tokenizer_chars': tokenizer.char_to_id,  # Persist only the character-to-id mapping
                 'vocab_size': tokenizer.vocab_size
             }, 'best_model.pt')
-            print(f"Yeni en iyi model kaydedildi! Test Doğruluğu: {test_acc:.2f}%")
+            print(f"New best model saved! Test accuracy: {test_acc:.2f}%")
         
         print(f"Epoch {epoch+1}/{num_epochs}:")
     
-    # Test etme
+    # Final evaluation
     print("\n=== Final Test ===\n")
     
-    # En iyi modeli yükle
+    # Load the best checkpoint if it exists
     if os.path.exists('best_model.pt'):
         checkpoint = torch.load('best_model.pt')
         model.load_state_dict(checkpoint['model_state_dict'])
-        print(f"\nEn iyi model yüklendi (Doğruluk: {checkpoint['accuracy']:.2f}%)\n")
+        print(f"\nLoaded best model (Accuracy: {checkpoint['accuracy']:.2f}%)\n")
     
     model.eval()
     
-    # Örnek test soruları ve doğru cevapları
+    # Example test questions with expected answers
     test_questions = [
         {
-            "question": "Soru: İstanbul'un fethi hangi yılda olmuştur?\nA) 1451\nB) 1453\nC) 1455\nD) 1457\nCevap:",
+            "question": "Question: In which year was the conquest of Istanbul?\nA) 1451\nB) 1453\nC) 1455\nD) 1457\nAnswer:",
             "correct": "B"
         },
         {
-            "question": "Soru: Python programlama dili kim tarafından geliştirilmiştir?\nA) Guido van Rossum\nB) James Gosling\nC) Bjarne Stroustrup\nD) Dennis Ritchie\nCevap:",
+            "question": "Question: Who created the Python programming language?\nA) Guido van Rossum\nB) James Gosling\nC) Bjarne Stroustrup\nD) Dennis Ritchie\nAnswer:",
             "correct": "A"
         },
         {
-            "question": "Soru: Dünya'nın en büyük okyanusu hangisidir?\nA) Atlas Okyanusu\nB) Hint Okyanusu\nC) Arktik Okyanusu\nD) Büyük Okyanus\nCevap:",
+            "question": "Question: Which is the largest ocean in the world?\nA) Atlantic Ocean\nB) Indian Ocean\nC) Arctic Ocean\nD) Pacific Ocean\nAnswer:",
             "correct": "D"
         },
         {
-            "question": "Soru: Aşağıdakilerden hangisi bir yapay zeka kütüphanesidir?\nA) React\nB) TensorFlow\nC) Django\nD) Flask\nCevap:",
+            "question": "Question: Which of the following is an artificial intelligence library?\nA) React\nB) TensorFlow\nC) Django\nD) Flask\nAnswer:",
             "correct": "B"
         }
     ]
@@ -717,33 +717,33 @@ def main():
         correct = item["correct"]
         
         print(f"\n--- Test {i} ---")
-        print("Soru:")
+        print("Question:")
         print(question)
         
         # Modelden cevap al
         with torch.no_grad():
-            # Sadece soru kısmını tokenize et
+            # Tokenise only the question text
             question_tokens = tokenizer.encode(question, max_length=512, add_bos=True, add_eos=False)
             input_tensor = torch.tensor([question_tokens], device=device)
             
-            # Cevap oluştur
+            # Generate an answer
             output = model.generate(
                 input_tensor,
-                max_length=len(question_tokens) + 5,  # Cevap için 5 token yeterli
+                max_length=len(question_tokens) + 5,  # Five extra tokens are usually enough for the answer
                 temperature=0.7,
                 top_k=50,
                 top_p=0.9,
                 pad_token_id=tokenizer.pad_token_id
             )
             
-            # Tüm çıktıyı al
+            # Decode the full output
             full_output = tokenizer.decode(output[0].tolist())
             
-            # Sadece son 5 tokeni al (cevap genellikle sonlarda olur)
+            # Inspect the final 5 tokens (the answer is usually near the end)
             last_tokens = output[0][-5:].tolist()
             last_chars = tokenizer.decode(last_tokens)
             
-            # Cevap olarak A, B, C veya D harfini ara
+            # Search for an answer choice (A, B, C, or D)
             answer = None
             for c in last_chars.upper():
                 if c in ['A', 'B', 'C', 'D']:
@@ -751,26 +751,26 @@ def main():
                     break
             
             if answer is None:
-                answer = "(Cevap bulunamadı)"
+                answer = "(Answer not found)"
             
-            # Doğru cevabı kontrol et
+            # Compare against the expected answer
             is_correct = (answer == correct)
             if is_correct:
                 correct_answers += 1
             
-            print("\nModelin Cevabı:", answer)
-            print("Doğru Cevap:", correct)
-            print("Sonuç:", "✅ Doğru" if is_correct else "❌ Yanlış")
-            print("\nTam Çıktı:", full_output)
+            print("\nModel Answer:", answer)
+            print("Correct Answer:", correct)
+            print("Result:", "✅ Correct" if is_correct else "❌ Incorrect")
+            print("\nFull Output:", full_output)
         
         print("\n" + "="*80)
     
-    # Genel başarı oranını göster
+    # Report the overall success rate
     accuracy = (correct_answers / len(test_questions)) * 100
-    print(f"\n=== Test Sonuçları ===")
-    print(f"Doğru Cevaplar: {correct_answers}/{len(test_questions)}")
-    print(f"Başarı Oranı: {accuracy:.1f}%")
-    print("\n=== Eğitim ve Test Tamamlandı! ===")
+    print(f"\n=== Test Results ===")
+    print(f"Correct Answers: {correct_answers}/{len(test_questions)}")
+    print(f"Accuracy: {accuracy:.1f}%")
+    print("\n=== Training and evaluation complete! ===")
     
     return model, tokenizer, trainer
 
diff --git a/Genel-5/DyT_vs_RMSNorm.ipynb b/Genel-5/DyT_vs_RMSNorm.ipynb
index 70dacf8..3b1432d 100644
--- a/Genel-5/DyT_vs_RMSNorm.ipynb
+++ b/Genel-5/DyT_vs_RMSNorm.ipynb
@@ -1,5936 +1,401 @@
 {
-  "nbformat": 4,
-  "nbformat_minor": 0,
-  "metadata": {
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+  "colab": {
+   "provenance": [],
+   "gpuType": "T4",
+   "authorship_tag": "ABX9TyMvKDhNQ1pgBRmULhDR4kMt",
+   "include_colab_link": true
+  },
+  "kernelspec": {
+   "name": "python3",
+   "display_name": "Python 3"
+  },
+  "language_info": {
+   "name": "python"
+  },
+  "accelerator": "GPU"
+ },
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "view-in-github",
+    "colab_type": "text"
+   },
+   "source": [
+    "<a href=\"https://colab.research.google.com/github/emredeveloper/Transformers--General-AI/blob/main/DyT_vs_RMSNorm.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
     "colab": {
-      "provenance": [],
-      "gpuType": "T4",
-      "authorship_tag": "ABX9TyMvKDhNQ1pgBRmULhDR4kMt",
-      "include_colab_link": true
-    },
-    "kernelspec": {
-      "name": "python3",
-      "display_name": "Python 3"
+     "base_uri": "https://localhost:8080/"
     },
-    "language_info": {
-      "name": "python"
-    },
-    "accelerator": "GPU"
+    "id": "ZZHpfH5HoXIb",
+    "outputId": "b77f76ef-941e-46aa-909e-093fd4eafa72"
+   },
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.optim as optim\n",
+    "import time\n",
+    "from torchvision import datasets, transforms\n",
+    "from torch.utils.data import DataLoader\n",
+    "from tqdm import tqdm  # Adding tqdm for the progress bar\n",
+    "\n",
+    "# 1. RMSNorm Class\n",
+    "class RMSNorm(nn.Module):\n",
+    "    def __init__(self, dim, eps=1e-6):\n",
+    "        super(RMSNorm, self).__init__()\n",
+    "        self.dim = dim\n",
+    "        self.eps = eps\n",
+    "        self.gamma = nn.Parameter(torch.ones(dim))\n",
+    "        self.beta = nn.Parameter(torch.zeros(dim))\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        rms = torch.sqrt(torch.mean(x**2, dim=-1, keepdim=True) + self.eps)\n",
+    "        x_norm = x / rms\n",
+    "        return self.gamma * x_norm + self.beta\n",
+    "\n",
+    "# 2. DyT Class\n",
+    "class DyT(nn.Module):\n",
+    "    def __init__(self, dim, init_alpha=0.5):\n",
+    "        super(DyT, self).__init__()\n",
+    "        self.alpha = nn.Parameter(torch.ones(1) * init_alpha)\n",
+    "        self.gamma = nn.Parameter(torch.ones(dim))\n",
+    "        self.beta = nn.Parameter(torch.zeros(dim))\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        x = torch.tanh(self.alpha * x)\n",
+    "        return self.gamma * x + self.beta\n",
+    "\n",
+    "# 3. TransformerBlock Class\n",
+    "class TransformerBlock(nn.Module):\n",
+    "    def __init__(self, dim, num_heads, norm_layer, init_alpha=0.5):\n",
+    "        super(TransformerBlock, self).__init__()\n",
+    "        if norm_layer == 'RMSNorm':\n",
+    "            self.norm1 = RMSNorm(dim)\n",
+    "            self.norm2 = RMSNorm(dim)\n",
+    "        elif norm_layer == 'DyT':\n",
+    "            self.norm1 = DyT(dim, init_alpha)\n",
+    "            self.norm2 = DyT(dim, init_alpha)\n",
+    "        else:\n",
+    "            raise ValueError(\"Invalid norm_layer. Choose 'RMSNorm' or 'DyT'.\")\n",
+    "        self.attn = nn.MultiheadAttention(embed_dim=dim, num_heads=num_heads)\n",
+    "        self.ffn = nn.Sequential(\n",
+    "            nn.Linear(dim, dim * 4),\n",
+    "            nn.GELU(),\n",
+    "            nn.Linear(dim * 4, dim)\n",
+    "        )\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        attn_output, _ = self.attn(self.norm1(x), self.norm1(x), self.norm1(x))\n",
+    "        x = x + attn_output\n",
+    "        ffn_output = self.ffn(self.norm2(x))\n",
+    "        x = x + ffn_output\n",
+    "        return x\n",
+    "\n",
+    "# 4. SimpleViT Class\n",
+    "class SimpleViT(nn.Module):\n",
+    "    def __init__(self, img_size=224, patch_size=16, num_classes=10, dim=256, depth=3, heads=4, norm_layer='RMSNorm', init_alpha=0.5):\n",
+    "        super(SimpleViT, self).__init__()\n",
+    "        assert img_size % patch_size == 0, \"Image size must be divisible by the patch size\"\n",
+    "        num_patches = (img_size // patch_size) ** 2\n",
+    "\n",
+    "        self.patch_embed = nn.Conv2d(3, dim, kernel_size=patch_size, stride=patch_size)\n",
+    "        self.pos_embed = nn.Parameter(torch.randn(1, num_patches + 1, dim))\n",
+    "        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))\n",
+    "\n",
+    "        self.blocks = nn.ModuleList([\n",
+    "            TransformerBlock(dim=dim, num_heads=heads, norm_layer=norm_layer, init_alpha=init_alpha) for _ in range(depth)\n",
+    "        ])\n",
+    "\n",
+    "        self.head = nn.Linear(dim, num_classes)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        B = x.shape[0]\n",
+    "        x = self.patch_embed(x)\n",
+    "        x = x.flatten(2).transpose(1, 2)\n",
+    "\n",
+    "        cls_tokens = self.cls_token.expand(B, -1, -1)\n",
+    "        x = torch.cat((cls_tokens, x), dim=1)\n",
+    "        x = x + self.pos_embed\n",
+    "\n",
+    "        x = x.transpose(0, 1)\n",
+    "        for block in self.blocks:\n",
+    "            x = block(x)\n",
+    "        x = x.transpose(0, 1)\n",
+    "\n",
+    "        x = x[:, 0]\n",
+    "        x = self.head(x)\n",
+    "        return x\n",
+    "\n",
+    "# 5. Training and evaluation function\n",
+    "def train_model(model, dataloader, criterion, optimizer, num_epochs, device):\n",
+    "    model.to(device)\n",
+    "    start_time = time.time()\n",
+    "\n",
+    "    # tqdm for epochs\n",
+    "    for epoch in tqdm(range(num_epochs), desc=\"Epochs\", unit=\"epoch\"):\n",
+    "        model.train()\n",
+    "        running_loss = 0.0\n",
+    "        correct = 0\n",
+    "        total = 0\n",
+    "\n",
+    "        # tqdm for batches\n",
+    "        for inputs, labels in tqdm(dataloader, desc=f\"Epoch {epoch+1}/{num_epochs}\", unit=\"batch\", leave=False):\n",
+    "            inputs, labels = inputs.to(device), labels.to(device)\n",
+    "            optimizer.zero_grad()\n",
+    "            outputs = model(inputs)\n",
+    "            loss = criterion(outputs, labels)\n",
+    "            loss.backward()\n",
+    "            optimizer.step()\n",
+    "\n",
+    "            running_loss += loss.item()\n",
+    "            _, predicted = torch.max(outputs, 1)\n",
+    "            total += labels.size(0)\n",
+    "            correct += (predicted == labels).sum().item()\n",
+    "\n",
+    "        accuracy = 100 * correct / total\n",
+    "        avg_loss = running_loss / len(dataloader)\n",
+    "        print(f\"Epoch {epoch+1}/{num_epochs} completed. Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%\")\n",
+    "\n",
+    "    end_time = time.time()\n",
+    "    training_time = end_time - start_time\n",
+    "    return training_time, accuracy\n",
+    "\n",
+    "# Dataset and DataLoader (CIFAR-10)\n",
+    "transform = transforms.Compose([\n",
+    "    transforms.Resize((224, 224)),\n",
+    "    transforms.ToTensor(),\n",
+    "    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))\n",
+    "])\n",
+    "\n",
+    "train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)\n",
+    "train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)\n",
+    "\n",
+    "# Device and training parameters\n",
+    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+    "num_epochs = 1\n",
+    "\n",
+    "# RMSNorm model\n",
+    "model_rms = SimpleViT(norm_layer='RMSNorm')\n",
+    "optimizer_rms = optim.Adam(model_rms.parameters(), lr=0.001)\n",
+    "criterion = nn.CrossEntropyLoss()\n",
+    "\n",
+    "# DyT model\n",
+    "model_dyt = SimpleViT(norm_layer='DyT', init_alpha=0.5)\n",
+    "optimizer_dyt = optim.Adam(model_dyt.parameters(), lr=0.001)\n",
+    "\n",
+    "# Training and comparison\n",
+    "print(\"RMSNorm Model training...\")\n",
+    "time_rms, acc_rms = train_model(model_rms, train_loader, criterion, optimizer_rms, num_epochs, device)\n",
+    "print(f\"RMSNorm Training Time: {time_rms:.2f} seconds, Final Accuracy: {acc_rms:.2f}%\")\n",
+    "\n",
+    "print(\"\\nDyT Model training...\")\n",
+    "time_dyt, acc_dyt = train_model(model_dyt, train_loader, criterion, optimizer_dyt, num_epochs, device)\n",
+    "print(f\"DyT Training Time: {time_dyt:.2f} seconds, Final Accuracy: {acc_dyt:.2f}%\")\n",
+    "\n",
+    "# Comparison results\n",
+    "print(\"\\nComparison:\")\n",
+    "print(f\"RMSNorm - Time: {time_rms:.2f}s, Accuracy: {acc_rms:.2f}%\")\n",
+    "print(f\"DyT - Time: {time_dyt:.2f}s, Accuracy: {acc_dyt:.2f}%\")"
+   ]
   },
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "view-in-github",
-        "colab_type": "text"
-      },
-      "source": [
-        "<a href=\"https://colab.research.google.com/github/emredeveloper/Transformers--General-AI/blob/main/DyT_vs_RMSNorm.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 4,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "ZZHpfH5HoXIb",
-        "outputId": "b77f76ef-941e-46aa-909e-093fd4eafa72"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "RMSNorm Modeli Eğitiliyor...\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "Epochs:   0%|          | 0/1 [00:00<?, ?epoch/s]\n",
-            "Epoch 1/1:   0%|          | 0/1563 [00:00<?, ?batch/s]\u001b[A\n",
-            "Epoch 1/1:   0%|          | 1/1563 [00:00<03:23,  7.69batch/s]\u001b[A\n",
-            "Epoch 1/1:   0%|          | 2/1563 [00:00<03:22,  7.71batch/s]\u001b[A\n",
-            "Epoch 1/1:   0%|          | 3/1563 [00:00<03:11,  8.13batch/s]\u001b[A\n",
-            "Epoch 1/1:   0%|          | 4/1563 [00:00<03:02,  8.53batch/s]\u001b[A\n",
-            "Epoch 1/1:   0%|          | 5/1563 [00:00<02:53,  8.99batch/s]\u001b[A\n",
-            "Epoch 1/1:   0%|          | 7/1563 [00:00<02:46,  9.32batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 8/1563 [00:00<02:51,  9.09batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 9/1563 [00:01<02:53,  8.97batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 10/1563 [00:01<02:54,  8.92batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 11/1563 [00:01<02:56,  8.78batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 12/1563 [00:01<02:58,  8.70batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 13/1563 [00:01<02:57,  8.72batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 14/1563 [00:01<03:03,  8.44batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 15/1563 [00:01<03:00,  8.56batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 16/1563 [00:01<02:58,  8.66batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 17/1563 [00:01<02:57,  8.70batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 18/1563 [00:02<02:57,  8.68batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 19/1563 [00:02<02:59,  8.61batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|▏         | 20/1563 [00:02<03:01,  8.50batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|▏         | 21/1563 [00:02<03:01,  8.51batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|▏         | 22/1563 [00:02<03:01,  8.47batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|▏         | 23/1563 [00:02<03:01,  8.49batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 24/1563 [00:02<02:59,  8.56batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 26/1563 [00:02<02:42,  9.49batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 28/1563 [00:03<02:34,  9.94batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 30/1563 [00:03<02:30, 10.20batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 32/1563 [00:03<02:28, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 34/1563 [00:03<02:25, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 36/1563 [00:03<02:23, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 38/1563 [00:04<02:23, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 40/1563 [00:04<02:23, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 42/1563 [00:04<02:22, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 44/1563 [00:04<02:23, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 46/1563 [00:04<02:22, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 48/1563 [00:05<02:24, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 50/1563 [00:05<02:23, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 52/1563 [00:05<02:22, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 54/1563 [00:05<02:22, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▎         | 56/1563 [00:05<02:21, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▎         | 58/1563 [00:05<02:20, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 60/1563 [00:06<02:20, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 62/1563 [00:06<02:20, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 64/1563 [00:06<02:20, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 66/1563 [00:06<02:18, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 68/1563 [00:06<02:18, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 70/1563 [00:07<02:18, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▍         | 72/1563 [00:07<02:18, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▍         | 74/1563 [00:07<02:18, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▍         | 76/1563 [00:07<02:19, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▍         | 78/1563 [00:07<02:18, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▌         | 80/1563 [00:08<02:18, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▌         | 82/1563 [00:08<02:17, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▌         | 84/1563 [00:08<02:17, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▌         | 86/1563 [00:08<02:18, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▌         | 88/1563 [00:08<02:17, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▌         | 90/1563 [00:08<02:16, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▌         | 92/1563 [00:09<02:16, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▌         | 94/1563 [00:09<02:17, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▌         | 96/1563 [00:09<02:17, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▋         | 98/1563 [00:09<02:16, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▋         | 100/1563 [00:09<02:15, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 102/1563 [00:10<02:15, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 104/1563 [00:10<02:15, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 106/1563 [00:10<02:15, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 108/1563 [00:10<02:15, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 110/1563 [00:10<02:14, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 112/1563 [00:10<02:14, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 114/1563 [00:11<02:14, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 116/1563 [00:11<02:14, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 118/1563 [00:11<02:15, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 120/1563 [00:11<02:14, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 122/1563 [00:11<02:13, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 124/1563 [00:12<02:13, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 126/1563 [00:12<02:13, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 128/1563 [00:12<02:12, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 130/1563 [00:12<02:14, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 132/1563 [00:12<02:15, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▊         | 134/1563 [00:13<02:23,  9.96batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▊         | 136/1563 [00:13<02:26,  9.75batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 137/1563 [00:13<02:28,  9.62batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 138/1563 [00:13<02:33,  9.31batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 139/1563 [00:13<02:34,  9.24batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 140/1563 [00:13<02:35,  9.17batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 141/1563 [00:13<02:36,  9.09batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 142/1563 [00:13<02:38,  8.97batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 143/1563 [00:14<02:40,  8.86batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 144/1563 [00:14<02:42,  8.73batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 145/1563 [00:14<02:43,  8.65batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 146/1563 [00:14<02:44,  8.61batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 147/1563 [00:14<02:48,  8.39batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 148/1563 [00:14<02:47,  8.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|▉         | 149/1563 [00:14<02:47,  8.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|▉         | 150/1563 [00:14<02:46,  8.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|▉         | 152/1563 [00:15<02:30,  9.38batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|▉         | 154/1563 [00:15<02:23,  9.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|▉         | 156/1563 [00:15<02:19, 10.11batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|█         | 157/1563 [00:15<02:19, 10.09batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|█         | 159/1563 [00:15<02:16, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|█         | 161/1563 [00:15<02:13, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|█         | 163/1563 [00:16<02:12, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█         | 165/1563 [00:16<02:11, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█         | 167/1563 [00:16<02:11, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█         | 169/1563 [00:16<02:10, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█         | 171/1563 [00:16<02:10, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█         | 173/1563 [00:17<02:10, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█         | 175/1563 [00:17<02:10, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█▏        | 177/1563 [00:17<02:09, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█▏        | 179/1563 [00:17<02:11, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 181/1563 [00:17<02:10, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 183/1563 [00:18<02:09, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 185/1563 [00:18<02:09, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 187/1563 [00:18<02:09, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 189/1563 [00:18<02:08, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 191/1563 [00:18<02:08, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 193/1563 [00:18<02:08, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 195/1563 [00:19<02:08, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 197/1563 [00:19<02:07, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 199/1563 [00:19<02:07, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 201/1563 [00:19<02:06, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 203/1563 [00:19<02:06, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 205/1563 [00:20<02:06, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 207/1563 [00:20<02:06, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 209/1563 [00:20<02:05, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 211/1563 [00:20<02:06, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▎        | 213/1563 [00:20<02:05, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▍        | 215/1563 [00:21<02:05, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▍        | 217/1563 [00:21<02:05, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▍        | 219/1563 [00:21<02:05, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▍        | 221/1563 [00:21<02:05, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▍        | 223/1563 [00:21<02:05, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▍        | 225/1563 [00:21<02:04, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▍        | 227/1563 [00:22<02:03, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▍        | 229/1563 [00:22<02:04, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▍        | 231/1563 [00:22<02:04, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▍        | 233/1563 [00:22<02:03, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▌        | 235/1563 [00:22<02:03, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▌        | 237/1563 [00:23<02:03, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▌        | 239/1563 [00:23<02:03, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▌        | 241/1563 [00:23<02:02, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▌        | 243/1563 [00:23<02:02, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▌        | 245/1563 [00:23<02:01, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▌        | 247/1563 [00:23<02:02, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▌        | 249/1563 [00:24<02:02, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▌        | 251/1563 [00:24<02:03, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▌        | 253/1563 [00:24<02:04, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▋        | 255/1563 [00:24<02:03, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▋        | 257/1563 [00:24<02:03, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 259/1563 [00:25<02:11,  9.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 260/1563 [00:25<02:14,  9.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 261/1563 [00:25<02:18,  9.42batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 262/1563 [00:25<02:21,  9.22batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 263/1563 [00:25<02:23,  9.08batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 264/1563 [00:25<02:25,  8.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 265/1563 [00:25<02:26,  8.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 266/1563 [00:25<02:27,  8.77batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 267/1563 [00:26<02:28,  8.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 268/1563 [00:26<02:29,  8.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 269/1563 [00:26<02:30,  8.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 270/1563 [00:26<02:29,  8.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 271/1563 [00:26<02:32,  8.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 272/1563 [00:26<02:32,  8.44batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 273/1563 [00:26<02:32,  8.46batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 274/1563 [00:26<02:31,  8.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 275/1563 [00:27<02:33,  8.38batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 277/1563 [00:27<02:18,  9.28batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 279/1563 [00:27<02:10,  9.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 280/1563 [00:27<02:12,  9.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 282/1563 [00:27<02:06, 10.16batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 284/1563 [00:27<02:02, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 286/1563 [00:28<02:00, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 288/1563 [00:28<02:01, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▊        | 290/1563 [00:28<02:00, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▊        | 292/1563 [00:28<02:00, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▉        | 294/1563 [00:28<01:59, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▉        | 296/1563 [00:29<01:59, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▉        | 298/1563 [00:29<01:58, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▉        | 300/1563 [00:29<01:58, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▉        | 302/1563 [00:29<01:58, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▉        | 304/1563 [00:29<01:59, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|█▉        | 306/1563 [00:29<01:58, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|█▉        | 308/1563 [00:30<01:59, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|█▉        | 310/1563 [00:30<01:58, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|█▉        | 312/1563 [00:30<01:58, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|██        | 314/1563 [00:30<01:57, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|██        | 316/1563 [00:30<01:56, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|██        | 318/1563 [00:31<01:57, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|██        | 320/1563 [00:31<01:57, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 322/1563 [00:31<01:56, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 324/1563 [00:31<01:56, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 326/1563 [00:31<01:55, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 328/1563 [00:32<01:55, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 330/1563 [00:32<01:57, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 332/1563 [00:32<01:56, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██▏       | 334/1563 [00:32<01:55, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██▏       | 336/1563 [00:32<01:55, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/1:  22%|██▏       | 338/1563 [00:32<01:54, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  22%|██▏       | 340/1563 [00:33<01:55, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  22%|██▏       | 342/1563 [00:33<01:55, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  22%|██▏       | 344/1563 [00:33<01:55, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  22%|██▏       | 346/1563 [00:33<01:54, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  22%|██▏       | 348/1563 [00:33<01:53, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  22%|██▏       | 350/1563 [00:34<01:53, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 352/1563 [00:34<01:55, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 354/1563 [00:34<01:54, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 356/1563 [00:34<01:54, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 358/1563 [00:34<01:53, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 360/1563 [00:35<01:54, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 362/1563 [00:35<01:54, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 364/1563 [00:35<01:53, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 366/1563 [00:35<01:53, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▎       | 368/1563 [00:35<01:52, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▎       | 370/1563 [00:36<01:53, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▍       | 372/1563 [00:36<01:52, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▍       | 374/1563 [00:36<01:51, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▍       | 376/1563 [00:36<01:51, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▍       | 378/1563 [00:36<01:51, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▍       | 380/1563 [00:36<01:50, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▍       | 382/1563 [00:37<01:54, 10.33batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▍       | 384/1563 [00:37<02:04,  9.44batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▍       | 385/1563 [00:37<02:06,  9.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▍       | 386/1563 [00:37<02:08,  9.16batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▍       | 387/1563 [00:37<02:09,  9.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▍       | 388/1563 [00:37<02:10,  8.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▍       | 389/1563 [00:37<02:11,  8.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▍       | 390/1563 [00:38<02:12,  8.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▌       | 391/1563 [00:38<02:14,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▌       | 392/1563 [00:38<02:16,  8.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▌       | 393/1563 [00:38<02:16,  8.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▌       | 394/1563 [00:38<02:23,  8.13batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▌       | 395/1563 [00:38<02:22,  8.21batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▌       | 396/1563 [00:38<02:22,  8.21batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▌       | 397/1563 [00:38<02:22,  8.21batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▌       | 398/1563 [00:39<02:22,  8.18batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▌       | 399/1563 [00:39<02:20,  8.28batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▌       | 401/1563 [00:39<02:08,  9.04batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▌       | 403/1563 [00:39<02:01,  9.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▌       | 405/1563 [00:39<01:56,  9.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▌       | 407/1563 [00:39<01:53, 10.18batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▌       | 409/1563 [00:40<01:51, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▋       | 411/1563 [00:40<01:49, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▋       | 413/1563 [00:40<01:50, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 415/1563 [00:40<01:49, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 417/1563 [00:40<01:48, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 419/1563 [00:41<01:48, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 421/1563 [00:41<01:47, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 423/1563 [00:41<01:48, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 425/1563 [00:41<01:48, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 427/1563 [00:41<01:47, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 429/1563 [00:42<01:46, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 431/1563 [00:42<01:46, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 433/1563 [00:42<01:46, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 435/1563 [00:42<01:45, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 437/1563 [00:42<01:44, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 439/1563 [00:42<01:43, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 441/1563 [00:43<01:43, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 443/1563 [00:43<01:43, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 445/1563 [00:43<01:43, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▊       | 447/1563 [00:43<01:43, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▊       | 449/1563 [00:43<01:43, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 451/1563 [00:44<01:42, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 453/1563 [00:44<01:42, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 455/1563 [00:44<01:43, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 457/1563 [00:44<01:43, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 459/1563 [00:44<01:43, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 461/1563 [00:45<01:42, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  30%|██▉       | 463/1563 [00:45<01:42, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  30%|██▉       | 465/1563 [00:45<01:42, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  30%|██▉       | 467/1563 [00:45<01:42, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  30%|███       | 469/1563 [00:45<01:42, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  30%|███       | 471/1563 [00:45<01:41, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/1:  30%|███       | 473/1563 [00:46<01:41, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:  30%|███       | 475/1563 [00:46<01:42, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███       | 477/1563 [00:46<01:41, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███       | 479/1563 [00:46<01:41, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███       | 481/1563 [00:46<01:41, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███       | 483/1563 [00:47<01:41, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███       | 485/1563 [00:47<01:41, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███       | 487/1563 [00:47<01:41, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███▏      | 489/1563 [00:47<01:41, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███▏      | 491/1563 [00:47<01:41, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 493/1563 [00:48<01:40, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 495/1563 [00:48<01:39, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 497/1563 [00:48<01:39, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 499/1563 [00:48<01:40, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 501/1563 [00:48<01:39, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 503/1563 [00:48<01:38, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 505/1563 [00:49<01:38, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 507/1563 [00:49<01:42, 10.29batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 509/1563 [00:49<01:47,  9.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 510/1563 [00:49<01:50,  9.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 511/1563 [00:49<01:52,  9.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 512/1563 [00:49<01:54,  9.21batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 513/1563 [00:50<01:58,  8.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 514/1563 [00:50<02:00,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 515/1563 [00:50<01:59,  8.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 516/1563 [00:50<01:59,  8.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 517/1563 [00:50<01:59,  8.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 518/1563 [00:50<02:01,  8.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 519/1563 [00:50<02:00,  8.65batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 520/1563 [00:50<01:59,  8.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 521/1563 [00:50<01:58,  8.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 522/1563 [00:51<01:58,  8.77batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 523/1563 [00:51<01:59,  8.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▎      | 524/1563 [00:51<01:59,  8.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▎      | 525/1563 [00:51<01:58,  8.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▎      | 527/1563 [00:51<01:48,  9.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▍      | 528/1563 [00:51<01:49,  9.48batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▍      | 530/1563 [00:51<01:44,  9.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▍      | 532/1563 [00:52<01:41, 10.18batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▍      | 534/1563 [00:52<01:39, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▍      | 536/1563 [00:52<01:38, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▍      | 538/1563 [00:52<01:39, 10.33batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▍      | 540/1563 [00:52<01:38, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▍      | 542/1563 [00:53<01:37, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▍      | 544/1563 [00:53<01:37, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▍      | 546/1563 [00:53<01:37, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▌      | 548/1563 [00:53<01:36, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▌      | 550/1563 [00:53<01:36, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▌      | 552/1563 [00:53<01:35, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▌      | 554/1563 [00:54<01:35, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▌      | 556/1563 [00:54<01:35, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▌      | 558/1563 [00:54<01:35, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▌      | 560/1563 [00:54<01:34, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▌      | 562/1563 [00:54<01:34, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▌      | 564/1563 [00:55<01:34, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▌      | 566/1563 [00:55<01:33, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▋      | 568/1563 [00:55<01:33, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▋      | 570/1563 [00:55<01:33, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 572/1563 [00:55<01:33, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 574/1563 [00:56<01:33, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 576/1563 [00:56<01:33, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 578/1563 [00:56<01:33, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 580/1563 [00:56<01:33, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 582/1563 [00:56<01:33, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 584/1563 [00:57<01:32, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 586/1563 [00:57<01:32, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 588/1563 [00:57<01:31, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 590/1563 [00:57<01:31, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 592/1563 [00:57<01:32, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 594/1563 [00:57<01:33, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 596/1563 [00:58<01:31, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 598/1563 [00:58<01:31, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 600/1563 [00:58<01:31, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▊      | 602/1563 [00:58<01:30, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▊      | 604/1563 [00:58<01:31, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▉      | 606/1563 [00:59<01:31, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▉      | 608/1563 [00:59<01:30, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▉      | 610/1563 [00:59<01:30, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▉      | 612/1563 [00:59<01:30, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▉      | 614/1563 [00:59<01:29, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▉      | 616/1563 [01:00<01:29, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|███▉      | 618/1563 [01:00<01:29, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|███▉      | 620/1563 [01:00<01:28, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|███▉      | 622/1563 [01:00<01:28, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|███▉      | 624/1563 [01:00<01:28, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|████      | 626/1563 [01:00<01:28, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|████      | 628/1563 [01:01<01:28, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|████      | 630/1563 [01:01<01:28, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|████      | 632/1563 [01:01<01:33,  9.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 634/1563 [01:01<01:37,  9.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 635/1563 [01:01<01:39,  9.35batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 636/1563 [01:02<01:41,  9.10batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 637/1563 [01:02<01:42,  9.07batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 638/1563 [01:02<01:44,  8.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 639/1563 [01:02<01:45,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 640/1563 [01:02<01:46,  8.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 641/1563 [01:02<01:47,  8.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 642/1563 [01:02<01:47,  8.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 643/1563 [01:02<01:48,  8.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 644/1563 [01:03<01:50,  8.33batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████▏     | 645/1563 [01:03<01:50,  8.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████▏     | 646/1563 [01:03<01:49,  8.38batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████▏     | 647/1563 [01:03<01:48,  8.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████▏     | 648/1563 [01:03<01:48,  8.43batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 649/1563 [01:03<01:52,  8.15batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 651/1563 [01:03<01:39,  9.15batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 653/1563 [01:04<01:34,  9.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 654/1563 [01:04<01:34,  9.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 656/1563 [01:04<01:30,  9.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 657/1563 [01:04<01:30,  9.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 659/1563 [01:04<01:28, 10.22batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 661/1563 [01:04<01:27, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 663/1563 [01:04<01:25, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 665/1563 [01:05<01:27, 10.22batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 667/1563 [01:05<01:26, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 669/1563 [01:05<01:25, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 671/1563 [01:05<01:25, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 673/1563 [01:05<01:24, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 675/1563 [01:06<01:23, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 677/1563 [01:06<01:24, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 679/1563 [01:06<01:23, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▎     | 681/1563 [01:06<01:23, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▎     | 683/1563 [01:06<01:23, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▍     | 685/1563 [01:07<01:22, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▍     | 687/1563 [01:07<01:22, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▍     | 689/1563 [01:07<01:22, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▍     | 691/1563 [01:07<01:22, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▍     | 693/1563 [01:07<01:21, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▍     | 695/1563 [01:07<01:21, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▍     | 697/1563 [01:08<01:20, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▍     | 699/1563 [01:08<01:20, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▍     | 701/1563 [01:08<01:21, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▍     | 703/1563 [01:08<01:20, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▌     | 705/1563 [01:08<01:20, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▌     | 707/1563 [01:09<01:20, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▌     | 709/1563 [01:09<01:19, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▌     | 711/1563 [01:09<01:19, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▌     | 713/1563 [01:09<01:19, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▌     | 715/1563 [01:09<01:19, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▌     | 717/1563 [01:10<01:19, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▌     | 719/1563 [01:10<01:19, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▌     | 721/1563 [01:10<01:19, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▋     | 723/1563 [01:10<01:19, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▋     | 725/1563 [01:10<01:19, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 727/1563 [01:11<01:19, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 729/1563 [01:11<01:19, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 731/1563 [01:11<01:19, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 733/1563 [01:11<01:19, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 735/1563 [01:11<01:18, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 737/1563 [01:11<01:17, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 739/1563 [01:12<01:17, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 741/1563 [01:12<01:18, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 743/1563 [01:12<01:17, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 745/1563 [01:12<01:16, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 747/1563 [01:12<01:16, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 749/1563 [01:13<01:16, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 751/1563 [01:13<01:16, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 753/1563 [01:13<01:17, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 755/1563 [01:13<01:20, 10.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 757/1563 [01:13<01:23,  9.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 758/1563 [01:14<01:24,  9.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▊     | 759/1563 [01:14<01:25,  9.36batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▊     | 760/1563 [01:14<01:28,  9.09batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▊     | 761/1563 [01:14<01:31,  8.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 762/1563 [01:14<01:32,  8.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 763/1563 [01:14<01:32,  8.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 764/1563 [01:14<01:34,  8.49batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 765/1563 [01:14<01:33,  8.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 766/1563 [01:14<01:34,  8.42batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 767/1563 [01:15<01:37,  8.19batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 768/1563 [01:15<01:36,  8.20batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 769/1563 [01:15<01:37,  8.16batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 770/1563 [01:15<01:39,  8.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 771/1563 [01:15<01:37,  8.10batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 772/1563 [01:15<01:35,  8.25batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|████▉     | 774/1563 [01:15<01:25,  9.19batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|████▉     | 775/1563 [01:16<01:24,  9.35batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|████▉     | 777/1563 [01:16<01:19,  9.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|████▉     | 779/1563 [01:16<01:17, 10.08batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|████▉     | 780/1563 [01:16<01:18,  9.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|█████     | 782/1563 [01:16<01:16, 10.17batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|█████     | 784/1563 [01:16<01:15, 10.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|█████     | 786/1563 [01:17<01:14, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|█████     | 788/1563 [01:17<01:13, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████     | 790/1563 [01:17<01:13, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████     | 792/1563 [01:17<01:14, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████     | 794/1563 [01:17<01:13, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████     | 796/1563 [01:18<01:12, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████     | 798/1563 [01:18<01:12, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████     | 800/1563 [01:18<01:12, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████▏    | 802/1563 [01:18<01:13, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████▏    | 804/1563 [01:18<01:13, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 806/1563 [01:18<01:12, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 808/1563 [01:19<01:12, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 810/1563 [01:19<01:12, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 812/1563 [01:19<01:11, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 814/1563 [01:19<01:11, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 816/1563 [01:19<01:11, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 818/1563 [01:20<01:10, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 820/1563 [01:20<01:10, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 822/1563 [01:20<01:10, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 824/1563 [01:20<01:10, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 826/1563 [01:20<01:10, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 828/1563 [01:21<01:10, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 830/1563 [01:21<01:09, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 832/1563 [01:21<01:09, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 834/1563 [01:21<01:10, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 836/1563 [01:21<01:09, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▎    | 838/1563 [01:22<01:08, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▎    | 840/1563 [01:22<01:08, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▍    | 842/1563 [01:22<01:07, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▍    | 844/1563 [01:22<01:07, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▍    | 846/1563 [01:22<01:08, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▍    | 848/1563 [01:22<01:07, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▍    | 850/1563 [01:23<01:06, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▍    | 852/1563 [01:23<01:06, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▍    | 854/1563 [01:23<01:06, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▍    | 856/1563 [01:23<01:07, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▍    | 858/1563 [01:23<01:06, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▌    | 860/1563 [01:24<01:06, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▌    | 862/1563 [01:24<01:06, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▌    | 864/1563 [01:24<01:05, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▌    | 866/1563 [01:24<01:05, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▌    | 868/1563 [01:24<01:05, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▌    | 870/1563 [01:25<01:05, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▌    | 872/1563 [01:25<01:05, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▌    | 874/1563 [01:25<01:04, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▌    | 876/1563 [01:25<01:04, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▌    | 878/1563 [01:25<01:05, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▋    | 880/1563 [01:26<01:09,  9.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▋    | 881/1563 [01:26<01:11,  9.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▋    | 882/1563 [01:26<01:12,  9.39batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▋    | 883/1563 [01:26<01:13,  9.26batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 884/1563 [01:26<01:15,  9.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 885/1563 [01:26<01:16,  8.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 886/1563 [01:26<01:16,  8.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 887/1563 [01:26<01:16,  8.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 888/1563 [01:26<01:17,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 889/1563 [01:27<01:17,  8.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 890/1563 [01:27<01:17,  8.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 891/1563 [01:27<01:16,  8.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 892/1563 [01:27<01:17,  8.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 893/1563 [01:27<01:16,  8.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 894/1563 [01:27<01:17,  8.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 895/1563 [01:27<01:18,  8.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 896/1563 [01:27<01:20,  8.29batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 898/1563 [01:28<01:12,  9.19batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 900/1563 [01:28<01:08,  9.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 902/1563 [01:28<01:05, 10.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 903/1563 [01:28<01:06,  9.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 905/1563 [01:28<01:04, 10.20batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 907/1563 [01:28<01:03, 10.25batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 909/1563 [01:29<01:02, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 911/1563 [01:29<01:02, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 913/1563 [01:29<01:01, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▊    | 915/1563 [01:29<01:01, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▊    | 917/1563 [01:29<01:00, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▉    | 919/1563 [01:30<01:00, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▉    | 921/1563 [01:30<00:59, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▉    | 923/1563 [01:30<00:59, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▉    | 925/1563 [01:30<01:00, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▉    | 927/1563 [01:30<00:59, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▉    | 929/1563 [01:31<00:59, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|█████▉    | 931/1563 [01:31<00:59, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|█████▉    | 933/1563 [01:31<00:58, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|█████▉    | 935/1563 [01:31<00:58, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|█████▉    | 937/1563 [01:31<00:58, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|██████    | 939/1563 [01:31<00:59, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|██████    | 941/1563 [01:32<00:58, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|██████    | 943/1563 [01:32<00:58, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|██████    | 945/1563 [01:32<00:58, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████    | 947/1563 [01:32<00:58, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████    | 949/1563 [01:32<00:57, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████    | 951/1563 [01:33<00:57, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████    | 953/1563 [01:33<00:57, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████    | 955/1563 [01:33<00:57, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████    | 957/1563 [01:33<00:57, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████▏   | 959/1563 [01:33<00:56, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████▏   | 961/1563 [01:34<00:57, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 963/1563 [01:34<00:57, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 965/1563 [01:34<00:56, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 967/1563 [01:34<00:56, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 969/1563 [01:34<00:55, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 971/1563 [01:34<00:56, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 973/1563 [01:35<00:56, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 975/1563 [01:35<00:55, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 977/1563 [01:35<00:55, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 979/1563 [01:35<00:55, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 981/1563 [01:35<00:54, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 983/1563 [01:36<00:55, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 985/1563 [01:36<00:54, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 987/1563 [01:36<00:54, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 989/1563 [01:36<00:54, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 991/1563 [01:36<00:53, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▎   | 993/1563 [01:37<00:53, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▎   | 995/1563 [01:37<00:53, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▍   | 997/1563 [01:37<00:53, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▍   | 999/1563 [01:37<00:53, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▍   | 1001/1563 [01:37<00:53, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▍   | 1003/1563 [01:38<00:55, 10.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▍   | 1005/1563 [01:38<00:57,  9.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▍   | 1006/1563 [01:38<00:58,  9.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▍   | 1007/1563 [01:38<00:59,  9.38batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▍   | 1008/1563 [01:38<00:59,  9.26batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▍   | 1009/1563 [01:38<01:00,  9.19batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▍   | 1010/1563 [01:38<01:01,  9.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▍   | 1011/1563 [01:38<01:01,  9.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▍   | 1012/1563 [01:39<01:01,  8.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▍   | 1013/1563 [01:39<01:03,  8.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▍   | 1014/1563 [01:39<01:02,  8.72batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▍   | 1015/1563 [01:39<01:02,  8.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▌   | 1016/1563 [01:39<01:04,  8.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▌   | 1017/1563 [01:39<01:04,  8.48batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▌   | 1018/1563 [01:39<01:04,  8.46batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▌   | 1019/1563 [01:39<01:05,  8.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▌   | 1020/1563 [01:40<01:04,  8.37batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▌   | 1021/1563 [01:40<01:04,  8.40batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▌   | 1022/1563 [01:40<01:02,  8.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▌   | 1024/1563 [01:40<00:56,  9.46batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▌   | 1025/1563 [01:40<00:56,  9.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▌   | 1027/1563 [01:40<00:53,  9.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▌   | 1029/1563 [01:40<00:52, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▌   | 1031/1563 [01:41<00:51, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▌   | 1033/1563 [01:41<00:50, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▌   | 1035/1563 [01:41<00:50, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▋   | 1037/1563 [01:41<00:49, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▋   | 1039/1563 [01:41<00:49, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1041/1563 [01:42<00:49, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1043/1563 [01:42<00:48, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1045/1563 [01:42<00:48, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1047/1563 [01:42<00:48, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1049/1563 [01:42<00:48, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1051/1563 [01:42<00:48, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1053/1563 [01:43<00:48, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1055/1563 [01:43<00:48, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1057/1563 [01:43<00:48, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1059/1563 [01:43<00:47, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1061/1563 [01:43<00:47, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1063/1563 [01:44<00:47, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1065/1563 [01:44<00:47, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1067/1563 [01:44<00:47, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1069/1563 [01:44<00:46, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▊   | 1071/1563 [01:44<00:46, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▊   | 1073/1563 [01:45<00:46, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▉   | 1075/1563 [01:45<00:46, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▉   | 1077/1563 [01:45<00:46, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▉   | 1079/1563 [01:45<00:46, 10.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▉   | 1081/1563 [01:45<00:46, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▉   | 1083/1563 [01:46<00:45, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▉   | 1085/1563 [01:46<00:45, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|██████▉   | 1087/1563 [01:46<00:45, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|██████▉   | 1089/1563 [01:46<00:45, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|██████▉   | 1091/1563 [01:46<00:44, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|██████▉   | 1093/1563 [01:46<00:44, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|███████   | 1095/1563 [01:47<00:44, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|███████   | 1097/1563 [01:47<00:44, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|███████   | 1099/1563 [01:47<00:44, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|███████   | 1101/1563 [01:47<00:43, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████   | 1103/1563 [01:47<00:43, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████   | 1105/1563 [01:48<00:43, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████   | 1107/1563 [01:48<00:43, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████   | 1109/1563 [01:48<00:44, 10.29batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████   | 1111/1563 [01:48<00:43, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████   | 1113/1563 [01:48<00:43, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████▏  | 1115/1563 [01:49<00:42, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████▏  | 1117/1563 [01:49<00:42, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1119/1563 [01:49<00:42, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1121/1563 [01:49<00:42, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1123/1563 [01:49<00:42, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1125/1563 [01:50<00:41, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1127/1563 [01:50<00:42, 10.25batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1129/1563 [01:50<00:44,  9.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1130/1563 [01:50<00:45,  9.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1131/1563 [01:50<00:47,  9.12batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1132/1563 [01:50<00:48,  8.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1133/1563 [01:50<00:49,  8.72batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1134/1563 [01:51<00:50,  8.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1135/1563 [01:51<00:50,  8.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1136/1563 [01:51<00:50,  8.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1137/1563 [01:51<00:49,  8.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1138/1563 [01:51<00:51,  8.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1139/1563 [01:51<00:50,  8.39batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1140/1563 [01:51<00:51,  8.24batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1141/1563 [01:51<00:50,  8.30batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1142/1563 [01:52<00:51,  8.18batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1143/1563 [01:52<00:51,  8.14batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1144/1563 [01:52<00:51,  8.13batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1145/1563 [01:52<00:50,  8.20batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1146/1563 [01:52<00:49,  8.42batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1148/1563 [01:52<00:44,  9.27batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▎  | 1150/1563 [01:52<00:42,  9.72batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▎  | 1151/1563 [01:53<00:42,  9.77batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▎  | 1152/1563 [01:53<00:42,  9.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▍  | 1154/1563 [01:53<00:40, 10.09batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▍  | 1155/1563 [01:53<00:40, 10.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▍  | 1156/1563 [01:53<00:40,  9.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▍  | 1157/1563 [01:53<00:41,  9.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▍  | 1159/1563 [01:53<00:40, 10.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▍  | 1161/1563 [01:54<00:39, 10.22batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▍  | 1163/1563 [01:54<00:38, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▍  | 1165/1563 [01:54<00:38, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▍  | 1167/1563 [01:54<00:38, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▍  | 1169/1563 [01:54<00:37, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▍  | 1171/1563 [01:54<00:37, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▌  | 1173/1563 [01:55<00:37, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▌  | 1175/1563 [01:55<00:37, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▌  | 1177/1563 [01:55<00:37, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▌  | 1179/1563 [01:55<00:37, 10.20batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▌  | 1181/1563 [01:55<00:37, 10.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▌  | 1183/1563 [01:56<00:36, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▌  | 1185/1563 [01:56<00:36, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▌  | 1187/1563 [01:56<00:36, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▌  | 1189/1563 [01:56<00:36, 10.27batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▌  | 1191/1563 [01:56<00:35, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▋  | 1193/1563 [01:57<00:35, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▋  | 1195/1563 [01:57<00:35, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1197/1563 [01:57<00:35, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1199/1563 [01:57<00:35, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1201/1563 [01:57<00:34, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1203/1563 [01:58<00:34, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1205/1563 [01:58<00:34, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1207/1563 [01:58<00:33, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1209/1563 [01:58<00:34, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1211/1563 [01:58<00:33, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1213/1563 [01:59<00:33, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1215/1563 [01:59<00:33, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1217/1563 [01:59<00:33, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1219/1563 [01:59<00:32, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1221/1563 [01:59<00:33, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1223/1563 [01:59<00:32, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1225/1563 [02:00<00:32, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▊  | 1227/1563 [02:00<00:32, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▊  | 1229/1563 [02:00<00:32, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▉  | 1231/1563 [02:00<00:31, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▉  | 1233/1563 [02:00<00:31, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▉  | 1235/1563 [02:01<00:31, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▉  | 1237/1563 [02:01<00:31, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▉  | 1239/1563 [02:01<00:31, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▉  | 1241/1563 [02:01<00:31, 10.33batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|███████▉  | 1243/1563 [02:01<00:31, 10.23batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|███████▉  | 1245/1563 [02:02<00:30, 10.29batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|███████▉  | 1247/1563 [02:02<00:30, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|███████▉  | 1249/1563 [02:02<00:30, 10.28batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|████████  | 1251/1563 [02:02<00:31,  9.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|████████  | 1252/1563 [02:02<00:33,  9.42batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|████████  | 1253/1563 [02:02<00:33,  9.15batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|████████  | 1254/1563 [02:03<00:34,  9.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|████████  | 1255/1563 [02:03<00:35,  8.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|████████  | 1256/1563 [02:03<00:35,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|████████  | 1257/1563 [02:03<00:36,  8.44batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|████████  | 1258/1563 [02:03<00:36,  8.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1259/1563 [02:03<00:35,  8.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1260/1563 [02:03<00:35,  8.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1261/1563 [02:03<00:35,  8.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1262/1563 [02:04<00:35,  8.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1263/1563 [02:04<00:35,  8.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1264/1563 [02:04<00:35,  8.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1265/1563 [02:04<00:37,  8.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1266/1563 [02:04<00:36,  8.18batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1267/1563 [02:04<00:35,  8.28batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1268/1563 [02:04<00:35,  8.40batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████▏ | 1270/1563 [02:04<00:32,  9.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████▏ | 1272/1563 [02:05<00:30,  9.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████▏ | 1273/1563 [02:05<00:30,  9.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1274/1563 [02:05<00:30,  9.43batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1276/1563 [02:05<00:29,  9.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1278/1563 [02:05<00:28,  9.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1279/1563 [02:05<00:28,  9.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1280/1563 [02:05<00:28,  9.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1281/1563 [02:06<00:28,  9.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1282/1563 [02:06<00:28,  9.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1283/1563 [02:06<00:28,  9.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1284/1563 [02:06<00:28,  9.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1285/1563 [02:06<00:28,  9.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1286/1563 [02:06<00:28,  9.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1288/1563 [02:06<00:27,  9.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1289/1563 [02:06<00:27,  9.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1291/1563 [02:07<00:26, 10.16batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1293/1563 [02:07<00:26, 10.27batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1295/1563 [02:07<00:26, 10.25batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1297/1563 [02:07<00:26, 10.19batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1299/1563 [02:07<00:25, 10.20batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1301/1563 [02:08<00:25, 10.10batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1303/1563 [02:08<00:25, 10.19batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1305/1563 [02:08<00:25, 10.24batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▎ | 1307/1563 [02:08<00:25, 10.20batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▎ | 1309/1563 [02:08<00:24, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▍ | 1311/1563 [02:09<00:24, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▍ | 1313/1563 [02:09<00:24, 10.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▍ | 1315/1563 [02:09<00:24, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▍ | 1317/1563 [02:09<00:23, 10.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▍ | 1319/1563 [02:09<00:23, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▍ | 1321/1563 [02:09<00:23, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▍ | 1323/1563 [02:10<00:23, 10.29batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▍ | 1325/1563 [02:10<00:23, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▍ | 1327/1563 [02:10<00:22, 10.29batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▌ | 1329/1563 [02:10<00:22, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▌ | 1331/1563 [02:10<00:22, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▌ | 1333/1563 [02:11<00:22, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▌ | 1335/1563 [02:11<00:22, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▌ | 1337/1563 [02:11<00:22, 10.27batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▌ | 1339/1563 [02:11<00:21, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▌ | 1341/1563 [02:11<00:21, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▌ | 1343/1563 [02:12<00:21, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▌ | 1345/1563 [02:12<00:21, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▌ | 1347/1563 [02:12<00:20, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▋ | 1349/1563 [02:12<00:20, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▋ | 1351/1563 [02:12<00:20, 10.33batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1353/1563 [02:13<00:20, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1355/1563 [02:13<00:20, 10.22batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1357/1563 [02:13<00:19, 10.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1359/1563 [02:13<00:19, 10.25batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1361/1563 [02:13<00:19, 10.28batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1363/1563 [02:14<00:19, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1365/1563 [02:14<00:19, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1367/1563 [02:14<00:19, 10.20batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1369/1563 [02:14<00:19, 10.13batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1371/1563 [02:14<00:19,  9.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1372/1563 [02:14<00:19,  9.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1373/1563 [02:15<00:20,  9.39batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1374/1563 [02:15<00:20,  9.24batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1375/1563 [02:15<00:20,  9.08batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1376/1563 [02:15<00:20,  8.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1377/1563 [02:15<00:21,  8.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1378/1563 [02:15<00:21,  8.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1379/1563 [02:15<00:21,  8.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1380/1563 [02:15<00:21,  8.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1381/1563 [02:16<00:21,  8.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1382/1563 [02:16<00:21,  8.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1383/1563 [02:16<00:21,  8.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▊ | 1384/1563 [02:16<00:21,  8.48batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▊ | 1385/1563 [02:16<00:21,  8.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▊ | 1386/1563 [02:16<00:21,  8.35batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▊ | 1387/1563 [02:16<00:21,  8.25batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1388/1563 [02:16<00:21,  8.23batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1389/1563 [02:17<00:21,  8.19batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1390/1563 [02:17<00:20,  8.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1391/1563 [02:17<00:19,  8.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1392/1563 [02:17<00:18,  9.20batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1393/1563 [02:17<00:18,  9.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1394/1563 [02:17<00:17,  9.42batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1396/1563 [02:17<00:16,  9.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1398/1563 [02:17<00:16, 10.15batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|████████▉ | 1400/1563 [02:18<00:15, 10.20batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|████████▉ | 1402/1563 [02:18<00:15, 10.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|████████▉ | 1404/1563 [02:18<00:15, 10.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|████████▉ | 1406/1563 [02:18<00:15, 10.08batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|█████████ | 1408/1563 [02:18<00:15, 10.18batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|█████████ | 1410/1563 [02:19<00:14, 10.21batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|█████████ | 1412/1563 [02:19<00:14, 10.28batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|█████████ | 1414/1563 [02:19<00:14, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████ | 1416/1563 [02:19<00:14, 10.29batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████ | 1418/1563 [02:19<00:14, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████ | 1420/1563 [02:20<00:13, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████ | 1422/1563 [02:20<00:13, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████ | 1424/1563 [02:20<00:13, 10.09batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████ | 1426/1563 [02:20<00:13, 10.14batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████▏| 1428/1563 [02:20<00:13, 10.23batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████▏| 1430/1563 [02:21<00:12, 10.24batch/s]\u001b[A\n",
-            "Epoch 1/1:  92%|█████████▏| 1432/1563 [02:21<00:12, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/1:  92%|█████████▏| 1434/1563 [02:21<00:12, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/1:  92%|█████████▏| 1436/1563 [02:21<00:12, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/1:  92%|█████████▏| 1438/1563 [02:21<00:12, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  92%|█████████▏| 1440/1563 [02:21<00:11, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/1:  92%|█████████▏| 1442/1563 [02:22<00:11, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  92%|█████████▏| 1444/1563 [02:22<00:11, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1446/1563 [02:22<00:11, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1448/1563 [02:22<00:11, 10.33batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1450/1563 [02:22<00:10, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1452/1563 [02:23<00:10, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1454/1563 [02:23<00:10, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1456/1563 [02:23<00:10, 10.19batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1458/1563 [02:23<00:10, 10.27batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1460/1563 [02:23<00:10, 10.28batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▎| 1462/1563 [02:24<00:09, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▎| 1464/1563 [02:24<00:09, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▍| 1466/1563 [02:24<00:09, 10.23batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▍| 1468/1563 [02:24<00:09, 10.21batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▍| 1470/1563 [02:24<00:09, 10.24batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▍| 1472/1563 [02:25<00:08, 10.14batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▍| 1474/1563 [02:25<00:08, 10.18batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▍| 1476/1563 [02:25<00:08, 10.23batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▍| 1478/1563 [02:25<00:08, 10.23batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▍| 1480/1563 [02:25<00:08, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▍| 1482/1563 [02:26<00:07, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▍| 1484/1563 [02:26<00:07, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▌| 1486/1563 [02:26<00:07, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▌| 1488/1563 [02:26<00:07, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▌| 1490/1563 [02:26<00:07, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▌| 1492/1563 [02:27<00:06, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1494/1563 [02:27<00:06,  9.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1495/1563 [02:27<00:07,  9.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1496/1563 [02:27<00:07,  9.30batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1497/1563 [02:27<00:07,  9.11batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1498/1563 [02:27<00:07,  9.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1499/1563 [02:27<00:07,  8.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1500/1563 [02:28<00:07,  8.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1501/1563 [02:28<00:07,  8.39batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1502/1563 [02:28<00:07,  8.25batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1503/1563 [02:28<00:07,  8.12batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1504/1563 [02:28<00:07,  8.12batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▋| 1505/1563 [02:28<00:07,  8.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▋| 1506/1563 [02:28<00:07,  7.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▋| 1507/1563 [02:28<00:06,  8.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▋| 1508/1563 [02:28<00:06,  8.16batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1509/1563 [02:29<00:06,  8.17batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1510/1563 [02:29<00:06,  8.20batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1511/1563 [02:29<00:06,  8.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1512/1563 [02:29<00:05,  8.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1513/1563 [02:29<00:05,  8.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1514/1563 [02:29<00:05,  9.13batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1515/1563 [02:29<00:05,  9.09batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1516/1563 [02:29<00:05,  9.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1517/1563 [02:29<00:04,  9.44batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1519/1563 [02:30<00:04,  9.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1521/1563 [02:30<00:04, 10.13batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1522/1563 [02:30<00:04, 10.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1523/1563 [02:30<00:04,  9.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1524/1563 [02:30<00:03,  9.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1525/1563 [02:30<00:03,  9.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1526/1563 [02:30<00:03,  9.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1527/1563 [02:30<00:03,  9.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1528/1563 [02:31<00:03,  9.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1529/1563 [02:31<00:03,  9.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1530/1563 [02:31<00:03,  9.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1531/1563 [02:31<00:03,  9.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1533/1563 [02:31<00:03,  9.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1535/1563 [02:31<00:02, 10.15batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1537/1563 [02:31<00:02, 10.25batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1539/1563 [02:32<00:02, 10.27batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▊| 1541/1563 [02:32<00:02, 10.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▊| 1543/1563 [02:32<00:01, 10.20batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▉| 1545/1563 [02:32<00:01, 10.18batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▉| 1547/1563 [02:32<00:01, 10.09batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▉| 1549/1563 [02:33<00:01, 10.18batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▉| 1551/1563 [02:33<00:01, 10.25batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▉| 1553/1563 [02:33<00:00, 10.22batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▉| 1555/1563 [02:33<00:00, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/1: 100%|█████████▉| 1557/1563 [02:33<00:00, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/1: 100%|█████████▉| 1559/1563 [02:34<00:00, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/1: 100%|█████████▉| 1561/1563 [02:34<00:00, 10.28batch/s]\u001b[A\n",
-            "Epoch 1/1: 100%|██████████| 1563/1563 [02:34<00:00, 10.58batch/s]\u001b[A\n",
-            "Epochs: 100%|██████████| 1/1 [02:34<00:00, 154.51s/epoch]\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Epoch 1/1 tamamlandı. Kayıp: 1.8101, Doğruluk: 33.27%\n",
-            "RMSNorm Eğitim Süresi: 154.52 saniye, Son Doğruluk: 33.27%\n",
-            "\n",
-            "DyT Modeli Eğitiliyor...\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "Epochs:   0%|          | 0/1 [00:00<?, ?epoch/s]\n",
-            "Epoch 1/1:   0%|          | 0/1563 [00:00<?, ?batch/s]\u001b[A\n",
-            "Epoch 1/1:   0%|          | 1/1563 [00:00<03:43,  6.99batch/s]\u001b[A\n",
-            "Epoch 1/1:   0%|          | 2/1563 [00:00<03:03,  8.49batch/s]\u001b[A\n",
-            "Epoch 1/1:   0%|          | 3/1563 [00:00<02:53,  9.00batch/s]\u001b[A\n",
-            "Epoch 1/1:   0%|          | 5/1563 [00:00<02:35, 10.02batch/s]\u001b[A\n",
-            "Epoch 1/1:   0%|          | 7/1563 [00:00<02:29, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 9/1563 [00:00<02:32, 10.19batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 11/1563 [00:01<02:29, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 13/1563 [00:01<02:25, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 15/1563 [00:01<02:24, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 17/1563 [00:01<02:22, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|          | 19/1563 [00:01<02:21, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|▏         | 21/1563 [00:02<02:23, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/1:   1%|▏         | 23/1563 [00:02<02:21, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 25/1563 [00:02<02:19, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 27/1563 [00:02<02:18, 11.07batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 29/1563 [00:02<02:18, 11.08batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 31/1563 [00:02<02:18, 11.08batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 33/1563 [00:03<02:17, 11.10batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 35/1563 [00:03<02:16, 11.17batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 37/1563 [00:03<02:20, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:   2%|▏         | 39/1563 [00:03<02:19, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 41/1563 [00:03<02:18, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 43/1563 [00:04<02:19, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 45/1563 [00:04<02:17, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 47/1563 [00:04<02:17, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 49/1563 [00:04<02:19, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 51/1563 [00:04<02:19, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:   3%|▎         | 53/1563 [00:04<02:28, 10.14batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▎         | 55/1563 [00:05<02:35,  9.72batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▎         | 56/1563 [00:05<02:37,  9.58batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▎         | 57/1563 [00:05<02:40,  9.39batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▎         | 58/1563 [00:05<02:47,  9.01batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 59/1563 [00:05<02:49,  8.90batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 60/1563 [00:05<02:48,  8.90batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 61/1563 [00:05<02:51,  8.77batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 62/1563 [00:06<02:55,  8.56batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 63/1563 [00:06<02:52,  8.71batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 64/1563 [00:06<02:54,  8.61batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 65/1563 [00:06<02:52,  8.71batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 66/1563 [00:06<02:53,  8.62batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 67/1563 [00:06<02:59,  8.33batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 68/1563 [00:06<03:02,  8.19batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 69/1563 [00:06<02:59,  8.33batch/s]\u001b[A\n",
-            "Epoch 1/1:   4%|▍         | 70/1563 [00:06<02:55,  8.49batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▍         | 71/1563 [00:07<02:49,  8.80batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▍         | 73/1563 [00:07<02:32,  9.75batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▍         | 75/1563 [00:07<02:25, 10.25batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▍         | 77/1563 [00:07<02:24, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▌         | 79/1563 [00:07<02:20, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▌         | 81/1563 [00:07<02:19, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▌         | 83/1563 [00:08<02:17, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:   5%|▌         | 85/1563 [00:08<02:15, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▌         | 87/1563 [00:08<02:14, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▌         | 89/1563 [00:08<02:16, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▌         | 91/1563 [00:08<02:16, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▌         | 93/1563 [00:09<02:16, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▌         | 95/1563 [00:09<02:14, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▌         | 97/1563 [00:09<02:12, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▋         | 99/1563 [00:09<02:13, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:   6%|▋         | 101/1563 [00:09<02:12, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 103/1563 [00:09<02:13, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 105/1563 [00:10<02:12, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 107/1563 [00:10<02:11, 11.11batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 109/1563 [00:10<02:11, 11.04batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 111/1563 [00:10<02:12, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 113/1563 [00:10<02:11, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 115/1563 [00:11<02:11, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:   7%|▋         | 117/1563 [00:11<02:11, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 119/1563 [00:11<02:10, 11.08batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 121/1563 [00:11<02:10, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 123/1563 [00:11<02:12, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 125/1563 [00:11<02:12, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 127/1563 [00:12<02:10, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 129/1563 [00:12<02:10, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:   8%|▊         | 131/1563 [00:12<02:10, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▊         | 133/1563 [00:12<02:10, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▊         | 135/1563 [00:12<02:10, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 137/1563 [00:13<02:10, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 139/1563 [00:13<02:09, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 141/1563 [00:13<02:09, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 143/1563 [00:13<02:09, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 145/1563 [00:13<02:08, 11.07batch/s]\u001b[A\n",
-            "Epoch 1/1:   9%|▉         | 147/1563 [00:13<02:08, 11.04batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|▉         | 149/1563 [00:14<02:08, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|▉         | 151/1563 [00:14<02:07, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|▉         | 153/1563 [00:14<02:07, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|▉         | 155/1563 [00:14<02:07, 11.09batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|█         | 157/1563 [00:14<02:09, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|█         | 159/1563 [00:15<02:09, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|█         | 161/1563 [00:15<02:08, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  10%|█         | 163/1563 [00:15<02:07, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█         | 165/1563 [00:15<02:07, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█         | 167/1563 [00:15<02:08, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█         | 169/1563 [00:16<02:09, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█         | 171/1563 [00:16<02:08, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█         | 173/1563 [00:16<02:08, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█         | 175/1563 [00:16<02:07, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█▏        | 177/1563 [00:16<02:07, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  11%|█▏        | 179/1563 [00:16<02:08, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 181/1563 [00:17<02:16, 10.14batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 183/1563 [00:17<02:19,  9.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 184/1563 [00:17<02:23,  9.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 185/1563 [00:17<02:23,  9.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 186/1563 [00:17<02:24,  9.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 187/1563 [00:17<02:24,  9.49batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 188/1563 [00:17<02:27,  9.30batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 189/1563 [00:18<02:35,  8.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 190/1563 [00:18<02:34,  8.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 191/1563 [00:18<02:33,  8.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 192/1563 [00:18<02:33,  8.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 193/1563 [00:18<02:35,  8.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 194/1563 [00:18<02:37,  8.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  12%|█▏        | 195/1563 [00:18<02:37,  8.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 196/1563 [00:18<02:38,  8.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 197/1563 [00:18<02:42,  8.43batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 198/1563 [00:19<02:40,  8.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 200/1563 [00:19<02:24,  9.40batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 202/1563 [00:19<02:15, 10.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 204/1563 [00:19<02:10, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 206/1563 [00:19<02:07, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 208/1563 [00:20<02:07, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  13%|█▎        | 210/1563 [00:20<02:05, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▎        | 212/1563 [00:20<02:03, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▎        | 214/1563 [00:20<02:03, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▍        | 216/1563 [00:20<02:04, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▍        | 218/1563 [00:20<02:04, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▍        | 220/1563 [00:21<02:06, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▍        | 222/1563 [00:21<02:04, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▍        | 224/1563 [00:21<02:03, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  14%|█▍        | 226/1563 [00:21<02:02, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▍        | 228/1563 [00:21<02:02, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▍        | 230/1563 [00:22<02:05, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▍        | 232/1563 [00:22<02:03, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▍        | 234/1563 [00:22<02:02, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▌        | 236/1563 [00:22<02:01, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▌        | 238/1563 [00:22<02:00, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▌        | 240/1563 [00:22<02:01, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  15%|█▌        | 242/1563 [00:23<02:03, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▌        | 244/1563 [00:23<02:01, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▌        | 246/1563 [00:23<02:00, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▌        | 248/1563 [00:23<02:00, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▌        | 250/1563 [00:23<02:00, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▌        | 252/1563 [00:24<02:00, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▋        | 254/1563 [00:24<01:59, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  16%|█▋        | 256/1563 [00:24<01:58, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 258/1563 [00:24<01:58, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 260/1563 [00:24<01:58, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 262/1563 [00:24<01:58, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 264/1563 [00:25<01:58, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 266/1563 [00:25<01:57, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 268/1563 [00:25<01:57, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 270/1563 [00:25<01:58, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  17%|█▋        | 272/1563 [00:25<01:58, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 274/1563 [00:26<01:57, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 276/1563 [00:26<01:58, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 278/1563 [00:26<01:57, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 280/1563 [00:26<01:57, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 282/1563 [00:26<01:57, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 284/1563 [00:26<01:56, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 286/1563 [00:27<01:56, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  18%|█▊        | 288/1563 [00:27<01:55, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▊        | 290/1563 [00:27<01:56, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▊        | 292/1563 [00:27<01:58, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▉        | 294/1563 [00:27<01:57, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▉        | 296/1563 [00:28<01:56, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▉        | 298/1563 [00:28<01:55, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▉        | 300/1563 [00:28<01:55, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▉        | 302/1563 [00:28<01:56, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  19%|█▉        | 304/1563 [00:28<01:55, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|█▉        | 306/1563 [00:28<01:55, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|█▉        | 308/1563 [00:29<01:55, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|█▉        | 310/1563 [00:29<02:01, 10.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|█▉        | 312/1563 [00:29<02:06,  9.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|██        | 313/1563 [00:29<02:08,  9.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|██        | 314/1563 [00:29<02:09,  9.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|██        | 315/1563 [00:29<02:10,  9.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|██        | 316/1563 [00:30<02:14,  9.28batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|██        | 317/1563 [00:30<02:14,  9.25batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|██        | 318/1563 [00:30<02:18,  8.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|██        | 319/1563 [00:30<02:18,  8.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  20%|██        | 320/1563 [00:30<02:17,  9.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 321/1563 [00:30<02:18,  8.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 322/1563 [00:30<02:20,  8.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 323/1563 [00:30<02:19,  8.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 324/1563 [00:30<02:24,  8.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 325/1563 [00:31<02:23,  8.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 326/1563 [00:31<02:24,  8.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 327/1563 [00:31<02:28,  8.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 328/1563 [00:31<02:30,  8.21batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 330/1563 [00:31<02:12,  9.30batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██        | 332/1563 [00:31<02:03,  9.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██▏       | 334/1563 [00:32<01:59, 10.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  21%|██▏       | 336/1563 [00:32<01:56, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  22%|██▏       | 338/1563 [00:32<01:56, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  22%|██▏       | 340/1563 [00:32<01:54, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/1:  22%|██▏       | 342/1563 [00:32<01:52, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  22%|██▏       | 344/1563 [00:32<01:51, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  22%|██▏       | 346/1563 [00:33<01:51, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  22%|██▏       | 348/1563 [00:33<01:50, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  22%|██▏       | 350/1563 [00:33<01:52, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 352/1563 [00:33<01:51, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 354/1563 [00:33<01:49, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 356/1563 [00:34<01:50, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 358/1563 [00:34<01:50, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 360/1563 [00:34<01:49, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 362/1563 [00:34<01:49, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 364/1563 [00:34<01:48, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  23%|██▎       | 366/1563 [00:34<01:47, 11.11batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▎       | 368/1563 [00:35<01:48, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▎       | 370/1563 [00:35<01:47, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▍       | 372/1563 [00:35<01:47, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▍       | 374/1563 [00:35<01:47, 11.09batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▍       | 376/1563 [00:35<01:46, 11.13batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▍       | 378/1563 [00:36<01:49, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▍       | 380/1563 [00:36<01:49, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  24%|██▍       | 382/1563 [00:36<01:48, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▍       | 384/1563 [00:36<01:49, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▍       | 386/1563 [00:36<01:48, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▍       | 388/1563 [00:36<01:47, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▍       | 390/1563 [00:37<01:48, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▌       | 392/1563 [00:37<01:48, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▌       | 394/1563 [00:37<01:48, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▌       | 396/1563 [00:37<01:47, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  25%|██▌       | 398/1563 [00:37<01:45, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▌       | 400/1563 [00:38<01:46, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▌       | 402/1563 [00:38<01:46, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▌       | 404/1563 [00:38<01:45, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▌       | 406/1563 [00:38<01:44, 11.04batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▌       | 408/1563 [00:38<01:44, 11.07batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▌       | 410/1563 [00:38<01:44, 11.08batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▋       | 412/1563 [00:39<01:45, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  26%|██▋       | 414/1563 [00:39<01:44, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 416/1563 [00:39<01:44, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 418/1563 [00:39<01:43, 11.04batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 420/1563 [00:39<01:43, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 422/1563 [00:40<01:45, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 424/1563 [00:40<01:43, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 426/1563 [00:40<01:43, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  27%|██▋       | 428/1563 [00:40<01:44, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 430/1563 [00:40<01:44, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 432/1563 [00:40<01:44, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 434/1563 [00:41<01:44, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 436/1563 [00:41<01:43, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 438/1563 [00:41<01:45, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 440/1563 [00:41<01:50, 10.17batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 442/1563 [00:41<01:52,  9.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 443/1563 [00:42<01:54,  9.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 444/1563 [00:42<01:57,  9.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  28%|██▊       | 445/1563 [00:42<01:58,  9.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▊       | 446/1563 [00:42<01:59,  9.37batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▊       | 447/1563 [00:42<02:00,  9.24batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▊       | 448/1563 [00:42<02:06,  8.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▊       | 449/1563 [00:42<02:07,  8.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 450/1563 [00:42<02:08,  8.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 451/1563 [00:43<02:10,  8.49batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 452/1563 [00:43<02:11,  8.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 453/1563 [00:43<02:07,  8.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 454/1563 [00:43<02:08,  8.65batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 455/1563 [00:43<02:06,  8.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 456/1563 [00:43<02:06,  8.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 457/1563 [00:43<02:08,  8.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 459/1563 [00:43<01:56,  9.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  29%|██▉       | 461/1563 [00:44<01:50,  9.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  30%|██▉       | 463/1563 [00:44<01:45, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/1:  30%|██▉       | 465/1563 [00:44<01:43, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/1:  30%|██▉       | 467/1563 [00:44<01:42, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  30%|███       | 469/1563 [00:44<01:40, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  30%|███       | 471/1563 [00:44<01:39, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  30%|███       | 473/1563 [00:45<01:39, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  30%|███       | 475/1563 [00:45<01:41, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███       | 477/1563 [00:45<01:40, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███       | 479/1563 [00:45<01:39, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███       | 481/1563 [00:45<01:39, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███       | 483/1563 [00:46<01:38, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███       | 485/1563 [00:46<01:37, 11.07batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███       | 487/1563 [00:46<01:36, 11.11batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███▏      | 489/1563 [00:46<01:37, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  31%|███▏      | 491/1563 [00:46<01:37, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 493/1563 [00:46<01:37, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 495/1563 [00:47<01:36, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 497/1563 [00:47<01:36, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 499/1563 [00:47<01:36, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 501/1563 [00:47<01:35, 11.11batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 503/1563 [00:47<01:34, 11.17batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 505/1563 [00:48<01:35, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  32%|███▏      | 507/1563 [00:48<01:35, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 509/1563 [00:48<01:35, 11.04batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 511/1563 [00:48<01:34, 11.10batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 513/1563 [00:48<01:34, 11.11batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 515/1563 [00:48<01:36, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 517/1563 [00:49<01:36, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 519/1563 [00:49<01:34, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 521/1563 [00:49<01:34, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  33%|███▎      | 523/1563 [00:49<01:33, 11.12batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▎      | 525/1563 [00:49<01:35, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▎      | 527/1563 [00:50<01:35, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▍      | 529/1563 [00:50<01:34, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▍      | 531/1563 [00:50<01:33, 11.04batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▍      | 533/1563 [00:50<01:33, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▍      | 535/1563 [00:50<01:33, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▍      | 537/1563 [00:50<01:33, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  34%|███▍      | 539/1563 [00:51<01:33, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▍      | 541/1563 [00:51<01:32, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▍      | 543/1563 [00:51<01:31, 11.10batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▍      | 545/1563 [00:51<01:31, 11.07batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▍      | 547/1563 [00:51<01:31, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▌      | 549/1563 [00:52<01:32, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▌      | 551/1563 [00:52<01:31, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  35%|███▌      | 553/1563 [00:52<01:31, 11.08batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▌      | 555/1563 [00:52<01:31, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▌      | 557/1563 [00:52<01:31, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▌      | 559/1563 [00:52<01:32, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▌      | 561/1563 [00:53<01:31, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▌      | 563/1563 [00:53<01:30, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▌      | 565/1563 [00:53<01:30, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▋      | 567/1563 [00:53<01:30, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  36%|███▋      | 569/1563 [00:53<01:34, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 571/1563 [00:54<01:39,  9.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 573/1563 [00:54<01:41,  9.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 574/1563 [00:54<01:42,  9.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 575/1563 [00:54<01:43,  9.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 576/1563 [00:54<01:43,  9.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 577/1563 [00:54<01:44,  9.44batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 578/1563 [00:54<01:44,  9.42batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 579/1563 [00:54<01:45,  9.30batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 580/1563 [00:55<01:48,  9.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 581/1563 [00:55<01:52,  8.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 582/1563 [00:55<01:51,  8.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 583/1563 [00:55<01:51,  8.77batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 584/1563 [00:55<01:54,  8.52batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 585/1563 [00:55<01:53,  8.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  37%|███▋      | 586/1563 [00:55<01:55,  8.48batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 588/1563 [00:55<01:42,  9.49batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 589/1563 [00:56<01:42,  9.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 591/1563 [00:56<01:35, 10.15batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 593/1563 [00:56<01:31, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 595/1563 [00:56<01:30, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 597/1563 [00:56<01:30, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 599/1563 [00:57<01:29, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  38%|███▊      | 601/1563 [00:57<01:27, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▊      | 603/1563 [00:57<01:27, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▊      | 605/1563 [00:57<01:26, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▉      | 607/1563 [00:57<01:26, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▉      | 609/1563 [00:57<01:25, 11.11batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▉      | 611/1563 [00:58<01:25, 11.12batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▉      | 613/1563 [00:58<01:27, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▉      | 615/1563 [00:58<01:26, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  39%|███▉      | 617/1563 [00:58<01:26, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|███▉      | 619/1563 [00:58<01:25, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|███▉      | 621/1563 [00:58<01:25, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|███▉      | 623/1563 [00:59<01:25, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|███▉      | 625/1563 [00:59<01:25, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|████      | 627/1563 [00:59<01:24, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|████      | 629/1563 [00:59<01:23, 11.15batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|████      | 631/1563 [00:59<01:23, 11.17batch/s]\u001b[A\n",
-            "Epoch 1/1:  40%|████      | 633/1563 [01:00<01:23, 11.12batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 635/1563 [01:00<01:23, 11.10batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 637/1563 [01:00<01:23, 11.13batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 639/1563 [01:00<01:23, 11.11batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 641/1563 [01:00<01:22, 11.12batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████      | 643/1563 [01:00<01:23, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████▏     | 645/1563 [01:01<01:25, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  41%|████▏     | 647/1563 [01:01<01:25, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 649/1563 [01:01<01:23, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 651/1563 [01:01<01:23, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 653/1563 [01:01<01:22, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 655/1563 [01:02<01:22, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 657/1563 [01:02<01:22, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 659/1563 [01:02<01:21, 11.08batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 661/1563 [01:02<01:21, 11.04batch/s]\u001b[A\n",
-            "Epoch 1/1:  42%|████▏     | 663/1563 [01:02<01:21, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 665/1563 [01:02<01:21, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 667/1563 [01:03<01:20, 11.07batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 669/1563 [01:03<01:20, 11.09batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 671/1563 [01:03<01:20, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 673/1563 [01:03<01:20, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 675/1563 [01:03<01:20, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 677/1563 [01:04<01:19, 11.09batch/s]\u001b[A\n",
-            "Epoch 1/1:  43%|████▎     | 679/1563 [01:04<01:19, 11.12batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▎     | 681/1563 [01:04<01:20, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▎     | 683/1563 [01:04<01:20, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▍     | 685/1563 [01:04<01:19, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▍     | 687/1563 [01:04<01:19, 11.09batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▍     | 689/1563 [01:05<01:20, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▍     | 691/1563 [01:05<01:20, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▍     | 693/1563 [01:05<01:20, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  44%|████▍     | 695/1563 [01:05<01:19, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▍     | 697/1563 [01:05<01:21, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▍     | 699/1563 [01:06<01:26,  9.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▍     | 701/1563 [01:06<01:28,  9.77batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▍     | 702/1563 [01:06<01:30,  9.49batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▍     | 703/1563 [01:06<01:32,  9.25batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▌     | 704/1563 [01:06<01:35,  9.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▌     | 705/1563 [01:06<01:37,  8.77batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▌     | 706/1563 [01:06<01:36,  8.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▌     | 707/1563 [01:07<01:36,  8.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▌     | 708/1563 [01:07<01:35,  8.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▌     | 709/1563 [01:07<01:35,  8.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▌     | 710/1563 [01:07<01:37,  8.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  45%|████▌     | 711/1563 [01:07<01:37,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▌     | 712/1563 [01:07<01:36,  8.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▌     | 713/1563 [01:07<01:38,  8.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▌     | 714/1563 [01:07<01:38,  8.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▌     | 715/1563 [01:07<01:37,  8.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▌     | 717/1563 [01:08<01:28,  9.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▌     | 719/1563 [01:08<01:23, 10.13batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▌     | 720/1563 [01:08<01:23, 10.10batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▌     | 722/1563 [01:08<01:20, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▋     | 724/1563 [01:08<01:18, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  46%|████▋     | 726/1563 [01:09<01:18, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 728/1563 [01:09<01:16, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 730/1563 [01:09<01:16, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 732/1563 [01:09<01:15, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 734/1563 [01:09<01:15, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 736/1563 [01:09<01:15, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 738/1563 [01:10<01:15, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 740/1563 [01:10<01:14, 11.10batch/s]\u001b[A\n",
-            "Epoch 1/1:  47%|████▋     | 742/1563 [01:10<01:13, 11.12batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 744/1563 [01:10<01:15, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 746/1563 [01:10<01:14, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 748/1563 [01:10<01:14, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 750/1563 [01:11<01:13, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 752/1563 [01:11<01:13, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 754/1563 [01:11<01:13, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 756/1563 [01:11<01:13, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  48%|████▊     | 758/1563 [01:11<01:13, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▊     | 760/1563 [01:12<01:13, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 762/1563 [01:12<01:12, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 764/1563 [01:12<01:13, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 766/1563 [01:12<01:12, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 768/1563 [01:12<01:12, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 770/1563 [01:13<01:12, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  49%|████▉     | 772/1563 [01:13<01:11, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|████▉     | 774/1563 [01:13<01:11, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|████▉     | 776/1563 [01:13<01:11, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|████▉     | 778/1563 [01:13<01:12, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|████▉     | 780/1563 [01:13<01:12, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|█████     | 782/1563 [01:14<01:11, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|█████     | 784/1563 [01:14<01:11, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|█████     | 786/1563 [01:14<01:11, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  50%|█████     | 788/1563 [01:14<01:11, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████     | 790/1563 [01:14<01:11, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████     | 792/1563 [01:15<01:11, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████     | 794/1563 [01:15<01:10, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████     | 796/1563 [01:15<01:10, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████     | 798/1563 [01:15<01:10, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████     | 800/1563 [01:15<01:10, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████▏    | 802/1563 [01:15<01:09, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  51%|█████▏    | 804/1563 [01:16<01:09, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 806/1563 [01:16<01:09, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 808/1563 [01:16<01:08, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 810/1563 [01:16<01:08, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 812/1563 [01:16<01:08, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 814/1563 [01:17<01:08, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 816/1563 [01:17<01:08, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 818/1563 [01:17<01:07, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  52%|█████▏    | 820/1563 [01:17<01:07, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 822/1563 [01:17<01:07, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 824/1563 [01:17<01:07, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 826/1563 [01:18<01:11, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 828/1563 [01:18<01:14,  9.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 829/1563 [01:18<01:16,  9.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 830/1563 [01:18<01:18,  9.35batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 831/1563 [01:18<01:21,  8.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 832/1563 [01:18<01:23,  8.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 833/1563 [01:18<01:22,  8.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 834/1563 [01:19<01:21,  8.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 835/1563 [01:19<01:21,  8.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  53%|█████▎    | 836/1563 [01:19<01:21,  8.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▎    | 837/1563 [01:19<01:22,  8.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▎    | 838/1563 [01:19<01:22,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▎    | 839/1563 [01:19<01:22,  8.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▎    | 840/1563 [01:19<01:26,  8.40batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▍    | 841/1563 [01:19<01:26,  8.39batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▍    | 842/1563 [01:20<01:24,  8.49batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▍    | 843/1563 [01:20<01:24,  8.52batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▍    | 845/1563 [01:20<01:15,  9.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▍    | 847/1563 [01:20<01:12,  9.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▍    | 849/1563 [01:20<01:08, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/1:  54%|█████▍    | 851/1563 [01:20<01:07, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▍    | 853/1563 [01:21<01:06, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▍    | 855/1563 [01:21<01:05, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▍    | 857/1563 [01:21<01:05, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▍    | 859/1563 [01:21<01:04, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▌    | 861/1563 [01:21<01:03, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▌    | 863/1563 [01:21<01:03, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▌    | 865/1563 [01:22<01:04, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  55%|█████▌    | 867/1563 [01:22<01:04, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▌    | 869/1563 [01:22<01:03, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▌    | 871/1563 [01:22<01:02, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▌    | 873/1563 [01:22<01:03, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▌    | 875/1563 [01:23<01:03, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▌    | 877/1563 [01:23<01:02, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▌    | 879/1563 [01:23<01:01, 11.04batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▋    | 881/1563 [01:23<01:02, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  56%|█████▋    | 883/1563 [01:23<01:01, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 885/1563 [01:23<01:02, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 887/1563 [01:24<01:01, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 889/1563 [01:24<01:01, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 891/1563 [01:24<01:01, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 893/1563 [01:24<01:00, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 895/1563 [01:24<01:00, 11.04batch/s]\u001b[A\n",
-            "Epoch 1/1:  57%|█████▋    | 897/1563 [01:25<01:00, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 899/1563 [01:25<01:00, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 901/1563 [01:25<00:59, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 903/1563 [01:25<01:00, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 905/1563 [01:25<01:00, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 907/1563 [01:25<01:00, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 909/1563 [01:26<00:59, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 911/1563 [01:26<00:59, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  58%|█████▊    | 913/1563 [01:26<00:59, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▊    | 915/1563 [01:26<00:59, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▊    | 917/1563 [01:26<00:59, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▉    | 919/1563 [01:27<00:59, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▉    | 921/1563 [01:27<00:59, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▉    | 923/1563 [01:27<00:58, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▉    | 925/1563 [01:27<00:58, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▉    | 927/1563 [01:27<00:57, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  59%|█████▉    | 929/1563 [01:28<00:59, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|█████▉    | 931/1563 [01:28<00:58, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|█████▉    | 933/1563 [01:28<00:58, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|█████▉    | 935/1563 [01:28<00:57, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|█████▉    | 937/1563 [01:28<00:57, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|██████    | 939/1563 [01:28<00:57, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|██████    | 941/1563 [01:29<00:58, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|██████    | 943/1563 [01:29<00:57, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  60%|██████    | 945/1563 [01:29<00:57, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████    | 947/1563 [01:29<00:56, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████    | 949/1563 [01:29<00:56, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████    | 951/1563 [01:30<00:56, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████    | 953/1563 [01:30<00:58, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████    | 955/1563 [01:30<01:01,  9.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████    | 956/1563 [01:30<01:01,  9.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████    | 957/1563 [01:30<01:04,  9.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████▏   | 958/1563 [01:30<01:05,  9.23batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████▏   | 959/1563 [01:30<01:06,  9.07batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████▏   | 960/1563 [01:31<01:08,  8.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  61%|██████▏   | 961/1563 [01:31<01:07,  8.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 962/1563 [01:31<01:06,  9.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 963/1563 [01:31<01:08,  8.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 964/1563 [01:31<01:07,  8.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 965/1563 [01:31<01:07,  8.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 966/1563 [01:31<01:08,  8.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 967/1563 [01:31<01:07,  8.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 968/1563 [01:31<01:06,  9.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 969/1563 [01:32<01:08,  8.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 970/1563 [01:32<01:08,  8.72batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 971/1563 [01:32<01:08,  8.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 972/1563 [01:32<01:09,  8.48batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 974/1563 [01:32<01:02,  9.47batch/s]\u001b[A\n",
-            "Epoch 1/1:  62%|██████▏   | 976/1563 [01:32<00:58, 10.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 978/1563 [01:32<00:56, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 980/1563 [01:33<00:56, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 982/1563 [01:33<00:54, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 984/1563 [01:33<00:54, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 986/1563 [01:33<00:53, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 988/1563 [01:33<00:52, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 990/1563 [01:34<00:52, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  63%|██████▎   | 992/1563 [01:34<00:52, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▎   | 994/1563 [01:34<00:52, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▎   | 996/1563 [01:34<00:51, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▍   | 998/1563 [01:34<00:51, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▍   | 1000/1563 [01:34<00:51, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▍   | 1002/1563 [01:35<00:50, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▍   | 1004/1563 [01:35<00:50, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▍   | 1006/1563 [01:35<00:50, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  64%|██████▍   | 1008/1563 [01:35<00:50, 11.07batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▍   | 1010/1563 [01:35<00:50, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▍   | 1012/1563 [01:36<00:50, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▍   | 1014/1563 [01:36<00:50, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▌   | 1016/1563 [01:36<00:49, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▌   | 1018/1563 [01:36<00:49, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▌   | 1020/1563 [01:36<00:49, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  65%|██████▌   | 1022/1563 [01:37<00:49, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▌   | 1024/1563 [01:37<00:49, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▌   | 1026/1563 [01:37<00:50, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▌   | 1028/1563 [01:37<00:49, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▌   | 1030/1563 [01:37<00:48, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▌   | 1032/1563 [01:37<00:48, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▌   | 1034/1563 [01:38<00:49, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▋   | 1036/1563 [01:38<00:49, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/1:  66%|██████▋   | 1038/1563 [01:38<00:48, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1040/1563 [01:38<00:48, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1042/1563 [01:38<00:47, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1044/1563 [01:39<00:47, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1046/1563 [01:39<00:47, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1048/1563 [01:39<00:47, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1050/1563 [01:39<00:46, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1052/1563 [01:39<00:46, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/1:  67%|██████▋   | 1054/1563 [01:39<00:46, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1056/1563 [01:40<00:46, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1058/1563 [01:40<00:46, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1060/1563 [01:40<00:45, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1062/1563 [01:40<00:45, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1064/1563 [01:40<00:45, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1066/1563 [01:41<00:45, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1068/1563 [01:41<00:45, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  68%|██████▊   | 1070/1563 [01:41<00:45, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▊   | 1072/1563 [01:41<00:44, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▊   | 1074/1563 [01:41<00:44, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▉   | 1076/1563 [01:41<00:44, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▉   | 1078/1563 [01:42<00:44, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▉   | 1080/1563 [01:42<00:44, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▉   | 1082/1563 [01:42<00:46, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▉   | 1084/1563 [01:42<00:48,  9.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  69%|██████▉   | 1086/1563 [01:42<00:48,  9.76batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|██████▉   | 1087/1563 [01:43<00:48,  9.72batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|██████▉   | 1088/1563 [01:43<00:49,  9.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|██████▉   | 1089/1563 [01:43<00:49,  9.51batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|██████▉   | 1090/1563 [01:43<00:51,  9.21batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|██████▉   | 1091/1563 [01:43<00:51,  9.24batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|██████▉   | 1092/1563 [01:43<00:51,  9.09batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|██████▉   | 1093/1563 [01:43<00:51,  9.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|██████▉   | 1094/1563 [01:43<00:51,  9.03batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|███████   | 1095/1563 [01:43<00:52,  8.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|███████   | 1096/1563 [01:44<00:53,  8.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|███████   | 1097/1563 [01:44<00:52,  8.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|███████   | 1098/1563 [01:44<00:52,  8.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|███████   | 1099/1563 [01:44<00:52,  8.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|███████   | 1100/1563 [01:44<00:53,  8.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  70%|███████   | 1101/1563 [01:44<00:53,  8.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████   | 1102/1563 [01:44<00:52,  8.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████   | 1104/1563 [01:44<00:47,  9.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████   | 1106/1563 [01:45<00:45, 10.07batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████   | 1108/1563 [01:45<00:43, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████   | 1110/1563 [01:45<00:42, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████   | 1112/1563 [01:45<00:42, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████▏  | 1114/1563 [01:45<00:41, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  71%|███████▏  | 1116/1563 [01:46<00:41, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1118/1563 [01:46<00:40, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1120/1563 [01:46<00:40, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1122/1563 [01:46<00:40, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1124/1563 [01:46<00:39, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1126/1563 [01:46<00:39, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1128/1563 [01:47<00:40, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1130/1563 [01:47<00:39, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  72%|███████▏  | 1132/1563 [01:47<00:39, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1134/1563 [01:47<00:39, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1136/1563 [01:47<00:39, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1138/1563 [01:48<00:39, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1140/1563 [01:48<00:38, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1142/1563 [01:48<00:38, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1144/1563 [01:48<00:38, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1146/1563 [01:48<00:38, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  73%|███████▎  | 1148/1563 [01:48<00:38, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▎  | 1150/1563 [01:49<00:37, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▎  | 1152/1563 [01:49<00:37, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▍  | 1154/1563 [01:49<00:37, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▍  | 1156/1563 [01:49<00:37, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▍  | 1158/1563 [01:49<00:37, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▍  | 1160/1563 [01:50<00:37, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▍  | 1162/1563 [01:50<00:36, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  74%|███████▍  | 1164/1563 [01:50<00:36, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▍  | 1166/1563 [01:50<00:36, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▍  | 1168/1563 [01:50<00:36, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▍  | 1170/1563 [01:51<00:36, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▍  | 1172/1563 [01:51<00:35, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▌  | 1174/1563 [01:51<00:35, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▌  | 1176/1563 [01:51<00:35, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▌  | 1178/1563 [01:51<00:35, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  75%|███████▌  | 1180/1563 [01:51<00:34, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▌  | 1182/1563 [01:52<00:34, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▌  | 1184/1563 [01:52<00:34, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▌  | 1186/1563 [01:52<00:34, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▌  | 1188/1563 [01:52<00:34, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▌  | 1190/1563 [01:52<00:34, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▋  | 1192/1563 [01:53<00:34, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  76%|███████▋  | 1194/1563 [01:53<00:33, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1196/1563 [01:53<00:33, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1198/1563 [01:53<00:33, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1200/1563 [01:53<00:33, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1202/1563 [01:53<00:33, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1204/1563 [01:54<00:32, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1206/1563 [01:54<00:32, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1208/1563 [01:54<00:32, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/1:  77%|███████▋  | 1210/1563 [01:54<00:32, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1212/1563 [01:54<00:34, 10.18batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1214/1563 [01:55<00:35,  9.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1215/1563 [01:55<00:36,  9.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1216/1563 [01:55<00:37,  9.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1217/1563 [01:55<00:37,  9.12batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1218/1563 [01:55<00:37,  9.18batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1219/1563 [01:55<00:37,  9.08batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1220/1563 [01:55<00:38,  8.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1221/1563 [01:55<00:37,  9.07batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1222/1563 [01:56<00:37,  8.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1223/1563 [01:56<00:37,  9.13batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1224/1563 [01:56<00:36,  9.23batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1225/1563 [01:56<00:36,  9.25batch/s]\u001b[A\n",
-            "Epoch 1/1:  78%|███████▊  | 1226/1563 [01:56<00:36,  9.23batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▊  | 1227/1563 [01:56<00:36,  9.28batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▊  | 1228/1563 [01:56<00:36,  9.08batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▊  | 1229/1563 [01:56<00:37,  8.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▊  | 1230/1563 [01:56<00:38,  8.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▉  | 1231/1563 [01:57<00:39,  8.45batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▉  | 1232/1563 [01:57<00:37,  8.72batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▉  | 1234/1563 [01:57<00:34,  9.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▉  | 1235/1563 [01:57<00:33,  9.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▉  | 1236/1563 [01:57<00:33,  9.72batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▉  | 1238/1563 [01:57<00:31, 10.18batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▉  | 1240/1563 [01:57<00:31, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/1:  79%|███████▉  | 1242/1563 [01:58<00:30, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|███████▉  | 1244/1563 [01:58<00:29, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|███████▉  | 1246/1563 [01:58<00:29, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|███████▉  | 1248/1563 [01:58<00:29, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|███████▉  | 1250/1563 [01:58<00:28, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|████████  | 1252/1563 [01:59<00:28, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|████████  | 1254/1563 [01:59<00:28, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|████████  | 1256/1563 [01:59<00:28, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  80%|████████  | 1258/1563 [01:59<00:28, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1260/1563 [01:59<00:28, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1262/1563 [01:59<00:28, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1264/1563 [02:00<00:28, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1266/1563 [02:00<00:27, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████  | 1268/1563 [02:00<00:27, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████▏ | 1270/1563 [02:00<00:27, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  81%|████████▏ | 1272/1563 [02:00<00:26, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1274/1563 [02:01<00:26, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1276/1563 [02:01<00:26, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1278/1563 [02:01<00:25, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1280/1563 [02:01<00:26, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1282/1563 [02:01<00:25, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1284/1563 [02:01<00:25, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1286/1563 [02:02<00:25, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  82%|████████▏ | 1288/1563 [02:02<00:25, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1290/1563 [02:02<00:25, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1292/1563 [02:02<00:25, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1294/1563 [02:02<00:25, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1296/1563 [02:03<00:24, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1298/1563 [02:03<00:24, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1300/1563 [02:03<00:24, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1302/1563 [02:03<00:23, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  83%|████████▎ | 1304/1563 [02:03<00:23, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▎ | 1306/1563 [02:04<00:23, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▎ | 1308/1563 [02:04<00:23, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▍ | 1310/1563 [02:04<00:23, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▍ | 1312/1563 [02:04<00:23, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▍ | 1314/1563 [02:04<00:22, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▍ | 1316/1563 [02:04<00:22, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▍ | 1318/1563 [02:05<00:23, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  84%|████████▍ | 1320/1563 [02:05<00:22, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▍ | 1322/1563 [02:05<00:22, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▍ | 1324/1563 [02:05<00:22, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▍ | 1326/1563 [02:05<00:21, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▍ | 1328/1563 [02:06<00:21, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▌ | 1330/1563 [02:06<00:21, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▌ | 1332/1563 [02:06<00:21, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▌ | 1334/1563 [02:06<00:21, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  85%|████████▌ | 1336/1563 [02:06<00:20, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▌ | 1338/1563 [02:06<00:20, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▌ | 1340/1563 [02:07<00:21, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▌ | 1342/1563 [02:07<00:21, 10.11batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▌ | 1344/1563 [02:07<00:22,  9.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▌ | 1345/1563 [02:07<00:22,  9.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▌ | 1346/1563 [02:07<00:22,  9.55batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▌ | 1347/1563 [02:07<00:23,  9.32batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▌ | 1348/1563 [02:08<00:24,  8.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▋ | 1349/1563 [02:08<00:23,  8.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▋ | 1350/1563 [02:08<00:23,  9.05batch/s]\u001b[A\n",
-            "Epoch 1/1:  86%|████████▋ | 1351/1563 [02:08<00:23,  9.04batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1352/1563 [02:08<00:23,  9.10batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1353/1563 [02:08<00:22,  9.16batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1354/1563 [02:08<00:23,  9.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1355/1563 [02:08<00:23,  8.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1356/1563 [02:08<00:23,  8.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1357/1563 [02:09<00:23,  8.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1358/1563 [02:09<00:23,  8.58batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1359/1563 [02:09<00:23,  8.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1360/1563 [02:09<00:23,  8.46batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1362/1563 [02:09<00:21,  9.41batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1364/1563 [02:09<00:19,  9.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  87%|████████▋ | 1366/1563 [02:10<00:19, 10.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1368/1563 [02:10<00:18, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1370/1563 [02:10<00:18, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1372/1563 [02:10<00:18, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1374/1563 [02:10<00:17, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1376/1563 [02:10<00:17, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1378/1563 [02:11<00:17, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1380/1563 [02:11<00:16, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:  88%|████████▊ | 1382/1563 [02:11<00:16, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▊ | 1384/1563 [02:11<00:16, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▊ | 1386/1563 [02:11<00:16, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1388/1563 [02:12<00:16, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1390/1563 [02:12<00:16, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1392/1563 [02:12<00:15, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1394/1563 [02:12<00:15, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1396/1563 [02:12<00:15, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  89%|████████▉ | 1398/1563 [02:12<00:14, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|████████▉ | 1400/1563 [02:13<00:14, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|████████▉ | 1402/1563 [02:13<00:14, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|████████▉ | 1404/1563 [02:13<00:14, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|████████▉ | 1406/1563 [02:13<00:14, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|█████████ | 1408/1563 [02:13<00:14, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|█████████ | 1410/1563 [02:14<00:14, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|█████████ | 1412/1563 [02:14<00:13, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  90%|█████████ | 1414/1563 [02:14<00:13, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████ | 1416/1563 [02:14<00:13, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████ | 1418/1563 [02:14<00:13, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████ | 1420/1563 [02:14<00:13, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████ | 1422/1563 [02:15<00:12, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████ | 1424/1563 [02:15<00:12, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████ | 1426/1563 [02:15<00:12, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████▏| 1428/1563 [02:15<00:12, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  91%|█████████▏| 1430/1563 [02:15<00:12, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  92%|█████████▏| 1432/1563 [02:16<00:12, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  92%|█████████▏| 1434/1563 [02:16<00:11, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/1:  92%|█████████▏| 1436/1563 [02:16<00:11, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  92%|█████████▏| 1438/1563 [02:16<00:11, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  92%|█████████▏| 1440/1563 [02:16<00:11, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  92%|█████████▏| 1442/1563 [02:17<00:11, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/1:  92%|█████████▏| 1444/1563 [02:17<00:10, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1446/1563 [02:17<00:10, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1448/1563 [02:17<00:10, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1450/1563 [02:17<00:10, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1452/1563 [02:17<00:10, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1454/1563 [02:18<00:10, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1456/1563 [02:18<00:09, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1458/1563 [02:18<00:09, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  93%|█████████▎| 1460/1563 [02:18<00:09, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▎| 1462/1563 [02:18<00:09, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▎| 1464/1563 [02:19<00:09, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▍| 1466/1563 [02:19<00:08, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▍| 1468/1563 [02:19<00:08, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▍| 1470/1563 [02:19<00:09, 10.27batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▍| 1472/1563 [02:19<00:09,  9.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▍| 1474/1563 [02:20<00:09,  9.79batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▍| 1475/1563 [02:20<00:09,  9.57batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▍| 1476/1563 [02:20<00:09,  9.53batch/s]\u001b[A\n",
-            "Epoch 1/1:  94%|█████████▍| 1477/1563 [02:20<00:09,  9.18batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▍| 1478/1563 [02:20<00:09,  9.15batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▍| 1479/1563 [02:20<00:09,  8.99batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▍| 1480/1563 [02:20<00:09,  8.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▍| 1481/1563 [02:20<00:09,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▍| 1482/1563 [02:20<00:09,  8.52batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▍| 1483/1563 [02:21<00:09,  8.28batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▍| 1484/1563 [02:21<00:09,  8.50batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▌| 1485/1563 [02:21<00:09,  8.26batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▌| 1486/1563 [02:21<00:09,  8.31batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▌| 1487/1563 [02:21<00:09,  8.27batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▌| 1488/1563 [02:21<00:09,  8.29batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▌| 1489/1563 [02:21<00:08,  8.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  95%|█████████▌| 1491/1563 [02:22<00:07,  9.38batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1493/1563 [02:22<00:07,  9.94batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1495/1563 [02:22<00:06, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1497/1563 [02:22<00:06, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1499/1563 [02:22<00:06, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1501/1563 [02:22<00:05, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▌| 1503/1563 [02:23<00:05, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▋| 1505/1563 [02:23<00:05, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/1:  96%|█████████▋| 1507/1563 [02:23<00:05, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1509/1563 [02:23<00:04, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1511/1563 [02:23<00:04, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1513/1563 [02:24<00:04, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1515/1563 [02:24<00:04, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1517/1563 [02:24<00:04, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1519/1563 [02:24<00:04, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1521/1563 [02:24<00:03, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  97%|█████████▋| 1523/1563 [02:24<00:03, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1525/1563 [02:25<00:03, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1527/1563 [02:25<00:03, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1529/1563 [02:25<00:03, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1531/1563 [02:25<00:02, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1533/1563 [02:25<00:02, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1535/1563 [02:26<00:02, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1537/1563 [02:26<00:02, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/1:  98%|█████████▊| 1539/1563 [02:26<00:02, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▊| 1541/1563 [02:26<00:02, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▊| 1543/1563 [02:26<00:01, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▉| 1545/1563 [02:27<00:01, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▉| 1547/1563 [02:27<00:01, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▉| 1549/1563 [02:27<00:01, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▉| 1551/1563 [02:27<00:01, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▉| 1553/1563 [02:27<00:00, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1:  99%|█████████▉| 1555/1563 [02:27<00:00, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/1: 100%|█████████▉| 1557/1563 [02:28<00:00, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/1: 100%|█████████▉| 1559/1563 [02:28<00:00, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/1: 100%|█████████▉| 1561/1563 [02:28<00:00, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/1: 100%|██████████| 1563/1563 [02:28<00:00, 11.54batch/s]\u001b[A\n",
-            "Epochs: 100%|██████████| 1/1 [02:28<00:00, 148.68s/epoch]"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Epoch 1/1 tamamlandı. Kayıp: 1.6889, Doğruluk: 38.04%\n",
-            "DyT Eğitim Süresi: 148.68 saniye, Son Doğruluk: 38.04%\n",
-            "\n",
-            "Karşılaştırma:\n",
-            "RMSNorm - Süre: 154.52s, Doğruluk: 33.27%\n",
-            "DyT - Süre: 148.68s, Doğruluk: 38.04%\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "\n"
-          ]
-        }
-      ],
-      "source": [
-        "import torch\n",
-        "import torch.nn as nn\n",
-        "import torch.optim as optim\n",
-        "import time\n",
-        "from torchvision import datasets, transforms\n",
-        "from torch.utils.data import DataLoader\n",
-        "from tqdm import tqdm  # İlerleme çubuğu için tqdm ekleniyor\n",
-        "\n",
-        "# 1. RMSNorm Sınıfı\n",
-        "class RMSNorm(nn.Module):\n",
-        "    def __init__(self, dim, eps=1e-6):\n",
-        "        super(RMSNorm, self).__init__()\n",
-        "        self.dim = dim\n",
-        "        self.eps = eps\n",
-        "        self.gamma = nn.Parameter(torch.ones(dim))\n",
-        "        self.beta = nn.Parameter(torch.zeros(dim))\n",
-        "\n",
-        "    def forward(self, x):\n",
-        "        rms = torch.sqrt(torch.mean(x**2, dim=-1, keepdim=True) + self.eps)\n",
-        "        x_norm = x / rms\n",
-        "        return self.gamma * x_norm + self.beta\n",
-        "\n",
-        "# 2. DyT Sınıfı\n",
-        "class DyT(nn.Module):\n",
-        "    def __init__(self, dim, init_alpha=0.5):\n",
-        "        super(DyT, self).__init__()\n",
-        "        self.alpha = nn.Parameter(torch.ones(1) * init_alpha)\n",
-        "        self.gamma = nn.Parameter(torch.ones(dim))\n",
-        "        self.beta = nn.Parameter(torch.zeros(dim))\n",
-        "\n",
-        "    def forward(self, x):\n",
-        "        x = torch.tanh(self.alpha * x)\n",
-        "        return self.gamma * x + self.beta\n",
-        "\n",
-        "# 3. TransformerBlock Sınıfı\n",
-        "class TransformerBlock(nn.Module):\n",
-        "    def __init__(self, dim, num_heads, norm_layer, init_alpha=0.5):\n",
-        "        super(TransformerBlock, self).__init__()\n",
-        "        if norm_layer == 'RMSNorm':\n",
-        "            self.norm1 = RMSNorm(dim)\n",
-        "            self.norm2 = RMSNorm(dim)\n",
-        "        elif norm_layer == 'DyT':\n",
-        "            self.norm1 = DyT(dim, init_alpha)\n",
-        "            self.norm2 = DyT(dim, init_alpha)\n",
-        "        else:\n",
-        "            raise ValueError(\"Geçersiz norm_layer. 'RMSNorm' veya 'DyT' seçin.\")\n",
-        "        self.attn = nn.MultiheadAttention(embed_dim=dim, num_heads=num_heads)\n",
-        "        self.ffn = nn.Sequential(\n",
-        "            nn.Linear(dim, dim * 4),\n",
-        "            nn.GELU(),\n",
-        "            nn.Linear(dim * 4, dim)\n",
-        "        )\n",
-        "\n",
-        "    def forward(self, x):\n",
-        "        attn_output, _ = self.attn(self.norm1(x), self.norm1(x), self.norm1(x))\n",
-        "        x = x + attn_output\n",
-        "        ffn_output = self.ffn(self.norm2(x))\n",
-        "        x = x + ffn_output\n",
-        "        return x\n",
-        "\n",
-        "# 4. SimpleViT Sınıfı\n",
-        "class SimpleViT(nn.Module):\n",
-        "    def __init__(self, img_size=224, patch_size=16, num_classes=10, dim=256, depth=3, heads=4, norm_layer='RMSNorm', init_alpha=0.5):\n",
-        "        super(SimpleViT, self).__init__()\n",
-        "        assert img_size % patch_size == 0, \"Görüntü boyutu yama boyutuna bölünebilir olmalı\"\n",
-        "        num_patches = (img_size // patch_size) ** 2\n",
-        "\n",
-        "        self.patch_embed = nn.Conv2d(3, dim, kernel_size=patch_size, stride=patch_size)\n",
-        "        self.pos_embed = nn.Parameter(torch.randn(1, num_patches + 1, dim))\n",
-        "        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))\n",
-        "\n",
-        "        self.blocks = nn.ModuleList([\n",
-        "            TransformerBlock(dim=dim, num_heads=heads, norm_layer=norm_layer, init_alpha=init_alpha) for _ in range(depth)\n",
-        "        ])\n",
-        "\n",
-        "        self.head = nn.Linear(dim, num_classes)\n",
-        "\n",
-        "    def forward(self, x):\n",
-        "        B = x.shape[0]\n",
-        "        x = self.patch_embed(x)\n",
-        "        x = x.flatten(2).transpose(1, 2)\n",
-        "\n",
-        "        cls_tokens = self.cls_token.expand(B, -1, -1)\n",
-        "        x = torch.cat((cls_tokens, x), dim=1)\n",
-        "        x = x + self.pos_embed\n",
-        "\n",
-        "        x = x.transpose(0, 1)\n",
-        "        for block in self.blocks:\n",
-        "            x = block(x)\n",
-        "        x = x.transpose(0, 1)\n",
-        "\n",
-        "        x = x[:, 0]\n",
-        "        x = self.head(x)\n",
-        "        return x\n",
-        "\n",
-        "# 5. Eğitim ve Değerlendirme Fonksiyonu\n",
-        "def train_model(model, dataloader, criterion, optimizer, num_epochs, device):\n",
-        "    model.to(device)\n",
-        "    start_time = time.time()\n",
-        "\n",
-        "    # Epoch'lar için tqdm\n",
-        "    for epoch in tqdm(range(num_epochs), desc=\"Epochs\", unit=\"epoch\"):\n",
-        "        model.train()\n",
-        "        running_loss = 0.0\n",
-        "        correct = 0\n",
-        "        total = 0\n",
-        "\n",
-        "        # Batch'ler için tqdm\n",
-        "        for inputs, labels in tqdm(dataloader, desc=f\"Epoch {epoch+1}/{num_epochs}\", unit=\"batch\", leave=False):\n",
-        "            inputs, labels = inputs.to(device), labels.to(device)\n",
-        "            optimizer.zero_grad()\n",
-        "            outputs = model(inputs)\n",
-        "            loss = criterion(outputs, labels)\n",
-        "            loss.backward()\n",
-        "            optimizer.step()\n",
-        "\n",
-        "            running_loss += loss.item()\n",
-        "            _, predicted = torch.max(outputs, 1)\n",
-        "            total += labels.size(0)\n",
-        "            correct += (predicted == labels).sum().item()\n",
-        "\n",
-        "        accuracy = 100 * correct / total\n",
-        "        avg_loss = running_loss / len(dataloader)\n",
-        "        print(f\"Epoch {epoch+1}/{num_epochs} tamamlandı. Kayıp: {avg_loss:.4f}, Doğruluk: {accuracy:.2f}%\")\n",
-        "\n",
-        "    end_time = time.time()\n",
-        "    training_time = end_time - start_time\n",
-        "    return training_time, accuracy\n",
-        "\n",
-        "# Veri Seti ve DataLoader (CIFAR-10)\n",
-        "transform = transforms.Compose([\n",
-        "    transforms.Resize((224, 224)),\n",
-        "    transforms.ToTensor(),\n",
-        "    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))\n",
-        "])\n",
-        "\n",
-        "train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)\n",
-        "train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)\n",
-        "\n",
-        "# Cihaz ve Eğitim Parametreleri\n",
-        "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
-        "num_epochs = 1\n",
-        "\n",
-        "# RMSNorm Modeli\n",
-        "model_rms = SimpleViT(norm_layer='RMSNorm')\n",
-        "optimizer_rms = optim.Adam(model_rms.parameters(), lr=0.001)\n",
-        "criterion = nn.CrossEntropyLoss()\n",
-        "\n",
-        "# DyT Modeli\n",
-        "model_dyt = SimpleViT(norm_layer='DyT', init_alpha=0.5)\n",
-        "optimizer_dyt = optim.Adam(model_dyt.parameters(), lr=0.001)\n",
-        "\n",
-        "# Eğitim ve Karşılaştırma\n",
-        "print(\"RMSNorm Modeli Eğitiliyor...\")\n",
-        "time_rms, acc_rms = train_model(model_rms, train_loader, criterion, optimizer_rms, num_epochs, device)\n",
-        "print(f\"RMSNorm Eğitim Süresi: {time_rms:.2f} saniye, Son Doğruluk: {acc_rms:.2f}%\")\n",
-        "\n",
-        "print(\"\\nDyT Modeli Eğitiliyor...\")\n",
-        "time_dyt, acc_dyt = train_model(model_dyt, train_loader, criterion, optimizer_dyt, num_epochs, device)\n",
-        "print(f\"DyT Eğitim Süresi: {time_dyt:.2f} saniye, Son Doğruluk: {acc_dyt:.2f}%\")\n",
-        "\n",
-        "# Karşılaştırma Sonuçları\n",
-        "print(\"\\nKarşılaştırma:\")\n",
-        "print(f\"RMSNorm - Süre: {time_rms:.2f}s, Doğruluk: {acc_rms:.2f}%\")\n",
-        "print(f\"DyT - Süre: {time_dyt:.2f}s, Doğruluk: {acc_dyt:.2f}%\")"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "source": [
-        "!pip install tqdm tabulate reportlab"
-      ],
-      "metadata": {
-        "id": "57qK6QKCr3_8"
-      },
-      "execution_count": null,
-      "outputs": []
+  {
+   "cell_type": "code",
+   "source": [
+    "!pip install tqdm tabulate reportlab"
+   ],
+   "metadata": {
+    "id": "57qK6QKCr3_8"
+   },
+   "execution_count": null,
+   "outputs": []
+  },
+  {
+   "cell_type": "code",
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.optim as optim\n",
+    "import time\n",
+    "from torchvision import datasets, transforms\n",
+    "from torch.utils.data import DataLoader\n",
+    "from tqdm import tqdm  # Adding tqdm for the progress bar\n",
+    "\n",
+    "# 1. RMSNorm Class\n",
+    "class RMSNorm(nn.Module):\n",
+    "    def __init__(self, dim, eps=1e-6):\n",
+    "        super(RMSNorm, self).__init__()\n",
+    "        self.dim = dim\n",
+    "        self.eps = eps\n",
+    "        self.gamma = nn.Parameter(torch.ones(dim))\n",
+    "        self.beta = nn.Parameter(torch.zeros(dim))\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        rms = torch.sqrt(torch.mean(x**2, dim=-1, keepdim=True) + self.eps)\n",
+    "        x_norm = x / rms\n",
+    "        return self.gamma * x_norm + self.beta\n",
+    "\n",
+    "# 2. DyT Class\n",
+    "class DyT(nn.Module):\n",
+    "    def __init__(self, dim, init_alpha=0.5):\n",
+    "        super(DyT, self).__init__()\n",
+    "        self.alpha = nn.Parameter(torch.ones(1) * init_alpha)\n",
+    "        self.gamma = nn.Parameter(torch.ones(dim))\n",
+    "        self.beta = nn.Parameter(torch.zeros(dim))\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        x = torch.tanh(self.alpha * x)\n",
+    "        return self.gamma * x + self.beta\n",
+    "\n",
+    "# 3. TransformerBlock Class\n",
+    "class TransformerBlock(nn.Module):\n",
+    "    def __init__(self, dim, num_heads, norm_layer, init_alpha=0.5):\n",
+    "        super(TransformerBlock, self).__init__()\n",
+    "        if norm_layer == 'RMSNorm':\n",
+    "            self.norm1 = RMSNorm(dim)\n",
+    "            self.norm2 = RMSNorm(dim)\n",
+    "        elif norm_layer == 'DyT':\n",
+    "            self.norm1 = DyT(dim, init_alpha)\n",
+    "            self.norm2 = DyT(dim, init_alpha)\n",
+    "        else:\n",
+    "            raise ValueError(\"Invalid norm_layer. Choose 'RMSNorm' or 'DyT'.\")\n",
+    "        self.attn = nn.MultiheadAttention(embed_dim=dim, num_heads=num_heads)\n",
+    "        self.ffn = nn.Sequential(\n",
+    "            nn.Linear(dim, dim * 4),\n",
+    "            nn.GELU(),\n",
+    "            nn.Linear(dim * 4, dim)\n",
+    "        )\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        attn_output, _ = self.attn(self.norm1(x), self.norm1(x), self.norm1(x))\n",
+    "        x = x + attn_output\n",
+    "        ffn_output = self.ffn(self.norm2(x))\n",
+    "        x = x + ffn_output\n",
+    "        return x\n",
+    "\n",
+    "# 4. SimpleViT Class\n",
+    "class SimpleViT(nn.Module):\n",
+    "    def __init__(self, img_size=224, patch_size=16, num_classes=10, dim=256, depth=3, heads=4, norm_layer='RMSNorm', init_alpha=0.5):\n",
+    "        super(SimpleViT, self).__init__()\n",
+    "        assert img_size % patch_size == 0, \"Image size must be divisible by the patch size\"\n",
+    "        num_patches = (img_size // patch_size) ** 2\n",
+    "\n",
+    "        self.patch_embed = nn.Conv2d(3, dim, kernel_size=patch_size, stride=patch_size)\n",
+    "        self.pos_embed = nn.Parameter(torch.randn(1, num_patches + 1, dim))\n",
+    "        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))\n",
+    "\n",
+    "        self.blocks = nn.ModuleList([\n",
+    "            TransformerBlock(dim=dim, num_heads=heads, norm_layer=norm_layer, init_alpha=init_alpha) for _ in range(depth)\n",
+    "        ])\n",
+    "\n",
+    "        self.head = nn.Linear(dim, num_classes)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        B = x.shape[0]\n",
+    "        x = self.patch_embed(x)\n",
+    "        x = x.flatten(2).transpose(1, 2)\n",
+    "\n",
+    "        cls_tokens = self.cls_token.expand(B, -1, -1)\n",
+    "        x = torch.cat((cls_tokens, x), dim=1)\n",
+    "        x = x + self.pos_embed\n",
+    "\n",
+    "        x = x.transpose(0, 1)\n",
+    "        for block in self.blocks:\n",
+    "            x = block(x)\n",
+    "        x = x.transpose(0, 1)\n",
+    "\n",
+    "        x = x[:, 0]\n",
+    "        x = self.head(x)\n",
+    "        return x\n",
+    "\n",
+    "# 5. Training and evaluation function\n",
+    "def train_model(model, dataloader, criterion, optimizer, num_epochs, device):\n",
+    "    model.to(device)\n",
+    "    start_time = time.time()\n",
+    "\n",
+    "    # tqdm for epochs\n",
+    "    for epoch in tqdm(range(num_epochs), desc=\"Epochs\", unit=\"epoch\"):\n",
+    "        model.train()\n",
+    "        running_loss = 0.0\n",
+    "        correct = 0\n",
+    "        total = 0\n",
+    "\n",
+    "        # tqdm for batches\n",
+    "        for inputs, labels in tqdm(dataloader, desc=f\"Epoch {epoch+1}/{num_epochs}\", unit=\"batch\", leave=False):\n",
+    "            inputs, labels = inputs.to(device), labels.to(device)\n",
+    "            optimizer.zero_grad()\n",
+    "            outputs = model(inputs)\n",
+    "            loss = criterion(outputs, labels)\n",
+    "            loss.backward()\n",
+    "            optimizer.step()\n",
+    "\n",
+    "            running_loss += loss.item()\n",
+    "            _, predicted = torch.max(outputs, 1)\n",
+    "            total += labels.size(0)\n",
+    "            correct += (predicted == labels).sum().item()\n",
+    "\n",
+    "        accuracy = 100 * correct / total\n",
+    "        avg_loss = running_loss / len(dataloader)\n",
+    "        print(f\"Epoch {epoch+1}/{num_epochs} completed. Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%\")\n",
+    "\n",
+    "    end_time = time.time()\n",
+    "    training_time = end_time - start_time\n",
+    "    return training_time, accuracy\n",
+    "\n",
+    "# Dataset and DataLoader (CIFAR-10)\n",
+    "transform = transforms.Compose([\n",
+    "    transforms.Resize((224, 224)),\n",
+    "    transforms.ToTensor(),\n",
+    "    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))\n",
+    "])\n",
+    "\n",
+    "train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)\n",
+    "train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)\n",
+    "\n",
+    "# Device and training parameters\n",
+    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+    "num_epochs = 2\n",
+    "\n",
+    "# RMSNorm Model\n",
+    "model_rms = SimpleViT(norm_layer='RMSNorm')\n",
+    "optimizer_rms = optim.Adam(model_rms.parameters(), lr=0.001)\n",
+    "criterion = nn.CrossEntropyLoss()\n",
+    "\n",
+    "# DyT Model\n",
+    "model_dyt = SimpleViT(norm_layer='DyT', init_alpha=0.5)\n",
+    "optimizer_dyt = optim.Adam(model_dyt.parameters(), lr=0.001)\n",
+    "\n",
+    "# Training and comparison\n",
+    "print(\"RMSNorm Model training...\")\n",
+    "time_rms, acc_rms = train_model(model_rms, train_loader, criterion, optimizer_rms, num_epochs, device)\n",
+    "print(f\"RMSNorm Training Time: {time_rms:.2f} seconds, Final Accuracy: {acc_rms:.2f}%\")\n",
+    "\n",
+    "print(\"\\nDyT Model training...\")\n",
+    "time_dyt, acc_dyt = train_model(model_dyt, train_loader, criterion, optimizer_dyt, num_epochs, device)\n",
+    "print(f\"DyT Training Time: {time_dyt:.2f} seconds, Final Accuracy: {acc_dyt:.2f}%\")\n",
+    "\n",
+    "# Comparison results\n",
+    "print(\"\\nComparison:\")\n",
+    "print(f\"RMSNorm - Time: {time_rms:.2f}s, Accuracy: {acc_rms:.2f}%\")\n",
+    "print(f\"DyT - Time: {time_dyt:.2f}s, Accuracy: {acc_dyt:.2f}%\")"
+   ],
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
-    {
-      "cell_type": "code",
-      "source": [
-        "import torch\n",
-        "import torch.nn as nn\n",
-        "import torch.optim as optim\n",
-        "import time\n",
-        "from torchvision import datasets, transforms\n",
-        "from torch.utils.data import DataLoader\n",
-        "from tqdm import tqdm  # İlerleme çubuğu için tqdm ekleniyor\n",
-        "\n",
-        "# 1. RMSNorm Sınıfı\n",
-        "class RMSNorm(nn.Module):\n",
-        "    def __init__(self, dim, eps=1e-6):\n",
-        "        super(RMSNorm, self).__init__()\n",
-        "        self.dim = dim\n",
-        "        self.eps = eps\n",
-        "        self.gamma = nn.Parameter(torch.ones(dim))\n",
-        "        self.beta = nn.Parameter(torch.zeros(dim))\n",
-        "\n",
-        "    def forward(self, x):\n",
-        "        rms = torch.sqrt(torch.mean(x**2, dim=-1, keepdim=True) + self.eps)\n",
-        "        x_norm = x / rms\n",
-        "        return self.gamma * x_norm + self.beta\n",
-        "\n",
-        "# 2. DyT Sınıfı\n",
-        "class DyT(nn.Module):\n",
-        "    def __init__(self, dim, init_alpha=0.5):\n",
-        "        super(DyT, self).__init__()\n",
-        "        self.alpha = nn.Parameter(torch.ones(1) * init_alpha)\n",
-        "        self.gamma = nn.Parameter(torch.ones(dim))\n",
-        "        self.beta = nn.Parameter(torch.zeros(dim))\n",
-        "\n",
-        "    def forward(self, x):\n",
-        "        x = torch.tanh(self.alpha * x)\n",
-        "        return self.gamma * x + self.beta\n",
-        "\n",
-        "# 3. TransformerBlock Sınıfı\n",
-        "class TransformerBlock(nn.Module):\n",
-        "    def __init__(self, dim, num_heads, norm_layer, init_alpha=0.5):\n",
-        "        super(TransformerBlock, self).__init__()\n",
-        "        if norm_layer == 'RMSNorm':\n",
-        "            self.norm1 = RMSNorm(dim)\n",
-        "            self.norm2 = RMSNorm(dim)\n",
-        "        elif norm_layer == 'DyT':\n",
-        "            self.norm1 = DyT(dim, init_alpha)\n",
-        "            self.norm2 = DyT(dim, init_alpha)\n",
-        "        else:\n",
-        "            raise ValueError(\"Geçersiz norm_layer. 'RMSNorm' veya 'DyT' seçin.\")\n",
-        "        self.attn = nn.MultiheadAttention(embed_dim=dim, num_heads=num_heads)\n",
-        "        self.ffn = nn.Sequential(\n",
-        "            nn.Linear(dim, dim * 4),\n",
-        "            nn.GELU(),\n",
-        "            nn.Linear(dim * 4, dim)\n",
-        "        )\n",
-        "\n",
-        "    def forward(self, x):\n",
-        "        attn_output, _ = self.attn(self.norm1(x), self.norm1(x), self.norm1(x))\n",
-        "        x = x + attn_output\n",
-        "        ffn_output = self.ffn(self.norm2(x))\n",
-        "        x = x + ffn_output\n",
-        "        return x\n",
-        "\n",
-        "# 4. SimpleViT Sınıfı\n",
-        "class SimpleViT(nn.Module):\n",
-        "    def __init__(self, img_size=224, patch_size=16, num_classes=10, dim=256, depth=3, heads=4, norm_layer='RMSNorm', init_alpha=0.5):\n",
-        "        super(SimpleViT, self).__init__()\n",
-        "        assert img_size % patch_size == 0, \"Görüntü boyutu yama boyutuna bölünebilir olmalı\"\n",
-        "        num_patches = (img_size // patch_size) ** 2\n",
-        "\n",
-        "        self.patch_embed = nn.Conv2d(3, dim, kernel_size=patch_size, stride=patch_size)\n",
-        "        self.pos_embed = nn.Parameter(torch.randn(1, num_patches + 1, dim))\n",
-        "        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))\n",
-        "\n",
-        "        self.blocks = nn.ModuleList([\n",
-        "            TransformerBlock(dim=dim, num_heads=heads, norm_layer=norm_layer, init_alpha=init_alpha) for _ in range(depth)\n",
-        "        ])\n",
-        "\n",
-        "        self.head = nn.Linear(dim, num_classes)\n",
-        "\n",
-        "    def forward(self, x):\n",
-        "        B = x.shape[0]\n",
-        "        x = self.patch_embed(x)\n",
-        "        x = x.flatten(2).transpose(1, 2)\n",
-        "\n",
-        "        cls_tokens = self.cls_token.expand(B, -1, -1)\n",
-        "        x = torch.cat((cls_tokens, x), dim=1)\n",
-        "        x = x + self.pos_embed\n",
-        "\n",
-        "        x = x.transpose(0, 1)\n",
-        "        for block in self.blocks:\n",
-        "            x = block(x)\n",
-        "        x = x.transpose(0, 1)\n",
-        "\n",
-        "        x = x[:, 0]\n",
-        "        x = self.head(x)\n",
-        "        return x\n",
-        "\n",
-        "# 5. Eğitim ve Değerlendirme Fonksiyonu\n",
-        "def train_model(model, dataloader, criterion, optimizer, num_epochs, device):\n",
-        "    model.to(device)\n",
-        "    start_time = time.time()\n",
-        "\n",
-        "    # Epoch'lar için tqdm\n",
-        "    for epoch in tqdm(range(num_epochs), desc=\"Epochs\", unit=\"epoch\"):\n",
-        "        model.train()\n",
-        "        running_loss = 0.0\n",
-        "        correct = 0\n",
-        "        total = 0\n",
-        "\n",
-        "        # Batch'ler için tqdm\n",
-        "        for inputs, labels in tqdm(dataloader, desc=f\"Epoch {epoch+1}/{num_epochs}\", unit=\"batch\", leave=False):\n",
-        "            inputs, labels = inputs.to(device), labels.to(device)\n",
-        "            optimizer.zero_grad()\n",
-        "            outputs = model(inputs)\n",
-        "            loss = criterion(outputs, labels)\n",
-        "            loss.backward()\n",
-        "            optimizer.step()\n",
-        "\n",
-        "            running_loss += loss.item()\n",
-        "            _, predicted = torch.max(outputs, 1)\n",
-        "            total += labels.size(0)\n",
-        "            correct += (predicted == labels).sum().item()\n",
-        "\n",
-        "        accuracy = 100 * correct / total\n",
-        "        avg_loss = running_loss / len(dataloader)\n",
-        "        print(f\"Epoch {epoch+1}/{num_epochs} tamamlandı. Kayıp: {avg_loss:.4f}, Doğruluk: {accuracy:.2f}%\")\n",
-        "\n",
-        "    end_time = time.time()\n",
-        "    training_time = end_time - start_time\n",
-        "    return training_time, accuracy\n",
-        "\n",
-        "# Veri Seti ve DataLoader (CIFAR-10)\n",
-        "transform = transforms.Compose([\n",
-        "    transforms.Resize((224, 224)),\n",
-        "    transforms.ToTensor(),\n",
-        "    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))\n",
-        "])\n",
-        "\n",
-        "train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)\n",
-        "train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)\n",
-        "\n",
-        "# Cihaz ve Eğitim Parametreleri\n",
-        "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
-        "num_epochs = 2\n",
-        "\n",
-        "# RMSNorm Modeli\n",
-        "model_rms = SimpleViT(norm_layer='RMSNorm')\n",
-        "optimizer_rms = optim.Adam(model_rms.parameters(), lr=0.001)\n",
-        "criterion = nn.CrossEntropyLoss()\n",
-        "\n",
-        "# DyT Modeli\n",
-        "model_dyt = SimpleViT(norm_layer='DyT', init_alpha=0.5)\n",
-        "optimizer_dyt = optim.Adam(model_dyt.parameters(), lr=0.001)\n",
-        "\n",
-        "# Eğitim ve Karşılaştırma\n",
-        "print(\"RMSNorm Modeli Eğitiliyor...\")\n",
-        "time_rms, acc_rms = train_model(model_rms, train_loader, criterion, optimizer_rms, num_epochs, device)\n",
-        "print(f\"RMSNorm Eğitim Süresi: {time_rms:.2f} saniye, Son Doğruluk: {acc_rms:.2f}%\")\n",
-        "\n",
-        "print(\"\\nDyT Modeli Eğitiliyor...\")\n",
-        "time_dyt, acc_dyt = train_model(model_dyt, train_loader, criterion, optimizer_dyt, num_epochs, device)\n",
-        "print(f\"DyT Eğitim Süresi: {time_dyt:.2f} saniye, Son Doğruluk: {acc_dyt:.2f}%\")\n",
-        "\n",
-        "# Karşılaştırma Sonuçları\n",
-        "print(\"\\nKarşılaştırma:\")\n",
-        "print(f\"RMSNorm - Süre: {time_rms:.2f}s, Doğruluk: {acc_rms:.2f}%\")\n",
-        "print(f\"DyT - Süre: {time_dyt:.2f}s, Doğruluk: {acc_dyt:.2f}%\")"
-      ],
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "Ligm3e2erYq6",
-        "outputId": "4e4a2bee-0bd8-40ae-ee7d-c28bed94edec"
-      },
-      "execution_count": 6,
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "RMSNorm Modeli Eğitiliyor...\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "Epochs:   0%|          | 0/2 [00:00<?, ?epoch/s]\n",
-            "Epoch 1/2:   0%|          | 0/1563 [00:00<?, ?batch/s]\u001b[A\n",
-            "Epoch 1/2:   0%|          | 1/1563 [00:00<03:24,  7.64batch/s]\u001b[A\n",
-            "Epoch 1/2:   0%|          | 2/1563 [00:00<03:22,  7.72batch/s]\u001b[A\n",
-            "Epoch 1/2:   0%|          | 3/1563 [00:00<03:08,  8.29batch/s]\u001b[A\n",
-            "Epoch 1/2:   0%|          | 4/1563 [00:00<03:00,  8.65batch/s]\u001b[A\n",
-            "Epoch 1/2:   0%|          | 5/1563 [00:00<02:59,  8.70batch/s]\u001b[A\n",
-            "Epoch 1/2:   0%|          | 7/1563 [00:00<02:42,  9.57batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|          | 9/1563 [00:00<02:33, 10.10batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|          | 11/1563 [00:01<02:29, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|          | 13/1563 [00:01<02:28, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|          | 15/1563 [00:01<02:28, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|          | 17/1563 [00:01<02:26, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|          | 19/1563 [00:01<02:24, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|▏         | 21/1563 [00:02<02:23, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|▏         | 23/1563 [00:02<02:22, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 25/1563 [00:02<02:23, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 27/1563 [00:02<02:22, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 29/1563 [00:02<02:22, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 31/1563 [00:03<02:22, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 33/1563 [00:03<02:21, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 35/1563 [00:03<02:21, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 37/1563 [00:03<02:21, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 39/1563 [00:03<02:20, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 41/1563 [00:03<02:20, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 43/1563 [00:04<02:20, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 45/1563 [00:04<02:20, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 47/1563 [00:04<02:19, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 49/1563 [00:04<02:19, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 51/1563 [00:04<02:19, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 53/1563 [00:05<02:19, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▎         | 55/1563 [00:05<02:19, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▎         | 57/1563 [00:05<02:21, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▍         | 59/1563 [00:05<02:20, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▍         | 61/1563 [00:05<02:20, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▍         | 63/1563 [00:05<02:20, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▍         | 65/1563 [00:06<02:19, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▍         | 67/1563 [00:06<02:19, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▍         | 69/1563 [00:06<02:20, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▍         | 71/1563 [00:06<02:18, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▍         | 73/1563 [00:06<02:18, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▍         | 75/1563 [00:07<02:17, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▍         | 77/1563 [00:07<02:16, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▌         | 79/1563 [00:07<02:20, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▌         | 81/1563 [00:07<02:27, 10.04batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▌         | 83/1563 [00:07<02:32,  9.70batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▌         | 84/1563 [00:08<02:36,  9.48batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▌         | 85/1563 [00:08<02:38,  9.35batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 86/1563 [00:08<02:40,  9.21batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 87/1563 [00:08<02:42,  9.09batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 88/1563 [00:08<02:42,  9.06batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 89/1563 [00:08<02:43,  9.04batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 90/1563 [00:08<02:42,  9.04batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 91/1563 [00:08<02:45,  8.88batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 92/1563 [00:08<02:51,  8.56batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 93/1563 [00:09<02:50,  8.64batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 94/1563 [00:09<02:50,  8.64batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 95/1563 [00:09<02:48,  8.72batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 96/1563 [00:09<02:48,  8.69batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 97/1563 [00:09<02:48,  8.70batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▋         | 99/1563 [00:09<02:34,  9.50batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▋         | 101/1563 [00:09<02:28,  9.86batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 103/1563 [00:10<02:24, 10.13batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 105/1563 [00:10<02:20, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 107/1563 [00:10<02:19, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 109/1563 [00:10<02:17, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 111/1563 [00:10<02:18, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 113/1563 [00:11<02:18, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 115/1563 [00:11<02:16, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 117/1563 [00:11<02:16, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:   8%|▊         | 119/1563 [00:11<02:15, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/2:   8%|▊         | 121/1563 [00:11<02:16, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:   8%|▊         | 123/1563 [00:11<02:15, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/2:   8%|▊         | 125/1563 [00:12<02:14, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:   8%|▊         | 127/1563 [00:12<02:13, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:   8%|▊         | 129/1563 [00:12<02:15, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/2:   8%|▊         | 131/1563 [00:12<02:14, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▊         | 133/1563 [00:12<02:13, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▊         | 135/1563 [00:13<02:12, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▉         | 137/1563 [00:13<02:12, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▉         | 139/1563 [00:13<02:12, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▉         | 141/1563 [00:13<02:11, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▉         | 143/1563 [00:13<02:11, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▉         | 145/1563 [00:14<02:10, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▉         | 147/1563 [00:14<02:10, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|▉         | 149/1563 [00:14<02:11, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|▉         | 151/1563 [00:14<02:10, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|▉         | 153/1563 [00:14<02:09, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|▉         | 155/1563 [00:14<02:10, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|█         | 157/1563 [00:15<02:10, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|█         | 159/1563 [00:15<02:10, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|█         | 161/1563 [00:15<02:10, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|█         | 163/1563 [00:15<02:10, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█         | 165/1563 [00:15<02:10, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█         | 167/1563 [00:16<02:10, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█         | 169/1563 [00:16<02:09, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█         | 171/1563 [00:16<02:10, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█         | 173/1563 [00:16<02:09, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█         | 175/1563 [00:16<02:08, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█▏        | 177/1563 [00:16<02:08, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█▏        | 179/1563 [00:17<02:08, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 181/1563 [00:17<02:08, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 183/1563 [00:17<02:08, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 185/1563 [00:17<02:07, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 187/1563 [00:17<02:07, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 189/1563 [00:18<02:07, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 191/1563 [00:18<02:08, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 193/1563 [00:18<02:08, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 195/1563 [00:18<02:08, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 197/1563 [00:18<02:07, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 199/1563 [00:19<02:06, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 201/1563 [00:19<02:06, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 203/1563 [00:19<02:07, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 205/1563 [00:19<02:10, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 207/1563 [00:19<02:15,  9.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 209/1563 [00:20<02:19,  9.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 210/1563 [00:20<02:22,  9.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 211/1563 [00:20<02:24,  9.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▎        | 212/1563 [00:20<02:27,  9.17batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▎        | 213/1563 [00:20<02:27,  9.13batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▎        | 214/1563 [00:20<02:28,  9.11batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 215/1563 [00:20<02:27,  9.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 216/1563 [00:20<02:27,  9.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 217/1563 [00:20<02:30,  8.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 218/1563 [00:21<02:34,  8.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 219/1563 [00:21<02:34,  8.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 220/1563 [00:21<02:34,  8.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 221/1563 [00:21<02:42,  8.27batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 222/1563 [00:21<02:39,  8.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 223/1563 [00:21<02:38,  8.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 225/1563 [00:21<02:22,  9.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▍        | 227/1563 [00:22<02:15,  9.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▍        | 229/1563 [00:22<02:13, 10.01batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▍        | 231/1563 [00:22<02:11, 10.10batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▍        | 233/1563 [00:22<02:09, 10.31batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▌        | 235/1563 [00:22<02:07, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▌        | 237/1563 [00:22<02:05, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▌        | 239/1563 [00:23<02:04, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▌        | 241/1563 [00:23<02:05, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▌        | 243/1563 [00:23<02:04, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▌        | 245/1563 [00:23<02:02, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▌        | 247/1563 [00:23<02:02, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▌        | 249/1563 [00:24<02:01, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▌        | 251/1563 [00:24<02:04, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▌        | 253/1563 [00:24<02:04, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▋        | 255/1563 [00:24<02:02, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▋        | 257/1563 [00:24<02:01, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 259/1563 [00:25<02:01, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 261/1563 [00:25<02:00, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 263/1563 [00:25<02:03, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 265/1563 [00:25<02:02, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 267/1563 [00:25<02:01, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 269/1563 [00:25<02:00, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 271/1563 [00:26<01:59, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 273/1563 [00:26<02:01, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 275/1563 [00:26<02:01, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 277/1563 [00:26<02:00, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 279/1563 [00:26<02:00, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 281/1563 [00:27<01:59, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 283/1563 [00:27<01:59, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 285/1563 [00:27<02:00, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 287/1563 [00:27<01:59, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 289/1563 [00:27<01:58, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▊        | 291/1563 [00:28<01:57, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▊        | 293/1563 [00:28<01:57, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▉        | 295/1563 [00:28<01:59, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▉        | 297/1563 [00:28<01:58, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▉        | 299/1563 [00:28<01:58, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▉        | 301/1563 [00:28<01:57, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▉        | 303/1563 [00:29<01:56, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|█▉        | 305/1563 [00:29<01:56, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|█▉        | 307/1563 [00:29<01:59, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|█▉        | 309/1563 [00:29<01:58, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|█▉        | 311/1563 [00:29<01:57, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|██        | 313/1563 [00:30<01:56, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|██        | 315/1563 [00:30<01:56, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|██        | 317/1563 [00:30<01:56, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|██        | 319/1563 [00:30<01:55, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██        | 321/1563 [00:30<01:54, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██        | 323/1563 [00:31<01:54, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██        | 325/1563 [00:31<01:54, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██        | 327/1563 [00:31<01:55, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██        | 329/1563 [00:31<01:56, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██        | 331/1563 [00:31<01:59, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██▏       | 333/1563 [00:32<02:04,  9.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██▏       | 334/1563 [00:32<02:06,  9.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██▏       | 335/1563 [00:32<02:08,  9.57batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██▏       | 336/1563 [00:32<02:10,  9.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 337/1563 [00:32<02:15,  9.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 338/1563 [00:32<02:14,  9.11batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 339/1563 [00:32<02:16,  8.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 340/1563 [00:32<02:17,  8.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 341/1563 [00:32<02:19,  8.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 342/1563 [00:33<02:19,  8.73batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 343/1563 [00:33<02:22,  8.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 344/1563 [00:33<02:21,  8.60batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 345/1563 [00:33<02:21,  8.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 346/1563 [00:33<02:21,  8.60batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 347/1563 [00:33<02:19,  8.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 348/1563 [00:33<02:19,  8.73batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 349/1563 [00:33<02:17,  8.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 351/1563 [00:34<02:06,  9.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 353/1563 [00:34<02:00, 10.02batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 355/1563 [00:34<01:58, 10.15batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 357/1563 [00:34<01:58, 10.18batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 359/1563 [00:34<01:56, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 361/1563 [00:34<01:54, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 363/1563 [00:35<01:53, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 365/1563 [00:35<01:53, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 367/1563 [00:35<01:52, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▎       | 369/1563 [00:35<01:52, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▎       | 371/1563 [00:35<01:52, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▍       | 373/1563 [00:36<01:51, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▍       | 375/1563 [00:36<01:50, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▍       | 377/1563 [00:36<01:50, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▍       | 379/1563 [00:36<01:50, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▍       | 381/1563 [00:36<01:51, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▍       | 383/1563 [00:37<01:50, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▍       | 385/1563 [00:37<01:49, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▍       | 387/1563 [00:37<01:50, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▍       | 389/1563 [00:37<01:50, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▌       | 391/1563 [00:37<01:50, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▌       | 393/1563 [00:37<01:50, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▌       | 395/1563 [00:38<01:50, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▌       | 397/1563 [00:38<01:49, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▌       | 399/1563 [00:38<01:49, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▌       | 401/1563 [00:38<01:49, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▌       | 403/1563 [00:38<01:48, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▌       | 405/1563 [00:39<01:48, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▌       | 407/1563 [00:39<01:48, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▌       | 409/1563 [00:39<01:47, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▋       | 411/1563 [00:39<01:48, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▋       | 413/1563 [00:39<01:49, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 415/1563 [00:40<01:48, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 417/1563 [00:40<01:49, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 419/1563 [00:40<01:50, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 421/1563 [00:40<01:48, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 423/1563 [00:40<01:49, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 425/1563 [00:41<01:48, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 427/1563 [00:41<01:48, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 429/1563 [00:41<01:47, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 431/1563 [00:41<01:46, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 433/1563 [00:41<01:47, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 435/1563 [00:41<01:46, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 437/1563 [00:42<01:45, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 439/1563 [00:42<01:44, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 441/1563 [00:42<01:45, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 443/1563 [00:42<01:44, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 445/1563 [00:42<01:45, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▊       | 447/1563 [00:43<01:44, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▊       | 449/1563 [00:43<01:44, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▉       | 451/1563 [00:43<01:43, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▉       | 453/1563 [00:43<01:43, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▉       | 455/1563 [00:43<01:44, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▉       | 457/1563 [00:44<01:49, 10.06batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▉       | 459/1563 [00:44<01:54,  9.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▉       | 460/1563 [00:44<01:56,  9.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▉       | 461/1563 [00:44<01:57,  9.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|██▉       | 462/1563 [00:44<01:59,  9.19batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|██▉       | 463/1563 [00:44<02:01,  9.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|██▉       | 464/1563 [00:44<02:04,  8.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|██▉       | 465/1563 [00:44<02:02,  8.95batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|██▉       | 466/1563 [00:45<02:03,  8.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|██▉       | 467/1563 [00:45<02:05,  8.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|██▉       | 468/1563 [00:45<02:05,  8.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|███       | 469/1563 [00:45<02:05,  8.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|███       | 470/1563 [00:45<02:06,  8.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|███       | 471/1563 [00:45<02:06,  8.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|███       | 472/1563 [00:45<02:06,  8.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|███       | 473/1563 [00:45<02:08,  8.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|███       | 474/1563 [00:46<02:03,  8.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|███       | 476/1563 [00:46<01:52,  9.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███       | 478/1563 [00:46<01:48, 10.00batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███       | 480/1563 [00:46<01:45, 10.22batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███       | 482/1563 [00:46<01:44, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███       | 484/1563 [00:46<01:43, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███       | 486/1563 [00:47<01:48,  9.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███       | 487/1563 [00:47<01:48,  9.89batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███▏      | 489/1563 [00:47<01:45, 10.15batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███▏      | 491/1563 [00:47<01:44, 10.25batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 493/1563 [00:47<01:46, 10.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 495/1563 [00:48<01:45, 10.12batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 497/1563 [00:48<01:43, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 499/1563 [00:48<01:42, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 501/1563 [00:48<01:41, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 503/1563 [00:48<01:41, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 505/1563 [00:49<01:41, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 507/1563 [00:49<01:40, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 509/1563 [00:49<01:39, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 511/1563 [00:49<01:38, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 513/1563 [00:49<01:38, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 515/1563 [00:49<01:38, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 517/1563 [00:50<01:38, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 519/1563 [00:50<01:38, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 521/1563 [00:50<01:38, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 523/1563 [00:50<01:38, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▎      | 525/1563 [00:50<01:37, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▎      | 527/1563 [00:51<01:37, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▍      | 529/1563 [00:51<01:37, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▍      | 531/1563 [00:51<01:37, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▍      | 533/1563 [00:51<01:39, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▍      | 535/1563 [00:51<01:38, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▍      | 537/1563 [00:52<01:39, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▍      | 539/1563 [00:52<01:37, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▍      | 541/1563 [00:52<01:38, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▍      | 543/1563 [00:52<01:37, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▍      | 545/1563 [00:52<01:36, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▍      | 547/1563 [00:52<01:36, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▌      | 549/1563 [00:53<01:37, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▌      | 551/1563 [00:53<01:35, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▌      | 553/1563 [00:53<01:35, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 555/1563 [00:53<01:34, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 557/1563 [00:53<01:33, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 559/1563 [00:54<01:34, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 561/1563 [00:54<01:34, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 563/1563 [00:54<01:33, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 565/1563 [00:54<01:33, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▋      | 567/1563 [00:54<01:32, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▋      | 569/1563 [00:55<01:32, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 571/1563 [00:55<01:34, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 573/1563 [00:55<01:34, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 575/1563 [00:55<01:34, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 577/1563 [00:55<01:33, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 579/1563 [00:56<01:35, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 581/1563 [00:56<01:40,  9.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 582/1563 [00:56<01:42,  9.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 583/1563 [00:56<01:45,  9.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 584/1563 [00:56<01:46,  9.22batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 585/1563 [00:56<01:46,  9.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 586/1563 [00:56<01:49,  8.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 587/1563 [00:56<01:50,  8.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 588/1563 [00:57<01:51,  8.73batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 589/1563 [00:57<01:52,  8.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 590/1563 [00:57<01:52,  8.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 591/1563 [00:57<01:53,  8.55batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 592/1563 [00:57<01:53,  8.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 593/1563 [00:57<01:53,  8.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 594/1563 [00:57<01:52,  8.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 595/1563 [00:57<01:52,  8.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 596/1563 [00:57<01:52,  8.59batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 597/1563 [00:58<01:50,  8.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 599/1563 [00:58<01:41,  9.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 601/1563 [00:58<01:36,  9.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▊      | 603/1563 [00:58<01:34, 10.21batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▊      | 605/1563 [00:58<01:32, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▉      | 607/1563 [00:59<01:31, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▉      | 609/1563 [00:59<01:31, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▉      | 611/1563 [00:59<01:31, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▉      | 613/1563 [00:59<01:30, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▉      | 615/1563 [00:59<01:30, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▉      | 617/1563 [00:59<01:29, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|███▉      | 619/1563 [01:00<01:29, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|███▉      | 621/1563 [01:00<01:30, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|███▉      | 623/1563 [01:00<01:29, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|███▉      | 625/1563 [01:00<01:29, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|████      | 627/1563 [01:00<01:29, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|████      | 629/1563 [01:01<01:28, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|████      | 631/1563 [01:01<01:29, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|████      | 633/1563 [01:01<01:29, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████      | 635/1563 [01:01<01:29, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████      | 637/1563 [01:01<01:28, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████      | 639/1563 [01:02<01:28, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████      | 641/1563 [01:02<01:28, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████      | 643/1563 [01:02<01:27, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████▏     | 645/1563 [01:02<01:26, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████▏     | 647/1563 [01:02<01:26, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 649/1563 [01:03<01:26, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 651/1563 [01:03<01:25, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 653/1563 [01:03<01:27, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 655/1563 [01:03<01:26, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 657/1563 [01:03<01:26, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 659/1563 [01:04<01:25, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 661/1563 [01:04<01:25, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 663/1563 [01:04<01:26, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 665/1563 [01:04<01:25, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 667/1563 [01:04<01:25, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 669/1563 [01:04<01:25, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 671/1563 [01:05<01:24, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 673/1563 [01:05<01:24, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 675/1563 [01:05<01:26, 10.28batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 677/1563 [01:05<01:25, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 679/1563 [01:05<01:24, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▎     | 681/1563 [01:06<01:24, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▎     | 683/1563 [01:06<01:24, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 685/1563 [01:06<01:24, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 687/1563 [01:06<01:24, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 689/1563 [01:06<01:23, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 691/1563 [01:07<01:23, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 693/1563 [01:07<01:23, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 695/1563 [01:07<01:23, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▍     | 697/1563 [01:07<01:23, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▍     | 699/1563 [01:07<01:22, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▍     | 701/1563 [01:08<01:22, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▍     | 703/1563 [01:08<01:24, 10.15batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▌     | 705/1563 [01:08<01:28,  9.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▌     | 706/1563 [01:08<01:30,  9.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▌     | 707/1563 [01:08<01:31,  9.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▌     | 708/1563 [01:08<01:32,  9.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▌     | 709/1563 [01:08<01:34,  9.03batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▌     | 710/1563 [01:09<01:34,  9.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▌     | 711/1563 [01:09<01:34,  8.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 712/1563 [01:09<01:34,  8.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 713/1563 [01:09<01:34,  8.95batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 714/1563 [01:09<01:35,  8.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 715/1563 [01:09<01:39,  8.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 716/1563 [01:09<01:40,  8.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 717/1563 [01:09<01:41,  8.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 718/1563 [01:09<01:41,  8.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 719/1563 [01:10<01:41,  8.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 720/1563 [01:10<01:41,  8.27batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 721/1563 [01:10<01:41,  8.33batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 722/1563 [01:10<01:36,  8.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▋     | 724/1563 [01:10<01:28,  9.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▋     | 726/1563 [01:10<01:24,  9.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 728/1563 [01:11<01:22, 10.10batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 730/1563 [01:11<01:21, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 732/1563 [01:11<01:20, 10.27batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 734/1563 [01:11<01:22, 10.09batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 736/1563 [01:11<01:34,  8.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 738/1563 [01:12<01:29,  9.18batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 740/1563 [01:12<01:26,  9.55batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 741/1563 [01:12<01:26,  9.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 742/1563 [01:12<01:45,  7.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 743/1563 [01:12<01:41,  8.06batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 745/1563 [01:12<01:32,  8.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 747/1563 [01:13<01:26,  9.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 749/1563 [01:13<01:33,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 750/1563 [01:13<01:37,  8.31batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 752/1563 [01:13<01:31,  8.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 754/1563 [01:13<01:26,  9.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 756/1563 [01:14<01:22,  9.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 758/1563 [01:14<01:20, 10.03batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▊     | 760/1563 [01:14<01:19, 10.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▉     | 762/1563 [01:14<01:17, 10.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▉     | 764/1563 [01:14<01:17, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▉     | 766/1563 [01:15<01:16, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▉     | 768/1563 [01:15<01:15, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▉     | 770/1563 [01:15<01:16, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▉     | 772/1563 [01:15<01:15, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|████▉     | 774/1563 [01:15<01:15, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|████▉     | 776/1563 [01:15<01:15, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|████▉     | 778/1563 [01:16<01:14, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|████▉     | 780/1563 [01:16<01:14, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|█████     | 782/1563 [01:16<01:13, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|█████     | 784/1563 [01:16<01:14, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|█████     | 786/1563 [01:16<01:14, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|█████     | 788/1563 [01:17<01:13, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████     | 790/1563 [01:17<01:13, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████     | 792/1563 [01:17<01:14, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████     | 794/1563 [01:17<01:13, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████     | 796/1563 [01:17<01:14, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████     | 798/1563 [01:18<01:13, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████     | 800/1563 [01:18<01:13, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████▏    | 802/1563 [01:18<01:13, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████▏    | 804/1563 [01:18<01:12, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 806/1563 [01:18<01:12, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 808/1563 [01:19<01:12, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 810/1563 [01:19<01:11, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 812/1563 [01:19<01:11, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 814/1563 [01:19<01:11, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 816/1563 [01:19<01:11, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 818/1563 [01:20<01:11, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 820/1563 [01:20<01:10, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 822/1563 [01:20<01:10, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 824/1563 [01:20<01:14,  9.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 825/1563 [01:20<01:16,  9.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 826/1563 [01:20<01:18,  9.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 827/1563 [01:20<01:19,  9.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 828/1563 [01:21<01:20,  9.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 829/1563 [01:21<01:20,  9.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 830/1563 [01:21<01:20,  9.07batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 831/1563 [01:21<01:21,  9.02batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 832/1563 [01:21<01:21,  8.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 833/1563 [01:21<01:23,  8.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 834/1563 [01:21<01:22,  8.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 835/1563 [01:21<01:22,  8.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 836/1563 [01:21<01:22,  8.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▎    | 837/1563 [01:22<01:24,  8.59batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▎    | 838/1563 [01:22<01:23,  8.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▎    | 839/1563 [01:22<01:25,  8.51batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▎    | 840/1563 [01:22<01:25,  8.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▍    | 841/1563 [01:22<01:25,  8.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▍    | 842/1563 [01:22<01:25,  8.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▍    | 844/1563 [01:22<01:18,  9.21batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▍    | 845/1563 [01:23<01:16,  9.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▍    | 847/1563 [01:23<01:13,  9.81batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▍    | 849/1563 [01:23<01:10, 10.09batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▍    | 851/1563 [01:23<01:09, 10.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▍    | 853/1563 [01:23<01:07, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▍    | 855/1563 [01:23<01:07, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▍    | 857/1563 [01:24<01:07, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▍    | 859/1563 [01:24<01:07, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▌    | 861/1563 [01:24<01:08, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▌    | 863/1563 [01:24<01:07, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▌    | 865/1563 [01:24<01:07, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▌    | 867/1563 [01:25<01:07, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▌    | 869/1563 [01:25<01:06, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▌    | 871/1563 [01:25<01:06, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▌    | 873/1563 [01:25<01:06, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▌    | 875/1563 [01:25<01:05, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▌    | 877/1563 [01:26<01:06, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▌    | 879/1563 [01:26<01:05, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▋    | 881/1563 [01:26<01:05, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▋    | 883/1563 [01:26<01:04, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 885/1563 [01:26<01:04, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 887/1563 [01:27<01:04, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 889/1563 [01:27<01:04, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 891/1563 [01:27<01:04, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 893/1563 [01:27<01:03, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 895/1563 [01:27<01:03, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 897/1563 [01:27<01:02, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 899/1563 [01:28<01:03, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 901/1563 [01:28<01:02, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 903/1563 [01:28<01:03, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 905/1563 [01:28<01:02, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 907/1563 [01:28<01:02, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 909/1563 [01:29<01:02, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 911/1563 [01:29<01:02, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 913/1563 [01:29<01:02, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▊    | 915/1563 [01:29<01:01, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▊    | 917/1563 [01:29<01:01, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▉    | 919/1563 [01:30<01:01, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▉    | 921/1563 [01:30<01:01, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▉    | 923/1563 [01:30<01:01, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▉    | 925/1563 [01:30<01:00, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▉    | 927/1563 [01:30<01:00, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▉    | 929/1563 [01:31<01:00, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|█████▉    | 931/1563 [01:31<01:01, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|█████▉    | 933/1563 [01:31<01:01, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|█████▉    | 935/1563 [01:31<01:00, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|█████▉    | 937/1563 [01:31<01:00, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|██████    | 939/1563 [01:31<00:59, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|██████    | 941/1563 [01:32<00:59, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|██████    | 943/1563 [01:32<00:59, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|██████    | 945/1563 [01:32<00:58, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 947/1563 [01:32<01:00, 10.20batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 949/1563 [01:32<01:03,  9.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 950/1563 [01:33<01:04,  9.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 951/1563 [01:33<01:05,  9.31batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 952/1563 [01:33<01:06,  9.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 953/1563 [01:33<01:07,  9.00batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 954/1563 [01:33<01:07,  8.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 955/1563 [01:33<01:08,  8.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 956/1563 [01:33<01:09,  8.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 957/1563 [01:33<01:11,  8.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████▏   | 958/1563 [01:34<01:10,  8.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████▏   | 959/1563 [01:34<01:10,  8.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████▏   | 960/1563 [01:34<01:12,  8.33batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████▏   | 961/1563 [01:34<01:13,  8.19batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 962/1563 [01:34<01:12,  8.33batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 963/1563 [01:34<01:12,  8.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 964/1563 [01:34<01:12,  8.31batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 965/1563 [01:34<01:12,  8.30batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 967/1563 [01:35<01:04,  9.20batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 969/1563 [01:35<01:01,  9.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 970/1563 [01:35<01:01,  9.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 971/1563 [01:35<01:00,  9.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 972/1563 [01:35<01:00,  9.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 974/1563 [01:35<00:58, 10.06batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 975/1563 [01:35<00:58, 10.03batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 977/1563 [01:36<00:57, 10.25batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 979/1563 [01:36<00:56, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 981/1563 [01:36<00:57, 10.17batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 983/1563 [01:36<00:56, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 985/1563 [01:36<00:55, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 987/1563 [01:37<00:55, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 989/1563 [01:37<00:54, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 991/1563 [01:37<00:55, 10.22batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▎   | 993/1563 [01:37<00:54, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▎   | 995/1563 [01:37<00:54, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▍   | 997/1563 [01:37<00:53, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▍   | 999/1563 [01:38<00:53, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▍   | 1001/1563 [01:38<00:53, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▍   | 1003/1563 [01:38<00:53, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▍   | 1005/1563 [01:38<00:53, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▍   | 1007/1563 [01:38<00:53, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▍   | 1009/1563 [01:39<00:53, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▍   | 1011/1563 [01:39<00:52, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▍   | 1013/1563 [01:39<00:52, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▍   | 1015/1563 [01:39<00:52, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▌   | 1017/1563 [01:39<00:52, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▌   | 1019/1563 [01:40<00:51, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▌   | 1021/1563 [01:40<00:51, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▌   | 1023/1563 [01:40<00:52, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▌   | 1025/1563 [01:40<00:51, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▌   | 1027/1563 [01:40<00:51, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▌   | 1029/1563 [01:41<00:51, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▌   | 1031/1563 [01:41<00:50, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▌   | 1033/1563 [01:41<00:51, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▌   | 1035/1563 [01:41<00:51, 10.24batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▋   | 1037/1563 [01:41<00:50, 10.33batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▋   | 1039/1563 [01:42<00:50, 10.33batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1041/1563 [01:42<00:50, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1043/1563 [01:42<00:50, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1045/1563 [01:42<00:49, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1047/1563 [01:42<00:49, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1049/1563 [01:42<00:49, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1051/1563 [01:43<00:48, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1053/1563 [01:43<00:48, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1055/1563 [01:43<00:48, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1057/1563 [01:43<00:48, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1059/1563 [01:43<00:47, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1061/1563 [01:44<00:47, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1063/1563 [01:44<00:47, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1065/1563 [01:44<00:47, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1067/1563 [01:44<00:47, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1069/1563 [01:44<00:47, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▊   | 1071/1563 [01:45<00:49,  9.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▊   | 1072/1563 [01:45<00:51,  9.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▊   | 1073/1563 [01:45<00:52,  9.28batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▊   | 1074/1563 [01:45<00:54,  9.04batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1075/1563 [01:45<00:54,  8.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1076/1563 [01:45<00:54,  8.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1077/1563 [01:45<00:54,  8.89batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1078/1563 [01:45<00:54,  8.89batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1079/1563 [01:46<00:55,  8.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1080/1563 [01:46<00:55,  8.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1081/1563 [01:46<00:54,  8.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1082/1563 [01:46<00:55,  8.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1083/1563 [01:46<00:56,  8.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1084/1563 [01:46<00:56,  8.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1085/1563 [01:46<00:56,  8.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1086/1563 [01:46<00:56,  8.40batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|██████▉   | 1087/1563 [01:46<00:56,  8.40batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|██████▉   | 1088/1563 [01:47<00:57,  8.30batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|██████▉   | 1089/1563 [01:47<00:56,  8.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|██████▉   | 1090/1563 [01:47<00:53,  8.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|██████▉   | 1091/1563 [01:47<00:53,  8.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|██████▉   | 1093/1563 [01:47<00:49,  9.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|██████▉   | 1094/1563 [01:47<00:49,  9.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|███████   | 1096/1563 [01:47<00:47,  9.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|███████   | 1098/1563 [01:48<00:45, 10.12batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|███████   | 1099/1563 [01:48<00:46, 10.08batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|███████   | 1101/1563 [01:48<00:45, 10.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████   | 1103/1563 [01:48<00:44, 10.27batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████   | 1105/1563 [01:48<00:44, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████   | 1107/1563 [01:48<00:43, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████   | 1109/1563 [01:49<00:43, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████   | 1111/1563 [01:49<00:43, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████   | 1113/1563 [01:49<00:43, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████▏  | 1115/1563 [01:49<00:42, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████▏  | 1117/1563 [01:49<00:42, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1119/1563 [01:50<00:42, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1121/1563 [01:50<00:42, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1123/1563 [01:50<00:42, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1125/1563 [01:50<00:41, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1127/1563 [01:50<00:42, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1129/1563 [01:51<00:42, 10.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1131/1563 [01:51<00:41, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1133/1563 [01:51<00:41, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1135/1563 [01:51<00:41, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1137/1563 [01:51<00:42, 10.07batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1139/1563 [01:52<00:41, 10.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1141/1563 [01:52<00:41, 10.23batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1143/1563 [01:52<00:40, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1145/1563 [01:52<00:40, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1147/1563 [01:52<00:39, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▎  | 1149/1563 [01:53<00:39, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▎  | 1151/1563 [01:53<00:39, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▍  | 1153/1563 [01:53<00:39, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▍  | 1155/1563 [01:53<00:39, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▍  | 1157/1563 [01:53<00:38, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▍  | 1159/1563 [01:54<00:39, 10.22batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▍  | 1161/1563 [01:54<00:38, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▍  | 1163/1563 [01:54<00:38, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▍  | 1165/1563 [01:54<00:38, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▍  | 1167/1563 [01:54<00:37, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▍  | 1169/1563 [01:54<00:37, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▍  | 1171/1563 [01:55<00:37, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▌  | 1173/1563 [01:55<00:37, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▌  | 1175/1563 [01:55<00:37, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▌  | 1177/1563 [01:55<00:36, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▌  | 1179/1563 [01:55<00:36, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▌  | 1181/1563 [01:56<00:36, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▌  | 1183/1563 [01:56<00:36, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▌  | 1185/1563 [01:56<00:36, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▌  | 1187/1563 [01:56<00:36, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▌  | 1189/1563 [01:56<00:35, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▌  | 1191/1563 [01:57<00:36, 10.33batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▋  | 1193/1563 [01:57<00:36, 10.09batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▋  | 1195/1563 [01:57<00:38,  9.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1196/1563 [01:57<00:38,  9.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1197/1563 [01:57<00:39,  9.33batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1198/1563 [01:57<00:39,  9.23batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1199/1563 [01:57<00:39,  9.11batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1200/1563 [01:58<00:39,  9.08batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1201/1563 [01:58<00:40,  9.03batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1202/1563 [01:58<00:41,  8.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1203/1563 [01:58<00:41,  8.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1204/1563 [01:58<00:42,  8.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1205/1563 [01:58<00:41,  8.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1206/1563 [01:58<00:41,  8.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1207/1563 [01:58<00:41,  8.57batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1208/1563 [01:59<00:41,  8.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1209/1563 [01:59<00:42,  8.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1210/1563 [01:59<00:42,  8.30batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1211/1563 [01:59<00:45,  7.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1212/1563 [01:59<00:42,  8.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1214/1563 [01:59<00:38,  9.18batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1215/1563 [01:59<00:37,  9.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1217/1563 [02:00<00:35,  9.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1219/1563 [02:00<00:34, 10.02batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1221/1563 [02:00<00:33, 10.18batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1223/1563 [02:00<00:32, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1225/1563 [02:00<00:32, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▊  | 1227/1563 [02:00<00:31, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▊  | 1229/1563 [02:01<00:31, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▉  | 1231/1563 [02:01<00:31, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▉  | 1233/1563 [02:01<00:31, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▉  | 1235/1563 [02:01<00:31, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▉  | 1237/1563 [02:01<00:30, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▉  | 1239/1563 [02:02<00:30, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▉  | 1241/1563 [02:02<00:31, 10.28batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|███████▉  | 1243/1563 [02:02<00:30, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|███████▉  | 1245/1563 [02:02<00:30, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|███████▉  | 1247/1563 [02:02<00:30, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|███████▉  | 1249/1563 [02:03<00:29, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|████████  | 1251/1563 [02:03<00:29, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|████████  | 1253/1563 [02:03<00:29, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|████████  | 1255/1563 [02:03<00:29, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|████████  | 1257/1563 [02:03<00:29, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████  | 1259/1563 [02:04<00:29, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████  | 1261/1563 [02:04<00:28, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████  | 1263/1563 [02:04<00:28, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████  | 1265/1563 [02:04<00:28, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████  | 1267/1563 [02:04<00:28, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████  | 1269/1563 [02:04<00:27, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████▏ | 1271/1563 [02:05<00:27, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████▏ | 1273/1563 [02:05<00:27, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1275/1563 [02:05<00:27, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1277/1563 [02:05<00:27, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1279/1563 [02:05<00:26, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1281/1563 [02:06<00:26, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1283/1563 [02:06<00:27, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1285/1563 [02:06<00:26, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1287/1563 [02:06<00:26, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1289/1563 [02:06<00:26, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1291/1563 [02:07<00:25, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1293/1563 [02:07<00:25, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1295/1563 [02:07<00:25, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1297/1563 [02:07<00:25, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1299/1563 [02:07<00:25, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1301/1563 [02:08<00:24, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1303/1563 [02:08<00:24, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1305/1563 [02:08<00:24, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▎ | 1307/1563 [02:08<00:24, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▎ | 1309/1563 [02:08<00:24, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▍ | 1311/1563 [02:08<00:24, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▍ | 1313/1563 [02:09<00:24, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▍ | 1315/1563 [02:09<00:24, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▍ | 1317/1563 [02:09<00:24, 10.08batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▍ | 1319/1563 [02:09<00:25,  9.73batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▍ | 1320/1563 [02:09<00:25,  9.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▍ | 1321/1563 [02:10<00:25,  9.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▍ | 1322/1563 [02:10<00:26,  9.22batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▍ | 1323/1563 [02:10<00:27,  8.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▍ | 1324/1563 [02:10<00:28,  8.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▍ | 1325/1563 [02:10<00:29,  8.19batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▍ | 1326/1563 [02:10<00:28,  8.25batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▍ | 1327/1563 [02:10<00:28,  8.22batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▍ | 1328/1563 [02:10<00:28,  8.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1329/1563 [02:11<00:28,  8.23batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1330/1563 [02:11<00:27,  8.40batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1331/1563 [02:11<00:27,  8.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1332/1563 [02:11<00:28,  8.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1333/1563 [02:11<00:28,  8.09batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1334/1563 [02:11<00:28,  8.15batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1335/1563 [02:11<00:27,  8.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1336/1563 [02:11<00:25,  8.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1337/1563 [02:11<00:25,  9.00batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1339/1563 [02:12<00:23,  9.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1341/1563 [02:12<00:22,  9.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1342/1563 [02:12<00:22,  9.83batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1343/1563 [02:12<00:22,  9.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1345/1563 [02:12<00:21,  9.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1346/1563 [02:12<00:21,  9.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1347/1563 [02:12<00:21,  9.96batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1348/1563 [02:13<00:21,  9.96batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▋ | 1350/1563 [02:13<00:20, 10.21batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1352/1563 [02:13<00:20, 10.23batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1354/1563 [02:13<00:20, 10.24batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1356/1563 [02:13<00:20, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1358/1563 [02:14<00:19, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1360/1563 [02:14<00:19, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1362/1563 [02:14<00:19, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1364/1563 [02:14<00:19, 10.24batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1366/1563 [02:14<00:19, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1368/1563 [02:14<00:18, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1370/1563 [02:15<00:18, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1372/1563 [02:15<00:18, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1374/1563 [02:15<00:18, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1376/1563 [02:15<00:18, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1378/1563 [02:15<00:17, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1380/1563 [02:16<00:17, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1382/1563 [02:16<00:17, 10.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▊ | 1384/1563 [02:16<00:17, 10.27batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▊ | 1386/1563 [02:16<00:17, 10.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▉ | 1388/1563 [02:16<00:17, 10.28batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▉ | 1390/1563 [02:17<00:16, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▉ | 1392/1563 [02:17<00:16, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▉ | 1394/1563 [02:17<00:16, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▉ | 1396/1563 [02:17<00:16, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▉ | 1398/1563 [02:17<00:15, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|████████▉ | 1400/1563 [02:18<00:15, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|████████▉ | 1402/1563 [02:18<00:15, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|████████▉ | 1404/1563 [02:18<00:15, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|████████▉ | 1406/1563 [02:18<00:15, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|█████████ | 1408/1563 [02:18<00:15, 10.33batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|█████████ | 1410/1563 [02:19<00:14, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|█████████ | 1412/1563 [02:19<00:14, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|█████████ | 1414/1563 [02:19<00:14, 10.33batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████ | 1416/1563 [02:19<00:14, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████ | 1418/1563 [02:19<00:14, 10.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████ | 1420/1563 [02:20<00:13, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████ | 1422/1563 [02:20<00:13, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████ | 1424/1563 [02:20<00:13, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████ | 1426/1563 [02:20<00:13, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████▏| 1428/1563 [02:20<00:12, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████▏| 1430/1563 [02:20<00:12, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1432/1563 [02:21<00:12, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1434/1563 [02:21<00:12, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1436/1563 [02:21<00:12, 10.30batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1438/1563 [02:21<00:12, 10.17batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1440/1563 [02:21<00:12,  9.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1441/1563 [02:22<00:13,  9.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1442/1563 [02:22<00:13,  9.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1443/1563 [02:22<00:13,  9.02batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1444/1563 [02:22<00:13,  8.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1445/1563 [02:22<00:13,  8.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1446/1563 [02:22<00:13,  8.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1447/1563 [02:22<00:14,  8.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1448/1563 [02:22<00:13,  8.31batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1449/1563 [02:23<00:13,  8.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1450/1563 [02:23<00:13,  8.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1451/1563 [02:23<00:13,  8.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1452/1563 [02:23<00:12,  8.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1453/1563 [02:23<00:12,  8.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1454/1563 [02:23<00:13,  8.31batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1455/1563 [02:23<00:13,  8.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1456/1563 [02:23<00:13,  8.15batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1457/1563 [02:24<00:13,  8.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1459/1563 [02:24<00:11,  9.09batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1460/1563 [02:24<00:11,  9.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1461/1563 [02:24<00:10,  9.28batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▎| 1462/1563 [02:24<00:10,  9.28batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▎| 1464/1563 [02:24<00:10,  9.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1466/1563 [02:24<00:09,  9.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1468/1563 [02:25<00:09, 10.13batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1470/1563 [02:25<00:09, 10.19batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1472/1563 [02:25<00:08, 10.22batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1474/1563 [02:25<00:08, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1476/1563 [02:25<00:08, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▍| 1478/1563 [02:26<00:08, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▍| 1480/1563 [02:26<00:07, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▍| 1482/1563 [02:26<00:07, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▍| 1484/1563 [02:26<00:07, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▌| 1486/1563 [02:26<00:07, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▌| 1488/1563 [02:27<00:07, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▌| 1490/1563 [02:27<00:06, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▌| 1492/1563 [02:27<00:06, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▌| 1494/1563 [02:27<00:06, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▌| 1496/1563 [02:27<00:06, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▌| 1498/1563 [02:27<00:06, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▌| 1500/1563 [02:28<00:06, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▌| 1502/1563 [02:28<00:05, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▌| 1504/1563 [02:28<00:05, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▋| 1506/1563 [02:28<00:05, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▋| 1508/1563 [02:28<00:05, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  97%|█████████▋| 1510/1563 [02:29<00:05, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  97%|█████████▋| 1512/1563 [02:29<00:04, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  97%|█████████▋| 1514/1563 [02:29<00:04, 10.33batch/s]\u001b[A\n",
-            "Epoch 1/2:  97%|█████████▋| 1516/1563 [02:29<00:04, 10.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  97%|█████████▋| 1518/1563 [02:29<00:04, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  97%|█████████▋| 1520/1563 [02:30<00:04, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  97%|█████████▋| 1522/1563 [02:30<00:03, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1524/1563 [02:30<00:03, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1526/1563 [02:30<00:03, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1528/1563 [02:30<00:03, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1530/1563 [02:31<00:03, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1532/1563 [02:31<00:02, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1534/1563 [02:31<00:02, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1536/1563 [02:31<00:02, 10.37batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1538/1563 [02:31<00:02, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▊| 1540/1563 [02:32<00:02, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▊| 1542/1563 [02:32<00:02, 10.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▉| 1544/1563 [02:32<00:01, 10.28batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▉| 1546/1563 [02:32<00:01, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▉| 1548/1563 [02:32<00:01, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▉| 1550/1563 [02:32<00:01, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▉| 1552/1563 [02:33<00:01, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▉| 1554/1563 [02:33<00:00, 10.51batch/s]\u001b[A\n",
-            "Epoch 1/2: 100%|█████████▉| 1556/1563 [02:33<00:00, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2: 100%|█████████▉| 1558/1563 [02:33<00:00, 10.43batch/s]\u001b[A\n",
-            "Epoch 1/2: 100%|█████████▉| 1560/1563 [02:33<00:00, 10.42batch/s]\u001b[A\n",
-            "Epoch 1/2: 100%|█████████▉| 1562/1563 [02:34<00:00, 10.06batch/s]\u001b[A\n",
-            "Epochs:  50%|█████     | 1/2 [02:34<02:34, 154.24s/epoch]"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Epoch 1/2 tamamlandı. Kayıp: 1.7931, Doğruluk: 33.94%\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "\n",
-            "Epoch 2/2:   0%|          | 0/1563 [00:00<?, ?batch/s]\u001b[A\n",
-            "Epoch 2/2:   0%|          | 1/1563 [00:00<03:19,  7.83batch/s]\u001b[A\n",
-            "Epoch 2/2:   0%|          | 2/1563 [00:00<03:19,  7.83batch/s]\u001b[A\n",
-            "Epoch 2/2:   0%|          | 3/1563 [00:00<03:06,  8.38batch/s]\u001b[A\n",
-            "Epoch 2/2:   0%|          | 4/1563 [00:00<03:00,  8.62batch/s]\u001b[A\n",
-            "Epoch 2/2:   0%|          | 5/1563 [00:00<03:02,  8.52batch/s]\u001b[A\n",
-            "Epoch 2/2:   0%|          | 6/1563 [00:00<03:04,  8.45batch/s]\u001b[A\n",
-            "Epoch 2/2:   0%|          | 7/1563 [00:00<03:04,  8.41batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 8/1563 [00:00<03:05,  8.37batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 9/1563 [00:01<03:06,  8.35batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 10/1563 [00:01<03:12,  8.06batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 11/1563 [00:01<03:09,  8.18batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 12/1563 [00:01<03:05,  8.38batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 13/1563 [00:01<03:02,  8.48batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 14/1563 [00:01<03:01,  8.53batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 15/1563 [00:01<03:00,  8.59batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 16/1563 [00:01<02:59,  8.64batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 17/1563 [00:02<03:06,  8.30batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 19/1563 [00:02<02:48,  9.17batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|▏         | 21/1563 [00:02<02:38,  9.73batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|▏         | 23/1563 [00:02<02:35,  9.93batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 25/1563 [00:02<02:31, 10.17batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 27/1563 [00:02<02:29, 10.29batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 29/1563 [00:03<02:31, 10.11batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 31/1563 [00:03<02:28, 10.29batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 33/1563 [00:03<02:27, 10.36batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 35/1563 [00:03<02:26, 10.41batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 37/1563 [00:03<02:26, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 39/1563 [00:04<02:25, 10.45batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 41/1563 [00:04<02:24, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 43/1563 [00:04<02:23, 10.63batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 45/1563 [00:04<02:23, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 47/1563 [00:04<02:23, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 49/1563 [00:05<02:23, 10.53batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 51/1563 [00:05<02:23, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 53/1563 [00:05<02:22, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▎         | 55/1563 [00:05<02:22, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▎         | 57/1563 [00:05<02:21, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▍         | 59/1563 [00:06<02:21, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▍         | 61/1563 [00:06<02:22, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▍         | 63/1563 [00:06<02:21, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▍         | 65/1563 [00:06<02:20, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▍         | 67/1563 [00:06<02:20, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▍         | 69/1563 [00:06<02:20, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▍         | 71/1563 [00:07<02:22, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▍         | 73/1563 [00:07<02:21, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▍         | 75/1563 [00:07<02:20, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▍         | 77/1563 [00:07<02:19, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▌         | 79/1563 [00:07<02:19, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▌         | 81/1563 [00:08<02:19, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▌         | 83/1563 [00:08<02:21, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▌         | 85/1563 [00:08<02:20, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▌         | 87/1563 [00:08<02:19, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▌         | 89/1563 [00:08<02:18, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▌         | 91/1563 [00:09<02:17, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▌         | 93/1563 [00:09<02:19, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▌         | 95/1563 [00:09<02:18, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▌         | 97/1563 [00:09<02:17, 10.63batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▋         | 99/1563 [00:09<02:17, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▋         | 101/1563 [00:09<02:18, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 103/1563 [00:10<02:19, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 105/1563 [00:10<02:19, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 107/1563 [00:10<02:17, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 109/1563 [00:10<02:17, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 111/1563 [00:10<02:16, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 113/1563 [00:11<02:16, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 115/1563 [00:11<02:17, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 117/1563 [00:11<02:16, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 119/1563 [00:11<02:16, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 121/1563 [00:11<02:17, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 123/1563 [00:12<02:23, 10.05batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 125/1563 [00:12<02:34,  9.28batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 126/1563 [00:12<02:36,  9.20batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 127/1563 [00:12<02:40,  8.96batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 128/1563 [00:12<02:39,  8.98batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 129/1563 [00:12<02:40,  8.94batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 130/1563 [00:12<02:41,  8.88batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 131/1563 [00:13<02:44,  8.71batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 132/1563 [00:13<02:46,  8.58batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▊         | 133/1563 [00:13<02:45,  8.64batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▊         | 134/1563 [00:13<02:47,  8.54batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▊         | 135/1563 [00:13<02:45,  8.61batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▊         | 136/1563 [00:13<02:46,  8.55batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 137/1563 [00:13<02:47,  8.51batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 138/1563 [00:13<02:47,  8.53batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 139/1563 [00:14<02:48,  8.44batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 140/1563 [00:14<02:50,  8.33batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 141/1563 [00:14<02:49,  8.37batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 143/1563 [00:14<02:36,  9.07batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 145/1563 [00:14<02:27,  9.64batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 147/1563 [00:14<02:21, 10.01batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 148/1563 [00:14<02:21,  9.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|▉         | 150/1563 [00:15<02:19, 10.11batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|▉         | 151/1563 [00:15<02:20, 10.05batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|▉         | 153/1563 [00:15<02:17, 10.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|▉         | 155/1563 [00:15<02:15, 10.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|█         | 157/1563 [00:15<02:14, 10.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|█         | 159/1563 [00:15<02:13, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|█         | 161/1563 [00:16<02:13, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|█         | 163/1563 [00:16<02:12, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█         | 165/1563 [00:16<02:12, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█         | 167/1563 [00:16<02:11, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█         | 169/1563 [00:16<02:12, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█         | 171/1563 [00:17<02:12, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█         | 173/1563 [00:17<02:13, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█         | 175/1563 [00:17<02:12, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█▏        | 177/1563 [00:17<02:11, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█▏        | 179/1563 [00:17<02:12, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 181/1563 [00:18<02:12, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 183/1563 [00:18<02:11, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 185/1563 [00:18<02:11, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 187/1563 [00:18<02:11, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 189/1563 [00:18<02:10, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 191/1563 [00:19<02:10, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 193/1563 [00:19<02:10, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 195/1563 [00:19<02:09, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 197/1563 [00:19<02:09, 10.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 199/1563 [00:19<02:08, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 201/1563 [00:19<02:07, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 203/1563 [00:20<02:07, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 205/1563 [00:20<02:06, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 207/1563 [00:20<02:05, 10.77batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 209/1563 [00:20<02:07, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 211/1563 [00:20<02:06, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  14%|█▎        | 213/1563 [00:21<02:06, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  14%|█▍        | 215/1563 [00:21<02:07, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  14%|█▍        | 217/1563 [00:21<02:06, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  14%|█▍        | 219/1563 [00:21<02:07, 10.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  14%|█▍        | 221/1563 [00:21<02:06, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  14%|█▍        | 223/1563 [00:22<02:05, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  14%|█▍        | 225/1563 [00:22<02:06, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▍        | 227/1563 [00:22<02:05, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▍        | 229/1563 [00:22<02:05, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▍        | 231/1563 [00:22<02:05, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▍        | 233/1563 [00:22<02:05, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▌        | 235/1563 [00:23<02:05, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▌        | 237/1563 [00:23<02:04, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▌        | 239/1563 [00:23<02:04, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▌        | 241/1563 [00:23<02:03, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▌        | 243/1563 [00:23<02:03, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▌        | 245/1563 [00:24<02:02, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▌        | 247/1563 [00:24<02:07, 10.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▌        | 249/1563 [00:24<02:13,  9.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▌        | 250/1563 [00:24<02:16,  9.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▌        | 251/1563 [00:24<02:19,  9.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▌        | 252/1563 [00:24<02:20,  9.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▌        | 253/1563 [00:24<02:21,  9.29batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▋        | 254/1563 [00:25<02:22,  9.21batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▋        | 255/1563 [00:25<02:26,  8.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▋        | 256/1563 [00:25<02:27,  8.87batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▋        | 257/1563 [00:25<02:28,  8.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 258/1563 [00:25<02:30,  8.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 259/1563 [00:25<02:32,  8.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 260/1563 [00:25<02:33,  8.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 261/1563 [00:25<02:36,  8.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 262/1563 [00:26<02:37,  8.27batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 263/1563 [00:26<02:35,  8.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 264/1563 [00:26<02:34,  8.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 265/1563 [00:26<02:33,  8.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 267/1563 [00:26<02:19,  9.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 269/1563 [00:26<02:13,  9.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 271/1563 [00:26<02:08, 10.02batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 273/1563 [00:27<02:06, 10.20batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 275/1563 [00:27<02:04, 10.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 277/1563 [00:27<02:03, 10.41batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 279/1563 [00:27<02:02, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 281/1563 [00:27<02:03, 10.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 283/1563 [00:28<02:02, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 285/1563 [00:28<02:03, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 287/1563 [00:28<02:02, 10.41batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 289/1563 [00:28<02:01, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▊        | 291/1563 [00:28<02:01, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▊        | 293/1563 [00:29<02:01, 10.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▉        | 295/1563 [00:29<02:01, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▉        | 297/1563 [00:29<02:00, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▉        | 299/1563 [00:29<02:00, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▉        | 301/1563 [00:29<01:59, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▉        | 303/1563 [00:30<01:58, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|█▉        | 305/1563 [00:30<01:59, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|█▉        | 307/1563 [00:30<01:58, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|█▉        | 309/1563 [00:30<01:57, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|█▉        | 311/1563 [00:30<01:57, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|██        | 313/1563 [00:30<01:57, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|██        | 315/1563 [00:31<01:56, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|██        | 317/1563 [00:31<01:57, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|██        | 319/1563 [00:31<01:56, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██        | 321/1563 [00:31<01:56, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██        | 323/1563 [00:31<01:56, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██        | 325/1563 [00:32<01:56, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██        | 327/1563 [00:32<01:55, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██        | 329/1563 [00:32<01:55, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██        | 331/1563 [00:32<01:54, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██▏       | 333/1563 [00:32<01:54, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██▏       | 335/1563 [00:32<01:54, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 337/1563 [00:33<01:57, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 339/1563 [00:33<01:56, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 341/1563 [00:33<01:55, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 343/1563 [00:33<01:54, 10.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 345/1563 [00:33<01:55, 10.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 347/1563 [00:34<01:55, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 349/1563 [00:34<01:54, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 351/1563 [00:34<01:53, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 353/1563 [00:34<01:53, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 355/1563 [00:34<01:53, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 357/1563 [00:35<01:55, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 359/1563 [00:35<01:55, 10.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 361/1563 [00:35<01:54, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 363/1563 [00:35<01:53, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 365/1563 [00:35<01:53, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 367/1563 [00:36<01:53, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▎       | 369/1563 [00:36<01:53, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▎       | 371/1563 [00:36<01:54, 10.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 373/1563 [00:36<02:00,  9.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 374/1563 [00:36<02:03,  9.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 375/1563 [00:36<02:05,  9.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 376/1563 [00:36<02:07,  9.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 377/1563 [00:37<02:09,  9.17batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 378/1563 [00:37<02:15,  8.77batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 379/1563 [00:37<02:14,  8.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 380/1563 [00:37<02:13,  8.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 381/1563 [00:37<02:13,  8.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 382/1563 [00:37<02:12,  8.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▍       | 383/1563 [00:37<02:12,  8.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▍       | 384/1563 [00:37<02:13,  8.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▍       | 385/1563 [00:38<02:14,  8.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▍       | 386/1563 [00:38<02:14,  8.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▍       | 387/1563 [00:38<02:17,  8.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▍       | 388/1563 [00:38<02:16,  8.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▍       | 389/1563 [00:38<02:15,  8.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▍       | 390/1563 [00:38<02:16,  8.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▌       | 392/1563 [00:38<02:04,  9.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▌       | 394/1563 [00:38<01:58,  9.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▌       | 396/1563 [00:39<01:56, 10.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▌       | 398/1563 [00:39<01:54, 10.19batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▌       | 400/1563 [00:39<01:52, 10.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▌       | 402/1563 [00:39<01:51, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▌       | 404/1563 [00:39<01:50, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▌       | 406/1563 [00:40<01:51, 10.41batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▌       | 408/1563 [00:40<01:50, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▌       | 410/1563 [00:40<01:49, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▋       | 412/1563 [00:40<01:49, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▋       | 414/1563 [00:40<01:48, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 416/1563 [00:41<01:48, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 418/1563 [00:41<01:51, 10.29batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 420/1563 [00:41<01:50, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 422/1563 [00:41<01:49, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 424/1563 [00:41<01:48, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 426/1563 [00:42<01:47, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 428/1563 [00:42<01:47, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 430/1563 [00:42<01:46, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 432/1563 [00:42<01:46, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 434/1563 [00:42<01:46, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 436/1563 [00:42<01:45, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 438/1563 [00:43<01:46, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 440/1563 [00:43<01:45, 10.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 442/1563 [00:43<01:44, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 444/1563 [00:43<01:44, 10.71batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▊       | 446/1563 [00:43<01:44, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▊       | 448/1563 [00:44<01:44, 10.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▉       | 450/1563 [00:44<01:46, 10.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▉       | 452/1563 [00:44<01:45, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▉       | 454/1563 [00:44<01:44, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▉       | 456/1563 [00:44<01:44, 10.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▉       | 458/1563 [00:45<01:44, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▉       | 460/1563 [00:45<01:43, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|██▉       | 462/1563 [00:45<01:43, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|██▉       | 464/1563 [00:45<01:42, 10.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|██▉       | 466/1563 [00:45<01:42, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|██▉       | 468/1563 [00:45<01:42, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|███       | 470/1563 [00:46<01:43, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|███       | 472/1563 [00:46<01:42, 10.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|███       | 474/1563 [00:46<01:42, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|███       | 476/1563 [00:46<01:42, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███       | 478/1563 [00:46<01:42, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███       | 480/1563 [00:47<01:42, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███       | 482/1563 [00:47<01:43, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███       | 484/1563 [00:47<01:44, 10.31batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███       | 486/1563 [00:47<01:45, 10.23batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███       | 488/1563 [00:47<01:49,  9.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███▏      | 489/1563 [00:48<01:49,  9.77batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███▏      | 490/1563 [00:48<01:49,  9.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███▏      | 492/1563 [00:48<01:46, 10.02batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 494/1563 [00:48<01:44, 10.26batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 496/1563 [00:48<01:46,  9.98batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 497/1563 [00:48<01:49,  9.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 498/1563 [00:48<01:52,  9.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 499/1563 [00:49<01:56,  9.15batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 500/1563 [00:49<01:59,  8.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 501/1563 [00:49<01:59,  8.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 502/1563 [00:49<02:02,  8.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 503/1563 [00:49<02:03,  8.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 504/1563 [00:49<02:01,  8.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 505/1563 [00:49<01:59,  8.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 506/1563 [00:49<01:59,  8.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 507/1563 [00:50<02:04,  8.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 508/1563 [00:50<02:05,  8.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 509/1563 [00:50<02:06,  8.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 510/1563 [00:50<02:05,  8.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 511/1563 [00:50<02:09,  8.14batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 512/1563 [00:50<02:08,  8.16batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 513/1563 [00:50<02:07,  8.24batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 514/1563 [00:50<02:06,  8.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 516/1563 [00:51<01:54,  9.18batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 518/1563 [00:51<01:48,  9.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 519/1563 [00:51<01:47,  9.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 521/1563 [00:51<01:44,  9.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 523/1563 [00:51<01:42, 10.18batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▎      | 525/1563 [00:51<01:40, 10.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▎      | 527/1563 [00:52<01:38, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▍      | 529/1563 [00:52<01:39, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▍      | 531/1563 [00:52<01:38, 10.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▍      | 533/1563 [00:52<01:37, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▍      | 535/1563 [00:52<01:36, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▍      | 537/1563 [00:53<01:36, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▍      | 539/1563 [00:53<01:36, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▍      | 541/1563 [00:53<01:36, 10.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▍      | 543/1563 [00:53<01:35, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▍      | 545/1563 [00:53<01:35, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▍      | 547/1563 [00:53<01:35, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▌      | 549/1563 [00:54<01:36, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▌      | 551/1563 [00:54<01:36, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▌      | 553/1563 [00:54<01:35, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▌      | 555/1563 [00:54<01:35, 10.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▌      | 557/1563 [00:54<01:35, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▌      | 559/1563 [00:55<01:35, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▌      | 561/1563 [00:55<01:34, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▌      | 563/1563 [00:55<01:34, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▌      | 565/1563 [00:55<01:35, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▋      | 567/1563 [00:55<01:34, 10.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▋      | 569/1563 [00:56<01:34, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 571/1563 [00:56<01:34, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 573/1563 [00:56<01:33, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 575/1563 [00:56<01:34, 10.41batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 577/1563 [00:56<01:34, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 579/1563 [00:57<01:33, 10.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 581/1563 [00:57<01:33, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 583/1563 [00:57<01:32, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 585/1563 [00:57<01:31, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 587/1563 [00:57<01:32, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 589/1563 [00:57<01:32, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 591/1563 [00:58<01:31, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 593/1563 [00:58<01:31, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 595/1563 [00:58<01:30, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 597/1563 [00:58<01:30, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 599/1563 [00:58<01:30, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 601/1563 [00:59<01:29, 10.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▊      | 603/1563 [00:59<01:31, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▊      | 605/1563 [00:59<01:30, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▉      | 607/1563 [00:59<01:30, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▉      | 609/1563 [00:59<01:29, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▉      | 611/1563 [01:00<01:29, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▉      | 613/1563 [01:00<01:29, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▉      | 615/1563 [01:00<01:29, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▉      | 617/1563 [01:00<01:29, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|███▉      | 619/1563 [01:00<01:29, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|███▉      | 621/1563 [01:01<01:34, 10.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|███▉      | 623/1563 [01:01<01:38,  9.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|███▉      | 624/1563 [01:01<01:42,  9.18batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|███▉      | 625/1563 [01:01<01:44,  8.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|████      | 626/1563 [01:01<01:46,  8.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|████      | 627/1563 [01:01<01:50,  8.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|████      | 628/1563 [01:01<01:51,  8.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|████      | 629/1563 [01:02<01:51,  8.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|████      | 630/1563 [01:02<01:54,  8.18batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|████      | 631/1563 [01:02<01:54,  8.12batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|████      | 632/1563 [01:02<01:55,  8.05batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|████      | 633/1563 [01:02<01:53,  8.22batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████      | 634/1563 [01:02<01:51,  8.33batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████      | 635/1563 [01:02<01:51,  8.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████      | 636/1563 [01:02<01:52,  8.25batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████      | 637/1563 [01:02<01:51,  8.27batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████      | 639/1563 [01:03<01:41,  9.13batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████      | 641/1563 [01:03<01:35,  9.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████      | 643/1563 [01:03<01:31, 10.06batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████▏     | 645/1563 [01:03<01:29, 10.26batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████▏     | 647/1563 [01:03<01:27, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 649/1563 [01:04<01:27, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 651/1563 [01:04<01:27, 10.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 653/1563 [01:04<01:26, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 655/1563 [01:04<01:26, 10.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 657/1563 [01:04<01:26, 10.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 659/1563 [01:05<01:26, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 661/1563 [01:05<01:25, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 663/1563 [01:05<01:25, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 665/1563 [01:05<01:24, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 667/1563 [01:05<01:24, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 669/1563 [01:06<01:25, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 671/1563 [01:06<01:25, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 673/1563 [01:06<01:24, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 675/1563 [01:06<01:24, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 677/1563 [01:06<01:24, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 679/1563 [01:06<01:24, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▎     | 681/1563 [01:07<01:25, 10.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▎     | 683/1563 [01:07<01:23, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▍     | 685/1563 [01:07<01:23, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▍     | 687/1563 [01:07<01:23, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▍     | 689/1563 [01:07<01:23, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▍     | 691/1563 [01:08<01:22, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▍     | 693/1563 [01:08<01:22, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▍     | 695/1563 [01:08<01:23, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▍     | 697/1563 [01:08<01:22, 10.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▍     | 699/1563 [01:08<01:22, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▍     | 701/1563 [01:09<01:23, 10.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▍     | 703/1563 [01:09<01:22, 10.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▌     | 705/1563 [01:09<01:22, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▌     | 707/1563 [01:09<01:21, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▌     | 709/1563 [01:09<01:20, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▌     | 711/1563 [01:10<01:21, 10.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▌     | 713/1563 [01:10<01:21, 10.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▌     | 715/1563 [01:10<01:20, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▌     | 717/1563 [01:10<01:19, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▌     | 719/1563 [01:10<01:19, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▌     | 721/1563 [01:10<01:19, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▋     | 723/1563 [01:11<01:21, 10.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▋     | 725/1563 [01:11<01:20, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 727/1563 [01:11<01:19, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 729/1563 [01:11<01:19, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 731/1563 [01:11<01:18, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 733/1563 [01:12<01:19, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 735/1563 [01:12<01:19, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 737/1563 [01:12<01:18, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 739/1563 [01:12<01:18, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 741/1563 [01:12<01:18, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 743/1563 [01:13<01:20, 10.12batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 745/1563 [01:13<01:24,  9.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 746/1563 [01:13<01:26,  9.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 747/1563 [01:13<01:29,  9.16batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 748/1563 [01:13<01:29,  9.06batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 749/1563 [01:13<01:30,  9.02batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 750/1563 [01:13<01:30,  8.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 751/1563 [01:14<01:30,  8.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 752/1563 [01:14<01:32,  8.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 753/1563 [01:14<01:37,  8.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 754/1563 [01:14<01:34,  8.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 755/1563 [01:14<01:33,  8.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 756/1563 [01:14<01:33,  8.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 757/1563 [01:14<01:33,  8.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 758/1563 [01:14<01:36,  8.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▊     | 759/1563 [01:14<01:35,  8.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▊     | 760/1563 [01:15<01:36,  8.31batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▊     | 761/1563 [01:15<01:39,  8.09batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▉     | 763/1563 [01:15<01:27,  9.11batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▉     | 765/1563 [01:15<01:22,  9.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▉     | 767/1563 [01:15<01:19,  9.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▉     | 769/1563 [01:15<01:17, 10.25batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▉     | 771/1563 [01:16<01:16, 10.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▉     | 773/1563 [01:16<01:16, 10.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|████▉     | 775/1563 [01:16<01:15, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|████▉     | 777/1563 [01:16<01:15, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|████▉     | 779/1563 [01:16<01:14, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|████▉     | 781/1563 [01:17<01:14, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|█████     | 783/1563 [01:17<01:15, 10.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|█████     | 785/1563 [01:17<01:14, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|█████     | 787/1563 [01:17<01:13, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|█████     | 789/1563 [01:17<01:13, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 791/1563 [01:18<01:14, 10.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 793/1563 [01:18<01:14, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 795/1563 [01:18<01:13, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 797/1563 [01:18<01:13, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 799/1563 [01:18<01:12, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 801/1563 [01:19<01:11, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████▏    | 803/1563 [01:19<01:12, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 805/1563 [01:19<01:12, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 807/1563 [01:19<01:12, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 809/1563 [01:19<01:12, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 811/1563 [01:19<01:11, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 813/1563 [01:20<01:11, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 815/1563 [01:20<01:11, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 817/1563 [01:20<01:11, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 819/1563 [01:20<01:10, 10.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 821/1563 [01:20<01:11, 10.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 823/1563 [01:21<01:10, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 825/1563 [01:21<01:10, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 827/1563 [01:21<01:10, 10.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 829/1563 [01:21<01:09, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 831/1563 [01:21<01:09, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 833/1563 [01:22<01:08, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 835/1563 [01:22<01:09, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▎    | 837/1563 [01:22<01:09, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▎    | 839/1563 [01:22<01:08, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▍    | 841/1563 [01:22<01:08, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▍    | 843/1563 [01:23<01:08, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▍    | 845/1563 [01:23<01:08, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▍    | 847/1563 [01:23<01:08, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▍    | 849/1563 [01:23<01:07, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▍    | 851/1563 [01:23<01:07, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▍    | 853/1563 [01:23<01:07, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▍    | 855/1563 [01:24<01:07, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▍    | 857/1563 [01:24<01:06, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▍    | 859/1563 [01:24<01:07, 10.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▌    | 861/1563 [01:24<01:07, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▌    | 863/1563 [01:24<01:07, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▌    | 865/1563 [01:25<01:07, 10.41batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▌    | 867/1563 [01:25<01:09, 10.07batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 869/1563 [01:25<01:12,  9.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 870/1563 [01:25<01:14,  9.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 871/1563 [01:25<01:14,  9.27batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 872/1563 [01:25<01:14,  9.25batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 873/1563 [01:26<01:14,  9.23batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 874/1563 [01:26<01:16,  9.03batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 875/1563 [01:26<01:17,  8.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 876/1563 [01:26<01:16,  8.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 877/1563 [01:26<01:17,  8.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 878/1563 [01:26<01:17,  8.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 879/1563 [01:26<01:17,  8.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▋    | 880/1563 [01:26<01:17,  8.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▋    | 881/1563 [01:26<01:17,  8.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▋    | 882/1563 [01:27<01:19,  8.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▋    | 883/1563 [01:27<01:20,  8.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 884/1563 [01:27<01:19,  8.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 885/1563 [01:27<01:19,  8.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 886/1563 [01:27<01:18,  8.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 888/1563 [01:27<01:11,  9.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 890/1563 [01:27<01:08,  9.89batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 892/1563 [01:28<01:06, 10.14batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 893/1563 [01:28<01:07,  9.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 895/1563 [01:28<01:05, 10.17batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 897/1563 [01:28<01:05, 10.21batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 899/1563 [01:28<01:04, 10.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 901/1563 [01:28<01:03, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 903/1563 [01:29<01:03, 10.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 905/1563 [01:29<01:02, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 907/1563 [01:29<01:02, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 909/1563 [01:29<01:02, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 911/1563 [01:29<01:02, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 913/1563 [01:30<01:01, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▊    | 915/1563 [01:30<01:01, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▊    | 917/1563 [01:30<01:01, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▉    | 919/1563 [01:30<01:01, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▉    | 921/1563 [01:30<01:01, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▉    | 923/1563 [01:31<01:00, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▉    | 925/1563 [01:31<01:00, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▉    | 927/1563 [01:31<01:00, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▉    | 929/1563 [01:31<00:59, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|█████▉    | 931/1563 [01:31<00:59, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|█████▉    | 933/1563 [01:31<00:59, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|█████▉    | 935/1563 [01:32<00:59, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|█████▉    | 937/1563 [01:32<00:59, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|██████    | 939/1563 [01:32<00:59, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|██████    | 941/1563 [01:32<00:59, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|██████    | 943/1563 [01:32<00:59, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|██████    | 945/1563 [01:33<00:59, 10.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████    | 947/1563 [01:33<00:58, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████    | 949/1563 [01:33<00:58, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████    | 951/1563 [01:33<00:58, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████    | 953/1563 [01:33<00:58, 10.41batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████    | 955/1563 [01:34<00:58, 10.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████    | 957/1563 [01:34<00:58, 10.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████▏   | 959/1563 [01:34<00:58, 10.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████▏   | 961/1563 [01:34<00:58, 10.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  62%|██████▏   | 963/1563 [01:34<00:58, 10.27batch/s]\u001b[A\n",
-            "Epoch 2/2:  62%|██████▏   | 965/1563 [01:35<00:57, 10.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  62%|██████▏   | 967/1563 [01:35<00:57, 10.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  62%|██████▏   | 969/1563 [01:35<00:57, 10.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  62%|██████▏   | 971/1563 [01:35<00:56, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  62%|██████▏   | 973/1563 [01:35<00:56, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  62%|██████▏   | 975/1563 [01:36<00:56, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 977/1563 [01:36<00:56, 10.41batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 979/1563 [01:36<00:56, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 981/1563 [01:36<00:56, 10.33batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 983/1563 [01:36<00:57, 10.17batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 985/1563 [01:37<00:56, 10.26batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 987/1563 [01:37<00:56, 10.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 989/1563 [01:37<00:55, 10.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 991/1563 [01:37<00:56, 10.07batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▎   | 993/1563 [01:37<01:00,  9.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▎   | 994/1563 [01:37<01:01,  9.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▎   | 995/1563 [01:38<01:01,  9.22batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▎   | 996/1563 [01:38<01:03,  8.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 997/1563 [01:38<01:03,  8.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 998/1563 [01:38<01:05,  8.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 999/1563 [01:38<01:05,  8.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 1000/1563 [01:38<01:06,  8.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 1001/1563 [01:38<01:06,  8.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 1002/1563 [01:38<01:08,  8.23batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 1003/1563 [01:39<01:07,  8.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 1004/1563 [01:39<01:07,  8.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 1005/1563 [01:39<01:07,  8.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 1006/1563 [01:39<01:06,  8.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 1007/1563 [01:39<01:05,  8.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 1008/1563 [01:39<01:05,  8.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▍   | 1009/1563 [01:39<01:06,  8.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▍   | 1011/1563 [01:39<01:00,  9.07batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▍   | 1012/1563 [01:40<00:59,  9.24batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▍   | 1013/1563 [01:40<00:58,  9.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▍   | 1015/1563 [01:40<00:55,  9.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▌   | 1016/1563 [01:40<00:55,  9.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▌   | 1017/1563 [01:40<00:55,  9.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▌   | 1018/1563 [01:40<00:55,  9.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▌   | 1019/1563 [01:40<00:54,  9.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▌   | 1021/1563 [01:40<00:53, 10.22batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▌   | 1023/1563 [01:41<00:52, 10.31batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▌   | 1025/1563 [01:41<00:52, 10.29batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▌   | 1027/1563 [01:41<00:51, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▌   | 1029/1563 [01:41<00:51, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▌   | 1031/1563 [01:41<00:51, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▌   | 1033/1563 [01:42<00:51, 10.25batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▌   | 1035/1563 [01:42<00:51, 10.29batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▋   | 1037/1563 [01:42<00:50, 10.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▋   | 1039/1563 [01:42<00:50, 10.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1041/1563 [01:42<00:50, 10.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1043/1563 [01:43<00:50, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1045/1563 [01:43<00:49, 10.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1047/1563 [01:43<00:49, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1049/1563 [01:43<00:48, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1051/1563 [01:43<00:49, 10.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1053/1563 [01:44<00:48, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1055/1563 [01:44<00:49, 10.33batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1057/1563 [01:44<00:48, 10.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1059/1563 [01:44<00:48, 10.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1061/1563 [01:44<00:47, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1063/1563 [01:44<00:47, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1065/1563 [01:45<00:48, 10.27batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1067/1563 [01:45<00:47, 10.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1069/1563 [01:45<00:47, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▊   | 1071/1563 [01:45<00:47, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▊   | 1073/1563 [01:45<00:46, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▉   | 1075/1563 [01:46<00:46, 10.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▉   | 1077/1563 [01:46<00:46, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▉   | 1079/1563 [01:46<00:46, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▉   | 1081/1563 [01:46<00:46, 10.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▉   | 1083/1563 [01:46<00:46, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▉   | 1085/1563 [01:47<00:46, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|██████▉   | 1087/1563 [01:47<00:46, 10.25batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|██████▉   | 1089/1563 [01:47<00:46, 10.24batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|██████▉   | 1091/1563 [01:47<00:45, 10.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|██████▉   | 1093/1563 [01:47<00:45, 10.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|███████   | 1095/1563 [01:48<00:45, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|███████   | 1097/1563 [01:48<00:46,  9.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|███████   | 1099/1563 [01:48<00:45, 10.13batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|███████   | 1101/1563 [01:48<00:45, 10.16batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████   | 1103/1563 [01:48<00:45, 10.21batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████   | 1105/1563 [01:49<00:44, 10.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████   | 1107/1563 [01:49<00:44, 10.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████   | 1109/1563 [01:49<00:44, 10.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████   | 1111/1563 [01:49<00:43, 10.33batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████   | 1113/1563 [01:49<00:45, 10.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████▏  | 1115/1563 [01:50<00:46,  9.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████▏  | 1116/1563 [01:50<00:47,  9.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████▏  | 1117/1563 [01:50<00:48,  9.20batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1118/1563 [01:50<00:48,  9.17batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1119/1563 [01:50<00:49,  9.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1120/1563 [01:50<00:49,  9.01batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1121/1563 [01:50<00:49,  8.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1122/1563 [01:50<00:49,  8.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1123/1563 [01:50<00:49,  8.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1124/1563 [01:51<00:51,  8.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1125/1563 [01:51<00:52,  8.33batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1126/1563 [01:51<00:53,  8.16batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1127/1563 [01:51<00:53,  8.18batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1128/1563 [01:51<00:53,  8.06batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1129/1563 [01:51<00:53,  8.11batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1130/1563 [01:51<00:53,  8.12batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1131/1563 [01:51<00:52,  8.19batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1132/1563 [01:52<00:52,  8.24batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1133/1563 [01:52<00:50,  8.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1134/1563 [01:52<00:48,  8.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1135/1563 [01:52<00:46,  9.15batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1136/1563 [01:52<00:45,  9.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1138/1563 [01:52<00:43,  9.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1140/1563 [01:52<00:41, 10.09batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1141/1563 [01:52<00:41, 10.06batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1142/1563 [01:53<00:42, 10.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1143/1563 [01:53<00:42,  9.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1144/1563 [01:53<00:42,  9.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1145/1563 [01:53<00:42,  9.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1147/1563 [01:53<00:41, 10.08batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1148/1563 [01:53<00:41, 10.03batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▎  | 1149/1563 [01:53<00:41, 10.01batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▎  | 1150/1563 [01:53<00:41,  9.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▎  | 1152/1563 [01:54<00:40, 10.19batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▍  | 1154/1563 [01:54<00:40, 10.10batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▍  | 1156/1563 [01:54<00:40,  9.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▍  | 1158/1563 [01:54<00:39, 10.13batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▍  | 1160/1563 [01:54<00:39, 10.24batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▍  | 1162/1563 [01:55<00:38, 10.33batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▍  | 1164/1563 [01:55<00:38, 10.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▍  | 1166/1563 [01:55<00:38, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▍  | 1168/1563 [01:55<00:37, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▍  | 1170/1563 [01:55<00:37, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▍  | 1172/1563 [01:56<00:37, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▌  | 1174/1563 [01:56<00:37, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▌  | 1176/1563 [01:56<00:36, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▌  | 1178/1563 [01:56<00:36, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▌  | 1180/1563 [01:56<00:36, 10.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1182/1563 [01:56<00:36, 10.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1184/1563 [01:57<00:36, 10.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1186/1563 [01:57<00:35, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1188/1563 [01:57<00:36, 10.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1190/1563 [01:57<00:36, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▋  | 1192/1563 [01:57<00:35, 10.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▋  | 1194/1563 [01:58<00:35, 10.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1196/1563 [01:58<00:35, 10.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1198/1563 [01:58<00:35, 10.26batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1200/1563 [01:58<00:35, 10.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1202/1563 [01:58<00:34, 10.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1204/1563 [01:59<00:34, 10.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1206/1563 [01:59<00:34, 10.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1208/1563 [01:59<00:34, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1210/1563 [01:59<00:33, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1212/1563 [01:59<00:33, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1214/1563 [02:00<00:33, 10.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1216/1563 [02:00<00:33, 10.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1218/1563 [02:00<00:33, 10.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1220/1563 [02:00<00:33, 10.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1222/1563 [02:00<00:33, 10.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1224/1563 [02:01<00:32, 10.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1226/1563 [02:01<00:32, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▊  | 1228/1563 [02:01<00:32, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▊  | 1230/1563 [02:01<00:32, 10.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▉  | 1232/1563 [02:01<00:31, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▉  | 1234/1563 [02:01<00:31, 10.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▉  | 1236/1563 [02:02<00:32, 10.07batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▉  | 1238/1563 [02:02<00:33,  9.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▉  | 1239/1563 [02:02<00:34,  9.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▉  | 1240/1563 [02:02<00:34,  9.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▉  | 1241/1563 [02:02<00:35,  9.09batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▉  | 1242/1563 [02:02<00:35,  9.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|███████▉  | 1243/1563 [02:03<00:35,  9.01batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|███████▉  | 1244/1563 [02:03<00:35,  9.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|███████▉  | 1245/1563 [02:03<00:36,  8.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|███████▉  | 1246/1563 [02:03<00:36,  8.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|███████▉  | 1247/1563 [02:03<00:37,  8.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|███████▉  | 1248/1563 [02:03<00:36,  8.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|███████▉  | 1249/1563 [02:03<00:37,  8.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|███████▉  | 1250/1563 [02:03<00:37,  8.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|████████  | 1251/1563 [02:03<00:36,  8.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|████████  | 1252/1563 [02:04<00:37,  8.26batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|████████  | 1253/1563 [02:04<00:37,  8.20batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|████████  | 1254/1563 [02:04<00:37,  8.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|████████  | 1255/1563 [02:04<00:36,  8.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|████████  | 1256/1563 [02:04<00:34,  8.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|████████  | 1257/1563 [02:04<00:33,  9.11batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|████████  | 1258/1563 [02:04<00:33,  9.07batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████  | 1259/1563 [02:04<00:32,  9.27batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████  | 1261/1563 [02:05<00:30,  9.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████  | 1262/1563 [02:05<00:30,  9.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████  | 1263/1563 [02:05<00:30,  9.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████  | 1265/1563 [02:05<00:29, 10.10batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████  | 1266/1563 [02:05<00:29,  9.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████  | 1267/1563 [02:05<00:29,  9.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████  | 1268/1563 [02:05<00:30,  9.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████  | 1269/1563 [02:05<00:30,  9.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████▏ | 1271/1563 [02:06<00:28, 10.07batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████▏ | 1272/1563 [02:06<00:29, 10.02batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████▏ | 1273/1563 [02:06<00:29,  9.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1274/1563 [02:06<00:29,  9.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1275/1563 [02:06<00:29,  9.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1276/1563 [02:06<00:28,  9.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1278/1563 [02:06<00:27, 10.19batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1280/1563 [02:06<00:27, 10.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1282/1563 [02:07<00:27, 10.31batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1284/1563 [02:07<00:27, 10.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1286/1563 [02:07<00:26, 10.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1288/1563 [02:07<00:26, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1290/1563 [02:07<00:26, 10.26batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1292/1563 [02:08<00:26, 10.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1294/1563 [02:08<00:26, 10.26batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1296/1563 [02:08<00:26, 10.21batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1298/1563 [02:08<00:25, 10.24batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1300/1563 [02:08<00:25, 10.23batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1302/1563 [02:09<00:25, 10.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1304/1563 [02:09<00:25, 10.24batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▎ | 1306/1563 [02:09<00:25, 10.25batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▎ | 1308/1563 [02:09<00:24, 10.29batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1310/1563 [02:09<00:25, 10.11batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1312/1563 [02:10<00:24, 10.20batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1314/1563 [02:10<00:24, 10.27batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1316/1563 [02:10<00:23, 10.31batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1318/1563 [02:10<00:23, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1320/1563 [02:10<00:23, 10.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▍ | 1322/1563 [02:11<00:23, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▍ | 1324/1563 [02:11<00:23, 10.27batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▍ | 1326/1563 [02:11<00:22, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▍ | 1328/1563 [02:11<00:22, 10.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▌ | 1330/1563 [02:11<00:22, 10.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▌ | 1332/1563 [02:11<00:22, 10.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▌ | 1334/1563 [02:12<00:22, 10.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▌ | 1336/1563 [02:12<00:21, 10.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  86%|████████▌ | 1338/1563 [02:12<00:21, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  86%|████████▌ | 1340/1563 [02:12<00:21, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  86%|████████▌ | 1342/1563 [02:12<00:21, 10.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  86%|████████▌ | 1344/1563 [02:13<00:21, 10.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  86%|████████▌ | 1346/1563 [02:13<00:21, 10.24batch/s]\u001b[A\n",
-            "Epoch 2/2:  86%|████████▌ | 1348/1563 [02:13<00:20, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  86%|████████▋ | 1350/1563 [02:13<00:20, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1352/1563 [02:13<00:20, 10.41batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1354/1563 [02:14<00:20, 10.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1356/1563 [02:14<00:20, 10.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1358/1563 [02:14<00:20, 10.05batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1360/1563 [02:14<00:21,  9.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1361/1563 [02:14<00:21,  9.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1362/1563 [02:15<00:22,  8.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1363/1563 [02:15<00:23,  8.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1364/1563 [02:15<00:23,  8.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1365/1563 [02:15<00:23,  8.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1366/1563 [02:15<00:22,  8.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1367/1563 [02:15<00:22,  8.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1368/1563 [02:15<00:22,  8.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1369/1563 [02:15<00:22,  8.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1370/1563 [02:15<00:22,  8.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1371/1563 [02:16<00:23,  8.24batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1372/1563 [02:16<00:23,  8.18batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1373/1563 [02:16<00:22,  8.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1374/1563 [02:16<00:23,  8.14batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1375/1563 [02:16<00:23,  8.05batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1376/1563 [02:16<00:23,  8.02batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1377/1563 [02:16<00:21,  8.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1379/1563 [02:16<00:19,  9.31batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1380/1563 [02:17<00:19,  9.16batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1381/1563 [02:17<00:19,  9.19batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1382/1563 [02:17<00:19,  9.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1383/1563 [02:17<00:19,  9.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▊ | 1384/1563 [02:17<00:18,  9.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▊ | 1385/1563 [02:17<00:18,  9.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▊ | 1386/1563 [02:17<00:18,  9.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▊ | 1387/1563 [02:17<00:18,  9.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1388/1563 [02:17<00:18,  9.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1389/1563 [02:18<00:18,  9.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1390/1563 [02:18<00:18,  9.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1391/1563 [02:18<00:19,  9.03batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1392/1563 [02:18<00:18,  9.24batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1393/1563 [02:18<00:18,  9.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1394/1563 [02:18<00:17,  9.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1395/1563 [02:18<00:17,  9.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1396/1563 [02:18<00:17,  9.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1397/1563 [02:18<00:17,  9.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1398/1563 [02:18<00:17,  9.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|████████▉ | 1399/1563 [02:19<00:16,  9.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|████████▉ | 1400/1563 [02:19<00:17,  9.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|████████▉ | 1401/1563 [02:19<00:17,  9.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|████████▉ | 1402/1563 [02:19<00:16,  9.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|████████▉ | 1404/1563 [02:19<00:15,  9.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|████████▉ | 1405/1563 [02:19<00:15,  9.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|████████▉ | 1406/1563 [02:19<00:15,  9.89batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|█████████ | 1407/1563 [02:19<00:15,  9.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|█████████ | 1408/1563 [02:20<00:15,  9.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|█████████ | 1409/1563 [02:20<00:15,  9.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|█████████ | 1410/1563 [02:20<00:16,  9.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|█████████ | 1411/1563 [02:20<00:15,  9.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|█████████ | 1412/1563 [02:20<00:15,  9.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|█████████ | 1413/1563 [02:20<00:15,  9.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|█████████ | 1414/1563 [02:20<00:15,  9.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████ | 1415/1563 [02:20<00:15,  9.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████ | 1416/1563 [02:20<00:15,  9.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████ | 1417/1563 [02:20<00:14,  9.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████ | 1418/1563 [02:21<00:14,  9.76batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████ | 1420/1563 [02:21<00:14,  9.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████ | 1421/1563 [02:21<00:14,  9.89batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████ | 1423/1563 [02:21<00:13, 10.14batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████ | 1425/1563 [02:21<00:13, 10.23batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████▏| 1427/1563 [02:21<00:13, 10.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████▏| 1429/1563 [02:22<00:12, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1431/1563 [02:22<00:12, 10.26batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1433/1563 [02:22<00:12, 10.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1435/1563 [02:22<00:12, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1437/1563 [02:22<00:12, 10.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1439/1563 [02:23<00:11, 10.33batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1441/1563 [02:23<00:12, 10.12batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1443/1563 [02:23<00:11, 10.20batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1445/1563 [02:23<00:11, 10.23batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1447/1563 [02:23<00:11, 10.31batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1449/1563 [02:24<00:11, 10.27batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1451/1563 [02:24<00:11, 10.17batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1453/1563 [02:24<00:10, 10.26batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1455/1563 [02:24<00:10, 10.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1457/1563 [02:24<00:10, 10.33batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1459/1563 [02:25<00:10, 10.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1461/1563 [02:25<00:09, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▎| 1463/1563 [02:25<00:09, 10.25batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▎| 1465/1563 [02:25<00:09, 10.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▍| 1467/1563 [02:25<00:09, 10.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▍| 1469/1563 [02:26<00:09, 10.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▍| 1471/1563 [02:26<00:08, 10.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▍| 1473/1563 [02:26<00:08, 10.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▍| 1475/1563 [02:26<00:08, 10.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▍| 1477/1563 [02:26<00:08, 10.08batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▍| 1479/1563 [02:27<00:08,  9.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▍| 1480/1563 [02:27<00:08,  9.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▍| 1481/1563 [02:27<00:09,  8.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▍| 1482/1563 [02:27<00:09,  8.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▍| 1483/1563 [02:27<00:09,  8.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▍| 1484/1563 [02:27<00:09,  8.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▌| 1485/1563 [02:27<00:09,  8.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▌| 1486/1563 [02:27<00:08,  8.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▌| 1487/1563 [02:27<00:08,  8.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▌| 1488/1563 [02:28<00:08,  8.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▌| 1489/1563 [02:28<00:08,  8.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▌| 1490/1563 [02:28<00:08,  8.31batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▌| 1491/1563 [02:28<00:08,  8.17batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▌| 1492/1563 [02:28<00:08,  8.16batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1493/1563 [02:28<00:08,  8.21batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1494/1563 [02:28<00:08,  8.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1495/1563 [02:28<00:08,  8.24batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1496/1563 [02:29<00:08,  8.21batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1497/1563 [02:29<00:07,  8.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1498/1563 [02:29<00:07,  8.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1499/1563 [02:29<00:07,  8.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1500/1563 [02:29<00:07,  8.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1501/1563 [02:29<00:06,  9.20batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1502/1563 [02:29<00:06,  9.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1503/1563 [02:29<00:06,  9.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1504/1563 [02:29<00:06,  9.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▋| 1505/1563 [02:30<00:06,  9.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▋| 1506/1563 [02:30<00:05,  9.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▋| 1507/1563 [02:30<00:05,  9.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▋| 1508/1563 [02:30<00:05,  9.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1509/1563 [02:30<00:05,  9.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1510/1563 [02:30<00:05,  9.41batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1511/1563 [02:30<00:05,  9.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1512/1563 [02:30<00:05,  9.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1513/1563 [02:30<00:05,  9.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1514/1563 [02:30<00:05,  9.76batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1515/1563 [02:31<00:04,  9.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1516/1563 [02:31<00:04,  9.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1517/1563 [02:31<00:04,  9.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1518/1563 [02:31<00:04,  9.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1519/1563 [02:31<00:04,  9.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1520/1563 [02:31<00:04,  9.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1521/1563 [02:31<00:04,  9.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1522/1563 [02:31<00:04,  9.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1523/1563 [02:31<00:04,  9.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1525/1563 [02:32<00:03, 10.03batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1526/1563 [02:32<00:03,  9.80batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1527/1563 [02:32<00:03,  9.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1529/1563 [02:32<00:03, 10.16batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1531/1563 [02:32<00:03, 10.10batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1533/1563 [02:32<00:02, 10.26batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1535/1563 [02:33<00:02, 10.33batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1537/1563 [02:33<00:02, 10.26batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1539/1563 [02:33<00:02, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▊| 1541/1563 [02:33<00:02, 10.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▊| 1543/1563 [02:33<00:01, 10.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▉| 1545/1563 [02:34<00:01, 10.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▉| 1547/1563 [02:34<00:01, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▉| 1549/1563 [02:34<00:01, 10.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▉| 1551/1563 [02:34<00:01, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▉| 1553/1563 [02:34<00:00, 10.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▉| 1555/1563 [02:34<00:00, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2: 100%|█████████▉| 1557/1563 [02:35<00:00, 10.32batch/s]\u001b[A\n",
-            "Epoch 2/2: 100%|█████████▉| 1559/1563 [02:35<00:00, 10.29batch/s]\u001b[A\n",
-            "Epoch 2/2: 100%|█████████▉| 1561/1563 [02:35<00:00, 10.31batch/s]\u001b[A\n",
-            "Epoch 2/2: 100%|██████████| 1563/1563 [02:35<00:00, 10.95batch/s]\u001b[A\n",
-            "Epochs: 100%|██████████| 2/2 [05:09<00:00, 155.00s/epoch]\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Epoch 2/2 tamamlandı. Kayıp: 1.5690, Doğruluk: 42.15%\n",
-            "RMSNorm Eğitim Süresi: 310.00 saniye, Son Doğruluk: 42.15%\n",
-            "\n",
-            "DyT Modeli Eğitiliyor...\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "Epochs:   0%|          | 0/2 [00:00<?, ?epoch/s]\n",
-            "Epoch 1/2:   0%|          | 0/1563 [00:00<?, ?batch/s]\u001b[A\n",
-            "Epoch 1/2:   0%|          | 1/1563 [00:00<02:59,  8.69batch/s]\u001b[A\n",
-            "Epoch 1/2:   0%|          | 3/1563 [00:00<02:39,  9.79batch/s]\u001b[A\n",
-            "Epoch 1/2:   0%|          | 4/1563 [00:00<02:39,  9.78batch/s]\u001b[A\n",
-            "Epoch 1/2:   0%|          | 6/1563 [00:00<02:27, 10.55batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|          | 8/1563 [00:00<02:23, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|          | 10/1563 [00:00<02:22, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|          | 12/1563 [00:01<02:21, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|          | 14/1563 [00:01<02:21, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|          | 16/1563 [00:01<02:20, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|          | 18/1563 [00:01<02:20, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|▏         | 20/1563 [00:01<02:18, 11.10batch/s]\u001b[A\n",
-            "Epoch 1/2:   1%|▏         | 22/1563 [00:02<02:18, 11.11batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 24/1563 [00:02<02:17, 11.17batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 26/1563 [00:02<02:18, 11.08batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 28/1563 [00:02<02:17, 11.19batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 30/1563 [00:02<02:16, 11.26batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 32/1563 [00:02<02:15, 11.26batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 34/1563 [00:03<02:16, 11.19batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 36/1563 [00:03<02:16, 11.19batch/s]\u001b[A\n",
-            "Epoch 1/2:   2%|▏         | 38/1563 [00:03<02:25, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 40/1563 [00:03<02:32, 10.00batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 42/1563 [00:03<02:35,  9.76batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 43/1563 [00:04<02:42,  9.37batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 44/1563 [00:04<02:45,  9.16batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 45/1563 [00:04<02:49,  8.95batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 46/1563 [00:04<02:55,  8.62batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 47/1563 [00:04<02:52,  8.80batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 48/1563 [00:04<02:52,  8.80batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 49/1563 [00:04<02:49,  8.92batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 50/1563 [00:04<02:49,  8.93batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 51/1563 [00:04<02:52,  8.78batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 52/1563 [00:05<02:56,  8.57batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 53/1563 [00:05<03:00,  8.38batch/s]\u001b[A\n",
-            "Epoch 1/2:   3%|▎         | 54/1563 [00:05<03:01,  8.32batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▎         | 55/1563 [00:05<02:58,  8.43batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▎         | 57/1563 [00:05<02:39,  9.42batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▍         | 59/1563 [00:05<02:28, 10.13batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▍         | 61/1563 [00:06<02:22, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▍         | 63/1563 [00:06<02:19, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▍         | 65/1563 [00:06<02:18, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▍         | 67/1563 [00:06<02:16, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/2:   4%|▍         | 69/1563 [00:06<02:14, 11.09batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▍         | 71/1563 [00:06<02:13, 11.17batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▍         | 73/1563 [00:07<02:13, 11.17batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▍         | 75/1563 [00:07<02:15, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▍         | 77/1563 [00:07<02:14, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▌         | 79/1563 [00:07<02:13, 11.13batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▌         | 81/1563 [00:07<02:12, 11.20batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▌         | 83/1563 [00:07<02:11, 11.22batch/s]\u001b[A\n",
-            "Epoch 1/2:   5%|▌         | 85/1563 [00:08<02:11, 11.24batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 87/1563 [00:08<02:12, 11.12batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 89/1563 [00:08<02:13, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 91/1563 [00:08<02:13, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 93/1563 [00:08<02:12, 11.10batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 95/1563 [00:09<02:11, 11.17batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▌         | 97/1563 [00:09<02:12, 11.07batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▋         | 99/1563 [00:09<02:13, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/2:   6%|▋         | 101/1563 [00:09<02:12, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 103/1563 [00:09<02:12, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 105/1563 [00:09<02:10, 11.15batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 107/1563 [00:10<02:10, 11.19batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 109/1563 [00:10<02:11, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 111/1563 [00:10<02:12, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 113/1563 [00:10<02:10, 11.10batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 115/1563 [00:10<02:09, 11.16batch/s]\u001b[A\n",
-            "Epoch 1/2:   7%|▋         | 117/1563 [00:11<02:09, 11.17batch/s]\u001b[A\n",
-            "Epoch 1/2:   8%|▊         | 119/1563 [00:11<02:10, 11.10batch/s]\u001b[A\n",
-            "Epoch 1/2:   8%|▊         | 121/1563 [00:11<02:10, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/2:   8%|▊         | 123/1563 [00:11<02:09, 11.11batch/s]\u001b[A\n",
-            "Epoch 1/2:   8%|▊         | 125/1563 [00:11<02:09, 11.13batch/s]\u001b[A\n",
-            "Epoch 1/2:   8%|▊         | 127/1563 [00:11<02:09, 11.13batch/s]\u001b[A\n",
-            "Epoch 1/2:   8%|▊         | 129/1563 [00:12<02:08, 11.12batch/s]\u001b[A\n",
-            "Epoch 1/2:   8%|▊         | 131/1563 [00:12<02:10, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▊         | 133/1563 [00:12<02:09, 11.07batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▊         | 135/1563 [00:12<02:12, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▉         | 137/1563 [00:12<02:10, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▉         | 139/1563 [00:13<02:10, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▉         | 141/1563 [00:13<02:08, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▉         | 143/1563 [00:13<02:10, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▉         | 145/1563 [00:13<02:09, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/2:   9%|▉         | 147/1563 [00:13<02:08, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|▉         | 149/1563 [00:13<02:07, 11.07batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|▉         | 151/1563 [00:14<02:06, 11.12batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|▉         | 153/1563 [00:14<02:06, 11.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|▉         | 155/1563 [00:14<02:07, 11.08batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|█         | 157/1563 [00:14<02:06, 11.10batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|█         | 159/1563 [00:14<02:08, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|█         | 161/1563 [00:15<02:06, 11.07batch/s]\u001b[A\n",
-            "Epoch 1/2:  10%|█         | 163/1563 [00:15<02:05, 11.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█         | 165/1563 [00:15<02:09, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█         | 167/1563 [00:15<02:15, 10.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█         | 169/1563 [00:15<02:18, 10.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█         | 171/1563 [00:16<02:20,  9.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█         | 172/1563 [00:16<02:21,  9.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█         | 173/1563 [00:16<02:22,  9.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█         | 174/1563 [00:16<02:24,  9.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█         | 175/1563 [00:16<02:29,  9.30batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█▏        | 176/1563 [00:16<02:31,  9.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█▏        | 177/1563 [00:16<02:33,  9.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█▏        | 178/1563 [00:16<02:35,  8.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  11%|█▏        | 179/1563 [00:16<02:37,  8.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 180/1563 [00:17<02:37,  8.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 181/1563 [00:17<02:38,  8.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 182/1563 [00:17<02:39,  8.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 183/1563 [00:17<02:44,  8.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 184/1563 [00:17<02:41,  8.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 185/1563 [00:17<02:42,  8.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 187/1563 [00:17<02:24,  9.55batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 189/1563 [00:18<02:16, 10.07batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 191/1563 [00:18<02:12, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 193/1563 [00:18<02:09, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/2:  12%|█▏        | 195/1563 [00:18<02:07, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 197/1563 [00:18<02:05, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 199/1563 [00:18<02:03, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 201/1563 [00:19<02:02, 11.08batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 203/1563 [00:19<02:01, 11.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 205/1563 [00:19<02:01, 11.15batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 207/1563 [00:19<02:03, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 209/1563 [00:19<02:02, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/2:  13%|█▎        | 211/1563 [00:19<02:01, 11.11batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▎        | 213/1563 [00:20<02:01, 11.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 215/1563 [00:20<02:00, 11.19batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 217/1563 [00:20<02:02, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 219/1563 [00:20<02:01, 11.04batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 221/1563 [00:20<01:59, 11.21batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 223/1563 [00:21<01:58, 11.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  14%|█▍        | 225/1563 [00:21<02:00, 11.15batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▍        | 227/1563 [00:21<02:01, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▍        | 229/1563 [00:21<02:00, 11.11batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▍        | 231/1563 [00:21<01:59, 11.17batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▍        | 233/1563 [00:21<01:58, 11.21batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▌        | 235/1563 [00:22<01:58, 11.23batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▌        | 237/1563 [00:22<01:57, 11.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▌        | 239/1563 [00:22<01:58, 11.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  15%|█▌        | 241/1563 [00:22<01:59, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▌        | 243/1563 [00:22<01:58, 11.13batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▌        | 245/1563 [00:23<01:58, 11.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▌        | 247/1563 [00:23<01:57, 11.15batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▌        | 249/1563 [00:23<01:58, 11.13batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▌        | 251/1563 [00:23<01:57, 11.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▌        | 253/1563 [00:23<01:57, 11.18batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▋        | 255/1563 [00:23<01:56, 11.23batch/s]\u001b[A\n",
-            "Epoch 1/2:  16%|█▋        | 257/1563 [00:24<01:55, 11.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 259/1563 [00:24<01:56, 11.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 261/1563 [00:24<01:58, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 263/1563 [00:24<01:58, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 265/1563 [00:24<01:57, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 267/1563 [00:25<01:57, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 269/1563 [00:25<01:56, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 271/1563 [00:25<01:57, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/2:  17%|█▋        | 273/1563 [00:25<01:58, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 275/1563 [00:25<01:58, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 277/1563 [00:25<01:57, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 279/1563 [00:26<01:55, 11.08batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 281/1563 [00:26<01:55, 11.08batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 283/1563 [00:26<01:55, 11.08batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 285/1563 [00:26<01:54, 11.15batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 287/1563 [00:26<01:54, 11.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  18%|█▊        | 289/1563 [00:27<01:54, 11.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▊        | 291/1563 [00:27<01:54, 11.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▊        | 293/1563 [00:27<01:53, 11.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▉        | 295/1563 [00:27<01:53, 11.21batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▉        | 297/1563 [00:27<01:57, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▉        | 299/1563 [00:27<02:03, 10.21batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▉        | 301/1563 [00:28<02:08,  9.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▉        | 302/1563 [00:28<02:10,  9.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▉        | 303/1563 [00:28<02:17,  9.18batch/s]\u001b[A\n",
-            "Epoch 1/2:  19%|█▉        | 304/1563 [00:28<02:17,  9.19batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|█▉        | 305/1563 [00:28<02:18,  9.06batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|█▉        | 306/1563 [00:28<02:21,  8.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|█▉        | 307/1563 [00:28<02:20,  8.95batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|█▉        | 308/1563 [00:28<02:19,  8.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|█▉        | 309/1563 [00:29<02:18,  9.07batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|█▉        | 310/1563 [00:29<02:21,  8.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|█▉        | 311/1563 [00:29<02:23,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|█▉        | 312/1563 [00:29<02:23,  8.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|██        | 313/1563 [00:29<02:23,  8.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|██        | 314/1563 [00:29<02:25,  8.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|██        | 315/1563 [00:29<02:27,  8.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|██        | 316/1563 [00:29<02:25,  8.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|██        | 318/1563 [00:30<02:11,  9.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  20%|██        | 320/1563 [00:30<02:03, 10.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██        | 322/1563 [00:30<01:59, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██        | 324/1563 [00:30<01:56, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██        | 326/1563 [00:30<01:55, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██        | 328/1563 [00:31<01:54, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██        | 330/1563 [00:31<01:52, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██        | 332/1563 [00:31<01:53, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██▏       | 334/1563 [00:31<01:52, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  21%|██▏       | 336/1563 [00:31<01:51, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 338/1563 [00:31<01:50, 11.09batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 340/1563 [00:32<01:49, 11.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 342/1563 [00:32<01:50, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 344/1563 [00:32<01:50, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 346/1563 [00:32<01:50, 11.04batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 348/1563 [00:32<01:49, 11.11batch/s]\u001b[A\n",
-            "Epoch 1/2:  22%|██▏       | 350/1563 [00:33<01:50, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 352/1563 [00:33<01:50, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 354/1563 [00:33<01:49, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 356/1563 [00:33<01:49, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 358/1563 [00:33<01:47, 11.18batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 360/1563 [00:33<01:48, 11.11batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 362/1563 [00:34<01:47, 11.13batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 364/1563 [00:34<01:47, 11.13batch/s]\u001b[A\n",
-            "Epoch 1/2:  23%|██▎       | 366/1563 [00:34<01:47, 11.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▎       | 368/1563 [00:34<01:47, 11.10batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▎       | 370/1563 [00:34<01:47, 11.10batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▍       | 372/1563 [00:34<01:47, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▍       | 374/1563 [00:35<01:46, 11.13batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▍       | 376/1563 [00:35<01:46, 11.17batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▍       | 378/1563 [00:35<01:46, 11.14batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▍       | 380/1563 [00:35<01:46, 11.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  24%|██▍       | 382/1563 [00:35<01:45, 11.15batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▍       | 384/1563 [00:36<01:46, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▍       | 386/1563 [00:36<01:46, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▍       | 388/1563 [00:36<01:47, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▍       | 390/1563 [00:36<01:47, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▌       | 392/1563 [00:36<01:46, 11.03batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▌       | 394/1563 [00:36<01:44, 11.15batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▌       | 396/1563 [00:37<01:44, 11.19batch/s]\u001b[A\n",
-            "Epoch 1/2:  25%|██▌       | 398/1563 [00:37<01:44, 11.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▌       | 400/1563 [00:37<01:45, 11.07batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▌       | 402/1563 [00:37<01:44, 11.15batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▌       | 404/1563 [00:37<01:43, 11.20batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▌       | 406/1563 [00:38<01:44, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▌       | 408/1563 [00:38<01:44, 11.08batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▌       | 410/1563 [00:38<01:44, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▋       | 412/1563 [00:38<01:44, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  26%|██▋       | 414/1563 [00:38<01:43, 11.12batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 416/1563 [00:38<01:42, 11.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 418/1563 [00:39<01:43, 11.04batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 420/1563 [00:39<01:42, 11.12batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 422/1563 [00:39<01:44, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 424/1563 [00:39<01:43, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 426/1563 [00:39<01:43, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/2:  27%|██▋       | 428/1563 [00:40<01:50, 10.27batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 430/1563 [00:40<01:54,  9.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 431/1563 [00:40<02:01,  9.33batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 432/1563 [00:40<02:04,  9.07batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 433/1563 [00:40<02:07,  8.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 434/1563 [00:40<02:07,  8.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 435/1563 [00:40<02:06,  8.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 436/1563 [00:41<02:04,  9.08batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 437/1563 [00:41<02:05,  8.95batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 438/1563 [00:41<02:04,  9.00batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 439/1563 [00:41<02:03,  9.07batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 440/1563 [00:41<02:07,  8.81batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 441/1563 [00:41<02:06,  8.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 442/1563 [00:41<02:06,  8.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 443/1563 [00:41<02:08,  8.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 444/1563 [00:41<02:09,  8.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  28%|██▊       | 445/1563 [00:42<02:08,  8.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▊       | 446/1563 [00:42<02:12,  8.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▊       | 447/1563 [00:42<02:07,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▊       | 449/1563 [00:42<01:55,  9.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▉       | 450/1563 [00:42<01:54,  9.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▉       | 452/1563 [00:42<01:52,  9.89batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▉       | 454/1563 [00:42<01:47, 10.33batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▉       | 456/1563 [00:43<01:44, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▉       | 458/1563 [00:43<01:44, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/2:  29%|██▉       | 460/1563 [00:43<01:45, 10.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|██▉       | 462/1563 [00:43<01:43, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|██▉       | 464/1563 [00:43<01:41, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|██▉       | 466/1563 [00:44<01:40, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|██▉       | 468/1563 [00:44<01:40, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|███       | 470/1563 [00:44<01:40, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|███       | 472/1563 [00:44<01:40, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|███       | 474/1563 [00:44<01:39, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  30%|███       | 476/1563 [00:44<01:38, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███       | 478/1563 [00:45<01:38, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███       | 480/1563 [00:45<01:39, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███       | 482/1563 [00:45<01:40, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███       | 484/1563 [00:45<01:39, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███       | 486/1563 [00:45<01:38, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███       | 488/1563 [00:46<01:37, 11.00batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███▏      | 490/1563 [00:46<01:37, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  31%|███▏      | 492/1563 [00:46<01:39, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 494/1563 [00:46<01:38, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 496/1563 [00:46<01:37, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 498/1563 [00:46<01:36, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 500/1563 [00:47<01:36, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 502/1563 [00:47<01:37, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 504/1563 [00:47<01:37, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  32%|███▏      | 506/1563 [00:47<01:37, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 508/1563 [00:47<01:36, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 510/1563 [00:48<01:35, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 512/1563 [00:48<01:36, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 514/1563 [00:48<01:38, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 516/1563 [00:48<01:37, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 518/1563 [00:48<01:36, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 520/1563 [00:49<01:35, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  33%|███▎      | 522/1563 [00:49<01:35, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▎      | 524/1563 [00:49<01:37, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▎      | 526/1563 [00:49<01:36, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▍      | 528/1563 [00:49<01:35, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▍      | 530/1563 [00:49<01:34, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▍      | 532/1563 [00:50<01:33, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▍      | 534/1563 [00:50<01:34, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▍      | 536/1563 [00:50<01:34, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  34%|███▍      | 538/1563 [00:50<01:33, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▍      | 540/1563 [00:50<01:32, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▍      | 542/1563 [00:51<01:32, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▍      | 544/1563 [00:51<01:32, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▍      | 546/1563 [00:51<01:32, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▌      | 548/1563 [00:51<01:32, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▌      | 550/1563 [00:51<01:32, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▌      | 552/1563 [00:51<01:32, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  35%|███▌      | 554/1563 [00:52<01:32, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 556/1563 [00:52<01:35, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 558/1563 [00:52<01:41,  9.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 559/1563 [00:52<01:43,  9.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 560/1563 [00:52<01:44,  9.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 561/1563 [00:52<01:45,  9.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 562/1563 [00:52<01:45,  9.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 563/1563 [00:53<01:45,  9.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 564/1563 [00:53<01:46,  9.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 565/1563 [00:53<01:50,  9.06batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▌      | 566/1563 [00:53<01:51,  8.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▋      | 567/1563 [00:53<01:53,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▋      | 568/1563 [00:53<01:52,  8.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▋      | 569/1563 [00:53<01:53,  8.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  36%|███▋      | 570/1563 [00:53<01:52,  8.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 571/1563 [00:54<01:52,  8.81batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 572/1563 [00:54<01:52,  8.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 573/1563 [00:54<01:55,  8.57batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 574/1563 [00:54<01:55,  8.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 575/1563 [00:54<01:58,  8.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 576/1563 [00:54<01:59,  8.28batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 578/1563 [00:54<01:45,  9.31batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 580/1563 [00:54<01:39,  9.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 582/1563 [00:55<01:34, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 584/1563 [00:55<01:33, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  37%|███▋      | 586/1563 [00:55<01:32, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 588/1563 [00:55<01:31, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 590/1563 [00:55<01:30, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 592/1563 [00:56<01:29, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 594/1563 [00:56<01:28, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 596/1563 [00:56<01:29, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 598/1563 [00:56<01:28, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  38%|███▊      | 600/1563 [00:56<01:27, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▊      | 602/1563 [00:56<01:27, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▊      | 604/1563 [00:57<01:27, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▉      | 606/1563 [00:57<01:28, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▉      | 608/1563 [00:57<01:28, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▉      | 610/1563 [00:57<01:27, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▉      | 612/1563 [00:57<01:26, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▉      | 614/1563 [00:58<01:25, 11.06batch/s]\u001b[A\n",
-            "Epoch 1/2:  39%|███▉      | 616/1563 [00:58<01:26, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|███▉      | 618/1563 [00:58<01:26, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|███▉      | 620/1563 [00:58<01:26, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|███▉      | 622/1563 [00:58<01:26, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|███▉      | 624/1563 [00:59<01:26, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|████      | 626/1563 [00:59<01:25, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|████      | 628/1563 [00:59<01:25, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|████      | 630/1563 [00:59<01:25, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  40%|████      | 632/1563 [00:59<01:26, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████      | 634/1563 [00:59<01:25, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████      | 636/1563 [01:00<01:25, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████      | 638/1563 [01:00<01:25, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████      | 640/1563 [01:00<01:24, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████      | 642/1563 [01:00<01:24, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████      | 644/1563 [01:00<01:24, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████▏     | 646/1563 [01:01<01:24, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  41%|████▏     | 648/1563 [01:01<01:23, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 650/1563 [01:01<01:24, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 652/1563 [01:01<01:23, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 654/1563 [01:01<01:22, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 656/1563 [01:01<01:22, 11.04batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 658/1563 [01:02<01:22, 11.02batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 660/1563 [01:02<01:22, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 662/1563 [01:02<01:22, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  42%|████▏     | 664/1563 [01:02<01:22, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 666/1563 [01:02<01:22, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 668/1563 [01:03<01:21, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 670/1563 [01:03<01:22, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 672/1563 [01:03<01:22, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 674/1563 [01:03<01:21, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 676/1563 [01:03<01:20, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  43%|████▎     | 678/1563 [01:03<01:21, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▎     | 680/1563 [01:04<01:20, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▎     | 682/1563 [01:04<01:21, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 684/1563 [01:04<01:21, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 686/1563 [01:04<01:23, 10.45batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 688/1563 [01:04<01:27, 10.01batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 690/1563 [01:05<01:28,  9.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 691/1563 [01:05<01:29,  9.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 692/1563 [01:05<01:30,  9.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 693/1563 [01:05<01:30,  9.60batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 694/1563 [01:05<01:31,  9.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  44%|████▍     | 695/1563 [01:05<01:32,  9.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▍     | 696/1563 [01:05<01:31,  9.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▍     | 697/1563 [01:05<01:31,  9.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▍     | 698/1563 [01:06<01:34,  9.12batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▍     | 699/1563 [01:06<01:35,  9.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▍     | 700/1563 [01:06<01:35,  8.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▍     | 701/1563 [01:06<01:37,  8.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▍     | 702/1563 [01:06<01:37,  8.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▍     | 703/1563 [01:06<01:38,  8.73batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▌     | 704/1563 [01:06<01:40,  8.59batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▌     | 705/1563 [01:06<01:38,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▌     | 706/1563 [01:06<01:38,  8.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▌     | 707/1563 [01:07<01:35,  8.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▌     | 709/1563 [01:07<01:26,  9.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  45%|████▌     | 711/1563 [01:07<01:23, 10.15batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 713/1563 [01:07<01:21, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 715/1563 [01:07<01:20, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 717/1563 [01:07<01:19, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 719/1563 [01:08<01:19, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▌     | 721/1563 [01:08<01:19, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▋     | 723/1563 [01:08<01:20, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  46%|████▋     | 725/1563 [01:08<01:18, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 727/1563 [01:08<01:17, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 729/1563 [01:09<01:18, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 731/1563 [01:09<01:17, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 733/1563 [01:09<01:16, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 735/1563 [01:09<01:15, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 737/1563 [01:09<01:15, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 739/1563 [01:10<01:15, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  47%|████▋     | 741/1563 [01:10<01:16, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 743/1563 [01:10<01:16, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 745/1563 [01:10<01:15, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 747/1563 [01:10<01:15, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 749/1563 [01:10<01:14, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 751/1563 [01:11<01:16, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 753/1563 [01:11<01:15, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 755/1563 [01:11<01:15, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  48%|████▊     | 757/1563 [01:11<01:14, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▊     | 759/1563 [01:11<01:13, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▊     | 761/1563 [01:12<01:13, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▉     | 763/1563 [01:12<01:14, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▉     | 765/1563 [01:12<01:14, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▉     | 767/1563 [01:12<01:14, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▉     | 769/1563 [01:12<01:13, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▉     | 771/1563 [01:12<01:15, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  49%|████▉     | 773/1563 [01:13<01:15, 10.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|████▉     | 775/1563 [01:13<01:13, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|████▉     | 777/1563 [01:13<01:12, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|████▉     | 779/1563 [01:13<01:12, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|████▉     | 781/1563 [01:13<01:11, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|█████     | 783/1563 [01:14<01:11, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|█████     | 785/1563 [01:14<01:11, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|█████     | 787/1563 [01:14<01:11, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  50%|█████     | 789/1563 [01:14<01:11, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████     | 791/1563 [01:14<01:10, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████     | 793/1563 [01:15<01:10, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████     | 795/1563 [01:15<01:10, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████     | 797/1563 [01:15<01:10, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████     | 799/1563 [01:15<01:09, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████     | 801/1563 [01:15<01:09, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/2:  51%|█████▏    | 803/1563 [01:15<01:08, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 805/1563 [01:16<01:08, 11.07batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 807/1563 [01:16<01:09, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 809/1563 [01:16<01:09, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 811/1563 [01:16<01:09, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 813/1563 [01:16<01:08, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 815/1563 [01:17<01:10, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 817/1563 [01:17<01:13, 10.10batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 819/1563 [01:17<01:15,  9.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  52%|█████▏    | 820/1563 [01:17<01:17,  9.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 821/1563 [01:17<01:18,  9.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 822/1563 [01:17<01:19,  9.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 823/1563 [01:17<01:21,  9.13batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 824/1563 [01:18<01:23,  8.89batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 825/1563 [01:18<01:25,  8.60batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 826/1563 [01:18<01:27,  8.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 827/1563 [01:18<01:26,  8.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 828/1563 [01:18<01:26,  8.50batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 829/1563 [01:18<01:24,  8.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 830/1563 [01:18<01:24,  8.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 831/1563 [01:18<01:23,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 832/1563 [01:18<01:24,  8.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 833/1563 [01:19<01:24,  8.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 834/1563 [01:19<01:26,  8.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  53%|█████▎    | 836/1563 [01:19<01:19,  9.12batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▎    | 838/1563 [01:19<01:13,  9.81batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▎    | 840/1563 [01:19<01:10, 10.23batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▍    | 842/1563 [01:19<01:08, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▍    | 844/1563 [01:20<01:06, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▍    | 846/1563 [01:20<01:05, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▍    | 848/1563 [01:20<01:07, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  54%|█████▍    | 850/1563 [01:20<01:06, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▍    | 852/1563 [01:20<01:06, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▍    | 854/1563 [01:21<01:05, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▍    | 856/1563 [01:21<01:04, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▍    | 858/1563 [01:21<01:04, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▌    | 860/1563 [01:21<01:04, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▌    | 862/1563 [01:21<01:03, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▌    | 864/1563 [01:21<01:03, 11.07batch/s]\u001b[A\n",
-            "Epoch 1/2:  55%|█████▌    | 866/1563 [01:22<01:02, 11.10batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▌    | 868/1563 [01:22<01:03, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▌    | 870/1563 [01:22<01:03, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▌    | 872/1563 [01:22<01:03, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▌    | 874/1563 [01:22<01:02, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▌    | 876/1563 [01:23<01:02, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▌    | 878/1563 [01:23<01:02, 11.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▋    | 880/1563 [01:23<01:02, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  56%|█████▋    | 882/1563 [01:23<01:03, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 884/1563 [01:23<01:03, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 886/1563 [01:24<01:02, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 888/1563 [01:24<01:02, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 890/1563 [01:24<01:02, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 892/1563 [01:24<01:02, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 894/1563 [01:24<01:02, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 896/1563 [01:24<01:01, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  57%|█████▋    | 898/1563 [01:25<01:01, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 900/1563 [01:25<01:02, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 902/1563 [01:25<01:02, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 904/1563 [01:25<01:02, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 906/1563 [01:25<01:02, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 908/1563 [01:26<01:01, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 910/1563 [01:26<01:01, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 912/1563 [01:26<01:01, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  58%|█████▊    | 914/1563 [01:26<01:02, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▊    | 916/1563 [01:26<01:01, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▊    | 918/1563 [01:27<01:00, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▉    | 920/1563 [01:27<01:00, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▉    | 922/1563 [01:27<01:00, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▉    | 924/1563 [01:27<00:59, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▉    | 926/1563 [01:27<01:00, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  59%|█████▉    | 928/1563 [01:27<00:59, 10.59batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|█████▉    | 930/1563 [01:28<00:59, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|█████▉    | 932/1563 [01:28<00:59, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|█████▉    | 934/1563 [01:28<00:59, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|█████▉    | 936/1563 [01:28<00:59, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|██████    | 938/1563 [01:28<00:59, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|██████    | 940/1563 [01:29<00:58, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|██████    | 942/1563 [01:29<00:59, 10.38batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|██████    | 944/1563 [01:29<01:03,  9.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  60%|██████    | 945/1563 [01:29<01:06,  9.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 946/1563 [01:29<01:07,  9.10batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 947/1563 [01:29<01:09,  8.89batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 948/1563 [01:30<01:10,  8.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 949/1563 [01:30<01:10,  8.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 950/1563 [01:30<01:10,  8.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 951/1563 [01:30<01:11,  8.59batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 952/1563 [01:30<01:10,  8.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 953/1563 [01:30<01:10,  8.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 954/1563 [01:30<01:10,  8.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 955/1563 [01:30<01:11,  8.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 956/1563 [01:30<01:11,  8.51batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████    | 957/1563 [01:31<01:11,  8.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████▏   | 958/1563 [01:31<01:11,  8.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████▏   | 959/1563 [01:31<01:13,  8.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████▏   | 960/1563 [01:31<01:13,  8.22batch/s]\u001b[A\n",
-            "Epoch 1/2:  61%|██████▏   | 961/1563 [01:31<01:12,  8.31batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 963/1563 [01:31<01:04,  9.31batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 964/1563 [01:31<01:04,  9.30batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 966/1563 [01:32<01:00,  9.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 968/1563 [01:32<00:58, 10.16batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 970/1563 [01:32<00:57, 10.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 972/1563 [01:32<00:56, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 974/1563 [01:32<00:55, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  62%|██████▏   | 976/1563 [01:32<00:55, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 978/1563 [01:33<00:55, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 980/1563 [01:33<00:54, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 982/1563 [01:33<00:54, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 984/1563 [01:33<00:53, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 986/1563 [01:33<00:53, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 988/1563 [01:34<00:53, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 990/1563 [01:34<00:52, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:  63%|██████▎   | 992/1563 [01:34<00:52, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▎   | 994/1563 [01:34<00:52, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▎   | 996/1563 [01:34<00:51, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▍   | 998/1563 [01:34<00:51, 10.96batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▍   | 1000/1563 [01:35<00:51, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▍   | 1002/1563 [01:35<00:51, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▍   | 1004/1563 [01:35<00:51, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▍   | 1006/1563 [01:35<00:50, 11.04batch/s]\u001b[A\n",
-            "Epoch 1/2:  64%|██████▍   | 1008/1563 [01:35<00:50, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▍   | 1010/1563 [01:36<00:50, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▍   | 1012/1563 [01:36<00:50, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▍   | 1014/1563 [01:36<00:50, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▌   | 1016/1563 [01:36<00:50, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▌   | 1018/1563 [01:36<00:49, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▌   | 1020/1563 [01:37<00:50, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  65%|██████▌   | 1022/1563 [01:37<00:49, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▌   | 1024/1563 [01:37<00:50, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▌   | 1026/1563 [01:37<00:49, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▌   | 1028/1563 [01:37<00:49, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▌   | 1030/1563 [01:37<00:49, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▌   | 1032/1563 [01:38<00:49, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▌   | 1034/1563 [01:38<00:49, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▋   | 1036/1563 [01:38<00:49, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  66%|██████▋   | 1038/1563 [01:38<00:49, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1040/1563 [01:38<00:49, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1042/1563 [01:39<00:49, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1044/1563 [01:39<00:48, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1046/1563 [01:39<00:48, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1048/1563 [01:39<00:47, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1050/1563 [01:39<00:47, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1052/1563 [01:40<00:47, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  67%|██████▋   | 1054/1563 [01:40<00:47, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1056/1563 [01:40<00:46, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1058/1563 [01:40<00:46, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1060/1563 [01:40<00:46, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1062/1563 [01:40<00:46, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1064/1563 [01:41<00:46, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1066/1563 [01:41<00:46, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1068/1563 [01:41<00:46, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  68%|██████▊   | 1070/1563 [01:41<00:47, 10.34batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▊   | 1072/1563 [01:41<00:49,  9.96batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▊   | 1074/1563 [01:42<00:51,  9.51batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1075/1563 [01:42<00:52,  9.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1076/1563 [01:42<00:54,  8.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1077/1563 [01:42<00:55,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1078/1563 [01:42<00:54,  8.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1079/1563 [01:42<00:55,  8.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1080/1563 [01:42<00:55,  8.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1081/1563 [01:43<00:57,  8.31batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1082/1563 [01:43<00:56,  8.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1083/1563 [01:43<00:57,  8.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1084/1563 [01:43<00:56,  8.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1085/1563 [01:43<00:58,  8.21batch/s]\u001b[A\n",
-            "Epoch 1/2:  69%|██████▉   | 1086/1563 [01:43<00:57,  8.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|██████▉   | 1087/1563 [01:43<00:56,  8.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|██████▉   | 1088/1563 [01:43<00:56,  8.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|██████▉   | 1089/1563 [01:43<00:56,  8.43batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|██████▉   | 1091/1563 [01:44<00:50,  9.41batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|██████▉   | 1093/1563 [01:44<00:47, 10.00batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|██████▉   | 1094/1563 [01:44<00:47,  9.96batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|███████   | 1096/1563 [01:44<00:44, 10.39batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|███████   | 1098/1563 [01:44<00:43, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/2:  70%|███████   | 1100/1563 [01:44<00:43, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████   | 1102/1563 [01:45<00:42, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████   | 1104/1563 [01:45<00:41, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████   | 1106/1563 [01:45<00:42, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████   | 1108/1563 [01:45<00:41, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████   | 1110/1563 [01:45<00:41, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████   | 1112/1563 [01:46<00:41, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████▏  | 1114/1563 [01:46<00:40, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  71%|███████▏  | 1116/1563 [01:46<00:41, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1118/1563 [01:46<00:41, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1120/1563 [01:46<00:40, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1122/1563 [01:46<00:40, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1124/1563 [01:47<00:40, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1126/1563 [01:47<00:40, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1128/1563 [01:47<00:40, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1130/1563 [01:47<00:40, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  72%|███████▏  | 1132/1563 [01:47<00:39, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1134/1563 [01:48<00:39, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1136/1563 [01:48<00:39, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1138/1563 [01:48<00:39, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1140/1563 [01:48<00:39, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1142/1563 [01:48<00:39, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1144/1563 [01:49<00:39, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1146/1563 [01:49<00:38, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  73%|███████▎  | 1148/1563 [01:49<00:39, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▎  | 1150/1563 [01:49<00:38, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▎  | 1152/1563 [01:49<00:37, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▍  | 1154/1563 [01:49<00:37, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▍  | 1156/1563 [01:50<00:37, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▍  | 1158/1563 [01:50<00:37, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▍  | 1160/1563 [01:50<00:37, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▍  | 1162/1563 [01:50<00:37, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  74%|███████▍  | 1164/1563 [01:50<00:37, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▍  | 1166/1563 [01:51<00:36, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▍  | 1168/1563 [01:51<00:36, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▍  | 1170/1563 [01:51<00:36, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▍  | 1172/1563 [01:51<00:36, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▌  | 1174/1563 [01:51<00:35, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▌  | 1176/1563 [01:52<00:35, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▌  | 1178/1563 [01:52<00:35, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  75%|███████▌  | 1180/1563 [01:52<00:35, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▌  | 1182/1563 [01:52<00:35, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▌  | 1184/1563 [01:52<00:34, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▌  | 1186/1563 [01:52<00:34, 10.98batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▌  | 1188/1563 [01:53<00:34, 11.01batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▌  | 1190/1563 [01:53<00:34, 10.95batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▋  | 1192/1563 [01:53<00:34, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  76%|███████▋  | 1194/1563 [01:53<00:34, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1196/1563 [01:53<00:33, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1198/1563 [01:54<00:33, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1200/1563 [01:54<00:35, 10.31batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1202/1563 [01:54<00:36,  9.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1204/1563 [01:54<00:37,  9.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1205/1563 [01:54<00:37,  9.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1206/1563 [01:54<00:38,  9.32batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1207/1563 [01:55<00:38,  9.19batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1208/1563 [01:55<00:38,  9.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1209/1563 [01:55<00:37,  9.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1210/1563 [01:55<00:37,  9.30batch/s]\u001b[A\n",
-            "Epoch 1/2:  77%|███████▋  | 1211/1563 [01:55<00:38,  9.22batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1212/1563 [01:55<00:38,  9.20batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1213/1563 [01:55<00:38,  9.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1214/1563 [01:55<00:38,  9.05batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1215/1563 [01:55<00:38,  9.02batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1216/1563 [01:56<00:39,  8.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1217/1563 [01:56<00:40,  8.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1218/1563 [01:56<00:41,  8.31batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1219/1563 [01:56<00:41,  8.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1221/1563 [01:56<00:36,  9.26batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1223/1563 [01:56<00:34,  9.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  78%|███████▊  | 1225/1563 [01:56<00:33, 10.23batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▊  | 1227/1563 [01:57<00:32, 10.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▊  | 1229/1563 [01:57<00:31, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▉  | 1231/1563 [01:57<00:31, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▉  | 1233/1563 [01:57<00:30, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▉  | 1235/1563 [01:57<00:30, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▉  | 1237/1563 [01:58<00:29, 10.99batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▉  | 1239/1563 [01:58<00:29, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  79%|███████▉  | 1241/1563 [01:58<00:29, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|███████▉  | 1243/1563 [01:58<00:29, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|███████▉  | 1245/1563 [01:58<00:29, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|███████▉  | 1247/1563 [01:58<00:29, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|███████▉  | 1249/1563 [01:59<00:29, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|████████  | 1251/1563 [01:59<00:28, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|████████  | 1253/1563 [01:59<00:28, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|████████  | 1255/1563 [01:59<00:28, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  80%|████████  | 1257/1563 [01:59<00:28, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████  | 1259/1563 [02:00<00:28, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████  | 1261/1563 [02:00<00:27, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████  | 1263/1563 [02:00<00:27, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████  | 1265/1563 [02:00<00:27, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████  | 1267/1563 [02:00<00:27, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████  | 1269/1563 [02:01<00:27, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████▏ | 1271/1563 [02:01<00:27, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  81%|████████▏ | 1273/1563 [02:01<00:26, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1275/1563 [02:01<00:26, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1277/1563 [02:01<00:26, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1279/1563 [02:01<00:26, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1281/1563 [02:02<00:26, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1283/1563 [02:02<00:25, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1285/1563 [02:02<00:25, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1287/1563 [02:02<00:25, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  82%|████████▏ | 1289/1563 [02:02<00:25, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1291/1563 [02:03<00:25, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1293/1563 [02:03<00:24, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1295/1563 [02:03<00:24, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1297/1563 [02:03<00:24, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1299/1563 [02:03<00:25, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1301/1563 [02:03<00:24, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1303/1563 [02:04<00:24, 10.57batch/s]\u001b[A\n",
-            "Epoch 1/2:  83%|████████▎ | 1305/1563 [02:04<00:24, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▎ | 1307/1563 [02:04<00:23, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▎ | 1309/1563 [02:04<00:23, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▍ | 1311/1563 [02:04<00:23, 10.65batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▍ | 1313/1563 [02:05<00:23, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▍ | 1315/1563 [02:05<00:23, 10.76batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▍ | 1317/1563 [02:05<00:23, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  84%|████████▍ | 1319/1563 [02:05<00:22, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▍ | 1321/1563 [02:05<00:22, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▍ | 1323/1563 [02:06<00:22, 10.82batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▍ | 1325/1563 [02:06<00:22, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▍ | 1327/1563 [02:06<00:22, 10.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1329/1563 [02:06<00:23, 10.01batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1331/1563 [02:06<00:23,  9.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1332/1563 [02:06<00:24,  9.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1333/1563 [02:07<00:24,  9.42batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1334/1563 [02:07<00:24,  9.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1335/1563 [02:07<00:24,  9.12batch/s]\u001b[A\n",
-            "Epoch 1/2:  85%|████████▌ | 1336/1563 [02:07<00:25,  8.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1337/1563 [02:07<00:26,  8.59batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1338/1563 [02:07<00:25,  8.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1339/1563 [02:07<00:26,  8.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1340/1563 [02:07<00:25,  8.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1341/1563 [02:08<00:25,  8.61batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1342/1563 [02:08<00:25,  8.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1343/1563 [02:08<00:26,  8.40batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1344/1563 [02:08<00:26,  8.27batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1345/1563 [02:08<00:26,  8.18batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1346/1563 [02:08<00:25,  8.49batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1347/1563 [02:08<00:25,  8.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▌ | 1348/1563 [02:08<00:24,  8.81batch/s]\u001b[A\n",
-            "Epoch 1/2:  86%|████████▋ | 1350/1563 [02:09<00:22,  9.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1352/1563 [02:09<00:20, 10.13batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1354/1563 [02:09<00:20, 10.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1356/1563 [02:09<00:19, 10.52batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1358/1563 [02:09<00:19, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1360/1563 [02:09<00:18, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1362/1563 [02:10<00:18, 10.64batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1364/1563 [02:10<00:18, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/2:  87%|████████▋ | 1366/1563 [02:10<00:18, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1368/1563 [02:10<00:18, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1370/1563 [02:10<00:17, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1372/1563 [02:11<00:17, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1374/1563 [02:11<00:17, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1376/1563 [02:11<00:17, 10.80batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1378/1563 [02:11<00:17, 10.83batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1380/1563 [02:11<00:16, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  88%|████████▊ | 1382/1563 [02:11<00:16, 10.94batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▊ | 1384/1563 [02:12<00:16, 10.87batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▊ | 1386/1563 [02:12<00:16, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▉ | 1388/1563 [02:12<00:16, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▉ | 1390/1563 [02:12<00:15, 10.97batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▉ | 1392/1563 [02:12<00:15, 10.88batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▉ | 1394/1563 [02:13<00:15, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▉ | 1396/1563 [02:13<00:15, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  89%|████████▉ | 1398/1563 [02:13<00:15, 10.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|████████▉ | 1400/1563 [02:13<00:15, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|████████▉ | 1402/1563 [02:13<00:15, 10.70batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|████████▉ | 1404/1563 [02:14<00:14, 10.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|████████▉ | 1406/1563 [02:14<00:14, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|█████████ | 1408/1563 [02:14<00:14, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|█████████ | 1410/1563 [02:14<00:14, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|█████████ | 1412/1563 [02:14<00:13, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  90%|█████████ | 1414/1563 [02:14<00:13, 10.92batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████ | 1416/1563 [02:15<00:13, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████ | 1418/1563 [02:15<00:13, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████ | 1420/1563 [02:15<00:13, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████ | 1422/1563 [02:15<00:13, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████ | 1424/1563 [02:15<00:12, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████ | 1426/1563 [02:16<00:12, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████▏| 1428/1563 [02:16<00:12, 10.60batch/s]\u001b[A\n",
-            "Epoch 1/2:  91%|█████████▏| 1430/1563 [02:16<00:12, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1432/1563 [02:16<00:12, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1434/1563 [02:16<00:12, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1436/1563 [02:17<00:11, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1438/1563 [02:17<00:11, 10.81batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1440/1563 [02:17<00:11, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1442/1563 [02:17<00:11, 10.93batch/s]\u001b[A\n",
-            "Epoch 1/2:  92%|█████████▏| 1444/1563 [02:17<00:10, 10.86batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1446/1563 [02:17<00:10, 10.84batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1448/1563 [02:18<00:10, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1450/1563 [02:18<00:10, 10.90batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1452/1563 [02:18<00:10, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1454/1563 [02:18<00:10, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1456/1563 [02:18<00:10, 10.36batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1458/1563 [02:19<00:10, 10.10batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1460/1563 [02:19<00:10,  9.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  93%|█████████▎| 1461/1563 [02:19<00:10,  9.33batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▎| 1462/1563 [02:19<00:11,  9.07batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▎| 1463/1563 [02:19<00:10,  9.12batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▎| 1464/1563 [02:19<00:10,  9.03batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▎| 1465/1563 [02:19<00:10,  9.11batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1466/1563 [02:20<00:10,  9.11batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1467/1563 [02:20<00:10,  9.19batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1468/1563 [02:20<00:10,  9.23batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1469/1563 [02:20<00:10,  9.19batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1470/1563 [02:20<00:10,  9.09batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1471/1563 [02:20<00:10,  8.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1472/1563 [02:20<00:10,  8.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1473/1563 [02:20<00:10,  8.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1474/1563 [02:20<00:10,  8.29batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1475/1563 [02:21<00:10,  8.40batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1476/1563 [02:21<00:10,  8.48batch/s]\u001b[A\n",
-            "Epoch 1/2:  94%|█████████▍| 1477/1563 [02:21<00:09,  8.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▍| 1478/1563 [02:21<00:09,  8.77batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▍| 1480/1563 [02:21<00:08,  9.58batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▍| 1482/1563 [02:21<00:08, 10.06batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▍| 1484/1563 [02:21<00:07, 10.35batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▌| 1486/1563 [02:22<00:07, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▌| 1488/1563 [02:22<00:07, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▌| 1490/1563 [02:22<00:06, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  95%|█████████▌| 1492/1563 [02:22<00:06, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▌| 1494/1563 [02:22<00:06, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▌| 1496/1563 [02:23<00:06, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▌| 1498/1563 [02:23<00:05, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▌| 1500/1563 [02:23<00:05, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▌| 1502/1563 [02:23<00:05, 10.67batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▌| 1504/1563 [02:23<00:05, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▋| 1506/1563 [02:23<00:05, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  96%|█████████▋| 1508/1563 [02:24<00:05, 10.68batch/s]\u001b[A\n",
-            "Epoch 1/2:  97%|█████████▋| 1510/1563 [02:24<00:05, 10.46batch/s]\u001b[A\n",
-            "Epoch 1/2:  97%|█████████▋| 1512/1563 [02:24<00:04, 10.40batch/s]\u001b[A\n",
-            "Epoch 1/2:  97%|█████████▋| 1514/1563 [02:24<00:04, 10.47batch/s]\u001b[A\n",
-            "Epoch 1/2:  97%|█████████▋| 1516/1563 [02:24<00:04, 10.54batch/s]\u001b[A\n",
-            "Epoch 1/2:  97%|█████████▋| 1518/1563 [02:25<00:04, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  97%|█████████▋| 1520/1563 [02:25<00:04, 10.62batch/s]\u001b[A\n",
-            "Epoch 1/2:  97%|█████████▋| 1522/1563 [02:25<00:03, 10.44batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1524/1563 [02:25<00:03, 10.56batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1526/1563 [02:25<00:03, 10.66batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1528/1563 [02:26<00:03, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1530/1563 [02:26<00:03, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1532/1563 [02:26<00:02, 10.69batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1534/1563 [02:26<00:02, 10.53batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1536/1563 [02:26<00:02, 10.63batch/s]\u001b[A\n",
-            "Epoch 1/2:  98%|█████████▊| 1538/1563 [02:27<00:02, 10.78batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▊| 1540/1563 [02:27<00:02, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▊| 1542/1563 [02:27<00:01, 10.85batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▉| 1544/1563 [02:27<00:01, 10.72batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▉| 1546/1563 [02:27<00:01, 10.74batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▉| 1548/1563 [02:27<00:01, 10.75batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▉| 1550/1563 [02:28<00:01, 10.91batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▉| 1552/1563 [02:28<00:01, 10.89batch/s]\u001b[A\n",
-            "Epoch 1/2:  99%|█████████▉| 1554/1563 [02:28<00:00, 10.79batch/s]\u001b[A\n",
-            "Epoch 1/2: 100%|█████████▉| 1556/1563 [02:28<00:00, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2: 100%|█████████▉| 1558/1563 [02:28<00:00, 10.71batch/s]\u001b[A\n",
-            "Epoch 1/2: 100%|█████████▉| 1560/1563 [02:29<00:00, 10.73batch/s]\u001b[A\n",
-            "Epoch 1/2: 100%|█████████▉| 1562/1563 [02:29<00:00, 10.79batch/s]\u001b[A\n",
-            "Epochs:  50%|█████     | 1/2 [02:29<02:29, 149.31s/epoch]"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Epoch 1/2 tamamlandı. Kayıp: 1.7113, Doğruluk: 36.97%\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "\n",
-            "Epoch 2/2:   0%|          | 0/1563 [00:00<?, ?batch/s]\u001b[A\n",
-            "Epoch 2/2:   0%|          | 1/1563 [00:00<03:08,  8.30batch/s]\u001b[A\n",
-            "Epoch 2/2:   0%|          | 2/1563 [00:00<02:51,  9.11batch/s]\u001b[A\n",
-            "Epoch 2/2:   0%|          | 3/1563 [00:00<02:49,  9.19batch/s]\u001b[A\n",
-            "Epoch 2/2:   0%|          | 5/1563 [00:00<02:32, 10.19batch/s]\u001b[A\n",
-            "Epoch 2/2:   0%|          | 7/1563 [00:00<02:30, 10.33batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 9/1563 [00:00<02:26, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 11/1563 [00:01<02:25, 10.70batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 13/1563 [00:01<02:22, 10.86batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 15/1563 [00:01<02:24, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 17/1563 [00:01<02:22, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|          | 19/1563 [00:01<02:22, 10.86batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|▏         | 21/1563 [00:02<02:27, 10.48batch/s]\u001b[A\n",
-            "Epoch 2/2:   1%|▏         | 23/1563 [00:02<02:34,  9.99batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 25/1563 [00:02<02:40,  9.61batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 26/1563 [00:02<02:44,  9.33batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 27/1563 [00:02<02:47,  9.19batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 28/1563 [00:02<02:48,  9.09batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 29/1563 [00:02<02:47,  9.14batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 30/1563 [00:03<02:47,  9.13batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 31/1563 [00:03<02:51,  8.92batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 32/1563 [00:03<02:49,  9.04batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 33/1563 [00:03<02:48,  9.09batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 34/1563 [00:03<02:51,  8.93batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 35/1563 [00:03<02:50,  8.94batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 36/1563 [00:03<02:51,  8.89batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 37/1563 [00:03<02:49,  8.98batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 38/1563 [00:03<02:50,  8.92batch/s]\u001b[A\n",
-            "Epoch 2/2:   2%|▏         | 39/1563 [00:04<02:53,  8.79batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 40/1563 [00:04<02:54,  8.72batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 41/1563 [00:04<02:53,  8.79batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 43/1563 [00:04<02:39,  9.54batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 45/1563 [00:04<02:30, 10.08batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 47/1563 [00:04<02:25, 10.40batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 49/1563 [00:04<02:22, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 51/1563 [00:05<02:20, 10.77batch/s]\u001b[A\n",
-            "Epoch 2/2:   3%|▎         | 53/1563 [00:05<02:19, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▎         | 55/1563 [00:05<02:18, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▎         | 57/1563 [00:05<02:17, 10.93batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▍         | 59/1563 [00:05<02:16, 10.98batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▍         | 61/1563 [00:06<02:18, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▍         | 63/1563 [00:06<02:17, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▍         | 65/1563 [00:06<02:15, 11.02batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▍         | 67/1563 [00:06<02:17, 10.86batch/s]\u001b[A\n",
-            "Epoch 2/2:   4%|▍         | 69/1563 [00:06<02:16, 10.93batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▍         | 71/1563 [00:07<02:16, 10.90batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▍         | 73/1563 [00:07<02:17, 10.85batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▍         | 75/1563 [00:07<02:15, 11.01batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▍         | 77/1563 [00:07<02:15, 11.00batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▌         | 79/1563 [00:07<02:14, 10.99batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▌         | 81/1563 [00:07<02:14, 11.03batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▌         | 83/1563 [00:08<02:14, 11.02batch/s]\u001b[A\n",
-            "Epoch 2/2:   5%|▌         | 85/1563 [00:08<02:13, 11.05batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▌         | 87/1563 [00:08<02:13, 11.05batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▌         | 89/1563 [00:08<02:13, 11.07batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▌         | 91/1563 [00:08<02:11, 11.16batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▌         | 93/1563 [00:08<02:12, 11.12batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▌         | 95/1563 [00:09<02:12, 11.04batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▌         | 97/1563 [00:09<02:12, 11.04batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▋         | 99/1563 [00:09<02:12, 11.08batch/s]\u001b[A\n",
-            "Epoch 2/2:   6%|▋         | 101/1563 [00:09<02:13, 10.96batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 103/1563 [00:09<02:12, 11.03batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 105/1563 [00:10<02:15, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 107/1563 [00:10<02:16, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 109/1563 [00:10<02:17, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 111/1563 [00:10<02:20, 10.33batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 113/1563 [00:10<02:19, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 115/1563 [00:11<02:19, 10.40batch/s]\u001b[A\n",
-            "Epoch 2/2:   7%|▋         | 117/1563 [00:11<02:18, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 119/1563 [00:11<02:17, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 121/1563 [00:11<02:15, 10.63batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 123/1563 [00:11<02:15, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 125/1563 [00:11<02:13, 10.76batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 127/1563 [00:12<02:16, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 129/1563 [00:12<02:15, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:   8%|▊         | 131/1563 [00:12<02:16, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▊         | 133/1563 [00:12<02:16, 10.44batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▊         | 135/1563 [00:12<02:13, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 137/1563 [00:13<02:12, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 139/1563 [00:13<02:10, 10.87batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 141/1563 [00:13<02:09, 10.99batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 143/1563 [00:13<02:09, 11.01batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 145/1563 [00:13<02:10, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:   9%|▉         | 147/1563 [00:14<02:13, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|▉         | 149/1563 [00:14<02:10, 10.80batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|▉         | 151/1563 [00:14<02:18, 10.23batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|▉         | 153/1563 [00:14<02:23,  9.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|▉         | 154/1563 [00:14<02:27,  9.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|▉         | 155/1563 [00:14<02:27,  9.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|▉         | 156/1563 [00:15<02:28,  9.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|█         | 157/1563 [00:15<02:33,  9.15batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|█         | 158/1563 [00:15<02:34,  9.09batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|█         | 159/1563 [00:15<02:39,  8.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|█         | 160/1563 [00:15<02:41,  8.71batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|█         | 161/1563 [00:15<02:40,  8.76batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|█         | 162/1563 [00:15<02:43,  8.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|█         | 163/1563 [00:15<02:42,  8.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  10%|█         | 164/1563 [00:15<02:42,  8.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█         | 165/1563 [00:16<02:41,  8.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█         | 166/1563 [00:16<02:39,  8.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█         | 167/1563 [00:16<02:39,  8.76batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█         | 168/1563 [00:16<02:38,  8.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█         | 169/1563 [00:16<02:40,  8.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█         | 171/1563 [00:16<02:24,  9.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█         | 173/1563 [00:16<02:19,  9.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█         | 175/1563 [00:17<02:15, 10.26batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█▏        | 177/1563 [00:17<02:11, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  11%|█▏        | 179/1563 [00:17<02:09, 10.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 181/1563 [00:17<02:07, 10.80batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 183/1563 [00:17<02:07, 10.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 185/1563 [00:17<02:09, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 187/1563 [00:18<02:10, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 189/1563 [00:18<02:09, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 191/1563 [00:18<02:08, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 193/1563 [00:18<02:08, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  12%|█▏        | 195/1563 [00:18<02:09, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 197/1563 [00:19<02:08, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 199/1563 [00:19<02:07, 10.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 201/1563 [00:19<02:06, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 203/1563 [00:19<02:05, 10.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 205/1563 [00:19<02:04, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 207/1563 [00:20<02:05, 10.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 209/1563 [00:20<02:04, 10.89batch/s]\u001b[A\n",
-            "Epoch 2/2:  13%|█▎        | 211/1563 [00:20<02:03, 10.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  14%|█▎        | 213/1563 [00:20<02:03, 10.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  14%|█▍        | 215/1563 [00:20<02:02, 11.03batch/s]\u001b[A\n",
-            "Epoch 2/2:  14%|█▍        | 217/1563 [00:20<02:02, 11.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  14%|█▍        | 219/1563 [00:21<02:03, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  14%|█▍        | 221/1563 [00:21<02:02, 10.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  14%|█▍        | 223/1563 [00:21<02:01, 11.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  14%|█▍        | 225/1563 [00:21<02:02, 10.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▍        | 227/1563 [00:21<02:02, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▍        | 229/1563 [00:22<02:01, 10.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▍        | 231/1563 [00:22<02:01, 10.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▍        | 233/1563 [00:22<02:00, 11.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▌        | 235/1563 [00:22<02:01, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▌        | 237/1563 [00:22<02:01, 10.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▌        | 239/1563 [00:22<02:02, 10.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  15%|█▌        | 241/1563 [00:23<02:06, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▌        | 243/1563 [00:23<02:05, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▌        | 245/1563 [00:23<02:03, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▌        | 247/1563 [00:23<02:05, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▌        | 249/1563 [00:23<02:04, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▌        | 251/1563 [00:24<02:02, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▌        | 253/1563 [00:24<02:01, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▋        | 255/1563 [00:24<02:02, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  16%|█▋        | 257/1563 [00:24<02:01, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 259/1563 [00:24<02:12,  9.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 261/1563 [00:25<02:13,  9.77batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 263/1563 [00:25<02:08, 10.13batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 265/1563 [00:25<02:05, 10.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 267/1563 [00:25<02:04, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 269/1563 [00:25<02:02, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 271/1563 [00:26<02:02, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  17%|█▋        | 273/1563 [00:26<02:03, 10.41batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 275/1563 [00:26<02:01, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 277/1563 [00:26<02:03, 10.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 279/1563 [00:26<02:10,  9.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 280/1563 [00:26<02:14,  9.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 281/1563 [00:27<02:19,  9.21batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 282/1563 [00:27<02:21,  9.03batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 283/1563 [00:27<02:22,  9.01batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 284/1563 [00:27<02:23,  8.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 285/1563 [00:27<02:21,  9.05batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 286/1563 [00:27<02:20,  9.09batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 287/1563 [00:27<02:22,  8.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 288/1563 [00:27<02:19,  9.11batch/s]\u001b[A\n",
-            "Epoch 2/2:  18%|█▊        | 289/1563 [00:27<02:23,  8.89batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▊        | 290/1563 [00:28<02:25,  8.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▊        | 291/1563 [00:28<02:24,  8.80batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▊        | 292/1563 [00:28<02:26,  8.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▊        | 293/1563 [00:28<02:25,  8.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▉        | 294/1563 [00:28<02:28,  8.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▉        | 295/1563 [00:28<02:33,  8.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▉        | 296/1563 [00:28<02:31,  8.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▉        | 297/1563 [00:28<02:32,  8.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▉        | 299/1563 [00:29<02:16,  9.27batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▉        | 300/1563 [00:29<02:13,  9.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▉        | 302/1563 [00:29<02:05, 10.01batch/s]\u001b[A\n",
-            "Epoch 2/2:  19%|█▉        | 303/1563 [00:29<02:06,  9.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|█▉        | 305/1563 [00:29<02:01, 10.34batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|█▉        | 307/1563 [00:29<01:59, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|█▉        | 309/1563 [00:30<01:57, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|█▉        | 311/1563 [00:30<01:55, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|██        | 313/1563 [00:30<01:54, 10.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|██        | 315/1563 [00:30<01:54, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|██        | 317/1563 [00:30<01:53, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  20%|██        | 319/1563 [00:30<01:53, 10.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██        | 321/1563 [00:31<01:55, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██        | 323/1563 [00:31<01:55, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██        | 325/1563 [00:31<01:54, 10.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██        | 327/1563 [00:31<01:54, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██        | 329/1563 [00:31<01:52, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██        | 331/1563 [00:32<01:53, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██▏       | 333/1563 [00:32<01:52, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  21%|██▏       | 335/1563 [00:32<01:53, 10.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 337/1563 [00:32<01:52, 10.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 339/1563 [00:32<01:52, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 341/1563 [00:32<01:50, 11.01batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 343/1563 [00:33<01:51, 10.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 345/1563 [00:33<01:51, 10.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 347/1563 [00:33<01:51, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 349/1563 [00:33<01:50, 10.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  22%|██▏       | 351/1563 [00:33<01:50, 10.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 353/1563 [00:34<01:50, 10.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 355/1563 [00:34<01:50, 10.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 357/1563 [00:34<01:50, 10.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 359/1563 [00:34<01:49, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 361/1563 [00:34<01:48, 11.05batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 363/1563 [00:35<01:49, 10.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 365/1563 [00:35<01:49, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  23%|██▎       | 367/1563 [00:35<01:49, 10.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▎       | 369/1563 [00:35<01:50, 10.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▎       | 371/1563 [00:35<01:48, 10.98batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 373/1563 [00:35<01:47, 11.04batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 375/1563 [00:36<01:48, 10.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 377/1563 [00:36<01:48, 10.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 379/1563 [00:36<01:49, 10.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  24%|██▍       | 381/1563 [00:36<01:48, 10.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▍       | 383/1563 [00:36<01:48, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▍       | 385/1563 [00:37<01:47, 10.98batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▍       | 387/1563 [00:37<01:49, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▍       | 389/1563 [00:37<01:49, 10.76batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▌       | 391/1563 [00:37<01:48, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▌       | 393/1563 [00:37<01:48, 10.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▌       | 395/1563 [00:37<01:46, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  25%|██▌       | 397/1563 [00:38<01:46, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▌       | 399/1563 [00:38<01:46, 10.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▌       | 401/1563 [00:38<01:46, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▌       | 403/1563 [00:38<01:45, 11.02batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▌       | 405/1563 [00:38<01:44, 11.06batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▌       | 407/1563 [00:39<01:47, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▌       | 409/1563 [00:39<01:54, 10.07batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▋       | 411/1563 [00:39<01:58,  9.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▋       | 412/1563 [00:39<02:01,  9.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▋       | 413/1563 [00:39<02:02,  9.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  26%|██▋       | 414/1563 [00:39<02:03,  9.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 415/1563 [00:39<02:06,  9.08batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 416/1563 [00:40<02:07,  9.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 417/1563 [00:40<02:08,  8.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 418/1563 [00:40<02:07,  9.01batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 419/1563 [00:40<02:06,  9.05batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 420/1563 [00:40<02:05,  9.14batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 421/1563 [00:40<02:13,  8.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 422/1563 [00:40<02:13,  8.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 423/1563 [00:40<02:14,  8.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 424/1563 [00:41<02:19,  8.15batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 425/1563 [00:41<02:20,  8.12batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 426/1563 [00:41<02:15,  8.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  27%|██▋       | 428/1563 [00:41<02:00,  9.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 430/1563 [00:41<01:53,  9.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 431/1563 [00:41<01:54,  9.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 433/1563 [00:41<01:48, 10.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 435/1563 [00:42<01:46, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 437/1563 [00:42<01:44, 10.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 439/1563 [00:42<01:43, 10.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 441/1563 [00:42<01:42, 10.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 443/1563 [00:42<01:43, 10.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  28%|██▊       | 445/1563 [00:42<01:43, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▊       | 447/1563 [00:43<01:42, 10.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▊       | 449/1563 [00:43<01:42, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▉       | 451/1563 [00:43<01:41, 10.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▉       | 453/1563 [00:43<01:40, 11.02batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▉       | 455/1563 [00:43<01:41, 10.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▉       | 457/1563 [00:44<01:43, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▉       | 459/1563 [00:44<01:42, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  29%|██▉       | 461/1563 [00:44<01:42, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|██▉       | 463/1563 [00:44<01:42, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|██▉       | 465/1563 [00:44<01:43, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|██▉       | 467/1563 [00:45<01:41, 10.76batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|███       | 469/1563 [00:45<01:41, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|███       | 471/1563 [00:45<01:39, 10.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|███       | 473/1563 [00:45<01:39, 10.98batch/s]\u001b[A\n",
-            "Epoch 2/2:  30%|███       | 475/1563 [00:45<01:39, 10.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███       | 477/1563 [00:45<01:39, 10.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███       | 479/1563 [00:46<01:39, 10.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███       | 481/1563 [00:46<01:38, 10.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███       | 483/1563 [00:46<01:38, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███       | 485/1563 [00:46<01:38, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███       | 487/1563 [00:46<01:38, 10.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███▏      | 489/1563 [00:47<01:38, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  31%|███▏      | 491/1563 [00:47<01:39, 10.77batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 493/1563 [00:47<01:38, 10.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 495/1563 [00:47<01:38, 10.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 497/1563 [00:47<01:38, 10.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 499/1563 [00:47<01:38, 10.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 501/1563 [00:48<01:37, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 503/1563 [00:48<01:36, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 505/1563 [00:48<01:36, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  32%|███▏      | 507/1563 [00:48<01:36, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 509/1563 [00:48<01:36, 10.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 511/1563 [00:49<01:36, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 513/1563 [00:49<01:34, 11.06batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 515/1563 [00:49<01:35, 11.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 517/1563 [00:49<01:34, 11.02batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 519/1563 [00:49<01:34, 11.09batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 521/1563 [00:49<01:33, 11.13batch/s]\u001b[A\n",
-            "Epoch 2/2:  33%|███▎      | 523/1563 [00:50<01:33, 11.08batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▎      | 525/1563 [00:50<01:34, 11.04batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▎      | 527/1563 [00:50<01:33, 11.10batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▍      | 529/1563 [00:50<01:33, 11.09batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▍      | 531/1563 [00:50<01:32, 11.15batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▍      | 533/1563 [00:51<01:34, 10.89batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▍      | 535/1563 [00:51<01:33, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▍      | 537/1563 [00:51<01:38, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  34%|███▍      | 539/1563 [00:51<01:43,  9.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▍      | 540/1563 [00:51<01:45,  9.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▍      | 541/1563 [00:51<01:46,  9.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▍      | 542/1563 [00:52<01:50,  9.22batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▍      | 543/1563 [00:52<01:53,  9.01batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▍      | 544/1563 [00:52<01:52,  9.07batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▍      | 545/1563 [00:52<01:53,  8.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▍      | 546/1563 [00:52<01:54,  8.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▍      | 547/1563 [00:52<01:52,  9.01batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▌      | 548/1563 [00:52<01:52,  9.04batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▌      | 549/1563 [00:52<01:50,  9.16batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▌      | 550/1563 [00:52<01:51,  9.06batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▌      | 551/1563 [00:53<01:56,  8.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▌      | 552/1563 [00:53<01:59,  8.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▌      | 553/1563 [00:53<01:56,  8.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  35%|███▌      | 554/1563 [00:53<01:58,  8.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▌      | 555/1563 [00:53<02:00,  8.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▌      | 556/1563 [00:53<01:59,  8.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▌      | 558/1563 [00:53<01:46,  9.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▌      | 560/1563 [00:54<01:41,  9.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▌      | 561/1563 [00:54<01:44,  9.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▌      | 563/1563 [00:54<01:39, 10.06batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▌      | 565/1563 [00:54<01:37, 10.23batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▋      | 567/1563 [00:54<01:36, 10.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  36%|███▋      | 569/1563 [00:54<01:36, 10.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 571/1563 [00:55<01:34, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 573/1563 [00:55<01:33, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 575/1563 [00:55<01:31, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 577/1563 [00:55<01:30, 10.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 579/1563 [00:55<01:30, 10.87batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 581/1563 [00:55<01:30, 10.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 583/1563 [00:56<01:31, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  37%|███▋      | 585/1563 [00:56<01:29, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 587/1563 [00:56<01:29, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 589/1563 [00:56<01:29, 10.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 591/1563 [00:56<01:29, 10.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 593/1563 [00:57<01:30, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 595/1563 [00:57<01:30, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 597/1563 [00:57<01:29, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 599/1563 [00:57<01:28, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  38%|███▊      | 601/1563 [00:57<01:27, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▊      | 603/1563 [00:57<01:27, 10.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▊      | 605/1563 [00:58<01:29, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▉      | 607/1563 [00:58<01:28, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▉      | 609/1563 [00:58<01:27, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▉      | 611/1563 [00:58<01:26, 11.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▉      | 613/1563 [00:58<01:27, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▉      | 615/1563 [00:59<01:27, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  39%|███▉      | 617/1563 [00:59<01:27, 10.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|███▉      | 619/1563 [00:59<01:26, 10.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|███▉      | 621/1563 [00:59<01:26, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|███▉      | 623/1563 [00:59<01:26, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|███▉      | 625/1563 [01:00<01:25, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|████      | 627/1563 [01:00<01:26, 10.87batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|████      | 629/1563 [01:00<01:25, 10.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|████      | 631/1563 [01:00<01:24, 11.03batch/s]\u001b[A\n",
-            "Epoch 2/2:  40%|████      | 633/1563 [01:00<01:24, 11.04batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████      | 635/1563 [01:00<01:24, 10.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████      | 637/1563 [01:01<01:24, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████      | 639/1563 [01:01<01:24, 10.87batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████      | 641/1563 [01:01<01:23, 10.98batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████      | 643/1563 [01:01<01:23, 10.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████▏     | 645/1563 [01:01<01:24, 10.87batch/s]\u001b[A\n",
-            "Epoch 2/2:  41%|████▏     | 647/1563 [01:02<01:24, 10.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 649/1563 [01:02<01:24, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 651/1563 [01:02<01:24, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 653/1563 [01:02<01:24, 10.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 655/1563 [01:02<01:23, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 657/1563 [01:02<01:24, 10.77batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 659/1563 [01:03<01:23, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 661/1563 [01:03<01:24, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  42%|████▏     | 663/1563 [01:03<01:23, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 665/1563 [01:03<01:25, 10.44batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 667/1563 [01:03<01:29,  9.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 669/1563 [01:04<01:32,  9.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 670/1563 [01:04<01:34,  9.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 671/1563 [01:04<01:37,  9.18batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 672/1563 [01:04<01:39,  8.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 673/1563 [01:04<01:40,  8.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 674/1563 [01:04<01:40,  8.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 675/1563 [01:04<01:42,  8.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 676/1563 [01:05<01:43,  8.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 677/1563 [01:05<01:45,  8.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 678/1563 [01:05<01:44,  8.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  43%|████▎     | 679/1563 [01:05<01:43,  8.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▎     | 680/1563 [01:05<01:44,  8.41batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▎     | 681/1563 [01:05<01:44,  8.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▎     | 682/1563 [01:05<01:43,  8.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▎     | 683/1563 [01:05<01:45,  8.31batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▍     | 684/1563 [01:05<01:42,  8.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▍     | 686/1563 [01:06<01:32,  9.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▍     | 688/1563 [01:06<01:27, 10.05batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▍     | 690/1563 [01:06<01:25, 10.18batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▍     | 692/1563 [01:06<01:23, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  44%|████▍     | 694/1563 [01:06<01:21, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▍     | 696/1563 [01:07<01:20, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▍     | 698/1563 [01:07<01:19, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▍     | 700/1563 [01:07<01:19, 10.89batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▍     | 702/1563 [01:07<01:20, 10.71batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▌     | 704/1563 [01:07<01:19, 10.80batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▌     | 706/1563 [01:07<01:18, 10.89batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▌     | 708/1563 [01:08<01:18, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  45%|████▌     | 710/1563 [01:08<01:17, 11.04batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▌     | 712/1563 [01:08<01:18, 10.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▌     | 714/1563 [01:08<01:17, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▌     | 716/1563 [01:08<01:17, 10.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▌     | 718/1563 [01:09<01:16, 10.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▌     | 720/1563 [01:09<01:16, 10.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▌     | 722/1563 [01:09<01:16, 10.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▋     | 724/1563 [01:09<01:17, 10.89batch/s]\u001b[A\n",
-            "Epoch 2/2:  46%|████▋     | 726/1563 [01:09<01:16, 10.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 728/1563 [01:09<01:16, 10.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 730/1563 [01:10<01:16, 10.87batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 732/1563 [01:10<01:16, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 734/1563 [01:10<01:16, 10.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 736/1563 [01:10<01:17, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 738/1563 [01:10<01:16, 10.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 740/1563 [01:11<01:15, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  47%|████▋     | 742/1563 [01:11<01:15, 10.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 744/1563 [01:11<01:15, 10.89batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 746/1563 [01:11<01:16, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 748/1563 [01:11<01:15, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 750/1563 [01:12<01:15, 10.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 752/1563 [01:12<01:15, 10.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 754/1563 [01:12<01:14, 10.87batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 756/1563 [01:12<01:14, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  48%|████▊     | 758/1563 [01:12<01:15, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▊     | 760/1563 [01:12<01:15, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▉     | 762/1563 [01:13<01:14, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▉     | 764/1563 [01:13<01:13, 10.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▉     | 766/1563 [01:13<01:12, 10.97batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▉     | 768/1563 [01:13<01:13, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▉     | 770/1563 [01:13<01:13, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  49%|████▉     | 772/1563 [01:14<01:12, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|████▉     | 774/1563 [01:14<01:14, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|████▉     | 776/1563 [01:14<01:13, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|████▉     | 778/1563 [01:14<01:12, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|████▉     | 780/1563 [01:14<01:13, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|█████     | 782/1563 [01:14<01:13, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|█████     | 784/1563 [01:15<01:13, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|█████     | 786/1563 [01:15<01:12, 10.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  50%|█████     | 788/1563 [01:15<01:12, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 790/1563 [01:15<01:12, 10.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 792/1563 [01:15<01:11, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 794/1563 [01:16<01:15, 10.13batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 796/1563 [01:16<01:17,  9.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 797/1563 [01:16<01:19,  9.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 798/1563 [01:16<01:21,  9.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 799/1563 [01:16<01:22,  9.31batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 800/1563 [01:16<01:24,  9.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████     | 801/1563 [01:16<01:26,  8.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████▏    | 802/1563 [01:17<01:25,  8.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████▏    | 803/1563 [01:17<01:26,  8.80batch/s]\u001b[A\n",
-            "Epoch 2/2:  51%|█████▏    | 804/1563 [01:17<01:25,  8.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 805/1563 [01:17<01:26,  8.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 806/1563 [01:17<01:26,  8.71batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 807/1563 [01:17<01:26,  8.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 808/1563 [01:17<01:25,  8.87batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 809/1563 [01:17<01:26,  8.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 810/1563 [01:17<01:27,  8.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 811/1563 [01:18<01:27,  8.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 812/1563 [01:18<01:26,  8.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 813/1563 [01:18<01:28,  8.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 815/1563 [01:18<01:18,  9.48batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 817/1563 [01:18<01:14, 10.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  52%|█████▏    | 819/1563 [01:18<01:11, 10.37batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 821/1563 [01:19<01:10, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 823/1563 [01:19<01:09, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 825/1563 [01:19<01:08, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 827/1563 [01:19<01:08, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 829/1563 [01:19<01:07, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 831/1563 [01:19<01:06, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 833/1563 [01:20<01:06, 10.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  53%|█████▎    | 835/1563 [01:20<01:06, 10.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▎    | 837/1563 [01:20<01:06, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▎    | 839/1563 [01:20<01:06, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▍    | 841/1563 [01:20<01:05, 10.98batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▍    | 843/1563 [01:21<01:07, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▍    | 845/1563 [01:21<01:06, 10.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▍    | 847/1563 [01:21<01:05, 10.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▍    | 849/1563 [01:21<01:05, 10.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  54%|█████▍    | 851/1563 [01:21<01:04, 11.03batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▍    | 853/1563 [01:22<01:04, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▍    | 855/1563 [01:22<01:05, 10.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▍    | 857/1563 [01:22<01:05, 10.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▍    | 859/1563 [01:22<01:05, 10.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▌    | 861/1563 [01:22<01:04, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▌    | 863/1563 [01:22<01:04, 10.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▌    | 865/1563 [01:23<01:05, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  55%|█████▌    | 867/1563 [01:23<01:04, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 869/1563 [01:23<01:04, 10.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 871/1563 [01:23<01:04, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 873/1563 [01:23<01:03, 10.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 875/1563 [01:24<01:03, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 877/1563 [01:24<01:03, 10.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▌    | 879/1563 [01:24<01:02, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▋    | 881/1563 [01:24<01:02, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  56%|█████▋    | 883/1563 [01:24<01:02, 10.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 885/1563 [01:24<01:01, 10.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 887/1563 [01:25<01:01, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 889/1563 [01:25<01:01, 10.93batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 891/1563 [01:25<01:01, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 893/1563 [01:25<01:01, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 895/1563 [01:25<01:01, 10.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  57%|█████▋    | 897/1563 [01:26<01:01, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 899/1563 [01:26<01:01, 10.76batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 901/1563 [01:26<01:01, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 903/1563 [01:26<01:01, 10.80batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 905/1563 [01:26<01:00, 10.80batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 907/1563 [01:26<01:00, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 909/1563 [01:27<01:01, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 911/1563 [01:27<01:01, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  58%|█████▊    | 913/1563 [01:27<01:00, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▊    | 915/1563 [01:27<01:00, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▊    | 917/1563 [01:27<01:00, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▉    | 919/1563 [01:28<01:00, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▉    | 921/1563 [01:28<01:00, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▉    | 923/1563 [01:28<01:02, 10.19batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▉    | 925/1563 [01:28<01:03,  9.98batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▉    | 927/1563 [01:28<01:07,  9.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▉    | 928/1563 [01:29<01:08,  9.31batch/s]\u001b[A\n",
-            "Epoch 2/2:  59%|█████▉    | 929/1563 [01:29<01:08,  9.24batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|█████▉    | 930/1563 [01:29<01:09,  9.07batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|█████▉    | 931/1563 [01:29<01:10,  8.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|█████▉    | 932/1563 [01:29<01:11,  8.89batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|█████▉    | 933/1563 [01:29<01:10,  8.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|█████▉    | 934/1563 [01:29<01:09,  9.04batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|█████▉    | 935/1563 [01:29<01:09,  8.98batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|█████▉    | 936/1563 [01:30<01:11,  8.71batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|█████▉    | 937/1563 [01:30<01:13,  8.46batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|██████    | 938/1563 [01:30<01:12,  8.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|██████    | 939/1563 [01:30<01:14,  8.41batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|██████    | 940/1563 [01:30<01:13,  8.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|██████    | 941/1563 [01:30<01:12,  8.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|██████    | 942/1563 [01:30<01:14,  8.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|██████    | 944/1563 [01:30<01:06,  9.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  60%|██████    | 945/1563 [01:31<01:05,  9.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████    | 946/1563 [01:31<01:05,  9.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████    | 947/1563 [01:31<01:04,  9.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████    | 949/1563 [01:31<01:01,  9.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████    | 951/1563 [01:31<00:59, 10.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████    | 953/1563 [01:31<00:57, 10.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████    | 955/1563 [01:31<00:57, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████    | 957/1563 [01:32<00:56, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████▏   | 959/1563 [01:32<00:55, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  61%|██████▏   | 961/1563 [01:32<00:56, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  62%|██████▏   | 963/1563 [01:32<00:56, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  62%|██████▏   | 965/1563 [01:32<00:55, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  62%|██████▏   | 967/1563 [01:33<00:55, 10.76batch/s]\u001b[A\n",
-            "Epoch 2/2:  62%|██████▏   | 969/1563 [01:33<00:54, 10.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  62%|██████▏   | 971/1563 [01:33<00:55, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  62%|██████▏   | 973/1563 [01:33<00:54, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  62%|██████▏   | 975/1563 [01:33<00:54, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 977/1563 [01:33<00:54, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 979/1563 [01:34<00:54, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 981/1563 [01:34<00:53, 10.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 983/1563 [01:34<00:54, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 985/1563 [01:34<00:54, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 987/1563 [01:34<00:53, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 989/1563 [01:35<00:53, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  63%|██████▎   | 991/1563 [01:35<00:53, 10.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▎   | 993/1563 [01:35<00:53, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▎   | 995/1563 [01:35<00:52, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 997/1563 [01:35<00:52, 10.77batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 999/1563 [01:36<00:52, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 1001/1563 [01:36<00:52, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 1003/1563 [01:36<00:52, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 1005/1563 [01:36<00:52, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  64%|██████▍   | 1007/1563 [01:36<00:52, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▍   | 1009/1563 [01:36<00:51, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▍   | 1011/1563 [01:37<00:51, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▍   | 1013/1563 [01:37<00:51, 10.71batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▍   | 1015/1563 [01:37<00:52, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▌   | 1017/1563 [01:37<00:51, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▌   | 1019/1563 [01:37<00:51, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▌   | 1021/1563 [01:38<00:50, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  65%|██████▌   | 1023/1563 [01:38<00:49, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▌   | 1025/1563 [01:38<00:49, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▌   | 1027/1563 [01:38<00:50, 10.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▌   | 1029/1563 [01:38<00:49, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▌   | 1031/1563 [01:39<00:49, 10.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▌   | 1033/1563 [01:39<00:48, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▌   | 1035/1563 [01:39<00:48, 10.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▋   | 1037/1563 [01:39<00:48, 10.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  66%|██████▋   | 1039/1563 [01:39<00:48, 10.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1041/1563 [01:39<00:47, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1043/1563 [01:40<00:47, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1045/1563 [01:40<00:47, 11.02batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1047/1563 [01:40<00:46, 11.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1049/1563 [01:40<00:46, 10.96batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1051/1563 [01:40<00:49, 10.27batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1053/1563 [01:41<00:51,  9.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1054/1563 [01:41<00:52,  9.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  67%|██████▋   | 1055/1563 [01:41<00:53,  9.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1056/1563 [01:41<00:55,  9.13batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1057/1563 [01:41<00:56,  8.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1058/1563 [01:41<00:57,  8.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1059/1563 [01:41<00:56,  8.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1060/1563 [01:41<00:55,  9.06batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1061/1563 [01:42<00:54,  9.18batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1062/1563 [01:42<00:55,  9.03batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1063/1563 [01:42<00:54,  9.23batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1064/1563 [01:42<00:55,  9.06batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1065/1563 [01:42<00:55,  8.98batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1066/1563 [01:42<00:54,  9.14batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1067/1563 [01:42<00:53,  9.24batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1068/1563 [01:42<00:55,  8.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1069/1563 [01:42<00:56,  8.76batch/s]\u001b[A\n",
-            "Epoch 2/2:  68%|██████▊   | 1070/1563 [01:43<00:57,  8.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▊   | 1071/1563 [01:43<00:58,  8.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▊   | 1073/1563 [01:43<00:52,  9.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▉   | 1075/1563 [01:43<00:48,  9.98batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▉   | 1077/1563 [01:43<00:47, 10.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▉   | 1079/1563 [01:43<00:46, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▉   | 1081/1563 [01:44<00:45, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▉   | 1083/1563 [01:44<00:45, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  69%|██████▉   | 1085/1563 [01:44<00:44, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|██████▉   | 1087/1563 [01:44<00:44, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|██████▉   | 1089/1563 [01:44<00:44, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|██████▉   | 1091/1563 [01:45<00:44, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|██████▉   | 1093/1563 [01:45<00:44, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|███████   | 1095/1563 [01:45<00:43, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|███████   | 1097/1563 [01:45<00:43, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|███████   | 1099/1563 [01:45<00:43, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  70%|███████   | 1101/1563 [01:45<00:42, 10.77batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████   | 1103/1563 [01:46<00:43, 10.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████   | 1105/1563 [01:46<00:42, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████   | 1107/1563 [01:46<00:42, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████   | 1109/1563 [01:46<00:42, 10.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████   | 1111/1563 [01:46<00:41, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████   | 1113/1563 [01:47<00:42, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████▏  | 1115/1563 [01:47<00:41, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  71%|███████▏  | 1117/1563 [01:47<00:41, 10.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1119/1563 [01:47<00:40, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1121/1563 [01:47<00:40, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1123/1563 [01:47<00:40, 10.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1125/1563 [01:48<00:40, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1127/1563 [01:48<00:40, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1129/1563 [01:48<00:40, 10.77batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1131/1563 [01:48<00:39, 10.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  72%|███████▏  | 1133/1563 [01:48<00:39, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1135/1563 [01:49<00:39, 10.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1137/1563 [01:49<00:39, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1139/1563 [01:49<00:38, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1141/1563 [01:49<00:38, 10.94batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1143/1563 [01:49<00:38, 10.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1145/1563 [01:50<00:38, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  73%|███████▎  | 1147/1563 [01:50<00:38, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▎  | 1149/1563 [01:50<00:38, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▎  | 1151/1563 [01:50<00:38, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▍  | 1153/1563 [01:50<00:37, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▍  | 1155/1563 [01:50<00:37, 10.76batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▍  | 1157/1563 [01:51<00:37, 10.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▍  | 1159/1563 [01:51<00:37, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▍  | 1161/1563 [01:51<00:36, 10.91batch/s]\u001b[A\n",
-            "Epoch 2/2:  74%|███████▍  | 1163/1563 [01:51<00:36, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▍  | 1165/1563 [01:51<00:36, 11.01batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▍  | 1167/1563 [01:52<00:36, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▍  | 1169/1563 [01:52<00:36, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▍  | 1171/1563 [01:52<00:36, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▌  | 1173/1563 [01:52<00:36, 10.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▌  | 1175/1563 [01:52<00:35, 10.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▌  | 1177/1563 [01:52<00:35, 10.80batch/s]\u001b[A\n",
-            "Epoch 2/2:  75%|███████▌  | 1179/1563 [01:53<00:35, 10.80batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1181/1563 [01:53<00:37, 10.26batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1183/1563 [01:53<00:38,  9.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1184/1563 [01:53<00:40,  9.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1185/1563 [01:53<00:42,  8.98batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1186/1563 [01:53<00:42,  8.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1187/1563 [01:54<00:43,  8.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1188/1563 [01:54<00:42,  8.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1189/1563 [01:54<00:41,  8.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1190/1563 [01:54<00:41,  9.03batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▌  | 1191/1563 [01:54<00:40,  9.10batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▋  | 1192/1563 [01:54<00:40,  9.11batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▋  | 1193/1563 [01:54<00:41,  9.00batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▋  | 1194/1563 [01:54<00:41,  8.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  76%|███████▋  | 1195/1563 [01:55<00:42,  8.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1196/1563 [01:55<00:43,  8.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1197/1563 [01:55<00:44,  8.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1198/1563 [01:55<00:42,  8.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1199/1563 [01:55<00:44,  8.21batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1200/1563 [01:55<00:42,  8.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1202/1563 [01:55<00:37,  9.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1203/1563 [01:55<00:37,  9.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1205/1563 [01:56<00:35,  9.99batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1206/1563 [01:56<00:36,  9.76batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1208/1563 [01:56<00:34, 10.14batch/s]\u001b[A\n",
-            "Epoch 2/2:  77%|███████▋  | 1210/1563 [01:56<00:33, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1212/1563 [01:56<00:33, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1214/1563 [01:56<00:32, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1216/1563 [01:57<00:32, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1218/1563 [01:57<00:32, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1220/1563 [01:57<00:32, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1222/1563 [01:57<00:32, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1224/1563 [01:57<00:31, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  78%|███████▊  | 1226/1563 [01:58<00:31, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▊  | 1228/1563 [01:58<00:31, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▊  | 1230/1563 [01:58<00:31, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▉  | 1232/1563 [01:58<00:30, 10.71batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▉  | 1234/1563 [01:58<00:30, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▉  | 1236/1563 [01:58<00:29, 10.92batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▉  | 1238/1563 [01:59<00:30, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▉  | 1240/1563 [01:59<00:30, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  79%|███████▉  | 1242/1563 [01:59<00:29, 10.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|███████▉  | 1244/1563 [01:59<00:29, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|███████▉  | 1246/1563 [01:59<00:29, 10.72batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|███████▉  | 1248/1563 [02:00<00:29, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|███████▉  | 1250/1563 [02:00<00:30, 10.39batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|████████  | 1252/1563 [02:00<00:29, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|████████  | 1254/1563 [02:00<00:29, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|████████  | 1256/1563 [02:00<00:29, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  80%|████████  | 1258/1563 [02:01<00:28, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████  | 1260/1563 [02:01<00:28, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████  | 1262/1563 [02:01<00:28, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████  | 1264/1563 [02:01<00:28, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████  | 1266/1563 [02:01<00:27, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████  | 1268/1563 [02:01<00:27, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████▏ | 1270/1563 [02:02<00:27, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  81%|████████▏ | 1272/1563 [02:02<00:27, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1274/1563 [02:02<00:26, 10.76batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1276/1563 [02:02<00:26, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1278/1563 [02:02<00:26, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1280/1563 [02:03<00:26, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1282/1563 [02:03<00:26, 10.80batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1284/1563 [02:03<00:25, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1286/1563 [02:03<00:25, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  82%|████████▏ | 1288/1563 [02:03<00:25, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1290/1563 [02:04<00:25, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1292/1563 [02:04<00:25, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1294/1563 [02:04<00:25, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1296/1563 [02:04<00:25, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1298/1563 [02:04<00:24, 10.80batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1300/1563 [02:04<00:24, 10.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1302/1563 [02:05<00:24, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  83%|████████▎ | 1304/1563 [02:05<00:23, 10.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▎ | 1306/1563 [02:05<00:24, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▎ | 1308/1563 [02:05<00:25, 10.11batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1310/1563 [02:05<00:26,  9.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1311/1563 [02:06<00:26,  9.53batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1312/1563 [02:06<00:26,  9.32batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1313/1563 [02:06<00:27,  9.18batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1314/1563 [02:06<00:26,  9.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1315/1563 [02:06<00:26,  9.30batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1316/1563 [02:06<00:26,  9.27batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1317/1563 [02:06<00:26,  9.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1318/1563 [02:06<00:26,  9.41batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1319/1563 [02:06<00:26,  9.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  84%|████████▍ | 1320/1563 [02:07<00:26,  9.28batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▍ | 1321/1563 [02:07<00:26,  9.22batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▍ | 1322/1563 [02:07<00:26,  9.12batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▍ | 1323/1563 [02:07<00:26,  9.06batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▍ | 1324/1563 [02:07<00:26,  8.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▍ | 1325/1563 [02:07<00:27,  8.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▍ | 1326/1563 [02:07<00:27,  8.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▍ | 1327/1563 [02:07<00:28,  8.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▍ | 1328/1563 [02:08<00:28,  8.20batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▌ | 1329/1563 [02:08<00:27,  8.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▌ | 1331/1563 [02:08<00:24,  9.45batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▌ | 1333/1563 [02:08<00:23,  9.89batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▌ | 1334/1563 [02:08<00:23,  9.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  85%|████████▌ | 1336/1563 [02:08<00:22, 10.10batch/s]\u001b[A\n",
-            "Epoch 2/2:  86%|████████▌ | 1338/1563 [02:08<00:21, 10.35batch/s]\u001b[A\n",
-            "Epoch 2/2:  86%|████████▌ | 1340/1563 [02:09<00:21, 10.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  86%|████████▌ | 1342/1563 [02:09<00:20, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  86%|████████▌ | 1344/1563 [02:09<00:20, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  86%|████████▌ | 1346/1563 [02:09<00:20, 10.52batch/s]\u001b[A\n",
-            "Epoch 2/2:  86%|████████▌ | 1348/1563 [02:09<00:20, 10.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  86%|████████▋ | 1350/1563 [02:10<00:20, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1352/1563 [02:10<00:19, 10.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1354/1563 [02:10<00:19, 10.71batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1356/1563 [02:10<00:19, 10.64batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1358/1563 [02:10<00:19, 10.77batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1360/1563 [02:11<00:18, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1362/1563 [02:11<00:19, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1364/1563 [02:11<00:18, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  87%|████████▋ | 1366/1563 [02:11<00:18, 10.59batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1368/1563 [02:11<00:18, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1370/1563 [02:11<00:18, 10.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1372/1563 [02:12<00:18, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1374/1563 [02:12<00:17, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1376/1563 [02:12<00:17, 10.73batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1378/1563 [02:12<00:17, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1380/1563 [02:12<00:17, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  88%|████████▊ | 1382/1563 [02:13<00:16, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▊ | 1384/1563 [02:13<00:16, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▊ | 1386/1563 [02:13<00:16, 10.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1388/1563 [02:13<00:16, 10.89batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1390/1563 [02:13<00:16, 10.71batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1392/1563 [02:14<00:15, 10.71batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1394/1563 [02:14<00:15, 10.71batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1396/1563 [02:14<00:16, 10.43batch/s]\u001b[A\n",
-            "Epoch 2/2:  89%|████████▉ | 1398/1563 [02:14<00:15, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|████████▉ | 1400/1563 [02:14<00:15, 10.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|████████▉ | 1402/1563 [02:14<00:15, 10.66batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|████████▉ | 1404/1563 [02:15<00:14, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|████████▉ | 1406/1563 [02:15<00:14, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|█████████ | 1408/1563 [02:15<00:14, 10.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|█████████ | 1410/1563 [02:15<00:14, 10.89batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|█████████ | 1412/1563 [02:15<00:14, 10.76batch/s]\u001b[A\n",
-            "Epoch 2/2:  90%|█████████ | 1414/1563 [02:16<00:13, 10.71batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████ | 1416/1563 [02:16<00:13, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████ | 1418/1563 [02:16<00:13, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████ | 1420/1563 [02:16<00:13, 10.77batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████ | 1422/1563 [02:16<00:13, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████ | 1424/1563 [02:17<00:12, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████ | 1426/1563 [02:17<00:12, 10.80batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████▏| 1428/1563 [02:17<00:12, 10.85batch/s]\u001b[A\n",
-            "Epoch 2/2:  91%|█████████▏| 1430/1563 [02:17<00:12, 10.83batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1432/1563 [02:17<00:12, 10.87batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1434/1563 [02:17<00:11, 10.76batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1436/1563 [02:18<00:12, 10.40batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1438/1563 [02:18<00:12, 10.03batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1440/1563 [02:18<00:12,  9.79batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1441/1563 [02:18<00:12,  9.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1442/1563 [02:18<00:12,  9.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1443/1563 [02:18<00:12,  9.36batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1444/1563 [02:19<00:12,  9.26batch/s]\u001b[A\n",
-            "Epoch 2/2:  92%|█████████▏| 1445/1563 [02:19<00:13,  8.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1446/1563 [02:19<00:13,  8.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1447/1563 [02:19<00:13,  8.54batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1448/1563 [02:19<00:13,  8.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1449/1563 [02:19<00:13,  8.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1450/1563 [02:19<00:12,  8.71batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1451/1563 [02:19<00:12,  8.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1452/1563 [02:19<00:13,  8.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1453/1563 [02:20<00:13,  8.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1454/1563 [02:20<00:13,  8.33batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1455/1563 [02:20<00:12,  8.38batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1456/1563 [02:20<00:12,  8.50batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1457/1563 [02:20<00:11,  8.88batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1459/1563 [02:20<00:10,  9.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  93%|█████████▎| 1461/1563 [02:20<00:10, 10.16batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▎| 1462/1563 [02:21<00:10,  9.86batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▎| 1463/1563 [02:21<00:10,  9.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▎| 1465/1563 [02:21<00:09, 10.20batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▍| 1467/1563 [02:21<00:09, 10.41batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▍| 1469/1563 [02:21<00:08, 10.47batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▍| 1471/1563 [02:21<00:08, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▍| 1473/1563 [02:22<00:08, 10.33batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▍| 1475/1563 [02:22<00:08, 10.49batch/s]\u001b[A\n",
-            "Epoch 2/2:  94%|█████████▍| 1477/1563 [02:22<00:08, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▍| 1479/1563 [02:22<00:07, 10.60batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▍| 1481/1563 [02:22<00:07, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▍| 1483/1563 [02:23<00:07, 10.56batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▌| 1485/1563 [02:23<00:07, 10.51batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▌| 1487/1563 [02:23<00:07, 10.61batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▌| 1489/1563 [02:23<00:06, 10.63batch/s]\u001b[A\n",
-            "Epoch 2/2:  95%|█████████▌| 1491/1563 [02:23<00:06, 10.71batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1493/1563 [02:23<00:06, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1495/1563 [02:24<00:06, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1497/1563 [02:24<00:06, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1499/1563 [02:24<00:05, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1501/1563 [02:24<00:05, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▌| 1503/1563 [02:24<00:05, 10.77batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▋| 1505/1563 [02:25<00:05, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  96%|█████████▋| 1507/1563 [02:25<00:05, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1509/1563 [02:25<00:05, 10.70batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1511/1563 [02:25<00:04, 10.65batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1513/1563 [02:25<00:04, 10.62batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1515/1563 [02:26<00:04, 10.69batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1517/1563 [02:26<00:04, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1519/1563 [02:26<00:04, 10.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1521/1563 [02:26<00:03, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  97%|█████████▋| 1523/1563 [02:26<00:03, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1525/1563 [02:26<00:03, 10.75batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1527/1563 [02:27<00:03, 10.77batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1529/1563 [02:27<00:03, 10.74batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1531/1563 [02:27<00:02, 10.87batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1533/1563 [02:27<00:02, 10.80batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1535/1563 [02:27<00:02, 10.84batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1537/1563 [02:28<00:02, 10.82batch/s]\u001b[A\n",
-            "Epoch 2/2:  98%|█████████▊| 1539/1563 [02:28<00:02, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▊| 1541/1563 [02:28<00:02, 10.81batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▊| 1543/1563 [02:28<00:01, 10.90batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▉| 1545/1563 [02:28<00:01, 10.95batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▉| 1547/1563 [02:28<00:01, 10.78batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▉| 1549/1563 [02:29<00:01, 10.68batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▉| 1551/1563 [02:29<00:01, 10.57batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▉| 1553/1563 [02:29<00:00, 10.42batch/s]\u001b[A\n",
-            "Epoch 2/2:  99%|█████████▉| 1555/1563 [02:29<00:00, 10.58batch/s]\u001b[A\n",
-            "Epoch 2/2: 100%|█████████▉| 1557/1563 [02:29<00:00, 10.63batch/s]\u001b[A\n",
-            "Epoch 2/2: 100%|█████████▉| 1559/1563 [02:30<00:00, 10.67batch/s]\u001b[A\n",
-            "Epoch 2/2: 100%|█████████▉| 1561/1563 [02:30<00:00, 10.55batch/s]\u001b[A\n",
-            "Epoch 2/2: 100%|██████████| 1563/1563 [02:30<00:00, 11.31batch/s]\u001b[A\n",
-            "Epochs: 100%|██████████| 2/2 [04:59<00:00, 149.91s/epoch]"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Epoch 2/2 tamamlandı. Kayıp: 1.3678, Doğruluk: 50.29%\n",
-            "DyT Eğitim Süresi: 299.82 saniye, Son Doğruluk: 50.29%\n",
-            "\n",
-            "Karşılaştırma:\n",
-            "RMSNorm - Süre: 310.00s, Doğruluk: 42.15%\n",
-            "DyT - Süre: 299.82s, Doğruluk: 50.29%\n"
-          ]
-        },
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "\n"
-          ]
-        }
-      ]
-    }
-  ]
-}
\ No newline at end of file
+    "id": "Ligm3e2erYq6",
+    "outputId": "4e4a2bee-0bd8-40ae-ee7d-c28bed94edec"
+   },
+   "execution_count": null,
+   "outputs": []
+  }
+ ]
+}
diff --git a/Genel-5/Mixture_of_Experts.ipynb b/Genel-5/Mixture_of_Experts.ipynb
index 992545e..98c4afe 100644
--- a/Genel-5/Mixture_of_Experts.ipynb
+++ b/Genel-5/Mixture_of_Experts.ipynb
@@ -1,147 +1,139 @@
 {
-  "nbformat": 4,
-  "nbformat_minor": 0,
-  "metadata": {
-    "colab": {
-      "provenance": [],
-      "authorship_tag": "ABX9TyNHvixUmBk+UdujKeVm4FmB",
-      "include_colab_link": true
-    },
-    "kernelspec": {
-      "name": "python3",
-      "display_name": "Python 3"
-    },
-    "language_info": {
-      "name": "python"
-    }
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+  "colab": {
+   "provenance": [],
+   "authorship_tag": "ABX9TyNHvixUmBk+UdujKeVm4FmB",
+   "include_colab_link": true
   },
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "view-in-github",
-        "colab_type": "text"
-      },
-      "source": [
-        "<a href=\"https://colab.research.google.com/github/emredeveloper/Transformers--General-AI/blob/main/Mixture_of_Experts.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
-      ]
+  "kernelspec": {
+   "name": "python3",
+   "display_name": "Python 3"
+  },
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "view-in-github",
+    "colab_type": "text"
+   },
+   "source": [
+    "<a href=\"https://colab.research.google.com/github/emredeveloper/Transformers--General-AI/blob/main/Mixture_of_Experts.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
-    {
-      "cell_type": "code",
-      "execution_count": 5,
-      "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "Y4Cmr0MnCzgt",
-        "outputId": "004036a3-bbff-439b-8dd7-71aa9b1258e3"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Çıkış boyutu: torch.Size([5, 10, 128])\n"
-          ]
-        }
-      ],
-      "source": [
-        "import torch\n",
-        "import torch.nn as nn\n",
-        "import torch.nn.functional as F\n",
-        "\n",
-        "class Expert(nn.Module):\n",
-        "    \"\"\"Tek bir uzmanın basit ileri beslemeli ağı\"\"\"\n",
-        "    def __init__(self, input_dim, hidden_dim):\n",
-        "        super(Expert, self).__init__()\n",
-        "        self.ffn = nn.Sequential(\n",
-        "            nn.Linear(input_dim, hidden_dim),\n",
-        "            nn.ReLU(),\n",
-        "            nn.Linear(hidden_dim, input_dim)\n",
-        "        )\n",
-        "\n",
-        "    def forward(self, x):\n",
-        "        return self.ffn(x)\n",
-        "\n",
-        "class Router(nn.Module):\n",
-        "    \"\"\"Yönlendirici: Hangi uzmanın etkinleştirileceğine karar verir.\"\"\"\n",
-        "    def __init__(self, input_dim, num_experts):\n",
-        "        super(Router, self).__init__()\n",
-        "        self.gate = nn.Linear(input_dim, num_experts)\n",
-        "\n",
-        "    def forward(self, x):\n",
-        "        # Uzmanlar için olasılık hesaplama\n",
-        "        return F.softmax(self.gate(x), dim=-1)\n",
-        "\n",
-        "class MoELayer(nn.Module):\n",
-        "    \"\"\"Mixture of Experts katmanı\"\"\"\n",
-        "    def __init__(self, input_dim, hidden_dim, num_experts, top_k=2):\n",
-        "        super(MoELayer, self).__init__()\n",
-        "        self.num_experts = num_experts\n",
-        "        self.top_k = top_k\n",
-        "        self.experts = nn.ModuleList([Expert(input_dim, hidden_dim) for _ in range(num_experts)])\n",
-        "        self.router = Router(input_dim, num_experts)\n",
-        "\n",
-        "    def forward(self, x):\n",
-        "        batch_size, seq_len, _ = x.size()  # Boyutları ayıkla\n",
-        "        x_flat = x.view(-1, x.size(-1))  # Batch ve seq_len birleştir\n",
-        "\n",
-        "        # Yönlendirici tarafından uzman seçimi\n",
-        "        route_weights = self.router(x_flat)\n",
-        "        topk_weights, topk_indices = torch.topk(route_weights, self.top_k, dim=-1)\n",
-        "\n",
-        "        # Uzmanların çıktılarının birleştirilmesi\n",
-        "        outputs = torch.zeros_like(x_flat)\n",
-        "        for i in range(self.top_k):\n",
-        "            weight = topk_weights[:, i].unsqueeze(-1)\n",
-        "            expert_idx = topk_indices[:, i]\n",
-        "            outputs += weight * torch.cat(\n",
-        "                [self.experts[expert](x_flat[j].unsqueeze(0)) for j, expert in enumerate(expert_idx)], dim=0\n",
-        "            )\n",
-        "\n",
-        "        # Orijinal boyuta geri dön\n",
-        "        outputs = outputs.view(batch_size, seq_len, -1)\n",
-        "        return outputs\n",
-        "\n",
-        "class MoETransformer(nn.Module):\n",
-        "    \"\"\"MoE içeren basit bir Transformer\"\"\"\n",
-        "    def __init__(self, input_dim, hidden_dim, num_heads, num_experts, top_k):\n",
-        "        super(MoETransformer, self).__init__()\n",
-        "        self.attention = nn.MultiheadAttention(embed_dim=input_dim, num_heads=num_heads, batch_first=True)\n",
-        "        self.moe_layer = MoELayer(input_dim, hidden_dim, num_experts, top_k)\n",
-        "        self.norm1 = nn.LayerNorm(input_dim)\n",
-        "        self.norm2 = nn.LayerNorm(input_dim)\n",
-        "\n",
-        "    def forward(self, x):\n",
-        "        # Multi-head attention\n",
-        "        attn_output, _ = self.attention(x, x, x)\n",
-        "        x = self.norm1(x + attn_output)\n",
-        "\n",
-        "        # Mixture of Experts katmanı\n",
-        "        moe_output = self.moe_layer(x)\n",
-        "        x = self.norm2(x + moe_output)\n",
-        "\n",
-        "        return x\n",
-        "\n",
-        "# Örnek kullanım\n",
-        "input_dim = 128\n",
-        "hidden_dim = 256\n",
-        "num_heads = 4\n",
-        "num_experts = 3\n",
-        "top_k = 2\n",
-        "seq_len = 10\n",
-        "batch_size = 5\n",
-        "\n",
-        "# Model oluşturma\n",
-        "model = MoETransformer(input_dim, hidden_dim, num_heads, num_experts, top_k)\n",
-        "\n",
-        "# Rastgele giriş verisi\n",
-        "x = torch.rand(batch_size, seq_len, input_dim)\n",
-        "\n",
-        "# Çıktı\n",
-        "output = model(x)\n",
-        "print(\"Çıkış boyutu:\", output.shape)"
-      ]
-    }
-  ]
-}
\ No newline at end of file
+    "id": "Y4Cmr0MnCzgt",
+    "outputId": "004036a3-bbff-439b-8dd7-71aa9b1258e3"
+   },
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.nn.functional as F\n",
+    "\n",
+    "class Expert(nn.Module):\n",
+    "    \"\"\"Simple feed-forward network for a single expert\"\"\"\n",
+    "    def __init__(self, input_dim, hidden_dim):\n",
+    "        super(Expert, self).__init__()\n",
+    "        self.ffn = nn.Sequential(\n",
+    "            nn.Linear(input_dim, hidden_dim),\n",
+    "            nn.ReLU(),\n",
+    "            nn.Linear(hidden_dim, input_dim)\n",
+    "        )\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        return self.ffn(x)\n",
+    "\n",
+    "class Router(nn.Module):\n",
+    "    \"\"\"Router: decides which expert to activate.\"\"\"\n",
+    "    def __init__(self, input_dim, num_experts):\n",
+    "        super(Router, self).__init__()\n",
+    "        self.gate = nn.Linear(input_dim, num_experts)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        # Compute probabilities for each expert\n",
+    "        return F.softmax(self.gate(x), dim=-1)\n",
+    "\n",
+    "class MoELayer(nn.Module):\n",
+    "    \"\"\"Mixture of Experts layer\"\"\"\n",
+    "    def __init__(self, input_dim, hidden_dim, num_experts, top_k=2):\n",
+    "        super(MoELayer, self).__init__()\n",
+    "        self.num_experts = num_experts\n",
+    "        self.top_k = top_k\n",
+    "        self.experts = nn.ModuleList([Expert(input_dim, hidden_dim) for _ in range(num_experts)])\n",
+    "        self.router = Router(input_dim, num_experts)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        batch_size, seq_len, _ = x.size()  # Extract dimensions\n",
+    "        x_flat = x.view(-1, x.size(-1))  # Merge batch and sequence dimensions\n",
+    "\n",
+    "        # Select experts via the router\n",
+    "        route_weights = self.router(x_flat)\n",
+    "        topk_weights, topk_indices = torch.topk(route_weights, self.top_k, dim=-1)\n",
+    "\n",
+    "        # Combine the outputs of the selected experts\n",
+    "        outputs = torch.zeros_like(x_flat)\n",
+    "        for i in range(self.top_k):\n",
+    "            weight = topk_weights[:, i].unsqueeze(-1)\n",
+    "            expert_idx = topk_indices[:, i]\n",
+    "            outputs += weight * torch.cat(\n",
+    "                [self.experts[expert](x_flat[j].unsqueeze(0)) for j, expert in enumerate(expert_idx)], dim=0\n",
+    "            )\n",
+    "\n",
+    "        # Restore the original shape\n",
+    "        outputs = outputs.view(batch_size, seq_len, -1)\n",
+    "        return outputs\n",
+    "\n",
+    "class MoETransformer(nn.Module):\n",
+    "    \"\"\"Simple Transformer with MoE\"\"\"\n",
+    "    def __init__(self, input_dim, hidden_dim, num_heads, num_experts, top_k):\n",
+    "        super(MoETransformer, self).__init__()\n",
+    "        self.attention = nn.MultiheadAttention(embed_dim=input_dim, num_heads=num_heads, batch_first=True)\n",
+    "        self.moe_layer = MoELayer(input_dim, hidden_dim, num_experts, top_k)\n",
+    "        self.norm1 = nn.LayerNorm(input_dim)\n",
+    "        self.norm2 = nn.LayerNorm(input_dim)\n",
+    "\n",
+    "    def forward(self, x):\n",
+    "        # Multi-head attention\n",
+    "        attn_output, _ = self.attention(x, x, x)\n",
+    "        x = self.norm1(x + attn_output)\n",
+    "\n",
+    "        # Mixture of Experts layer\n",
+    "        moe_output = self.moe_layer(x)\n",
+    "        x = self.norm2(x + moe_output)\n",
+    "\n",
+    "        return x\n",
+    "\n",
+    "# Example usage\n",
+    "input_dim = 128\n",
+    "hidden_dim = 256\n",
+    "num_heads = 4\n",
+    "num_experts = 3\n",
+    "top_k = 2\n",
+    "seq_len = 10\n",
+    "batch_size = 5\n",
+    "\n",
+    "# Build the model\n",
+    "model = MoETransformer(input_dim, hidden_dim, num_heads, num_experts, top_k)\n",
+    "\n",
+    "# Random input data\n",
+    "x = torch.rand(batch_size, seq_len, input_dim)\n",
+    "\n",
+    "# Output\n",
+    "output = model(x)\n",
+    "print(\"Output shape:\", output.shape)"
+   ]
+  }
+ ]
+}
diff --git a/Genel-5/PROJE_README.md b/Genel-5/PROJE_README.md
index 64a3d44..7692d22 100644
--- a/Genel-5/PROJE_README.md
+++ b/Genel-5/PROJE_README.md
@@ -1,52 +1,52 @@
-# Gelişmiş Görüntü İşleme Uygulaması
+# Advanced Image Processing Application
 
-Bu proje, büyük görüntüleri daha küçük parçalara bölen, bu parçalara çeşitli filtreler uygulayan ve sonrasında görüntüyü tekrar birleştiren bir Streamlit uygulamasıdır.
+This project is a Streamlit application that splits large images into smaller patches, applies a variety of filters to those patches, and then stitches the image back together.
 
-## Özellikler
+## Features
 
-- Büyük görüntüleri parçalara ayırma
-- Her parçaya farklı görüntü filtreleri uygulama
-- İşlenen parçaları orijinal boyutlarında birleştirme
-- Kullanıcı dostu arayüz
-- İşlenmiş görüntüyü indirme imkanı
+- Split large images into patches
+- Apply different image filters to each patch
+- Merge processed patches back to the original resolution
+- User-friendly interface
+- Option to download the processed image
 
-## Kurulum
+## Installation
 
-1. Gerekli kütüphaneleri yükleyin:
+1. Install the required libraries:
    ```
    pip install streamlit numpy torch Pillow
    ```
 
-2. Uygulamayı çalıştırın:
+2. Run the application:
    ```
    streamlit run image_processor_app.py
    ```
 
-## Kullanım
+## Usage
 
-1. Sol taraftaki menüden bir görüntü yükleyin
-2. İstediğiniz filtreyi seçin
-3. Örtüşme payını ve maksimum parça sayısını ayarlayın
-4. "Görüntüyü İşle" butonuna tıklayın
-5. İşlenmiş görüntüyü inceleyip indirebilirsiniz
+1. Upload an image from the menu on the left
+2. Choose the filter you want to apply
+3. Adjust the overlap margin and the maximum number of patches
+4. Click the "Process Image" button
+5. Review and download the processed image
 
-## Kullanılan Filtreler
+## Available Filters
 
-- Normal: Orijinal görüntü
-- Siyah-Beyaz: Gri tonlamalı görüntü
-- Blur: Bulanıklaştırma efekti
-- Kontur: Kenar belirleme
-- Keskinleştir: Görüntüyü keskinleştirme
+- Normal: Original image
+- Black & White: Grayscale conversion
+- Blur: Blurring effect
+- Contour: Edge detection
+- Sharpen: Enhance image sharpness
 
-## Geliştirme
+## Development
 
-Bu proje, büyük görüntüleri işlemek için parçalama ve birleştirme işlemlerini gösteren bir örnektir. Daha fazla özellik ekleyerek genişletebilirsiniz:
+This project demonstrates the workflow for splitting and recombining large images. You can extend it with additional features, such as:
 
-- Daha fazla filtre seçeneği
-- Parça boyutlarını özelleştirme
-- Toplu işlem yapabilme
-- Farklı kaydetme formatları
+- Additional filter options
+- Customizable patch sizes
+- Batch processing support
+- Alternative export formats
 
-## Lisans
+## License
 
 MIT
diff --git a/Genel-5/deeplearning-tracer.py b/Genel-5/deeplearning-tracer.py
index 61dbf65..49252c3 100644
--- a/Genel-5/deeplearning-tracer.py
+++ b/Genel-5/deeplearning-tracer.py
@@ -14,43 +14,43 @@ def print_section(title, color="cyan"):
     """Print a section header with rich formatting"""
     console.rule(f"[bold {color}]{title}", style=color)
 
-# --- 1. Daha Derin bir PyTorch Modeli Tanımla ---
+# --- 1. Define a Deeper PyTorch Model ---
 class DeepMLP(nn.Module):
     def __init__(self, input_dim, hidden_dims, output_dim, dropout_rate=0.1):
         """
-        Derin bir çok katmanlı algılayıcı (MLP) modeli
-        
+        A deep multi-layer perceptron (MLP) model
+
         Args:
-            input_dim: Giriş boyutu
-            hidden_dims: Gizli katman boyutlarını içeren liste
-            output_dim: Çıkış boyutu
-            dropout_rate: Dropout oranı (varsayılan: 0.1)
+            input_dim: Input dimension
+            hidden_dims: List containing the hidden layer sizes
+            output_dim: Output dimension
+            dropout_rate: Dropout rate (default: 0.1)
         """
         super().__init__()
         self.layers = nn.ModuleList()
-        
-        # Giriş katmanı
+
+        # Input layer
         prev_dim = input_dim
-        
-        # Gizli katmanları oluştur
+
+        # Create hidden layers
         for i, hidden_dim in enumerate(hidden_dims):
             self.layers.append(nn.Linear(prev_dim, hidden_dim))
             self.layers.append(nn.BatchNorm1d(hidden_dim))
             self.layers.append(nn.ReLU())
             self.layers.append(nn.Dropout(dropout_rate))
             prev_dim = hidden_dim
-            
-        # Çıkış katmanı
+
+        # Output layer
         self.output_layer = nn.Linear(prev_dim, output_dim)
-        
-        # Ağırlık başlatma
+
+        # Weight initialization
         self._init_weights()
-        
-        # Model bilgilerini göster
+
+        # Display model information
         self._print_model_info(input_dim, hidden_dims, output_dim, dropout_rate)
-    
+
     def _init_weights(self):
-        """Ağırlıkları Xavier/Glorot başlatma yöntemiyle başlat"""
+        """Initialize weights using Xavier/Glorot initialization"""
         for layer in self.layers:
             if isinstance(layer, nn.Linear):
                 nn.init.xavier_uniform_(layer.weight)
@@ -59,179 +59,176 @@ def _init_weights(self):
         nn.init.xavier_uniform_(self.output_layer.weight)
         if self.output_layer.bias is not None:
             nn.init.zeros_(self.output_layer.bias)
-    
+
     def _print_model_info(self, input_dim, hidden_dims, output_dim, dropout_rate):
-        """Model yapısı hakkında bilgi göster"""
+        """Display information about the model architecture"""
         total_params = sum(p.numel() for p in self.parameters() if p.requires_grad)
-        
+
         info_table = Table(show_header=False, box=box.ROUNDED, show_edge=False)
-        info_table.add_column("Özellik", style="cyan", no_wrap=True)
-        info_table.add_column("Değer", style="green")
-        
-        info_table.add_row("Model Türü", "Derin Çok Katmanlı Algılayıcı (MLP)")
-        info_table.add_row("Toplam Parametre", f"{total_params:,}")
-        info_table.add_row("Giriş Boyutu", str(input_dim))
-        info_table.add_row("Gizli Katmanlar", " → ".join(map(str, hidden_dims)))
-        info_table.add_row("Çıkış Boyutu", str(output_dim))
-        info_table.add_row("Dropout Oranı", str(dropout_rate))
-        
+        info_table.add_column("Feature", style="cyan", no_wrap=True)
+        info_table.add_column("Value", style="green")
+
+        info_table.add_row("Model Type", "Deep Multi-Layer Perceptron (MLP)")
+        info_table.add_row("Total Parameters", f"{total_params:,}")
+        info_table.add_row("Input Dimension", str(input_dim))
+        info_table.add_row("Hidden Layers", " → ".join(map(str, hidden_dims)))
+        info_table.add_row("Output Dimension", str(output_dim))
+        info_table.add_row("Dropout Rate", str(dropout_rate))
+
         console.print(Panel(
             info_table,
-            title="[bold green]Model Yapılandırması[/]",
+            title="[bold green]Model Configuration[/]",
             border_style="green",
             padding=(1, 2)
         ))
-    
+
     def forward(self, x):
-        """İleri yayılım"""
-        # Gizli katmanlardan geçir
+        """Forward pass"""
+        # Pass through hidden layers
         for layer in self.layers:
             x = layer(x)
-            
-        # Çıkış katmanı
+
+        # Output layer
         x = self.output_layer(x)
         return x
 
-# --- Kanca (Hook) için Global Depolama ve Durum Yönetimi ---
-# Gerçek bir uygulamada bu durumu daha temiz yönetmek istersiniz (örneğin bir sınıf içinde).
+# --- Global Storage and State Management for Hooks ---
+# In a real application you would likely manage this state in a cleaner way (e.g., inside a class).
 hook_state = {
-    "captured_activation": None,    # Yakalanan aktivasyonu saklamak için
-    "is_intervention_mode": False,  # Müdahale modunda olup olmadığımızı belirtir
-    "neuron_to_modify_idx": 0,    # Hangi nöronun aktivasyonuna müdahale edileceği
-    "intervention_value": 0.0     # Müdahale edilecek yeni değer
+    "captured_activation": None,    # Stores the captured activation
+    "is_intervention_mode": False,  # Indicates whether we are in intervention mode
+    "neuron_to_modify_idx": 0,    # Which neuron's activation to intervene on
+    "intervention_value": 0.0     # The value to inject during the intervention
 }
 
-# --- 2. Aktivasyonları Yakalamak ve Değiştirmek için bir Kanca (Hook) Uygula ---
+# --- 2. Apply a Hook to Capture and Modify Activations ---
 def activation_hook_fn(module, input_args, output_tensor):
     """
-    Bu bir PyTorch ileri (forward) kancasıdır.
-    Eğer 'is_intervention_mode' False ise, katmanın çıkış aktivasyonunu yakalar.
-    Eğer 'is_intervention_mode' True ise, belirtilen bir nöronun aktivasyonunu değiştirir.
+    This is a PyTorch forward hook.
+    If 'is_intervention_mode' is False, it captures the layer's output activation.
+    If 'is_intervention_mode' is True, it modifies the activation of a specified neuron.
     """
     global hook_state
 
     if not hook_state["is_intervention_mode"]:
-        # Normal (yakalama) mod: Aktivasyonu sakla
+        # Normal (capture) mode: store the activation
         hook_state["captured_activation"] = output_tensor.clone().detach()
-        # print(f"Kanca (Yakalama): {module} çıkışı yakalandı: {hook_state['captured_activation']}")
-        return None # Çıkışı değiştirme, orijinali kullanılsın
+        return None  # Do not modify the output
     else:
-        # Müdahale modu: Aktivasyonu değiştir
-        modified_output = output_tensor.clone() # Değişiklik yapmadan önce klonla!
+        # Intervention mode: modify the activation
+        modified_output = output_tensor.clone()  # Clone before modifying
 
-        # Örneğin, ilk nöronun (batch_size=1 varsayımıyla) aktivasyonunu değiştir
-        # output_tensor'un şekli [batch_size, num_features] beklenir
-        if modified_output.ndim == 2 and modified_output.shape[0] == 1: # [1, hidden_dim] gibi
+        # For example, change the activation of the first neuron (assuming batch_size=1)
+        # The output tensor is expected to have shape [batch_size, num_features]
+        if modified_output.ndim == 2 and modified_output.shape[0] == 1:  # e.g. [1, hidden_dim]
             neuron_idx = hook_state["neuron_to_modify_idx"]
             if 0 <= neuron_idx < modified_output.shape[1]:
-                # print(f"Kanca (Müdahale): {module} Nöron {neuron_idx} orijinal değeri: {modified_output[0, neuron_idx]}")
                 modified_output[0, neuron_idx] = hook_state["intervention_value"]
-                # print(f"Kanca (Müdahale): {module} Nöron {neuron_idx} yeni değeri: {modified_output[0, neuron_idx]}")
-                hook_state["captured_activation"] = modified_output.clone().detach() # Değiştirilmiş aktivasyonu da sakla
-                return modified_output # Değiştirilmiş aktivasyonu döndür
+                hook_state["captured_activation"] = modified_output.clone().detach()  # Store the modified activation
+                return modified_output  # Return the modified activation
             else:
-                print(f"Uyarı: Nöron indeksi {neuron_idx} sınırlar dışında.")
-                return None # Bir sorun varsa orijinali döndür
+                print(f"Warning: Neuron index {neuron_idx} is out of bounds.")
+                return None  # Fall back to the original activation on error
         else:
-            print(f"Uyarı: Kanca, [1, num_features] şeklinde aktivasyon bekliyordu, gelen: {modified_output.shape}")
-            return None # Bir sorun varsa orijinali döndür
+            print(f"Warning: The hook expected an activation shaped like [1, num_features], received: {modified_output.shape}")
+            return None  # Fall back to the original activation on error
 
-# --- Model ve Veri Kurulumu ---
+# --- Model and Data Setup ---
 input_dim = 10
-hidden_dims = [64, 32, 16]  # Daha derin mimari
+hidden_dims = [64, 32, 16]  # Deeper architecture
 output_dim = 2
 dropout_rate = 0.1
 
-# Modeli oluştur
+# Create the model
 model = DeepMLP(input_dim, hidden_dims, output_dim, dropout_rate)
 
-# Kullanılabilir cihazı belirle (GPU varsa onu kullan)
+# Detect the available device (use GPU if present)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model = model.to(device)
 
-# Model özetini göster
-console.print(f"\n[bold]Model {device} cihazına yüklendi.[/]")
-console.print(f"Eğitilebilir parametre sayısı: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}")
+# Display a model summary
+console.print(f"\n[bold]Model loaded to {device}.[/]")
+console.print(f"Number of trainable parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}")
 
-# Tüm ReLU katmanlarına kancaları kaydet
+# Register hooks on all ReLU layers
 hook_handles = []
 for i, layer in enumerate(model.layers):
     if isinstance(layer, nn.ReLU):
         handle = layer.register_forward_hook(activation_hook_fn)
         hook_handles.append(handle)
-        print(f"ReLU katmanına kanca eklendi: {i}")
+        print(f"Hook added to ReLU layer: {i}")
 
 if not hook_handles:
-    raise ValueError("Modelde hiç ReLU katmanı bulunamadı!")
+    raise ValueError("No ReLU layers found in the model!")
 
-# Rastgele bir girdi verisi oluştur (basitlik için batch_size=1)
+# Create random input data (batch_size=1 for simplicity)
 dummy_input = torch.randn(1, input_dim).to(device)
 
-# Girdi verisi hakkında bilgi
+# Display information about the input data
 input_info = Table(show_header=True, header_style="bold magenta", box=box.ROUNDED)
-input_info.add_column("Özellik", style="cyan")
-input_info.add_column("Değer", style="green")
-input_info.add_row("Girdi Boyutu", f"{tuple(dummy_input.shape)}")
-input_info.add_row("Min Değer", f"{dummy_input.min().item():.4f}")
-input_info.add_row("Maksimum Değer", f"{dummy_input.max().item():.4f}")
-input_info.add_row("Ortalama", f"{dummy_input.mean().item():.4f}")
-input_info.add_row("Standart Sapma", f"{dummy_input.std().item():.4f}")
+input_info.add_column("Feature", style="cyan")
+input_info.add_column("Value", style="green")
+input_info.add_row("Input Shape", f"{tuple(dummy_input.shape)}")
+input_info.add_row("Minimum", f"{dummy_input.min().item():.4f}")
+input_info.add_row("Maximum", f"{dummy_input.max().item():.4f}")
+input_info.add_row("Mean", f"{dummy_input.mean().item():.4f}")
+input_info.add_row("Standard Deviation", f"{dummy_input.std().item():.4f}")
 
 console.print(Panel(
     input_info,
-    title="[bold blue]Girdi Verisi İstatistikleri[/]",
+    title="[bold blue]Input Data Statistics[/]",
     border_style="blue",
     padding=(1, 2)
 ))
 
-# İlk 5 özelliği göster
+# Show the first five features
 input_sample = Table(show_header=True, header_style="bold magenta", box=box.ROUNDED)
-input_sample.add_column("Özellik İndeksi", style="cyan")
-input_sample.add_column("Değer", style="green")
+input_sample.add_column("Feature Index", style="cyan")
+input_sample.add_column("Value", style="green")
 
 for i, val in enumerate(dummy_input.squeeze().cpu().numpy()[:5]):
     input_sample.add_row(f"{i}", f"{val:.6f}")
 
 console.print(Panel(
-    input_info,
-    title="[bold blue]Girdi Verisi (İlk 5 Özellik)[/]",
+    input_sample,
+    title="[bold blue]Input Data (First 5 Features)[/]",
     border_style="blue",
     padding=(1, 2)
 ))
-print_section("🔧 Model ve Veri Kurulumu")
-console.print(f"[bold]Model Yapısı:[/] [cyan]Input: {input_dim}[/] → [green]Hidden: {hidden_dim}[/] → [yellow]Output: {output_dim}[/]")
-console.print(f"[bold]Girdi Verisi:[/] {dummy_input.squeeze().tolist()[:5]}... [dim](ilk 5 özellik gösteriliyor)[/dim]\n")
+print_section("🔧 Model and Data Setup")
+console.print(f"[bold]Model Architecture:[/] [cyan]Input: {input_dim}[/] → [green]Hidden: {hidden_dims}[/] → [yellow]Output: {output_dim}[/]")
+console.print(f"[bold]Input Sample:[/] {dummy_input.squeeze().tolist()[:5]}... [dim](showing the first 5 features)[/dim]\n")
 
-# --- 3. "Temiz Çalıştırma": Temel aktivasyonları ve çıktıyı al ---
-print_section("🔍 Temiz Çalıştırma (Müdahalesiz)")
+# --- 3. "Clean Run": Capture the baseline activations and output ---
+print_section("🔍 Clean Run (No Intervention)")
 
 hook_state["is_intervention_mode"] = False
 with torch.no_grad():
     original_output = model(dummy_input)
     clean_hidden_activation = hook_state["captured_activation"]
 
-# Gizli katman aktivasyonlarını gösteren tablo
+# Table showing hidden layer activations
 table = Table(show_header=True, header_style="bold magenta", box=box.ROUNDED)
-table.add_column("Nöron", style="dim", width=12)
-table.add_column("Aktivasyon Değeri", justify="right")
+table.add_column("Neuron", style="dim", width=12)
+table.add_column("Activation Value", justify="right")
 
 for i, val in enumerate(clean_hidden_activation.squeeze().tolist()):
-    table.add_row(f"Nöron {i}", f"{val:.4f}")
+    table.add_row(f"Neuron {i}", f"{val:.4f}")
 
 console.print(Panel.fit(
     table,
-    title="[bold]Gizli Katman Aktivasyonları (ReLU Sonrası)",
+    title="[bold]Hidden Layer Activations (Post-ReLU)",
     border_style="green",
     padding=(1, 2)
 ))
 
-console.print(f"\n[bold]Model Çıktısı:[/] {original_output.squeeze().tolist()}")
+console.print(f"\n[bold]Model Output:[/] {original_output.squeeze().tolist()}")
 console.rule(style="dim")
 
-# --- 4. "Müdahale Çalıştırması": Bir aktivasyonu değiştir ve etkiyi gör ---
-print_section("🔧 Müdahale Çalıştırması")
+# --- 4. "Intervention Run": Change an activation and observe the effect ---
+print_section("🔧 Intervention Run")
 
-# Müdahale ayarları
+# Intervention settings
 neuron_idx = 0
 new_value = 10.0
 
@@ -243,29 +240,29 @@ def activation_hook_fn(module, input_args, output_tensor):
     intervened_output = model(dummy_input)
     intervened_hidden_activation = hook_state["captured_activation"]
 
-# Müdahale özeti
-console.print(f"[bold]Müdahale Detayları:[/]")
-console.print(f"  • [yellow]Hedef Nöron:[/] [bold]{neuron_idx}[/]")
-console.print(f"  • [yellow]Yeni Değer:[/] [bold]{new_value}[/]")
+# Intervention summary
+console.print(f"[bold]Intervention Details:[/]")
+console.print(f"  • [yellow]Target Neuron:[/] [bold]{neuron_idx}[/]")
+console.print(f"  • [yellow]New Value:[/] [bold]{new_value}[/]")
 
-# Müdahale edilmiş aktivasyonlar tablosu
+# Table with modified activations
 modified_table = Table(show_header=True, header_style="bold magenta", box=box.ROUNDED)
-modified_table.add_column("Nöron", style="dim", width=12)
-modified_table.add_column("Önceki Değer", justify="right")
-modified_table.add_column("Yeni Değer", justify="right")
-modified_table.add_column("Durum", justify="center")
+modified_table.add_column("Neuron", style="dim", width=12)
+modified_table.add_column("Previous Value", justify="right")
+modified_table.add_column("New Value", justify="right")
+modified_table.add_column("Status", justify="center")
 
 for i, (orig, new) in enumerate(zip(
     clean_hidden_activation.squeeze().tolist(),
     intervened_hidden_activation.squeeze().tolist()
 )):
     modified = i == neuron_idx
-    status = "[bold red]✗ Değiştirildi" if modified else "[green]✓ Aynı"
+    status = "[bold red]✗ Modified" if modified else "[green]✓ Unchanged"
     orig_val = f"[strike dim]{orig:.4f}[/]" if modified else f"{orig:.4f}"
     new_val = f"[bold red]{new:.4f}" if modified else f"{new:.4f}"
-    
+
     modified_table.add_row(
-        f"Nöron {i}",
+        f"Neuron {i}",
         orig_val,
         new_val,
         status
@@ -273,23 +270,23 @@ def activation_hook_fn(module, input_args, output_tensor):
 
 console.print(Panel.fit(
     modified_table,
-    title="[bold]Gizli Katman Karşılaştırması",
+    title="[bold]Hidden Layer Comparison",
     border_style="yellow",
     padding=(1, 2)
 ))
 
-console.print(f"\n[bold]Yeni Model Çıktısı:[/] {intervened_output.squeeze().tolist()}")
+console.print(f"\n[bold]New Model Output:[/] {intervened_output.squeeze().tolist()}")
 console.rule(style="dim")
 
-# --- 5. Karşılaştır ---
-print_section("📊 Sonuçların Karşılaştırılması")
+# --- 5. Compare Results ---
+print_section("📊 Comparing Results")
 
-# Çıktı karşılaştırma tablosu
+# Output comparison table
 output_table = Table(show_header=True, header_style="bold magenta", box=box.ROUNDED)
-output_table.add_column("Çıktı Nöronu", style="dim", width=12)
-output_table.add_column("Orijinal Değer", justify="right")
-output_table.add_column("Yeni Değer", justify="right")
-output_table.add_column("Fark", justify="right")
+output_table.add_column("Output Neuron", style="dim", width=12)
+output_table.add_column("Original Value", justify="right")
+output_table.add_column("New Value", justify="right")
+output_table.add_column("Difference", justify="right")
 
 orig_outputs = original_output.squeeze().tolist()
 new_outputs = intervened_output.squeeze().tolist()
@@ -298,7 +295,7 @@ def activation_hook_fn(module, input_args, output_tensor):
 for i, (orig, new, diff) in enumerate(zip(orig_outputs, new_outputs, diffs)):
     diff_style = "[red]" if diff > 0.1 else "[green]"
     output_table.add_row(
-        f"Çıktı {i}",
+        f"Output {i}",
         f"{orig:.6f}",
         f"{new:.6f}",
         f"{diff_style}{diff:.6f}"
@@ -306,21 +303,22 @@ def activation_hook_fn(module, input_args, output_tensor):
 
 console.print(Panel.fit(
     output_table,
-    title="[bold]Çıktı Karşılaştırması",
+    title="[bold]Output Comparison",
     border_style="blue",
     padding=(1, 2)
 ))
 
-# Özet istatistikler
-console.print("\n[bold]📈 Özet İstatistikler:[/]")
-console.print(f"  • [yellow]Toplam Mutlak Fark:[/] {torch.sum(torch.abs(original_output - intervened_output)):.6f}")
-console.print(f"  • [yellow]Maksimum Fark:[/] {torch.max(torch.abs(original_output - intervened_output)):.6f}")
-console.print(f"  • [yellow]Ortalama Mutlak Fark:[/] {torch.mean(torch.abs(original_output - intervened_output)):.6f}")
+# Summary statistics
+console.print("\n[bold]📈 Summary Statistics:[/]")
+console.print(f"  • [yellow]Total Absolute Difference:[/] {torch.sum(torch.abs(original_output - intervened_output)):.6f}")
+console.print(f"  • [yellow]Maximum Difference:[/] {torch.max(torch.abs(original_output - intervened_output)):.6f}")
+console.print(f"  • [yellow]Mean Absolute Difference:[/] {torch.mean(torch.abs(original_output - intervened_output)):.6f}")
 
-# Kanca temizliği hakkında bilgi
-console.print("\n[dim]Not: Kanca başarıyla kaldırıldı.[/dim]")
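+# Report hook cleanup. NOTE(review): this prints before the handle.remove() loop at the end of the script runs; confirm the hooks were already detached earlier.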
+console.print("\n[dim]Note: Hooks have been removed successfully.[/dim]")
 
 
-# Kancayı işiniz bittiğinde kaldırmayı unutmayın,
-# özellikle bir notebook'ta hücreleri tekrar tekrar çalıştırıyorsanız.
-hook_handle.remove()
\ No newline at end of file
+# Always remove hooks when you're done, especially if you are repeatedly
+# executing cells in a notebook environment.
+for handle in hook_handles:
+    handle.remove()
diff --git a/Genel-5/dit_code_explanation.md b/Genel-5/dit_code_explanation.md
index 661d967..7875e01 100644
--- a/Genel-5/dit_code_explanation.md
+++ b/Genel-5/dit_code_explanation.md
@@ -1,187 +1,187 @@
-# DiT (Diffusion Transformer) Kodunun Detaylı Açıklaması
+# Detailed Walkthrough of the DiT (Diffusion Transformer) Code
 
-## 1. Genel Bakış
-Bu kod, görsel üretimi için kullanılan **Diffusion Transformer (DiT)** mimarisinin dinamik bir versiyonudur. Temel amacı, metinden görsel üretmek veya görsel iyileştirme yapmaktır.
+## 1. Overview
+This module implements a dynamic version of the **Diffusion Transformer (DiT)** architecture for image generation. It can be used for text-to-image synthesis as well as refinement tasks such as super-resolution or inpainting.
 
-## 2. Yardımcı Fonksiyonlar
+## 2. Utility Functions
 
-### `round_to_nearest` Fonksiyonu
+### `round_to_nearest`
 ```python
 def round_to_nearest(input_size, width_mult, num_heads, min_value=1):
 ```
-- **Amacı**: Ağın boyutlarını attention head sayısına göre uygun şekilde yuvarlar
-- **Kullanımı**: Dinamik genişlik ayarlaması için
-- **Çalışma Prensibi**: width_mult parametresini num_heads'e göre normalize eder
+- **Purpose**: Round model widths so they align with the attention head count.
+- **Usage**: Ensures dynamic width scaling remains compatible with the number of heads.
+- **How it works**: Normalises `width_mult` with respect to `num_heads` and clamps to `min_value`.
 
-## 3. Dinamik Linear Katmanlar
+## 3. Dynamic Linear Layers
 
-### `DynaLinear` Sınıfı
+### `DynaLinear`
 ```python
 class DynaLinear(nn.Linear):
 ```
-- **Amacı**: Çalışma zamanında boyutu değişebilen linear katman
-- **Özellikler**:
-  - `in_features` ve `out_features` dinamik olarak ayarlanabilir
-  - `width_mult` parametresi ile genişlik kontrolü
-  - `dyna_dim` ile hangi boyutların dinamik olacağı belirlenir
+- **Goal**: Provide a linear layer whose input and output dimensions can change at runtime.
+- **Highlights**:
+  - `in_features` and `out_features` can be reconfigured on the fly.
+  - `width_mult` controls the width multiplier.
+  - `dyna_dim` determines which dimension is treated dynamically.
 
-### `DynaQKVLinear` Sınıfı
+### `DynaQKVLinear`
 ```python
 class DynaQKVLinear(nn.Linear):
 ```
-- **Amacı**: Attention mekanizması için Query, Key, Value matrislerini üreten dinamik katman
-- **Özellikler**:
-  - QKV'yi tek seferde hesaplar (3 ayrı matris)
-  - `einops` kütüphanesi ile tensor reshape işlemleri
-  - Dinamik boyut ayarlaması
+- **Goal**: Produce the query, key, and value matrices with a single projection.
+- **Highlights**:
+  - Generates Q, K, and V in one pass.
+  - Uses `einops` for concise tensor reshaping.
+  - Supports dynamic width selection just like `DynaLinear`.
 
-## 4. Attention Mekanizması
+## 4. Attention Mechanism
 
-### `Attention` Sınıfı
+### `Attention`
 ```python
 class Attention(nn.Module):
 ```
-- **Amacı**: Multi-head self-attention mekanizması
-- **Bileşenler**:
-  - `qkv`: Query, Key, Value üretimi için DynaQKVLinear
-  - `q_norm`, `k_norm`: Query ve Key normalizasyonu
-  - `proj`: Çıkış projeksiyonu için DynaLinear
-  - `channel_mask`: Dinamik kanal maskeleme
-
-**Çalışma Prensibi**:
-1. Input tensor'dan Q, K, V üretir
-2. Attention skorlarını hesaplar
-3. Channel mask uygulanabilir
-4. Sonucu project eder
+- **Goal**: Implement the multi-head self-attention block.
+- **Components**:
+  - `qkv`: A `DynaQKVLinear` layer that emits query, key, and value tensors.
+  - `q_norm`, `k_norm`: Normalisation layers applied to query and key tensors.
+  - `proj`: A `DynaLinear` layer for the final projection.
+  - `channel_mask`: Optional channel-wise masking for dynamic pruning.
+
+**Execution Flow**:
+1. Compute Q, K, and V from the input tensor.
+2. Form attention scores and apply normalisation.
+3. Optionally apply channel masks for sparsity.
+4. Project the attended result back to the model dimension.
 
 ## 5. MLP (Multi-Layer Perceptron)
 
-### `Mlp` Sınıfı
+### `Mlp`
 ```python
 class Mlp(nn.Module):
 ```
-- **Amacı**: Feed-forward ağ
-- **Yapısı**:
-  - `fc1`: İlk linear katman (genişletme)
-  - `act`: Aktivasyon fonksiyonu (GELU)
-  - `fc2`: İkinci linear katman (daraltma)
-  - Channel masking desteği
+- **Goal**: Standard feed-forward network.
+- **Structure**:
+  - `fc1`: Expands the hidden dimension.
+  - `act`: GELU activation.
+  - `fc2`: Projects back to the model width.
+  - Built-in support for channel masking.
 
-## 6. Embedding Katmanları
+## 6. Embedding Modules
 
-### `TimestepEmbedder` Sınıfı
+### `TimestepEmbedder`
 ```python
 class TimestepEmbedder(nn.Module):
 ```
-- **Amacı**: Diffusion timestep'lerini vektör formatına çevirir
-- **Yöntem**: Sinusoidal embedding + MLP
-- **Kullanımı**: Diffusion process'inde hangi adımda olduğumuzu modele söyler
+- **Goal**: Encode diffusion timesteps into vector representations.
+- **Technique**: Sinusoidal embeddings followed by an MLP.
+- **Usage**: Provides the model with information about the current diffusion step.
 
-### `LabelEmbedder` Sınıfı
+### `LabelEmbedder`
 ```python
 class LabelEmbedder(nn.Module):
 ```
-- **Amacı**: Sınıf etiketlerini embedding'e çevirir
-- **Özellikler**:
-  - Classifier-free guidance için dropout desteği
-  - Training sırasında random etiket düşürme
+- **Goal**: Embed class labels.
+- **Highlights**:
+  - Includes dropout for classifier-free guidance.
+  - Randomly drops labels during training so the model also learns the unconditional prediction needed for guidance.
 
-## 7. Ana Model Bileşenleri
+## 7. Core Model Components
 
-### `DiTBlock` Sınıfı
+### `DiTBlock`
 ```python
 class DiTBlock(nn.Module):
 ```
-- **Amacı**: DiT'in temel yapı taşı
-- **Bileşenler**:
-  - `norm1`, `norm2`: Layer normalization
-  - `attn`: Attention mekanizması
-  - `mlp`: Feed-forward ağ
-  - `adaLN_modulation`: Adaptive Layer Norm modülasyonu
-  - `attn_rate`, `mlp_rate`: Dinamik oran kontrolleri
-  - `token_selection`: Token seçim mekanizması
-
-**AdaLN-Zero Kondisyonlama**:
-- Timestep ve class bilgilerini kullanarak normalizasyon parametrelerini ayarlar
-- `shift` ve `scale` parametreleri ile kondisyonlama yapar
-
-### `FinalLayer` Sınıfı
+- **Goal**: The fundamental building block of the DiT architecture.
+- **Components**:
+  - `norm1`, `norm2`: Layer-normalisation layers.
+  - `attn`: The dynamic attention module.
+  - `mlp`: Feed-forward network.
+  - `adaLN_modulation`: Adaptive layer-norm modulation unit.
+  - `attn_rate`, `mlp_rate`: Runtime controls for channel counts.
+  - `token_selection`: Mechanism for selecting the most informative tokens.
+
+**AdaLN-Zero Conditioning**:
+- Uses timestep and class embeddings to modulate the normalisation parameters.
+- Applies `shift` and `scale` values to adapt each block based on the conditioning signal.
+
+### `FinalLayer`
 ```python
 class FinalLayer(nn.Module):
 ```
-- **Amacı**: Son çıkış katmanı
-- **Görevi**: Hidden state'i patch formatına dönüştürür
+- **Goal**: Serve as the final output layer of the model.
+- **Responsibility**: Convert the hidden state back into the patch format.
 
-## 8. Ana DiT Modeli
+## 8. The Main DiT Model
 
-### `DiT` Sınıfı
+### `DiT`
 ```python
 class DiT(nn.Module):
 ```
-- **Amacı**: Ana diffusion transformer modeli
-- **Bileşenler**:
-  - `x_embedder`: Görsel patch embedding
-  - `t_embedder`: Timestep embedding
-  - `y_embedder`: Label embedding
-  - `pos_embed`: Pozisyonel embedding (sabit)
-  - `blocks`: DiTBlock'ların listesi
-  - `final_layer`: Son çıkış katmanı
+- **Goal**: Assemble the full diffusion transformer pipeline.
+- **Components**:
+  - `x_embedder`: Converts image patches into embeddings.
+  - `t_embedder`: Encodes diffusion timesteps.
+  - `y_embedder`: Encodes class labels (optional).
+  - `pos_embed`: Static positional embeddings.
+  - `blocks`: A stack of `DiTBlock` instances.
+  - `final_layer`: Produces the reconstructed image patches.
 
 **Forward Pass**:
-1. Görsel input'u patch'lere böler ve embedding'e çevirir
-2. Timestep ve label embedding'lerini hesaplar
-3. Tüm DiTBlock'lardan geçirir
-4. Final layer ile çıkış üretir
-5. Patch'leri geri görsel formatına çevirir
+1. Split the input image into patches and embed them.
+2. Compute the timestep and (optional) label embeddings that serve as the conditioning signal.
+3. Propagate through every `DiTBlock` with adaptive conditioning.
+4. Decode through the final layer.
+5. Reassemble the patches into the output image tensor.
 
-### `forward_with_cfg` Metodu
-- **Amacı**: Classifier-free guidance ile çıkarım
-- **Yöntem**: Conditional ve unconditional prediction'ları birleştirir
+### `forward_with_cfg`
+- **Goal**: Perform inference with classifier-free guidance.
+- **Method**: Combine conditional and unconditional predictions to steer the output.
 
-## 9. Pozisyonel Embedding Fonksiyonları
+## 9. Positional Embedding Helpers
 
-### `get_2d_sincos_pos_embed` ve İlgili Fonksiyonlar
-- **Amacı**: 2D pozisyonel embedding'ler oluşturur
-- **Yöntem**: Sinüs-cosinüs tabanlı encoding
-- **Kullanımı**: Spatial pozisyon bilgisini modele verir
+### `get_2d_sincos_pos_embed` and Related Functions
+- **Goal**: Generate 2D sinusoidal positional embeddings.
+- **Technique**: Use sine/cosine patterns across both spatial axes.
+- **Usage**: Inject spatial coordinates into the transformer input.
 
-## 10. Model Konfigürasyonları
+## 10. Model Configurations
 
-### Önceden Tanımlı Modeller
+### Predefined Variants
 ```python
 DiT_models = {
-    'DiT-XL/2': DiT_XL_2,  # En büyük model, 2x2 patch
-    'DiT-L/2':  DiT_L_2,   # Büyük model
-    'DiT-B/2':  DiT_B_2,   # Orta model
-    'DiT-S/2':  DiT_S_2,   # Küçük model
+    'DiT-XL/2': DiT_XL_2,  # Largest model, 2x2 patches
+    'DiT-L/2':  DiT_L_2,   # Large model
+    'DiT-B/2':  DiT_B_2,   # Base model
+    'DiT-S/2':  DiT_S_2,   # Small model
 }
 ```
 
-**Model Boyutları**:
-- **XL**: 28 katman, 1152 hidden size, 16 head
-- **L**: 24 katman, 1024 hidden size, 16 head
-- **B**: 12 katman, 768 hidden size, 12 head
-- **S**: 12 katman, 384 hidden size, 6 head
+**Model Sizes**:
+- **XL**: 28 layers, 1152 hidden size, 16 heads.
+- **L**: 24 layers, 1024 hidden size, 16 heads.
+- **B**: 12 layers, 768 hidden size, 12 heads.
+- **S**: 12 layers, 384 hidden size, 6 heads.
 
-**Patch Boyutları**:
-- `/2`: 2x2 patch (yüksek çözünürlük)
-- `/4`: 4x4 patch (orta çözünürlük)
-- `/8`: 8x8 patch (düşük çözünürlük)
+**Patch Options**:
+- `/2`: 2×2 patches (highest resolution).
+- `/4`: 4×4 patches (medium resolution).
+- `/8`: 8×8 patches (lowest resolution).
 
-## 11. Dinamik Özellikler
+## 11. Dynamic Capabilities
 
-Bu kod'un en önemli özelliği **dinamik adaptasyon** kabiliyeti:
+The signature feature of this implementation is its **dynamic adaptation**:
 
-1. **Dinamik Kanal Sayısı**: `attn_rate` ve `mlp_rate` ile kanal sayısı çalışma zamanında ayarlanır
-2. **Token Seçimi**: `token_selection` ile önemli token'lar seçilir
-3. **Adaptive Genişlik**: `width_mult` parametresi ile model genişliği ayarlanır
-4. **Conditional Execution**: `complete_model` parametresi ile tam model veya dinamik model seçimi
+1. **Channel Scaling**: `attn_rate` and `mlp_rate` adjust channel counts on the fly.
+2. **Token Selection**: `token_selection` prunes to the most informative tokens.
+3. **Adaptive Width**: `width_mult` widens or narrows layers dynamically.
+4. **Conditional Execution**: `complete_model` toggles between the full and dynamic variants.
 
-## 12. Kullanım Alanları
+## 12. Use Cases
 
-- **Görsel Üretimi**: Text-to-image generation
-- **Görsel Düzenleme**: Image inpainting, super-resolution
-- **Stil Transfer**: Style-aware image generation
-- **Conditional Generation**: Class-conditional image synthesis
+- **Image Generation**: Text-to-image workflows.
+- **Image Editing**: Inpainting, outpainting, and super-resolution.
+- **Style Transfer**: Style-aware synthesis and adaptation.
+- **Conditional Generation**: Class-conditioned or prompt-conditioned sampling.
 
-Bu kod, modern diffusion modellerinin transformer mimarisi ile kombinasyonunu gösteriyor ve dinamik adaptasyon ile efficiency'yi artırmaya odaklanıyor.
\ No newline at end of file
+Overall, the code demonstrates how modern diffusion models can benefit from transformer architectures while leveraging dynamic adaptation to improve efficiency.
diff --git a/Genel-5/dit_implementation.py b/Genel-5/dit_implementation.py
index 6741aa2..725df21 100644
--- a/Genel-5/dit_implementation.py
+++ b/Genel-5/dit_implementation.py
@@ -1,133 +1,133 @@
-# Gerekli kütüphaneleri içe aktar
-import torch  # PyTorch kütüphanesi
-import torch.nn as nn  # Sinir ağı modülleri
-import torch.nn.functional as F  # Fonksiyonel operasyonlar
-import math  # Matematiksel işlemler
-import numpy as np  # Sayısal işlemler
-from typing import Optional, Tuple  # Tip ipuçları
-import matplotlib.pyplot as plt  # Görselleştirme için
+# Import required libraries
+import torch  # PyTorch framework
+import torch.nn as nn  # Neural network modules
+import torch.nn.functional as F  # Functional operations
+import math  # Mathematical utilities
+import numpy as np  # Numerical computations
+from typing import Optional, Tuple  # Type hints
+import matplotlib.pyplot as plt  # Visualization
 
 class TimestepEmbedding(nn.Module):
-    """Zaman adımları için sinüzoidal gömme vektörleri oluşturur, transformer pozisyonel kodlamalarına benzer"""
+    """Generate sinusoidal embeddings for timesteps, similar to transformer positional encodings."""
     
     def __init__(self, dim: int):
         super().__init__()
-        self.dim = dim  # Gömme boyutu
-        
+        self.dim = dim  # Embedding size
+
     def forward(self, timesteps: torch.Tensor) -> torch.Tensor:
-        device = timesteps.device  # Hesaplamanın yapılacağı cihaz (CPU/GPU)
-        half_dim = self.dim // 2  # Boyutun yarısı
-        # Logaritmik ölçekli frekanslar oluştur
+        device = timesteps.device  # Device where the computation happens (CPU/GPU)
+        half_dim = self.dim // 2  # Half of the embedding size
+        # Compute the log-scale step between consecutive frequencies
         embeddings = math.log(10000) / (half_dim - 1)
-        # Üstel fonksiyonla frekansları hesapla
+        # Compute the frequencies with an exponential schedule
         embeddings = torch.exp(torch.arange(half_dim, device=device) * -embeddings)
-        # Zaman adımlarıyla çarparak gömme matrisini oluştur
+        # Multiply with timesteps to create the embedding matrix
         embeddings = timesteps[:, None] * embeddings[None, :]
-        # Sinüs ve kosinüs değerlerini birleştir
+        # Concatenate sine and cosine representations
         embeddings = torch.cat([torch.sin(embeddings), torch.cos(embeddings)], dim=-1)
         return embeddings
 
 class MultiHeadAttention(nn.Module):
-    """Çok kafalı dikkat mekanizması"""
-    
+    """Multi-head attention module."""
+
     def __init__(self, d_model: int, n_heads: int, dropout: float = 0.1):
         super().__init__()
-        # Model boyutunun kafa sayısına bölünebilir olması gerekir
+        # The model dimension must be divisible by the head count
         assert d_model % n_heads == 0
-        
-        self.d_model = d_model  # Girdi boyutu
-        self.n_heads = n_heads    # Kafa sayısı
-        self.d_k = d_model // n_heads  # Her kafanın boyutu
-        
-        # Sorgu, anahtar, değer ve çıkış dönüşümleri
-        self.w_q = nn.Linear(d_model, d_model)  # Sorgu dönüşümü
-        self.w_k = nn.Linear(d_model, d_model)  # Anahtar dönüşümü
-        self.w_v = nn.Linear(d_model, d_model)  # Değer dönüşümü
-        self.w_o = nn.Linear(d_model, d_model)  # Çıkış dönüşümü
-        
-        self.dropout = nn.Dropout(dropout)  # Aşırı öğrenmeyi önlemek için dropout
-        
+
+        self.d_model = d_model  # Input dimension
+        self.n_heads = n_heads  # Number of heads
+        self.d_k = d_model // n_heads  # Dimension per head
+
+        # Linear projections for query, key, value, and output
+        self.w_q = nn.Linear(d_model, d_model)  # Query projection
+        self.w_k = nn.Linear(d_model, d_model)  # Key projection
+        self.w_v = nn.Linear(d_model, d_model)  # Value projection
+        self.w_o = nn.Linear(d_model, d_model)  # Output projection
+
+        self.dropout = nn.Dropout(dropout)  # Dropout regularization
+
     def forward(self, x: torch.Tensor, mask: Optional[torch.Tensor] = None) -> torch.Tensor:
-        batch_size, seq_len, d_model = x.shape  # Girdi boyutlarını al
-        
-        # Çok kafalı dikkat için doğrusal dönüşümler ve yeniden şekillendirme
-        # Sorgu, anahtar ve değer matrislerini hesapla ve kafalara böl
+        batch_size, seq_len, d_model = x.shape  # Extract input dimensions
+
+        # Apply linear transformations and reshape for the heads
+        # Compute query, key, and value matrices and split across heads
         Q = self.w_q(x).view(batch_size, seq_len, self.n_heads, self.d_k).transpose(1, 2)
         K = self.w_k(x).view(batch_size, seq_len, self.n_heads, self.d_k).transpose(1, 2)
         V = self.w_v(x).view(batch_size, seq_len, self.n_heads, self.d_k).transpose(1, 2)
-        
-        # Ölçeklendirilmiş nokta çarpımı dikkat mekanizması
-        # Anahtarların transpozu ile sorguları çarp ve ölçeklendir
+
+        # Scaled dot-product attention
+        # Multiply queries with the transposed keys and scale
         scores = torch.matmul(Q, K.transpose(-2, -1)) / math.sqrt(self.d_k)
-        
-        # Maske uygula (varsa)
+
+        # Apply mask if provided
         if mask is not None:
             scores = scores.masked_fill(mask == 0, -1e9)
-            
-        # Dikkat ağırlıklarını hesapla ve softmax uygula
+
+        # Compute attention weights and apply softmax
         attention_weights = F.softmax(scores, dim=-1)
-        attention_weights = self.dropout(attention_weights)  # Dropout uygula
-        
-        # Dikkat ağırlıklarını değerlerle çarparak çıktıyı hesapla
+        attention_weights = self.dropout(attention_weights)  # Apply dropout
+
+        # Multiply attention weights with values to get the output
         attention_output = torch.matmul(attention_weights, V)
-        
-        # Kafaları birleştir ve son lineer katmandan geçir
+
+        # Merge heads and pass through the final linear layer
         attention_output = attention_output.transpose(1, 2).contiguous().view(
             batch_size, seq_len, d_model
         )
-        
-        return self.w_o(attention_output)  # Son lineer dönüşümü uygula
+
+        return self.w_o(attention_output)  # Apply the output projection
 
 class FeedForward(nn.Module):
-    """Konum bazlı ileri beslemeli ağ"""
-    
+    """Position-wise feedforward network."""
+
     def __init__(self, d_model: int, d_ff: int, dropout: float = 0.1):
         super().__init__()
-        self.linear1 = nn.Linear(d_model, d_ff)  # Girişten gizli katmana
-        self.linear2 = nn.Linear(d_ff, d_model)   # Gizli katmandan çıkışa
-        self.dropout = nn.Dropout(dropout)         # Aşırı öğrenmeyi önlemek için
-        
+        self.linear1 = nn.Linear(d_model, d_ff)  # Input to hidden layer
+        self.linear2 = nn.Linear(d_ff, d_model)  # Hidden layer to output
+        self.dropout = nn.Dropout(dropout)        # Prevent overfitting
+
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        # İleri yayılım: Lineer -> ReLU -> Dropout -> Lineer
+        # Forward pass: Linear -> ReLU -> Dropout -> Linear
         return self.linear2(self.dropout(F.relu(self.linear1(x))))
 
 class TransformerBlock(nn.Module):
-    """Dikkat ve ileri beslemeli katmanlara sahip tek bir transformer bloğu"""
-    
+    """Single transformer block with attention and feedforward layers."""
+
     def __init__(self, d_model: int, n_heads: int, d_ff: int, dropout: float = 0.1):
         super().__init__()
-        self.attention = MultiHeadAttention(d_model, n_heads, dropout)  # Çok kafalı dikkat katmanı
-        self.feed_forward = FeedForward(d_model, d_ff, dropout)         # İleri beslemeli ağ
-        self.norm1 = nn.LayerNorm(d_model)  # İlk normalizasyon katmanı
-        self.norm2 = nn.LayerNorm(d_model)  # İkinci normalizasyon katmanı
-        self.dropout = nn.Dropout(dropout)   # Dropout katmanı
-        
+        self.attention = MultiHeadAttention(d_model, n_heads, dropout)  # Multi-head attention layer
+        self.feed_forward = FeedForward(d_model, d_ff, dropout)         # Feedforward network
+        self.norm1 = nn.LayerNorm(d_model)  # First normalization layer
+        self.norm2 = nn.LayerNorm(d_model)  # Second normalization layer
+        self.dropout = nn.Dropout(dropout)  # Dropout layer
+
     def forward(self, x: torch.Tensor, mask: Optional[torch.Tensor] = None) -> torch.Tensor:
-        # Öz-dikkat mekanizması ve artık bağlantı
-        attn_output = self.attention(self.norm1(x), mask)  # Normalizasyon ve dikkat
-        x = x + self.dropout(attn_output)  # Artık bağlantı ve dropout
-        
-        # İleri beslemeli ağ ve artık bağlantı
-        ff_output = self.feed_forward(self.norm2(x))  # Normalizasyon ve ileri besleme
-        x = x + self.dropout(ff_output)  # İkinci artık bağlantı ve dropout
-        
+        # Self-attention and residual connection
+        attn_output = self.attention(self.norm1(x), mask)  # Normalization followed by attention
+        x = x + self.dropout(attn_output)  # Residual connection and dropout
+
+        # Feedforward network and residual connection
+        ff_output = self.feed_forward(self.norm2(x))  # Normalization and feedforward
+        x = x + self.dropout(ff_output)  # Second residual connection and dropout
+
         return x
 
 class DiffusionTransformer(nn.Module):
     """
-    Görüntü oluşturma için Diffusion Transformer (DiT) modeli
-    
-    Argümanlar:
-        img_size: Giriş görüntülerinin boyutu (kare olduğu varsayılır)
-        patch_size: Görüntünün bölüneceği yama boyutu
-        d_model: Transformer'ın gizli boyutu
-        n_layers: Transformer katman sayısı
-        n_heads: Dikkat başlığı sayısı
-        d_ff: İleri beslemeli ağın gizli boyutu
-        num_classes: Koşullu üretim için sınıf sayısı
-        dropout: Dropout oranı
+    Diffusion Transformer (DiT) model for image generation.
+
+    Args:
+        img_size: Size of the input images (assumes square inputs).
+        patch_size: Size of the patches extracted from the image.
+        d_model: Hidden size of the transformer.
+        n_layers: Number of transformer layers.
+        n_heads: Number of attention heads.
+        d_ff: Hidden dimension of the feedforward network.
+        num_classes: Number of classes for conditional generation.
+        dropout: Dropout probability.
     """
-    
+
     def __init__(
         self,
         img_size: int = 32,
@@ -141,45 +141,45 @@ def __init__(
     ):
         super().__init__()
         
-        self.img_size = img_size  # Görüntü boyutu
-        self.patch_size = patch_size  # Yama boyutu
-        self.d_model = d_model  # Modelin gizli boyutu
-        self.num_patches = (img_size // patch_size) ** 2  # Toplam yama sayısı
-        self.patch_dim = 3 * patch_size ** 2  # RGB yamaları için boyut (3 kanal * yama alanı)
-        
-        # Yama gömme katmanı
+        self.img_size = img_size  # Image size
+        self.patch_size = patch_size  # Patch size
+        self.d_model = d_model  # Model hidden size
+        self.num_patches = (img_size // patch_size) ** 2  # Total number of patches
+        self.patch_dim = 3 * patch_size ** 2  # Patch dimension for RGB images (3 channels * patch area)
+
+        # Patch embedding layer
         self.patch_embedding = nn.Linear(self.patch_dim, d_model)
-        
-        # Konumsal gömme (pozisyonel kodlama)
+
+        # Positional embedding
         self.pos_embedding = nn.Parameter(torch.randn(1, self.num_patches, d_model))
-        
-        # Zaman adımı gömme
+
+        # Timestep embedding
         self.time_embedding = TimestepEmbedding(d_model)
         self.time_mlp = nn.Sequential(
-            nn.Linear(d_model, d_model * 4),  # Zaman gömme için MLP
-            nn.GELU(),  # Gaussian Error Linear Unit aktivasyonu
-            nn.Linear(d_model * 4, d_model)  # Çıkış katmanı
+            nn.Linear(d_model, d_model * 4),  # MLP for the timestep embedding
+            nn.GELU(),  # Gaussian Error Linear Unit activation
+            nn.Linear(d_model * 4, d_model)  # Output layer
         )
-        
-        # Sınıf gömme (koşullu üretim için)
+
+        # Class embedding for conditional generation
         self.class_embedding = nn.Embedding(num_classes, d_model)
-        
-        # Transformer katmanları
+
+        # Transformer layers
         self.transformer_layers = nn.ModuleList([
             TransformerBlock(d_model, n_heads, d_ff, dropout)
-            for _ in range(n_layers)  # Belirtilen sayıda transformer katmanı oluştur
+            for _ in range(n_layers)  # Instantiate the requested number of transformer layers
         ])
-        
-        # Çıkış projeksiyonu
-        self.norm = nn.LayerNorm(d_model)  # Son normalizasyon katmanı
-        self.output_projection = nn.Linear(d_model, self.patch_dim)  # Çıkış boyutuna dönüşüm
-        
-        self.dropout = nn.Dropout(dropout)  # Dropout katmanı
-        
+
+        # Output projection
+        self.norm = nn.LayerNorm(d_model)  # Final normalization layer
+        self.output_projection = nn.Linear(d_model, self.patch_dim)  # Project back to the patch dimension
+
+        self.dropout = nn.Dropout(dropout)  # Dropout layer
+
     def patchify(self, x: torch.Tensor) -> torch.Tensor:
-        """Görüntüyü yamalara dönüştür"""
+        """Convert an image into a collection of patches."""
         batch_size, channels, height, width = x.shape
-        
+
         # Reshape to patches
         x = x.reshape(
             batch_size, channels,
@@ -190,21 +190,21 @@ def patchify(self, x: torch.Tensor) -> torch.Tensor:
         x = x.reshape(batch_size, self.num_patches, -1)
         
         return x
-    
+
     def unpatchify(self, x: torch.Tensor) -> torch.Tensor:
-        """Yamaları tekrar görüntüye dönüştür"""
-        batch_size = x.shape[0]  # Toplu iş boyutu
-        height = width = int(self.num_patches ** 0.5)  # Orijinal ızgara boyutları
-        
-        # Yamaları tekrar orijinal formata dönüştür
+        """Reconstruct the image from a collection of patches."""
+        batch_size = x.shape[0]  # Batch size
+        height = width = int(self.num_patches ** 0.5)  # Patches per side of the square patch grid
+
+        # Rearrange patches back to the original image
         x = x.reshape(
             batch_size, height, width, 3, self.patch_size, self.patch_size
         )
-        # Boyutları yeniden düzenle: [batch, channels, height, patch_h, width, patch_w]
+        # Reorder to [batch, channels, height, patch_h, width, patch_w]
         x = x.permute(0, 3, 1, 4, 2, 5).contiguous()
-        # Yama boyutlarını birleştirerek orijinal görüntü boyutuna getir
+        # Merge patch dimensions to recover the original image size
         x = x.reshape(batch_size, 3, height * self.patch_size, width * self.patch_size)
-        
+
         return x
     
     def forward(
@@ -214,261 +214,261 @@ def forward(
         class_labels: Optional[torch.Tensor] = None
     ) -> torch.Tensor:
         """
-        Diffusion transformer'ın ileri geçişi
-        
-        Argümanlar:
-            x: Girdi tensörü (batch_size, channels, height, width)
-            timesteps: Toplu işteki her örnek için zaman adımı
-            class_labels: Koşullu üretim için isteğe bağlı sınıf etiketleri
-            
-        Dönüş:
-            Girdiyle aynı şekilde gürültü tahmini
+        Forward pass of the diffusion transformer.
+
+        Args:
+            x: Input tensor shaped as (batch_size, channels, height, width).
+            timesteps: Timestep per sample in the batch.
+            class_labels: Optional class labels for conditional generation.
+
+        Returns:
+            Predicted noise with the same shape as the input.
         """
-        batch_size = x.shape[0]  # Toplu iş boyutu
-        device = x.device  # Hesaplama cihazı
-        
-        # Görüntüyü yamalara dönüştür
-        x = self.patchify(x)  # Yamalara dönüştürme
-        
-        # Yamaları gömme boyutuna yansıt
-        x = self.patch_embedding(x)  # Yama gömme
-        
-        # Konumsal gömme ekle
-        x = x + self.pos_embedding  # Konumsal bilgi ekle
-        
-        # Zaman adımı gömme ekle
-        t_emb = self.time_embedding(timesteps)  # Zaman adımları için gömme
-        t_emb = self.time_mlp(t_emb)  # Zaman gömme için MLP'den geçir
-        x = x + t_emb.unsqueeze(1)  # Zamansal bilgiyi ekle
-        
-        # İsteğe bağlı olarak sınıf gömme ekle
+        batch_size = x.shape[0]  # Batch size
+        device = x.device  # Device used for computation
+
+        # Convert the image into patches
+        x = self.patchify(x)
+
+        # Project patches to the embedding dimension
+        x = self.patch_embedding(x)
+
+        # Add positional embeddings
+        x = x + self.pos_embedding
+
+        # Add timestep embedding
+        t_emb = self.time_embedding(timesteps)
+        t_emb = self.time_mlp(t_emb)
+        x = x + t_emb.unsqueeze(1)
+
+        # Optionally add class embeddings
         if class_labels is not None:
-            class_emb = self.class_embedding(class_labels)  # Sınıf gömme
-            x = x + class_emb.unsqueeze(1)  # Sınıf bilgisini ekle
-        
-        # Transformer katmanlarını uygula
+            class_emb = self.class_embedding(class_labels)
+            x = x + class_emb.unsqueeze(1)
+
+        # Run through the transformer layers
         for layer in self.transformer_layers:
-            x = layer(x)  # Her bir transformer katmanından geçir
-        
-        # Son normalizasyon katmanı
-        x = self.norm(x)  # Normalizasyon
-        
-        # Yama boyutuna geri yansıt
-        x = self.output_projection(x)  # Çıkış projeksiyonu
-        
-        # Yamaları tekrar görüntüye dönüştür
-        x = self.unpatchify(x)  # Görüntüye dönüştür
-        
-        return x  # Gürültü tahminini döndür
+            x = layer(x)
+
+        # Final normalization
+        x = self.norm(x)
+
+        # Project back to patch space
+        x = self.output_projection(x)
+
+        # Convert patches back to an image
+        x = self.unpatchify(x)
+
+        return x
 
 class DDPMScheduler:
-    """Diffusion süreci için DDPM gürültü çizelgeleyici"""
-    
+    """DDPM noise scheduler for the diffusion process."""
+
     def __init__(self, num_timesteps: int = 1000, beta_start: float = 0.0001, beta_end: float = 0.02):
-        self.num_timesteps = num_timesteps  # Toplam zaman adımı sayısı
-        
-        # Doğrusal beta çizelgesi
-        self.betas = torch.linspace(beta_start, beta_end, num_timesteps)  # Beta değerleri
-        self.alphas = 1.0 - self.betas  # Alfa değerleri (1 - beta)
-        self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)  # Kümülatif çarpım
-        # Önceki kümülatif çarpım (kaydırmalı)
+        self.num_timesteps = num_timesteps  # Total number of timesteps
+
+        # Linear beta schedule
+        self.betas = torch.linspace(beta_start, beta_end, num_timesteps)  # Beta values
+        self.alphas = 1.0 - self.betas  # Alpha values (1 - beta)
+        self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)  # Cumulative product of alphas
+        # Previous cumulative product (shifted)
         self.alphas_cumprod_prev = F.pad(self.alphas_cumprod[:-1], (1, 0), value=1.0)
-        
-        # q(x_t | x_{t-1}) dağılımı için hesaplamalar
-        self.sqrt_alphas_cumprod = torch.sqrt(self.alphas_cumprod)  # Karekök alfa kümülatif çarpım
-        # Karekök (1 - alfa kümülatif çarpım)
+
+        # Coefficients of the closed-form forward marginal q(x_t | x_0)
+        self.sqrt_alphas_cumprod = torch.sqrt(self.alphas_cumprod)  # Square root of the cumulative product
+        # Square root of (1 - cumulative product of alphas)
         self.sqrt_one_minus_alphas_cumprod = torch.sqrt(1.0 - self.alphas_cumprod)
-        
-        # q(x_{t-1} | x_t, x_0) posterior dağılımı için hesaplamalar
+
+        # Quantities for the posterior q(x_{t-1} | x_t, x_0)
         self.posterior_variance = self.betas * (1.0 - self.alphas_cumprod_prev) / (1.0 - self.alphas_cumprod)
-        
+
     def add_noise(self, x_0: torch.Tensor, noise: torch.Tensor, timesteps: torch.Tensor) -> torch.Tensor:
-        """Temiz görüntülere gürültü çizelgesine göre gürültü ekle"""
-        # İlgili zaman adımları için ölçeklendirme faktörlerini al
+        """Add noise to clean images according to the noise schedule."""
+        # Gather scaling factors for the selected timesteps
         sqrt_alpha_prod = self.sqrt_alphas_cumprod[timesteps].view(-1, 1, 1, 1).to(x_0.device)
         sqrt_one_minus_alpha_prod = self.sqrt_one_minus_alphas_cumprod[timesteps].view(-1, 1, 1, 1).to(x_0.device)
-        
-        # Temiz görüntülere gürültü ekle
+
+        # Add noise to the clean images
         x_t = sqrt_alpha_prod * x_0 + sqrt_one_minus_alpha_prod * noise
-        
-        return x_t  # Gürültülü görüntüyü döndür
-        
+
+        return x_t  # Return the noisy images
+
     def sample_prev_timestep(self, x_t: torch.Tensor, noise_pred: torch.Tensor, timestep: int) -> torch.Tensor:
-        """x_t ve tahmin edilen gürültü verildiğinde x_{t-1} örnekle"""
+        """Sample x_{t-1} given x_t and the predicted noise."""
         if timestep == 0:
-            return x_t  # Son zaman adımında, sadece tahmin edilen x_0'ı döndür
-            
-        # Bu zaman adımı için parametreleri al
-        alpha_t = self.alphas[timestep]  # Mevcut alfa
-        alpha_cumprod_t = self.alphas_cumprod[timestep]  # Kümülatif alfa
-        alpha_cumprod_prev_t = self.alphas_cumprod_prev[timestep]  # Önceki kümülatif alfa
-        beta_t = self.betas[timestep]  # Mevcut beta
-        sqrt_one_minus_alpha_cumprod_t = self.sqrt_one_minus_alphas_cumprod[timestep]  # Karekök(1 - alfa kümülatif)
-        
-        # Ters süreç için varyans hesapla
+            return x_t  # At the final step simply return x_t
+
+        # Retrieve parameters for this timestep
+        alpha_t = self.alphas[timestep]
+        alpha_cumprod_t = self.alphas_cumprod[timestep]
+        alpha_cumprod_prev_t = self.alphas_cumprod_prev[timestep]
+        beta_t = self.betas[timestep]
+        sqrt_one_minus_alpha_cumprod_t = self.sqrt_one_minus_alphas_cumprod[timestep]
+
+        # Variance for the reverse process
         posterior_variance_t = self.posterior_variance[timestep]
-        
-        # x_t ve tahmin edilen gürültüden x_0'ı tahmin et
+
+        # Estimate x_0 from x_t and the predicted noise
         pred_x0 = (x_t - sqrt_one_minus_alpha_cumprod_t * noise_pred) / torch.sqrt(alpha_cumprod_t)
-        
-        # q(x_{t-1} | x_t, x_0) dağılımının ortalamasını hesapla
-        mean = (torch.sqrt(alpha_cumprod_prev_t) * beta_t * pred_x0 + 
+
+        # Compute the mean of q(x_{t-1} | x_t, x_0)
+        mean = (torch.sqrt(alpha_cumprod_prev_t) * beta_t * pred_x0 +
                 torch.sqrt(alpha_t) * (1 - alpha_cumprod_prev_t) * x_t) / (1 - alpha_cumprod_t)
-        
-        # q(x_{t-1} | x_t, x_0) dağılımından örnekle
+
+        # Sample from the posterior
         if timestep > 0:
-            noise = torch.randn_like(x_t)  # Rastgele gürültü üret
-            variance = torch.sqrt(posterior_variance_t) * noise  # Varyansı uygula
+            noise = torch.randn_like(x_t)  # Random noise
+            variance = torch.sqrt(posterior_variance_t) * noise  # Scale noise by the posterior standard deviation
         else:
-            variance = 0  # Son adımda varyans yok
-            
-        x_prev = mean + variance  # Ortalama ve varyansı topla
-        
-        return x_prev  # Önceki zaman adımındaki görüntüyü döndür
+            variance = 0  # Not reached when timestep == 0 (that case already returned above)
+
+        x_prev = mean + variance  # Posterior mean plus the scaled noise term
 
-def train_step(model: DiffusionTransformer, 
-               scheduler: DDPMScheduler, 
-               x_batch: torch.Tensor, 
+        return x_prev  # Return the sample for the previous timestep
+
+def train_step(model: DiffusionTransformer,
+               scheduler: DDPMScheduler,
+               x_batch: torch.Tensor,
                class_labels: Optional[torch.Tensor] = None) -> torch.Tensor:
-    """Diffusion transformer için tek bir eğitim adımı"""
-    
-    batch_size = x_batch.shape[0]  # Toplu iş boyutu
-    device = x_batch.device  # Hesaplama cihazı
-    
-    # Toplu işteki her görüntü için rastgele zaman adımları seç
+    """Perform a single training step for the diffusion transformer."""
+
+    batch_size = x_batch.shape[0]  # Batch size
+    device = x_batch.device  # Device used for computation
+
+    # Select random timesteps for each image in the batch
     timesteps = torch.randint(0, scheduler.num_timesteps, (batch_size,), device=device)
-    
-    # Görüntülere eklenecek gürültüyü örnekle
-    noise = torch.randn_like(x_batch)  # Gürültü tensörü oluştur
-    
-    # Temiz görüntülere, her zaman adımındaki gürültü büyüklüğüne göre gürültü ekle
+
+    # Sample the noise that will be added to the images
+    noise = torch.randn_like(x_batch)
+
+    # Add noise scaled according to the timestep schedule
     noisy_images = scheduler.add_noise(x_batch, noise, timesteps)
-    
-    # Gürültü artığını tahmin et
+
+    # Predict the noise residual
     noise_pred = model(noisy_images, timesteps, class_labels)
-    
-    # Kaybı hesapla (tahmin edilen gürültü ile gerçek gürültü arasındaki ortalama kare hata)
+
+    # Compute the mean squared error between predicted and true noise
     loss = F.mse_loss(noise_pred, noise)
-    
-    return loss  # Hata değerini döndür
+
+    return loss
 
 @torch.no_grad()
 def sample_images(
-    model: DiffusionTransformer, 
-    scheduler: DDPMScheduler, 
-    num_samples: int = 4, 
+    model: DiffusionTransformer,
+    scheduler: DDPMScheduler,
+    num_samples: int = 4,
     class_labels: Optional[torch.Tensor] = None,
     device: str = 'cpu'
 ) -> torch.Tensor:
-    """Eğitilmiş diffusion transformer kullanarak örnek görüntüler oluştur"""
-    model.eval()  # Modeli değerlendirme moduna al
-    
-    # Başlangıç gizli değişkeni olarak rastgele gürültü örnekle
-    img_size = model.img_size  # Görüntü boyutu
+    """Generate sample images with a trained diffusion transformer."""
+    model.eval()  # Switch to evaluation mode
+
+    # Sample initial noise as the latent variable
+    img_size = model.img_size  # Image size
     x_t = torch.randn((num_samples, 3, img_size, img_size), device=device)
-    
-    # Eğer sınıf etiketleri verilmediyse ve model koşullu ise rastgele sınıf etiketleri oluştur
+
+    # If no class labels are provided for a conditional model, sample random labels
     if class_labels is None and hasattr(model, 'class_embedding'):
-        num_classes = model.class_embedding.num_embeddings  # Toplam sınıf sayısı
-        class_labels = torch.randint(0, num_classes, (num_samples,), device=device)  # Rastgele sınıf etiketleri
-    
-    # Modelden örnekleme yap
-    with torch.no_grad():  # Gradyan hesaplaması yapma
-        # Zaman adımlarını tersten dolaş
+        num_classes = model.class_embedding.num_embeddings  # Total number of classes
+        class_labels = torch.randint(0, num_classes, (num_samples,), device=device)  # Random class labels
+
+    # Run the reverse diffusion process
+    with torch.no_grad():
+        # Iterate over timesteps in reverse order
         for t in reversed(range(scheduler.num_timesteps)):
-            # Zaman adımları için tensor oluştur
+            # Create a tensor filled with the current timestep index
             timesteps = torch.full((num_samples,), t, device=device, dtype=torch.long)
-            
-            # Gürültüyü tahmin et
+
+            # Predict the noise for the current latent
             noise_pred = model(x_t, timesteps, class_labels)
-            
-            # Bir önceki örneği al
+
+            # Sample the previous timestep
             x_t = scheduler.sample_prev_timestep(x_t, noise_pred, t)
-    
-    # Geçerli piksel aralığına kırp
+
+    # Clamp to the valid pixel range
     x_t = torch.clamp(x_t, -1.0, 1.0)
-    
-    # [-1, 1] aralığından [0, 1] aralığına ölçekle
+
+    # Scale from [-1, 1] to [0, 1]
     x_t = (x_t + 1) / 2
-    
-    return x_t  # Oluşturulan görüntüleri döndür
+
+    return x_t  # Return the generated samples
 
 # Example usage and training loop
 def example_usage():
-    """Diffusion transformer'ın nasıl kullanılacağını göster"""
-    # Hesaplama cihazını ayarla (GPU varsa kullan, yoksa CPU)
+    """Showcase how to use the diffusion transformer."""
+    # Select the compute device (prefer GPU when available)
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-    print(f"Kullanılan cihaz: {device}")
-    
-    # Modeli ve çizelgeleyiciyi başlat
+    print(f"Using device: {device}")
+
+    # Initialize the model and scheduler
     model = DiffusionTransformer(
-        img_size=32,     # Görüntü boyutu
-        patch_size=4,    # Yama boyutu
-        d_model=256,     # Modelin gizli boyutu
-        n_layers=6,      # Transformer katman sayısı
-        n_heads=8,       # Dikkat başlığı sayısı
-        d_ff=1024,       # İleri beslemeli ağın gizli boyutu
-        num_classes=10,  # Sınıf sayısı (CIFAR-10 için)
-        dropout=0.1      # Dropout oranı
-    ).to(device)  # Modeli uygun cihaza taşı
-    
-    # Gürültü çizelgeleyiciyi başlat
+        img_size=32,     # Image size
+        patch_size=4,    # Patch size
+        d_model=256,     # Model hidden size
+        n_layers=6,      # Number of transformer layers
+        n_heads=8,       # Number of attention heads
+        d_ff=1024,       # Hidden size of the feedforward network
+        num_classes=10,  # Number of classes (e.g., CIFAR-10)
+        dropout=0.1      # Dropout probability
+    ).to(device)  # Move model to the selected device
+
+    # Instantiate the noise scheduler
     scheduler = DDPMScheduler(num_timesteps=1000)
-    
-    # Örnek veri oluştur
-    batch_size = 4  # Toplu iş boyutu
-    x = torch.randn(batch_size, 3, 32, 32, device=device)  # Rastgele giriş görüntüleri
-    timesteps = torch.randint(0, 1000, (batch_size,), device=device)  # Rastgele zaman adımları
-    class_labels = torch.randint(0, 10, (batch_size,), device=device)  # Rastgele sınıf etiketleri
-    
-    # İleri geçiş
-    noise_pred = model(x, timesteps, class_labels)  # Gürültü tahmini yap
-    print(f"Girdi şekli: {x.shape}")
-    print(f"Gürültü tahmini şekli: {noise_pred.shape}")
-    
-    # Eğitim adımı
-    loss = train_step(model, scheduler, x, class_labels)  # Eğitim adımını çalıştır
-    print(f"Eğitim kaybı: {loss.item():.4f}")
-    
-    # Örnek görüntüler oluştur
+
+    # Create sample data
+    batch_size = 4  # Batch size
+    x = torch.randn(batch_size, 3, 32, 32, device=device)  # Random input images
+    timesteps = torch.randint(0, 1000, (batch_size,), device=device)  # Random timesteps
+    class_labels = torch.randint(0, 10, (batch_size,), device=device)  # Random class labels
+
+    # Forward pass
+    noise_pred = model(x, timesteps, class_labels)
+    print(f"Input shape: {x.shape}")
+    print(f"Noise prediction shape: {noise_pred.shape}")
+
+    # Single training step
+    loss = train_step(model, scheduler, x, class_labels)
+    print(f"Training loss: {loss.item():.4f}")
+
+    # Generate sample images
     samples = sample_images(model, scheduler, num_samples=4, device=device)
-    print(f"Oluşturulan örneklerin şekli: {samples.shape}")
-    
-    # Örnekleri görselleştir
-    fig, axes = plt.subplots(1, 4, figsize=(12, 3))  # 1x4'lük bir ızgara oluştur
+    print(f"Generated samples shape: {samples.shape}")
+
+    # Visualize the samples
+    fig, axes = plt.subplots(1, 4, figsize=(12, 3))  # Create a 1x4 grid
     for i, ax in enumerate(axes):
-        # Görüntüyü [C, H, W]'dan [H, W, C]'ye çevir ve göster
+        # Convert from [C, H, W] to [H, W, C] and display
         ax.imshow(samples[i].permute(1, 2, 0).cpu().numpy())
-        ax.axis('off')  # Eksenleri kapat
-    plt.tight_layout()  # Görsel düzenlemeyi iyileştir
-    plt.show()  # Görseli göster
-    
-    # Eğitim döngüsunu başlat
-    print("Eğitim başlatılıyor...")
+        ax.axis('off')  # Hide axes
+    plt.tight_layout()  # Improve layout
+    plt.show()  # Display the figure
+
+    # Start a training loop
+    print("Starting training...")
     optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
-    
-    # Modeli eğitim moduna al
+
+    # Switch to training mode
     model.train()
-    
-    # Tek bir eğitim adımı gerçekleştir
+
+    # Perform a single training iteration
     loss = train_step(model, scheduler, x, class_labels)
-    
-    # Geri yayılım ve parametre güncelleme
-    optimizer.zero_grad()  # Gradyanları sıfırla
-    loss.backward()  # Gradyanları hesapla
-    optimizer.step()  # Parametreleri güncelle
-    
-    print(f"Eğitim kaybı: {loss.item():.4f}")
-    
-    # Örnek görüntüler oluştur
-    print("Örnek görüntüler oluşturuluyor...")
-    sample_labels = torch.arange(4, device=device)  # İlk 4 sınıf için birer örnek oluştur
+
+    # Backpropagation and parameter update
+    optimizer.zero_grad()  # Reset gradients
+    loss.backward()  # Compute gradients
+    optimizer.step()  # Update parameters
+
+    print(f"Training loss: {loss.item():.4f}")
+
+    # Generate additional sample images
+    print("Generating sample images...")
+    sample_labels = torch.arange(4, device=device)  # Create one sample for the first four classes
     generated_images = sample_images(model, scheduler, num_samples=4, class_labels=sample_labels, device=device)
-    
-    print(f"Oluşturulan görüntülerin boyutu: {generated_images.shape}")
-    print("Örnek oluşturma tamamlandı!")
-    
-    return model, scheduler, generated_images  # Modeli, çizelgeleyiciyi ve oluşturulan görüntüleri döndür
+
+    print(f"Generated images shape: {generated_images.shape}")
+    print("Sampling complete!")
+
+    return model, scheduler, generated_images  # Return the model, scheduler, and generated samples
 
 # Run example
 if __name__ == "__main__":
diff --git a/Genel-5/image_crops.py b/Genel-5/image_crops.py
index a32b39e..eaf7744 100644
--- a/Genel-5/image_crops.py
+++ b/Genel-5/image_crops.py
@@ -255,22 +255,22 @@ def reconstruct_from_crops(
 from PIL import Image
 import torch
 
-# Görüntüyü yükle
-image_path = "unnamed.png"  # Görüntü yolunu buraya girin
+# Load an example image
+image_path = "unnamed.png"  # Provide the image path here
 image = np.array(Image.open(image_path))
 
-# Görüntüyü parçalara ayır
+# Generate crops from the image
 output = overlap_crop_image(image, overlap_margin=4, max_crops=12)
 
-# Sadece yerel parçaları al (ilk öğe global crop olduğu için atlıyoruz)
-local_crops = output["crops"][1:]  # Skip the first (global) crop
+# Retrieve only the local crops (skip the first global crop)
+local_crops = output["crops"][1:]
 
-# Parçaları tekrar birleştir
-crops_tensor = torch.from_numpy(local_crops).float()  # Numpy dizisini PyTorch tensörüne dönüştür
+# Stitch the crops back together
+crops_tensor = torch.from_numpy(local_crops).float()  # Convert numpy array to PyTorch tensor
 reconstructed_image = reconstruct_from_crops(crops_tensor, output["tiling"], overlap_margin=4)
 
-# Yeniden oluşturulmuş görüntüyü numpy dizisine dönüştür ve görüntüleyin
+# Convert the reconstructed image to numpy and display it
 reconstructed_image_np = reconstructed_image.cpu().numpy().astype(np.uint8)
 reconstructed_pil_image = Image.fromarray(reconstructed_image_np)
-reconstructed_pil_image.save("reconstructed_image.jpg")  # Yeniden oluşturulmuş görüntüyü kaydet
-reconstructed_pil_image.show()  # Yeniden oluşturulmuş görüntüyü görüntüle
\ No newline at end of file
+reconstructed_pil_image.save("reconstructed_image.jpg")  # Save the reconstructed image
+reconstructed_pil_image.show()  # Display the reconstructed image
\ No newline at end of file
diff --git a/Genel-5/image_processor_app.py b/Genel-5/image_processor_app.py
index 58867b4..1fc7fe5 100644
--- a/Genel-5/image_processor_app.py
+++ b/Genel-5/image_processor_app.py
@@ -15,7 +15,7 @@
 
 # Set page config
 st.set_page_config(
-    page_title="Görüntü İşleme Uygulaması",
+    page_title="Image Processing Application",
     page_icon="🖼️",
     layout="wide"
 )
@@ -24,35 +24,35 @@ def apply_filter(crop, filter_name):
     """Apply the selected filter to an image crop"""
     if filter_name == "Normal":
         return crop
-    elif filter_name == "Siyah-Beyaz":
+    elif filter_name == "Black & White":
         return crop.convert("L").convert("RGB")
     elif filter_name == "Blur":
         return crop.filter(ImageFilter.BLUR)
-    elif filter_name == "Kontur":
+    elif filter_name == "Contour":
         return crop.filter(ImageFilter.CONTOUR)
-    elif filter_name == "Keskinleştir":
+    elif filter_name == "Sharpen":
         return crop.filter(ImageFilter.SHARPEN)
     return crop
 
 def main():
-    st.title("Gelişmiş Görüntü İşleme Uygulaması")
-    st.write("Büyük görüntüleri parçalara ayırıp işleyen ve tekrar birleştiren uygulama")
+    st.title("Advanced Image Processing Application")
+    st.write("Process large images by tiling, editing, and stitching them back together.")
 
     # Sidebar controls
     with st.sidebar:
-        st.header("Ayarlar")
-        uploaded_file = st.file_uploader("Bir görüntü yükleyin", type=["png", "jpg", "jpeg"])
-        
-        st.subheader("Görüntü İşleme Ayarları")
+        st.header("Settings")
+        uploaded_file = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])
+
+        st.subheader("Image Processing Options")
         filter_option = st.selectbox(
-            "Filtre Seçin:",
-            ["Normal", "Siyah-Beyaz", "Blur", "Kontur", "Keskinleştir"]
+            "Choose a filter:",
+            ["Normal", "Black & White", "Blur", "Contour", "Sharpen"]
         )
-        
-        overlap = st.slider("Örtüşme Payı (piksel):", 0, 20, 4, 1)
-        max_crops = st.slider("Maksimum Parça Sayısı:", 4, 16, 9, 1)
-        
-        process_btn = st.button("Görüntüyü İşle")
+
+        overlap = st.slider("Overlap (pixels):", 0, 20, 4, 1)
+        max_crops = st.slider("Maximum Number of Tiles:", 4, 16, 9, 1)
+
+        process_btn = st.button("Process Image")
     
     if uploaded_file is not None and process_btn:
         try:
@@ -63,7 +63,7 @@ def main():
             image_np = np.array(image)
             
             # Split into tiles
-            with st.spinner("Görüntü parçalara ayrılıyor..."):
+            with st.spinner("Splitting image into tiles..."):
                 output = overlap_crop_image(
                     image_np, 
                     overlap_margin=overlap, 
@@ -87,7 +87,7 @@ def main():
             processed_np = [np.array(img) for img in processed_crops]
             
             # Reconstruct the image
-            with st.spinner("Görüntü yeniden oluşturuluyor..."):
+            with st.spinner("Reconstructing image..."):
                 crops_tensor = torch.from_numpy(np.array(processed_np)).float()
                 reconstructed = reconstruct_from_crops(
                     crops_tensor, 
@@ -101,34 +101,34 @@ def main():
             # Display results
             col1, col2 = st.columns(2)
             with col1:
-                st.subheader("Orijinal Görüntü")
+                st.subheader("Original Image")
                 st.image(image, use_container_width=True)
-                
+
             with col2:
-                st.subheader("İşlenmiş Görüntü")
+                st.subheader("Processed Image")
                 st.image(result_img, use_container_width=True)
-                
+
                 # Download button
                 buffered = io.BytesIO()
                 result_img.save(buffered, format="JPEG")
                 st.download_button(
-                    label="İşlenmiş Görüntüyü İndir",
+                    label="Download Processed Image",
                     data=buffered,
                     file_name=f"processed_{uploaded_file.name}",
                     mime="image/jpeg"
                 )
-            
+
             # Show crop grid
-            st.subheader("İşlenen Parçalar")
+            st.subheader("Processed Tiles")
             cols = st.columns(3)  # 3 columns for the grid
             for idx, crop in enumerate(processed_crops):
                 with cols[idx % 3]:
-                    st.image(crop, caption=f"Parça {idx+1}", use_container_width=True)
-                    
+                    st.image(crop, caption=f"Tile {idx+1}", use_container_width=True)
+
         except Exception as e:
-            st.error(f"Bir hata oluştu: {str(e)}")
+            st.error(f"An error occurred: {str(e)}")
     elif uploaded_file is None and process_btn:
-        st.warning("Lütfen önce bir görüntü yükleyin.")
+        st.warning("Please upload an image first.")
 
 if __name__ == "__main__":
     main()
diff --git a/Genel-5/llada.py b/Genel-5/llada.py
index 5162c4f..d5b8018 100644
--- a/Genel-5/llada.py
+++ b/Genel-5/llada.py
@@ -8,20 +8,20 @@
 from tqdm import tqdm
 from collections import Counter
 
-# HuggingFace veri setini yükle
+# Load the Hugging Face dataset
 dataset = load_dataset('salihturkoglu/se_data_set', split='train')
 instructions = [ex['instruction'] for ex in dataset]
 responses = [ex['response'] for ex in dataset]
 
-# Gelişmiş Türkçe tokenizer
+# Advanced Turkish tokenizer
 def turkish_tokenize(text):
-    # Noktalama, sayılar, Türkçe karakterler ve kelime kökleri için daha iyi ayrıştırma
+    # Pad punctuation marks and digit runs with spaces so they split into separate tokens
     text = re.sub(r"([.,!?;:()\"'])", r" \1 ", text)
     text = re.sub(r"([0-9]+)", r" \1 ", text)
     text = re.sub(r"\s+", " ", text)
     return text.lower().strip().split()
 
-# Vocab oluştur (daha büyük ve çeşitli)
+# Build a richer vocabulary
 PAD_TOKEN = "<PAD>"
 UNK_TOKEN = "<UNK>"
 all_texts = instructions + responses
@@ -36,7 +36,7 @@ def encode(text):
     return [vocab.get(tok, vocab[UNK_TOKEN]) for tok in turkish_tokenize(text)]
 
 def decode(token_ids):
-    # <UNK> oranını azaltmak için tekrarları ve padleri temizle
+    # Remove repeats and pad tokens to reduce the <UNK> ratio
     words = []
     for idx in token_ids:
         if idx == vocab[PAD_TOKEN]:
@@ -47,7 +47,7 @@ def decode(token_ids):
     return " ".join(words)
 
 def build_prompt(instruction, response=None):
-    # Prompt formatı
+    # Prompt format
     if response is not None:
         return f"Instruction: {instruction} Response: {response}"
     else:
@@ -67,7 +67,7 @@ def __init__(self, instructions, responses, vocab, max_len=128, prompt_len=64):
             resp_ids = encode(resp)[:(max_len - prompt_len)]
             resp_ids += [vocab[PAD_TOKEN]] * ((max_len - prompt_len) - len(resp_ids))
             self.inputs.append(torch.tensor(prompt_ids, dtype=torch.long))
-            self.targets.append(torch.tensor(resp_ids, dtype=torch.long))  # Sadece response target!
+            self.targets.append(torch.tensor(resp_ids, dtype=torch.long))  # Response-only target!
 
     def __len__(self):
         return len(self.inputs)
@@ -91,7 +91,7 @@ def add_noise(batch, noise_level=0.5):
     noisy[mask] = random_tokens[mask]
     return noisy
 
-# Cosine noise schedule (daha iyi diffusion için)
+# Cosine noise schedule (better diffusion behavior)
 def cosine_noise_schedule(step, total_steps):
     import math
     return math.cos((step / total_steps) * math.pi / 2)
@@ -115,7 +115,7 @@ def forward(self, prompt, x, timestep, prompt_emb, src_key_padding_mask=None):
         t_emb = self.timestep_embed(timestep).unsqueeze(1)
         prompt_cond = self.prompt_proj(prompt_emb).unsqueeze(1)
         emb = torch.cat([prompt_embs, x_embs], dim=1) + t_emb + prompt_cond
-        # src_key_padding_mask shape düzeltme
+        # Fix the src_key_padding_mask shape
         if src_key_padding_mask is not None:
             # src_key_padding_mask: (batch, response_len) -> (batch, prompt_len + response_len)
             pad = torch.zeros((src_key_padding_mask.shape[0], prompt_embs.shape[1]), dtype=torch.bool, device=src_key_padding_mask.device)
@@ -157,7 +157,7 @@ def train_diffusion_model(model, dataloader, epochs=10, steps=16):
             mask = (batch_targets == vocab[PAD_TOKEN])
             optimizer.zero_grad()
             outputs = model(batch_prompts, noisy_targets, timestep, prompt_emb, src_key_padding_mask=mask)
-            # .view yerine .reshape kullan
+            # Prefer reshape over view
             loss = criterion(outputs.reshape(-1, outputs.size(-1)), batch_targets.reshape(-1))
             loss.backward()
             optimizer.step()
@@ -174,7 +174,7 @@ def generate_response(model, instruction, steps=16, max_len=256, prompt_len=64):
     prompt_ids += [vocab[PAD_TOKEN]] * (prompt_len - len(prompt_ids))
     prompt_tensor = torch.tensor([prompt_ids], dtype=torch.long, device=device)
     prompt_emb = get_prompt_embedding([prompt], vocab, model, prompt_len=prompt_len)
-    # Response kısmı random başlatılır
+    # Initialize the response portion randomly
     response_len = max_len - prompt_len
     response_part = torch.randint(2, len(vocab), (1, response_len), device=device)
     generated = response_part.clone()
@@ -193,12 +193,12 @@ def generate_response(model, instruction, steps=16, max_len=256, prompt_len=64):
 
 test_instruction = instructions[0]
 print('Instruction:', test_instruction)
-print('Gerçek Response:', responses[0])
+print('Ground Truth Response:', responses[0])
 print('Model Response:', generate_response(model, test_instruction, steps=16, max_len=max_len, prompt_len=prompt_len))
 
-test_instruction = "Çift anadal veya yandal yapmak istiyorum. Hangi bölümlerle yapabilirim?"
+test_instruction = "Çift anadal veya yandal yapmak istiyorum. Hangi bölümlerle yapabilirim?"
 print('Instruction:', test_instruction)
-print('Gerçek Response:', responses[instructions.index(test_instruction)] if test_instruction in instructions else "Yok")
+print('Ground Truth Response:', responses[instructions.index(test_instruction)] if test_instruction in instructions else "Not Found")
 print('Model Response:', generate_response(model, test_instruction, steps=16, max_len=max_len, prompt_len=prompt_len))
 
 def evaluate_diffusion_model(model, dataset, n_samples=100, steps=16, max_len=256, prompt_len=64):
@@ -228,6 +228,6 @@ def evaluate_diffusion_model(model, dataset, n_samples=100, steps=16, max_len=25
         correct += ((generated == tgt) & mask).sum().item()
         loop.set_postfix(acc=(correct/total if total > 0 else 0.0))
     accuracy = correct / total if total > 0 else 0.0
-    print(f"Test doğruluğu: {accuracy:.2%} ({correct}/{total})")
+    print(f"Test accuracy: {accuracy:.2%} ({correct}/{total})")
 
 evaluate_diffusion_model(model, dataset, n_samples=100, steps=16, max_len=max_len, prompt_len=prompt_len)
\ No newline at end of file
diff --git a/Genel-5/modern_llm_components.py b/Genel-5/modern_llm_components.py
index f066ccd..f11c641 100644
--- a/Genel-5/modern_llm_components.py
+++ b/Genel-5/modern_llm_components.py
@@ -5,13 +5,13 @@
 from typing import Optional, Tuple
 import numpy as np
 
-# 1. RoPE (Rotary Position Embedding) - Llama'da kullanılan
+# 1. RoPE (Rotary Position Embedding) - Used in Llama
 class RotaryPositionalEmbedding(nn.Module):
     """
-    RoPE, pozisyonel bilgiyi doğrudan attention hesaplamasına entegre eder.
-    Avantajları:
-    - Extrapolation capability (training'den uzun sequence'larda çalışır)
-    - Relative position bilgisi
+    RoPE integrates positional information directly into the attention computation.
+    Advantages:
+    - Extrapolation capability (works on sequences longer than the training context)
+    - Relative positional information
     - Efficiency
     """
     def __init__(self, dim: int, max_seq_len: int = 2048, base: float = 10000.0):
@@ -20,11 +20,11 @@ def __init__(self, dim: int, max_seq_len: int = 2048, base: float = 10000.0):
         self.max_seq_len = max_seq_len
         self.base = base
         
-        # Frequency hesaplama
+        # Compute frequencies
         inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2).float() / dim))
         self.register_buffer('inv_freq', inv_freq)
         
-        # Cache için sin/cos değerleri
+        # Cache sin/cos values
         self._set_cos_sin_cache(max_seq_len)
     
     def _set_cos_sin_cache(self, seq_len: int):
@@ -42,9 +42,9 @@ def forward(self, x: torch.Tensor, seq_len: int = None):
         return self.cos_cached[:seq_len], self.sin_cached[:seq_len]
 
 def apply_rotary_pos_emb(q, k, cos, sin):
-    """RoPE uygulama fonksiyonu"""
+    """Apply RoPE to queries and keys"""
     def rotate_half(x):
-        # x'in yarısını rotate et
+        # Rotate half of x
         x1, x2 = x[..., :x.shape[-1]//2], x[..., x.shape[-1]//2:]
         return torch.cat((-x2, x1), dim=-1)
     
@@ -52,12 +52,12 @@ def rotate_half(x):
     k_embed = k * cos + rotate_half(k) * sin
     return q_embed, k_embed
 
-# 2. RMSNorm - LayerNorm'dan daha verimli
+# 2. RMSNorm - More efficient than LayerNorm
 class RMSNorm(nn.Module):
     """
     Root Mean Square Normalization
-    - LayerNorm'dan daha hızlı (mean hesaplama yok)
-    - Llama'da kullanılır
+    - Faster than LayerNorm (no mean computation)
+    - Used in Llama
     """
     def __init__(self, dim: int, eps: float = 1e-6):
         super().__init__()
@@ -65,16 +65,16 @@ def __init__(self, dim: int, eps: float = 1e-6):
         self.weight = nn.Parameter(torch.ones(dim))
     
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        # RMS hesaplama
+        # Compute RMS
         norm = x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps)
         return self.weight * norm
 
-# 3. SwiGLU Activation - Llama'nın kullandığı
+# 3. SwiGLU Activation - Used by Llama
 class SwiGLU(nn.Module):
     """
     Swish-Gated Linear Unit
     - GLU (Gated Linear Unit) + Swish activation
-    - Standard FFN'den daha iyi performance
+    - Better performance than a standard FFN
     """
     def __init__(self, dim: int, hidden_dim: int):
         super().__init__()
@@ -91,9 +91,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
 # 4. Grouped Query Attention (GQA) - Memory efficient
 class GroupedQueryAttention(nn.Module):
     """
-    GQA - Query/Key/Value head'leri farklı sayıda
-    - Multi-Head Attention ve Multi-Query Attention arası compromise
-    - Memory efficiency + quality balance
+    GQA - Different numbers of query/key/value heads
+    - A compromise between Multi-Head Attention and Multi-Query Attention
+    - Balances memory efficiency and quality
     """
     def __init__(self, dim: int, n_heads: int, n_kv_heads: int):
         super().__init__()
@@ -102,13 +102,13 @@ def __init__(self, dim: int, n_heads: int, n_kv_heads: int):
         self.head_dim = dim // n_heads
         self.group_size = n_heads // n_kv_heads
         
-        # Query için tüm head'ler
+        # Full set of heads for the queries
         self.wq = nn.Linear(dim, n_heads * self.head_dim, bias=False)
-        # Key/Value için daha az head
+        # Fewer heads for keys/values
         self.wk = nn.Linear(dim, n_kv_heads * self.head_dim, bias=False)
         self.wv = nn.Linear(dim, n_kv_heads * self.head_dim, bias=False)
         self.wo = nn.Linear(n_heads * self.head_dim, dim, bias=False)
-        
+
         # RoPE
         self.rope = RotaryPositionalEmbedding(self.head_dim)
     
@@ -124,11 +124,11 @@ def forward(self, x: torch.Tensor, mask: Optional[torch.Tensor] = None):
         cos, sin = self.rope(x, seq_len)
         q, k = apply_rotary_pos_emb(q, k, cos, sin)
         
-        # K, V'yi group_size kadar repeat et
+        # Repeat K and V by group_size
         k = k.repeat_interleave(self.group_size, dim=2)
         v = v.repeat_interleave(self.group_size, dim=2)
         
-        # Attention hesaplama
+        # Compute attention
         q = q.transpose(1, 2)  # (bsz, n_heads, seq_len, head_dim)
         k = k.transpose(1, 2)
         v = v.transpose(1, 2)
@@ -141,7 +141,7 @@ def forward(self, x: torch.Tensor, mask: Optional[torch.Tensor] = None):
         attn = F.softmax(scores, dim=-1)
         out = torch.matmul(attn, v)
         
-        # Reshape ve output projection
+        # Reshape and project back
         out = out.transpose(1, 2).contiguous().view(bsz, seq_len, -1)
         return self.wo(out)
 
@@ -149,7 +149,7 @@ def forward(self, x: torch.Tensor, mask: Optional[torch.Tensor] = None):
 class TransformerBlock(nn.Module):
     """
     Modern transformer block:
-    - Pre-normalization (norm önce gelir)
+    - Pre-normalization (norm comes first)
     - Residual connections
     - SwiGLU FFN
     - GQA attention
@@ -172,7 +172,7 @@ def forward(self, x: torch.Tensor, mask: Optional[torch.Tensor] = None):
         
         return x
 
-# 6. Weight Tying ve Advanced Initialization
+# 6. Weight tying and advanced initialization
 def scaled_init_(tensor: torch.Tensor, scale: float = 1.0):
     """Modern weight initialization"""
     std = scale / math.sqrt(tensor.shape[-1])
@@ -207,7 +207,7 @@ def __init__(self, vocab_size: int, dim: int, n_layers: int,
         # Output projection (weight tying ile)
         self.output = nn.Linear(dim, vocab_size, bias=False)
         
-        # Weight tying: input ve output embedding'leri paylaş
+        # Weight tying: share input and output embeddings
         self.output.weight = self.tok_embeddings.weight
         
         # Modern initialization
@@ -227,7 +227,7 @@ def forward(self, tokens: torch.Tensor, targets: Optional[torch.Tensor] = None):
         # Token embeddings
         x = self.tok_embeddings(tokens)
         
-        # Causal mask oluştur
+        # Create a causal mask
         mask = torch.tril(torch.ones(seq_len, seq_len, device=tokens.device))
         mask = mask.unsqueeze(0).unsqueeze(0)  # (1, 1, seq_len, seq_len)
         
@@ -242,29 +242,29 @@ def forward(self, tokens: torch.Tensor, targets: Optional[torch.Tensor] = None):
         logits = self.output(x)
         
         if targets is not None:
-            # Training loss hesaplama
+            # Compute the training loss
             loss = F.cross_entropy(
                 logits.view(-1, self.vocab_size),
                 targets.view(-1),
                 ignore_index=-100
             )
             return logits, loss
-        
+
         return logits
 
 # Usage example
 if __name__ == "__main__":
-    # Model parametreleri (Llama-style)
+    # Model hyperparameters (Llama-style)
     model = ModernLLM(
         vocab_size=32000,
         dim=4096,
         n_layers=32,
         n_heads=32,
-        n_kv_heads=8,  # GQA için daha az KV head
+        n_kv_heads=8,  # Fewer KV heads for GQA
         norm_eps=1e-6
     )
     
-    # Örnek input
+    # Example input
     batch_size, seq_len = 2, 512
     tokens = torch.randint(0, 32000, (batch_size, seq_len))
     
diff --git a/Genel-5/training_inference_techniques.py b/Genel-5/training_inference_techniques.py
index c74f132..52fb1bd 100644
--- a/Genel-5/training_inference_techniques.py
+++ b/Genel-5/training_inference_techniques.py
@@ -12,7 +12,7 @@ class CosineScheduler:
     """
     Cosine Learning Rate Scheduling - Modern optimization
     - Warmup + Cosine decay
-    - Llama ve GPT-4'te kullanılır
+    - Used in models such as Llama and GPT-4
     """
     def __init__(self, optimizer, warmup_steps: int, max_steps: int, 
                  min_lr: float = 0.0, max_lr: float = 1e-4):
@@ -43,13 +43,13 @@ class GradientClipper:
     """
     Gradient Clipping - Training stability
     - Global norm clipping
-    - Exploding gradient problemini çözer
+    - Mitigates exploding gradient issues
     """
     def __init__(self, max_norm: float = 1.0):
         self.max_norm = max_norm
     
     def clip_gradients(self, model: nn.Module) -> float:
-        # Global gradient norm hesapla
+        # Compute the global gradient norm
         total_norm = 0.0
         for p in model.parameters():
             if p.grad is not None:
diff --git a/Multi Modal/README.md b/Multi Modal/README.md
index 3580745..1b592a2 100644
--- a/Multi Modal/README.md	
+++ b/Multi Modal/README.md	
@@ -1,78 +1,78 @@
-# Gelişmiş Multimodal Transformer Modeli
+# Advanced Multimodal Transformer Model
 
-Bu proje, gerçek video, ses ve metin verilerini işleyebilen gelişmiş bir multimodal (çoklu-modal) transformer modeli içermektedir. Model, verilen video, ses ve metin verilerini birleştirerek sınıflandırma yapmak üzere tasarlanmıştır.
+This project contains an advanced multimodal transformer capable of processing real video, audio, and text data. The model fuses the provided modalities to perform classification.
 
-## Özellikler
+## Features
 
-- Gerçek video dosyalarını işleme
-- Gerçek ses dosyalarını işleme
-- İlgili metin dosyalarını işleme
-- Tüm modalitelerin füzyonu ile sınıflandırma
-- Video için gelişmiş 3D-CNN mimarisi
-- Ses için gelişmiş spektrogram işleme
-- Türkçe metin desteği (BERT tabanlı)
+- Process real video files
+- Process real audio files
+- Process the accompanying text files
+- Fuse all modalities for classification
+- Use an enhanced 3D-CNN backbone for video
+- Apply advanced spectrogram processing for audio
+- Provide Turkish text support (BERT-based)
 
-## Proje İçeriği
+## Project Layout
 
-- `basic-multimodal.py`: Ana kod dosyası
-- `requirements.txt`: Gerekli Python paketleri
-- `multimodal_dataset/`: Veri klasörü
-  - `videos/`: Video dosyaları
-  - `audios/`: Ses dosyaları
-  - `texts/`: Metin dosyaları
-  - `metadata.json`: Veri seti metadatası
+- `basic-multimodal.py`: Main application script
+- `requirements.txt`: Required Python packages
+- `multimodal_dataset/`: Dataset folder
+  - `videos/`: Video files
+  - `audios/`: Audio files
+  - `texts/`: Text files
+  - `metadata.json`: Dataset metadata
 
-## Kurulum
+## Installation
 
-Gerekli paketleri yüklemek için:
+Install the dependencies with:
 
 ```bash
 pip install -r requirements.txt
 ```
 
-## Kullanım
+## Usage
 
-Model, hem örnek veri ile hem de gerçek video, ses ve metin dosyalarıyla çalışabilir:
+The model can run with either sample data or your own video, audio, and text files:
 
 ```bash
 python basic-multimodal.py
 ```
 
-Program çalıştığında size iki seçenek sunacaktır:
-1. Örnek veri (otomatik oluşturulan demo)
-2. Gerçek veri (kendi video, ses ve metin dosyalarınız)
+When the program starts it offers two options:
+1. Sample data (automatically generated demo)
+2. Real data (provide your own video, audio, and text files)
 
-Gerçek veri seçeneğini seçtiğinizde:
-1. Video dosyalarınızı `multimodal_dataset/videos/` klasörüne koyun
-2. Ses dosyalarınızı `multimodal_dataset/audios/` klasörüne koyun
-3. Her örnek için metin açıklamalarını girebilir veya `multimodal_dataset/texts/` klasörüne koyabilirsiniz
+If you choose real data:
+1. Place video files in `multimodal_dataset/videos/`
+2. Place audio files in `multimodal_dataset/audios/`
+3. Enter a text description for each example when prompted, or place the text files in `multimodal_dataset/texts/`
 
-## Model Mimarisi
+## Model Architecture
 
-Bu gelişmiş multimodal model üç temel bileşenden oluşmaktadır:
+The multimodal model consists of three primary components:
 
-1. **Video Enkoder**: 3D CNN kullanarak videolardan özellik çıkarımı yapar
-   - 224x224 çözünürlük
-   - 16 frame işleme
-   - AdaptiveAvgPool ve dropout katmanları
+1. **Video Encoder**: Extracts features from videos using a 3D CNN
+   - 224×224 resolution
+   - 16 frames per clip
+   - Adaptive average pooling and dropout layers
 
-2. **Ses Enkoder**: Spektrogramlar üzerinde 2D CNN kullanarak seslerden özellik çıkarımı yapar
-   - Mel spektrogramları
-   - 128 mel-filtre bandı
-   - 5 saniyelik ses örnekleri
+2. **Audio Encoder**: Extracts features from spectrograms using a 2D CNN
+   - Mel spectrogram inputs
+   - 128 mel filter bands
+   - 5-second audio segments
 
-3. **Metin Enkoder**: Türkçe BERT modeli kullanarak metinlerden özellik çıkarımı yapar
+3. **Text Encoder**: Extracts features from text using a Turkish BERT model
 
-Çoklu-modal füzyon için:
-- Transformer-tabanlı cross-attention
-- Multi-head attention mekanizması
-- Katmanlı normalizasyon
+For multimodal fusion the pipeline includes:
+- Transformer-based cross-attention
+- Multi-head attention
+- Layer normalisation
 
-## Çıktılar
+## Outputs
 
-Model eğitim sonuçları, görseller ve eğitilen model `multimodal_dataset/` dizini içinde kaydedilir.
+Training artefacts, visualisations, and the trained model are saved inside the `multimodal_dataset/` directory.
 
-## Gereksinimler
+## Requirements
 
 - Python 3.7+
 - PyTorch 1.9+
diff --git a/Multi Modal/basic-multimodal.py b/Multi Modal/basic-multimodal.py
index e95acac..716992c 100644
--- a/Multi Modal/basic-multimodal.py	
+++ b/Multi Modal/basic-multimodal.py	
@@ -14,53 +14,53 @@
 import cv2
 from scipy.io import wavfile
 
-# Cihaz yapılandırması
+# Device configuration
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-print(f"Kullanılan cihaz: {device}")
+print(f"Using device: {device}")
 
-# Veri yolu ayarları
+# Data path configuration
 DATA_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "multimodal_dataset")
 os.makedirs(DATA_DIR, exist_ok=True)
 
-# Veri seti yapısını oluşturalım
+# Build the dataset structure
 def create_real_data_metadata():
-    """Gerçek video, ses ve metin dosyaları için metadata oluşturur"""
-    
-    # Veri dizinlerini oluştur
+    """Create metadata for real video, audio, and text files."""
+
+    # Create the data directories
     video_dir = os.path.join(DATA_DIR, "videos")
     audio_dir = os.path.join(DATA_DIR, "audios")
     text_dir = os.path.join(DATA_DIR, "texts")
-    
+
     os.makedirs(video_dir, exist_ok=True)
     os.makedirs(audio_dir, exist_ok=True)
     os.makedirs(text_dir, exist_ok=True)
-    
-    # Metadata dosyası için veri yapısı
+
+    # Data structure for the metadata file
     data_entries = []
-    
-    # Kullanıcıdan video dosyalarını yüklemesini iste
+
+    # Prompt the user to upload video files
     print("\n" + "="*80)
-    print("GERÇEK VERİ HAZIRLIĞI")
+    print("REAL DATA PREPARATION")
     print("="*80)
-    print("Bu adımda gerçek video, ses ve metin dosyalarını kullanacağız.")
-    print("Bunun için birkaç video dosyasını belirtilen klasörlere kopyaladıktan sonra metadatasını oluşturacağız.")
-    print("\nAşağıdaki işlemleri manuel olarak yapmanız gerekiyor:")
-    print(f"1. Video dosyalarınızı şu klasöre kopyalayın: {video_dir}")
-    print(f"2. Ses dosyalarınızı şu klasöre kopyalayın: {audio_dir}")
-    print(f"3. Her video/ses için metin dosyalarını şu klasöre kopyalayın: {text_dir}")
-    print("4. Video, ses ve metin dosyalarının isimlerini eşleşecek şekilde numaralandırın.")
-    print("   Örnek: video_1.mp4, audio_1.wav, text_1.txt")
-    print("\nHazır olduğunuzda ENTER tuşuna basın...")
+    print("This step uses real video, audio, and text assets.")
+    print("Copy a few files into the folders below and we will generate the metadata.")
+    print("\nPlease perform the following steps manually:")
+    print(f"1. Copy your video files to: {video_dir}")
+    print(f"2. Copy your audio files to: {audio_dir}")
+    print(f"3. Copy or create text files for each sample in: {text_dir}")
+    print("4. Make sure video, audio, and text file names align with matching indices.")
+    print("   Example: video_1.mp4, audio_1.wav, text_1.txt")
+    print("\nPress ENTER when everything is ready...")
     input()
-    
-    # Dosyaları tara ve metadata oluştur
+
+    # Scan the files and build metadata
     video_files = [f for f in os.listdir(video_dir) if f.endswith(('.mp4', '.avi', '.mov'))]
-    
+
     for i, video_file in enumerate(video_files):
         video_id = i
         video_path = os.path.join(video_dir, video_file)
-        
-        # İlgili ses dosyasını bul (aynı isimde veya numarada olan)
+
+        # Locate the matching audio file (same name or index)
         base_name = os.path.splitext(video_file)[0]
         audio_file = None
         for ext in ['.wav', '.mp3']:
@@ -68,38 +68,38 @@ def create_real_data_metadata():
             if os.path.exists(os.path.join(audio_dir, possible_audio)):
                 audio_file = possible_audio
                 break
-        
-        # Ses dosyası bulunamadıysa videodan ses çıkar
+
+        # If there is no audio file, request manual extraction
         audio_path = None
         if audio_file:
             audio_path = os.path.join(audio_dir, audio_file)
         else:
-            # Yeni bir ses dosyası ismi oluştur
+            # Suggest a new audio file name
             audio_path = os.path.join(audio_dir, f"{base_name}.wav")
-            
-            # Videodan ses çıkar (FFmpeg gerekir - kullanıcıya bilgi ver)
-            print(f"'{base_name}' için ses dosyası bulunamadı.")
-            print(f"Ses dosyasını manuel olarak oluşturup '{audio_path}' konumuna kaydedin.")
-            print("Hazır olduğunuzda ENTER tuşuna basın...")
+
+            # Ask the user to extract audio manually (FFmpeg required)
+            print(f"No audio track found for '{base_name}'.")
+            print(f"Please create the audio file manually and save it to '{audio_path}'.")
+            print("Press ENTER once the file is available...")
             input()
-        
-        # İlgili metin dosyasını bul veya oluştur
+
+        # Locate or create the corresponding text file
         text_file = base_name + ".txt"
         text_path = os.path.join(text_dir, text_file)
-        
+
         text = ""
         if os.path.exists(text_path):
             with open(text_path, 'r', encoding='utf-8') as f:
                 text = f.read().strip()
         else:
-            # Metin dosyası yoksa kullanıcıdan metin girmesini iste
-            print(f"'{base_name}' için metin açıklaması girin (video içeriğini açıklayan metin):")
+            # Ask the user to provide a text description if none exists
+            print(f"Enter a text description for '{base_name}' (describe the video content):")
             text = input().strip()
-            # Metin dosyasını kaydet
+            # Save the text file
             with open(text_path, 'w', encoding='utf-8') as f:
                 f.write(text)
-        
-        # Metadatalara ekle
+
+        # Add the sample to the metadata collection
         data_entries.append({
             "id": video_id,
             "video_path": os.path.relpath(video_path, DATA_DIR),
@@ -107,36 +107,36 @@ def create_real_data_metadata():
             "text": text,
             "text_path": os.path.relpath(text_path, DATA_DIR)
         })
-    
-    # JSON dosyasına kaydet
+
+    # Persist metadata to JSON
     metadata_path = os.path.join(DATA_DIR, "metadata.json")
     with open(metadata_path, "w", encoding="utf-8") as f:
         json.dump(data_entries, f, ensure_ascii=False, indent=4)
-    
-    print(f"Metadata oluşturuldu. Toplam {len(data_entries)} örnek.")
+
+    print(f"Metadata created for {len(data_entries)} samples.")
     return metadata_path
 
 
 class MultiModalDataset(Dataset):
-    """Multimodal veri seti: video, ses ve metin içeren bir dataset"""
+    """Multimodal dataset containing video, audio, and text."""
     
     def __init__(self, metadata_path, max_length=128):
         with open(metadata_path, "r", encoding="utf-8") as f:
             self.data = json.load(f)
         self.data_dir = os.path.dirname(metadata_path)
         
-        # Metin tokenizeri
+        # Text tokenizer
         self.tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-turkish-cased")
         self.max_length = max_length
-        
-        # Video dönüşümleri
+
+        # Video transforms
         self.video_transform = transforms.Compose([
-            transforms.Resize((224, 224)),  # Gerçek videolar için daha büyük boyut
+            transforms.Resize((224, 224)),  # Larger resolution for real videos
             transforms.ToTensor(),
             transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
         ])
-        
-        # Ses dönüşümleri
+
+        # Audio transforms
         self.audio_transform = transforms.Compose([
             transforms.Normalize(mean=[-15], std=[40])
         ])
@@ -147,7 +147,7 @@ def __len__(self):
     def __getitem__(self, idx):
         item = self.data[idx]
         
-        # Metin işleme - varsa metin dosyasını oku, yoksa direkt metin kullan
+        # Text processing - prefer reading from file, fallback to inline text
         text = item.get("text", "")
         if "text_path" in item:
             try:
@@ -156,8 +156,8 @@ def __getitem__(self, idx):
                     with open(text_path, "r", encoding="utf-8") as f:
                         text = f.read().strip()
             except Exception as e:
-                print(f"Metin dosyası okuma hatası: {e}")
-        
+                print(f"Text file read error: {e}")
+
         text_encoding = self.tokenizer(
             text,
             max_length=self.max_length,
@@ -165,124 +165,124 @@ def __getitem__(self, idx):
             truncation=True,
             return_tensors="pt"
         )
-        
-        # Ses işleme - wav ve mp3 formatlarını destekle
+
+        # Audio processing - support wav and mp3 formats
         audio_path = os.path.join(self.data_dir, item["audio_path"])
         try:
             if audio_path.lower().endswith('.wav'):
-                # WAV dosyaları için scipy.io.wavfile kullan
+                # Use scipy.io.wavfile for WAV files
                 sample_rate, audio_data = wavfile.read(audio_path)
-                # Int16'dan float32'ye dönüştür
+                # Convert from integer representations to float32
                 if audio_data.dtype == np.int16:
                     audio_data = audio_data.astype(np.float32) / 32767.0
                 elif audio_data.dtype == np.int32:
                     audio_data = audio_data.astype(np.float32) / 2147483647.0
                 elif audio_data.dtype == np.uint8:
                     audio_data = (audio_data.astype(np.float32) - 128) / 128.0
-                
-                # Çok kanallı sesi mono'ya dönüştür
+
+                # Convert multi-channel audio to mono
                 if len(audio_data.shape) > 1 and audio_data.shape[1] > 1:
                     audio_data = np.mean(audio_data, axis=1)
-                
-                # Tensöre çevir
+
+                # Convert to tensor
                 waveform = torch.tensor(audio_data).float().unsqueeze(0)
             else:
-                # Diğer ses formatları için torchaudio.load deneyin
+                # Fallback to torchaudio.load for other formats
                 try:
                     waveform, sample_rate = torchaudio.load(audio_path)
-                    # Stereo ise mono'ya çevir
+                    # Convert stereo to mono
                     if waveform.shape[0] > 1:
                         waveform = torch.mean(waveform, dim=0, keepdim=True)
                 except Exception as e:
-                    print(f"Ses dosyası yükleme hatası: {e}")
-                    # Boş bir ses tensörü oluştur
-                    waveform = torch.zeros(1, 16000 * 5)  # 5 saniyelik boş ses
+                    print(f"Audio load error: {e}")
+                    # Create an empty waveform placeholder
+                    waveform = torch.zeros(1, 16000 * 5)  # 5 seconds of silence
                     sample_rate = 16000
         except Exception as e:
-            print(f"Ses işleme hatası: {e}")
-            waveform = torch.zeros(1, 16000 * 5)  # 5 saniyelik boş ses
+            print(f"Audio processing error: {e}")
+            waveform = torch.zeros(1, 16000 * 5)  # 5 seconds of silence
             sample_rate = 16000
-        
-        # Yeniden örnekleme - tüm ses verilerini 16 kHz'e getir
+
+        # Resample to 16 kHz
         if sample_rate != 16000:
             resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
             waveform = resampler(waveform)
             sample_rate = 16000
-            
-        # Sabit uzunluğa getir (5 saniye)
+
+        # Normalise duration to five seconds
         target_length = 5 * 16000
         if waveform.shape[1] < target_length:
-            # Padding ekle
+            # Pad if the audio is shorter
             padding = torch.zeros(waveform.shape[0], target_length - waveform.shape[1])
             waveform = torch.cat([waveform, padding], dim=1)
         else:
-            # Kes
+            # Trim longer audio
             waveform = waveform[:, :target_length]
-        
-        # Spektrogram oluştur
+
+        # Create a spectrogram
         spectrogram = torchaudio.transforms.MelSpectrogram(
             sample_rate=16000, n_fft=400, n_mels=128
         )(waveform)
         spectrogram = torchaudio.transforms.AmplitudeToDB()(spectrogram)
-        # İlk boyutu sıkıştır
+        # Remove the channel dimension
         spectrogram = spectrogram.squeeze(0)
-        
-        # Video işleme
+
+        # Video processing
         video_path = os.path.join(self.data_dir, item["video_path"])
         try:
             cap = cv2.VideoCapture(video_path)
             if not cap.isOpened():
-                raise ValueError(f"Video dosyası açılamadı: {video_path}")
-                
-            # Video bilgilerini al
+                raise ValueError(f"Unable to open video file: {video_path}")
+
+            # Gather video information
             total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
             fps = cap.get(cv2.CAP_PROP_FPS)
-            
+
             frames = []
             frame_indices = []
-            
-            # Hedef kare sayısı
-            target_frames = 16  # Daha fazla frame al
-            
+
+            # Target number of frames
+            target_frames = 16  # Capture more frames
+
             if total_frames <= 0:
-                raise ValueError(f"Video frame sayısı sıfır veya negatif: {total_frames}")
-                
-            # Frame indislerini belirle
+                raise ValueError(f"Video frame count is zero or negative: {total_frames}")
+
+            # Determine frame indices
             if total_frames <= target_frames:
                 frame_indices = list(range(total_frames))
             else:
-                # Düzenli aralıklarla örnekleme yap
+                # Sample frames at regular intervals
                 step = total_frames / target_frames
                 frame_indices = [int(i * step) for i in range(target_frames)]
-            
+
             for frame_idx in frame_indices:
-                # Belirli bir frame'e git
+                # Seek to the desired frame
                 cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
                 ret, frame = cap.read()
                 if not ret:
                     continue
-                    
-                # BGR'den RGB'ye dönüştür
+
+                # Convert from BGR to RGB
                 frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                 frame = Image.fromarray(frame)
                 frame = self.video_transform(frame)
                 frames.append(frame)
-            
+
             cap.release()
-            
-            # Eksik frame'leri doldur
+
+            # Fill any missing frames
             while len(frames) < target_frames:
                 if frames:
-                    frames.append(frames[-1])  # Son frame ile doldur
+                    frames.append(frames[-1])  # Repeat the last frame
                 else:
-                    # Boş bir frame ekle
+                    # Insert an empty frame placeholder
                     frames.append(torch.zeros(3, 224, 224))
-            
-            video_tensor = torch.stack(frames[:target_frames])  # Emin olmak için kırp
-            
+
+            video_tensor = torch.stack(frames[:target_frames])  # Ensure consistent length
+
         except Exception as e:
-            print(f"Video işleme hatası: {e}")
-            # Hata durumunda boş video tensörü döndür
+            print(f"Video processing error: {e}")
+            # Return an empty video tensor if processing fails
             video_tensor = torch.zeros(16, 3, 224, 224)
         
         return {
@@ -297,13 +297,13 @@ def __getitem__(self, idx):
 
 # Model mimarisi - Multimodal Fusion
 class VideoEncoder(nn.Module):
-    """Video kodlayıcı modül - Gerçek videolar için daha güçlü"""
+    """Video encoder module tuned for real-world videos."""
     def __init__(self, embed_dim=256, input_shape=(16, 3, 224, 224)):
         super().__init__()
         
         num_frames, channels, height, width = input_shape
         
-        # 3D CNN tabanlı enkoder - daha güçlü yapı
+        # 3D CNN encoder with a deeper stack
         self.conv3d = nn.Sequential(
             nn.Conv3d(3, 64, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
             nn.BatchNorm3d(64),
@@ -326,12 +326,12 @@ def __init__(self, embed_dim=256, input_shape=(16, 3, 224, 224)):
             nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2)),  # [B, 512, F/8, H/16, W/16]
         )
         
-        # Son spatial boyutları hesapla
+        # Compute the final spatial dimensions
         f_out = num_frames // 8
         h_out = height // 16
         w_out = width // 16
         
-        # Global average pooling ve projeksiyon
+        # Global average pooling and projection
         self.avgpool = nn.AdaptiveAvgPool3d((1, 1, 1))
         self.projection = nn.Sequential(
             nn.Linear(512, 1024),
@@ -341,34 +341,34 @@ def __init__(self, embed_dim=256, input_shape=(16, 3, 224, 224)):
         )
         
     def forward(self, x):
-        # x girişi: [batch_size, frames, channels, height, width]
-        # 3D CNN için: [batch_size, channels, frames, height, width]
+        # Input x: [batch_size, frames, channels, height, width]
+        # Reorder for 3D CNN: [batch_size, channels, frames, height, width]
         x = x.permute(0, 2, 1, 3, 4)
         
         try:
-            # İleri geçişi gerçekleştir
+            # Standard forward pass
             x = self.conv3d(x)
             # Global average pooling
             x = self.avgpool(x)
             x = x.reshape(x.size(0), -1)
             x = self.projection(x)
         except RuntimeError as e:
-            # Hata oluşursa boyutları yazdır ve daha güvenli bir forward uygula
-            print(f"VideoEncoder hatası: {e}")
-            print(f"Giriş boyutları: {x.shape}")
-            
-            # Güvenli alternatif: Basitleştirilmiş işleme
+            # On failure, report the shape and use a safer path
+            print(f"VideoEncoder error: {e}")
+            print(f"Input shape: {x.shape}")
+
+            # Safe alternative: simplified processing
             batch_size = x.size(0)
             x = torch.mean(x, dim=(2, 3, 4))  # Global average pooling [B, C]
             x = torch.nn.functional.normalize(x, p=2, dim=1)
-            x = torch.nn.functional.linear(x, 
+            x = torch.nn.functional.linear(x,
                                           torch.randn(256, x.size(1), device=x.device))
             
         return x
 
 
 class AudioEncoder(nn.Module):
-    """Ses kodlayıcı modül - Gerçek ses verileri için daha güçlü"""
+    """Audio encoder module tuned for real-world audio data."""
     def __init__(self, embed_dim=256):
         super().__init__()
         self.conv = nn.Sequential(
@@ -393,7 +393,7 @@ def __init__(self, embed_dim=256):
             nn.MaxPool2d(kernel_size=2, stride=2)   # [B, 512, F/16, T/16]
         )
         
-        # Global average pooling ve projeksiyon
+        # Global average pooling and projection
         self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
         self.projection = nn.Sequential(
             nn.Linear(512, 1024),
@@ -403,38 +403,39 @@ def __init__(self, embed_dim=256):
         )
         
     def forward(self, x):
-        # x girişi: [batch_size, freq_bins, time_frames]
+        # Input x: [batch_size, freq_bins, time_frames]
         x = x.unsqueeze(1)  # [batch_size, 1, freq_bins, time_frames]
-        
+
         try:
-            # İleri geçişi gerçekleştir
+            # Standard forward pass
             x = self.conv(x)
             # Global average pooling
             x = self.avgpool(x)
             x = x.reshape(x.size(0), -1)
             x = self.projection(x)
         except RuntimeError as e:
-            print(f"AudioEncoder hatası: {e}")
-            print(f"Giriş boyutları: {x.shape}")
-            
-            # Güvenli alternatif: Basitleştirilmiş işleme
+            print(f"AudioEncoder error: {e}")
+            print(f"Input shape: {x.shape}")
+
+            # Safe fallback: simplified processing
             batch_size = x.size(0)
             x = torch.mean(x, dim=(2, 3))  # Global average pooling [B, C]
             x = torch.nn.functional.normalize(x, p=2, dim=1)
-            x = torch.nn.functional.linear(x, 
+            x = torch.nn.functional.linear(x,
                                           torch.randn(256, x.size(1), device=x.device))
         
         return x
 
 
 class TextEncoder(nn.Module):
-    """Metin kodlayıcı modül - BERT tabanlı"""
+    """BERT-based text encoder module."""
+
     def __init__(self, embed_dim=256):
         super().__init__()
-        # Türkçe BERT modelini kullan
+        # Load the Turkish BERT model
         self.bert = AutoModel.from_pretrained("dbmdz/bert-base-turkish-cased")
-        
-        # BERT çıktısını projekte etmek için
+
+        # Project BERT outputs to the shared embedding size
         self.projection = nn.Linear(self.bert.config.hidden_size, embed_dim)
         
     def forward(self, input_ids, attention_mask):
@@ -445,21 +446,21 @@ def forward(self, input_ids, attention_mask):
 
 
 class MultiModalTransformer(nn.Module):
-    """Çoklu modal transformer modeli - Video, Ses ve Metin için gelişmiş model"""
+    """Enhanced multimodal transformer for video, audio, and text."""
     def __init__(self, embed_dim=256, num_heads=8, num_layers=4, output_dim=5):
         super().__init__()
         
-        # Alt modül enkoderleri - gerçek video ve ses için daha güçlü
+        # Sub-encoders, made stronger to handle real video and audio inputs
         self.video_encoder = VideoEncoder(embed_dim, input_shape=(16, 3, 224, 224))
         self.audio_encoder = AudioEncoder(embed_dim)
         self.text_encoder = TextEncoder(embed_dim)
         
-        # Modalite projeksiyon katmanları
+        # Projection layers per modality
         self.video_projection = nn.Linear(embed_dim, embed_dim)
         self.audio_projection = nn.Linear(embed_dim, embed_dim)
         self.text_projection = nn.Linear(embed_dim, embed_dim)
         
-        # Cross-Attention için transformer blokları
+        # Transformer encoder blocks for cross-attention
         encoder_layer = nn.TransformerEncoderLayer(
             d_model=embed_dim,
             nhead=num_heads,
@@ -469,7 +470,7 @@ def __init__(self, embed_dim=256, num_heads=8, num_layers=4, output_dim=5):
         )
         self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
         
-        # Modalite füzyonu için dikkat mekanizması
+        # Attention module for modality fusion
         self.modal_attention = nn.MultiheadAttention(
             embed_dim=embed_dim, 
             num_heads=num_heads, 
@@ -477,7 +478,7 @@ def __init__(self, embed_dim=256, num_heads=8, num_layers=4, output_dim=5):
             batch_first=True
         )
         
-        # Füzyon katmanı
+        # Fusion feed-forward network
         self.fusion_layer = nn.Sequential(
             nn.Linear(embed_dim * 3, embed_dim * 2),
             nn.LayerNorm(embed_dim * 2),
@@ -488,7 +489,7 @@ def __init__(self, embed_dim=256, num_heads=8, num_layers=4, output_dim=5):
             nn.ReLU()
         )
         
-        # Çıkış katmanı
+        # Output classification head
         self.output_layer = nn.Sequential(
             nn.Linear(embed_dim, embed_dim),
             nn.ReLU(),
@@ -496,97 +497,97 @@ def __init__(self, embed_dim=256, num_heads=8, num_layers=4, output_dim=5):
             nn.Linear(embed_dim, output_dim)
         )
         
-        # Embedding boyutu
+        # Store embedding dimension
         self.embed_dim = embed_dim
         
     def forward(self, video, audio, text_input_ids, text_attention_mask):
-        # Her bir modalite için özellikleri çıkar
+        # Extract features for each modality
         try:
-            # Modaliteleri ayrı ayrı kodla
+            # Encode each modality separately
             video_emb = self.video_encoder(video)
             audio_emb = self.audio_encoder(audio)
             text_emb = self.text_encoder(text_input_ids, text_attention_mask)
-            
-            # Projeksiyon katmanları ile özellikleri uyumlu hale getir
+
+            # Align features with the projection layers
             video_emb = self.video_projection(video_emb)
             audio_emb = self.audio_projection(audio_emb)
             text_emb = self.text_projection(text_emb)
-            
-            # Özellikleri birleştir (concatenate) ve füzyon katmanı ile işle
+
+            # Concatenate features and pass through the fusion network
             combined_features = torch.cat([video_emb, audio_emb, text_emb], dim=1)
             fused_features = self.fusion_layer(combined_features)
-            
-            # Sınıflandırma çıktısı
+
+            # Produce classification logits
             output = self.output_layer(fused_features)
-            
+
         except RuntimeError as e:
-            print(f"MultiModalTransformer hatası: {e}")
-            # Daha basit bir modelle devam et
+            print(f"MultiModalTransformer error: {e}")
+            # Fall back to a simplified representation
             batch_size = video.size(0)
-            
-            # Güvenli alternatif
+
+            # Safe fallback features
             video_mean = torch.mean(video, dim=(1, 2, 3, 4))
             audio_mean = torch.mean(audio, dim=(1, 2))
             text_mean = torch.mean(text_input_ids.float(), dim=1)
-            
+
             combined = torch.cat([video_mean, audio_mean, text_mean], dim=1)
             combined = torch.nn.functional.normalize(combined, p=2, dim=1)
-            
-            # Doğrudan çıkış katmanına geç - 5 sınıf için
-            out_dim = 5  # Varsayılan sınıf sayısı
-            output = torch.nn.functional.linear(combined, 
+
+            # Apply a direct linear projection for five classes
+            out_dim = 5  # Default number of classes
+            output = torch.nn.functional.linear(combined,
                                               torch.randn(out_dim, combined.size(1), device=video.device))
         
         return output
 
 
-# Eğitim ve değerlendirme fonksiyonları
+# Training and evaluation helpers
 def train_model(model, train_loader, optimizer, criterion, device, num_epochs=5):
-    """Model eğitim fonksiyonu"""
+    """Train the multimodal model."""
     model.train()
     train_losses = []
     
     for epoch in range(num_epochs):
         epoch_loss = 0
         for batch_idx, batch in enumerate(train_loader):
-            # Veriyi cihaza taşı
+            # Move data to device
             video = batch["video"].to(device)
             audio = batch["audio"].to(device)
             text_input_ids = batch["text_input_ids"].to(device)
             text_attention_mask = batch["text_attention_mask"].to(device)
-            targets = batch["id"].to(device)  # ID'leri hedef olarak kullan
-            
-            # Veri boyutlarını yazdır (hata ayıklama için)
+            targets = batch["id"].to(device)  # Use the ID field as the label
+
+            # Print tensor shapes for debugging on the first batch
             if batch_idx == 0 and epoch == 0:
-                print(f"Video boyutu: {video.shape}")
-                print(f"Audio boyutu: {audio.shape}")
-                print(f"Text input_ids boyutu: {text_input_ids.shape}")
+                print(f"Video shape: {video.shape}")
+                print(f"Audio shape: {audio.shape}")
+                print(f"Text input_ids shape: {text_input_ids.shape}")
             
             # Forward pass
             try:
                 outputs = model(video, audio, text_input_ids, text_attention_mask)
                 loss = criterion(outputs, targets)
                 
-                # Backward pass ve optimize et
+                # Backward pass and optimisation step
                 optimizer.zero_grad()
                 loss.backward()
                 optimizer.step()
-                
+
                 epoch_loss += loss.item()
             except RuntimeError as e:
-                print(f"Hata oluştu (batch {batch_idx}): {e}")
+                print(f"Runtime error (batch {batch_idx}): {e}")
                 print(f"Video shape: {video.shape}, Audio shape: {audio.shape}")
                 continue
-            
-        # Epoch sonunda ortalama kaybı hesapla
+
+        # Compute the average loss at the end of the epoch
         avg_loss = epoch_loss / len(train_loader)
         train_losses.append(avg_loss)
         print(f"Epoch {epoch+1}/{num_epochs}, Loss: {avg_loss:.4f}")
-    
+
     return train_losses
 
 def evaluate_model(model, test_loader, criterion, device):
-    """Model değerlendirme fonksiyonu"""
+    """Evaluate the multimodal model."""
     model.eval()
     total_loss = 0
     correct = 0
@@ -594,29 +595,29 @@ def evaluate_model(model, test_loader, criterion, device):
     
     with torch.no_grad():
         for batch_idx, batch in enumerate(test_loader):
-            # Veriyi cihaza taşı
+            # Move data to device
             video = batch["video"].to(device)
             audio = batch["audio"].to(device)
             text_input_ids = batch["text_input_ids"].to(device)
             text_attention_mask = batch["text_attention_mask"].to(device)
             targets = batch["id"].to(device)
-            
+
             try:
                 # Forward pass
                 outputs = model(video, audio, text_input_ids, text_attention_mask)
                 loss = criterion(outputs, targets)
-                
-                # İstatistikleri hesapla
+
+                # Update metrics
                 total_loss += loss.item()
                 _, predicted = torch.max(outputs.data, 1)
                 total += targets.size(0)
                 correct += (predicted == targets).sum().item()
             except RuntimeError as e:
-                print(f"Değerlendirme sırasında hata oluştu (batch {batch_idx}): {e}")
+                print(f"Runtime error during evaluation (batch {batch_idx}): {e}")
                 print(f"Video shape: {video.shape}, Audio shape: {audio.shape}")
                 continue
-    
-    # Ortalama kayıp ve doğruluk oranı
+
+    # Average loss and accuracy
     avg_loss = total_loss / len(test_loader)
     accuracy = 100 * correct / total
     
@@ -624,10 +625,10 @@ def evaluate_model(model, test_loader, criterion, device):
     return avg_loss, accuracy
 
 
-# Demo için örnek veri oluşturma
+# Demo data generation
 def create_sample_data():
-    """Örnek multimodal veri oluşturur: video, ses ve metin (demo için)"""
-    # Veri dosyalarını oluşturmak için dizinleri kontrol et
+    """Create sample multimodal data: video, audio, and text (demo)."""
+    # Ensure directories exist for generated files
     video_dir = os.path.join(DATA_DIR, "videos")
     audio_dir = os.path.join(DATA_DIR, "audios")
     text_dir = os.path.join(DATA_DIR, "texts")
@@ -635,63 +636,63 @@ def create_sample_data():
     os.makedirs(audio_dir, exist_ok=True)
     os.makedirs(text_dir, exist_ok=True)
     
-    # Metadata dosyası için veri yapısı
+    # Metadata container
     data_entries = []
     
-    # Örnek bir video oluştur (basit renkli kareler dizisi)
+    # Create a simple synthetic video (sequence of coloured squares)
     for i in range(5):
-        # Her örnek için
+        # For each sample
         video_frames = []
-        for j in range(30):  # 30 frame'lik video
-            # Renkli bir kare oluştur (RGB)
+        for j in range(30):  # 30-frame video
+            # Generate a coloured RGB square
             if j < 10:
-                frame = np.ones((64, 64, 3), dtype=np.uint8) * 50  # Koyu gri
+                frame = np.ones((64, 64, 3), dtype=np.uint8) * 50  # Dark gray
             elif j < 20:
-                frame = np.ones((64, 64, 3), dtype=np.uint8) * 150  # Orta gri
+                frame = np.ones((64, 64, 3), dtype=np.uint8) * 150  # Medium gray
             else:
-                frame = np.ones((64, 64, 3), dtype=np.uint8) * 250  # Açık gri
-                
-            # Her örnek için farklı bir renk bileşeni ekle
+                frame = np.ones((64, 64, 3), dtype=np.uint8) * 250  # Light gray
+
+            # Adjust colour channels per sample for variety
             frame[:,:,i % 3] = 200
             video_frames.append(frame)
-        
-        # Videoyu kaydet
+
+        # Save the video
         video_path = os.path.join(video_dir, f"sample_video_{i}.mp4")
         out = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*'mp4v'), 10, (64, 64))
         for frame in video_frames:
             out.write(frame)
         out.release()
         
-        # Basit bir sinüs dalgası içeren ses dosyası oluştur
+        # Create an audio file containing a simple sine wave
         audio_path = os.path.join(audio_dir, f"sample_audio_{i}.wav")
         sample_rate = 16000
         t = np.linspace(0, 2, 2 * sample_rate, endpoint=False)
-        # Her örnek için farklı bir frekans
+        # Use a different frequency for each sample
         frequency = 440 * (i + 1)
         audio_data = 0.5 * np.sin(2 * np.pi * frequency * t)
-        # Stereo'ya çevir - scipy için 16-bit int'e dönüştür
+        # Convert to 16-bit integers for scipy
         audio_data_16bit = (audio_data * 32767).astype(np.int16)
-        # Mono ses olarak kaydet (scipy.io.wavfile ile)
+        # Save as mono audio via scipy.io.wavfile
         wavfile.write(audio_path, sample_rate, audio_data_16bit)
-        
-        # İlişkili metin oluştur
+
+        # Generate associated text descriptions
         if i == 0:
-            text = "Bu video gri tonlamalı karelerden oluşmaktadır ve 440 Hz'lik bir ses içerir."
+            text = "This video contains grayscale squares with a 440 Hz tone."
         elif i == 1:
-            text = "Bu video kırmızı tonlarında karelerden oluşmaktadır ve 880 Hz'lik bir ses içerir."
+            text = "This video contains red-tinted squares with an 880 Hz tone."
         elif i == 2:
-            text = "Bu video yeşil tonlarında karelerden oluşmaktadır ve 1320 Hz'lik bir ses içerir."
+            text = "This video contains green-tinted squares with a 1,320 Hz tone."
         elif i == 3:
-            text = "Bu video mavi tonlarında karelerden oluşmaktadır ve 1760 Hz'lik bir ses içerir."
+            text = "This video contains blue-tinted squares with a 1,760 Hz tone."
         else:
-            text = "Bu video karışık tonlardaki karelerden oluşmaktadır ve 2200 Hz'lik bir ses içerir."
-            
-        # Metin dosyası kaydet
+            text = "This video contains mixed-colour squares with a 2,200 Hz tone."
+
+        # Save text file
         text_path = os.path.join(text_dir, f"sample_text_{i}.txt")
         with open(text_path, "w", encoding="utf-8") as f:
             f.write(text)
-        
-        # Metadatalara ekle
+
+        # Append to metadata entries
         data_entries.append({
             "id": i,
             "video_path": os.path.relpath(video_path, DATA_DIR),
@@ -700,142 +701,140 @@ def create_sample_data():
             "text_path": os.path.relpath(text_path, DATA_DIR)
         })
     
-    # JSON dosyasına kaydet
+    # Persist metadata to JSON
     metadata_path = os.path.join(DATA_DIR, "metadata.json")
     with open(metadata_path, "w", encoding="utf-8") as f:
         json.dump(data_entries, f, ensure_ascii=False, indent=4)
     
-    print(f"Örnek veri oluşturuldu. Toplam {len(data_entries)} örnek.")
+    print(f"Demo data created with {len(data_entries)} samples.")
     return metadata_path
 
-# Ana fonksiyon
+# Main execution entry point
 def main():
-    """Ana çalıştırma fonksiyonu"""
-    print("Multimodal model eğitimine başlıyoruz...")
-    
-    # Kullanıcıya veri tipi seçimi yaptır
-    print("\nVeri tipi seçin:")
-    print("1 - Örnek veri (otomatik oluşturulan demo verisi)")
-    print("2 - Gerçek veri (gerçek video, ses ve metin dosyaları)")
-    
-    choice = input("Seçiminiz (1/2): ").strip()
-    
-    # Seçime göre veri oluştur
+    """Main driver function."""
+    print("Starting multimodal model training...")
+
+    # Prompt the user to select the data source
+    print("\nSelect the data source:")
+    print("1 - Sample data (automatically generated demo dataset)")
+    print("2 - Real data (actual video, audio, and text files)")
+
+    choice = input("Your choice (1/2): ").strip()
+
+    # Prepare data according to the chosen option
     if choice == "2":
-        print("\nGerçek veri kullanılacak...")
+        print("\nUsing real data...")
         metadata_path = create_real_data_metadata()
     else:
-        print("\nÖrnek demo verisi oluşturuluyor...")
+        print("\nCreating sample demo data...")
         metadata_path = create_sample_data()
-    
-    # Veri setini hazırla
+
+    # Build the dataset
     dataset = MultiModalDataset(metadata_path)
     
-    # Veri setini eğitim ve test olarak ayır
+    # Split into training and test subsets
     train_size = int(0.8 * len(dataset))
     test_size = len(dataset) - train_size
     train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])
     
-    # Veri yükleyicileri
+    # Data loaders
     train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)
     test_loader = DataLoader(test_dataset, batch_size=2, shuffle=False)
     
-    # Model oluştur
+    # Instantiate the model
     model = MultiModalTransformer(embed_dim=256, num_heads=4, num_layers=2, output_dim=5).to(device)
-    print(f"Model oluşturuldu: {model.__class__.__name__}")
-    
-    # Kayıp fonksiyonu ve optimizer - daha düşük öğrenme oranı ile
+    print(f"Model initialised: {model.__class__.__name__}")
+
+    # Loss function and optimiser (with weight decay)
     criterion = nn.CrossEntropyLoss()
     optimizer = torch.optim.Adam(model.parameters(), lr=0.0005, weight_decay=1e-5)
-    
-    # Modeli eğit
-    print("Model eğitimi başlıyor...")
+
+    # Train the model
+    print("Starting model training...")
     train_losses = train_model(model, train_loader, optimizer, criterion, device, num_epochs=10)
     
-    # Modeli değerlendir
-    print("Model değerlendirmesi yapılıyor...")
+    # Evaluate the model
+    print("Running model evaluation...")
     test_loss, test_accuracy = evaluate_model(model, test_loader, criterion, device)
-    
-    # Sınıflandırma sonuçlarını detaylı analiz et
-    print("\nModel Analizi:")
-    print(f"- Toplam Eğitim Epoch: 10")
-    print(f"- Son Eğitim Kaybı: {train_losses[-1]:.4f}")
-    print(f"- Test Kaybı: {test_loss:.4f}")
-    print(f"- Doğruluk Oranı: {test_accuracy:.2f}%")
-    
-    # Sonuçları görselleştir
+
+    # Summarise metrics
+    print("\nModel Summary:")
+    print(f"- Total Training Epochs: 10")
+    print(f"- Final Training Loss: {train_losses[-1]:.4f}")
+    print(f"- Test Loss: {test_loss:.4f}")
+    print(f"- Accuracy: {test_accuracy:.2f}%")
+
+    # Plot training losses
     plt.figure(figsize=(10, 5))
-    plt.plot(train_losses, label='Eğitim Kaybı')
+    plt.plot(train_losses, label='Training Loss')
     plt.xlabel('Epoch')
-    plt.ylabel('Kayıp')
-    plt.title('Eğitim Kaybı')
+    plt.ylabel('Loss')
+    plt.title('Training Loss')
     plt.legend()
     plt.grid(True)
     plt.savefig(os.path.join(DATA_DIR, "training_loss.png"))
     plt.show()
     
-    # Modeli kaydet
+    # Save the trained model
     model_path = os.path.join(DATA_DIR, "multimodal_model.pth")
     torch.save({
         'model_state_dict': model.state_dict(),
         'optimizer_state_dict': optimizer.state_dict()
     }, model_path)
-    print(f"Model kaydedildi: {model_path}")
-    
-    # Test veri setinden bir örnek göster
-    # Görselleştirme için orijinal dataset'i kullan (Subset sorununu önlemek için)
-    print("Örnek bir veriyi görselleştirme...")
-    # Orijinal veri setini görselleştirme için kullan, token çözümleme sorunundan kaçınmak için
+    print(f"Model saved to: {model_path}")
+
+    # Visualise a random sample from the original dataset (not the test Subset) to avoid tokenizer-access issues
+    print("Visualising a sample example...")
     visualize_example(model, dataset, device)
     
     return model, test_accuracy
 
 def visualize_example(model, dataset, device):
-    """Test setinden bir örnek gösterimi"""
-    # Rastgele bir örnek seç
+    """Display a prediction for a random sample drawn from the given dataset."""
+    # Pick a random example
     idx = np.random.randint(len(dataset))
     sample = dataset[idx]
-    
-    # Modeli değerlendirme moduna al
+
+    # Switch model to evaluation mode
     model.eval()
-    
-    # Verileri tensöre dönüştür ve cihaza taşı
+
+    # Convert to tensors and move to device
     video = sample["video"].unsqueeze(0).to(device)
     audio = sample["audio"].unsqueeze(0).to(device)
     text_input_ids = sample["text_input_ids"].unsqueeze(0).to(device)
     text_attention_mask = sample["text_attention_mask"].unsqueeze(0).to(device)
-    
-    # Veri boyutlarını yazdır
-    print(f"Örnek görselleştirme - Video boyutu: {video.shape}")
-    print(f"Örnek görselleştirme - Audio boyutu: {audio.shape}")
-    
-    # Tahmin yap
+
+    # Log tensor shapes
+    print(f"Sample visualisation - Video shape: {video.shape}")
+    print(f"Sample visualisation - Audio shape: {audio.shape}")
+
+    # Make a prediction
     predicted_class = None
     try:
         with torch.no_grad():
             output = model(video, audio, text_input_ids, text_attention_mask)
             _, predicted_class = torch.max(output, 1)
     except RuntimeError as e:
-        print(f"Örnek görselleştirme sırasında hata: {e}")
-        predicted_class = torch.tensor([-1]).to(device)  # Hata durumunda geçersiz sınıf
-    
-    # Gerçek sınıf
+        print(f"Error during sample visualisation: {e}")
+        predicted_class = torch.tensor([-1]).to(device)  # Invalid class on error
+
+    # Ground truth label
     true_class = sample["id"]
-    
-    # Sonuçları göster
-    print(f"\nÖrnek Görselleştirme (Örnek {idx}):")
-    print(f"Gerçek sınıf: {true_class}")
+
+    # Present the results
+    print(f"\nSample Visualisation (Index {idx}):")
+    print(f"True class: {true_class}")
     if predicted_class is not None and predicted_class.item() != -1:
-        print(f"Tahmin edilen sınıf: {predicted_class.item()}")
+        print(f"Predicted class: {predicted_class.item()}")
     else:
-        print("Tahmin yapılamadı (model hatası)")
-    
-    # Videodan birkaç frame'i göster
+        print("Prediction unavailable (model error)")
+
+    # Plot a few frames from the video
     plt.figure(figsize=(15, 5))
     for i in range(min(5, video.size(1))):
         plt.subplot(1, 5, i+1)
         frame = video[0, i].cpu().permute(1, 2, 0)
-        # Normalize edilmiş görüntüyü geri al
+        # Revert normalisation
         frame = frame * torch.tensor([0.229, 0.224, 0.225]) + torch.tensor([0.485, 0.456, 0.406])
         frame = torch.clamp(frame, 0, 1)
         plt.imshow(frame)
@@ -844,55 +843,55 @@ def visualize_example(model, dataset, device):
     plt.savefig(os.path.join(DATA_DIR, "sample_frames.png"))
     plt.show()
     
-    # Ses spektrogramını göster
+    # Display the audio spectrogram
     plt.figure(figsize=(10, 4))
-    # Spektrogram verilerini kontrol et ve 2B bir tensöre dönüştür
+    # Ensure the spectrogram is 2D
     audio_data = sample["audio"].cpu()
     if len(audio_data.shape) == 1:
-        # 1B tensörü 2B'ye genişlet
+        # Expand 1D tensor to 2D
         audio_data = audio_data.unsqueeze(0)
     elif len(audio_data.shape) > 2:
-        # İlk boyutu kullan
+        # Use the first slice if extra dimensions exist
         audio_data = audio_data[0]
-    
+
     plt.imshow(audio_data, aspect='auto', origin='lower')
     plt.colorbar(format='%+2.0f dB')
-    plt.title('Mel Spektrogram')
-    plt.xlabel('Zaman Çerçeveleri')
-    plt.ylabel('Mel Filtre Bantları')
+    plt.title('Mel Spectrogram')
+    plt.xlabel('Time Frames')
+    plt.ylabel('Mel Filter Banks')
     plt.savefig(os.path.join(DATA_DIR, "sample_spectrogram.png"))
     plt.show()
     
-    # Metni göster - tokenizer'a direkt erişim yerine özel tokenleri çıkartan basit bir yol kullan
+    # Retrieve the decoded text, falling back if tokenizer access is limited
     raw_text = ""
     try:
-        # Subset içindeki dataset.dataset erişimi ile orijinal veri setine ulaşmaya çalış
+        # Attempt to reach the original dataset when working with Subset
         if hasattr(dataset, 'dataset') and hasattr(dataset.dataset, 'tokenizer'):
-            # Subset olduğunda
+            # When wrapped by Subset
             tokenizer = dataset.dataset.tokenizer
             raw_text = tokenizer.decode(sample["text_input_ids"].tolist(), skip_special_tokens=True)
         else:
-            # Direkt veri seti olduğunda
+            # When using the original dataset directly
             raw_text = dataset.tokenizer.decode(sample["text_input_ids"].tolist(), skip_special_tokens=True)
     except Exception as e:
-        # Tokenizer erişimi yoksa, özel token kodlarını temizleyen basit bir çözüm uygula
+        # Strip out special token IDs if the tokenizer is unavailable
         text_tokens = sample["text_input_ids"].tolist()
-        # 0, 101, 102 gibi özel token ID'lerini filtrele (BERT özel tokenleri)
+        # Filter out special token IDs such as 0, 101, and 102 (BERT specials)
         text_tokens = [t for t in text_tokens if t > 102 and t != 0]
-        raw_text = f"ID'ler: {text_tokens} (Tokenizer erişilemediğinden ham metin gösterilemiyor)"
-    
-    print(f"Metin: {raw_text}")
+        raw_text = f"IDs: {text_tokens} (raw text unavailable without tokenizer access)"
+
+    print(f"Text: {raw_text}")
 
 
-# Ana programı çalıştır
+# Run the main program
 if __name__ == "__main__":
-    torch.manual_seed(42)  # Tekrarlanabilirlik için
+    torch.manual_seed(42)  # Ensure reproducibility
     try:
         model, accuracy = main()
         print(f"Final test accuracy: {accuracy:.2f}%")
-        print("Program başarıyla tamamlandı!")
+        print("Program finished successfully!")
     except Exception as e:
         import traceback
-        print(f"Program çalıştırılırken bir hata oluştu: {e}")
+        print(f"An error occurred while running the program: {e}")
         traceback.print_exc()
-        print("\nHata oluştu, ancak eğer model kaydedildiyse sonuçları kontrol edebilirsiniz.")
\ No newline at end of file
+        print("\nAn error occurred, but you can still inspect saved results if available.")
\ No newline at end of file
diff --git a/Qwen3/README.txt b/Qwen3/README.txt
index 4c25038..3899803 100644
--- a/Qwen3/README.txt
+++ b/Qwen3/README.txt
@@ -1,82 +1,80 @@
-# QWEN3 TÜRKÇE DİL MODELİ
-
-Bu proje, Qwen3 mimarisini baz alan bir Türkçe dil modeli implementasyonudur. Temel amacı, Türkçe finans alanı sorularına cevap verebilen, düşünme süreçlerini modelleyebilen bir yapay zeka modeli oluşturmaktır.
-
-## MODEL ÖZELLİKLERİ
-
-### Mimari Bileşenleri
-- **Temel Yapı**: Transformer mimarisi (Encoder-Decoder değil, yalnızca Decoder tabanlı)
-- **Parametre Sayısı**: 100M+ (büyük model konfigürasyonu)
-- **Konumsal Kodlama**: Sinüzoidal konumsal kodlama
-- **Dikkat Mekanizması**: Gruplandırılmış Sorgu Dikkati (GQA - Grouped Query Attention)
-- **Normalizasyon**: LayerNorm (RMSNorm yerine basitlik için)
-- **Aktivasyon Fonksiyonu**: GELU
-
-### Özel Özellikler
-1. **Düşünme Modu**: Model, cevap vermeden önce "düşünme" sürecini simüle edebilir
-   - <think> ve </think> özel tokenları ile işaretlenen düşünme adımları
-   - Düşünme adımları sonrası daha iyi yanıtlar üretebilme
-   
-2. **Türkçe Tokenizer**: Türkçe karakterler için özelleştirilmiş basit tokenizer
-   - Türkçe karakter seti desteği (ç, ğ, ı, ö, ş, ü vb.)
-   - Özel tokenlar için rezerve edilmiş ID'ler
-   
-3. **Soru-Cevap Formatlama**: Finans alanı sorularına özel QA formatı
-
-### Model Boyutlandırma Parametreleri
-- **Vocab Boyutu**: 50,000 token
-- **Gizli Boyut (Hidden Size)**: 1024
-- **Katman Sayısı**: 24
-- **Q Başlık Sayısı**: 16
-- **KV Başlık Sayısı**: 8
-- **FFN Boyutu**: 4096
-- **Maksimum Dizi Uzunluğu**: 2048 token
-
-## VERİ SETİ
-
-- **Kaynak**: umarigan/turkiye_finance_qa (HuggingFace)
-- **İçerik**: 428 Türkçe finans soru-cevap çifti
-- **Format**: "Soru: {soru}\nCevap: {cevap}"
-
-## EĞİTİM ÖZELLİKLERİ
-
-- **Optimizer**: AdamW (öğrenme oranı: 1e-5)
-- **Batch Boyutu**: 2 (büyük model için hafıza optimizasyonu)
-- **Gradyan Clipping**: 1.0 maksimum norm
+# QWEN3 TURKISH LANGUAGE MODEL
+
+This project implements a Turkish language model based on the Qwen3 architecture. The primary goal is to build an AI system that can answer finance-related questions in Turkish while modelling intermediate reasoning steps.
+
+## MODEL FEATURES
+
+### Architectural Components
+- **Core Structure**: Transformer architecture (decoder-only, no encoder)
+- **Parameter Count**: 100M+ (large model configuration)
+- **Positional Encoding**: Sinusoidal position embeddings
+- **Attention Mechanism**: Grouped Query Attention (GQA)
+- **Normalisation**: LayerNorm (used instead of RMSNorm for simplicity)
+- **Activation Function**: GELU
+
+### Special Capabilities
+1. **Thinking Mode**: The model can simulate a reasoning phase before producing an answer
+   - Uses custom <think> and </think> tokens to mark reasoning spans
+   - Generates higher-quality answers after the thinking phase
+
+2. **Turkish Tokeniser**: Simple tokeniser tailored for Turkish characters
+   - Supports the extended Turkish character set (ç, ğ, ı, ö, ş, ü, etc.)
+   - Reserves dedicated IDs for special tokens
+
+3. **Question-Answer Formatting**: Custom QA format for finance-related prompts
+
+### Model Sizing Parameters
+- **Vocabulary Size**: 50,000 tokens
+- **Hidden Size**: 1024
+- **Number of Layers**: 24
+- **Number of Q Heads**: 16
+- **Number of KV Heads**: 8
+- **FFN Dimension**: 4096
+- **Maximum Sequence Length**: 2,048 tokens
+
+## DATASET
+
+- **Source**: umarigan/turkiye_finance_qa (Hugging Face)
+- **Content**: 428 Turkish finance question–answer pairs
+- **Format**: "Soru: {question}\nCevap: {answer}"
+
+## TRAINING DETAILS
+
+- **Optimiser**: AdamW (learning rate 1e-5)
+- **Batch Size**: 2 (keeps memory usage manageable for the large configuration)
+- **Gradient Clipping**: Max norm 1.0
 - **Dropout**: 0.1
-- **Text Generation**: 
-  - Top-k sampling (k=50)
-  - Top-p sampling (p=0.9)
-  - Sıcaklık (temperature): 0.7
+- **Text Generation**:
+  - Top-k sampling (k = 50)
+  - Top-p sampling (p = 0.9)
+  - Temperature: 0.7
 
-## KULLANIM
+## USAGE
 
-Model şu şekilde kullanılabilir:
-1. Standart metin üretimi için `generate_text` fonksiyonu
-2. Düşünme modunda üretim için `think_mode=True` parametresi
+You can interact with the model via:
+1. `generate_text` for standard text generation
+2. Setting `think_mode=True` to enable the reasoning phase
 
-## TEKNİK DETAYLAR
+## TECHNICAL DETAILS
 
-### Gruplandırılmış Sorgu Dikkati (GQA)
-Q için 16 başlık, KV için 8 başlık kullanılarak bellek verimliliği sağlanmıştır. 
-Her KV başlığı birden fazla Q başlığı tarafından paylaşılır.
+### Grouped Query Attention (GQA)
+Memory efficiency is achieved by using 16 query heads and 8 key/value heads. Each KV head is shared by multiple query heads.
 
-### Veri İşleme
-1. Veri tokenize edilir
-2. Batch halinde gruplandırılır
-3. Attention mask oluşturulur
-4. Causal maskeleme uygulanır
+### Data Processing
+1. Tokenise the dataset
+2. Form mini-batches
+3. Build attention masks
+4. Apply causal masking
 
-### Otomatik Regresif Üretim
-Model, bir sonraki token tahminlerini daha önce üretilen tokenleri kullanarak gerçekleştirir.
+### Autoregressive Generation
+The model predicts the next token based on previously generated tokens, following a standard autoregressive pattern.
 
-## PERFORMANS
+## PERFORMANCE
 
-Model eğitim sonrası finans alanındaki soruları cevaplayabilme yeteneğine sahiptir. 
-Düşünme modu aktivasyonu ile daha karmaşık sorularda iyileştirilmiş yanıtlar sağlayabilir.
+After training, the model is able to answer finance-related questions. Activating thinking mode can yield improved responses on more complex questions.
 
-## SINIRLAMALAR
+## LIMITATIONS
 
-- CPU ile eğitim uzun sürebilir
-- Türkçe karakterler için özel tokenizer basit olduğundan büyük dil modellerindeki subword tokenizer kadar etkili değildir
-- Veri seti küçük olduğundan (428 örnek) modelin genelleme yeteneği sınırlı olabilir
+- Training on CPU can be time-consuming
+- The custom tokeniser for Turkish characters is simple, so it is not as effective as the subword tokenisers used in large language models
+- The dataset is small (428 examples), so the model's ability to generalise may be limited
diff --git a/Qwen3/qwen3-0.6.py b/Qwen3/qwen3-0.6.py
index 7fe29ef..b0d67db 100644
--- a/Qwen3/qwen3-0.6.py
+++ b/Qwen3/qwen3-0.6.py
@@ -11,7 +11,7 @@
 import os
 
 
-# --- Konumsal Kodlama ---
+# --- Positional Encoding ---
 class PositionalEncoding(nn.Module):
     def __init__(self, d_model: int, max_seq_length: int = 32768):
         super().__init__()
@@ -38,7 +38,7 @@ def forward(self, x):
         return self.weight * (x / rms)
 
 
-# --- GQA Mekanizması ---
+# --- GQA Mechanism ---
 class GroupedQueryAttention(nn.Module):
     def __init__(self, hidden_size: int, num_q_heads: int = 8, num_kv_heads: int = 4, dropout: float = 0.1):
         super().__init__()
@@ -100,7 +100,7 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
         return self.dropout(self.fc2(self.act(self.fc1(x))))
 
 
-# --- Transformer Katmanı ---
+# --- Transformer Layer ---
 class TransformerLayer(nn.Module):
     def __init__(self, hidden_size: int, num_q_heads: int, num_kv_heads: int, intermediate_size: int, dropout: float = 0.1):
         super().__init__()
@@ -123,7 +123,7 @@ def forward(self, hidden_states: torch.Tensor, attention_mask: Optional[torch.Te
         return hidden_states
 
 
-# --- Ana Model ---
+# --- Main Model ---
 class Qwen3SmallModel(nn.Module):
     def __init__(
         self,
@@ -193,7 +193,7 @@ def forward(
         return {"loss": loss, "logits": logits}
 
 
-# --- Dataset ve Collate Fonksiyonu ---
+# --- Dataset and Collate Function ---
 class HFDataset(Dataset):
     def __init__(self, dataset, tokenizer, max_length=512):
         self.dataset = dataset
@@ -208,8 +208,8 @@ def __getitem__(self, idx):
         question = item['soru'].strip()
         answer = item['cevap'].strip()
         if len(answer.split()) < 5:
-            answer += " Belirtilmemiş."
-        text = f"Soru: {question}\nCevap: {answer}"
+            answer += " Not specified."
+        text = f"Question: {question}\nAnswer: {answer}"
         encoded = self.tokenizer(text, truncation=True, padding='max_length', max_length=self.max_length, return_tensors="pt")
         input_ids = encoded["input_ids"].squeeze(0)
         return {
@@ -230,7 +230,7 @@ def collate_fn(batch):
     }
 
 
-# --- Generate Fonksiyonu (Top-k ve Top-p Sampling ile) ---
+# --- Generate Function (Top-k and Top-p Sampling) ---
 def generate_text(model, prompt, tokenizer, device="cuda", max_new_tokens=100, temperature=0.4, top_k=50, top_p=0.9):
     model.eval()
     inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512).to(device)
@@ -307,7 +307,7 @@ def generate_text(model, prompt, tokenizer, device="cuda", max_new_tokens=100, t
     return answer
 
 
-# --- Eğitim Fonksiyonu ---
+# --- Training Function ---
 def train_model(
     model, 
     dataloader, 
@@ -361,13 +361,16 @@ def train_model(
             total_loss += loss.item() * gradient_accumulation_steps
             bar.set_postfix(loss=loss.item() * gradient_accumulation_steps, lr=optimizer.param_groups[0]['lr'])
             
-        print(f"Epoch {epoch+1} Ortalama Kayıp: {total_loss / len(dataloader):.4f}, Süre: {time.time()-start_time:.2f}s")
+        print(
+            f"Epoch {epoch+1} Average Loss: {total_loss / len(dataloader):.4f}, "
+            f"Duration: {time.time()-start_time:.2f}s"
+        )
 
 
-# --- Main Fonksiyonu ---
+# --- Main Function ---
 def main():
     device = "cuda" if torch.cuda.is_available() else "cpu"
-    print(f"Cihaz: {device}")
+    print(f"Device: {device}")
 
     tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-turkish-cased")
     
@@ -426,7 +429,7 @@ def main():
         num_training_steps=total_steps
     )
 
-    print(f"Model Eğitiliyor... (Etkin batch boyutu: {effective_batch_size})")
+    print(f"Training model... (Effective batch size: {effective_batch_size})")
     train_model(
         model, 
         train_loader, 
@@ -439,21 +442,21 @@ def main():
         save_steps=200
     )
 
-    print("\nTest Başlatılıyor...")
+    print("\nStarting evaluation...")
     sample_questions = [
-        "Yatırım fonlarına yatırım yapmanın dezavantajları nelerdir?",
-        "Kamu harcamaları neleri içerir?"
+        "What are the disadvantages of investing in mutual funds?",
+        "What do public expenditures include?"
     ]
     for question in sample_questions:
-        prompt = f"Soru: {question}\nCevap:"
+        prompt = f"Question: {question}\nAnswer:"
         answer = generate_text(model, prompt, tokenizer, device=device)
-        print(f"\nSoru: {question}")
-        print(f"Model Cevabı: {answer}")
+        print(f"\nQuestion: {question}")
+        print(f"Model Answer: {answer}")
 
     final_model_path = "c:/Users/emreq/Desktop/Transformers/Qwen3/turkiye_finance_qa_model_improved.pt"
     torch.save(model.state_dict(), final_model_path)
-    print(f"Model kaydedildi: {final_model_path}")
+    print(f"Model saved to: {final_model_path}")
 
 
 if __name__ == "__main__":
-    main()
\ No newline at end of file
+    main()
diff --git a/README.md b/README.md
index 1609af8..4d8e517 100644
--- a/README.md
+++ b/README.md
@@ -1,117 +1,117 @@
 # Transformers Examples
 
-Bu repository, modern derin öğrenme modellerinin farklı yönlerini gösteren Transformers kütüphanesi kullanılarak geliştirilmiş çeşitli örnekler ve implementasyonlar içerir. Dil modelleri, vision transformers, multimodal modeller ve daha fazlasını kapsar.
+This repository showcases a wide range of examples and implementations built with the Transformers library to highlight different aspects of modern deep learning models. It covers language models, vision transformers, multimodal architectures, and more.
 
-## 📁 Repository Yapısı
+## 📁 Repository Structure
 
-### Ana Dizinler
+### Top-Level Directories
 
-- **`Architecture/`** - **YENİ!** RoPE (Rotary Position Embedding) karşılaştırmaları ve transformer mimarisi örnekleri
-- **`Genel-1/`** - Temel transformer implementasyonları ve konfigürasyon örnekleri
-- **`Genel-2/`** - Gelişmiş transformer modelleri (vision transformers ve multimodal örnekler)
-- **`Genel-3/`** - Ek transformer varyantları ve deneyler
-- **`Genel-4/`** - Performans karşılaştırmaları ve fine-tuning örnekleri
-- **`Genel-5/`** - İleri teknikler ve model optimizasyonları
-- **`Multi Modal/`** - Video, ses ve metin için multimodal transformer implementasyonları
-- **`Vision Transformers/`** - Vision transformer modelleri ve uygulamaları
-- **`Time series - Transformers/`** - Transformer modelleri kullanarak zaman serisi analizi
-- **`Tokenizer/`** - Özel tokenizer implementasyonları ve eğitimi
-- **`llama/`** - LLaMA model implementasyonu ve utilities
-- **`Qwen3/`** - Qwen 3 model örnekleri ve kullanımı
-- **`finetuned-llm/`** - Fine-tuned dil modeli checkpoint'leri
-- **`archive/`** - MMLU benchmark sonuçları ve arşivlenmiş dosyalar
+- **`Architecture/`** – **NEW!** RoPE (Rotary Position Embedding) comparisons and transformer architecture explorations
+- **`Genel-1/`** – Foundational transformer implementations and configuration examples
+- **`Genel-2/`** – Advanced transformer models (vision transformers and multimodal demos)
+- **`Genel-3/`** – Additional transformer variants and experiments
+- **`Genel-4/`** – Performance comparisons and fine-tuning workflows
+- **`Genel-5/`** – Cutting-edge techniques and model optimisations
+- **`Multi Modal/`** – Multimodal transformer implementations for video, audio, and text
+- **`Vision Transformers/`** – Vision transformer models and applications
+- **`Time series - Transformers/`** – Time-series analysis with transformer models
+- **`Tokenizer/`** – Custom tokenizer implementations and training scripts
+- **`llama/`** – LLaMA model implementation and utilities
+- **`Qwen3/`** – Qwen 3 model examples and usage guides
+- **`finetuned-llm/`** – Fine-tuned language model checkpoints
+- **`archive/`** – MMLU benchmark results and archived artefacts
 
-### Önemli Dosyalar
+### Notable Files
 
-- **`test-time-scaling.py`** - Dil modelleri için test-time scaling implementasyonu
-- **`requirements.txt`** - Temel Python bağımlılıkları
-- **`requirements-jax.txt`** - JAX ekosistemi için ek bağımlılıklar
-- **`requirements-dev.txt`** - Geliştirme ve ileri seviye eğitim araçları
-- **`setup.sh`** - Otomatik kurulum script'i
-- **`.env.example`** - Çevre değişkenleri şablonu
-- **`CONTRIBUTING.md`** - Katkıda bulunma rehberi
+- **`test-time-scaling.py`** – Test-time scaling implementation for language models
+- **`requirements.txt`** – Core Python dependencies
+- **`requirements-jax.txt`** – Additional dependencies for the JAX ecosystem
+- **`requirements-dev.txt`** – Tooling for development and advanced training
+- **`setup.sh`** – Automated setup script
+- **`.env.example`** – Template for environment variables
+- **`CONTRIBUTING.md`** – Contribution guidelines
 
-## 🚀 Hızlı Başlangıç
+## 🚀 Quick Start
 
-### Gereksinimler
+### Requirements
 
-Sisteminizde Python 3.7+ yüklü olduğundan emin olun.
+Ensure that Python 3.7+ is installed on your system.
 
-### Kurulum
+### Installation
 
-**Otomatik Kurulum (Önerilen):**
+**Automatic Setup (Recommended):**
 
 ```bash
-# Repository'yi klonlayın
+# Clone the repository
 git clone https://github.com/emredeveloper/Transformers-Examples.git
 cd Transformers-Examples
 
-# Otomatik kurulum script'ini çalıştırın (varsayılan profil: base)
+# Run the automated setup script (default profile: base)
 chmod +x setup.sh
 ./setup.sh --venv
-# JAX veya geliştirme bağımlılıklarını da eklemek için:
+# To include JAX or development dependencies:
 # ./setup.sh --profile jax
 # ./setup.sh --profile dev
 # ./setup.sh --profile all
 ```
 
-**Manuel Kurulum:**
+**Manual Setup:**
 
-1. Repository'yi klonlayın:
+1. Clone the repository:
 
 ```bash
 git clone https://github.com/emredeveloper/Transformers-Examples.git
 cd Transformers-Examples
 ```
 
-2. Virtual environment oluşturun (önerilen):
+2. Create a virtual environment (recommended):
 
 ```bash
 python -m venv .venv
-# Windows için:
+# Windows:
 .venv\Scripts\activate
-# Linux/Mac için:
+# Linux/macOS:
 source .venv/bin/activate
 ```
 
-3. Bağımlılıkları yükleyin:
+3. Install dependencies:
 
 ```bash
 pip install -r requirements.txt
-# JAX örnekleri için ek bağımlılıklar:
+# Extra dependencies for JAX experiments:
 # pip install -r requirements-jax.txt
-# Geliştirme araçları için:
+# Development tooling:
 # pip install -r requirements-dev.txt
 ```
 
-### Bağımlılık Profilleri
+### Dependency Profiles
 
-- **Base (`requirements.txt`)**: PyTorch, Transformers ve çoğu örnek için gerekli çekirdek paketler.
-- **JAX (`requirements-jax.txt`)**: JAX tabanlı deneyler ve örnekler için gerekli `jax`, `jaxlib` ve `flax` paketleri.
-- **Development (`requirements-dev.txt`)**: Not defterleri, büyük ölçekli eğitim yardımcıları ve gelişmiş araçlar (`jupyter`, `notebook`, `fairscale`, `deepspeed`).
+- **Base (`requirements.txt`)**: Core packages required for PyTorch, Transformers, and most examples.
+- **JAX (`requirements-jax.txt`)**: Adds `jax`, `jaxlib`, and `flax` for JAX-based experiments.
+- **Development (`requirements-dev.txt`)**: Provides notebooks, large-scale training helpers, and advanced tooling (`jupyter`, `notebook`, `fairscale`, `deepspeed`).
 
-`setup.sh` script'i bu profilleri `--profile` parametresiyle otomatik olarak yükleyebilir. Varsayılan profil `base`'dir.
+The `setup.sh` script can install these profiles automatically with the `--profile` flag. The default profile is `base`.
 
-4. Çevre değişkenlerini ayarlayın:
+4. Configure environment variables:
 
 ```bash
-# .env.example dosyasını .env olarak kopyalayın
+# Copy the template to .env
 copy .env.example .env  # Windows
-cp .env.example .env    # Linux/Mac
+cp .env.example .env    # Linux/macOS
 
-# .env dosyasını düzenleyip Hugging Face token'ınızı ekleyin
+# Edit .env and add your Hugging Face token
 ```
 
-## 📖 Kullanım Örnekleri
+## 📖 Usage Examples
 
-### RoPE Karşılaştırması (YENİ!)
+### RoPE Comparison (NEW!)
 
 ```bash
 cd Architecture
 python partial-rope.py
 ```
 
-### Temel Transformer Kullanımı
+### Basic Transformer Usage
 
 ```bash
 cd Genel-1
@@ -125,21 +125,21 @@ cd "Vision Transformers"
 jupyter notebook sglip2.ipynb
 ```
 
-### Multimodal Örnekler
+### Multimodal Examples
 
 ```bash
 cd "Multi Modal"
 python basic-multimodal.py
 ```
 
-### LLaMA Modeli
+### LLaMA Model
 
 ```bash
 cd llama
 python run_cpu.py
 ```
 
-### Tokenizer Eğitimi
+### Tokenizer Training
 
 ```bash
 cd Tokenizer
@@ -152,109 +152,109 @@ python tokenizer.py
 python test-time-scaling.py
 ```
 
-## ⚙️ Konfigürasyon
+## ⚙️ Configuration
 
-Birçok örnek çevre değişkenleri aracılığıyla konfigürasyonu destekler:
+Many examples can be configured via environment variables:
 
-- `HUGGINGFACE_TOKEN`: Hugging Face API token'ınız
-- `CUDA_VISIBLE_DEVICES`: GPU cihaz seçimi
-- `MODEL_CACHE_DIR`: İndirilen modeller için cache dizini
+- `HUGGINGFACE_TOKEN`: Your Hugging Face API token
+- `CUDA_VISIBLE_DEVICES`: GPU device selection
+- `MODEL_CACHE_DIR`: Cache directory for downloaded models
 
-## 📝 Örneklere Genel Bakış
+## 📝 Example Overview
 
-### Dil Modelleri
+### Language Models
 
-- GPT-2 konfigürasyonu ve fine-tuning
-- DeepSeek transformer implementasyonları
-- Qwen 3 model kullanımı
-- Test-time scaling teknikleri
-- RoPE (Rotary Position Embedding) karşılaştırmaları
+- GPT-2 configuration and fine-tuning
+- DeepSeek transformer implementations
+- Qwen 3 model usage
+- Test-time scaling techniques
+- RoPE (Rotary Position Embedding) comparisons
 
-### Vision Modelleri
+### Vision Models
 
-- Vision Transformer (ViT) implementasyonları
-- SGLIP-2 multimodal anlayış
-- Görüntü sınıflandırma örnekleri
+- Vision Transformer (ViT) implementations
+- SGLIP-2 multimodal understanding
+- Image classification examples
 
-### Multimodal Modeller
+### Multimodal Models
 
-- Video, ses ve metin işleme
-- Cross-modal attention mekanizmaları
-- Multimodal fusion teknikleri
+- Video, audio, and text processing
+- Cross-modal attention mechanisms
+- Multimodal fusion techniques
 
-### Zaman Serileri
+### Time Series
 
-- Transformer tabanlı zaman serisi tahmini
-- Sequence-to-sequence modelleme
+- Transformer-based time-series forecasting
+- Sequence-to-sequence modelling
 
-### İleri Teknikler
+### Advanced Techniques
 
 - Mixture of Experts (MoE)
-- Cross-attention mekanizmaları
-- Özel tokenization stratejileri
-- Model optimizasyon teknikleri
-- Partial RoPE implementasyonları
+- Cross-attention mechanisms
+- Custom tokenisation strategies
+- Model optimisation techniques
+- Partial RoPE implementations
 
-## 🔧 Yeni Özellikler
+## 🔧 New Highlights
 
-### Architecture Dizini
+### Architecture Directory
 
-Bu dizin transformer mimarisi ile ilgili gelişmiş örnekler içerir:
+This directory focuses on advanced transformer architecture examples:
 
-- **`partial-rope.py`**: Partial RoPE vs Full RoPE performans karşılaştırması
-- Detaylı benchmark sonuçları ve görselleştirmeler
-- Bellek kullanımı analizleri
-- Ablasyon çalışmaları
+- **`partial-rope.py`**: Partial RoPE vs. full RoPE performance comparison
+- Detailed benchmark results and visualisations
+- Memory usage analyses
+- Ablation studies
 
-## 🤝 Katkıda Bulunma
+## 🤝 Contributing
 
-Katkılar memnuniyetle karşılanır! Lütfen Pull Request göndermekten çekinmeyin. Büyük değişiklikler için, önce ne değiştirmek istediğinizi tartışmak üzere bir issue açın.
+Contributions are welcome! Feel free to open a Pull Request. For major changes, please start a discussion by opening an issue first.
 
-Detaylı bilgi için `CONTRIBUTING.md` dosyasını kontrol edin.
+See `CONTRIBUTING.md` for more information.
 
-## 📄 Lisans
+## 📄 License
 
-Bu proje açık kaynaklıdır ve [MIT Lisansı](LICENSE) altında mevcuttur. Depoda yer alan bazı üçüncü parti örnekler kendi lisans metinlerini (örn. Apache 2.0) içerebilir ve ilgili dizinlerde belirtilen şartlarla dağıtılır.
+This project is open source and available under the [MIT License](LICENSE). Some third-party examples may include their own licence texts (e.g., Apache 2.0) and are distributed under the terms specified in their respective directories.
 
-## 🔍 Notlar
+## 🔍 Notes
 
-- Bazı örnekler özel model erişim izinleri gerektirir
-- Büyük modelleri çalıştırmak için GPU önerilir
-- Belirli gereksinimler için bireysel dizin README dosyalarını kontrol edin
-- Hugging Face modelleri için uygun kimlik doğrulaması ayarladığınızdan emin olun
-- `.env` dosyasını oluşturmayı ve API token'larınızı eklemeyi unutmayın
+- Certain examples require special access to hosted models
+- A GPU is recommended for large-scale models
+- Check the individual directory README files for specific requirements
+- Ensure authentication is configured for Hugging Face models
+- Remember to create the `.env` file and add your API tokens
 
-## 🐛 Sorun Giderme
+## 🐛 Troubleshooting
 
-### Yaygın Sorunlar
+### Common Issues
 
-1. **Import hataları**: Tüm bağımlılıkların yüklü olduğundan emin olun
-2. **CUDA hataları**: GPU kullanılabilirliğini ve CUDA kurulumunu kontrol edin
-3. **Model erişimi**: Özel modeller için uygun izinlere sahip olduğunuzdan emin olun
-4. **Bellek hataları**: Daha küçük batch boyutları veya model varyantları kullanmayı düşünün
-5. **Token hataları**: `.env` dosyasında Hugging Face token'ınızın doğru ayarlandığından emin olun
+1. **Import errors**: Verify all dependencies are installed
+2. **CUDA errors**: Check GPU availability and CUDA installation
+3. **Model access**: Confirm you have permission to use private models
+4. **Out of memory**: Reduce batch sizes or switch to smaller model variants
+5. **Token errors**: Ensure your Hugging Face token is set correctly in `.env`
 
-Daha detaylı yardım için, lütfen belirli dizin belgelerini kontrol edin veya bir issue açın.
+For deeper assistance, review the documentation in the relevant directory or open an issue.
 
-## 📊 Benchmark Sonuçları
+## 📊 Benchmark Results
 
-Repository, çeşitli transformer varyantları için performans karşılaştırmaları içerir:
+The repository includes performance comparisons for multiple transformer variants:
 
-- RoPE implementasyonları arasındaki hız ve doğruluk karşılaştırmaları
-- MMLU benchmark sonuçları (archive/ dizininde)
-- Model optimizasyon teknikleri analizi
+- Speed and accuracy comparisons between RoPE implementations
+- MMLU benchmark results (see the `archive/` directory)
+- Analyses of model optimisation techniques
 
-Detaylı sonuçlar için `Architecture/` dizinini ve generate edilen PNG dosyalarını kontrol edin. 
+For detailed results, inspect the `Architecture/` directory and the generated PNG assets.
 
-## ✅ Test ve Kod Kalitesi
+## ✅ Testing and Code Quality
 
-Hafif testleri ve kod kalite kontrollerini çalıştırmak için isteğe bağlı geliştirme bağımlılıklarını yükleyin:
+Install the optional development dependencies to run lightweight tests and quality checks:
 
 ```bash
 pip install -r requirements-dev.txt
 ```
 
-Ardından aşağıdaki komutları çalıştırabilirsiniz:
+Then run:
 
 ```bash
 pytest
@@ -262,4 +262,4 @@ ruff check tests
 black --check tests
 ```
 
-Sürekli entegrasyon iş akışı bu kontrolleri otomatik olarak yürütür.
+The continuous integration workflow executes these checks automatically.
diff --git a/Time series - Transformers/predict.py b/Time series - Transformers/predict.py
index e16fba1..418f5ff 100644
--- a/Time series - Transformers/predict.py	
+++ b/Time series - Transformers/predict.py	
@@ -5,22 +5,22 @@
 from train import TimeSeriesTransformer
 
 def predict():
-    # Argümanlar
+    # Arguments
     parser = argparse.ArgumentParser()
     parser.add_argument('--model_path', type=str, default='model.pth',
-                      help='Eğitilmiş model dosya yolu')
+                      help='Path to the trained model file')
     parser.add_argument('--data', type=str, default='daily-total-female-births.csv',
-                      help='Veri dosya yolu')
+                      help='Path to the data file')
     parser.add_argument('--steps', type=int, default=10,
-                      help='Tahmin adedi')
+                      help='Number of forecast steps')
     args = parser.parse_args()
     
-    # Cihaz
+    # Device
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-    print(f"Kullanılan cihaz: {device}")
+    print(f"Using device: {device}")
     
-    # Modeli yükle
-    print(f"Model yükleniyor: {args.model_path}")
+    # Load the model
+    print(f"Loading model from: {args.model_path}")
     checkpoint = torch.load(args.model_path, map_location=device)
     
     model = TimeSeriesTransformer(
@@ -32,12 +32,12 @@ def predict():
     model.load_state_dict(checkpoint['model_state_dict'])
     model.eval()
     
-    # Scaler'ı yükle
+    # Restore scaler configuration
     scaler = checkpoint['scaler']
     seq_length = checkpoint['seq_length']
     
-    # Veriyi yükle
-    print(f"Veri yükleniyor: {args.data}")
+    # Load the data
+    print(f"Loading data: {args.data}")
     df = pd.read_csv(args.data)
     if 'Date' in df.columns:
         dates = pd.to_datetime(df['Date'])
@@ -45,45 +45,45 @@ def predict():
     else:
         dates = pd.RangeIndex(start=0, stop=len(df))
     
-    # Son sequence'i al ve normalize et
+    # Extract the latest sequence and normalise
     data = scaler.transform(df.values)
     last_sequence = torch.FloatTensor(data[-seq_length:]).unsqueeze(0).to(device)
     
-    # Tahmin yap
-    print(f"{args.steps} adım tahmin yapılıyor...")
+    # Perform the forecast
+    print(f"Generating {args.steps} step predictions...")
     predictions = []
     with torch.no_grad():
         current_sequence = last_sequence
         for step in range(args.steps):
-            # Tahmin yap
+            # Predict the next value
             pred = model(current_sequence)
             pred_value = pred.item()
             predictions.append(pred_value)
             
-            # Yeni sequence oluştur (sadece ilk sütunu güncelle)
+            # Build the next sequence (update only the first feature)
             next_step = torch.zeros_like(current_sequence[:, 0:1])
-            next_step[0, 0] = pred_value  # Sadece ilk özelliği güncelle
+            next_step[0, 0] = pred_value  # Update only the first feature
             
-            # Yeni sequence: mevcut sequence'nin son seq_length-1 adımını al + yeni tahmin
+            # New sequence: drop the oldest step and append the prediction
             current_sequence = torch.cat([
-                current_sequence[:, 1:],  # İlk adımı çıkar
-                next_step.unsqueeze(1)    # Yeni tahmini ekle
+                current_sequence[:, 1:],  # Remove the first timestep
+                next_step.unsqueeze(1)    # Append the new forecast
             ], dim=1)
-    
-    # Tahminleri orijinal ölçeğe çevir
+
+    # Rescale predictions back to the original domain
     dummy = np.zeros((len(predictions), data.shape[1]))
     dummy[:, 0] = predictions
     predictions = scaler.inverse_transform(dummy)[:, 0]
     
-    # Sonuçları yazdır
-    print("\nTahminler:")
+    # Display predictions
+    print("\nPredictions:")
     last_date = dates[-1] if 'dates' in locals() else len(dates) - 1
     for i, pred in enumerate(predictions, 1):
         if 'dates' in locals():
             pred_date = last_date + pd.DateOffset(days=i)
             print(f"{pred_date.strftime('%Y-%m-%d')}: {pred:.2f}")
         else:
-            print(f"Adım {i}: {pred:.2f}")
+            print(f"Step {i}: {pred:.2f}")
 
 if __name__ == '__main__':
     predict()
diff --git a/Time series - Transformers/train.py b/Time series - Transformers/train.py
index e0fbf08..b89dc59 100644
--- a/Time series - Transformers/train.py	
+++ b/Time series - Transformers/train.py	
@@ -40,7 +40,7 @@ def forward(self, src):
         return self.output_linear(output[:, -1, :])
 
 def train():
-    # Argümanlar
+    # Arguments
     parser = argparse.ArgumentParser()
     parser.add_argument('--data', type=str, default='daily-total-female-births.csv')
     parser.add_argument('--seq_length', type=int, default=24)
@@ -50,23 +50,23 @@ def train():
     parser.add_argument('--model_path', type=str, default='model.pth')
     args = parser.parse_args()
 
-    # Cihaz
+    # Device
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
     
-    # Veriyi yükle ve işle
+    # Load and prepare the data
     df = pd.read_csv(args.data)
     if 'Date' in df.columns:
         df = df.set_index('Date')
     
-    # Normalizasyon
+    # Normalisation
     scaler = MinMaxScaler()
     data = scaler.fit_transform(df.values)
     
-    # Sequence oluştur
+    # Build sliding-window sequences
     X, y = [], []
     for i in range(len(data) - args.seq_length):
         X.append(data[i:i + args.seq_length])
-        y.append(data[i + args.seq_length, 0])  # İlk sütunu tahmin et
+        y.append(data[i + args.seq_length, 0])  # Predict the first column
     
     X = torch.FloatTensor(np.array(X))
     y = torch.FloatTensor(np.array(y)).unsqueeze(-1)
@@ -86,13 +86,13 @@ def train():
         num_layers=2
     ).to(device)
     
-    # Eğitim
+    # Training setup
     criterion = nn.MSELoss()
     optimizer = optim.Adam(model.parameters(), lr=args.lr)
     
     best_val_loss = float('inf')
     for epoch in range(args.epochs):
-        # Train
+        # Training loop
         model.train()
         for X_batch, y_batch in train_loader:
             X_batch, y_batch = X_batch.to(device), y_batch.to(device)
@@ -102,7 +102,7 @@ def train():
             loss.backward()
             optimizer.step()
         
-        # Validation
+        # Validation loop
         model.eval()
         val_loss = 0
         with torch.no_grad():
@@ -124,7 +124,7 @@ def train():
                 'input_dim': X.shape[2]
             }, args.model_path)
     
-    print(f'Model kaydedildi: {args.model_path}')
+    print(f'Model saved to: {args.model_path}')
 
 if __name__ == '__main__':
     train()
diff --git a/Tokenizer/basit_tokenizer.py b/Tokenizer/basit_tokenizer.py
index e7e2500..2e86984 100644
--- a/Tokenizer/basit_tokenizer.py
+++ b/Tokenizer/basit_tokenizer.py
@@ -1,25 +1,16 @@
 from typing import List
 
 class SimpleTokenizer:
-    """
-    Basit bir tokenizer sınıfı. Bu sınıf, metni tokenlara ayırır ve tokenları tekrar metne dönüştürür.
-    """
+    """A minimal tokenizer that maps whitespace-separated tokens to IDs and back."""
 
     def __init__(self):
-        """
-        Tokenizer'ı başlatır. Bu örnekte, boşluklara göre tokenlara ayırma işlemi yapılır.
-        """
-        self.vocab = {}  # Tokenları saklamak için bir sözlük
-        self.id_to_token = {}  # ID'den tokena eşleme yapmak için bir sözlük
-        self.next_id = 0  # Bir sonraki token ID'si
+        """Initialise the tokenizer using whitespace tokenisation."""
+        self.vocab = {}  # Token to ID mapping
+        self.id_to_token = {}  # Reverse lookup from ID to token
+        self.next_id = 0  # Next available token ID
 
     def add_token(self, token: str) -> int:
-        """
-        Yeni bir token ekler ve bir ID atar.
-
-        :param token: Eklenmek istenen token.
-        :return: Token'a atanmış ID.
-        """
+        """Register a new token and return its ID."""
         if token not in self.vocab:
             self.vocab[token] = self.next_id
             self.id_to_token[self.next_id] = token
@@ -27,41 +18,31 @@ def add_token(self, token: str) -> int:
         return self.vocab[token]
 
     def tokenize(self, text: str) -> List[int]:
-        """
-        Metni tokenlara ayırır ve token ID'lerini döndürür.
-
-        :param text: Tokenlara ayrılacak metin.
-        :return: Token ID'lerinin listesi.
-        """
-        tokens = text.split()  # Metni boşluklara göre ayır
+        """Split text into tokens and return their IDs."""
+        tokens = text.split()  # Split on whitespace
         token_ids = []
         for token in tokens:
-            token_id = self.add_token(token)  # Token'ı ekle ve ID'sini al
+            token_id = self.add_token(token)  # Add token and retrieve its ID
             token_ids.append(token_id)
         return token_ids
 
     def detokenize(self, token_ids: List[int]) -> str:
-        """
-        Token ID'lerini metne dönüştürür.
-
-        :param token_ids: Token ID'lerinin listesi.
-        :return: Tokenlardan oluşturulmuş metin.
-        """
+        """Convert token IDs back into a whitespace-separated string."""
         tokens = []
         for token_id in token_ids:
-            token = self.id_to_token.get(token_id, "")  # ID'ye karşılık gelen token'ı al
+            token = self.id_to_token.get(token_id, "")  # Look up the token for each ID
             tokens.append(token)
-        return " ".join(tokens)  # Tokenları birleştir ve metni oluştur
+        return " ".join(tokens)  # Join tokens back into text
 
-# Örnek kullanım
+# Example usage
 if __name__ == "__main__":
     tokenizer = SimpleTokenizer()
 
-    # Metni tokenlara ayır
-    text = "Merhaba dünya! Bu bir örnek metin."
+    # Tokenise text
+    text = "Hello world! This is a sample sentence."
     token_ids = tokenizer.tokenize(text)
-    print(f"Token ID'leri: {token_ids}")
+    print(f"Token IDs: {token_ids}")
 
-    # Token ID'lerini metne dönüştür
+    # Convert IDs back into text
     decoded_text = tokenizer.detokenize(token_ids)
-    print(f"Çözülen metin: {decoded_text}")""
+    print(f"Decoded text: {decoded_text}")
diff --git a/Tokenizer/training1.py b/Tokenizer/training1.py
index 2ff94da..562eb65 100644
--- a/Tokenizer/training1.py
+++ b/Tokenizer/training1.py
@@ -5,13 +5,13 @@
 from typing import List
 
 
-# Örnek Türkçe metinler
+# Sample texts
 texts = [
-    "Merhaba, nasılsın?",
-    "Bugün hava çok güzel.",
-    "Python programlama dili çok popüler.",
-    "Derin öğrenme, yapay zekanın bir dalıdır.",
-    "Python, metin işlemede önemli bir adımdır."
+    "Hello, how are you?",
+    "The weather is great today.",
+    "The Python programming language is very popular.",
+    "Deep learning is a branch of artificial intelligence.",
+    "Python is an important tool for text processing."
 ]
 
 
@@ -20,18 +20,18 @@ class TurkishTokenizer:
     def __init__(self):
         self.vocab = {}  # Token -> ID
         self.id_to_token = {}  # ID -> Token
-        self.next_id = 0  # Yeni token ID'si
-        self.unk_token = "<UNK>"  # Bilinmeyen token
-        self.pad_token = "<PAD>"  # Dolgu tokenı
+        self.next_id = 0  # Next token ID
+        self.unk_token = "<UNK>"  # Unknown token placeholder
+        self.pad_token = "<PAD>"  # Padding token
         self.special_tokens = [self.unk_token, self.pad_token]
 
-        # Özel tokenları ekle
+        # Add special tokens to the vocabulary
         for token in self.special_tokens:
             self.add_token(token)
 
     def add_token(self, token: str) -> int:
         """
-        Yeni bir token ekler ve bir ID atar.
+        Add a new token to the vocabulary and assign an ID.
         """
         if token not in self.vocab:
             self.vocab[token] = self.next_id
@@ -41,22 +41,22 @@ def add_token(self, token: str) -> int:
 
     def tokenize(self, text: str) -> List[int]:
         """
-        Metni tokenlara ayırır ve token ID'lerini döndürür.
+        Split text into tokens and return their IDs.
         """
-        # Metni küçük harfe çevir ve noktalama işaretlerini ayır
+        # Lowercase text and isolate punctuation
         text = text.lower()
-        tokens = re.findall(r"\w+|\S", text)  # Kelimeler ve noktalama işaretleri
+        tokens = re.findall(r"\w+|\S", text)  # Words and punctuation marks
         token_ids = []
         for token in tokens:
             if token in self.vocab:
                 token_ids.append(self.vocab[token])
             else:
-                token_ids.append(self.vocab[self.unk_token])  # Bilinmeyen token
+                token_ids.append(self.vocab[self.unk_token])  # Unknown token fallback
         return token_ids
 
     def detokenize(self, token_ids: List[int]) -> str:
         """
-        Token ID'lerini metne dönüştürür.
+        Convert token IDs back to text.
         """
         tokens = []
         for token_id in token_ids:
@@ -68,7 +68,7 @@ def detokenize(self, token_ids: List[int]) -> str:
 
     def build_vocab(self, texts: List[str]):
         """
-        Metinler üzerinden kelime dağarcığı oluşturur.
+        Build a vocabulary from the provided texts.
         """
         counter = Counter()
         for text in texts:
@@ -76,7 +76,7 @@ def build_vocab(self, texts: List[str]):
             tokens = re.findall(r"\w+|\S", text)
             counter.update(tokens)
 
-        # En sık kullanılan tokenları ekle
+        # Add the most frequent tokens
         for token, _ in counter.most_common():
             self.add_token(token)
             
@@ -94,23 +94,23 @@ def __getitem__(self, idx):
         token_ids = self.tokenizer.tokenize(text)
         return torch.tensor(token_ids, dtype=torch.long)
 
-# Tokenizer'ı ve veri setini oluştur
+# Build the tokenizer and dataset
 tokenizer = TurkishTokenizer()
-tokenizer.build_vocab(texts)  # Kelime dağarcığını oluştur
+tokenizer.build_vocab(texts)  # Populate the vocabulary
 
 dataset = TextDataset(texts, tokenizer)
 dataloader = DataLoader(dataset, batch_size=1, shuffle=True)
 
-# Tokenizer'ı test et
-test_text = "Bugün hava çok güzel."
+# Test the tokenizer
+test_text = "The weather is great today."
 token_ids = tokenizer.tokenize(test_text)
-print(f"Token ID'leri: {token_ids}")
+print(f"Token IDs: {token_ids}")
 
-# Token ID'lerini metne dönüştür
+# Convert token IDs back to text
 decoded_text = tokenizer.detokenize(token_ids)
-print(f"Çözülen metin: {decoded_text}")
+print(f"Decoded text: {decoded_text}")
 
 print(dataset)
-# DataLoader üzerinden örnekler al
+# Iterate through DataLoader samples
 for batch in dataloader:
     print("Batch:", batch)
\ No newline at end of file
diff --git a/Vision Transformers/sglip2.ipynb b/Vision Transformers/sglip2.ipynb
index 7e4b2dc..81c0790 100644
--- a/Vision Transformers/sglip2.ipynb	
+++ b/Vision Transformers/sglip2.ipynb	
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 20,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -14,7 +14,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 21,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -34,7 +34,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -53,7 +53,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 23,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -72,7 +72,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 24,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -96,7 +96,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -140,7 +140,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -183,7 +183,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -212,7 +212,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -229,7 +229,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -244,7 +244,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -322,7 +322,7 @@
     "import random\n",
     "from tqdm import tqdm\n",
     "\n",
-    "# CUDA kontrolü\n",
+    "# CUDA check\n",
     "print(\"Checking CUDA availability...\")\n",
     "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
     "print(f\"Using device: {device}\")\n",
@@ -334,14 +334,14 @@
     "    print(\"CUDA not available, falling back to CPU\")\n",
     "    exit()\n",
     "\n",
-    "# Dataset yükleme\n",
+    "# Load dataset\n",
     "print(\"Loading Flickr30k dataset...\")\n",
     "dataset = load_dataset(\"nlphuji/flickr30k\")\n",
     "print(\"Dataset loaded successfully!\")\n",
     "full_dataset = dataset[\"test\"]\n",
     "print(f\"Full dataset size: {len(full_dataset)} examples\")\n",
     "\n",
-    "# Subset oluşturma\n",
+    "# Create subsets\n",
     "print(\"Creating training and validation subsets...\")\n",
     "random.seed(42)\n",
     "all_indices = list(range(len(full_dataset)))\n",
@@ -367,7 +367,7 @@
     "])\n",
     "print(\"Image transformations defined!\")\n",
     "\n",
-    "# Custom Dataset (Düzeltildi)\n",
+    "# Custom Dataset (Corrected)\n",
     "print(\"Defining custom Flickr30kMiniDataset class...\")\n",
     "class Flickr30kMiniDataset(Dataset):\n",
     "    def __init__(self, dataset, transform, tokenizer):\n",
@@ -382,7 +382,7 @@
     "    def __getitem__(self, idx):\n",
     "        example = self.dataset[idx]\n",
     "        print(f\"Processing example at index {idx}\")\n",
-    "        # Görüntü 'image' anahtarında\n",
+    "        # Image stored under the 'image' key\n",
     "        image = example[\"image\"]\n",
     "        print(f\"Image retrieved from dataset, type: {type(image)}\")\n",
     "        if image.mode != \"RGB\":\n",
@@ -390,8 +390,8 @@
     "            print(\"Image converted to RGB\")\n",
     "        pixel_values = self.transform(image)\n",
     "        print(f\"Image transformed, pixel_values shape: {pixel_values.shape}\")\n",
-    "        # İlk caption’ı al (liste içinden)\n",
-    "        caption = example[\"caption\"][0]  # Düzeltme burada!\n",
+    "        # Take the first caption (from the list)\n",
+    "        caption = example[\"caption\"][0]  # Correction here!\n",
     "        print(f\"Tokenizing caption: {caption}\")\n",
     "        tokenized = self.tokenizer(caption, padding=\"max_length\", max_length=64, truncation=True, return_tensors=\"pt\")\n",
     "        print(f\"Caption tokenized, input_ids shape: {tokenized['input_ids'].shape}\")\n",
@@ -401,7 +401,7 @@
     "            \"attention_mask\": tokenized[\"attention_mask\"].squeeze(0),\n",
     "        }\n",
     "\n",
-    "# Datasets oluşturma\n",
+    "# Create datasets\n",
     "print(\"Creating training dataset...\")\n",
     "train_data = Flickr30kMiniDataset(train_subset, image_transform, tokenizer)\n",
     "print(\"Creating validation dataset...\")\n",
@@ -418,7 +418,7 @@
     "# [Paste your Siglip2VisionConfig, Siglip2TextConfig, Siglip2Config, AttentionPooling,\n",
     "# Siglip2VisionModel, Siglip2TextModel, Siglip2Model, and siglip_loss definitions here]\n",
     "\n",
-    "# Model başlatma\n",
+    "# Model initialization\n",
     "print(\"Initializing SIGLIP2 model...\")\n",
     "vision_config = Siglip2VisionConfig()\n",
     "text_config = Siglip2TextConfig()\n",
@@ -526,7 +526,7 @@
     "import random\n",
     "from tqdm import tqdm\n",
     "\n",
-    "# CUDA kontrolü\n",
+    "# CUDA check\n",
     "print(\"Checking CUDA availability...\")\n",
     "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
     "print(f\"Using device: {device}\")\n",
@@ -535,7 +535,7 @@
     "    print(f\"CUDA Version: {torch.version.cuda}\")\n",
     "    print(f\"PyTorch Version: {torch.__version__}\")\n",
     "\n",
-    "# Dataset yükleme\n",
+    "# Load dataset\n",
     "dataset = load_dataset(\"nlphuji/flickr30k\")\n",
     "full_dataset = dataset[\"test\"]\n",
     "random.seed(42)\n",
@@ -572,7 +572,7 @@
     "        if image.mode != \"RGB\":\n",
     "            image = image.convert(\"RGB\")\n",
     "        pixel_values = self.transform(image)\n",
-    "        caption = example[\"caption\"][0]  # İlk caption\n",
+    "        caption = example[\"caption\"][0]  # First caption\n",
     "        tokenized = self.tokenizer(caption, padding=\"max_length\", max_length=64, truncation=True, return_tensors=\"pt\")\n",
     "        return {\n",
     "            \"pixel_values\": pixel_values,\n",
@@ -589,7 +589,7 @@
     "# Model Configs unchanged (Siglip2VisionConfig, Siglip2TextConfig, AttentionPooling as before)\n",
     "# [Paste your previous Siglip2VisionConfig, Siglip2TextConfig, AttentionPooling here]\n",
     "\n",
-    "# Text Decoder (Image-to-Text için)\n",
+    "# Text Decoder (for image-to-text)\n",
     "class Siglip2Decoder(nn.Module):\n",
     "    def __init__(self, hidden_size=768, num_layers=6, num_heads=12, vocab_size=30522):\n",
     "        super().__init__()\n",
@@ -618,7 +618,7 @@
     "                x = layer(x, vision_embedding, tgt_mask=nn.Transformer.generate_square_subsequent_mask(target_ids.size(1)).to(device))\n",
     "            return self.fc_out(x)\n",
     "\n",
-    "# Simple Diffusion Model (Text-to-Image için temel)\n",
+    "# Simple Diffusion Model (baseline for text-to-image)\n",
     "class SimpleDiffusion(nn.Module):\n",
     "    def __init__(self, hidden_size=768, img_size=224):\n",
     "        super().__init__()\n",
@@ -638,7 +638,7 @@
     "        for t in range(steps):\n",
     "            t_tensor = torch.full((text_embedding.size(0),), self.noise_scheduler[t], device=device)\n",
     "            pred_noise = self.unet(noise + self.text_proj(text_embedding).view(-1, 64, 224, 224))\n",
-    "            noise = noise - 0.1 * pred_noise  # Basit denoising\n",
+    "            noise = noise - 0.1 * pred_noise  # Simple denoising\n",
     "        return noise.clamp(-1, 1)\n",
     "\n",
     "# Extended SIGLIP2 Model\n",
@@ -650,7 +650,7 @@
     "        self.text_model = Siglip2TextModel(config.text_config)\n",
     "        self.vision_projection = nn.Linear(config.vision_config.hidden_size, config.projection_dim)\n",
     "        self.text_projection = nn.Linear(config.text_config.hidden_size, config.projection_dim)\n",
-    "        # Yeni eklenenler\n",
+    "        # Newly added components\n",
     "        self.decoder = Siglip2Decoder(config.text_config.hidden_size, vocab_size=tokenizer.vocab_size)\n",
     "        self.diffusion = SimpleDiffusion(config.projection_dim)\n",
     "\n",
diff --git a/fine_tune_whisper.ipynb b/fine_tune_whisper.ipynb
index a062c76..7b9d685 100644
--- a/fine_tune_whisper.ipynb
+++ b/fine_tune_whisper.ipynb
@@ -1,1837 +1,1698 @@
 {
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "view-in-github",
-        "colab_type": "text"
-      },
-      "source": [
-        "<a href=\"https://colab.research.google.com/github/emredeveloper/Transformers-Examples/blob/main/fine_tune_whisper.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 16,
-      "id": "95048026-a3b7-43f0-a274-1bad65e407b4",
-      "metadata": {
-        "id": "95048026-a3b7-43f0-a274-1bad65e407b4",
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "outputId": "fe456316-68da-4980-bdd7-2c8ae1e11492"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Mon Oct  6 13:22:15 2025       \n",
-            "+-----------------------------------------------------------------------------------------+\n",
-            "| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |\n",
-            "|-----------------------------------------+------------------------+----------------------+\n",
-            "| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |\n",
-            "| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |\n",
-            "|                                         |                        |               MIG M. |\n",
-            "|=========================================+========================+======================|\n",
-            "|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |\n",
-            "| N/A   43C    P8              9W /   70W |       2MiB /  15360MiB |      0%      Default |\n",
-            "|                                         |                        |                  N/A |\n",
-            "+-----------------------------------------+------------------------+----------------------+\n",
-            "                                                                                         \n",
-            "+-----------------------------------------------------------------------------------------+\n",
-            "| Processes:                                                                              |\n",
-            "|  GPU   GI   CI        PID   Type   Process name                              GPU Memory |\n",
-            "|        ID   ID                                                               Usage      |\n",
-            "|=========================================================================================|\n",
-            "|  No running processes found                                                             |\n",
-            "+-----------------------------------------------------------------------------------------+\n"
-          ]
-        }
-      ],
-      "source": [
-        "gpu_info = !nvidia-smi\n",
-        "gpu_info = '\\n'.join(gpu_info)\n",
-        "if gpu_info.find('failed') >= 0:\n",
-        "  print('Not connected to a GPU')\n",
-        "else:\n",
-        "  print(gpu_info)"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 17,
-      "id": "e68ea9f8-9b61-414e-8885-3033b67c2850",
-      "metadata": {
-        "id": "e68ea9f8-9b61-414e-8885-3033b67c2850"
-      },
-      "outputs": [],
-      "source": [
-        "!pip install --upgrade --quiet pip\n",
-        "!pip install --upgrade --quiet datasets[audio] transformers accelerate evaluate jiwer tensorboard gradio"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 18,
-      "id": "b045a39e-2a3e-4153-bdb5-281500bcd348",
-      "metadata": {
-        "id": "b045a39e-2a3e-4153-bdb5-281500bcd348",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 17,
-          "referenced_widgets": [
-            "3e1ffb4a30784946bd63dd0aacf08e5a",
-            "60840eccbe824ab790ba07c3cdba6fd4",
-            "7c6d00d2f34d4681acd6bba4b5b9e76a",
-            "08539a062cd94849bf52b2d778714fbc",
-            "73c0f925b4f6468f8cd676445a5a5a56",
-            "33b6d32edbaa4cde85297e9cdb067847",
-            "e6ec79893ee74a378b19f89e94380115",
-            "f2f6c02b6c1140dd9681daf6c3fedcdf",
-            "570d124779574118a9d6493621ab65ce",
-            "ed8e901c90cb4c018ed11f54cb995ea2",
-            "deccbc3683c14832ac0e337f6b0970a0",
-            "59e4108cbab04c5e975a118f82bfcdc5",
-            "a14648f6c1444a7887c60010cf6b857d",
-            "d7c556f97e124354827b5c0ada8d1007",
-            "bb60ca22fea548119375630b057daa1e",
-            "8633572a6144474c92724493882fa056",
-            "4f4060fcf3754ca4a729a5ecb0a256a2",
-            "49dd1f5779dc45ecbdf4b8378e6bb0da",
-            "d612d61478da44868d5b0f64037a71b2",
-            "5f9252200218437893e8314380d75f0c"
-          ]
-        },
-        "outputId": "fb4c99b3-d48c-479e-9781-95605c0f225d"
-      },
-      "outputs": [
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/plain": [
-              "VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
-            ],
-            "application/vnd.jupyter.widget-view+json": {
-              "version_major": 2,
-              "version_minor": 0,
-              "model_id": "3e1ffb4a30784946bd63dd0aacf08e5a"
-            }
-          },
-          "metadata": {}
-        }
-      ],
-      "source": [
-        "from huggingface_hub import notebook_login\n",
-        "\n",
-        "notebook_login()"
-      ]
-    },
-    {
-      "cell_type": "markdown",
-      "id": "b219c9dd-39b6-4a95-b2a1-3f547a1e7bc0",
-      "metadata": {
-        "id": "b219c9dd-39b6-4a95-b2a1-3f547a1e7bc0"
-      },
-      "source": [
-        "## Load Dataset"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": 20,
-      "id": "a2787582-554f-44ce-9f38-4180a5ed6b44",
-      "metadata": {
-        "id": "a2787582-554f-44ce-9f38-4180a5ed6b44",
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "outputId": "b988d090-d3aa-4324-b8b2-aa167172a16c"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "DatasetDict({\n",
-            "    train: Dataset({\n",
-            "        features: ['transcription', 'audio'],\n",
-            "        num_rows: 25741\n",
-            "    })\n",
-            "    test: Dataset({\n",
-            "        features: ['transcription', 'audio'],\n",
-            "        num_rows: 1355\n",
-            "    })\n",
-            "})\n"
-          ]
-        }
-      ],
-      "source": [
-        "from datasets import load_dataset, DatasetDict\n",
-        "\n",
-        "# Veri kümesini yükle\n",
-        "khanacademy_turkish = DatasetDict()\n",
-        "\n",
-        "khanacademy_turkish[\"train\"] = load_dataset(\"ysdede/khanacademy-turkish\", split=\"train\")\n",
-        "khanacademy_turkish[\"test\"] = load_dataset(\"ysdede/khanacademy-turkish\", split=\"test\")\n",
-        "\n",
-        "print(khanacademy_turkish)"
-      ]
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "id": "view-in-github",
+    "colab_type": "text"
+   },
+   "source": [
+    "<a href=\"https://colab.research.google.com/github/emredeveloper/Transformers-Examples/blob/main/fine_tune_whisper.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "95048026-a3b7-43f0-a274-1bad65e407b4",
+   "metadata": {
+    "id": "95048026-a3b7-43f0-a274-1bad65e407b4",
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
-    {
-      "cell_type": "markdown",
-      "id": "2d63b2d2-f68a-4d74-b7f1-5127f6d16605",
-      "metadata": {
-        "id": "2d63b2d2-f68a-4d74-b7f1-5127f6d16605"
-      },
-      "source": [
-        "## Prepare Feature Extractor, Tokenizer and Data"
-      ]
+    "outputId": "fe456316-68da-4980-bdd7-2c8ae1e11492"
+   },
+   "outputs": [],
+   "source": [
+    "gpu_info = !nvidia-smi\n",
+    "gpu_info = '\\n'.join(gpu_info)\n",
+    "if gpu_info.find('failed') >= 0:\n",
+    "  print('Not connected to a GPU')\n",
+    "else:\n",
+    "  print(gpu_info)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e68ea9f8-9b61-414e-8885-3033b67c2850",
+   "metadata": {
+    "id": "e68ea9f8-9b61-414e-8885-3033b67c2850"
+   },
+   "outputs": [],
+   "source": [
+    "!pip install --upgrade --quiet pip\n",
+    "!pip install --upgrade --quiet datasets[audio] transformers accelerate evaluate jiwer tensorboard gradio"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b045a39e-2a3e-4153-bdb5-281500bcd348",
+   "metadata": {
+    "id": "b045a39e-2a3e-4153-bdb5-281500bcd348",
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 17,
+     "referenced_widgets": [
+      "3e1ffb4a30784946bd63dd0aacf08e5a",
+      "60840eccbe824ab790ba07c3cdba6fd4",
+      "7c6d00d2f34d4681acd6bba4b5b9e76a",
+      "08539a062cd94849bf52b2d778714fbc",
+      "73c0f925b4f6468f8cd676445a5a5a56",
+      "33b6d32edbaa4cde85297e9cdb067847",
+      "e6ec79893ee74a378b19f89e94380115",
+      "f2f6c02b6c1140dd9681daf6c3fedcdf",
+      "570d124779574118a9d6493621ab65ce",
+      "ed8e901c90cb4c018ed11f54cb995ea2",
+      "deccbc3683c14832ac0e337f6b0970a0",
+      "59e4108cbab04c5e975a118f82bfcdc5",
+      "a14648f6c1444a7887c60010cf6b857d",
+      "d7c556f97e124354827b5c0ada8d1007",
+      "bb60ca22fea548119375630b057daa1e",
+      "8633572a6144474c92724493882fa056",
+      "4f4060fcf3754ca4a729a5ecb0a256a2",
+      "49dd1f5779dc45ecbdf4b8378e6bb0da",
+      "d612d61478da44868d5b0f64037a71b2",
+      "5f9252200218437893e8314380d75f0c"
+     ]
     },
-    {
-      "cell_type": "code",
-      "execution_count": 21,
-      "id": "bc77d7bb-f9e2-47f5-b663-30f7a4321ce5",
-      "metadata": {
-        "id": "bc77d7bb-f9e2-47f5-b663-30f7a4321ce5"
-      },
-      "outputs": [],
-      "source": [
-        "from transformers import WhisperFeatureExtractor\n",
-        "\n",
-        "feature_extractor = WhisperFeatureExtractor.from_pretrained(\"openai/whisper-small\")"
-      ]
+    "outputId": "fb4c99b3-d48c-479e-9781-95605c0f225d"
+   },
+   "outputs": [],
+   "source": [
+    "from huggingface_hub import notebook_login\n",
+    "\n",
+    "notebook_login()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b219c9dd-39b6-4a95-b2a1-3f547a1e7bc0",
+   "metadata": {
+    "id": "b219c9dd-39b6-4a95-b2a1-3f547a1e7bc0"
+   },
+   "source": [
+    "## Load Dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a2787582-554f-44ce-9f38-4180a5ed6b44",
+   "metadata": {
+    "id": "a2787582-554f-44ce-9f38-4180a5ed6b44",
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
-    {
-      "cell_type": "markdown",
-      "id": "93748af7-b917-4ecf-a0c8-7d89077ff9cb",
-      "metadata": {
-        "id": "93748af7-b917-4ecf-a0c8-7d89077ff9cb"
-      },
-      "source": [
-        "### Load WhisperTokenizer"
-      ]
+    "outputId": "b988d090-d3aa-4324-b8b2-aa167172a16c"
+   },
+   "outputs": [],
+   "source": [
+    "from datasets import load_dataset, DatasetDict\n",
+    "\n",
+    "# Load the dataset\n",
+    "khanacademy_turkish = DatasetDict()\n",
+    "\n",
+    "khanacademy_turkish[\"train\"] = load_dataset(\"ysdede/khanacademy-turkish\", split=\"train\")\n",
+    "khanacademy_turkish[\"test\"] = load_dataset(\"ysdede/khanacademy-turkish\", split=\"test\")\n",
+    "\n",
+    "print(khanacademy_turkish)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2d63b2d2-f68a-4d74-b7f1-5127f6d16605",
+   "metadata": {
+    "id": "2d63b2d2-f68a-4d74-b7f1-5127f6d16605"
+   },
+   "source": [
+    "## Prepare Feature Extractor, Tokenizer and Data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bc77d7bb-f9e2-47f5-b663-30f7a4321ce5",
+   "metadata": {
+    "id": "bc77d7bb-f9e2-47f5-b663-30f7a4321ce5"
+   },
+   "outputs": [],
+   "source": [
+    "from transformers import WhisperFeatureExtractor\n",
+    "\n",
+    "feature_extractor = WhisperFeatureExtractor.from_pretrained(\"openai/whisper-small\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "93748af7-b917-4ecf-a0c8-7d89077ff9cb",
+   "metadata": {
+    "id": "93748af7-b917-4ecf-a0c8-7d89077ff9cb"
+   },
+   "source": [
+    "### Load WhisperTokenizer"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c7b07f9b-ae0e-4f89-98f0-0c50d432eab6",
+   "metadata": {
+    "id": "c7b07f9b-ae0e-4f89-98f0-0c50d432eab6"
+   },
+   "outputs": [],
+   "source": [
+    "from transformers import WhisperTokenizer\n",
+    "\n",
+    "tokenizer = WhisperTokenizer.from_pretrained(\"openai/whisper-small\", language=\"Turkish\", task=\"transcribe\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d2ef23f3-f4a8-483a-a2dc-080a7496cb1b",
+   "metadata": {
+    "id": "d2ef23f3-f4a8-483a-a2dc-080a7496cb1b"
+   },
+   "source": [
+    "### Combine To Create A WhisperProcessor"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5ff67654-5a29-4bb8-a69d-0228946c6f8d",
+   "metadata": {
+    "id": "5ff67654-5a29-4bb8-a69d-0228946c6f8d"
+   },
+   "source": [
+    "To simplify using the feature extractor and tokenizer, we can _wrap_\n",
+    "both into a single `WhisperProcessor` class. This processor object\n",
+    "inherits from the `WhisperFeatureExtractor` and `WhisperTokenizer`,\n",
+    "and can be used on the audio inputs and model predictions as required.\n",
+    "In doing so, we only need to keep track of two objects during training:\n",
+    "the `processor` and the `model`:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "77d9f0c5-8607-4642-a8ac-c3ab2e223ea6",
+   "metadata": {
+    "id": "77d9f0c5-8607-4642-a8ac-c3ab2e223ea6"
+   },
+   "outputs": [],
+   "source": [
+    "from transformers import WhisperProcessor\n",
+    "\n",
+    "processor = WhisperProcessor.from_pretrained(\"openai/whisper-small\", language=\"Turkish\", task=\"transcribe\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "381acd09-0b0f-4d04-9eb3-f028ac0e5f2c",
+   "metadata": {
+    "id": "381acd09-0b0f-4d04-9eb3-f028ac0e5f2c"
+   },
+   "source": [
+    "### Prepare Data"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9649bf01-2e8a-45e5-8fca-441c13637b8f",
+   "metadata": {
+    "id": "9649bf01-2e8a-45e5-8fca-441c13637b8f"
+   },
+   "source": [
+    "Let's print the first example of the `khanacademy_turkish` dataset to see\n",
+    "what form the data is in:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6e6b0ec5-0c94-4e2c-ae24-c791be1b2255",
+   "metadata": {
+    "id": "6e6b0ec5-0c94-4e2c-ae24-c791be1b2255",
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
-    {
-      "cell_type": "code",
-      "execution_count": 23,
-      "id": "c7b07f9b-ae0e-4f89-98f0-0c50d432eab6",
-      "metadata": {
-        "id": "c7b07f9b-ae0e-4f89-98f0-0c50d432eab6"
-      },
-      "outputs": [],
-      "source": [
-        "from transformers import WhisperTokenizer\n",
-        "\n",
-        "tokenizer = WhisperTokenizer.from_pretrained(\"openai/whisper-small\", language=\"Turkish\", task=\"transcribe\")"
-      ]
+    "outputId": "29330a59-b805-460b-8ca9-2e3e7437fdbc"
+   },
+   "outputs": [],
+   "source": [
+    "print(khanacademy_turkish[\"train\"][0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f12e2e57-156f-417b-8cfb-69221cc198e8",
+   "metadata": {
+    "id": "f12e2e57-156f-417b-8cfb-69221cc198e8"
+   },
+   "outputs": [],
+   "source": [
+    "from datasets import Audio\n",
+    "\n",
+    "common_voice = khanacademy_turkish.cast_column(\"audio\", Audio(sampling_rate=16000))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "00382a3e-abec-4cdd-a54c-d1aaa3ea4707",
+   "metadata": {
+    "id": "00382a3e-abec-4cdd-a54c-d1aaa3ea4707"
+   },
+   "source": [
+    "Re-loading the first audio sample in the Common Voice dataset will resample\n",
+    "it to the desired sampling rate:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "87122d71-289a-466a-afcf-fa354b18946b",
+   "metadata": {
+    "id": "87122d71-289a-466a-afcf-fa354b18946b",
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
-    {
-      "cell_type": "markdown",
-      "id": "d2ef23f3-f4a8-483a-a2dc-080a7496cb1b",
-      "metadata": {
-        "id": "d2ef23f3-f4a8-483a-a2dc-080a7496cb1b"
-      },
-      "source": [
-        "### Combine To Create A WhisperProcessor"
-      ]
+    "outputId": "92f82272-40e6-489c-9d28-0062076f9ff0"
+   },
+   "outputs": [],
+   "source": [
+    "print(common_voice[\"train\"][0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6525c478-8962-4394-a1c4-103c54cce170",
+   "metadata": {
+    "id": "6525c478-8962-4394-a1c4-103c54cce170"
+   },
+   "outputs": [],
+   "source": [
+    "def prepare_dataset(batch):\n",
+    "    # Retrieve audio data\n",
+    "    audio = batch[\"audio\"]\n",
+    "\n",
+    "    # Compute log-Mel features\n",
+    "    batch[\"input_features\"] = feature_extractor(\n",
+    "        audio[\"array\"],\n",
+    "        sampling_rate=audio[\"sampling_rate\"]\n",
+    "    ).input_features[0]\n",
+    "\n",
+    "    # Encode the transcript into label IDs\n",
+    "    batch[\"labels\"] = tokenizer(batch[\"transcription\"]).input_ids\n",
+    "\n",
+    "    return batch"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7b73ab39-ffaf-4b9e-86e5-782963c6134b",
+   "metadata": {
+    "id": "7b73ab39-ffaf-4b9e-86e5-782963c6134b",
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 49,
+     "referenced_widgets": [
+      "e8f08a9cb8fc4ebc9f2264863c6c1559",
+      "2221666469ec4e8f9261b0f39978e606",
+      "813a9ada6f8742e586bdda3924a95393",
+      "b2f9e261afa74ffab111e899c13055ff",
+      "3f5a5b17ee314195bb933f106311cf39",
+      "67a763a00dc34e55aaf9fd2c188dc0c3",
+      "7fa1f46e33ea417a95864959ffdf6492",
+      "93d31d1a572f4c18b7b5910c59b9e88e",
+      "d30a59903dd84170a89f9f005e775f7a",
+      "309c9ef88e464d258c4fadf3d8aea7dd",
+      "fc75a9d13b5e4e64975e656a11bf6b61"
+     ]
     },
-    {
-      "cell_type": "markdown",
-      "id": "5ff67654-5a29-4bb8-a69d-0228946c6f8d",
-      "metadata": {
-        "id": "5ff67654-5a29-4bb8-a69d-0228946c6f8d"
-      },
-      "source": [
-        "To simplify using the feature extractor and tokenizer, we can _wrap_\n",
-        "both into a single `WhisperProcessor` class. This processor object\n",
-        "inherits from the `WhisperFeatureExtractor` and `WhisperProcessor`,\n",
-        "and can be used on the audio inputs and model predictions as required.\n",
-        "In doing so, we only need to keep track of two objects during training:\n",
-        "the `processor` and the `model`:"
-      ]
+    "outputId": "5d30f555-a1fd-48d7-8741-32330036a7d3"
+   },
+   "outputs": [],
+   "source": [
+    "# Apply the preparation function\n",
+    "small_train_dataset = khanacademy_turkish[\"train\"].select(range(1000))\n",
+    "small_test_dataset = khanacademy_turkish[\"test\"].select(range(1000))  # or the entire test dataset\n",
+    "\n",
+    "prepared_train_dataset = small_train_dataset.map(\n",
+    "    prepare_dataset,\n",
+    "    remove_columns=[\"audio\", \"transcription\"],\n",
+    "    num_proc=2\n",
+    ")\n",
+    "\n",
+    "prepared_test_dataset = small_test_dataset.map(\n",
+    "    prepare_dataset,\n",
+    "    remove_columns=[\"audio\", \"transcription\"],\n",
+    "    num_proc=2\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f",
+   "metadata": {
+    "id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f"
+   },
+   "outputs": [],
+   "source": [
+    "from transformers import WhisperForConditionalGeneration\n",
+    "\n",
+    "model = WhisperForConditionalGeneration.from_pretrained(\"openai/whisper-small\")\n",
+    "model.config.use_cache = False\n",
+    "model.config.gradient_checkpointing = True\n",
+    "model.config.use_reentrant = False"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "62038ba3-88ed-4fce-84db-338f50dcd04f",
+   "metadata": {
+    "id": "62038ba3-88ed-4fce-84db-338f50dcd04f"
+   },
+   "outputs": [],
+   "source": [
+    "model.generation_config.language = \"turkish\"\n",
+    "model.generation_config.task = \"transcribe\"\n",
+    "\n",
+    "model.generation_config.forced_decoder_ids = None"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8d230e6d-624c-400a-bbf5-fa660881df25",
+   "metadata": {
+    "id": "8d230e6d-624c-400a-bbf5-fa660881df25"
+   },
+   "source": [
+    "### Define a Data Collator"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8326221e-ec13-4731-bb4e-51e5fc1486c5",
+   "metadata": {
+    "id": "8326221e-ec13-4731-bb4e-51e5fc1486c5"
+   },
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "\n",
+    "from dataclasses import dataclass\n",
+    "from typing import Any, Dict, List, Union\n",
+    "\n",
+    "@dataclass\n",
+    "class DataCollatorSpeechSeq2SeqWithPadding:\n",
+    "    processor: Any\n",
+    "    decoder_start_token_id: int\n",
+    "\n",
+    "    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:\n",
+    "        # split inputs and labels since they have to be of different lengths and need different padding methods\n",
+    "        # first treat the audio inputs by simply returning torch tensors\n",
+    "        input_features = [{\"input_features\": feature[\"input_features\"]} for feature in features]\n",
+    "        batch = self.processor.feature_extractor.pad(input_features, return_tensors=\"pt\")\n",
+    "\n",
+    "        # get the tokenized label sequences\n",
+    "        label_features = [{\"input_ids\": feature[\"labels\"]} for feature in features]\n",
+    "        # pad the labels to max length\n",
+    "        labels_batch = self.processor.tokenizer.pad(label_features, return_tensors=\"pt\")\n",
+    "\n",
+    "        # replace padding with -100 to ignore loss correctly\n",
+    "        labels = labels_batch[\"input_ids\"].masked_fill(labels_batch.attention_mask.ne(1), -100)\n",
+    "\n",
+    "        # if bos token is appended in previous tokenization step,\n",
+    "        # cut bos token here as it's appended later anyway\n",
+    "        if (labels[:, 0] == self.decoder_start_token_id).all().cpu().item():\n",
+    "            labels = labels[:, 1:]\n",
+    "\n",
+    "        batch[\"labels\"] = labels\n",
+    "\n",
+    "        return batch"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3cae7dbf-8a50-456e-a3a8-7fd005390f86",
+   "metadata": {
+    "id": "3cae7dbf-8a50-456e-a3a8-7fd005390f86"
+   },
+   "source": [
+    "Let's initialise the data collator we've just defined:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fc834702-c0d3-4a96-b101-7b87be32bf42",
+   "metadata": {
+    "id": "fc834702-c0d3-4a96-b101-7b87be32bf42"
+   },
+   "outputs": [],
+   "source": [
+    "data_collator = DataCollatorSpeechSeq2SeqWithPadding(\n",
+    "    processor=processor,\n",
+    "    decoder_start_token_id=model.config.decoder_start_token_id,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d62bb2ab-750a-45e7-82e9-61d6f4805698",
+   "metadata": {
+    "id": "d62bb2ab-750a-45e7-82e9-61d6f4805698"
+   },
+   "source": [
+    "### Evaluation Metrics"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b22b4011-f31f-4b57-b684-c52332f92890",
+   "metadata": {
+    "id": "b22b4011-f31f-4b57-b684-c52332f92890"
+   },
+   "outputs": [],
+   "source": [
+    "import evaluate\n",
+    "\n",
+    "metric = evaluate.load(\"wer\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "23959a70-22d0-4ffe-9fa1-72b61e75bb52",
+   "metadata": {
+    "id": "23959a70-22d0-4ffe-9fa1-72b61e75bb52"
+   },
+   "outputs": [],
+   "source": [
+    "def compute_metrics(pred):\n",
+    "    pred_ids = pred.predictions\n",
+    "    label_ids = pred.label_ids\n",
+    "\n",
+    "    # replace -100 with the pad_token_id\n",
+    "    label_ids[label_ids == -100] = tokenizer.pad_token_id\n",
+    "\n",
+    "    # we do not want to group tokens when computing the metrics\n",
+    "    pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)\n",
+    "    label_str = tokenizer.batch_decode(label_ids, skip_special_tokens=True)\n",
+    "\n",
+    "    wer = 100 * metric.compute(predictions=pred_str, references=label_str)\n",
+    "\n",
+    "    return {\"wer\": wer}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
+   "metadata": {
+    "id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a"
+   },
+   "outputs": [],
+   "source": [
+    "from transformers import Seq2SeqTrainingArguments\n",
+    "\n",
+    "training_args = Seq2SeqTrainingArguments(\n",
+    "    output_dir=\"./whisper-small-hi\",  # change to a repo name of your choice\n",
+    "    per_device_train_batch_size=8,\n",
+    "    gradient_accumulation_steps=1,  # increase by 2x for every 2x decrease in batch size\n",
+    "    learning_rate=1e-5,\n",
+    "    warmup_steps=500,\n",
+    "    max_steps=100,\n",
+    "    gradient_checkpointing=True,\n",
+    "    fp16=True,\n",
+    "    eval_strategy=\"steps\",\n",
+    "    per_device_eval_batch_size=4,\n",
+    "    predict_with_generate=True,\n",
+    "    generation_max_length=225,\n",
+    "    save_steps=1000,\n",
+    "    eval_steps=1000,\n",
+    "    logging_steps=25,\n",
+    "    report_to=[\"tensorboard\"],\n",
+    "    load_best_model_at_end=True,\n",
+    "    metric_for_best_model=\"wer\",\n",
+    "    greater_is_better=False,\n",
+    "    push_to_hub=False,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b3a944d8-3112-4552-82a0-be25988b3857",
+   "metadata": {
+    "id": "b3a944d8-3112-4552-82a0-be25988b3857"
+   },
+   "source": [
+    "**Note**: if one does not want to upload the model checkpoints to the Hub,\n",
+    "set `push_to_hub=False`."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "bac29114-d226-4f54-97cf-8718c9f94e1e",
+   "metadata": {
+    "id": "bac29114-d226-4f54-97cf-8718c9f94e1e"
+   },
+   "source": [
+    "We can forward the training arguments to the 🤗 Trainer along with our model,\n",
+    "dataset, data collator and `compute_metrics` function:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d546d7fe-0543-479a-b708-2ebabec19493",
+   "metadata": {
+    "id": "d546d7fe-0543-479a-b708-2ebabec19493",
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
-    {
-      "cell_type": "code",
-      "execution_count": 24,
-      "id": "77d9f0c5-8607-4642-a8ac-c3ab2e223ea6",
-      "metadata": {
-        "id": "77d9f0c5-8607-4642-a8ac-c3ab2e223ea6"
-      },
-      "outputs": [],
-      "source": [
-        "from transformers import WhisperProcessor\n",
-        "\n",
-        "processor = WhisperProcessor.from_pretrained(\"openai/whisper-small\", language=\"Turkish\", task=\"transcribe\")"
-      ]
+    "outputId": "b4e4312d-0c87-45d5-f3ff-456f1643699b"
+   },
+   "outputs": [],
+   "source": [
+    "from transformers import Seq2SeqTrainer\n",
+    "\n",
+    "trainer = Seq2SeqTrainer(\n",
+    "    args=training_args,\n",
+    "    model=model,\n",
+    "    train_dataset=prepared_train_dataset,\n",
+    "    eval_dataset=prepared_test_dataset,\n",
+    "    data_collator=data_collator,\n",
+    "    compute_metrics=compute_metrics,\n",
+    "    tokenizer=processor.feature_extractor,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "We'll save the processor object once before starting training. Since the processor is not trainable, it won't change over the course of training:"
+   ],
+   "metadata": {
+    "id": "uOrRhDGtN5S4"
+   },
+   "id": "uOrRhDGtN5S4"
+  },
+  {
+   "cell_type": "code",
+   "source": [
+    "processor.save_pretrained(training_args.output_dir)"
+   ],
+   "metadata": {
+    "id": "-2zQwMfEOBJq",
+    "colab": {
+     "base_uri": "https://localhost:8080/"
     },
-    {
-      "cell_type": "markdown",
-      "id": "381acd09-0b0f-4d04-9eb3-f028ac0e5f2c",
-      "metadata": {
-        "id": "381acd09-0b0f-4d04-9eb3-f028ac0e5f2c"
-      },
-      "source": [
-        "### Prepare Data"
-      ]
+    "outputId": "941064fd-8318-4917-9974-f61092c15c45"
+   },
+   "id": "-2zQwMfEOBJq",
+   "execution_count": null,
+   "outputs": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
+   "metadata": {
+    "id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
+    "colab": {
+     "base_uri": "https://localhost:8080/",
+     "height": 75
     },
-    {
-      "cell_type": "markdown",
-      "id": "9649bf01-2e8a-45e5-8fca-441c13637b8f",
-      "metadata": {
-        "id": "9649bf01-2e8a-45e5-8fca-441c13637b8f"
-      },
-      "source": [
-        "Let's print the first example of the Common Voice dataset to see\n",
-        "what form the data is in:"
-      ]
+    "outputId": "8070efd8-3009-4235-e767-656181819cf1"
+   },
+   "outputs": [],
+   "source": [
+    "trainer.train()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c704f91e-241b-48c9-b8e0-f0da396a9663",
+   "metadata": {
+    "id": "c704f91e-241b-48c9-b8e0-f0da396a9663"
+   },
+   "outputs": [],
+   "source": [
+    "kwargs = {\n",
+    "    \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
+    "    \"dataset\": \"Common Voice 11.0\",  # a 'pretty' name for the training dataset\n",
+    "    \"dataset_args\": \"config: hi, split: test\",\n",
+    "    \"language\": \"hi\",\n",
+    "    \"model_name\": \"Whisper Small Hi - Sanchit Gandhi\",  # a 'pretty' name for our model\n",
+    "    \"finetuned_from\": \"openai/whisper-small\",\n",
+    "    \"tasks\": \"automatic-speech-recognition\",\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "090d676a-f944-4297-a938-a40eda0b2b68",
+   "metadata": {
+    "id": "090d676a-f944-4297-a938-a40eda0b2b68"
+   },
+   "source": [
+    "The training results can now be uploaded to the Hub. To do so, execute the `push_to_hub` command and save the preprocessor object we created:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d7030622-caf7-4039-939b-6195cdaa2585",
+   "metadata": {
+    "id": "d7030622-caf7-4039-939b-6195cdaa2585"
+   },
+   "outputs": [],
+   "source": [
+    "trainer.push_to_hub(**kwargs)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "34d4360d-5721-426e-b6ac-178f833fedeb",
+   "metadata": {
+    "id": "34d4360d-5721-426e-b6ac-178f833fedeb"
+   },
+   "source": [
+    "## Building a Demo"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e0ace3aa-1ef3-45cb-933f-6ddca037c5aa",
+   "metadata": {
+    "id": "e0ace3aa-1ef3-45cb-933f-6ddca037c5aa"
+   },
+   "outputs": [],
+   "source": [
+    "from transformers import pipeline\n",
+    "import gradio as gr\n",
+    "\n",
+    "pipe = pipeline(model=\"sanchit-gandhi/whisper-small-hi\")  # change to \"your-username/the-name-you-picked\"\n",
+    "\n",
+    "def transcribe(audio):\n",
+    "    text = pipe(audio)[\"text\"]\n",
+    "    return text\n",
+    "\n",
+    "iface = gr.Interface(\n",
+    "    fn=transcribe,\n",
+    "    inputs=gr.Audio(source=\"microphone\", type=\"filepath\"),\n",
+    "    outputs=\"text\",\n",
+    "    title=\"Whisper Small Hindi\",\n",
+    "    description=\"Realtime demo for Hindi speech recognition using a fine-tuned Whisper small model.\",\n",
+    ")\n",
+    "\n",
+    "iface.launch()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.9"
+  },
+  "colab": {
+   "provenance": [],
+   "gpuType": "T4",
+   "include_colab_link": true
+  },
+  "accelerator": "GPU",
+  "widgets": {
+   "application/vnd.jupyter.widget-state+json": {
+    "3e1ffb4a30784946bd63dd0aacf08e5a": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "VBoxModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "VBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "VBoxView",
+      "box_style": "",
+      "children": [],
+      "layout": "IPY_MODEL_e6ec79893ee74a378b19f89e94380115"
+     }
     },
-    {
-      "cell_type": "code",
-      "execution_count": 25,
-      "id": "6e6b0ec5-0c94-4e2c-ae24-c791be1b2255",
-      "metadata": {
-        "id": "6e6b0ec5-0c94-4e2c-ae24-c791be1b2255",
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "outputId": "29330a59-b805-460b-8ca9-2e3e7437fdbc"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "{'transcription': 'Bunu belirleyen iki dinamik var birincisi litre başına sabit maliyetimizi Asgariye düşürebilmek için mümkün olduğunca çok üretmek isteriz.', 'audio': <datasets.features._torchcodec.AudioDecoder object at 0x7adf07451040>}\n"
-          ]
-        }
-      ],
-      "source": [
-        "print(khanacademy_turkish[\"train\"][0])"
-      ]
+    "60840eccbe824ab790ba07c3cdba6fd4": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "HTMLModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_f2f6c02b6c1140dd9681daf6c3fedcdf",
+      "placeholder": "​",
+      "style": "IPY_MODEL_570d124779574118a9d6493621ab65ce",
+      "value": "<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"
+     }
     },
-    {
-      "cell_type": "code",
-      "execution_count": 26,
-      "id": "f12e2e57-156f-417b-8cfb-69221cc198e8",
-      "metadata": {
-        "id": "f12e2e57-156f-417b-8cfb-69221cc198e8"
-      },
-      "outputs": [],
-      "source": [
-        "from datasets import Audio\n",
-        "\n",
-        "common_voice = khanacademy_turkish.cast_column(\"audio\", Audio(sampling_rate=16000))"
-      ]
+    "7c6d00d2f34d4681acd6bba4b5b9e76a": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "PasswordModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "PasswordModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "PasswordView",
+      "continuous_update": true,
+      "description": "Token:",
+      "description_tooltip": null,
+      "disabled": false,
+      "layout": "IPY_MODEL_ed8e901c90cb4c018ed11f54cb995ea2",
+      "placeholder": "​",
+      "style": "IPY_MODEL_deccbc3683c14832ac0e337f6b0970a0",
+      "value": ""
+     }
     },
-    {
-      "cell_type": "markdown",
-      "id": "00382a3e-abec-4cdd-a54c-d1aaa3ea4707",
-      "metadata": {
-        "id": "00382a3e-abec-4cdd-a54c-d1aaa3ea4707"
-      },
-      "source": [
-        "Re-loading the first audio sample in the Common Voice dataset will resample\n",
-        "it to the desired sampling rate:"
-      ]
+    "08539a062cd94849bf52b2d778714fbc": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "CheckboxModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "CheckboxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "CheckboxView",
+      "description": "Add token as git credential?",
+      "description_tooltip": null,
+      "disabled": false,
+      "indent": true,
+      "layout": "IPY_MODEL_59e4108cbab04c5e975a118f82bfcdc5",
+      "style": "IPY_MODEL_a14648f6c1444a7887c60010cf6b857d",
+      "value": false
+     }
     },
-    {
-      "cell_type": "code",
-      "execution_count": 27,
-      "id": "87122d71-289a-466a-afcf-fa354b18946b",
-      "metadata": {
-        "id": "87122d71-289a-466a-afcf-fa354b18946b",
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "outputId": "92f82272-40e6-489c-9d28-0062076f9ff0"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "{'transcription': 'Bunu belirleyen iki dinamik var birincisi litre başına sabit maliyetimizi Asgariye düşürebilmek için mümkün olduğunca çok üretmek isteriz.', 'audio': <datasets.features._torchcodec.AudioDecoder object at 0x7adf0722eb40>}\n"
-          ]
-        }
-      ],
-      "source": [
-        "print(common_voice[\"train\"][0])"
-      ]
+    "73c0f925b4f6468f8cd676445a5a5a56": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "ButtonModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ButtonModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ButtonView",
+      "button_style": "",
+      "description": "Login",
+      "disabled": false,
+      "icon": "",
+      "layout": "IPY_MODEL_d7c556f97e124354827b5c0ada8d1007",
+      "style": "IPY_MODEL_bb60ca22fea548119375630b057daa1e",
+      "tooltip": ""
+     }
     },
-    {
-      "cell_type": "code",
-      "execution_count": 30,
-      "id": "6525c478-8962-4394-a1c4-103c54cce170",
-      "metadata": {
-        "id": "6525c478-8962-4394-a1c4-103c54cce170"
-      },
-      "outputs": [],
-      "source": [
-        "def prepare_dataset(batch):\n",
-        "    # Audio verisini al\n",
-        "    audio = batch[\"audio\"]\n",
-        "\n",
-        "    # Log-Mel özelliklerini hesapla\n",
-        "    batch[\"input_features\"] = feature_extractor(\n",
-        "        audio[\"array\"],\n",
-        "        sampling_rate=audio[\"sampling_rate\"]\n",
-        "    ).input_features[0]\n",
-        "\n",
-        "    # Transkripti label id'lere encode et\n",
-        "    batch[\"labels\"] = tokenizer(batch[\"transcription\"]).input_ids\n",
-        "\n",
-        "    return batch"
-      ]
+    "33b6d32edbaa4cde85297e9cdb067847": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "HTMLModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_8633572a6144474c92724493882fa056",
+      "placeholder": "​",
+      "style": "IPY_MODEL_4f4060fcf3754ca4a729a5ecb0a256a2",
+      "value": "\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. </center>"
+     }
     },
-    {
-      "cell_type": "code",
-      "execution_count": 56,
-      "id": "7b73ab39-ffaf-4b9e-86e5-782963c6134b",
-      "metadata": {
-        "id": "7b73ab39-ffaf-4b9e-86e5-782963c6134b",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 49,
-          "referenced_widgets": [
-            "e8f08a9cb8fc4ebc9f2264863c6c1559",
-            "2221666469ec4e8f9261b0f39978e606",
-            "813a9ada6f8742e586bdda3924a95393",
-            "b2f9e261afa74ffab111e899c13055ff",
-            "3f5a5b17ee314195bb933f106311cf39",
-            "67a763a00dc34e55aaf9fd2c188dc0c3",
-            "7fa1f46e33ea417a95864959ffdf6492",
-            "93d31d1a572f4c18b7b5910c59b9e88e",
-            "d30a59903dd84170a89f9f005e775f7a",
-            "309c9ef88e464d258c4fadf3d8aea7dd",
-            "fc75a9d13b5e4e64975e656a11bf6b61"
-          ]
-        },
-        "outputId": "5d30f555-a1fd-48d7-8741-32330036a7d3"
-      },
-      "outputs": [
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/plain": [
-              "Map (num_proc=2):   0%|          | 0/1000 [00:00<?, ? examples/s]"
-            ],
-            "application/vnd.jupyter.widget-view+json": {
-              "version_major": 2,
-              "version_minor": 0,
-              "model_id": "e8f08a9cb8fc4ebc9f2264863c6c1559"
-            }
-          },
-          "metadata": {}
-        }
-      ],
-      "source": [
-        "# Hazırlık fonksiyonunu uygula\n",
-        "small_train_dataset = khanacademy_turkish[\"train\"].select(range(1000))\n",
-        "small_test_dataset = khanacademy_turkish[\"test\"].select(range(1000))  # ya da tüm test dataset\n",
-        "\n",
-        "prepared_train_dataset = small_train_dataset.map(\n",
-        "    prepare_dataset,\n",
-        "    remove_columns=[\"audio\", \"transcription\"],\n",
-        "    num_proc=2\n",
-        ")\n",
-        "\n",
-        "prepared_test_dataset = small_test_dataset.map(\n",
-        "    prepare_dataset,\n",
-        "    remove_columns=[\"audio\", \"transcription\"],\n",
-        "    num_proc=2\n",
-        ")"
-      ]
+    "e6ec79893ee74a378b19f89e94380115": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": "center",
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": "flex",
+      "flex": null,
+      "flex_flow": "column",
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": "50%"
+     }
     },
-    {
-      "cell_type": "code",
-      "execution_count": 79,
-      "id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f",
-      "metadata": {
-        "id": "5a10cc4b-07ec-4ebd-ac1d-7c601023594f"
-      },
-      "outputs": [],
-      "source": [
-        "from transformers import WhisperForConditionalGeneration\n",
-        "\n",
-        "model = WhisperForConditionalGeneration.from_pretrained(\"openai/whisper-small\")\n",
-        "model.config.use_cache = False\n",
-        "model.config.gradient_checkpointing = True\n",
-        "model.config.use_reentrant = False"
-      ]
+    "f2f6c02b6c1140dd9681daf6c3fedcdf": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
     },
-    {
-      "cell_type": "code",
-      "execution_count": 80,
-      "id": "62038ba3-88ed-4fce-84db-338f50dcd04f",
-      "metadata": {
-        "id": "62038ba3-88ed-4fce-84db-338f50dcd04f"
-      },
-      "outputs": [],
-      "source": [
-        "model.generation_config.language = \"turkish\"\n",
-        "model.generation_config.task = \"transcribe\"\n",
-        "\n",
-        "model.generation_config.forced_decoder_ids = None"
-      ]
+    "570d124779574118a9d6493621ab65ce": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "DescriptionStyleModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
     },
-    {
-      "cell_type": "markdown",
-      "id": "8d230e6d-624c-400a-bbf5-fa660881df25",
-      "metadata": {
-        "id": "8d230e6d-624c-400a-bbf5-fa660881df25"
-      },
-      "source": [
-        "### Define a Data Collator"
-      ]
+    "ed8e901c90cb4c018ed11f54cb995ea2": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
     },
-    {
-      "cell_type": "code",
-      "execution_count": 81,
-      "id": "8326221e-ec13-4731-bb4e-51e5fc1486c5",
-      "metadata": {
-        "id": "8326221e-ec13-4731-bb4e-51e5fc1486c5"
-      },
-      "outputs": [],
-      "source": [
-        "import torch\n",
-        "\n",
-        "from dataclasses import dataclass\n",
-        "from typing import Any, Dict, List, Union\n",
-        "\n",
-        "@dataclass\n",
-        "class DataCollatorSpeechSeq2SeqWithPadding:\n",
-        "    processor: Any\n",
-        "    decoder_start_token_id: int\n",
-        "\n",
-        "    def __call__(self, features: List[Dict[str, Union[List[int], torch.Tensor]]]) -> Dict[str, torch.Tensor]:\n",
-        "        # split inputs and labels since they have to be of different lengths and need different padding methods\n",
-        "        # first treat the audio inputs by simply returning torch tensors\n",
-        "        input_features = [{\"input_features\": feature[\"input_features\"]} for feature in features]\n",
-        "        batch = self.processor.feature_extractor.pad(input_features, return_tensors=\"pt\")\n",
-        "\n",
-        "        # get the tokenized label sequences\n",
-        "        label_features = [{\"input_ids\": feature[\"labels\"]} for feature in features]\n",
-        "        # pad the labels to max length\n",
-        "        labels_batch = self.processor.tokenizer.pad(label_features, return_tensors=\"pt\")\n",
-        "\n",
-        "        # replace padding with -100 to ignore loss correctly\n",
-        "        labels = labels_batch[\"input_ids\"].masked_fill(labels_batch.attention_mask.ne(1), -100)\n",
-        "\n",
-        "        # if bos token is appended in previous tokenization step,\n",
-        "        # cut bos token here as it's append later anyways\n",
-        "        if (labels[:, 0] == self.decoder_start_token_id).all().cpu().item():\n",
-        "            labels = labels[:, 1:]\n",
-        "\n",
-        "        batch[\"labels\"] = labels\n",
-        "\n",
-        "        return batch"
-      ]
+    "deccbc3683c14832ac0e337f6b0970a0": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "DescriptionStyleModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
     },
-    {
-      "cell_type": "markdown",
-      "id": "3cae7dbf-8a50-456e-a3a8-7fd005390f86",
-      "metadata": {
-        "id": "3cae7dbf-8a50-456e-a3a8-7fd005390f86"
-      },
-      "source": [
-        "Let's initialise the data collator we've just defined:"
-      ]
+    "59e4108cbab04c5e975a118f82bfcdc5": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
     },
-    {
-      "cell_type": "code",
-      "execution_count": 82,
-      "id": "fc834702-c0d3-4a96-b101-7b87be32bf42",
-      "metadata": {
-        "id": "fc834702-c0d3-4a96-b101-7b87be32bf42"
-      },
-      "outputs": [],
-      "source": [
-        "data_collator = DataCollatorSpeechSeq2SeqWithPadding(\n",
-        "    processor=processor,\n",
-        "    decoder_start_token_id=model.config.decoder_start_token_id,\n",
-        ")"
-      ]
+    "a14648f6c1444a7887c60010cf6b857d": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "DescriptionStyleModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
     },
-    {
-      "cell_type": "markdown",
-      "id": "d62bb2ab-750a-45e7-82e9-61d6f4805698",
-      "metadata": {
-        "id": "d62bb2ab-750a-45e7-82e9-61d6f4805698"
-      },
-      "source": [
-        "### Evaluation Metrics"
-      ]
+    "d7c556f97e124354827b5c0ada8d1007": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
     },
-    {
-      "cell_type": "code",
-      "execution_count": 83,
-      "id": "b22b4011-f31f-4b57-b684-c52332f92890",
-      "metadata": {
-        "id": "b22b4011-f31f-4b57-b684-c52332f92890"
-      },
-      "outputs": [],
-      "source": [
-        "import evaluate\n",
-        "\n",
-        "metric = evaluate.load(\"wer\")"
-      ]
+    "bb60ca22fea548119375630b057daa1e": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "ButtonStyleModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ButtonStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "button_color": null,
+      "font_weight": ""
+     }
     },
-    {
-      "cell_type": "code",
-      "execution_count": 84,
-      "id": "23959a70-22d0-4ffe-9fa1-72b61e75bb52",
-      "metadata": {
-        "id": "23959a70-22d0-4ffe-9fa1-72b61e75bb52"
-      },
-      "outputs": [],
-      "source": [
-        "def compute_metrics(pred):\n",
-        "    pred_ids = pred.predictions\n",
-        "    label_ids = pred.label_ids\n",
-        "\n",
-        "    # replace -100 with the pad_token_id\n",
-        "    label_ids[label_ids == -100] = tokenizer.pad_token_id\n",
-        "\n",
-        "    # we do not want to group tokens when computing the metrics\n",
-        "    pred_str = tokenizer.batch_decode(pred_ids, skip_special_tokens=True)\n",
-        "    label_str = tokenizer.batch_decode(label_ids, skip_special_tokens=True)\n",
-        "\n",
-        "    wer = 100 * metric.compute(predictions=pred_str, references=label_str)\n",
-        "\n",
-        "    return {\"wer\": wer}"
-      ]
+    "8633572a6144474c92724493882fa056": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
     },
-    {
-      "cell_type": "code",
-      "execution_count": 85,
-      "id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a",
-      "metadata": {
-        "id": "0ae3e9af-97b7-4aa0-ae85-20b23b5bcb3a"
-      },
-      "outputs": [],
-      "source": [
-        "from transformers import Seq2SeqTrainingArguments\n",
-        "\n",
-        "training_args = Seq2SeqTrainingArguments(\n",
-        "    output_dir=\"./whisper-small-hi\",  # change to a repo name of your choice\n",
-        "    per_device_train_batch_size=8,\n",
-        "    gradient_accumulation_steps=1,  # increase by 2x for every 2x decrease in batch size\n",
-        "    learning_rate=1e-5,\n",
-        "    warmup_steps=500,\n",
-        "    max_steps=100,\n",
-        "    gradient_checkpointing=True,\n",
-        "    fp16=True,\n",
-        "    eval_strategy=\"steps\",\n",
-        "    per_device_eval_batch_size=4,\n",
-        "    predict_with_generate=True,\n",
-        "    generation_max_length=225,\n",
-        "    save_steps=1000,\n",
-        "    eval_steps=1000,\n",
-        "    logging_steps=25,\n",
-        "    report_to=[\"tensorboard\"],\n",
-        "    load_best_model_at_end=True,\n",
-        "    metric_for_best_model=\"wer\",\n",
-        "    greater_is_better=False,\n",
-        "    push_to_hub=False,\n",
-        ")"
-      ]
+    "4f4060fcf3754ca4a729a5ecb0a256a2": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "DescriptionStyleModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
     },
-    {
-      "cell_type": "markdown",
-      "id": "b3a944d8-3112-4552-82a0-be25988b3857",
-      "metadata": {
-        "id": "b3a944d8-3112-4552-82a0-be25988b3857"
-      },
-      "source": [
-        "**Note**: if one does not want to upload the model checkpoints to the Hub,\n",
-        "set `push_to_hub=False`."
-      ]
+    "49dd1f5779dc45ecbdf4b8378e6bb0da": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "LabelModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "LabelModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "LabelView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_d612d61478da44868d5b0f64037a71b2",
+      "placeholder": "​",
+      "style": "IPY_MODEL_5f9252200218437893e8314380d75f0c",
+      "value": "Connecting..."
+     }
     },
-    {
-      "cell_type": "markdown",
-      "id": "bac29114-d226-4f54-97cf-8718c9f94e1e",
-      "metadata": {
-        "id": "bac29114-d226-4f54-97cf-8718c9f94e1e"
-      },
-      "source": [
-        "We can forward the training arguments to the 🤗 Trainer along with our model,\n",
-        "dataset, data collator and `compute_metrics` function:"
-      ]
+    "d612d61478da44868d5b0f64037a71b2": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
     },
-    {
-      "cell_type": "code",
-      "execution_count": 86,
-      "id": "d546d7fe-0543-479a-b708-2ebabec19493",
-      "metadata": {
-        "id": "d546d7fe-0543-479a-b708-2ebabec19493",
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "outputId": "b4e4312d-0c87-45d5-f3ff-456f1643699b"
-      },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "/tmp/ipython-input-456700979.py:3: FutureWarning: `tokenizer` is deprecated and will be removed in version 5.0.0 for `Seq2SeqTrainer.__init__`. Use `processing_class` instead.\n",
-            "  trainer = Seq2SeqTrainer(\n"
-          ]
-        }
-      ],
-      "source": [
-        "from transformers import Seq2SeqTrainer\n",
-        "\n",
-        "trainer = Seq2SeqTrainer(\n",
-        "    args=training_args,\n",
-        "    model=model,\n",
-        "    train_dataset=prepared_train_dataset,\n",
-        "    eval_dataset=prepared_test_dataset,\n",
-        "    data_collator=data_collator,\n",
-        "    compute_metrics=compute_metrics,\n",
-        "    tokenizer=processor.feature_extractor,\n",
-        ")"
-      ]
+    "5f9252200218437893e8314380d75f0c": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "DescriptionStyleModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
     },
-    {
-      "cell_type": "markdown",
-      "source": [
-        "We'll save the processor object once before starting training. Since the processor is not trainable, it won't change over the course of training:"
+    "e8f08a9cb8fc4ebc9f2264863c6c1559": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "HBoxModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_2221666469ec4e8f9261b0f39978e606",
+       "IPY_MODEL_813a9ada6f8742e586bdda3924a95393",
+       "IPY_MODEL_b2f9e261afa74ffab111e899c13055ff"
       ],
-      "metadata": {
-        "id": "uOrRhDGtN5S4"
-      },
-      "id": "uOrRhDGtN5S4"
+      "layout": "IPY_MODEL_3f5a5b17ee314195bb933f106311cf39"
+     }
     },
-    {
-      "cell_type": "code",
-      "source": [
-        "processor.save_pretrained(training_args.output_dir)"
-      ],
-      "metadata": {
-        "id": "-2zQwMfEOBJq",
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "outputId": "941064fd-8318-4917-9974-f61092c15c45"
-      },
-      "id": "-2zQwMfEOBJq",
-      "execution_count": 87,
-      "outputs": [
-        {
-          "output_type": "execute_result",
-          "data": {
-            "text/plain": [
-              "[]"
-            ]
-          },
-          "metadata": {},
-          "execution_count": 87
-        }
-      ]
+    "2221666469ec4e8f9261b0f39978e606": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "HTMLModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_67a763a00dc34e55aaf9fd2c188dc0c3",
+      "placeholder": "​",
+      "style": "IPY_MODEL_7fa1f46e33ea417a95864959ffdf6492",
+      "value": "Map (num_proc=2): 100%"
+     }
     },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
-      "metadata": {
-        "id": "ee8b7b8e-1c9a-4d77-9137-1778a629e6de",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 75
-        },
-        "outputId": "8070efd8-3009-4235-e767-656181819cf1"
-      },
-      "outputs": [
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/plain": [
-              "<IPython.core.display.HTML object>"
-            ],
-            "text/html": [
-              "\n",
-              "    <div>\n",
-              "      \n",
-              "      <progress value='78' max='4000' style='width:300px; height:20px; vertical-align: middle;'></progress>\n",
-              "      [  78/4000 02:36 < 2:14:55, 0.48 it/s, Epoch 0.62/32]\n",
-              "    </div>\n",
-              "    <table border=\"1\" class=\"dataframe\">\n",
-              "  <thead>\n",
-              " <tr style=\"text-align: left;\">\n",
-              "      <th>Step</th>\n",
-              "      <th>Training Loss</th>\n",
-              "      <th>Validation Loss</th>\n",
-              "    </tr>\n",
-              "  </thead>\n",
-              "  <tbody>\n",
-              "  </tbody>\n",
-              "</table><p>"
-            ]
-          },
-          "metadata": {}
-        }
-      ],
-      "source": [
-        "trainer.train()"
-      ]
+    "813a9ada6f8742e586bdda3924a95393": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "FloatProgressModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_93d31d1a572f4c18b7b5910c59b9e88e",
+      "max": 1000,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_d30a59903dd84170a89f9f005e775f7a",
+      "value": 1000
+     }
     },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "id": "c704f91e-241b-48c9-b8e0-f0da396a9663",
-      "metadata": {
-        "id": "c704f91e-241b-48c9-b8e0-f0da396a9663"
-      },
-      "outputs": [],
-      "source": [
-        "kwargs = {\n",
-        "    \"dataset_tags\": \"mozilla-foundation/common_voice_11_0\",\n",
-        "    \"dataset\": \"Common Voice 11.0\",  # a 'pretty' name for the training dataset\n",
-        "    \"dataset_args\": \"config: hi, split: test\",\n",
-        "    \"language\": \"hi\",\n",
-        "    \"model_name\": \"Whisper Small Hi - Sanchit Gandhi\",  # a 'pretty' name for our model\n",
-        "    \"finetuned_from\": \"openai/whisper-small\",\n",
-        "    \"tasks\": \"automatic-speech-recognition\",\n",
-        "}"
-      ]
+    "b2f9e261afa74ffab111e899c13055ff": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "HTMLModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_309c9ef88e464d258c4fadf3d8aea7dd",
+      "placeholder": "​",
+      "style": "IPY_MODEL_fc75a9d13b5e4e64975e656a11bf6b61",
+      "value": " 1000/1000 [00:45&lt;00:00,  1.09s/ examples]"
+     }
     },
-    {
-      "cell_type": "markdown",
-      "id": "090d676a-f944-4297-a938-a40eda0b2b68",
-      "metadata": {
-        "id": "090d676a-f944-4297-a938-a40eda0b2b68"
-      },
-      "source": [
-        "The training results can now be uploaded to the Hub. To do so, execute the `push_to_hub` command and save the preprocessor object we created:"
-      ]
+    "3f5a5b17ee314195bb933f106311cf39": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
     },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "id": "d7030622-caf7-4039-939b-6195cdaa2585",
-      "metadata": {
-        "id": "d7030622-caf7-4039-939b-6195cdaa2585"
-      },
-      "outputs": [],
-      "source": [
-        "trainer.push_to_hub(**kwargs)"
-      ]
+    "67a763a00dc34e55aaf9fd2c188dc0c3": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
     },
-    {
-      "cell_type": "markdown",
-      "id": "34d4360d-5721-426e-b6ac-178f833fedeb",
-      "metadata": {
-        "id": "34d4360d-5721-426e-b6ac-178f833fedeb"
-      },
-      "source": [
-        "## Building a Demo"
-      ]
+    "7fa1f46e33ea417a95864959ffdf6492": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "DescriptionStyleModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
     },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "id": "e0ace3aa-1ef3-45cb-933f-6ddca037c5aa",
-      "metadata": {
-        "id": "e0ace3aa-1ef3-45cb-933f-6ddca037c5aa"
-      },
-      "outputs": [],
-      "source": [
-        "from transformers import pipeline\n",
-        "import gradio as gr\n",
-        "\n",
-        "pipe = pipeline(model=\"sanchit-gandhi/whisper-small-hi\")  # change to \"your-username/the-name-you-picked\"\n",
-        "\n",
-        "def transcribe(audio):\n",
-        "    text = pipe(audio)[\"text\"]\n",
-        "    return text\n",
-        "\n",
-        "iface = gr.Interface(\n",
-        "    fn=transcribe,\n",
-        "    inputs=gr.Audio(source=\"microphone\", type=\"filepath\"),\n",
-        "    outputs=\"text\",\n",
-        "    title=\"Whisper Small Hindi\",\n",
-        "    description=\"Realtime demo for Hindi speech recognition using a fine-tuned Whisper small model.\",\n",
-        ")\n",
-        "\n",
-        "iface.launch()"
-      ]
-    }
-  ],
-  "metadata": {
-    "kernelspec": {
-      "display_name": "Python 3",
-      "name": "python3"
+    "93d31d1a572f4c18b7b5910c59b9e88e": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
     },
-    "language_info": {
-      "codemirror_mode": {
-        "name": "ipython",
-        "version": 3
-      },
-      "file_extension": ".py",
-      "mimetype": "text/x-python",
-      "name": "python",
-      "nbconvert_exporter": "python",
-      "pygments_lexer": "ipython3",
-      "version": "3.8.9"
+    "d30a59903dd84170a89f9f005e775f7a": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "ProgressStyleModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
     },
-    "colab": {
-      "provenance": [],
-      "gpuType": "T4",
-      "include_colab_link": true
+    "309c9ef88e464d258c4fadf3d8aea7dd": {
+     "model_module": "@jupyter-widgets/base",
+     "model_name": "LayoutModel",
+     "model_module_version": "1.2.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
     },
-    "accelerator": "GPU",
-    "widgets": {
-      "application/vnd.jupyter.widget-state+json": {
-        "3e1ffb4a30784946bd63dd0aacf08e5a": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "VBoxModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "VBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "VBoxView",
-            "box_style": "",
-            "children": [],
-            "layout": "IPY_MODEL_e6ec79893ee74a378b19f89e94380115"
-          }
-        },
-        "60840eccbe824ab790ba07c3cdba6fd4": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_f2f6c02b6c1140dd9681daf6c3fedcdf",
-            "placeholder": "​",
-            "style": "IPY_MODEL_570d124779574118a9d6493621ab65ce",
-            "value": "<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svg\nalt='Hugging Face'> <br> Copy a token from <a\nhref=\"https://huggingface.co/settings/tokens\" target=\"_blank\">your Hugging Face\ntokens page</a> and paste it below. <br> Immediately click login after copying\nyour token or it might be stored in plain text in this notebook file. </center>"
-          }
-        },
-        "7c6d00d2f34d4681acd6bba4b5b9e76a": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "PasswordModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "PasswordModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "PasswordView",
-            "continuous_update": true,
-            "description": "Token:",
-            "description_tooltip": null,
-            "disabled": false,
-            "layout": "IPY_MODEL_ed8e901c90cb4c018ed11f54cb995ea2",
-            "placeholder": "​",
-            "style": "IPY_MODEL_deccbc3683c14832ac0e337f6b0970a0",
-            "value": ""
-          }
-        },
-        "08539a062cd94849bf52b2d778714fbc": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "CheckboxModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "CheckboxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "CheckboxView",
-            "description": "Add token as git credential?",
-            "description_tooltip": null,
-            "disabled": false,
-            "indent": true,
-            "layout": "IPY_MODEL_59e4108cbab04c5e975a118f82bfcdc5",
-            "style": "IPY_MODEL_a14648f6c1444a7887c60010cf6b857d",
-            "value": false
-          }
-        },
-        "73c0f925b4f6468f8cd676445a5a5a56": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "ButtonModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ButtonModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ButtonView",
-            "button_style": "",
-            "description": "Login",
-            "disabled": false,
-            "icon": "",
-            "layout": "IPY_MODEL_d7c556f97e124354827b5c0ada8d1007",
-            "style": "IPY_MODEL_bb60ca22fea548119375630b057daa1e",
-            "tooltip": ""
-          }
-        },
-        "33b6d32edbaa4cde85297e9cdb067847": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_8633572a6144474c92724493882fa056",
-            "placeholder": "​",
-            "style": "IPY_MODEL_4f4060fcf3754ca4a729a5ecb0a256a2",
-            "value": "\n<b>Pro Tip:</b> If you don't already have one, you can create a dedicated\n'notebooks' token with 'write' access, that you can then easily reuse for all\nnotebooks. </center>"
-          }
-        },
-        "e6ec79893ee74a378b19f89e94380115": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": "center",
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": "flex",
-            "flex": null,
-            "flex_flow": "column",
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": "50%"
-          }
-        },
-        "f2f6c02b6c1140dd9681daf6c3fedcdf": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "570d124779574118a9d6493621ab65ce": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "ed8e901c90cb4c018ed11f54cb995ea2": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "deccbc3683c14832ac0e337f6b0970a0": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "59e4108cbab04c5e975a118f82bfcdc5": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "a14648f6c1444a7887c60010cf6b857d": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "d7c556f97e124354827b5c0ada8d1007": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "bb60ca22fea548119375630b057daa1e": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "ButtonStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ButtonStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "button_color": null,
-            "font_weight": ""
-          }
-        },
-        "8633572a6144474c92724493882fa056": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "4f4060fcf3754ca4a729a5ecb0a256a2": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "49dd1f5779dc45ecbdf4b8378e6bb0da": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "LabelModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "LabelModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "LabelView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_d612d61478da44868d5b0f64037a71b2",
-            "placeholder": "​",
-            "style": "IPY_MODEL_5f9252200218437893e8314380d75f0c",
-            "value": "Connecting..."
-          }
-        },
-        "d612d61478da44868d5b0f64037a71b2": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "5f9252200218437893e8314380d75f0c": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "e8f08a9cb8fc4ebc9f2264863c6c1559": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HBoxModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_2221666469ec4e8f9261b0f39978e606",
-              "IPY_MODEL_813a9ada6f8742e586bdda3924a95393",
-              "IPY_MODEL_b2f9e261afa74ffab111e899c13055ff"
-            ],
-            "layout": "IPY_MODEL_3f5a5b17ee314195bb933f106311cf39"
-          }
-        },
-        "2221666469ec4e8f9261b0f39978e606": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_67a763a00dc34e55aaf9fd2c188dc0c3",
-            "placeholder": "​",
-            "style": "IPY_MODEL_7fa1f46e33ea417a95864959ffdf6492",
-            "value": "Map (num_proc=2): 100%"
-          }
-        },
-        "813a9ada6f8742e586bdda3924a95393": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "FloatProgressModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_93d31d1a572f4c18b7b5910c59b9e88e",
-            "max": 1000,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_d30a59903dd84170a89f9f005e775f7a",
-            "value": 1000
-          }
-        },
-        "b2f9e261afa74ffab111e899c13055ff": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_309c9ef88e464d258c4fadf3d8aea7dd",
-            "placeholder": "​",
-            "style": "IPY_MODEL_fc75a9d13b5e4e64975e656a11bf6b61",
-            "value": " 1000/1000 [00:45&lt;00:00,  1.09s/ examples]"
-          }
-        },
-        "3f5a5b17ee314195bb933f106311cf39": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "67a763a00dc34e55aaf9fd2c188dc0c3": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "7fa1f46e33ea417a95864959ffdf6492": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "93d31d1a572f4c18b7b5910c59b9e88e": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "d30a59903dd84170a89f9f005e775f7a": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "ProgressStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "309c9ef88e464d258c4fadf3d8aea7dd": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "fc75a9d13b5e4e64975e656a11bf6b61": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        }
-      }
+    "fc75a9d13b5e4e64975e656a11bf6b61": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_name": "DescriptionStyleModel",
+     "model_module_version": "1.5.0",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
     }
-  },
-  "nbformat": 4,
-  "nbformat_minor": 5
-}
\ No newline at end of file
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/llama/run_cpu.py b/llama/run_cpu.py
index 1e76280..43090c5 100644
--- a/llama/run_cpu.py
+++ b/llama/run_cpu.py
@@ -209,9 +209,9 @@ def sample_top_p(probs, p):
     return next_token
 
 
-# Örnek kullanım
+# Example usage
 if __name__ == "__main__":
-    # Modeli yükle
+    # Load the model
     llama = Llama.build(
         ckpt_dir='checkpoints',
         tokenizer_path='checkpoints/tokenizer.model',
@@ -219,13 +219,13 @@ def sample_top_p(probs, p):
         max_batch_size=2,  # updated to allow 2 prompts
     )
 
-    # Örnek prompt
-    prompts = ["Merhaba, nasılsın?", "Python programlama hakkında bilgi verir misin?"]
+    # Example prompts
+    prompts = ["Hello, how are you?", "Could you share some information about Python programming?"]
 
-    # Metin üretme
+    # Generate completions
     results = llama.text_completion(prompts, max_gen_len=50)
 
-    # Sonuçları yazdır
+    # Print results
     for result in results:
         print(result['generation'])
         print("------")
\ No newline at end of file
diff --git a/llama/save_checkpoint.py b/llama/save_checkpoint.py
index 7a0a1ae..f12ee48 100644
--- a/llama/save_checkpoint.py
+++ b/llama/save_checkpoint.py
@@ -3,7 +3,7 @@
 import torch
 from model import Transformer, ModelArgs
 
-# Model parametrelerini tanımla
+# Define model parameters
 params = ModelArgs(
     dim=512,  # Same change to avoid mismatch
     n_layers=16,
@@ -13,9 +13,9 @@
     max_batch_size=8,
 )
 
-# Modeli oluştur
+# Build the model
 model = Transformer(params)
 
-# Model ağırlıklarını kaydet
+# Save the model weights
 torch.save(model.state_dict(), "checkpoints/consolidated.00.pth")
-print("Model ağırlıkları 'checkpoints/consolidated.00.pth' olarak kaydedildi.")
\ No newline at end of file
+print("Model weights saved to 'checkpoints/consolidated.00.pth'.")
\ No newline at end of file
diff --git a/llama/save_params.py b/llama/save_params.py
index 71adb21..eb6f4cb 100644
--- a/llama/save_params.py
+++ b/llama/save_params.py
@@ -3,7 +3,7 @@
 import json
 from model import ModelArgs
 
-# Model parametrelerini tanımla
+# Define model parameters
 params = ModelArgs(
     dim=512,  # Updated to be divisible by 16 heads
     n_layers=16,
@@ -14,7 +14,7 @@
 )
 
 
-# Parametreleri bir sözlük olarak kaydet
+# Store parameters as a dictionary
 params_dict = {
     "dim": params.dim,
     "n_layers": params.n_layers,
@@ -24,7 +24,7 @@
     "max_batch_size": params.max_batch_size,
 }
 
-# JSON dosyasına kaydet
+# Write to JSON
 with open("checkpoints/params.json", "w") as f:
     json.dump(params_dict, f, indent=4)
-print("Model parametreleri 'checkpoints/params.json' olarak kaydedildi.")
\ No newline at end of file
+print("Model parameters saved to 'checkpoints/params.json'.")
\ No newline at end of file
diff --git a/test-time-scaling.py b/test-time-scaling.py
index 34a38c2..1bfdf5f 100644
--- a/test-time-scaling.py
+++ b/test-time-scaling.py
@@ -57,7 +57,10 @@ def score_response(response):
     def iterative_refinement(self, prompt, iterations=5, max_new_tokens=100):
         current_response = self.generate_single_response(prompt, max_new_tokens)
         for i in range(iterations):
-            refinement_prompt = f"{prompt}\n\nÖnceki yanıt: {current_response}\n\nBu yanıtı daha iyi hale getir:"
+            refinement_prompt = (
+                f"{prompt}\n\nPrevious response: {current_response}"
+                "\n\nImprove this answer:"
+            )
             current_response = self.generate_single_response(refinement_prompt, max_new_tokens, temperature=0.6)
         return current_response
 
@@ -89,17 +92,17 @@ def score_response(response):
 def benchmark_strategies(model_name="Qwen/Qwen3-0.6B"):
     console = Console()
     tts = TestTimeScaling(model_name)
-    prompt = "Yapay zeka gelecekte insanlığın hayatını nasıl etkileyecek?"
+    prompt = "How will artificial intelligence impact human life in the future?"
     results = []
 
     strategies = [
         ("Best-of-N", lambda: tts.best_of_n_sampling(prompt, num_samples=7, max_new_tokens=80)),
-        ("İteratif İyileştirme", lambda: tts.iterative_refinement(prompt, iterations=5, max_new_tokens=80)),
-        ("Konsensüs Örneklemesi", lambda: tts.consensus_sampling(prompt, num_samples=7, max_new_tokens=80)),
+        ("Iterative Refinement", lambda: tts.iterative_refinement(prompt, iterations=5, max_new_tokens=80)),
+        ("Consensus Sampling", lambda: tts.consensus_sampling(prompt, num_samples=7, max_new_tokens=80)),
     ]
 
     for name, func in strategies:
-        console.print(f"\n[bold cyan]⏳ {name} başlatıldı...[/bold cyan]")
+        console.print(f"\n[bold cyan]⏳ {name} started...[/bold cyan]")
         start = time.time()
         response = func()
         end = time.time()
@@ -110,15 +113,15 @@ def benchmark_strategies(model_name="Qwen/Qwen3-0.6B"):
             "response": response
         })
         results.append(metrics)
-        console.print(Panel(f"[bold green]{name} Yanıtı:[/bold green]\n{response}", expand=False))
+        console.print(Panel(f"[bold green]{name} Response:[/bold green]\n{response}", expand=False))
 
     df = pd.DataFrame(results)
-    table = Table(title="Benchmark Sonuçları", show_header=True, header_style="bold magenta")
-    table.add_column("Strateji", style="cyan")
-    table.add_column("Süre (sn)", justify="right", style="green")
-    table.add_column("Kelime Sayısı", justify="right", style="yellow")
-    table.add_column("Benzersiz Kelimeler", justify="right", style="yellow")
-    table.add_column("Skor", justify="right", style="red")
+    table = Table(title="Benchmark Results", show_header=True, header_style="bold magenta")
+    table.add_column("Strategy", style="cyan")
+    table.add_column("Time (s)", justify="right", style="green")
+    table.add_column("Word Count", justify="right", style="yellow")
+    table.add_column("Unique Words", justify="right", style="yellow")
+    table.add_column("Score", justify="right", style="red")
 
     for _, row in df.iterrows():
         table.add_row(