From 62b91f4a6a2130ec25c082983c8210cde3e7afde Mon Sep 17 00:00:00 2001
From: Emre <69351390+emredeveloper@users.noreply.github.com>
Date: Sun, 19 Oct 2025 21:01:26 +0300
Subject: [PATCH] Translate remaining Turkish comments and user-facing strings across utilities

---
 Architecture/partial-rope-full-rope.py   | 199 ++++---
 Architecture/sigmoid-gates.py            | 148 +++---
 Genel-5/PROJE_README.md                  |  60 +--
 Genel-5/deeplearning-tracer.py           | 270 +++++-----
 Genel-5/dit_implementation.py            | 650 +++++++++++------------
 Genel-5/llada.py                         |  30 +-
 Genel-5/modern_llm_components.py         |  66 +--
 Genel-5/training_inference_techniques.py |   6 +-
 Time series - Transformers/predict.py    |  50 +-
 Time series - Transformers/train.py      |  20 +-
 Tokenizer/basit_tokenizer.py             |  55 +-
 11 files changed, 766 insertions(+), 788 deletions(-)

diff --git a/Architecture/partial-rope-full-rope.py b/Architecture/partial-rope-full-rope.py
index 5bba030..2a3eae4 100644
--- a/Architecture/partial-rope-full-rope.py
+++ b/Architecture/partial-rope-full-rope.py
@@ -8,7 +8,7 @@
 import matplotlib.pyplot as plt
 
 class PartialRoPE(nn.Module):
-    """Partial RoPE implementasyonu - sadece belirli bir oranı döndürür"""
+    """Partial RoPE implementation that rotates only a fraction of dimensions."""
     def __init__(self, dim, max_position_embeddings=2048, base=10000, partial_rotary_factor=0.5):
         super().__init__()
         self.dim = dim
@@ -16,7 +16,7 @@ def __init__(self, dim, max_position_embeddings=2048, base=10000, partial_rotary
         self.base = base
         self.partial_rotary_factor = partial_rotary_factor
         
-        # Sadece partial faktörü kadar dimension kullan
+        # Use only the fraction defined by the partial factor
         self.rotary_dim = int(self.dim * self.partial_rotary_factor)
         
         inv_freq = 1.0 / (self.base ** (torch.arange(0, self.rotary_dim, 2).float() / self.rotary_dim))
@@ -43,7 +43,7 @@ def forward(self, q, k, seq_len=None):
         if seq_len > self.max_seq_len_cached:
             self._set_cos_sin_cache(seq_len)
         
-        # Sadece rotary_dim kadar uygula
+        # Apply RoPE only to the rotary portion
         q_rot = q[..., :self.rotary_dim]
         q_pass = q[..., self.rotary_dim:]
         k_rot = k[..., :self.rotary_dim]
@@ -52,11 +52,11 @@ def forward(self, q, k, seq_len=None):
         cos = self.cos_cached[:seq_len, :].unsqueeze(0).unsqueeze(0)
         sin = self.sin_cached[:seq_len, :].unsqueeze(0).unsqueeze(0)
         
-        # RoPE sadece rotary kısmına uygula
+        # Rotate only the rotary slice; the passthrough slice is left unchanged
         q_rot_embed = (q_rot * cos) + (self._rotate_half(q_rot) * sin)
         k_rot_embed = (k_rot * cos) + (self._rotate_half(k_rot) * sin)
         
-        # Rotary ve pass kısımlarını birleştir
+        # Concatenate rotary and passthrough sections
         q_embed = torch.cat([q_rot_embed, q_pass], dim=-1)
         k_embed = torch.cat([k_rot_embed, k_pass], dim=-1)
         
@@ -64,7 +64,7 @@ def forward(self, q, k, seq_len=None):
 
 
 class FullRoPE(nn.Module):
-    """Tam RoPE implementasyonu"""
+    """Full RoPE implementation."""
     def __init__(self, dim, max_position_embeddings=2048, base=10000):
         super().__init__()
         self.dim = dim
@@ -105,7 +105,7 @@ def forward(self, q, k, seq_len=None):
 
 
 class AttentionWithRoPE(nn.Module):
-    """RoPE kullanan Attention katmanı"""
+    """Attention layer that uses RoPE."""
     def __init__(self, dim, num_heads, rope_module):
         super().__init__()
         self.dim = dim
@@ -123,15 +123,15 @@ def __init__(self, dim, num_heads, rope_module):
     def forward(self, x, mask=None):
         batch_size, seq_len, _ = x.shape
         
-        # Q, K, V projeksiyonları
+        # Q, K, V projections
         q = self.q_proj(x).view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
         k = self.k_proj(x).view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
         v = self.v_proj(x).view(batch_size, seq_len, self.num_heads, self.head_dim).transpose(1, 2)
         
-        # RoPE uygula
+        # Apply RoPE
         q, k = self.rope(q, k)
         
-        # Attention hesapla
+        # Compute attention
         attn_scores = torch.matmul(q, k.transpose(-2, -1)) * self.scale
         
         if mask is not None:
@@ -140,7 +140,7 @@ def forward(self, x, mask=None):
         attn_probs = F.softmax(attn_scores, dim=-1)
         attn_output = torch.matmul(attn_probs, v)
         
-        # Çıktıyı birleştir
+        # Merge the heads back together
         attn_output = attn_output.transpose(1, 2).contiguous().view(batch_size, seq_len, self.dim)
         output = self.out_proj(attn_output)
         
@@ -148,7 +148,7 @@ def forward(self, x, mask=None):
 
 
 class SimpleTransformerBlock(nn.Module):
-    """Basit Transformer bloğu"""
+    """Simple Transformer block."""
     def __init__(self, dim, num_heads, rope_module, mlp_ratio=4):
         super().__init__()
         self.attention = AttentionWithRoPE(dim, num_heads, rope_module)
@@ -170,7 +170,7 @@ def forward(self, x, mask=None):
 
 
 class LanguageModel(nn.Module):
-    """Basit dil modeli"""
+    """Simple language model."""
     def __init__(self, vocab_size, dim, num_heads, num_layers, rope_module):
         super().__init__()
         self.embedding = nn.Embedding(vocab_size, dim)
@@ -194,17 +194,17 @@ def forward(self, input_ids, mask=None):
 
 
 def create_causal_mask(seq_len, device):
-    """Causal mask oluştur"""
+    """Create a causal mask."""
     mask = torch.triu(torch.ones(seq_len, seq_len, device=device), diagonal=1)
     return mask == 0
 
 
 def train_step(model, data, labels, optimizer, device):
-    """Tek eğitim adımı"""
+    """Single training step."""
     model.train()
     data, labels = data.to(device), labels.to(device)
     
-    # Causal mask oluştur
+    # Build the causal mask
     seq_len = data.shape[1]
     mask = create_causal_mask(seq_len, device)
     
@@ -221,7 +221,7 @@ def train_step(model, data, labels, optimizer, device):
 
 
 def evaluate_perplexity(model, data, labels, device):
-    """Perplexity hesapla"""
+    """Compute perplexity."""
     model.eval()
     data, labels = data.to(device), labels.to(device)
     
@@ -236,27 +236,27 @@ def evaluate_perplexity(model, data, labels, device):
 
 
 def benchmark_rope_performance():
-    """Partial ve Full RoPE performans karşılaştırması"""
-    
-    # Türkçe örnek metinler
+    """Compare the performance of partial and full RoPE."""
+
+    # Sample texts used as training data (English now; the tokenizer below is still the Turkish BERT one)
     turkish_texts = [
-        "Merhaba dünya! Bugün hava çok güzel.",
-        "İstanbul'un tarihi ve kültürel zenginlikleri dünyaca ünlüdür.",
-        "Türk mutfağı, zengin lezzetleri ve çeşitliliği ile tanınır.",
-        "Yapay zeka teknolojileri hızla gelişmektedir.",
-        "Kitap okumak, hayal gücünü geliştiren harika bir aktivitedir.",
-        "Spor yapmak sağlıklı bir yaşam için önemlidir.",
-        "Müzik, evrensel bir dil olarak kabul edilir.",
-        "Doğa, insanlara huzur ve ilham verir.",
-        "Eğitim, toplumların gelişimi için temel taştır.",
-        "Teknoloji hayatımızı kolaylaştırır ama dengeli kullanılmalıdır."
+        "Hello world! The weather is wonderful today.",
+        "Istanbul's historic and cultural richness is famous worldwide.",
+        "Turkish cuisine is known for its rich flavours and variety.",
+        "Artificial intelligence technologies are rapidly advancing.",
+        "Reading books is a fantastic activity that boosts imagination.",
+        "Exercising is important for a healthy life.",
+        "Music is considered a universal language.",
+        "Nature gives people peace and inspiration.",
+        "Education is the cornerstone of societal development.",
+        "Technology makes our lives easier but should be used in balance."
     ]
-    
-    # Tokenizer yükle
-    print("Tokenizer yükleniyor...")
+
+    # Load the tokenizer
+    print("Loading tokenizer...")
     tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-turkish-cased")
     
-    # Padding token ayarla
+    # Configure the padding token
     if tokenizer.pad_token is None:
         if tokenizer.eos_token is not None:
             tokenizer.pad_token = tokenizer.eos_token
@@ -265,10 +265,10 @@ def benchmark_rope_performance():
         else:
             tokenizer.add_special_tokens({'pad_token': '[PAD]'})
     
-    # Metinleri tokenize et
-    print("Metinler tokenize ediliyor...")
+    # Tokenise the texts
+    print("Tokenising texts...")
     encoded = tokenizer(
-        turkish_texts * 10,  # Daha fazla veri için tekrarla
+        turkish_texts * 10,  # Repeat to create additional data
         padding=True,
         truncation=True,
         max_length=64,
@@ -278,7 +278,7 @@ def benchmark_rope_performance():
     input_ids = encoded["input_ids"]
     labels = input_ids.clone()
     
-    # Model parametreleri
+    # Model parameters
     vocab_size = tokenizer.vocab_size
     dim = 128
     num_heads = 8
@@ -288,9 +288,9 @@ def benchmark_rope_performance():
     learning_rate = 1e-3
     
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-    print(f"Cihaz: {device}")
+    print(f"Device: {device}")
     
-    # Veriyi batch'lere böl
+    # Prepare mini-batches
     num_samples = input_ids.shape[0]
     num_batches = num_samples // batch_size
     
@@ -299,8 +299,8 @@ def benchmark_rope_performance():
         "full_rope": {"losses": [], "perplexities": [], "times": []}
     }
     
-    # Partial RoPE modeli
-    print("\n=== Partial RoPE Eğitimi ===")
+    # Partial RoPE model
+    print("\n=== Training Partial RoPE ===")
     partial_rope = PartialRoPE(dim // num_heads, partial_rotary_factor=0.5)
     model_partial = LanguageModel(vocab_size, dim, num_heads, num_layers, partial_rope).to(device)
     optimizer_partial = torch.optim.Adam(model_partial.parameters(), lr=learning_rate)
@@ -329,8 +329,8 @@ def benchmark_rope_performance():
         
         print(f"Epoch {epoch+1}: Loss={avg_loss:.4f}, Perplexity={perplexity:.2f}, Time={epoch_time:.3f}s")
     
-    # Full RoPE modeli
-    print("\n=== Full RoPE Eğitimi ===")
+    # Full RoPE model
+    print("\n=== Training Full RoPE ===")
     full_rope = FullRoPE(dim // num_heads)
     model_full = LanguageModel(vocab_size, dim, num_heads, num_layers, full_rope).to(device)
     optimizer_full = torch.optim.Adam(model_full.parameters(), lr=learning_rate)
@@ -359,54 +359,53 @@ def benchmark_rope_performance():
         
         print(f"Epoch {epoch+1}: Loss={avg_loss:.4f}, Perplexity={perplexity:.2f}, Time={epoch_time:.3f}s")
     
-    # Sonuçları görselleştir
+    # Visualise the results
     visualize_results(results)
     
-    # Özet istatistikler
-    print("\n=== Performans Özeti ===")
+    # Summary statistics
+    print("\n=== Performance Summary ===")
     print(f"Partial RoPE - Final Loss: {results['partial_rope']['losses'][-1]:.4f}")
     print(f"Full RoPE - Final Loss: {results['full_rope']['losses'][-1]:.4f}")
     print(f"Partial RoPE - Final Perplexity: {results['partial_rope']['perplexities'][-1]:.2f}")
     print(f"Full RoPE - Final Perplexity: {results['full_rope']['perplexities'][-1]:.2f}")
     print(f"Partial RoPE - Avg Time/Epoch: {np.mean(results['partial_rope']['times']):.3f}s")
     print(f"Full RoPE - Avg Time/Epoch: {np.mean(results['full_rope']['times']):.3f}s")
-    
-    # Hız kazancı
+
+    # Speed-up percentage
     speed_gain = (np.mean(results['full_rope']['times']) - np.mean(results['partial_rope']['times'])) / np.mean(results['full_rope']['times']) * 100
-    print(f"\nPartial RoPE hız kazancı: %{speed_gain:.1f}")
+    print(f"\nPartial RoPE speed-up: {speed_gain:.1f}%")
 
 
 def visualize_results(results):
-    """Sonuçları görselleştir"""
+    """Plot loss, perplexity and per-epoch training time for both RoPE variants."""
     fig, axes = plt.subplots(1, 3, figsize=(15, 5))
     
     epochs = range(1, len(results["partial_rope"]["losses"]) + 1)
     
-    # Loss grafiği
-        # Loss grafiği
+    # Loss chart
     axes[0].plot(epochs, results["partial_rope"]["losses"], 'b-', label='Partial RoPE', linewidth=2)
     axes[0].plot(epochs, results["full_rope"]["losses"], 'r-', label='Full RoPE', linewidth=2)
     axes[0].set_xlabel('Epoch')
     axes[0].set_ylabel('Loss')
-    axes[0].set_title('Eğitim Loss Karşılaştırması')
+    axes[0].set_title('Training Loss Comparison')
     axes[0].legend()
     axes[0].grid(True, alpha=0.3)
     
-    # Perplexity grafiği
+    # Perplexity chart
     axes[1].plot(epochs, results["partial_rope"]["perplexities"], 'b-', label='Partial RoPE', linewidth=2)
     axes[1].plot(epochs, results["full_rope"]["perplexities"], 'r-', label='Full RoPE', linewidth=2)
     axes[1].set_xlabel('Epoch')
     axes[1].set_ylabel('Perplexity')
-    axes[1].set_title('Perplexity Karşılaştırması')
+    axes[1].set_title('Perplexity Comparison')
     axes[1].legend()
     axes[1].grid(True, alpha=0.3)
     
-    # Eğitim süresi grafiği
+    # Training time chart
     axes[2].plot(epochs, results["partial_rope"]["times"], 'b-', label='Partial RoPE', linewidth=2)
     axes[2].plot(epochs, results["full_rope"]["times"], 'r-', label='Full RoPE', linewidth=2)
     axes[2].set_xlabel('Epoch')
-    axes[2].set_ylabel('Süre (saniye)')
-    axes[2].set_title('Epoch Başına Eğitim Süresi')
+    axes[2].set_ylabel('Time (seconds)')
+    axes[2].set_title('Training Time per Epoch')
     axes[2].legend()
     axes[2].grid(True, alpha=0.3)
     
@@ -416,54 +415,54 @@ def visualize_results(results):
 
 
 def inference_comparison(model_partial, model_full, tokenizer, device):
-    """Çıkarım performansı karşılaştırması"""
-    print("\n=== Çıkarım Performansı Karşılaştırması ===")
+    """Compare inference behaviour between the models."""
+    print("\n=== Inference Performance Comparison ===")
     
     test_texts = [
-        "Bugün hava",
-        "Türkiye'nin başkenti",
-        "Yapay zeka",
-        "En sevdiğim yemek"
+        "Today the weather",
+        "The capital of Turkey",
+        "Artificial intelligence",
+        "My favourite meal"
     ]
     
     model_partial.eval()
     model_full.eval()
     
     for text in test_texts:
-        print(f"\nGiriş: '{text}'")
+        print(f"\nInput: '{text}'")
         
-        # Tokenize et
+        # Tokenise the prompt
         inputs = tokenizer(text, return_tensors="pt").to(device)
         input_ids = inputs["input_ids"]
         
-        # Partial RoPE ile tahmin
+        # Prediction with Partial RoPE
         with torch.no_grad():
             start_time = time.time()
             mask = create_causal_mask(input_ids.shape[1], device)
             logits_partial = model_partial(input_ids, mask)
             partial_time = time.time() - start_time
             
-            # En olası sonraki kelimeyi bul
+            # Select the most probable next token
             next_token_partial = torch.argmax(logits_partial[0, -1, :])
             next_word_partial = tokenizer.decode(next_token_partial)
         
-        # Full RoPE ile tahmin
+        # Prediction with Full RoPE
         with torch.no_grad():
             start_time = time.time()
             logits_full = model_full(input_ids, mask)
             full_time = time.time() - start_time
             
-            # En olası sonraki kelimeyi bul
+            # Select the most probable next token
             next_token_full = torch.argmax(logits_full[0, -1, :])
             next_word_full = tokenizer.decode(next_token_full)
         
-        print(f"  Partial RoPE tahmini: '{next_word_partial}' (Süre: {partial_time*1000:.2f}ms)")
-        print(f"  Full RoPE tahmini: '{next_word_full}' (Süre: {full_time*1000:.2f}ms)")
+        print(f"  Partial RoPE prediction: '{next_word_partial}' (Time: {partial_time*1000:.2f}ms)")
+        print(f"  Full RoPE prediction: '{next_word_full}' (Time: {full_time*1000:.2f}ms)")
 
 
 def memory_comparison():
-    """Bellek kullanımı karşılaştırması"""
-    print("\n=== Bellek Kullanımı Karşılaştırması ===")
+    """Contrast memory usage between the two approaches."""
+    print("\n=== Memory Usage Comparison ===")
     
     dim = 512
     seq_len = 1024
@@ -471,7 +470,7 @@ def memory_comparison():
     num_heads = 8
     head_dim = dim // num_heads
     
-    # Partial RoPE bellek kullanımı
+    # Memory usage for Partial RoPE
     partial_rope = PartialRoPE(head_dim, max_position_embeddings=seq_len, partial_rotary_factor=0.5)
     q = torch.randn(batch_size, num_heads, seq_len, head_dim)
     k = torch.randn(batch_size, num_heads, seq_len, head_dim)
@@ -485,24 +484,24 @@ def memory_comparison():
         _ = partial_rope(q, k)
         partial_memory = torch.cuda.max_memory_allocated() / 1024**2  # MB
         
-        # Full RoPE bellek kullanımı
+        # Memory usage for Full RoPE
         torch.cuda.reset_peak_memory_stats()
         full_rope = FullRoPE(head_dim, max_position_embeddings=seq_len).cuda()
         
         # Forward pass
         _ = full_rope(q, k)
         full_memory = torch.cuda.max_memory_allocated() / 1024**2  # MB
-        
-        print(f"Partial RoPE bellek kullanımı: {partial_memory:.2f} MB")
-        print(f"Full RoPE bellek kullanımı: {full_memory:.2f} MB")
-        print(f"Bellek tasarrufu: {(full_memory - partial_memory) / full_memory * 100:.1f}%")
+
+        print(f"Partial RoPE memory usage: {partial_memory:.2f} MB")
+        print(f"Full RoPE memory usage: {full_memory:.2f} MB")
+        print(f"Memory savings: {(full_memory - partial_memory) / full_memory * 100:.1f}%")
     else:
-        print("CUDA mevcut değil, bellek karşılaştırması yapılamıyor.")
+        print("CUDA is not available, memory comparison skipped.")
 
 
 def ablation_study():
-    """Farklı partial_rotary_factor değerleri için ablasyon çalışması"""
-    print("\n=== Ablasyon Çalışması: Farklı Partial Rotary Factor Değerleri ===")
+    """Ablation study for varying partial_rotary_factor values."""
+    print("\n=== Ablation Study: Partial Rotary Factor Variants ===")
     
     factors = [0.25, 0.5, 0.75, 1.0]
     dim = 64
@@ -521,7 +520,7 @@ def ablation_study():
         q = torch.randn(batch_size, 1, seq_len, dim)
         k = torch.randn(batch_size, 1, seq_len, dim)
         
-        # Performans ölçümü
+        # Measure execution time
         start_time = time.time()
         for _ in range(num_iterations):
             _ = rope(q, k)
@@ -532,18 +531,18 @@ def ablation_study():
         
         print(f"Factor {factor}: {avg_time:.3f} ms/iteration")
     
-    # Sonuçları görselleştir
+    # Visualise the ablation results
     plt.figure(figsize=(8, 6))
     factors_list = list(results.keys())
     times_list = list(results.values())
     
     plt.bar(factors_list, times_list, color=['blue', 'green', 'orange', 'red'])
     plt.xlabel('Partial Rotary Factor')
-    plt.ylabel('Ortalama Süre (ms)')
-    plt.title('Farklı Partial Rotary Factor Değerleri için Performans')
+    plt.ylabel('Average Time (ms)')
+    plt.title('Performance for Different Partial Rotary Factors')
     plt.grid(True, alpha=0.3)
     
-    # Değerleri bar üzerine yaz
+    # Annotate the bars with values
     for i, (factor, exec_time) in enumerate(zip(factors_list, times_list)):
         plt.text(i, exec_time + 0.01, f'{exec_time:.3f}', ha='center', va='bottom')
     
@@ -552,19 +551,19 @@ def ablation_study():
     plt.show()
 
 
-# Ana fonksiyon
+# Main entry point
 if __name__ == "__main__":
-    print("Partial RoPE vs Full RoPE Performans Karşılaştırması")
+    print("Partial RoPE vs Full RoPE Performance Benchmark")
     print("=" * 60)
-    
-    # Ana benchmark
+
+    # Main benchmark
     benchmark_rope_performance()
-    
-    # Bellek karşılaştırması
+
+    # Memory comparison
     memory_comparison()
-    
-    # Ablasyon çalışması
+
+    # Ablation study
     ablation_study()
-    
-    print("\n✅ Tüm testler tamamlandı!")
-    print("📊 Grafikler 'rope_comparison.png' ve 'ablation_study.png' olarak kaydedildi.")
\ No newline at end of file
+
+    print("\n✅ All benchmarks completed!")
+    print("📊 Charts saved as 'rope_comparison.png' and 'ablation_study.png'.")
\ No newline at end of file
diff --git a/Architecture/sigmoid-gates.py b/Architecture/sigmoid-gates.py
index f7da3a0..1e88f74 100644
--- a/Architecture/sigmoid-gates.py
+++ b/Architecture/sigmoid-gates.py
@@ -5,7 +5,7 @@
 import matplotlib.pyplot as plt
 
 class SigmoidGateExamples(nn.Module):
-    """Farklı sigmoid gate örnekleri"""
+    """Examples of different sigmoid gate mechanisms."""
     
     def __init__(self, input_dim, hidden_dim):
         super().__init__()
@@ -29,88 +29,88 @@ def __init__(self, input_dim, hidden_dim):
         self.highway_transform = nn.Linear(input_dim, input_dim)
     
     def simple_gate(self, x):
-        """Basit sigmoid gate örneği"""
-        # Gate değeri hesapla (0-1 arası)
+        """Basic sigmoid gate example."""
+        # Compute gate value between 0 and 1
         gate = torch.sigmoid(self.gate_linear(x))
         
-        # Gate'i uygula: çıktı = gate * input
+        # Apply the gate: output = gate * input
         output = gate * x[:, :self.hidden_dim]
         
         return output, gate
     
     def lstm_gates_example(self, x, h, c):
-        """LSTM'deki 4 sigmoid gate"""
+        """The four sigmoid gates used in an LSTM."""
         # x: input, h: hidden state, c: cell state
         combined = torch.cat([x, h], dim=1)
         gates = self.lstm_gates(combined)
         
-        # 4 gate'e ayır
+        # Split into the four gates
         i, f, g, o = gates.chunk(4, dim=1)
         
         # Sigmoid gates
-        i = torch.sigmoid(i)  # Input gate: neyi hatırlayacağız
-        f = torch.sigmoid(f)  # Forget gate: neyi unutacağız
-        o = torch.sigmoid(o)  # Output gate: neyi çıktı olarak vereceğiz
-        g = torch.tanh(g)     # Candidate values (gate değil)
+        i = torch.sigmoid(i)  # Input gate: how much of the candidate g is written to the cell
+        f = torch.sigmoid(f)  # Forget gate: how much of the previous cell state c is kept
+        o = torch.sigmoid(o)  # Output gate: what we expose as output
+        g = torch.tanh(g)     # Candidate values (not a sigmoid gate)
         
-        # Yeni cell state
+        # Updated cell state
         c_new = f * c + i * g
         
-        # Yeni hidden state
+        # Updated hidden state
         h_new = o * torch.tanh(c_new)
         
         return h_new, c_new, {'input': i, 'forget': f, 'output': o}
     
     def gru_gates_example(self, x, h):
-        """GRU'daki sigmoid gates"""
+        """Sigmoid gates inside a GRU."""
         combined = torch.cat([x, h], dim=1)
         gates = self.gru_gates(combined)
         
-        # 3 kısma ayır
+        # Split into three sections
         r, z, n = gates.chunk(3, dim=1)
         
-        # Reset gate: önceki bilginin ne kadarını kullanacağız
+        # Reset gate: computed and returned for inspection; not applied to the candidate below
         r = torch.sigmoid(r)
         
-        # Update gate: yeni ve eski bilgiyi nasıl birleştireceğiz
+        # Update gate: how to mix new and old information
         z = torch.sigmoid(z)
         
-        # Yeni hidden state adayı
+        # Candidate hidden state
         n = torch.tanh(n)
         
-        # Yeni hidden state
+        # Updated hidden state
         h_new = (1 - z) * n + z * h
         
         return h_new, {'reset': r, 'update': z}
     
     def glu_example(self, x):
-        """Gated Linear Unit (GLU)"""
-        # Linear dönüşüm
+        """Gated Linear Unit (GLU)."""
+        # Linear projection
         output = self.glu_linear(x)
         
-        # İkiye böl
+        # Split in two
         a, b = output.chunk(2, dim=1)
         
         # GLU: a * sigmoid(b)
         return a * torch.sigmoid(b)
     
     def highway_gate_example(self, x):
-        """Highway Network gate"""
-        # Transform gate (T): ne kadar dönüşüm uygulayacağız
+        """Highway Network gate."""
+        # Transform gate (T): how much of the transformed signal to use
         T = torch.sigmoid(self.highway_gate(x))
         
-        # Dönüştürülmüş veri
+        # Transformed data
         H = torch.relu(self.highway_transform(x))
         
-        # Highway formülü: y = T * H + (1 - T) * x
-        # T=1: tamamen dönüşüm, T=0: girdiyi olduğu gibi geçir
+        # Highway formula: y = T * H + (1 - T) * x
+        # T=1: fully transformed output, T=0: the input passes through unchanged
         output = T * H + (1 - T) * x
         
         return output, T
 
 
 class AttentionGate(nn.Module):
-    """Attention mekanizmasında sigmoid gate kullanımı"""
+    """Attention mechanism augmented with sigmoid gating."""
     
     def __init__(self, hidden_dim):
         super().__init__()
@@ -125,18 +125,18 @@ def forward(self, query, keys, values):
         """
         batch_size, seq_len, hidden_dim = keys.shape
         
-        # Query'yi genişlet
+        # Broadcast the query across the sequence dimension
         query_expanded = query.unsqueeze(1).expand(-1, seq_len, -1)
         
-        # Attention hesapla
+        # Compute the attention projection
         combined = torch.cat([query_expanded, keys], dim=2)
         attention_hidden = torch.tanh(self.attention_linear(combined))
         
-        # Sigmoid gate ile attention weights
+        # Attention weights from the sigmoid gate
         attention_scores = self.gate_linear(attention_hidden).squeeze(-1)
         attention_weights = torch.sigmoid(attention_scores)
         
-        # Normalize (opsiyonel - soft attention için)
+        # Normalize (optional, for soft attention)
         attention_weights = attention_weights / (attention_weights.sum(dim=1, keepdim=True) + 1e-8)
         
         # Weighted sum
@@ -146,13 +146,13 @@ def forward(self, query, keys, values):
 
 
 class SigmoidGatingMechanism(nn.Module):
-    """Genel amaçlı sigmoid gating mekanizması"""
+    """General-purpose sigmoid gating mechanism."""
     
     def __init__(self, input_dim, num_experts=4):
         super().__init__()
         self.num_experts = num_experts
         
-        # Her expert için bir ağ
+        # A small network for each expert
         self.experts = nn.ModuleList([
             nn.Linear(input_dim, input_dim) for _ in range(num_experts)
         ])
@@ -165,61 +165,61 @@ def __init__(self, input_dim, num_experts=4):
         )
     
     def forward(self, x):
-        # Expert çıktıları
+        # Collect expert outputs
         expert_outputs = torch.stack([expert(x) for expert in self.experts], dim=1)
         
-        # Gate değerleri (sigmoid)
+        # Sigmoid gate values
         gates = torch.sigmoid(self.gate_network(x))
         gates = gates.unsqueeze(-1)
         
-        # Ağırlıklı toplam
+        # Weighted sum of expert outputs
         output = (gates * expert_outputs).sum(dim=1)
         
         return output, gates.squeeze(-1)
 
 
 def visualize_sigmoid_gate():
-    """Sigmoid fonksiyonunu ve gate davranışını görselleştir"""
+    """Visualize the sigmoid function and gate behaviour."""
     x = np.linspace(-10, 10, 1000)
     sigmoid = 1 / (1 + np.exp(-x))
     
     fig, axes = plt.subplots(2, 2, figsize=(12, 10))
     
-    # 1. Sigmoid fonksiyonu
+    # 1. Sigmoid function
     axes[0, 0].plot(x, sigmoid, 'b-', linewidth=2)
     axes[0, 0].axhline(y=0.5, color='r', linestyle='--', alpha=0.5)
     axes[0, 0].axvline(x=0, color='r', linestyle='--', alpha=0.5)
-    axes[0, 0].set_title('Sigmoid Fonksiyonu')
+    axes[0, 0].set_title('Sigmoid Function')
     axes[0, 0].set_xlabel('x')
     axes[0, 0].set_ylabel('σ(x)')
     axes[0, 0].grid(True, alpha=0.3)
     
-    # 2. Gate çarpımı etkisi
+    # 2. Effect of multiplying by the gate
     input_signal = np.sin(x)
     gated_signal = sigmoid * input_signal
     
-    axes[0, 1].plot(x, input_signal, 'g-', label='Giriş sinyali', alpha=0.7)
-    axes[0, 1].plot(x, sigmoid, 'r-', label='Gate değeri', alpha=0.7)
-    axes[0, 1].plot(x, gated_signal, 'b-', label='Gate * Sinyal', linewidth=2)
-    axes[0, 1].set_title('Gate Çarpımı Etkisi')
+    axes[0, 1].plot(x, input_signal, 'g-', label='Input signal', alpha=0.7)
+    axes[0, 1].plot(x, sigmoid, 'r-', label='Gate value', alpha=0.7)
+    axes[0, 1].plot(x, gated_signal, 'b-', label='Gate * Signal', linewidth=2)
+    axes[0, 1].set_title('Effect of Gate Multiplication')
     axes[0, 1].set_xlabel('x')
     axes[0, 1].legend()
     axes[0, 1].grid(True, alpha=0.3)
     
-    # 3. Farklı gate değerleri
+    # 3. Different gate values
     gate_values = [0.1, 0.3, 0.5, 0.7, 0.9]
     colors = plt.cm.viridis(np.linspace(0, 1, len(gate_values)))
     
     for gate, color in zip(gate_values, colors):
         axes[1, 0].plot(x, gate * np.sin(x), color=color, label=f'Gate={gate}')
-    
-    axes[1, 0].set_title('Farklı Gate Değerlerinin Etkisi')
+
+    axes[1, 0].set_title('Effect of Different Gate Values')
     axes[1, 0].set_xlabel('x')
     axes[1, 0].set_ylabel('Gate * sin(x)')
     axes[1, 0].legend()
     axes[1, 0].grid(True, alpha=0.3)
     
-    # 4. LSTM gate dinamikleri
+    # 4. LSTM gate dynamics
     time_steps = 50
     forget_gate = np.random.beta(5, 2, time_steps)
     input_gate = np.random.beta(2, 5, time_steps)
@@ -228,9 +228,9 @@ def visualize_sigmoid_gate():
     axes[1, 1].plot(forget_gate, 'r-', label='Forget gate', linewidth=2)
     axes[1, 1].plot(input_gate, 'g-', label='Input gate', linewidth=2)
     axes[1, 1].plot(output_gate, 'b-', label='Output gate', linewidth=2)
-    axes[1, 1].set_title('LSTM Gate Dinamikleri (Örnek)')
-    axes[1, 1].set_xlabel('Zaman adımı')
-    axes[1, 1].set_ylabel('Gate değeri')
+    axes[1, 1].set_title('LSTM Gate Dynamics (Sample)')
+    axes[1, 1].set_xlabel('Time step')
+    axes[1, 1].set_ylabel('Gate value')
     axes[1, 1].legend()
     axes[1, 1].grid(True, alpha=0.3)
     axes[1, 1].set_ylim(0, 1)
@@ -241,10 +241,10 @@ def visualize_sigmoid_gate():
 
 
 def demonstrate_gate_effects():
-    """Gate'lerin etkilerini göster"""
-    print("=== Sigmoid Gate Etkileri Demonstrasyonu ===\n")
+    """Showcase how different gates behave."""
+    print("=== Sigmoid Gate Effects Demonstration ===\n")
     
-    # Örnek veri
+    # Sample data
     batch_size = 2
     input_dim = 4
     hidden_dim = 4
@@ -253,48 +253,48 @@ def demonstrate_gate_effects():
     h = torch.randn(batch_size, hidden_dim)
     c = torch.randn(batch_size, hidden_dim)
     
-    # Model oluştur
+    # Build model
     model = SigmoidGateExamples(input_dim, hidden_dim)
     
-    # 1. Basit gate
-    print("1. Basit Sigmoid Gate:")
+    # 1. Simple gate
+    print("1. Simple Sigmoid Gate:")
     output, gate = model.simple_gate(x)
-    print(f"   Giriş boyutu: {x.shape}")
-    print(f"   Gate değerleri: {gate[0, :4].detach().numpy()}")
-    print(f"   Çıktı: {output[0, :4].detach().numpy()}\n")
+    print(f"   Input shape: {x.shape}")
+    print(f"   Gate values: {gate[0, :4].detach().numpy()}")
+    print(f"   Output: {output[0, :4].detach().numpy()}\n")
     
     # 2. LSTM gates
     print("2. LSTM Gates:")
     h_new, c_new, lstm_gates = model.lstm_gates_example(x, h, c)
-    print(f"   Input gate ortalaması: {lstm_gates['input'].mean().item():.3f}")
-    print(f"   Forget gate ortalaması: {lstm_gates['forget'].mean().item():.3f}")
-    print(f"   Output gate ortalaması: {lstm_gates['output'].mean().item():.3f}\n")
+    print(f"   Input gate mean: {lstm_gates['input'].mean().item():.3f}")
+    print(f"   Forget gate mean: {lstm_gates['forget'].mean().item():.3f}")
+    print(f"   Output gate mean: {lstm_gates['output'].mean().item():.3f}\n")
     
     # 3. GRU gates
     print("3. GRU Gates:")
     h_new, gru_gates = model.gru_gates_example(x, h)
-    print(f"   Reset gate ortalaması: {gru_gates['reset'].mean().item():.3f}")
-    print(f"   Update gate ortalaması: {gru_gates['update'].mean().item():.3f}\n")
+    print(f"   Reset gate mean: {gru_gates['reset'].mean().item():.3f}")
+    print(f"   Update gate mean: {gru_gates['update'].mean().item():.3f}\n")
     
     # 4. Highway gate
     print("4. Highway Gate:")
     output, transform_gate = model.highway_gate_example(x)
-    print(f"   Transform gate ortalaması: {transform_gate.mean().item():.3f}")
-    print(f"   Bypass oranı: {(1 - transform_gate).mean().item():.3f}\n")
+    print(f"   Transform gate mean: {transform_gate.mean().item():.3f}")
+    print(f"   Bypass rate: {(1 - transform_gate).mean().item():.3f}\n")
     
     # 5. Expert gating
     print("5. Expert Gating:")
     expert_model = SigmoidGatingMechanism(input_dim, num_experts=4)
     output, expert_gates = expert_model(x)
-    print(f"   Expert gate değerleri: {expert_gates[0].detach().numpy()}")
-    print(f"   En aktif expert: {expert_gates[0].argmax().item()}")
+    print(f"   Expert gate values: {expert_gates[0].detach().numpy()}")
+    print(f"   Most active expert: {expert_gates[0].argmax().item()}")
 
 
 if __name__ == "__main__":
-    # Görselleştirme
+    # Visualization
     visualize_sigmoid_gate()
-    
-    # Demonstrasyon
+
+    # Demonstration
     demonstrate_gate_effects()
-    
-    print("\n✅ Sigmoid gates demonstrasyonu tamamlandı!")
\ No newline at end of file
+
+    print("\n✅ Sigmoid gates demonstration completed!")
\ No newline at end of file
diff --git a/Genel-5/PROJE_README.md b/Genel-5/PROJE_README.md
index 64a3d44..7692d22 100644
--- a/Genel-5/PROJE_README.md
+++ b/Genel-5/PROJE_README.md
@@ -1,52 +1,52 @@
-# Gelişmiş Görüntü İşleme Uygulaması
+# Advanced Image Processing Application
 
-Bu proje, büyük görüntüleri daha küçük parçalara bölen, bu parçalara çeşitli filtreler uygulayan ve sonrasında görüntüyü tekrar birleştiren bir Streamlit uygulamasıdır.
+This project is a Streamlit application that splits large images into smaller patches, applies a variety of filters to those patches, and then stitches the image back together.
 
-## Özellikler
+## Features
 
-- Büyük görüntüleri parçalara ayırma
-- Her parçaya farklı görüntü filtreleri uygulama
-- İşlenen parçaları orijinal boyutlarında birleştirme
-- Kullanıcı dostu arayüz
-- İşlenmiş görüntüyü indirme imkanı
+- Split large images into patches
+- Apply different image filters to each patch
+- Merge the processed patches back together at the original size
+- User-friendly interface
+- Option to download the processed image
 
-## Kurulum
+## Installation
 
-1. Gerekli kütüphaneleri yükleyin:
+1. Install the required libraries:
    ```
    pip install streamlit numpy torch Pillow
    ```
 
-2. Uygulamayı çalıştırın:
+2. Run the application:
    ```
    streamlit run image_processor_app.py
    ```
 
-## Kullanım
+## Usage
 
-1. Sol taraftaki menüden bir görüntü yükleyin
-2. İstediğiniz filtreyi seçin
-3. Örtüşme payını ve maksimum parça sayısını ayarlayın
-4. "Görüntüyü İşle" butonuna tıklayın
-5. İşlenmiş görüntüyü inceleyip indirebilirsiniz
+1. Upload an image from the menu on the left
+2. Choose the filter you want to apply
+3. Adjust the overlap ratio and maximum number of patches
+4. Click the "Process Image" button
+5. Review and download the processed image
 
-## Kullanılan Filtreler
+## Available Filters
 
-- Normal: Orijinal görüntü
-- Siyah-Beyaz: Gri tonlamalı görüntü
-- Blur: Bulanıklaştırma efekti
-- Kontur: Kenar belirleme
-- Keskinleştir: Görüntüyü keskinleştirme
+- Normal: Original image
+- Black & White: Grayscale conversion
+- Blur: Blurring effect
+- Contour: Edge detection
+- Sharpen: Enhance image sharpness
 
-## Geliştirme
+## Development
 
-Bu proje, büyük görüntüleri işlemek için parçalama ve birleştirme işlemlerini gösteren bir örnektir. Daha fazla özellik ekleyerek genişletebilirsiniz:
+This project demonstrates the workflow for splitting and recombining large images. You can extend it with additional features, such as:
 
-- Daha fazla filtre seçeneği
-- Parça boyutlarını özelleştirme
-- Toplu işlem yapabilme
-- Farklı kaydetme formatları
+- Additional filter options
+- Customizable patch sizes
+- Batch processing support
+- Alternative export formats
 
-## Lisans
+## License
 
 MIT
diff --git a/Genel-5/deeplearning-tracer.py b/Genel-5/deeplearning-tracer.py
index 61dbf65..49252c3 100644
--- a/Genel-5/deeplearning-tracer.py
+++ b/Genel-5/deeplearning-tracer.py
@@ -14,43 +14,43 @@ def print_section(title, color="cyan"):
     """Print a section header with rich formatting"""
     console.rule(f"[bold {color}]{title}", style=color)
 
-# --- 1. Daha Derin bir PyTorch Modeli Tanımla ---
+# --- 1. Define a Deeper PyTorch Model ---
 class DeepMLP(nn.Module):
     def __init__(self, input_dim, hidden_dims, output_dim, dropout_rate=0.1):
         """
-        Derin bir çok katmanlı algılayıcı (MLP) modeli
-        
+        A deep multi-layer perceptron (MLP) model
+
         Args:
-            input_dim: Giriş boyutu
-            hidden_dims: Gizli katman boyutlarını içeren liste
-            output_dim: Çıkış boyutu
-            dropout_rate: Dropout oranı (varsayılan: 0.1)
+            input_dim: Input dimension
+            hidden_dims: List containing the hidden layer sizes
+            output_dim: Output dimension
+            dropout_rate: Dropout rate (default: 0.1)
         """
         super().__init__()
         self.layers = nn.ModuleList()
-        
-        # Giriş katmanı
+
+        # Input layer
         prev_dim = input_dim
-        
-        # Gizli katmanları oluştur
+
+        # Create hidden layers
         for i, hidden_dim in enumerate(hidden_dims):
             self.layers.append(nn.Linear(prev_dim, hidden_dim))
             self.layers.append(nn.BatchNorm1d(hidden_dim))
             self.layers.append(nn.ReLU())
             self.layers.append(nn.Dropout(dropout_rate))
             prev_dim = hidden_dim
-            
-        # Çıkış katmanı
+
+        # Output layer
         self.output_layer = nn.Linear(prev_dim, output_dim)
-        
-        # Ağırlık başlatma
+
+        # Weight initialization
         self._init_weights()
-        
-        # Model bilgilerini göster
+
+        # Display model information
         self._print_model_info(input_dim, hidden_dims, output_dim, dropout_rate)
-    
+
     def _init_weights(self):
-        """Ağırlıkları Xavier/Glorot başlatma yöntemiyle başlat"""
+        """Initialize weights using Xavier/Glorot initialization"""
         for layer in self.layers:
             if isinstance(layer, nn.Linear):
                 nn.init.xavier_uniform_(layer.weight)
@@ -59,179 +59,176 @@ def _init_weights(self):
         nn.init.xavier_uniform_(self.output_layer.weight)
         if self.output_layer.bias is not None:
             nn.init.zeros_(self.output_layer.bias)
-    
+
     def _print_model_info(self, input_dim, hidden_dims, output_dim, dropout_rate):
-        """Model yapısı hakkında bilgi göster"""
+        """Display information about the model architecture"""
         total_params = sum(p.numel() for p in self.parameters() if p.requires_grad)
-        
+
         info_table = Table(show_header=False, box=box.ROUNDED, show_edge=False)
-        info_table.add_column("Özellik", style="cyan", no_wrap=True)
-        info_table.add_column("Değer", style="green")
-        
-        info_table.add_row("Model Türü", "Derin Çok Katmanlı Algılayıcı (MLP)")
-        info_table.add_row("Toplam Parametre", f"{total_params:,}")
-        info_table.add_row("Giriş Boyutu", str(input_dim))
-        info_table.add_row("Gizli Katmanlar", " → ".join(map(str, hidden_dims)))
-        info_table.add_row("Çıkış Boyutu", str(output_dim))
-        info_table.add_row("Dropout Oranı", str(dropout_rate))
-        
+        info_table.add_column("Feature", style="cyan", no_wrap=True)
+        info_table.add_column("Value", style="green")
+
+        info_table.add_row("Model Type", "Deep Multi-Layer Perceptron (MLP)")
+        info_table.add_row("Total Parameters", f"{total_params:,}")
+        info_table.add_row("Input Dimension", str(input_dim))
+        info_table.add_row("Hidden Layers", " → ".join(map(str, hidden_dims)))
+        info_table.add_row("Output Dimension", str(output_dim))
+        info_table.add_row("Dropout Rate", str(dropout_rate))
+
         console.print(Panel(
             info_table,
-            title="[bold green]Model Yapılandırması[/]",
+            title="[bold green]Model Configuration[/]",
             border_style="green",
             padding=(1, 2)
         ))
-    
+
     def forward(self, x):
-        """İleri yayılım"""
-        # Gizli katmanlardan geçir
+        """Forward pass"""
+        # Pass through hidden layers
         for layer in self.layers:
             x = layer(x)
-            
-        # Çıkış katmanı
+
+        # Output layer
         x = self.output_layer(x)
         return x
 
-# --- Kanca (Hook) için Global Depolama ve Durum Yönetimi ---
-# Gerçek bir uygulamada bu durumu daha temiz yönetmek istersiniz (örneğin bir sınıf içinde).
+# --- Global Storage and State Management for Hooks ---
+# In a real application you would likely manage this state in a cleaner way (e.g., inside a class).
 hook_state = {
-    "captured_activation": None,    # Yakalanan aktivasyonu saklamak için
-    "is_intervention_mode": False,  # Müdahale modunda olup olmadığımızı belirtir
-    "neuron_to_modify_idx": 0,    # Hangi nöronun aktivasyonuna müdahale edileceği
-    "intervention_value": 0.0     # Müdahale edilecek yeni değer
+    "captured_activation": None,    # Stores the captured activation
+    "is_intervention_mode": False,  # Indicates whether we are in intervention mode
+    "neuron_to_modify_idx": 0,    # Which neuron's activation to intervene on
+    "intervention_value": 0.0     # The value to inject during the intervention
 }
 
-# --- 2. Aktivasyonları Yakalamak ve Değiştirmek için bir Kanca (Hook) Uygula ---
+# --- 2. Apply a Hook to Capture and Modify Activations ---
 def activation_hook_fn(module, input_args, output_tensor):
     """
-    Bu bir PyTorch ileri (forward) kancasıdır.
-    Eğer 'is_intervention_mode' False ise, katmanın çıkış aktivasyonunu yakalar.
-    Eğer 'is_intervention_mode' True ise, belirtilen bir nöronun aktivasyonunu değiştirir.
+    This is a PyTorch forward hook.
+    If 'is_intervention_mode' is False, it captures the layer's output activation.
+    If 'is_intervention_mode' is True, it modifies the activation of a specified neuron.
     """
     global hook_state
 
     if not hook_state["is_intervention_mode"]:
-        # Normal (yakalama) mod: Aktivasyonu sakla
+        # Normal (capture) mode: store the activation
         hook_state["captured_activation"] = output_tensor.clone().detach()
-        # print(f"Kanca (Yakalama): {module} çıkışı yakalandı: {hook_state['captured_activation']}")
-        return None # Çıkışı değiştirme, orijinali kullanılsın
+        return None  # Do not modify the output
     else:
-        # Müdahale modu: Aktivasyonu değiştir
-        modified_output = output_tensor.clone() # Değişiklik yapmadan önce klonla!
+        # Intervention mode: modify the activation
+        modified_output = output_tensor.clone()  # Clone before modifying
 
-        # Örneğin, ilk nöronun (batch_size=1 varsayımıyla) aktivasyonunu değiştir
-        # output_tensor'un şekli [batch_size, num_features] beklenir
-        if modified_output.ndim == 2 and modified_output.shape[0] == 1: # [1, hidden_dim] gibi
+        # Overwrite the activation of the neuron selected by 'neuron_to_modify_idx' (only batch_size=1 is handled)
+        # The output tensor is expected to have shape [batch_size, num_features]
+        if modified_output.ndim == 2 and modified_output.shape[0] == 1:  # e.g. [1, hidden_dim]
             neuron_idx = hook_state["neuron_to_modify_idx"]
             if 0 <= neuron_idx < modified_output.shape[1]:
-                # print(f"Kanca (Müdahale): {module} Nöron {neuron_idx} orijinal değeri: {modified_output[0, neuron_idx]}")
                 modified_output[0, neuron_idx] = hook_state["intervention_value"]
-                # print(f"Kanca (Müdahale): {module} Nöron {neuron_idx} yeni değeri: {modified_output[0, neuron_idx]}")
-                hook_state["captured_activation"] = modified_output.clone().detach() # Değiştirilmiş aktivasyonu da sakla
-                return modified_output # Değiştirilmiş aktivasyonu döndür
+                hook_state["captured_activation"] = modified_output.clone().detach()  # Store the modified activation
+                return modified_output  # Return the modified activation
             else:
-                print(f"Uyarı: Nöron indeksi {neuron_idx} sınırlar dışında.")
-                return None # Bir sorun varsa orijinali döndür
+                print(f"Warning: Neuron index {neuron_idx} is out of bounds.")
+                return None  # Fall back to the original activation on error
         else:
-            print(f"Uyarı: Kanca, [1, num_features] şeklinde aktivasyon bekliyordu, gelen: {modified_output.shape}")
-            return None # Bir sorun varsa orijinali döndür
+            print(f"Warning: The hook expected an activation shaped like [1, num_features], received: {modified_output.shape}")
+            return None  # Fall back to the original activation on error
 
-# --- Model ve Veri Kurulumu ---
+# --- Model and Data Setup ---
 input_dim = 10
-hidden_dims = [64, 32, 16]  # Daha derin mimari
+hidden_dims = [64, 32, 16]  # Deeper architecture
 output_dim = 2
 dropout_rate = 0.1
 
-# Modeli oluştur
+# Create the model
 model = DeepMLP(input_dim, hidden_dims, output_dim, dropout_rate)
 
-# Kullanılabilir cihazı belirle (GPU varsa onu kullan)
+# Detect the available device (use GPU if present)
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model = model.to(device)
 
-# Model özetini göster
-console.print(f"\n[bold]Model {device} cihazına yüklendi.[/]")
-console.print(f"Eğitilebilir parametre sayısı: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}")
+# Display a model summary
+console.print(f"\n[bold]Model loaded to {device}.[/]")
+console.print(f"Number of trainable parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad):,}")
 
-# Tüm ReLU katmanlarına kancaları kaydet
+# Register hooks on all ReLU layers (they share one 'captured_activation' slot, so the last ReLU to run is the one retained)
 hook_handles = []
 for i, layer in enumerate(model.layers):
     if isinstance(layer, nn.ReLU):
         handle = layer.register_forward_hook(activation_hook_fn)
         hook_handles.append(handle)
-        print(f"ReLU katmanına kanca eklendi: {i}")
+        print(f"Hook added to ReLU layer: {i}")
 
 if not hook_handles:
-    raise ValueError("Modelde hiç ReLU katmanı bulunamadı!")
+    raise ValueError("No ReLU layers found in the model!")
 
-# Rastgele bir girdi verisi oluştur (basitlik için batch_size=1)
+# Create random input data (batch_size=1 for simplicity)
 dummy_input = torch.randn(1, input_dim).to(device)
 
-# Girdi verisi hakkında bilgi
+# Display information about the input data
 input_info = Table(show_header=True, header_style="bold magenta", box=box.ROUNDED)
-input_info.add_column("Özellik", style="cyan")
-input_info.add_column("Değer", style="green")
-input_info.add_row("Girdi Boyutu", f"{tuple(dummy_input.shape)}")
-input_info.add_row("Min Değer", f"{dummy_input.min().item():.4f}")
-input_info.add_row("Maksimum Değer", f"{dummy_input.max().item():.4f}")
-input_info.add_row("Ortalama", f"{dummy_input.mean().item():.4f}")
-input_info.add_row("Standart Sapma", f"{dummy_input.std().item():.4f}")
+input_info.add_column("Feature", style="cyan")
+input_info.add_column("Value", style="green")
+input_info.add_row("Input Shape", f"{tuple(dummy_input.shape)}")
+input_info.add_row("Minimum", f"{dummy_input.min().item():.4f}")
+input_info.add_row("Maximum", f"{dummy_input.max().item():.4f}")
+input_info.add_row("Mean", f"{dummy_input.mean().item():.4f}")
+input_info.add_row("Standard Deviation", f"{dummy_input.std().item():.4f}")
 
 console.print(Panel(
     input_info,
-    title="[bold blue]Girdi Verisi İstatistikleri[/]",
+    title="[bold blue]Input Data Statistics[/]",
     border_style="blue",
     padding=(1, 2)
 ))
 
-# İlk 5 özelliği göster
+# Show the first five features
 input_sample = Table(show_header=True, header_style="bold magenta", box=box.ROUNDED)
-input_sample.add_column("Özellik İndeksi", style="cyan")
-input_sample.add_column("Değer", style="green")
+input_sample.add_column("Feature Index", style="cyan")
+input_sample.add_column("Value", style="green")
 
 for i, val in enumerate(dummy_input.squeeze().cpu().numpy()[:5]):
     input_sample.add_row(f"{i}", f"{val:.6f}")
 
 console.print(Panel(
-    input_info,
-    title="[bold blue]Girdi Verisi (İlk 5 Özellik)[/]",
+    input_sample,
+    title="[bold blue]Input Data (First 5 Features)[/]",
     border_style="blue",
     padding=(1, 2)
 ))
-print_section("🔧 Model ve Veri Kurulumu")
-console.print(f"[bold]Model Yapısı:[/] [cyan]Input: {input_dim}[/] → [green]Hidden: {hidden_dim}[/] → [yellow]Output: {output_dim}[/]")
-console.print(f"[bold]Girdi Verisi:[/] {dummy_input.squeeze().tolist()[:5]}... [dim](ilk 5 özellik gösteriliyor)[/dim]\n")
+print_section("🔧 Model and Data Setup")
+console.print(f"[bold]Model Architecture:[/] [cyan]Input: {input_dim}[/] → [green]Hidden: {hidden_dims}[/] → [yellow]Output: {output_dim}[/]")
+console.print(f"[bold]Input Sample:[/] {dummy_input.squeeze().tolist()[:5]}... [dim](showing the first 5 features)[/dim]\n")
 
-# --- 3. "Temiz Çalıştırma": Temel aktivasyonları ve çıktıyı al ---
-print_section("🔍 Temiz Çalıştırma (Müdahalesiz)")
+# --- 3. "Clean Run": Capture the baseline activations and output ---
+print_section("🔍 Clean Run (No Intervention)")
 
 hook_state["is_intervention_mode"] = False
 with torch.no_grad():
     original_output = model(dummy_input)
     clean_hidden_activation = hook_state["captured_activation"]
 
-# Gizli katman aktivasyonlarını gösteren tablo
+# Table showing hidden layer activations
 table = Table(show_header=True, header_style="bold magenta", box=box.ROUNDED)
-table.add_column("Nöron", style="dim", width=12)
-table.add_column("Aktivasyon Değeri", justify="right")
+table.add_column("Neuron", style="dim", width=12)
+table.add_column("Activation Value", justify="right")
 
 for i, val in enumerate(clean_hidden_activation.squeeze().tolist()):
-    table.add_row(f"Nöron {i}", f"{val:.4f}")
+    table.add_row(f"Neuron {i}", f"{val:.4f}")
 
 console.print(Panel.fit(
     table,
-    title="[bold]Gizli Katman Aktivasyonları (ReLU Sonrası)",
+    title="[bold]Hidden Layer Activations (Post-ReLU)",
     border_style="green",
     padding=(1, 2)
 ))
 
-console.print(f"\n[bold]Model Çıktısı:[/] {original_output.squeeze().tolist()}")
+console.print(f"\n[bold]Model Output:[/] {original_output.squeeze().tolist()}")
 console.rule(style="dim")
 
-# --- 4. "Müdahale Çalıştırması": Bir aktivasyonu değiştir ve etkiyi gör ---
-print_section("🔧 Müdahale Çalıştırması")
+# --- 4. "Intervention Run": Change an activation and observe the effect ---
+print_section("🔧 Intervention Run")
 
-# Müdahale ayarları
+# Intervention settings
 neuron_idx = 0
 new_value = 10.0
 
@@ -243,29 +240,29 @@ def activation_hook_fn(module, input_args, output_tensor):
     intervened_output = model(dummy_input)
     intervened_hidden_activation = hook_state["captured_activation"]
 
-# Müdahale özeti
-console.print(f"[bold]Müdahale Detayları:[/]")
-console.print(f"  • [yellow]Hedef Nöron:[/] [bold]{neuron_idx}[/]")
-console.print(f"  • [yellow]Yeni Değer:[/] [bold]{new_value}[/]")
+# Intervention summary
+console.print(f"[bold]Intervention Details:[/]")
+console.print(f"  • [yellow]Target Neuron:[/] [bold]{neuron_idx}[/]")
+console.print(f"  • [yellow]New Value:[/] [bold]{new_value}[/]")
 
-# Müdahale edilmiş aktivasyonlar tablosu
+# Table with modified activations
 modified_table = Table(show_header=True, header_style="bold magenta", box=box.ROUNDED)
-modified_table.add_column("Nöron", style="dim", width=12)
-modified_table.add_column("Önceki Değer", justify="right")
-modified_table.add_column("Yeni Değer", justify="right")
-modified_table.add_column("Durum", justify="center")
+modified_table.add_column("Neuron", style="dim", width=12)
+modified_table.add_column("Previous Value", justify="right")
+modified_table.add_column("New Value", justify="right")
+modified_table.add_column("Status", justify="center")
 
 for i, (orig, new) in enumerate(zip(
     clean_hidden_activation.squeeze().tolist(),
     intervened_hidden_activation.squeeze().tolist()
 )):
     modified = i == neuron_idx
-    status = "[bold red]✗ Değiştirildi" if modified else "[green]✓ Aynı"
+    status = "[bold red]✗ Modified" if modified else "[green]✓ Unchanged"
     orig_val = f"[strike dim]{orig:.4f}[/]" if modified else f"{orig:.4f}"
     new_val = f"[bold red]{new:.4f}" if modified else f"{new:.4f}"
-    
+
     modified_table.add_row(
-        f"Nöron {i}",
+        f"Neuron {i}",
         orig_val,
         new_val,
         status
@@ -273,23 +270,23 @@ def activation_hook_fn(module, input_args, output_tensor):
 
 console.print(Panel.fit(
     modified_table,
-    title="[bold]Gizli Katman Karşılaştırması",
+    title="[bold]Hidden Layer Comparison",
     border_style="yellow",
     padding=(1, 2)
 ))
 
-console.print(f"\n[bold]Yeni Model Çıktısı:[/] {intervened_output.squeeze().tolist()}")
+console.print(f"\n[bold]New Model Output:[/] {intervened_output.squeeze().tolist()}")
 console.rule(style="dim")
 
-# --- 5. Karşılaştır ---
-print_section("📊 Sonuçların Karşılaştırılması")
+# --- 5. Compare Results ---
+print_section("📊 Comparing Results")
 
-# Çıktı karşılaştırma tablosu
+# Output comparison table
 output_table = Table(show_header=True, header_style="bold magenta", box=box.ROUNDED)
-output_table.add_column("Çıktı Nöronu", style="dim", width=12)
-output_table.add_column("Orijinal Değer", justify="right")
-output_table.add_column("Yeni Değer", justify="right")
-output_table.add_column("Fark", justify="right")
+output_table.add_column("Output Neuron", style="dim", width=12)
+output_table.add_column("Original Value", justify="right")
+output_table.add_column("New Value", justify="right")
+output_table.add_column("Difference", justify="right")
 
 orig_outputs = original_output.squeeze().tolist()
 new_outputs = intervened_output.squeeze().tolist()
@@ -298,7 +295,7 @@ def activation_hook_fn(module, input_args, output_tensor):
 for i, (orig, new, diff) in enumerate(zip(orig_outputs, new_outputs, diffs)):
     diff_style = "[red]" if diff > 0.1 else "[green]"
     output_table.add_row(
-        f"Çıktı {i}",
+        f"Output {i}",
         f"{orig:.6f}",
         f"{new:.6f}",
         f"{diff_style}{diff:.6f}"
@@ -306,21 +303,22 @@ def activation_hook_fn(module, input_args, output_tensor):
 
 console.print(Panel.fit(
     output_table,
-    title="[bold]Çıktı Karşılaştırması",
+    title="[bold]Output Comparison",
     border_style="blue",
     padding=(1, 2)
 ))
 
-# Özet istatistikler
-console.print("\n[bold]📈 Özet İstatistikler:[/]")
-console.print(f"  • [yellow]Toplam Mutlak Fark:[/] {torch.sum(torch.abs(original_output - intervened_output)):.6f}")
-console.print(f"  • [yellow]Maksimum Fark:[/] {torch.max(torch.abs(original_output - intervened_output)):.6f}")
-console.print(f"  • [yellow]Ortalama Mutlak Fark:[/] {torch.mean(torch.abs(original_output - intervened_output)):.6f}")
+# Summary statistics
+console.print("\n[bold]📈 Summary Statistics:[/]")
+console.print(f"  • [yellow]Total Absolute Difference:[/] {torch.sum(torch.abs(original_output - intervened_output)):.6f}")
+console.print(f"  • [yellow]Maximum Difference:[/] {torch.max(torch.abs(original_output - intervened_output)):.6f}")
+console.print(f"  • [yellow]Mean Absolute Difference:[/] {torch.mean(torch.abs(original_output - intervened_output)):.6f}")
 
-# Kanca temizliği hakkında bilgi
-console.print("\n[dim]Not: Kanca başarıyla kaldırıldı.[/dim]")
+# Hook cleanup notice (NOTE: this is printed before the hooks are actually removed at the end of the script)
+console.print("\n[dim]Note: Hooks have been removed successfully.[/dim]")
 
 
-# Kancayı işiniz bittiğinde kaldırmayı unutmayın,
-# özellikle bir notebook'ta hücreleri tekrar tekrar çalıştırıyorsanız.
-hook_handle.remove()
\ No newline at end of file
+# Always remove hooks when you're done, especially if you are repeatedly
+# executing cells in a notebook environment.
+for handle in hook_handles:
+    handle.remove()
diff --git a/Genel-5/dit_implementation.py b/Genel-5/dit_implementation.py
index 6741aa2..725df21 100644
--- a/Genel-5/dit_implementation.py
+++ b/Genel-5/dit_implementation.py
@@ -1,133 +1,133 @@
-# Gerekli kütüphaneleri içe aktar
-import torch  # PyTorch kütüphanesi
-import torch.nn as nn  # Sinir ağı modülleri
-import torch.nn.functional as F  # Fonksiyonel operasyonlar
-import math  # Matematiksel işlemler
-import numpy as np  # Sayısal işlemler
-from typing import Optional, Tuple  # Tip ipuçları
-import matplotlib.pyplot as plt  # Görselleştirme için
+# Import required libraries
+import torch  # PyTorch framework
+import torch.nn as nn  # Neural network modules
+import torch.nn.functional as F  # Functional operations
+import math  # Mathematical utilities
+import numpy as np  # Numerical computations
+from typing import Optional, Tuple  # Type hints
+import matplotlib.pyplot as plt  # Visualization
 
 class TimestepEmbedding(nn.Module):
-    """Zaman adımları için sinüzoidal gömme vektörleri oluşturur, transformer pozisyonel kodlamalarına benzer"""
+    """Generate sinusoidal embeddings for timesteps, similar to transformer positional encodings."""
     
     def __init__(self, dim: int):
         super().__init__()
-        self.dim = dim  # Gömme boyutu
-        
+        self.dim = dim  # Embedding size
+
     def forward(self, timesteps: torch.Tensor) -> torch.Tensor:
-        device = timesteps.device  # Hesaplamanın yapılacağı cihaz (CPU/GPU)
-        half_dim = self.dim // 2  # Boyutun yarısı
-        # Logaritmik ölçekli frekanslar oluştur
+        device = timesteps.device  # Device where the computation happens (CPU/GPU)
+        half_dim = self.dim // 2  # Half of the embedding size
+        # Create logarithmically scaled frequencies
         embeddings = math.log(10000) / (half_dim - 1)
-        # Üstel fonksiyonla frekansları hesapla
+        # Compute the frequencies with an exponential schedule
         embeddings = torch.exp(torch.arange(half_dim, device=device) * -embeddings)
-        # Zaman adımlarıyla çarparak gömme matrisini oluştur
+        # Multiply with timesteps to create the embedding matrix
         embeddings = timesteps[:, None] * embeddings[None, :]
-        # Sinüs ve kosinüs değerlerini birleştir
+        # Concatenate sine and cosine representations
         embeddings = torch.cat([torch.sin(embeddings), torch.cos(embeddings)], dim=-1)
         return embeddings
 
 class MultiHeadAttention(nn.Module):
-    """Çok kafalı dikkat mekanizması"""
-    
+    """Multi-head attention module."""
+
     def __init__(self, d_model: int, n_heads: int, dropout: float = 0.1):
         super().__init__()
-        # Model boyutunun kafa sayısına bölünebilir olması gerekir
+        # The model dimension must be divisible by the head count
         assert d_model % n_heads == 0
-        
-        self.d_model = d_model  # Girdi boyutu
-        self.n_heads = n_heads    # Kafa sayısı
-        self.d_k = d_model // n_heads  # Her kafanın boyutu
-        
-        # Sorgu, anahtar, değer ve çıkış dönüşümleri
-        self.w_q = nn.Linear(d_model, d_model)  # Sorgu dönüşümü
-        self.w_k = nn.Linear(d_model, d_model)  # Anahtar dönüşümü
-        self.w_v = nn.Linear(d_model, d_model)  # Değer dönüşümü
-        self.w_o = nn.Linear(d_model, d_model)  # Çıkış dönüşümü
-        
-        self.dropout = nn.Dropout(dropout)  # Aşırı öğrenmeyi önlemek için dropout
-        
+
+        self.d_model = d_model  # Input dimension
+        self.n_heads = n_heads  # Number of heads
+        self.d_k = d_model // n_heads  # Dimension per head
+
+        # Linear projections for query, key, value, and output
+        self.w_q = nn.Linear(d_model, d_model)  # Query projection
+        self.w_k = nn.Linear(d_model, d_model)  # Key projection
+        self.w_v = nn.Linear(d_model, d_model)  # Value projection
+        self.w_o = nn.Linear(d_model, d_model)  # Output projection
+
+        self.dropout = nn.Dropout(dropout)  # Dropout regularization
+
     def forward(self, x: torch.Tensor, mask: Optional[torch.Tensor] = None) -> torch.Tensor:
-        batch_size, seq_len, d_model = x.shape  # Girdi boyutlarını al
-        
-        # Çok kafalı dikkat için doğrusal dönüşümler ve yeniden şekillendirme
-        # Sorgu, anahtar ve değer matrislerini hesapla ve kafalara böl
+        batch_size, seq_len, d_model = x.shape  # Extract input dimensions
+
+        # Apply linear transformations and reshape for the heads
+        # Compute query, key, and value matrices and split across heads
         Q = self.w_q(x).view(batch_size, seq_len, self.n_heads, self.d_k).transpose(1, 2)
         K = self.w_k(x).view(batch_size, seq_len, self.n_heads, self.d_k).transpose(1, 2)
         V = self.w_v(x).view(batch_size, seq_len, self.n_heads, self.d_k).transpose(1, 2)
-        
-        # Ölçeklendirilmiş nokta çarpımı dikkat mekanizması
-        # Anahtarların transpozu ile sorguları çarp ve ölçeklendir
+
+        # Scaled dot-product attention
+        # Multiply queries with the transposed keys and scale
         scores = torch.matmul(Q, K.transpose(-2, -1)) / math.sqrt(self.d_k)
-        
-        # Maske uygula (varsa)
+
+        # Apply mask if provided
         if mask is not None:
             scores = scores.masked_fill(mask == 0, -1e9)
-            
-        # Dikkat ağırlıklarını hesapla ve softmax uygula
+
+        # Compute attention weights and apply softmax
         attention_weights = F.softmax(scores, dim=-1)
-        attention_weights = self.dropout(attention_weights)  # Dropout uygula
-        
-        # Dikkat ağırlıklarını değerlerle çarparak çıktıyı hesapla
+        attention_weights = self.dropout(attention_weights)  # Apply dropout
+
+        # Multiply attention weights with values to get the output
         attention_output = torch.matmul(attention_weights, V)
-        
-        # Kafaları birleştir ve son lineer katmandan geçir
+
+        # Merge heads and pass through the final linear layer
         attention_output = attention_output.transpose(1, 2).contiguous().view(
             batch_size, seq_len, d_model
         )
-        
-        return self.w_o(attention_output)  # Son lineer dönüşümü uygula
+
+        return self.w_o(attention_output)  # Apply the output projection
 
 class FeedForward(nn.Module):
-    """Konum bazlı ileri beslemeli ağ"""
-    
+    """Position-wise feedforward network."""
+
     def __init__(self, d_model: int, d_ff: int, dropout: float = 0.1):
         super().__init__()
-        self.linear1 = nn.Linear(d_model, d_ff)  # Girişten gizli katmana
-        self.linear2 = nn.Linear(d_ff, d_model)   # Gizli katmandan çıkışa
-        self.dropout = nn.Dropout(dropout)         # Aşırı öğrenmeyi önlemek için
-        
+        self.linear1 = nn.Linear(d_model, d_ff)  # Input to hidden layer
+        self.linear2 = nn.Linear(d_ff, d_model)  # Hidden layer to output
+        self.dropout = nn.Dropout(dropout)        # Prevent overfitting
+
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        # İleri yayılım: Lineer -> ReLU -> Dropout -> Lineer
+        # Forward pass: Linear -> ReLU -> Dropout -> Linear
         return self.linear2(self.dropout(F.relu(self.linear1(x))))
 
 class TransformerBlock(nn.Module):
-    """Dikkat ve ileri beslemeli katmanlara sahip tek bir transformer bloğu"""
-    
+    """Single transformer block with attention and feedforward layers."""
+
     def __init__(self, d_model: int, n_heads: int, d_ff: int, dropout: float = 0.1):
         super().__init__()
-        self.attention = MultiHeadAttention(d_model, n_heads, dropout)  # Çok kafalı dikkat katmanı
-        self.feed_forward = FeedForward(d_model, d_ff, dropout)         # İleri beslemeli ağ
-        self.norm1 = nn.LayerNorm(d_model)  # İlk normalizasyon katmanı
-        self.norm2 = nn.LayerNorm(d_model)  # İkinci normalizasyon katmanı
-        self.dropout = nn.Dropout(dropout)   # Dropout katmanı
-        
+        self.attention = MultiHeadAttention(d_model, n_heads, dropout)  # Multi-head attention layer
+        self.feed_forward = FeedForward(d_model, d_ff, dropout)         # Feedforward network
+        self.norm1 = nn.LayerNorm(d_model)  # First normalization layer
+        self.norm2 = nn.LayerNorm(d_model)  # Second normalization layer
+        self.dropout = nn.Dropout(dropout)  # Dropout layer
+
     def forward(self, x: torch.Tensor, mask: Optional[torch.Tensor] = None) -> torch.Tensor:
-        # Öz-dikkat mekanizması ve artık bağlantı
-        attn_output = self.attention(self.norm1(x), mask)  # Normalizasyon ve dikkat
-        x = x + self.dropout(attn_output)  # Artık bağlantı ve dropout
-        
-        # İleri beslemeli ağ ve artık bağlantı
-        ff_output = self.feed_forward(self.norm2(x))  # Normalizasyon ve ileri besleme
-        x = x + self.dropout(ff_output)  # İkinci artık bağlantı ve dropout
-        
+        # Self-attention and residual connection
+        attn_output = self.attention(self.norm1(x), mask)  # Normalization followed by attention
+        x = x + self.dropout(attn_output)  # Residual connection and dropout
+
+        # Feedforward network and residual connection
+        ff_output = self.feed_forward(self.norm2(x))  # Normalization and feedforward
+        x = x + self.dropout(ff_output)  # Second residual connection and dropout
+
         return x
 
 class DiffusionTransformer(nn.Module):
     """
-    Görüntü oluşturma için Diffusion Transformer (DiT) modeli
-    
-    Argümanlar:
-        img_size: Giriş görüntülerinin boyutu (kare olduğu varsayılır)
-        patch_size: Görüntünün bölüneceği yama boyutu
-        d_model: Transformer'ın gizli boyutu
-        n_layers: Transformer katman sayısı
-        n_heads: Dikkat başlığı sayısı
-        d_ff: İleri beslemeli ağın gizli boyutu
-        num_classes: Koşullu üretim için sınıf sayısı
-        dropout: Dropout oranı
+    Diffusion Transformer (DiT) model for image generation.
+
+    Args:
+        img_size: Size of the input images (assumes square inputs).
+        patch_size: Size of the patches extracted from the image.
+        d_model: Hidden size of the transformer.
+        n_layers: Number of transformer layers.
+        n_heads: Number of attention heads.
+        d_ff: Hidden dimension of the feedforward network.
+        num_classes: Number of classes for conditional generation.
+        dropout: Dropout probability.
     """
-    
+
     def __init__(
         self,
         img_size: int = 32,
@@ -141,45 +141,45 @@ def __init__(
     ):
         super().__init__()
         
-        self.img_size = img_size  # Görüntü boyutu
-        self.patch_size = patch_size  # Yama boyutu
-        self.d_model = d_model  # Modelin gizli boyutu
-        self.num_patches = (img_size // patch_size) ** 2  # Toplam yama sayısı
-        self.patch_dim = 3 * patch_size ** 2  # RGB yamaları için boyut (3 kanal * yama alanı)
-        
-        # Yama gömme katmanı
+        self.img_size = img_size  # Image size
+        self.patch_size = patch_size  # Patch size
+        self.d_model = d_model  # Model hidden size
+        self.num_patches = (img_size // patch_size) ** 2  # Total number of patches
+        self.patch_dim = 3 * patch_size ** 2  # Patch dimension for RGB images (3 channels * patch area)
+
+        # Patch embedding layer
         self.patch_embedding = nn.Linear(self.patch_dim, d_model)
-        
-        # Konumsal gömme (pozisyonel kodlama)
+
+        # Positional embedding
         self.pos_embedding = nn.Parameter(torch.randn(1, self.num_patches, d_model))
-        
-        # Zaman adımı gömme
+
+        # Timestep embedding
         self.time_embedding = TimestepEmbedding(d_model)
         self.time_mlp = nn.Sequential(
-            nn.Linear(d_model, d_model * 4),  # Zaman gömme için MLP
-            nn.GELU(),  # Gaussian Error Linear Unit aktivasyonu
-            nn.Linear(d_model * 4, d_model)  # Çıkış katmanı
+            nn.Linear(d_model, d_model * 4),  # Expand the timestep embedding to 4x width
+            nn.GELU(),  # Gaussian Error Linear Unit activation
+            nn.Linear(d_model * 4, d_model)  # Output layer
         )
-        
-        # Sınıf gömme (koşullu üretim için)
+
+        # Class embedding for conditional generation
         self.class_embedding = nn.Embedding(num_classes, d_model)
-        
-        # Transformer katmanları
+
+        # Transformer layers
         self.transformer_layers = nn.ModuleList([
             TransformerBlock(d_model, n_heads, d_ff, dropout)
-            for _ in range(n_layers)  # Belirtilen sayıda transformer katmanı oluştur
+            for _ in range(n_layers)  # Instantiate the requested number of transformer layers
         ])
-        
-        # Çıkış projeksiyonu
-        self.norm = nn.LayerNorm(d_model)  # Son normalizasyon katmanı
-        self.output_projection = nn.Linear(d_model, self.patch_dim)  # Çıkış boyutuna dönüşüm
-        
-        self.dropout = nn.Dropout(dropout)  # Dropout katmanı
-        
+
+        # Output projection
+        self.norm = nn.LayerNorm(d_model)  # Final normalization layer
+        self.output_projection = nn.Linear(d_model, self.patch_dim)  # Project back to the patch dimension
+
+        self.dropout = nn.Dropout(dropout)  # Dropout layer
+
     def patchify(self, x: torch.Tensor) -> torch.Tensor:
-        """Görüntüyü yamalara dönüştür"""
+        """Convert an image into a collection of patches."""
         batch_size, channels, height, width = x.shape
-        
+
         # Reshape to patches
         x = x.reshape(
             batch_size, channels,
@@ -190,21 +190,21 @@ def patchify(self, x: torch.Tensor) -> torch.Tensor:
         x = x.reshape(batch_size, self.num_patches, -1)
         
         return x
-    
+
     def unpatchify(self, x: torch.Tensor) -> torch.Tensor:
-        """Yamaları tekrar görüntüye dönüştür"""
-        batch_size = x.shape[0]  # Toplu iş boyutu
-        height = width = int(self.num_patches ** 0.5)  # Orijinal ızgara boyutları
-        
-        # Yamaları tekrar orijinal formata dönüştür
+        """Reconstruct the image from a collection of patches."""
+        batch_size = x.shape[0]  # Batch size
+        height = width = int(self.num_patches ** 0.5)  # Patches per side of the square patch grid
+
+        # Rearrange patches back to the original image
         x = x.reshape(
             batch_size, height, width, 3, self.patch_size, self.patch_size
         )
-        # Boyutları yeniden düzenle: [batch, channels, height, patch_h, width, patch_w]
+        # Reorder to [batch, channels, height, patch_h, width, patch_w]
         x = x.permute(0, 3, 1, 4, 2, 5).contiguous()
-        # Yama boyutlarını birleştirerek orijinal görüntü boyutuna getir
+        # Merge patch dimensions to recover the original image size
         x = x.reshape(batch_size, 3, height * self.patch_size, width * self.patch_size)
-        
+
         return x
     
     def forward(
@@ -214,261 +214,261 @@ def forward(
         class_labels: Optional[torch.Tensor] = None
     ) -> torch.Tensor:
         """
-        Diffusion transformer'ın ileri geçişi
-        
-        Argümanlar:
-            x: Girdi tensörü (batch_size, channels, height, width)
-            timesteps: Toplu işteki her örnek için zaman adımı
-            class_labels: Koşullu üretim için isteğe bağlı sınıf etiketleri
-            
-        Dönüş:
-            Girdiyle aynı şekilde gürültü tahmini
+        Forward pass of the diffusion transformer.
+
+        Args:
+            x: Input tensor shaped as (batch_size, channels, height, width).
+            timesteps: Timestep per sample in the batch.
+            class_labels: Optional class labels for conditional generation.
+
+        Returns:
+            Predicted noise with the same shape as the input.
         """
-        batch_size = x.shape[0]  # Toplu iş boyutu
-        device = x.device  # Hesaplama cihazı
-        
-        # Görüntüyü yamalara dönüştür
-        x = self.patchify(x)  # Yamalara dönüştürme
-        
-        # Yamaları gömme boyutuna yansıt
-        x = self.patch_embedding(x)  # Yama gömme
-        
-        # Konumsal gömme ekle
-        x = x + self.pos_embedding  # Konumsal bilgi ekle
-        
-        # Zaman adımı gömme ekle
-        t_emb = self.time_embedding(timesteps)  # Zaman adımları için gömme
-        t_emb = self.time_mlp(t_emb)  # Zaman gömme için MLP'den geçir
-        x = x + t_emb.unsqueeze(1)  # Zamansal bilgiyi ekle
-        
-        # İsteğe bağlı olarak sınıf gömme ekle
+        batch_size = x.shape[0]  # Batch size
+        device = x.device  # Device used for computation
+
+        # Convert the image into patches
+        x = self.patchify(x)
+
+        # Project patches to the embedding dimension
+        x = self.patch_embedding(x)
+
+        # Add positional embeddings
+        x = x + self.pos_embedding
+
+        # Add timestep embedding
+        t_emb = self.time_embedding(timesteps)
+        t_emb = self.time_mlp(t_emb)
+        x = x + t_emb.unsqueeze(1)
+
+        # Optionally add class embeddings
         if class_labels is not None:
-            class_emb = self.class_embedding(class_labels)  # Sınıf gömme
-            x = x + class_emb.unsqueeze(1)  # Sınıf bilgisini ekle
-        
-        # Transformer katmanlarını uygula
+            class_emb = self.class_embedding(class_labels)
+            x = x + class_emb.unsqueeze(1)
+
+        # Run through the transformer layers
         for layer in self.transformer_layers:
-            x = layer(x)  # Her bir transformer katmanından geçir
-        
-        # Son normalizasyon katmanı
-        x = self.norm(x)  # Normalizasyon
-        
-        # Yama boyutuna geri yansıt
-        x = self.output_projection(x)  # Çıkış projeksiyonu
-        
-        # Yamaları tekrar görüntüye dönüştür
-        x = self.unpatchify(x)  # Görüntüye dönüştür
-        
-        return x  # Gürültü tahminini döndür
+            x = layer(x)
+
+        # Final normalization
+        x = self.norm(x)
+
+        # Project back to patch space
+        x = self.output_projection(x)
+
+        # Convert patches back to an image
+        x = self.unpatchify(x)
+
+        return x
 
 class DDPMScheduler:
-    """Diffusion süreci için DDPM gürültü çizelgeleyici"""
-    
+    """DDPM noise scheduler for the diffusion process."""
+
     def __init__(self, num_timesteps: int = 1000, beta_start: float = 0.0001, beta_end: float = 0.02):
-        self.num_timesteps = num_timesteps  # Toplam zaman adımı sayısı
-        
-        # Doğrusal beta çizelgesi
-        self.betas = torch.linspace(beta_start, beta_end, num_timesteps)  # Beta değerleri
-        self.alphas = 1.0 - self.betas  # Alfa değerleri (1 - beta)
-        self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)  # Kümülatif çarpım
-        # Önceki kümülatif çarpım (kaydırmalı)
+        self.num_timesteps = num_timesteps  # Total number of timesteps
+
+        # Linear beta schedule
+        self.betas = torch.linspace(beta_start, beta_end, num_timesteps)  # Beta values
+        self.alphas = 1.0 - self.betas  # Alpha values (1 - beta)
+        self.alphas_cumprod = torch.cumprod(self.alphas, dim=0)  # Cumulative product of alphas
+        # Previous cumulative product (shifted)
         self.alphas_cumprod_prev = F.pad(self.alphas_cumprod[:-1], (1, 0), value=1.0)
-        
-        # q(x_t | x_{t-1}) dağılımı için hesaplamalar
-        self.sqrt_alphas_cumprod = torch.sqrt(self.alphas_cumprod)  # Karekök alfa kümülatif çarpım
-        # Karekök (1 - alfa kümülatif çarpım)
+
+        # Quantities for the forward marginal q(x_t | x_0), as used by add_noise
+        self.sqrt_alphas_cumprod = torch.sqrt(self.alphas_cumprod)  # Square root of the cumulative product
+        # Square root of (1 - cumulative product of alphas)
         self.sqrt_one_minus_alphas_cumprod = torch.sqrt(1.0 - self.alphas_cumprod)
-        
-        # q(x_{t-1} | x_t, x_0) posterior dağılımı için hesaplamalar
+
+        # Quantities for the posterior q(x_{t-1} | x_t, x_0)
         self.posterior_variance = self.betas * (1.0 - self.alphas_cumprod_prev) / (1.0 - self.alphas_cumprod)
-        
+
     def add_noise(self, x_0: torch.Tensor, noise: torch.Tensor, timesteps: torch.Tensor) -> torch.Tensor:
-        """Temiz görüntülere gürültü çizelgesine göre gürültü ekle"""
-        # İlgili zaman adımları için ölçeklendirme faktörlerini al
+        """Add noise to clean images according to the noise schedule."""
+        # Gather scaling factors for the selected timesteps
         sqrt_alpha_prod = self.sqrt_alphas_cumprod[timesteps].view(-1, 1, 1, 1).to(x_0.device)
         sqrt_one_minus_alpha_prod = self.sqrt_one_minus_alphas_cumprod[timesteps].view(-1, 1, 1, 1).to(x_0.device)
-        
-        # Temiz görüntülere gürültü ekle
+
+        # Add noise to the clean images
         x_t = sqrt_alpha_prod * x_0 + sqrt_one_minus_alpha_prod * noise
-        
-        return x_t  # Gürültülü görüntüyü döndür
-        
+
+        return x_t  # Return the noisy images
+
     def sample_prev_timestep(self, x_t: torch.Tensor, noise_pred: torch.Tensor, timestep: int) -> torch.Tensor:
-        """x_t ve tahmin edilen gürültü verildiğinde x_{t-1} örnekle"""
+        """Sample x_{t-1} given x_t and the predicted noise."""
         if timestep == 0:
-            return x_t  # Son zaman adımında, sadece tahmin edilen x_0'ı döndür
-            
-        # Bu zaman adımı için parametreleri al
-        alpha_t = self.alphas[timestep]  # Mevcut alfa
-        alpha_cumprod_t = self.alphas_cumprod[timestep]  # Kümülatif alfa
-        alpha_cumprod_prev_t = self.alphas_cumprod_prev[timestep]  # Önceki kümülatif alfa
-        beta_t = self.betas[timestep]  # Mevcut beta
-        sqrt_one_minus_alpha_cumprod_t = self.sqrt_one_minus_alphas_cumprod[timestep]  # Karekök(1 - alfa kümülatif)
-        
-        # Ters süreç için varyans hesapla
+            return x_t  # At the final step simply return x_t
+
+        # Retrieve parameters for this timestep
+        alpha_t = self.alphas[timestep]
+        alpha_cumprod_t = self.alphas_cumprod[timestep]
+        alpha_cumprod_prev_t = self.alphas_cumprod_prev[timestep]
+        beta_t = self.betas[timestep]
+        sqrt_one_minus_alpha_cumprod_t = self.sqrt_one_minus_alphas_cumprod[timestep]
+
+        # Variance for the reverse process
         posterior_variance_t = self.posterior_variance[timestep]
-        
-        # x_t ve tahmin edilen gürültüden x_0'ı tahmin et
+
+        # Estimate x_0 from x_t and the predicted noise
         pred_x0 = (x_t - sqrt_one_minus_alpha_cumprod_t * noise_pred) / torch.sqrt(alpha_cumprod_t)
-        
-        # q(x_{t-1} | x_t, x_0) dağılımının ortalamasını hesapla
-        mean = (torch.sqrt(alpha_cumprod_prev_t) * beta_t * pred_x0 + 
+
+        # Compute the mean of q(x_{t-1} | x_t, x_0)
+        mean = (torch.sqrt(alpha_cumprod_prev_t) * beta_t * pred_x0 +
                 torch.sqrt(alpha_t) * (1 - alpha_cumprod_prev_t) * x_t) / (1 - alpha_cumprod_t)
-        
-        # q(x_{t-1} | x_t, x_0) dağılımından örnekle
+
+        # Sample from the posterior
         if timestep > 0:
-            noise = torch.randn_like(x_t)  # Rastgele gürültü üret
-            variance = torch.sqrt(posterior_variance_t) * noise  # Varyansı uygula
+            noise = torch.randn_like(x_t)  # Random noise
+            variance = torch.sqrt(posterior_variance_t) * noise  # Noise scaled by the posterior standard deviation
         else:
-            variance = 0  # Son adımda varyans yok
-            
-        x_prev = mean + variance  # Ortalama ve varyansı topla
-        
-        return x_prev  # Önceki zaman adımındaki görüntüyü döndür
+            variance = 0  # Unreachable in practice: timestep == 0 already returned above
+
+        x_prev = mean + variance  # Posterior mean plus the scaled noise term
 
-def train_step(model: DiffusionTransformer, 
-               scheduler: DDPMScheduler, 
-               x_batch: torch.Tensor, 
+        return x_prev  # Return the sample for the previous timestep
+
+def train_step(model: DiffusionTransformer,
+               scheduler: DDPMScheduler,
+               x_batch: torch.Tensor,
                class_labels: Optional[torch.Tensor] = None) -> torch.Tensor:
-    """Diffusion transformer için tek bir eğitim adımı"""
-    
-    batch_size = x_batch.shape[0]  # Toplu iş boyutu
-    device = x_batch.device  # Hesaplama cihazı
-    
-    # Toplu işteki her görüntü için rastgele zaman adımları seç
+    """Perform a single training step for the diffusion transformer."""
+
+    batch_size = x_batch.shape[0]  # Batch size
+    device = x_batch.device  # Device used for computation
+
+    # Select random timesteps for each image in the batch
     timesteps = torch.randint(0, scheduler.num_timesteps, (batch_size,), device=device)
-    
-    # Görüntülere eklenecek gürültüyü örnekle
-    noise = torch.randn_like(x_batch)  # Gürültü tensörü oluştur
-    
-    # Temiz görüntülere, her zaman adımındaki gürültü büyüklüğüne göre gürültü ekle
+
+    # Sample the noise that will be added to the images
+    noise = torch.randn_like(x_batch)
+
+    # Add noise scaled according to the timestep schedule
     noisy_images = scheduler.add_noise(x_batch, noise, timesteps)
-    
-    # Gürültü artığını tahmin et
+
+    # Predict the noise residual
     noise_pred = model(noisy_images, timesteps, class_labels)
-    
-    # Kaybı hesapla (tahmin edilen gürültü ile gerçek gürültü arasındaki ortalama kare hata)
+
+    # Compute the mean squared error between predicted and true noise
     loss = F.mse_loss(noise_pred, noise)
-    
-    return loss  # Hata değerini döndür
+
+    return loss
 
 @torch.no_grad()
 def sample_images(
-    model: DiffusionTransformer, 
-    scheduler: DDPMScheduler, 
-    num_samples: int = 4, 
+    model: DiffusionTransformer,
+    scheduler: DDPMScheduler,
+    num_samples: int = 4,
     class_labels: Optional[torch.Tensor] = None,
     device: str = 'cpu'
 ) -> torch.Tensor:
-    """Eğitilmiş diffusion transformer kullanarak örnek görüntüler oluştur"""
-    model.eval()  # Modeli değerlendirme moduna al
-    
-    # Başlangıç gizli değişkeni olarak rastgele gürültü örnekle
-    img_size = model.img_size  # Görüntü boyutu
+    """Generate sample images with a trained diffusion transformer."""
+    model.eval()  # Switch to evaluation mode
+
+    # Sample initial noise as the latent variable
+    img_size = model.img_size  # Image size
     x_t = torch.randn((num_samples, 3, img_size, img_size), device=device)
-    
-    # Eğer sınıf etiketleri verilmediyse ve model koşullu ise rastgele sınıf etiketleri oluştur
+
+    # If no class labels are provided for a conditional model, sample random labels
     if class_labels is None and hasattr(model, 'class_embedding'):
-        num_classes = model.class_embedding.num_embeddings  # Toplam sınıf sayısı
-        class_labels = torch.randint(0, num_classes, (num_samples,), device=device)  # Rastgele sınıf etiketleri
-    
-    # Modelden örnekleme yap
-    with torch.no_grad():  # Gradyan hesaplaması yapma
-        # Zaman adımlarını tersten dolaş
+        num_classes = model.class_embedding.num_embeddings  # Total number of classes
+        class_labels = torch.randint(0, num_classes, (num_samples,), device=device)  # Random class labels
+
+    # Run the reverse diffusion process
+    with torch.no_grad():
+        # Iterate over timesteps in reverse order
         for t in reversed(range(scheduler.num_timesteps)):
-            # Zaman adımları için tensor oluştur
+            # Create a tensor filled with the current timestep index
             timesteps = torch.full((num_samples,), t, device=device, dtype=torch.long)
-            
-            # Gürültüyü tahmin et
+
+            # Predict the noise for the current latent
             noise_pred = model(x_t, timesteps, class_labels)
-            
-            # Bir önceki örneği al
+
+            # Sample the previous timestep
             x_t = scheduler.sample_prev_timestep(x_t, noise_pred, t)
-    
-    # Geçerli piksel aralığına kırp
+
+    # Clamp to the valid pixel range
     x_t = torch.clamp(x_t, -1.0, 1.0)
-    
-    # [-1, 1] aralığından [0, 1] aralığına ölçekle
+
+    # Scale from [-1, 1] to [0, 1]
     x_t = (x_t + 1) / 2
-    
-    return x_t  # Oluşturulan görüntüleri döndür
+
+    return x_t  # Return the generated samples
 
 # Example usage and training loop
 def example_usage():
-    """Diffusion transformer'ın nasıl kullanılacağını göster"""
-    # Hesaplama cihazını ayarla (GPU varsa kullan, yoksa CPU)
+    """Showcase how to use the diffusion transformer."""
+    # Select the compute device (prefer GPU when available)
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-    print(f"Kullanılan cihaz: {device}")
-    
-    # Modeli ve çizelgeleyiciyi başlat
+    print(f"Using device: {device}")
+
+    # Initialize the model and scheduler
     model = DiffusionTransformer(
-        img_size=32,     # Görüntü boyutu
-        patch_size=4,    # Yama boyutu
-        d_model=256,     # Modelin gizli boyutu
-        n_layers=6,      # Transformer katman sayısı
-        n_heads=8,       # Dikkat başlığı sayısı
-        d_ff=1024,       # İleri beslemeli ağın gizli boyutu
-        num_classes=10,  # Sınıf sayısı (CIFAR-10 için)
-        dropout=0.1      # Dropout oranı
-    ).to(device)  # Modeli uygun cihaza taşı
-    
-    # Gürültü çizelgeleyiciyi başlat
+        img_size=32,     # Image size
+        patch_size=4,    # Patch size
+        d_model=256,     # Model hidden size
+        n_layers=6,      # Number of transformer layers
+        n_heads=8,       # Number of attention heads
+        d_ff=1024,       # Hidden size of the feedforward network
+        num_classes=10,  # Number of classes (e.g., CIFAR-10)
+        dropout=0.1      # Dropout probability
+    ).to(device)  # Move model to the selected device
+
+    # Instantiate the noise scheduler
     scheduler = DDPMScheduler(num_timesteps=1000)
-    
-    # Örnek veri oluştur
-    batch_size = 4  # Toplu iş boyutu
-    x = torch.randn(batch_size, 3, 32, 32, device=device)  # Rastgele giriş görüntüleri
-    timesteps = torch.randint(0, 1000, (batch_size,), device=device)  # Rastgele zaman adımları
-    class_labels = torch.randint(0, 10, (batch_size,), device=device)  # Rastgele sınıf etiketleri
-    
-    # İleri geçiş
-    noise_pred = model(x, timesteps, class_labels)  # Gürültü tahmini yap
-    print(f"Girdi şekli: {x.shape}")
-    print(f"Gürültü tahmini şekli: {noise_pred.shape}")
-    
-    # Eğitim adımı
-    loss = train_step(model, scheduler, x, class_labels)  # Eğitim adımını çalıştır
-    print(f"Eğitim kaybı: {loss.item():.4f}")
-    
-    # Örnek görüntüler oluştur
+
+    # Create sample data
+    batch_size = 4  # Batch size
+    x = torch.randn(batch_size, 3, 32, 32, device=device)  # Random input images
+    timesteps = torch.randint(0, 1000, (batch_size,), device=device)  # Random timesteps
+    class_labels = torch.randint(0, 10, (batch_size,), device=device)  # Random class labels
+
+    # Forward pass
+    noise_pred = model(x, timesteps, class_labels)
+    print(f"Input shape: {x.shape}")
+    print(f"Noise prediction shape: {noise_pred.shape}")
+
+    # Single training step
+    loss = train_step(model, scheduler, x, class_labels)
+    print(f"Training loss: {loss.item():.4f}")
+
+    # Generate sample images
     samples = sample_images(model, scheduler, num_samples=4, device=device)
-    print(f"Oluşturulan örneklerin şekli: {samples.shape}")
-    
-    # Örnekleri görselleştir
-    fig, axes = plt.subplots(1, 4, figsize=(12, 3))  # 1x4'lük bir ızgara oluştur
+    print(f"Generated samples shape: {samples.shape}")
+
+    # Visualize the samples
+    fig, axes = plt.subplots(1, 4, figsize=(12, 3))  # Create a 1x4 grid
     for i, ax in enumerate(axes):
-        # Görüntüyü [C, H, W]'dan [H, W, C]'ye çevir ve göster
+        # Convert from [C, H, W] to [H, W, C] and display
         ax.imshow(samples[i].permute(1, 2, 0).cpu().numpy())
-        ax.axis('off')  # Eksenleri kapat
-    plt.tight_layout()  # Görsel düzenlemeyi iyileştir
-    plt.show()  # Görseli göster
-    
-    # Eğitim döngüsunu başlat
-    print("Eğitim başlatılıyor...")
+        ax.axis('off')  # Hide axes
+    plt.tight_layout()  # Improve layout
+    plt.show()  # Display the figure
+
+    # Set up the optimizer for a single demonstration training step
+    print("Starting training...")
     optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
-    
-    # Modeli eğitim moduna al
+
+    # Switch to training mode
     model.train()
-    
-    # Tek bir eğitim adımı gerçekleştir
+
+    # Perform a single training iteration
     loss = train_step(model, scheduler, x, class_labels)
-    
-    # Geri yayılım ve parametre güncelleme
-    optimizer.zero_grad()  # Gradyanları sıfırla
-    loss.backward()  # Gradyanları hesapla
-    optimizer.step()  # Parametreleri güncelle
-    
-    print(f"Eğitim kaybı: {loss.item():.4f}")
-    
-    # Örnek görüntüler oluştur
-    print("Örnek görüntüler oluşturuluyor...")
-    sample_labels = torch.arange(4, device=device)  # İlk 4 sınıf için birer örnek oluştur
+
+    # Backpropagation and parameter update
+    optimizer.zero_grad()  # Reset gradients
+    loss.backward()  # Compute gradients
+    optimizer.step()  # Update parameters
+
+    print(f"Training loss: {loss.item():.4f}")
+
+    # Generate additional sample images
+    print("Generating sample images...")
+    sample_labels = torch.arange(4, device=device)  # Create one sample for the first four classes
     generated_images = sample_images(model, scheduler, num_samples=4, class_labels=sample_labels, device=device)
-    
-    print(f"Oluşturulan görüntülerin boyutu: {generated_images.shape}")
-    print("Örnek oluşturma tamamlandı!")
-    
-    return model, scheduler, generated_images  # Modeli, çizelgeleyiciyi ve oluşturulan görüntüleri döndür
+
+    print(f"Generated images shape: {generated_images.shape}")
+    print("Sampling complete!")
+
+    return model, scheduler, generated_images  # Return the model, scheduler, and generated samples
 
 # Run example
 if __name__ == "__main__":
diff --git a/Genel-5/llada.py b/Genel-5/llada.py
index 5162c4f..d5b8018 100644
--- a/Genel-5/llada.py
+++ b/Genel-5/llada.py
@@ -8,20 +8,20 @@
 from tqdm import tqdm
 from collections import Counter
 
-# HuggingFace veri setini yükle
+# Load the Hugging Face dataset
 dataset = load_dataset('salihturkoglu/se_data_set', split='train')
 instructions = [ex['instruction'] for ex in dataset]
 responses = [ex['response'] for ex in dataset]
 
-# Gelişmiş Türkçe tokenizer
+# Advanced Turkish tokenizer
 def turkish_tokenize(text):
-    # Noktalama, sayılar, Türkçe karakterler ve kelime kökleri için daha iyi ayrıştırma
+    # Better segmentation for punctuation, numbers, Turkish characters, and word stems
     text = re.sub(r"([.,!?;:()\"'])", r" \1 ", text)
     text = re.sub(r"([0-9]+)", r" \1 ", text)
     text = re.sub(r"\s+", " ", text)
     return text.lower().strip().split()
 
-# Vocab oluştur (daha büyük ve çeşitli)
+# Build a richer vocabulary
 PAD_TOKEN = "<PAD>"
 UNK_TOKEN = "<UNK>"
 all_texts = instructions + responses
@@ -36,7 +36,7 @@ def encode(text):
     return [vocab.get(tok, vocab[UNK_TOKEN]) for tok in turkish_tokenize(text)]
 
 def decode(token_ids):
-    # <UNK> oranını azaltmak için tekrarları ve padleri temizle
+    # Remove repeats and pad tokens to reduce the <UNK> ratio
     words = []
     for idx in token_ids:
         if idx == vocab[PAD_TOKEN]:
@@ -47,7 +47,7 @@ def decode(token_ids):
     return " ".join(words)
 
 def build_prompt(instruction, response=None):
-    # Prompt formatı
+    # Prompt format
     if response is not None:
         return f"Instruction: {instruction} Response: {response}"
     else:
@@ -67,7 +67,7 @@ def __init__(self, instructions, responses, vocab, max_len=128, prompt_len=64):
             resp_ids = encode(resp)[:(max_len - prompt_len)]
             resp_ids += [vocab[PAD_TOKEN]] * ((max_len - prompt_len) - len(resp_ids))
             self.inputs.append(torch.tensor(prompt_ids, dtype=torch.long))
-            self.targets.append(torch.tensor(resp_ids, dtype=torch.long))  # Sadece response target!
+            self.targets.append(torch.tensor(resp_ids, dtype=torch.long))  # Response-only target!
 
     def __len__(self):
         return len(self.inputs)
@@ -91,7 +91,7 @@ def add_noise(batch, noise_level=0.5):
     noisy[mask] = random_tokens[mask]
     return noisy
 
-# Cosine noise schedule (daha iyi diffusion için)
+# Cosine noise schedule (better diffusion behavior)
 def cosine_noise_schedule(step, total_steps):
     import math
     return math.cos((step / total_steps) * math.pi / 2)
@@ -115,7 +115,7 @@ def forward(self, prompt, x, timestep, prompt_emb, src_key_padding_mask=None):
         t_emb = self.timestep_embed(timestep).unsqueeze(1)
         prompt_cond = self.prompt_proj(prompt_emb).unsqueeze(1)
         emb = torch.cat([prompt_embs, x_embs], dim=1) + t_emb + prompt_cond
-        # src_key_padding_mask shape düzeltme
+        # Fix the src_key_padding_mask shape
         if src_key_padding_mask is not None:
             # src_key_padding_mask: (batch, response_len) -> (batch, prompt_len + response_len)
             pad = torch.zeros((src_key_padding_mask.shape[0], prompt_embs.shape[1]), dtype=torch.bool, device=src_key_padding_mask.device)
@@ -157,7 +157,7 @@ def train_diffusion_model(model, dataloader, epochs=10, steps=16):
             mask = (batch_targets == vocab[PAD_TOKEN])
             optimizer.zero_grad()
             outputs = model(batch_prompts, noisy_targets, timestep, prompt_emb, src_key_padding_mask=mask)
-            # .view yerine .reshape kullan
+            # Prefer reshape over view
             loss = criterion(outputs.reshape(-1, outputs.size(-1)), batch_targets.reshape(-1))
             loss.backward()
             optimizer.step()
@@ -174,7 +174,7 @@ def generate_response(model, instruction, steps=16, max_len=256, prompt_len=64):
     prompt_ids += [vocab[PAD_TOKEN]] * (prompt_len - len(prompt_ids))
     prompt_tensor = torch.tensor([prompt_ids], dtype=torch.long, device=device)
     prompt_emb = get_prompt_embedding([prompt], vocab, model, prompt_len=prompt_len)
-    # Response kısmı random başlatılır
+    # Initialize the response portion randomly
     response_len = max_len - prompt_len
     response_part = torch.randint(2, len(vocab), (1, response_len), device=device)
     generated = response_part.clone()
@@ -193,12 +193,12 @@ def generate_response(model, instruction, steps=16, max_len=256, prompt_len=64):
 
 test_instruction = instructions[0]
 print('Instruction:', test_instruction)
-print('Gerçek Response:', responses[0])
+print('Ground Truth Response:', responses[0])
 print('Model Response:', generate_response(model, test_instruction, steps=16, max_len=max_len, prompt_len=prompt_len))
 
-test_instruction = "Çift anadal veya yandal yapmak istiyorum. Hangi bölümlerle yapabilirim?"
+test_instruction = "Çift anadal veya yandal yapmak istiyorum. Hangi bölümlerle yapabilirim?"  # Dataset lookup key and model input: keep the original wording
 print('Instruction:', test_instruction)
-print('Gerçek Response:', responses[instructions.index(test_instruction)] if test_instruction in instructions else "Yok")
+print('Ground Truth Response:', responses[instructions.index(test_instruction)] if test_instruction in instructions else "Not Found")
 print('Model Response:', generate_response(model, test_instruction, steps=16, max_len=max_len, prompt_len=prompt_len))
 
 def evaluate_diffusion_model(model, dataset, n_samples=100, steps=16, max_len=256, prompt_len=64):
@@ -228,6 +228,6 @@ def evaluate_diffusion_model(model, dataset, n_samples=100, steps=16, max_len=25
         correct += ((generated == tgt) & mask).sum().item()
         loop.set_postfix(acc=(correct/total if total > 0 else 0.0))
     accuracy = correct / total if total > 0 else 0.0
-    print(f"Test doğruluğu: {accuracy:.2%} ({correct}/{total})")
+    print(f"Test accuracy: {accuracy:.2%} ({correct}/{total})")
 
 evaluate_diffusion_model(model, dataset, n_samples=100, steps=16, max_len=max_len, prompt_len=prompt_len)
\ No newline at end of file
diff --git a/Genel-5/modern_llm_components.py b/Genel-5/modern_llm_components.py
index f066ccd..f11c641 100644
--- a/Genel-5/modern_llm_components.py
+++ b/Genel-5/modern_llm_components.py
@@ -5,13 +5,13 @@
 from typing import Optional, Tuple
 import numpy as np
 
-# 1. RoPE (Rotary Position Embedding) - Llama'da kullanılan
+# 1. RoPE (Rotary Position Embedding) - Used in Llama
 class RotaryPositionalEmbedding(nn.Module):
     """
-    RoPE, pozisyonel bilgiyi doğrudan attention hesaplamasına entegre eder.
-    Avantajları:
-    - Extrapolation capability (training'den uzun sequence'larda çalışır)
-    - Relative position bilgisi
+    RoPE integrates positional information directly into the attention computation.
+    Advantages:
+    - Extrapolation capability (works on sequences longer than the training context)
+    - Relative positional information
     - Efficiency
     """
     def __init__(self, dim: int, max_seq_len: int = 2048, base: float = 10000.0):
@@ -20,11 +20,11 @@ def __init__(self, dim: int, max_seq_len: int = 2048, base: float = 10000.0):
         self.max_seq_len = max_seq_len
         self.base = base
         
-        # Frequency hesaplama
+        # Compute frequencies
         inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2).float() / dim))
         self.register_buffer('inv_freq', inv_freq)
         
-        # Cache için sin/cos değerleri
+        # Cache sin/cos values
         self._set_cos_sin_cache(max_seq_len)
     
     def _set_cos_sin_cache(self, seq_len: int):
@@ -42,9 +42,9 @@ def forward(self, x: torch.Tensor, seq_len: int = None):
         return self.cos_cached[:seq_len], self.sin_cached[:seq_len]
 
 def apply_rotary_pos_emb(q, k, cos, sin):
-    """RoPE uygulama fonksiyonu"""
+    """Apply RoPE to queries and keys"""
     def rotate_half(x):
-        # x'in yarısını rotate et
+        # Rotate half of x
         x1, x2 = x[..., :x.shape[-1]//2], x[..., x.shape[-1]//2:]
         return torch.cat((-x2, x1), dim=-1)
     
@@ -52,12 +52,12 @@ def rotate_half(x):
     k_embed = k * cos + rotate_half(k) * sin
     return q_embed, k_embed
 
-# 2. RMSNorm - LayerNorm'dan daha verimli
+# 2. RMSNorm - More efficient than LayerNorm
 class RMSNorm(nn.Module):
     """
     Root Mean Square Normalization
-    - LayerNorm'dan daha hızlı (mean hesaplama yok)
-    - Llama'da kullanılır
+    - Faster than LayerNorm (no mean computation)
+    - Used in Llama
     """
     def __init__(self, dim: int, eps: float = 1e-6):
         super().__init__()
@@ -65,16 +65,16 @@ def __init__(self, dim: int, eps: float = 1e-6):
         self.weight = nn.Parameter(torch.ones(dim))
     
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        # RMS hesaplama
+        # Compute RMS
         norm = x * torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps)
         return self.weight * norm
 
-# 3. SwiGLU Activation - Llama'nın kullandığı
+# 3. SwiGLU Activation - Used by Llama
 class SwiGLU(nn.Module):
     """
     Swish-Gated Linear Unit
     - GLU (Gated Linear Unit) + Swish activation
-    - Standard FFN'den daha iyi performance
+    - Better performance than a standard FFN
     """
     def __init__(self, dim: int, hidden_dim: int):
         super().__init__()
@@ -91,9 +91,9 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
 # 4. Grouped Query Attention (GQA) - Memory efficient
 class GroupedQueryAttention(nn.Module):
     """
-    GQA - Query/Key/Value head'leri farklı sayıda
-    - Multi-Head Attention ve Multi-Query Attention arası compromise
-    - Memory efficiency + quality balance
+    GQA - Different numbers of query/key/value heads
+    - A compromise between Multi-Head Attention and Multi-Query Attention
+    - Balances memory efficiency and quality
     """
     def __init__(self, dim: int, n_heads: int, n_kv_heads: int):
         super().__init__()
@@ -102,13 +102,13 @@ def __init__(self, dim: int, n_heads: int, n_kv_heads: int):
         self.head_dim = dim // n_heads
         self.group_size = n_heads // n_kv_heads
         
-        # Query için tüm head'ler
+        # Full set of heads for the queries
         self.wq = nn.Linear(dim, n_heads * self.head_dim, bias=False)
-        # Key/Value için daha az head
+        # Fewer heads for keys/values
         self.wk = nn.Linear(dim, n_kv_heads * self.head_dim, bias=False)
         self.wv = nn.Linear(dim, n_kv_heads * self.head_dim, bias=False)
         self.wo = nn.Linear(n_heads * self.head_dim, dim, bias=False)
-        
+
         # RoPE
         self.rope = RotaryPositionalEmbedding(self.head_dim)
     
@@ -124,11 +124,11 @@ def forward(self, x: torch.Tensor, mask: Optional[torch.Tensor] = None):
         cos, sin = self.rope(x, seq_len)
         q, k = apply_rotary_pos_emb(q, k, cos, sin)
         
-        # K, V'yi group_size kadar repeat et
+        # Repeat each K/V head group_size times so K and V line up with the query heads
         k = k.repeat_interleave(self.group_size, dim=2)
         v = v.repeat_interleave(self.group_size, dim=2)
         
-        # Attention hesaplama
+        # Compute attention
         q = q.transpose(1, 2)  # (bsz, n_heads, seq_len, head_dim)
         k = k.transpose(1, 2)
         v = v.transpose(1, 2)
@@ -141,7 +141,7 @@ def forward(self, x: torch.Tensor, mask: Optional[torch.Tensor] = None):
         attn = F.softmax(scores, dim=-1)
         out = torch.matmul(attn, v)
         
-        # Reshape ve output projection
+        # Reshape and project back
         out = out.transpose(1, 2).contiguous().view(bsz, seq_len, -1)
         return self.wo(out)
 
@@ -149,7 +149,7 @@ def forward(self, x: torch.Tensor, mask: Optional[torch.Tensor] = None):
 class TransformerBlock(nn.Module):
     """
     Modern transformer block:
-    - Pre-normalization (norm önce gelir)
+    - Pre-normalization (norm comes first)
     - Residual connections
     - SwiGLU FFN
     - GQA attention
@@ -172,7 +172,7 @@ def forward(self, x: torch.Tensor, mask: Optional[torch.Tensor] = None):
         
         return x
 
-# 6. Weight Tying ve Advanced Initialization
+# 6. Weight tying and advanced initialization
 def scaled_init_(tensor: torch.Tensor, scale: float = 1.0):
     """Modern weight initialization"""
     std = scale / math.sqrt(tensor.shape[-1])
@@ -207,7 +207,7 @@ def __init__(self, vocab_size: int, dim: int, n_layers: int,
-        # Output projection (weight tying ile)
+        # Output projection (with weight tying)
         self.output = nn.Linear(dim, vocab_size, bias=False)
         
-        # Weight tying: input ve output embedding'leri paylaş
+        # Weight tying: share input and output embeddings
         self.output.weight = self.tok_embeddings.weight
         
         # Modern initialization
@@ -227,7 +227,7 @@ def forward(self, tokens: torch.Tensor, targets: Optional[torch.Tensor] = None):
         # Token embeddings
         x = self.tok_embeddings(tokens)
         
-        # Causal mask oluştur
+        # Create a causal mask
         mask = torch.tril(torch.ones(seq_len, seq_len, device=tokens.device))
         mask = mask.unsqueeze(0).unsqueeze(0)  # (1, 1, seq_len, seq_len)
         
@@ -242,29 +242,29 @@ def forward(self, tokens: torch.Tensor, targets: Optional[torch.Tensor] = None):
         logits = self.output(x)
         
         if targets is not None:
-            # Training loss hesaplama
+            # Compute the training loss
             loss = F.cross_entropy(
                 logits.view(-1, self.vocab_size),
                 targets.view(-1),
                 ignore_index=-100
             )
             return logits, loss
-        
+
         return logits
 
 # Usage example
 if __name__ == "__main__":
-    # Model parametreleri (Llama-style)
+    # Model hyperparameters (Llama-style)
     model = ModernLLM(
         vocab_size=32000,
         dim=4096,
         n_layers=32,
         n_heads=32,
-        n_kv_heads=8,  # GQA için daha az KV head
+        n_kv_heads=8,  # Fewer KV heads for GQA
         norm_eps=1e-6
     )
     
-    # Örnek input
+    # Example input
     batch_size, seq_len = 2, 512
     tokens = torch.randint(0, 32000, (batch_size, seq_len))
     
diff --git a/Genel-5/training_inference_techniques.py b/Genel-5/training_inference_techniques.py
index c74f132..52fb1bd 100644
--- a/Genel-5/training_inference_techniques.py
+++ b/Genel-5/training_inference_techniques.py
@@ -12,7 +12,7 @@ class CosineScheduler:
     """
     Cosine Learning Rate Scheduling - Modern optimization
     - Warmup + Cosine decay
-    - Llama ve GPT-4'te kullanılır
+    - Used in models such as Llama and GPT-4
     """
     def __init__(self, optimizer, warmup_steps: int, max_steps: int, 
                  min_lr: float = 0.0, max_lr: float = 1e-4):
@@ -43,13 +43,13 @@ class GradientClipper:
     """
     Gradient Clipping - Training stability
     - Global norm clipping
-    - Exploding gradient problemini çözer
+    - Mitigates exploding gradient issues
     """
     def __init__(self, max_norm: float = 1.0):
         self.max_norm = max_norm
     
     def clip_gradients(self, model: nn.Module) -> float:
-        # Global gradient norm hesapla
+        # Compute the global gradient norm
         total_norm = 0.0
         for p in model.parameters():
             if p.grad is not None:
diff --git a/Time series - Transformers/predict.py b/Time series - Transformers/predict.py
index e16fba1..418f5ff 100644
--- a/Time series - Transformers/predict.py	
+++ b/Time series - Transformers/predict.py	
@@ -5,22 +5,22 @@
 from train import TimeSeriesTransformer
 
 def predict():
-    # Argümanlar
+    # Arguments
     parser = argparse.ArgumentParser()
     parser.add_argument('--model_path', type=str, default='model.pth',
-                      help='Eğitilmiş model dosya yolu')
+                      help='Path to the trained model file')
     parser.add_argument('--data', type=str, default='daily-total-female-births.csv',
-                      help='Veri dosya yolu')
+                      help='Path to the data file')
     parser.add_argument('--steps', type=int, default=10,
-                      help='Tahmin adedi')
+                      help='Number of forecast steps')
     args = parser.parse_args()
     
-    # Cihaz
+    # Device
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-    print(f"Kullanılan cihaz: {device}")
+    print(f"Using device: {device}")
     
-    # Modeli yükle
-    print(f"Model yükleniyor: {args.model_path}")
+    # Load the model
+    print(f"Loading model from: {args.model_path}")
     checkpoint = torch.load(args.model_path, map_location=device)
     
     model = TimeSeriesTransformer(
@@ -32,12 +32,12 @@ def predict():
     model.load_state_dict(checkpoint['model_state_dict'])
     model.eval()
     
-    # Scaler'ı yükle
+    # Restore the scaler and sequence length from the checkpoint
     scaler = checkpoint['scaler']
     seq_length = checkpoint['seq_length']
     
-    # Veriyi yükle
-    print(f"Veri yükleniyor: {args.data}")
+    # Load the data
+    print(f"Loading data: {args.data}")
     df = pd.read_csv(args.data)
     if 'Date' in df.columns:
         dates = pd.to_datetime(df['Date'])
@@ -45,45 +45,45 @@ def predict():
     else:
         dates = pd.RangeIndex(start=0, stop=len(df))
     
-    # Son sequence'i al ve normalize et
+    # Normalise the data, then take the last seq_length steps as the model input
     data = scaler.transform(df.values)
     last_sequence = torch.FloatTensor(data[-seq_length:]).unsqueeze(0).to(device)
     
-    # Tahmin yap
-    print(f"{args.steps} adım tahmin yapılıyor...")
+    # Perform the forecast
+    print(f"Generating {args.steps} step predictions...")
     predictions = []
     with torch.no_grad():
         current_sequence = last_sequence
         for step in range(args.steps):
-            # Tahmin yap
+            # Predict the next value
             pred = model(current_sequence)
             pred_value = pred.item()
             predictions.append(pred_value)
             
-            # Yeni sequence oluştur (sadece ilk sütunu güncelle)
+            # Build the next sequence (update only the first feature)
             next_step = torch.zeros_like(current_sequence[:, 0:1])
-            next_step[0, 0] = pred_value  # Sadece ilk özelliği güncelle
+            next_step[0, 0] = pred_value  # Update only the first feature
             
-            # Yeni sequence: mevcut sequence'nin son seq_length-1 adımını al + yeni tahmin
+            # New sequence: drop the oldest step and append the prediction
             current_sequence = torch.cat([
-                current_sequence[:, 1:],  # İlk adımı çıkar
-                next_step.unsqueeze(1)    # Yeni tahmini ekle
+                current_sequence[:, 1:],  # Remove the first timestep
+                next_step.unsqueeze(1)    # Append the new forecast
             ], dim=1)
-    
-    # Tahminleri orijinal ölçeğe çevir
+
+    # Rescale predictions back to the original domain
     dummy = np.zeros((len(predictions), data.shape[1]))
     dummy[:, 0] = predictions
     predictions = scaler.inverse_transform(dummy)[:, 0]
     
-    # Sonuçları yazdır
-    print("\nTahminler:")
+    # Display predictions
+    print("\nPredictions:")
     last_date = dates[-1] if 'dates' in locals() else len(dates) - 1
     for i, pred in enumerate(predictions, 1):
         if 'dates' in locals():
             pred_date = last_date + pd.DateOffset(days=i)
             print(f"{pred_date.strftime('%Y-%m-%d')}: {pred:.2f}")
         else:
-            print(f"Adım {i}: {pred:.2f}")
+            print(f"Step {i}: {pred:.2f}")
 
 if __name__ == '__main__':
     predict()
diff --git a/Time series - Transformers/train.py b/Time series - Transformers/train.py
index e0fbf08..b89dc59 100644
--- a/Time series - Transformers/train.py	
+++ b/Time series - Transformers/train.py	
@@ -40,7 +40,7 @@ def forward(self, src):
         return self.output_linear(output[:, -1, :])
 
 def train():
-    # Argümanlar
+    # Arguments
     parser = argparse.ArgumentParser()
     parser.add_argument('--data', type=str, default='daily-total-female-births.csv')
     parser.add_argument('--seq_length', type=int, default=24)
@@ -50,23 +50,23 @@ def train():
     parser.add_argument('--model_path', type=str, default='model.pth')
     args = parser.parse_args()
 
-    # Cihaz
+    # Device
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
     
-    # Veriyi yükle ve işle
+    # Load and prepare the data
     df = pd.read_csv(args.data)
     if 'Date' in df.columns:
         df = df.set_index('Date')
     
-    # Normalizasyon
+    # Min-max normalisation
     scaler = MinMaxScaler()
     data = scaler.fit_transform(df.values)
     
-    # Sequence oluştur
+    # Build sliding-window sequences
     X, y = [], []
     for i in range(len(data) - args.seq_length):
         X.append(data[i:i + args.seq_length])
-        y.append(data[i + args.seq_length, 0])  # İlk sütunu tahmin et
+        y.append(data[i + args.seq_length, 0])  # Predict the first column
     
     X = torch.FloatTensor(np.array(X))
     y = torch.FloatTensor(np.array(y)).unsqueeze(-1)
@@ -86,13 +86,13 @@ def train():
         num_layers=2
     ).to(device)
     
-    # Eğitim
+    # Training setup
     criterion = nn.MSELoss()
     optimizer = optim.Adam(model.parameters(), lr=args.lr)
     
     best_val_loss = float('inf')
     for epoch in range(args.epochs):
-        # Train
+        # Training loop
         model.train()
         for X_batch, y_batch in train_loader:
             X_batch, y_batch = X_batch.to(device), y_batch.to(device)
@@ -102,7 +102,7 @@ def train():
             loss.backward()
             optimizer.step()
         
-        # Validation
+        # Validation loop
         model.eval()
         val_loss = 0
         with torch.no_grad():
@@ -124,7 +124,7 @@ def train():
                 'input_dim': X.shape[2]
             }, args.model_path)
     
-    print(f'Model kaydedildi: {args.model_path}')
+    print(f'Model saved to: {args.model_path}')
 
 if __name__ == '__main__':
     train()
diff --git a/Tokenizer/basit_tokenizer.py b/Tokenizer/basit_tokenizer.py
index e7e2500..2e86984 100644
--- a/Tokenizer/basit_tokenizer.py
+++ b/Tokenizer/basit_tokenizer.py
@@ -1,25 +1,16 @@
 from typing import List
 
 class SimpleTokenizer:
-    """
-    Basit bir tokenizer sınıfı. Bu sınıf, metni tokenlara ayırır ve tokenları tekrar metne dönüştürür.
-    """
+    """A minimal tokenizer that maps whitespace-separated tokens to IDs and back."""
 
     def __init__(self):
-        """
-        Tokenizer'ı başlatır. Bu örnekte, boşluklara göre tokenlara ayırma işlemi yapılır.
-        """
-        self.vocab = {}  # Tokenları saklamak için bir sözlük
-        self.id_to_token = {}  # ID'den tokena eşleme yapmak için bir sözlük
-        self.next_id = 0  # Bir sonraki token ID'si
+        """Initialise the tokenizer using whitespace tokenisation."""
+        self.vocab = {}  # Token to ID mapping
+        self.id_to_token = {}  # Reverse lookup from ID to token
+        self.next_id = 0  # Next available token ID
 
     def add_token(self, token: str) -> int:
-        """
-        Yeni bir token ekler ve bir ID atar.
-
-        :param token: Eklenmek istenen token.
-        :return: Token'a atanmış ID.
-        """
+        """Return the ID for token, registering it first if it is not yet in the vocab."""
         if token not in self.vocab:
             self.vocab[token] = self.next_id
             self.id_to_token[self.next_id] = token
@@ -27,41 +18,31 @@ def add_token(self, token: str) -> int:
         return self.vocab[token]
 
     def tokenize(self, text: str) -> List[int]:
-        """
-        Metni tokenlara ayırır ve token ID'lerini döndürür.
-
-        :param text: Tokenlara ayrılacak metin.
-        :return: Token ID'lerinin listesi.
-        """
-        tokens = text.split()  # Metni boşluklara göre ayır
+        """Split text on whitespace and return token IDs, adding unseen tokens to the vocab."""
+        tokens = text.split()  # Split on whitespace
         token_ids = []
         for token in tokens:
-            token_id = self.add_token(token)  # Token'ı ekle ve ID'sini al
+            token_id = self.add_token(token)  # Add token and retrieve its ID
             token_ids.append(token_id)
         return token_ids
 
     def detokenize(self, token_ids: List[int]) -> str:
-        """
-        Token ID'lerini metne dönüştürür.
-
-        :param token_ids: Token ID'lerinin listesi.
-        :return: Tokenlardan oluşturulmuş metin.
-        """
+        """Join the tokens for the given IDs with single spaces; unknown IDs become empty strings."""
         tokens = []
         for token_id in token_ids:
-            token = self.id_to_token.get(token_id, "")  # ID'ye karşılık gelen token'ı al
+            token = self.id_to_token.get(token_id, "")  # Look up the token for each ID
             tokens.append(token)
-        return " ".join(tokens)  # Tokenları birleştir ve metni oluştur
+        return " ".join(tokens)  # Join tokens back into text
 
-# Örnek kullanım
+# Example usage
 if __name__ == "__main__":
     tokenizer = SimpleTokenizer()
 
-    # Metni tokenlara ayır
-    text = "Merhaba dünya! Bu bir örnek metin."
+    # Tokenise text
+    text = "Hello world! This is a sample sentence."
     token_ids = tokenizer.tokenize(text)
-    print(f"Token ID'leri: {token_ids}")
+    print(f"Token IDs: {token_ids}")
 
-    # Token ID'lerini metne dönüştür
+    # Convert IDs back into text
     decoded_text = tokenizer.detokenize(token_ids)
-    print(f"Çözülen metin: {decoded_text}")""
+    print(f"Decoded text: {decoded_text}")