Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
199 changes: 99 additions & 100 deletions Architecture/partial-rope-full-rope.py

Large diffs are not rendered by default.

148 changes: 74 additions & 74 deletions Architecture/sigmoid-gates.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import matplotlib.pyplot as plt

class SigmoidGateExamples(nn.Module):
"""Farklı sigmoid gate örnekleri"""
"""Examples of different sigmoid gate mechanisms."""

def __init__(self, input_dim, hidden_dim):
super().__init__()
Expand All @@ -29,88 +29,88 @@ def __init__(self, input_dim, hidden_dim):
self.highway_transform = nn.Linear(input_dim, input_dim)

def simple_gate(self, x):
    """Scale the leading features of ``x`` by a learned sigmoid gate.

    Args:
        x: Input tensor; it is indexed as ``x[:, :hidden_dim]``, so it must
            have at least two dimensions.

    Returns:
        A ``(output, gate)`` tuple: the gated slice of ``x`` and the gate
        activations that produced it.
    """
    # The sigmoid squashes the projection into (0, 1), so each gate entry
    # acts as a soft on/off switch for the matching feature.
    gate_logits = self.gate_linear(x)
    gate = torch.sigmoid(gate_logits)

    # Only the first ``hidden_dim`` columns of the input are gated.
    leading_features = x[:, :self.hidden_dim]
    return gate * leading_features, gate

def lstm_gates_example(self, x, h, c):
    """Run one LSTM-style update and expose its gate activations.

    The joint projection is split into four chunks: three become sigmoid
    gates (input, forget, output) and the fourth becomes the tanh candidate,
    which is not a gate.

    Args:
        x: Input for the current step.
        h: Previous hidden state; concatenated with ``x`` along dim 1.
        c: Previous cell state.

    Returns:
        ``(h_new, c_new, gates)`` where ``gates`` maps ``'input'``,
        ``'forget'`` and ``'output'`` to their sigmoid activations.
    """
    projected = self.lstm_gates(torch.cat([x, h], dim=1))
    in_raw, forget_raw, cand_raw, out_raw = projected.chunk(4, dim=1)

    input_gate = torch.sigmoid(in_raw)      # how much of the candidate to write
    forget_gate = torch.sigmoid(forget_raw)  # how much of the old cell to keep
    output_gate = torch.sigmoid(out_raw)    # how much of the cell to expose
    candidate = torch.tanh(cand_raw)        # proposed cell content, in (-1, 1)

    # Blend the retained old cell state with the newly written candidate.
    c_new = forget_gate * c + input_gate * candidate
    h_new = output_gate * torch.tanh(c_new)

    gate_values = {
        'input': input_gate,
        'forget': forget_gate,
        'output': output_gate,
    }
    return h_new, c_new, gate_values

def gru_gates_example(self, x, h):
    """Run one simplified GRU-style update and expose its gate activations.

    Note that the reset gate is computed and returned for inspection, but
    this simplified example does not apply it when forming the candidate
    state.

    Args:
        x: Input for the current step.
        h: Previous hidden state; concatenated with ``x`` along dim 1.

    Returns:
        ``(h_new, gates)`` where ``gates`` maps ``'reset'`` and ``'update'``
        to their sigmoid activations.
    """
    projected = self.gru_gates(torch.cat([x, h], dim=1))
    reset_raw, update_raw, cand_raw = projected.chunk(3, dim=1)

    reset_gate = torch.sigmoid(reset_raw)
    update_gate = torch.sigmoid(update_raw)
    candidate = torch.tanh(cand_raw)

    # update_gate near 1 keeps the old state; near 0 adopts the candidate.
    h_new = (1 - update_gate) * candidate + update_gate * h

    return h_new, {'reset': reset_gate, 'update': update_gate}

def glu_example(self, x):
    """Apply a Gated Linear Unit (GLU) to ``x``.

    The linear projection is split in half along dim 1; the first half
    carries the values and the sigmoid of the second half gates them.

    Returns:
        The element-wise product ``a * sigmoid(b)`` of the two halves.
    """
    value_half, gate_half = self.glu_linear(x).chunk(2, dim=1)
    return value_half * torch.sigmoid(gate_half)

def highway_gate_example(self, x):
    """Mix a transformed signal with the raw input via a highway gate.

    Computes ``y = T * H + (1 - T) * x`` where ``T`` is the sigmoid
    transform gate and ``H`` is the ReLU-activated transform of ``x``.
    ``T = 1`` yields the fully transformed signal; ``T = 0`` passes the
    input through unchanged.

    Returns:
        ``(output, T)``: the mixed signal and the transform gate.
    """
    transform_gate = torch.sigmoid(self.highway_gate(x))
    transformed = torch.relu(self.highway_transform(x))

    # The carry share is whatever the transform gate leaves over.
    carry_gate = 1 - transform_gate
    output = transform_gate * transformed + carry_gate * x

    return output, transform_gate


class AttentionGate(nn.Module):
"""Attention mekanizmasında sigmoid gate kullanımı"""
"""Attention mechanism augmented with sigmoid gating."""

def __init__(self, hidden_dim):
super().__init__()
Expand All @@ -125,18 +125,18 @@ def forward(self, query, keys, values):
"""
batch_size, seq_len, hidden_dim = keys.shape

# Query'yi genişlet
# Broadcast the query across the sequence dimension
query_expanded = query.unsqueeze(1).expand(-1, seq_len, -1)

# Attention hesapla
# Compute the attention projection
combined = torch.cat([query_expanded, keys], dim=2)
attention_hidden = torch.tanh(self.attention_linear(combined))

# Sigmoid gate ile attention weights
# Attention weights from the sigmoid gate
attention_scores = self.gate_linear(attention_hidden).squeeze(-1)
attention_weights = torch.sigmoid(attention_scores)

# Normalize (opsiyonel - soft attention için)
# Normalize (optional, for soft attention)
attention_weights = attention_weights / (attention_weights.sum(dim=1, keepdim=True) + 1e-8)

# Weighted sum
Expand All @@ -146,13 +146,13 @@ def forward(self, query, keys, values):


class SigmoidGatingMechanism(nn.Module):
"""Genel amaçlı sigmoid gating mekanizması"""
"""General-purpose sigmoid gating mechanism."""

def __init__(self, input_dim, num_experts=4):
super().__init__()
self.num_experts = num_experts

# Her expert için bir ağ
# A small network for each expert
self.experts = nn.ModuleList([
nn.Linear(input_dim, input_dim) for _ in range(num_experts)
])
Expand All @@ -165,61 +165,61 @@ def __init__(self, input_dim, num_experts=4):
)

def forward(self, x):
    """Combine all expert outputs using independent sigmoid gates.

    Each expert gets its own sigmoid gate value, so the weights are not
    normalized to sum to one across experts.

    Returns:
        ``(output, gates)``: the gate-weighted sum of the expert outputs
        (summed over the expert dimension, dim 1) and the gate values.
    """
    # Stack the per-expert results along a new expert dimension (dim 1).
    per_expert = [expert(x) for expert in self.experts]
    expert_outputs = torch.stack(per_expert, dim=1)

    gates = torch.sigmoid(self.gate_network(x))

    # A trailing singleton axis lets each gate broadcast over its expert's
    # feature vector.
    weighted = gates.unsqueeze(-1) * expert_outputs

    return weighted.sum(dim=1), gates


def visualize_sigmoid_gate():
"""Sigmoid fonksiyonunu ve gate davranışını görselleştir"""
"""Visualize the sigmoid function and gate behaviour."""
x = np.linspace(-10, 10, 1000)
sigmoid = 1 / (1 + np.exp(-x))

fig, axes = plt.subplots(2, 2, figsize=(12, 10))

# 1. Sigmoid fonksiyonu
# 1. Sigmoid function
axes[0, 0].plot(x, sigmoid, 'b-', linewidth=2)
axes[0, 0].axhline(y=0.5, color='r', linestyle='--', alpha=0.5)
axes[0, 0].axvline(x=0, color='r', linestyle='--', alpha=0.5)
axes[0, 0].set_title('Sigmoid Fonksiyonu')
axes[0, 0].set_title('Sigmoid Function')
axes[0, 0].set_xlabel('x')
axes[0, 0].set_ylabel('σ(x)')
axes[0, 0].grid(True, alpha=0.3)

# 2. Gate çarpımı etkisi
# 2. Effect of multiplying by the gate
input_signal = np.sin(x)
gated_signal = sigmoid * input_signal

axes[0, 1].plot(x, input_signal, 'g-', label='Giriş sinyali', alpha=0.7)
axes[0, 1].plot(x, sigmoid, 'r-', label='Gate değeri', alpha=0.7)
axes[0, 1].plot(x, gated_signal, 'b-', label='Gate * Sinyal', linewidth=2)
axes[0, 1].set_title('Gate Çarpımı Etkisi')
axes[0, 1].plot(x, input_signal, 'g-', label='Input signal', alpha=0.7)
axes[0, 1].plot(x, sigmoid, 'r-', label='Gate value', alpha=0.7)
axes[0, 1].plot(x, gated_signal, 'b-', label='Gate * Signal', linewidth=2)
axes[0, 1].set_title('Effect of Gate Multiplication')
axes[0, 1].set_xlabel('x')
axes[0, 1].legend()
axes[0, 1].grid(True, alpha=0.3)

# 3. Farklı gate değerleri
# 3. Different gate values
gate_values = [0.1, 0.3, 0.5, 0.7, 0.9]
colors = plt.cm.viridis(np.linspace(0, 1, len(gate_values)))

for gate, color in zip(gate_values, colors):
axes[1, 0].plot(x, gate * np.sin(x), color=color, label=f'Gate={gate}')
axes[1, 0].set_title('Farklı Gate Değerlerinin Etkisi')

axes[1, 0].set_title('Effect of Different Gate Values')
axes[1, 0].set_xlabel('x')
axes[1, 0].set_ylabel('Gate * sin(x)')
axes[1, 0].legend()
axes[1, 0].grid(True, alpha=0.3)

# 4. LSTM gate dinamikleri
# 4. LSTM gate dynamics
time_steps = 50
forget_gate = np.random.beta(5, 2, time_steps)
input_gate = np.random.beta(2, 5, time_steps)
Expand All @@ -228,9 +228,9 @@ def visualize_sigmoid_gate():
axes[1, 1].plot(forget_gate, 'r-', label='Forget gate', linewidth=2)
axes[1, 1].plot(input_gate, 'g-', label='Input gate', linewidth=2)
axes[1, 1].plot(output_gate, 'b-', label='Output gate', linewidth=2)
axes[1, 1].set_title('LSTM Gate Dinamikleri (Örnek)')
axes[1, 1].set_xlabel('Zaman adımı')
axes[1, 1].set_ylabel('Gate değeri')
axes[1, 1].set_title('LSTM Gate Dynamics (Sample)')
axes[1, 1].set_xlabel('Time step')
axes[1, 1].set_ylabel('Gate value')
axes[1, 1].legend()
axes[1, 1].grid(True, alpha=0.3)
axes[1, 1].set_ylim(0, 1)
Expand All @@ -241,10 +241,10 @@ def visualize_sigmoid_gate():


def demonstrate_gate_effects():
"""Gate'lerin etkilerini göster"""
print("=== Sigmoid Gate Etkileri Demonstrasyonu ===\n")
"""Showcase how different gates behave."""
print("=== Sigmoid Gate Effects Demonstration ===\n")

# Örnek veri
# Sample data
batch_size = 2
input_dim = 4
hidden_dim = 4
Expand All @@ -253,48 +253,48 @@ def demonstrate_gate_effects():
h = torch.randn(batch_size, hidden_dim)
c = torch.randn(batch_size, hidden_dim)

# Model oluştur
# Build model
model = SigmoidGateExamples(input_dim, hidden_dim)

# 1. Basit gate
print("1. Basit Sigmoid Gate:")
# 1. Simple gate
print("1. Simple Sigmoid Gate:")
output, gate = model.simple_gate(x)
print(f" Giriş boyutu: {x.shape}")
print(f" Gate değerleri: {gate[0, :4].detach().numpy()}")
print(f" Çıktı: {output[0, :4].detach().numpy()}\n")
print(f" Input shape: {x.shape}")
print(f" Gate values: {gate[0, :4].detach().numpy()}")
print(f" Output: {output[0, :4].detach().numpy()}\n")

# 2. LSTM gates
print("2. LSTM Gates:")
h_new, c_new, lstm_gates = model.lstm_gates_example(x, h, c)
print(f" Input gate ortalaması: {lstm_gates['input'].mean().item():.3f}")
print(f" Forget gate ortalaması: {lstm_gates['forget'].mean().item():.3f}")
print(f" Output gate ortalaması: {lstm_gates['output'].mean().item():.3f}\n")
print(f" Input gate mean: {lstm_gates['input'].mean().item():.3f}")
print(f" Forget gate mean: {lstm_gates['forget'].mean().item():.3f}")
print(f" Output gate mean: {lstm_gates['output'].mean().item():.3f}\n")

# 3. GRU gates
print("3. GRU Gates:")
h_new, gru_gates = model.gru_gates_example(x, h)
print(f" Reset gate ortalaması: {gru_gates['reset'].mean().item():.3f}")
print(f" Update gate ortalaması: {gru_gates['update'].mean().item():.3f}\n")
print(f" Reset gate mean: {gru_gates['reset'].mean().item():.3f}")
print(f" Update gate mean: {gru_gates['update'].mean().item():.3f}\n")

# 4. Highway gate
print("4. Highway Gate:")
output, transform_gate = model.highway_gate_example(x)
print(f" Transform gate ortalaması: {transform_gate.mean().item():.3f}")
print(f" Bypass oranı: {(1 - transform_gate).mean().item():.3f}\n")
print(f" Transform gate mean: {transform_gate.mean().item():.3f}")
print(f" Bypass rate: {(1 - transform_gate).mean().item():.3f}\n")

# 5. Expert gating
print("5. Expert Gating:")
expert_model = SigmoidGatingMechanism(input_dim, num_experts=4)
output, expert_gates = expert_model(x)
print(f" Expert gate değerleri: {expert_gates[0].detach().numpy()}")
print(f" En aktif expert: {expert_gates[0].argmax().item()}")
print(f" Expert gate values: {expert_gates[0].detach().numpy()}")
print(f" Most active expert: {expert_gates[0].argmax().item()}")


if __name__ == "__main__":
# Görselleştirme
# Visualization
visualize_sigmoid_gate()
# Demonstrasyon

# Demonstration
demonstrate_gate_effects()
print("\n✅ Sigmoid gates demonstrasyonu tamamlandı!")

print("\n✅ Sigmoid gates demonstration completed!")
8 changes: 4 additions & 4 deletions Genel-2/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@
import polars as pl
from huggingface_hub import login

# Hugging Face'e giriş yapmak - Environment variable kullan
# Log in to Hugging Face using the environment variable
hf_token = os.getenv('HUGGINGFACE_TOKEN')
if hf_token:
login(hf_token)
else:
print("Uyarı: HUGGINGFACE_TOKEN environment variable bulunamadı. Bazı özel modellere erişiminiz olmayabilir.")
print("Warning: HUGGINGFACE_TOKEN environment variable not found. You may not have access to private models.")

# Make sure the dataset path is correct so that the right file is loaded from Hugging Face
try:
df = pl.read_parquet('hf://datasets/HuggingFaceM4/the_cauldron/textcaps/train-00011-of-00012-baf9399db4a7051d.parquet')
print("Veri kümesi yüklendi!")
print("Dataset loaded!")
print(df.head())
except Exception as e:
print("Veri kümesi yüklenirken bir hata oluştu:", e)
print("An error occurred while loading the dataset:", e)
Loading
Loading