# main.py
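"""Train and evaluate a Transformer classifier.

Loads a saved vocabulary, trains the model with AdamW under a
cosine-annealing learning-rate schedule (with mixed-precision scaling on
GPU), records per-epoch metrics, saves the checkpoints and history under
models/, and finally reports loss and accuracy on the test set.
"""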
import os
import torch
from torch.optim import AdamW
from torch.nn import CrossEntropyLoss
from torch.cuda.amp import GradScaler
from torch.optim.lr_scheduler import CosineAnnealingLR
from transformer.transformer import Transformer
from train import train_epoch, eval_model, get_predictions
from dataset import train_dataloader, val_dataloader, test_dataloader

# Load the saved vocabulary; its size determines the embedding table
vocab = torch.load('models/vocab.pth')
print(f'Vocab size: {len(vocab)}')

# Model and training hyperparameters
VOCAB_SIZE = len(vocab)
EMBED_SIZE = 512
NUM_LAYERS = 6
HEADS = 8
FORWARD_EXPANSION = 4
DROPOUT = 0.1
MAX_LENGTH = 512
NUM_CLASSES = 7
LEARNING_RATE = 1e-6

def train():
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f"Using device: {device}")
    if device == 'cpu':
        torch.set_num_threads(16)
    else:
        # Enable cuDNN autotuning and TF32 matmuls for faster GPU training
        torch.backends.cudnn.benchmark = True
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True
    scaler = GradScaler()

    epochs = int(input('Enter the number of epochs: '))

    model = Transformer(EMBED_SIZE, NUM_LAYERS, HEADS, device, FORWARD_EXPANSION, DROPOUT, MAX_LENGTH, VOCAB_SIZE, NUM_CLASSES).to(device)
    optimizer = AdamW(model.parameters(), lr=LEARNING_RATE, betas=(0.9, 0.98), eps=1e-9)
    loss_fn = CrossEntropyLoss(ignore_index=0).to(device)  # index 0 is the padding token
    scheduler = CosineAnnealingLR(optimizer, T_max=epochs)

    train_losses = []
    train_accs = []
    val_losses = []
    val_accs = []
    learning_rates = []

    for epoch in range(epochs):
        print(f'Epoch {epoch + 1}/{epochs}')
        print('-' * 10)
        # Mixed-precision scaling is only used on GPU
        if device == 'cpu':
            train_acc, train_loss = train_epoch(model, train_dataloader, loss_fn, optimizer, device, scheduler)
        else:
            train_acc, train_loss = train_epoch(model, train_dataloader, loss_fn, optimizer, device, scheduler, scaler)
        print(f'Train loss: {train_loss}, Accuracy: {train_acc}')

        val_acc, val_loss = eval_model(model, val_dataloader, loss_fn, device)
        print(f'Val loss: {val_loss}, Accuracy: {val_acc}')
        print()

        # Record one entry per epoch
        train_losses.append(train_loss)
        train_accs.append(train_acc)
        val_losses.append(val_loss)
        val_accs.append(val_acc)
        learning_rates.append(optimizer.param_groups[0]['lr'])

    history = {
        'train_losses': train_losses,
        'train_accs': train_accs,
        'val_losses': val_losses,
        'val_accs': val_accs,
        'learning_rates': learning_rates
    }
    hyper_params = {
        'VOCAB_SIZE': VOCAB_SIZE,
        'EMBED_SIZE': EMBED_SIZE,
        'NUM_LAYERS': NUM_LAYERS,
        'HEADS': HEADS,
        'FORWARD_EXPANSION': FORWARD_EXPANSION,
        'DROPOUT': DROPOUT,
        'MAX_LENGTH': MAX_LENGTH,
        'NUM_CLASSES': NUM_CLASSES,
        'LEARNING_RATE': LEARNING_RATE
    }

    # Persist weights, full model, optimizer state, metrics, and hyperparameters
    os.makedirs('models', exist_ok=True)
    torch.save(model.state_dict(), 'models/model_state.pth')
    torch.save(model, 'models/model.pth')
    torch.save(optimizer.state_dict(), 'models/optimizer.pth')
    torch.save(history, 'models/history.pth')
    torch.save(hyper_params, 'models/hyper_params.pth')

def test():
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f"Using device: {device}")
    if device == 'cpu':
        torch.set_num_threads(16)
    else:
        torch.backends.cudnn.benchmark = True
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True

    # Rebuild the model and load the trained weights; load_state_dict expects
    # the state dict saved in train(), not the pickled model file
    model = Transformer(EMBED_SIZE, NUM_LAYERS, HEADS, device, FORWARD_EXPANSION, DROPOUT, MAX_LENGTH, VOCAB_SIZE, NUM_CLASSES).to(device)
    model.load_state_dict(torch.load('models/model_state.pth'))
    loss_fn = CrossEntropyLoss(ignore_index=0).to(device)

    test_acc, test_loss = get_predictions(model, test_dataloader, loss_fn, device)
    print(f'Test loss: {test_loss}, Accuracy: {test_acc}')
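
def show_history():
    # Minimal sketch of reusing the artifacts saved by train(): reload the
    # history dict (assumes 'models/history.pth' exists from a prior run)
    # and print the recorded per-epoch metrics. Hypothetical helper; it is
    # not called by the main flow below.
    history = torch.load('models/history.pth')
    for i, (tl, vl) in enumerate(zip(history['train_losses'], history['val_losses']), 1):
        print(f"Epoch {i}: train loss {tl}, val loss {vl}, "
              f"val acc {history['val_accs'][i - 1]}, lr {history['learning_rates'][i - 1]}")
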
if __name__ == '__main__':
    train()
    test()