-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathutils.py
More file actions
97 lines (73 loc) · 2.64 KB
/
utils.py
File metadata and controls
97 lines (73 loc) · 2.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import os
import random
import pandas as pd
import torch
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader, Dataset
from params import param
def read_data(file_path_dataset):
    """Load a tab-separated dataset into a pandas DataFrame."""
    return pd.read_csv(file_path_dataset, sep='\t')
def make_cuda(tensor):
    """Move *tensor* to the GPU when CUDA is available; otherwise return it unchanged."""
    return tensor.cuda() if torch.cuda.is_available() else tensor
def init_random_seed(manual_seed):
    """Seed the python and torch RNGs; draw a random seed when none is given."""
    seed = random.randint(1, 10000) if manual_seed is None else manual_seed
    print("use random seed: {}".format(seed))
    random.seed(seed)
    torch.manual_seed(seed)
    # CUDA RNGs are seeded separately and only when a GPU is present.
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
def init_model(net, restore=None):
    """Optionally restore weights into *net* and move it to the GPU.

    Args:
        net: a torch.nn.Module to initialize.
        restore: optional path to a saved state dict; ignored when missing.

    Returns:
        The same module, weight-restored and on the GPU when CUDA is available.
    """
    # restore model weights
    if restore is not None and os.path.exists(restore):
        # map_location guards against loading a GPU-saved checkpoint on a
        # CPU-only host, which would otherwise raise a deserialization error.
        device = None if torch.cuda.is_available() else 'cpu'
        net.load_state_dict(torch.load(restore, map_location=device))
        print("Restore model from: {}".format(os.path.abspath(restore)))
    # check if cuda is available
    if torch.cuda.is_available():
        cudnn.benchmark = True
        net.cuda()
    return net
def save_model(net, filename):
    """Save the trained model's state dict under param.model_root/filename."""
    if not os.path.exists(param.model_root):
        os.makedirs(param.model_root)
    target = os.path.join(param.model_root, filename)
    torch.save(net.state_dict(), target)
    print("save pretrained model to: {}".format(target))
def get_data_loader(sequences, labels, maxlen=None):
    """Wrap *sequences*/*labels* in a TextDataset and return a shuffling DataLoader."""
    dataset = TextDataset(sequences, labels, maxlen)
    return DataLoader(dataset=dataset,
                      batch_size=param.batch_size,
                      shuffle=True)
class TextDataset(Dataset):
    """Dataset of [CLS]-prefixed, padded/truncated token-id sequences with labels.

    Each sequence gets token id 101 ([CLS] per the original comment) prepended,
    then is right-padded with 0 or truncated so every row has the same length.
    """

    def __init__(self, sequences, labels, maxlen):
        # Effective length is capped at the longest raw sequence; a larger
        # explicit maxlen is clamped down to it (original behavior preserved).
        seqlen = max(len(sequence) for sequence in sequences)
        if maxlen is None or maxlen > seqlen:
            maxlen = seqlen
        seq_data = []
        for sequence in sequences:
            # Build a [CLS]-prefixed copy: the original mutated the caller's
            # list via insert(0, 101), a surprising side effect.
            row = [101] + list(sequence)
            if len(row) < maxlen:
                row.extend([0] * (maxlen - len(row)))  # right-pad with 0
            else:
                row = row[:maxlen]  # truncate; may drop trailing tokens
            seq_data.append(row)
        # Only move tensors to the GPU when one exists; the original
        # unconditional .cuda() crashed on CPU-only hosts.
        self.data = torch.LongTensor(seq_data)
        self.labels = torch.LongTensor(labels)
        if torch.cuda.is_available():
            self.data = self.data.cuda()
            self.labels = self.labels.cuda()
        self.dataset_size = len(self.data)

    def __getitem__(self, index):
        review, label = self.data[index], self.labels[index]
        return review, label

    def __len__(self):
        return self.dataset_size