-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathutils.py
More file actions
70 lines (61 loc) · 2.13 KB
/
utils.py
File metadata and controls
70 lines (61 loc) · 2.13 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
from collections import Counter
from itertools import combinations

import numpy as np
import scipy.sparse as sp
import torch
def make_window(sentences, window_size):
    """Split every sentence into sliding windows of ``window_size`` items.

    A sentence whose length is at most ``window_size`` is kept whole (the
    very same object is appended, not a copy). A longer sentence contributes
    one slice per start offset, i.e. ``len(sentence) - window_size + 1``
    overlapping windows with a stride of one.

    Args:
        sentences: Iterable of sliceable sequences that support ``len``.
            The original author's note says this is ``train_data[][1]``.
        window_size: Number of items per window.

    Returns:
        A flat list with the windows of all sentences, in input order.
    """
    collected = []
    for tokens in sentences:
        n_tokens = len(tokens)
        if n_tokens > window_size:
            # Slide a fixed-size view over the sentence, one step at a time.
            collected.extend(
                tokens[start:start + window_size]
                for start in range(n_tokens - window_size + 1)
            )
            continue
        # Short sentence: it is its own (single) window.
        collected.append(tokens)
    return collected
def count_word(windows, word):
    """Return the number of windows that contain ``word``.

    Each window counts at most once, no matter how many times ``word``
    occurs inside it. Membership is decided with the ``in`` operator.

    Args:
        windows: Iterable of containers to search.
        word: The item to look for.

    Returns:
        The count of windows for which ``word in window`` is true.
    """
    return sum(1 for window in windows if word in window)
def count_word_freq(vocab, windows):
    """Count, for every vocabulary word, how many windows contain it.

    The windows are scanned once and each distinct token is counted once per
    window; the vocabulary is then looked up in that table. This replaces a
    full rescan of all windows for every vocabulary word.

    Args:
        vocab: Iterable of words. Duplicates are harmless; the result has
            one key per distinct word, in order of first appearance.
        windows: Iterable of windows, each an iterable of hashable tokens
            (e.g. lists of words).

    Returns:
        A dict mapping each word in ``vocab`` to the number of windows it
        appears in. Words that appear in no window map to ``0``.
    """
    containing_windows = Counter()
    for window in windows:
        # set() makes a token count once per window, however often it
        # repeats inside that window.
        containing_windows.update(set(window))
    # Counter returns 0 for unseen keys without inserting them.
    return {word: containing_windows[word] for word in vocab}
def count_pair_freq(windows):
    """Count unordered co-occurrences of item pairs within each window.

    Every 2-combination of positions in a window contributes one count, so
    an item repeated inside a window pairs with itself. A pair is treated as
    unordered: ``(a, b)`` and ``(b, a)`` share a single entry, stored under
    whichever orientation was encountered first.

    Args:
        windows: Iterable of windows, each an iterable of hashable items.

    Returns:
        A dict mapping a 2-tuple of items to its co-occurrence count.
    """
    pair_freq = {}
    for window in windows:
        for first, second in combinations(window, 2):
            key = (first, second)
            if key not in pair_freq:
                # Reuse the reversed key if this pair was first seen in the
                # opposite order; otherwise a new entry is created below.
                mirrored = (second, first)
                if mirrored in pair_freq:
                    key = mirrored
            pair_freq[key] = pair_freq.get(key, 0) + 1
    return pair_freq
def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse COO tensor.

    The matrix is converted to COO format and its values are cast to
    ``float32``; the row/column coordinates are stacked into a ``2 x nnz``
    ``int64`` index tensor.

    Args:
        sparse_mx: A scipy sparse matrix (anything providing ``tocoo()``).

    Returns:
        A ``float32`` torch sparse COO tensor with the same shape and
        non-zero entries as ``sparse_mx``.
    """
    coo = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(
        np.vstack((coo.row, coo.col)).astype(np.int64))
    values = torch.from_numpy(coo.data)
    shape = torch.Size(coo.shape)
    # torch.sparse.FloatTensor is a deprecated legacy constructor;
    # sparse_coo_tensor is the supported factory and takes the dtype
    # (float32) from ``values``.
    return torch.sparse_coo_tensor(indices, values, shape)
def accuracy(output, labels):
    """Return the fraction of rows whose arg-max matches the label.

    The predicted class of each row is the index of its maximum along
    dimension 1 of ``output``; predictions are cast to the type of
    ``labels`` before the element-wise comparison.

    Args:
        output: Tensor of per-class scores, reduced along dimension 1.
        labels: Tensor of target class indices.

    Returns:
        A double-precision scalar tensor: the number of matches divided by
        ``len(labels)``.
    """
    _, predicted = output.max(1)
    predicted = predicted.type_as(labels)
    n_correct = predicted.eq(labels).double().sum()
    return n_correct / len(labels)
def normalize(mx):
    """Row-normalize a sparse matrix.

    Every row is multiplied by the reciprocal of its sum. Rows whose sum is
    zero have an infinite reciprocal, which is replaced by ``0`` so those
    rows come out as all zeros.

    Args:
        mx: Matrix to normalize (a scipy sparse matrix). Integer-valued
            matrices are accepted; their row sums are promoted to float
            before inversion.

    Returns:
        ``diag(1 / rowsum) @ mx``, the row-scaled matrix.
    """
    rowsum = np.array(mx.sum(1))
    # np.power raises ValueError for an integer array with a negative
    # exponent, so promote non-floating row sums first. Floating inputs
    # keep their dtype.
    if not np.issubdtype(rowsum.dtype, np.inexact):
        rowsum = rowsum.astype(np.float64)
    # Zero row sums yield inf here on purpose; silence the warning and
    # zero them out right after.
    with np.errstate(divide="ignore"):
        r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    return r_mat_inv.dot(mx)