Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions recognition/45616756-GCN/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Multi-layer GCN on Facebook Large Page-Page Network Dataset

## Introduction to GCN:
GCN is a neural network for graph data. The graph consists of two components: nodes and edges. Each node in the graph has features. Our task is to label each node with a given categorical class (node classification).

![GCN](./data/GCN.png)

## How GCN Works in General:
1. Create an N by N adjacency matrix (N is the number of nodes)
2. Create an N by D feature matrix (D is the number of features per node)
3. Normalize the adjacency and feature matrices
4. Create a two-layer Graph Convolutional Network
5. Train and test on the dataset

## Dependencies Required:
- Python
- Numpy
- Pytorch
- Matplotlib
- Sklearn
- Scipy
- Pandas

## Results:
#### Loss Plot
![GCN](./data/Loss.png)
#### Training Plot
![GCN](./data/Accuracy.png)
#### Node embeddings
![GCN](./data/Embedding.png)

## Reference
[1] https://arxiv.org/abs/1609.02907
Binary file added recognition/45616756-GCN/data/Accuracy.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added recognition/45616756-GCN/data/Embedding.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added recognition/45616756-GCN/data/GCN.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added recognition/45616756-GCN/data/Loss.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
162 changes: 162 additions & 0 deletions recognition/45616756-GCN/driver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
import numpy as np
import torch
import torch.nn.functional as F
import torch.optim as optim
from scipy.sparse import coo_matrix, csr_matrix, eye, diags
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.manifold import TSNE

from model import GCN


def accuracy(output, features):
    """Fraction of rows in `output` whose arg-max class matches the label.

    output   -- (N, C) tensor of per-class scores (e.g. log-probabilities).
    features -- (N,) tensor of ground-truth class indices.
    Returns a 0-dim double tensor in [0, 1].
    """
    predictions = output.argmax(dim=1).type_as(features)
    matches = (predictions == features).double()
    return matches.sum() / len(features)


def _row_normalize(matrix):
    """Left-multiply a sparse matrix by the inverse of its row-sum matrix.

    Computes D^-1 @ M so that every non-empty row sums to 1. Rows that sum
    to zero (e.g. all-zero feature rows) are left as zeros instead of
    becoming inf, which the previous inline version would produce.
    """
    degrees = np.array(matrix.sum(axis=1)).flatten()
    with np.errstate(divide='ignore'):
        inverse_degrees = np.power(degrees, -1)
    inverse_degrees[np.isinf(inverse_degrees)] = 0.0
    return diags(inverse_degrees).dot(matrix)


def main():
    """Train and evaluate a two-layer GCN on the Facebook page-page dataset.

    Loads ./data/facebook.npz, builds a row-normalized adjacency matrix with
    self-loops, trains for 200 epochs, prints train/validation/test metrics,
    and saves the learning curves plus a TSNE embedding for plot.py.
    """
    # Load Facebook dataset (edge list, node features, node labels)
    data = np.load('./data/facebook.npz')

    facebook_edges = data['edges']
    facebook_features = csr_matrix(data['features'])
    facebook_target = data['target']

    # Derive the node count from the data instead of hard-coding 22470.
    num_nodes = facebook_features.shape[0]

    # Build the sparse adjacency matrix directly from the edge list. The
    # previous version materialised a dense num_nodes x num_nodes float32
    # array (~2 GB for this dataset) only to convert it to COO afterwards.
    ones = np.ones(len(facebook_edges), dtype='float32')
    adjacency_matrix = coo_matrix(
        (ones, (facebook_edges[:, 0], facebook_edges[:, 1])),
        shape=(num_nodes, num_nodes), dtype='float32'
    ).tocsr()
    # CSR conversion sums duplicate edges; clamp back to 1 so the matrix
    # matches a plain 0/1 adjacency assignment.
    adjacency_matrix.data[:] = 1

    # Convert each categorical label to a one-hot row (one-hot encoding)
    lb = preprocessing.LabelBinarizer()
    facebook_target = lb.fit_transform(facebook_target)
    num_classes = facebook_target.shape[1]

    # Split the targets: 20% train, then 20% of the remainder for validation
    # — an effective 20:16:64 train/validation/test split. shuffle=False
    # keeps node order aligned with the feature and adjacency matrices.
    facebook_train_target, facebook_test_target = train_test_split(
        facebook_target, train_size=0.20, shuffle=False
    )
    facebook_validation_target, facebook_test_target = train_test_split(
        facebook_test_target, train_size=0.20, shuffle=False
    )

    # Normalize the adjacency matrix: D^-1 (A + I), self-loops added first
    a_tilde = adjacency_matrix + eye(num_nodes, dtype='float32')
    adjacency_matrix = _row_normalize(a_tilde).tocoo()

    # Row-normalize the features matrix
    facebook_features = _row_normalize(facebook_features)

    # Convert to tensors; one-hot targets become class-index vectors
    facebook_features = torch.FloatTensor(np.array(facebook_features.todense()))
    facebook_train_target = torch.LongTensor(np.where(facebook_train_target)[1])
    facebook_validation_target = torch.LongTensor(np.where(facebook_validation_target)[1])
    facebook_test_target = torch.LongTensor(np.where(facebook_test_target)[1])
    adjacency_matrix = torch.sparse.FloatTensor(
        torch.LongTensor(np.vstack((adjacency_matrix.row, adjacency_matrix.col))),
        torch.FloatTensor(adjacency_matrix.data),
        torch.Size(adjacency_matrix.shape)
    )

    # Print output
    print('facebook_features:', facebook_features)
    print('facebook_train_target:', facebook_train_target.size())
    print('facebook_validation_target:', facebook_validation_target.size())
    print('facebook_test_target:', facebook_test_target.size())
    print('adjacency_matrix:', adjacency_matrix)

    # Two-layer GCN; hyper-parameters follow Kipf & Welling (2017).
    # num_classes is derived from the labels instead of hard-coded.
    model = GCN(input_size=facebook_features.shape[1],
                hidden_size=16,
                num_classes=num_classes,
                dropout=0.5)
    optimizer = optim.Adam(model.parameters(),
                           lr=0.01,
                           weight_decay=5e-4)

    train_size = facebook_train_target.size()[0]
    validation_size = facebook_validation_target.size()[0]
    test_size = facebook_test_target.size()[0]
    total_target_size = train_size + validation_size + test_size

    train_losses = []
    train_accuracies = []
    validation_losses = []
    validation_accuracies = []

    # Training: full-batch gradient descent over all nodes each epoch;
    # the loss is masked to the first train_size rows (shuffle=False above
    # guarantees they correspond to the training targets).
    for epoch in range(200):
        model.train()
        optimizer.zero_grad()
        output = model(facebook_features, adjacency_matrix)
        train_loss = F.nll_loss(output[:train_size],
                                facebook_train_target)
        train_losses.append(train_loss.item())
        train_accuracy = accuracy(output[:train_size],
                                  facebook_train_target)
        train_accuracies.append(train_accuracy.item())
        train_loss.backward()
        optimizer.step()

        # Validation pass with dropout disabled
        model.eval()
        output = model(facebook_features, adjacency_matrix)
        validation_loss = F.nll_loss(output[train_size:train_size + validation_size],
                                     facebook_validation_target)
        validation_losses.append(validation_loss.item())
        validation_accuracy = accuracy(output[train_size:train_size + validation_size],
                                       facebook_validation_target)
        # .item() keeps this a plain-float list like train_accuracies; the
        # previous version appended tensors, which made the saved .npy an
        # object array that plot.py could not np.load without pickle.
        validation_accuracies.append(validation_accuracy.item())

        print('Epoch: {:04d}'.format(epoch + 1),
              'Train loss: {:.4f}'.format(train_loss.item()),
              'Train accuracy: {:.4f}'.format(train_accuracy.item()),
              'Validation loss: {:.4f}'.format(validation_loss.item()),
              'Validation accuracy: {:.4f}'.format(validation_accuracy.item()))

    # Test on the held-out remainder of the nodes
    model.eval()
    output = model(facebook_features, adjacency_matrix)
    test_loss = F.nll_loss(output[train_size + validation_size:total_target_size],
                           facebook_test_target)
    test_accuracy = accuracy(output[train_size + validation_size:total_target_size],
                             facebook_test_target)

    print('Test set results:',
          'Test loss: {:.4f}'.format(test_loss.item()),
          'Test accuracy: {:.4f}'.format(test_accuracy.item()))

    # Persist learning curves for plot.py
    np.save('train_losses', train_losses)
    np.save('train_accuracies', train_accuracies)
    np.save('validation_losses', validation_losses)
    np.save('validation_accuracies', validation_accuracies)

    # Node embeddings: project the final log-softmax outputs to 2-D via TSNE
    model.eval()
    with torch.no_grad():
        x = model(facebook_features, adjacency_matrix)
        x_embedded = TSNE(n_components=2).fit_transform(x.numpy())

    np.save('x_embedded', x_embedded)


if __name__ == '__main__':
    main()
43 changes: 43 additions & 0 deletions recognition/45616756-GCN/model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter


class GraphConvolution(nn.Module):
    """One graph-convolution layer: computes A_hat @ X @ W + b.

    `A_hat` is expected to be a (pre-normalized) sparse adjacency tensor,
    `X` a dense node-feature matrix.
    """

    def __init__(self, in_features, out_features):
        super(GraphConvolution, self).__init__()
        self.weight = Parameter(torch.FloatTensor(in_features, out_features))
        self.bias = Parameter(torch.FloatTensor(out_features))
        # Uniform init scaled by 1/sqrt(fan-out), matching the reference
        # GCN implementation.
        bound = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-bound, bound)
        self.bias.data.uniform_(-bound, bound)

    def forward(self, x, adjacency_matrix):
        # Feature transform first, then sparse neighborhood aggregation.
        support = torch.mm(x, self.weight)
        aggregated = torch.spmm(adjacency_matrix, support)
        return aggregated + self.bias


class GCN(nn.Module):
    """Two-layer graph convolutional network for node classification.

    Layer 1 maps input features to a hidden representation (ReLU + dropout);
    layer 2 maps the hidden representation to per-class log-probabilities.
    """

    def __init__(self, input_size, hidden_size, num_classes, dropout=0.5):
        super(GCN, self).__init__()
        self.gconv1 = GraphConvolution(input_size, hidden_size)
        self.gconv2 = GraphConvolution(hidden_size, num_classes)
        self.dropout = dropout

    def forward(self, x, adjacency_matrix):
        # Dropout is active only in training mode (self.training).
        hidden = F.relu(self.gconv1(x, adjacency_matrix))
        hidden = F.dropout(hidden, self.dropout, training=self.training)
        logits = self.gconv2(hidden, adjacency_matrix)
        return F.log_softmax(logits, dim=1)
41 changes: 41 additions & 0 deletions recognition/45616756-GCN/plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd


# Learning-curve data and TSNE embedding produced by driver.py.
train_losses = np.load('train_losses.npy')
train_accuracies = np.load('train_accuracies.npy')
validation_losses = np.load('validation_losses.npy')
validation_accuracies = np.load('validation_accuracies.npy')
x_embedded = np.load('x_embedded.npy')

# The loss and accuracy figures share the same layout; draw them in a loop.
for title, ylabel, train_curve, validation_curve in (
        ('Training and Validation Loss', 'Loss',
         train_losses, validation_losses),
        ('Training and Validation Accuracy', 'Accuracy',
         train_accuracies, validation_accuracies)):
    plt.figure(figsize=(10, 5))
    plt.title(title)
    plt.plot(train_curve, label='Train')
    plt.plot(validation_curve, label='Validation')
    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.legend()
    plt.show()

# Colour the 2-D embedding by the ground-truth page category.
raw = np.load('./data/facebook.npz')
target_frame = pd.DataFrame(raw['target'])

figure, axes = plt.subplots(figsize=(7, 7))
axes.scatter(x_embedded[:, 0],
             x_embedded[:, 1],
             c=target_frame[0].astype('category').cat.codes)
axes.set(aspect="equal",
         xlabel="$X_1$",
         ylabel="$X_2$",
         title="Visualization of GCN embeddings for Facebook dataset")
plt.show()