##### GitHub address
##### Original project GitHub (no result visualization):
Graph Convolutional Networks in PyTorch
##### Visualization + code comment GitHub:
Modification of Graph Convolutional Networks in PyTorch
##### Visualization results display
Visualization is done with visdom, and dimensionality reduction is performed with the t-SNE algorithm.
Dimension reduction to 2D: (figure)

Reduce dimension to 3D: (figure)
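For reference, the scatter plots above can be reproduced along these lines. This is a minimal sketch rather than the full pipeline: it assumes a visdom server is already running (`python -m visdom.server`) and substitutes random data for the trained model's output.

```python
import numpy as np
from sklearn import manifold
from visdom import Visdom

# Stand-in for the trained GCN output: 2708 Cora nodes, 7-class scores
output = np.random.rand(2708, 7)
labels = np.random.randint(0, 7, size=2708)

# Reduce the scores to 2-D with t-SNE
result = manifold.TSNE(n_components=2, init='pca',
                       random_state=0).fit_transform(output)

# Scatter plot in visdom; labels are shifted to start at 1,
# since visdom cannot display the label 0
vis = Visdom()
vis.scatter(X=result, Y=labels + 1,
            opts=dict(markersize=5, title='Dimension reduction to 2D'))
```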
##### Code annotation

##### layers.py

```python
import math

import torch
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module


class GraphConvolution(Module):
    """
    Simple GCN layer, similar to https://arxiv.org/abs/1609.02907
    """

    '''Define the layer's attributes'''
    def __init__(self, in_features, out_features, bias=True):
        super(GraphConvolution, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.FloatTensor(in_features, out_features))  # in_features × out_features
        if bias:
            self.bias = Parameter(torch.FloatTensor(out_features))
        else:
            self.register_parameter('bias', None)
        self.reset_parameters()

    '''Initialize the weights'''
    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.weight.size(1))
        # .uniform_(): fills the tensor with values sampled from a uniform distribution
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)

    '''Forward propagation within one layer, i.e. what this layer computes:
       A_hat * X * W'''
    def forward(self, input, adj):
        support = torch.mm(input, self.weight)  # torch.mm: dense matrix multiplication of input and weight
        output = torch.spmm(adj, support)       # torch.spmm: sparse matrix multiplication (sp = sparse)
        if self.bias is not None:
            return output + self.bias
        else:
            return output

    '''String representation of the object, displayed when printed at the terminal'''
    def __repr__(self):
        return self.__class__.__name__ + ' (' \
               + str(self.in_features) + ' -> ' \
               + str(self.out_features) + ')'
```
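To see what one layer computes, here is a small smoke test. The 3-node graph below is made up for illustration, and the sparse identity matrix stands in for the normalized adjacency `A_hat`:

```python
import torch
from pygcn.layers import GraphConvolution

layer = GraphConvolution(in_features=5, out_features=2)
x = torch.rand(3, 5)             # feature matrix X: 3 nodes, 5 features each
adj = torch.eye(3).to_sparse()   # sparse A_hat (identity here, for simplicity)

out = layer(x, adj)              # computes A_hat * X * W (+ bias)
print(layer)                     # GraphConvolution (5 -> 2)
print(out.shape)                 # torch.Size([3, 2])
```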
##### models.py

```python
import torch.nn as nn
import torch.nn.functional as F
from pygcn.layers import GraphConvolution


'''GCN class'''
class GCN(nn.Module):
    def __init__(self, nfeat, nhid, nclass, dropout):
        super(GCN, self).__init__()
        self.gc1 = GraphConvolution(nfeat, nhid)   # first layer
        self.gc2 = GraphConvolution(nhid, nclass)  # second layer
        self.dropout = dropout                     # dropout rate

    '''Forward propagation between layers, i.e. the whole network:
       relu(gc1) --> dropout --> gc2 --> log_softmax'''
    def forward(self, x, adj):
        x = F.relu(self.gc1(x, adj))
        x = F.dropout(x, self.dropout, training=self.training)
        x = self.gc2(x, adj)
        return F.log_softmax(x, dim=1)
```
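As a quick shape check, the two-layer model can be instantiated with the Cora dimensions described in `utils.py` below (1433 features, 7 classes); the identity adjacency is again a made-up placeholder:

```python
import torch
from pygcn.models import GCN

model = GCN(nfeat=1433, nhid=16, nclass=7, dropout=0.5)
x = torch.rand(2708, 1433)          # one feature row per paper
adj = torch.eye(2708).to_sparse()   # placeholder for the normalized adjacency

log_probs = model(x, adj)           # log-softmax scores per class
print(log_probs.shape)              # torch.Size([2708, 7])
```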
##### train.py

```python
from __future__ import division
from __future__ import print_function

# Path initialization
import os, sys
curPath = os.path.abspath(os.path.dirname(__file__))
rootPath = os.path.split(curPath)[0]
sys.path.append(rootPath)
sys.path.append('E:\\Anaconda\\lib\\site-packages\\')
# print(sys.path)
print('Path initialization finished!\n')

# Imports for visualization
from sklearn import manifold, datasets
# visdom display module
from visdom import Visdom

import time
import argparse
import numpy as np

import torch
import torch.nn.functional as F
import torch.optim as optim

from pygcn.utils import load_data, accuracy
from pygcn.models import GCN


def show_Hyperparameter(args):
    argsDict = args.__dict__
    print(argsDict)
    print('the settings are as following:\n')
    for key in argsDict:
        print(key, ':', argsDict[key])


def train(epoch):
    t = time.time()
    model.train()
    optimizer.zero_grad()
    '''The forward pass computes the output for all nodes'''
    output = model(features, adj)
    '''The loss is evaluated on the training nodes only, so the
       optimization is carried out on the training set alone'''
    loss_train = F.nll_loss(output[idx_train], labels[idx_train])
    # Training accuracy
    acc_train = accuracy(output[idx_train], labels[idx_train])
    # Backpropagation
    loss_train.backward()
    # Optimization step
    optimizer.step()

    if not args.fastmode:
        # Evaluate validation set performance separately;
        # eval() deactivates dropout during the validation run.
        model.eval()
        output = model(features, adj)

    '''Validation set loss and accuracy'''
    loss_val = F.nll_loss(output[idx_val], labels[idx_val])
    acc_val = accuracy(output[idx_val], labels[idx_val])
    '''Print training and validation loss and accuracy'''
    print('Epoch: {:04d}'.format(epoch+1),
          'loss_train: {:.4f}'.format(loss_train.item()),
          'acc_train: {:.4f}'.format(acc_train.item()),
          'loss_val: {:.4f}'.format(loss_val.item()),
          'acc_val: {:.4f}'.format(acc_val.item()),
          'time: {:.4f}s'.format(time.time() - t))


def test():
    model.eval()
    output = model(features, adj)
    loss_test = F.nll_loss(output[idx_test], labels[idx_test])
    acc_test = accuracy(output[idx_test], labels[idx_test])
    print("Test set results:",
          "loss= {:.4f}".format(loss_test.item()),
          "accuracy= {:.4f}".format(acc_test.item()))
    return output  # return the output for visualization


# t-SNE dimensionality reduction
def t_SNE(output, dimention):
    # output: the data to reduce
    # dimention: the target dimensionality
    tsne = manifold.TSNE(n_components=dimention, init='pca', random_state=0)
    result = tsne.fit_transform(output)
    return result


# Visualization with visdom
def Visualization(result, labels):
    vis = Visdom()
    vis.scatter(
        X=result,
        Y=labels+1,  # shift labels from 0-based to 1-based: visdom cannot display the label 0
        opts=dict(markersize=5, title='Dimension reduction to %dD' % (result.shape[1])),
    )


'''Main script starts here'''
# Training settings
parser = argparse.ArgumentParser()
parser.add_argument('--no-cuda', action='store_true', default=False,
                    help='Disables CUDA training.')
parser.add_argument('--fastmode', action='store_true', default=False,
                    help='Validate during training pass.')
parser.add_argument('--seed', type=int, default=42, help='Random seed.')
parser.add_argument('--epochs', type=int, default=200,
                    help='Number of epochs to train.')
parser.add_argument('--lr', type=float, default=0.01,
                    help='Initial learning rate.')
parser.add_argument('--weight_decay', type=float, default=5e-4,
                    help='Weight decay (L2 loss on parameters).')
parser.add_argument('--hidden', type=int, default=16,
                    help='Number of hidden units.')
parser.add_argument('--dropout', type=float, default=0.5,
                    help='Dropout rate (1 - keep probability).')

args = parser.parse_args()

# Display args
show_Hyperparameter(args)

# Use CUDA or not
args.cuda = not args.no_cuda and torch.cuda.is_available()

np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# Load data (the labels are also used later for visualization)
adj, features, labels, idx_train, idx_val, idx_test = load_data()

# Model
model = GCN(nfeat=features.shape[1],
            nhid=args.hidden,
            nclass=labels.max().item() + 1,
            dropout=args.dropout)

# Optimizer
optimizer = optim.Adam(model.parameters(),
                       lr=args.lr, weight_decay=args.weight_decay)

# Move everything to CUDA
if args.cuda:
    model.cuda()
    features = features.cuda()
    adj = adj.cuda()
    labels = labels.cuda()
    idx_train = idx_train.cuda()
    idx_val = idx_val.cuda()
    idx_test = idx_test.cuda()

# Train model
t_total = time.time()
for epoch in range(args.epochs):
    train(epoch)
print("Optimization Finished!")
print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

# Testing
output = test()  # keep the output for visualization

# Convert output and labels to numpy arrays
output = output.cpu().detach().numpy()
labels = labels.cpu().detach().numpy()

# # Inspect the result information
# print(result)
# print(type(result))    # <class 'numpy.ndarray'>
# print(result.shape)    # (2708, 2)
# print(labels)
# print(type(labels))    # <class 'numpy.ndarray'>
# print(labels.shape)    # (2708,)

# Visualization with visdom
result = t_SNE(output, 2)
Visualization(result, labels)
result = t_SNE(output, 3)
Visualization(result, labels)
```
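The key semi-supervised detail in `train()` is that the forward pass covers all nodes while the loss only sees the rows selected by `idx_train`. A toy sketch of that masking, with made-up sizes:

```python
import torch
import torch.nn.functional as F

output = torch.log_softmax(torch.rand(10, 3), dim=1)  # log-probabilities for 10 nodes
labels = torch.randint(0, 3, (10,))
idx_train = torch.arange(4)          # pretend only the first 4 nodes are labelled

loss_all = F.nll_loss(output, labels)                          # loss over every node
loss_train = F.nll_loss(output[idx_train], labels[idx_train])  # training nodes only
print(loss_all.item(), loss_train.item())
```

To run the whole script, start a visdom server first (`python -m visdom.server`) and then launch `train.py`; the flags defined above (`--epochs`, `--lr`, `--hidden`, ...) override the defaults.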
##### utils.py

```python
import numpy as np
import scipy.sparse as sp
import torch


def encode_onehot(labels):
    classes = set(labels)  # set() builds an unordered collection of distinct elements
    # enumerate() yields pairs of index i and value c.
    # This line maps each string label to an int index, establishing the mapping
    classes_dict = {c: np.identity(len(classes))[i, :] for i, c in
                    enumerate(classes)}
    # map() applies the given function to the sequence.
    # This line replaces each string label with its one-hot encoding
    labels_onehot = np.array(list(map(classes_dict.get, labels)),
                             dtype=np.int32)
    # Return the int-typed one-hot labels
    return labels_onehot


'''Data loading'''
# The path was changed from ../ to an absolute local path
def load_data(path="C:/Users/73416/PycharmProjects/PyGCN_Visualization/data/cora/",
              dataset="cora"):
    """Load citation network dataset (cora only for now)"""
    print('Loading {} dataset...'.format(dataset))

    '''
    About cora.content:
    cora.content has 2708 lines in total; each line represents one sample point, i.e. one paper.
    Each line consists of three parts:
      - the paper id, e.g. 31336;
      - the word vector of the paper, a 1433-dimensional binary vector;
      - the category of the paper, e.g. neural_networks; 7 categories (labels) in total.
    The first field is the paper id, the last is the paper category,
    and everything in between is the feature vector.
    '''

    '''Read features and labels'''
    # Read the dataset file as strings
    idx_features_labels = np.genfromtxt("{}{}.content".format(path, dataset),
                                        dtype=np.dtype(str))
    # CSR matrix: Compressed Sparse Row matrix, a compressed form of a sparse np.array.
    # idx_features_labels[:, 1:-1] skips the paper id and category and keeps
    # only the node features
    features = sp.csr_matrix(idx_features_labels[:, 1:-1], dtype=np.float32)
    # idx_features_labels[:, -1] takes only the last column, the paper category,
    # which encode_onehot turns into int-typed labels
    labels = encode_onehot(idx_features_labels[:, -1])

    # build graph
    # idx_features_labels[:, 0] is the paper id
    idx = np.array(idx_features_labels[:, 0], dtype=np.int32)
    # Enumerating the paper ids gives a dictionary from id to index
    idx_map = {j: i for i, j in enumerate(idx)}
    edges_unordered = np.genfromtxt("{}{}.cites".format(path, dataset),
                                    dtype=np.int32)
    # Re-map the paper ids: the original ids are not used as such; they are
    # renumbered (starting from 0) and the original ids are replaced, turning
    # the discrete original ids into consecutive indices 0-2707
    edges = np.array(list(map(idx_map.get, edges_unordered.flatten())),
                     dtype=np.int32).reshape(edges_unordered.shape)
    # coo_matrix(): a compressed sparse matrix, defined by the non-zero values,
    # the row and col of each non-zero element, and the shape of the matrix
    adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
                        shape=(labels.shape[0], labels.shape[0]),
                        dtype=np.float32)

    # build symmetric adjacency matrix
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)

    # feature and adjacency normalization
    features = normalize(features)
    adj = normalize(adj + sp.eye(adj.shape[0]))

    # train set, validation set, test set
    idx_train = range(140)
    idx_val = range(200, 500)
    idx_test = range(500, 1500)

    # Convert the data to tensors
    features = torch.FloatTensor(np.array(features.todense()))
    labels = torch.LongTensor(np.where(labels)[1])
    adj = sparse_mx_to_torch_sparse_tensor(adj)

    idx_train = torch.LongTensor(idx_train)
    idx_val = torch.LongTensor(idx_val)
    idx_test = torch.LongTensor(idx_test)

    # Return the data
    return adj, features, labels, idx_train, idx_val, idx_test


'''Normalization function'''
def normalize(mx):
    """Row-normalize sparse matrix"""
    rowsum = np.array(mx.sum(1))
    r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    mx = r_mat_inv.dot(mx)
    return mx


'''Accuracy computation'''
def accuracy(output, labels):
    preds = output.max(1)[1].type_as(labels)
    correct = preds.eq(labels).double()
    correct = correct.sum()
    return correct / len(labels)


'''Sparse matrix to sparse tensor'''
def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse tensor."""
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    return torch.sparse.FloatTensor(indices, values, shape)
```
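The two sparse-matrix tricks in `load_data` are easy to verify on a toy graph: the symmetrization line adds each directed edge in the reverse direction, and row-normalizing `A + I` makes every row sum to 1. A small sketch with a made-up 3-node graph:

```python
import numpy as np
import scipy.sparse as sp

# Directed graph with edges 0->1 and 2->1
adj = sp.coo_matrix((np.ones(2), ([0, 2], [1, 1])), shape=(3, 3), dtype=np.float32)

# Symmetrize, exactly as in load_data
adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
print(adj.todense())        # edges now appear in both directions

# Row-normalize A + I, as in load_data's normalize()
rowsum = np.array((adj + sp.eye(3)).sum(1))
r_inv = np.power(rowsum, -1).flatten()
r_inv[np.isinf(r_inv)] = 0.
norm_adj = sp.diags(r_inv).dot(adj + sp.eye(3))
print(norm_adj.todense())   # each row sums to 1
```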