import tensorlayerx as tlx
from collections import defaultdict
import numpy as np
from ..utils.num_nodes import maybe_num_nodes
from gammagl.loader import RandomWalk
# Module-level walk sampler configured for the "deepwalk" strategy; it is
# created once at import time and called from DeepWalkModel.__init__.
random_walk = RandomWalk("deepwalk")
# Small constant added inside the logarithms of the loss so that log(0) is
# never evaluated when a sigmoid saturates.
EPS = 1e-15
[docs]
class DeepWalkModel(tlx.nn.Module):
    r"""The DeepWalk model from the
    `"DeepWalk: Online Learning of Social Representations"
    <https://arxiv.org/abs/1403.6652>`_ paper where random walks of
    length :obj:`walk_length` are sampled in a given graph, and node embeddings
    are learned via negative sampling optimization.

    The walks are sampled once, when the model is constructed, and reused by
    every call to :meth:`pos_sample`. Negative walks are redrawn uniformly at
    random on every call to :meth:`neg_sample`.

    Parameters
    ----------
    edge_index: Iterable
        The edge indices.
    edge_weight: Iterable
        The edge weight. It is stored on the instance; the code in this class
        does not pass it to the walk sampler.
    embedding_dim: int
        The size of each embedding vector.
    walk_length: int
        The walk length. Must be greater than or equal to
        :obj:`window_size`.
    num_walks: int, optional
        The number of walks to sample for each node.
    window_size: int, optional
        The actual context size which is considered for
        positive samples. This parameter increases the effective sampling
        rate by reusing samples across different source nodes.
    num_negatives: int, optional
        The number of negative samples to use for each positive sample.
    num_nodes: int, optional
        The number of nodes.
    name: str, optional
        model name.

    """

    def __init__(
        self,
        edge_index,
        edge_weight,
        embedding_dim,
        walk_length,
        num_walks=10,
        window_size=5,
        num_negatives=1,
        num_nodes=None,
        name=None
    ):
        super(DeepWalkModel, self).__init__(name=name)

        # A window cannot be longer than the walk it is cut from.
        assert walk_length >= window_size, (
            "walk_length must be greater than or equal to window_size"
        )

        self.edge_index = edge_index
        self.edge_weight = edge_weight
        self.N = maybe_num_nodes(edge_index, num_nodes)
        self.embedding_dim = embedding_dim
        self.walk_length = walk_length
        self.num_walks = num_walks
        self.window_size = window_size
        self.num_negatives = num_negatives

        # Positive walks are sampled once here and reused for every step.
        self.random_walks = random_walk(
            self.edge_index, self.num_walks, self.walk_length, num_nodes=self.N
        )
        self.embedding = tlx.nn.Embedding(self.N, embedding_dim)

    def forward(self, edge_index=None):
        """Compute the training loss on fresh positive and negative samples.

        Parameters
        ----------
        edge_index: optional
            Unused. Accepted only so existing call sites that pass the graph
            keep working.

        Returns
        -------
        The scalar loss returned by :meth:`loss`.

        """
        return self.loss(self.pos_sample(), self.neg_sample())

    def _sliding_windows(self, walks):
        """Cut every walk into all contiguous windows of ``window_size`` nodes.

        Parameters
        ----------
        walks:
            Array-like convertible with ``np.asarray`` whose rows are walks
            (sliced along axis 1).

        Returns
        -------
        A tensor holding the windows starting at offset 0 for every walk,
        followed by the windows starting at offset 1, and so on.

        """
        walks = np.asarray(walks)
        num_windows = 1 + self.walk_length - self.window_size
        windows = [
            walks[:, offset:offset + self.window_size]
            for offset in range(num_windows)
        ]
        # Concatenate in NumPy and convert once: converting a Python list of
        # arrays to a tensor first is slow and yields the same values.
        return tlx.convert_to_tensor(np.concatenate(windows, axis=0))

    def pos_sample(self):
        """Return the positive windows cut from the pre-sampled random walks."""
        return self._sliding_windows(self.random_walks)

    def neg_sample(self):
        """Return negative windows cut from uniformly random node sequences.

        ``N * num_walks * num_negatives`` sequences of ``walk_length`` node
        ids are drawn from ``[0, N)`` with NumPy's global random state.

        """
        noise = np.random.randint(
            low=0,
            high=self.N,
            size=(self.N * self.num_walks * self.num_negatives, self.walk_length),
        )
        return self._sliding_windows(noise)

    def _pair_scores(self, walks):
        """Dot products between each window's first node and its context nodes.

        Parameters
        ----------
        walks:
            2-D tensor of node ids with one window per row and at least two
            columns.

        Returns
        -------
        A tensor of shape ``(-1, 1)`` with one score per (start, context)
        pair.

        Raises
        ------
        ValueError
            If ``walks`` has fewer than two columns, so there is no context.

        """
        num_rows, num_cols = walks.shape[0], walks.shape[1]
        if num_cols < 2:
            raise ValueError(
                "each window needs a start node and at least one context node"
            )

        start = walks[:, 0]
        # Every column after the first is context for the start node; taking
        # only column 1 would make window_size irrelevant to the loss.
        context_cols = tlx.convert_to_tensor(list(range(1, num_cols)))
        rest = tlx.gather(walks, context_cols, axis=1)

        h_start = tlx.reshape(
            self.embedding(start), (num_rows, 1, self.embedding_dim)
        )
        h_rest = tlx.reshape(
            self.embedding(tlx.reshape(rest, (-1, 1))),
            (num_rows, -1, self.embedding_dim),
        )
        # h_start broadcasts over the context axis of h_rest.
        return tlx.reshape(
            tlx.ops.reduce_sum(h_start * h_rest, axis=-1), (-1, 1)
        )

    def loss(self, pos_rw, neg_rw):
        """Negative-sampling loss for the given positive and negative windows.

        Parameters
        ----------
        pos_rw:
            Windows as returned by :meth:`pos_sample`.
        neg_rw:
            Windows as returned by :meth:`neg_sample`.

        Returns
        -------
        The sum of the mean positive loss ``-log(sigmoid(score) + EPS)`` and
        the mean negative loss ``-log(1 - sigmoid(score) + EPS)``.

        """
        # Positive loss.
        pos_scores = self._pair_scores(pos_rw)
        pos_loss = -tlx.ops.reduce_mean(tlx.log(tlx.sigmoid(pos_scores) + EPS))

        # Negative loss.
        neg_scores = self._pair_scores(neg_rw)
        neg_loss = -tlx.ops.reduce_mean(
            tlx.log(1 - tlx.sigmoid(neg_scores) + EPS)
        )

        return pos_loss + neg_loss

    def compute(self):
        """Return ``self.embedding.all_weights``, the embedding layer's weights."""
        return self.embedding.all_weights

    def campute(self):
        """Misspelled alias of :meth:`compute`, kept for backward compatibility."""
        return self.compute()