Source code for gammagl.loader.random_walk_sampler

import tensorlayerx as tlx
import numpy as np
from collections import defaultdict
from gammagl.utils.num_nodes import maybe_num_nodes



class RandomWalk:
    """Callable that samples random walks from a graph given as an edge index.

    Parameters
    ----------
    model: str
        Walk strategy. ``"node2vec"`` and ``"deepwalk"`` are recognised; any
        other value makes :meth:`__call__` return an empty list.

    """

    def __init__(self, model):
        self.model = model

    def __call__(self, edge_index, num_walks, walk_length, edge_weight=None, p=1.0, q=1.0, num_nodes=None):
        """Sample random walks according to ``self.model``.

        Parameters
        ----------
        edge_index:
            Edge list indexed as ``edge_index[0]`` (sources) and
            ``edge_index[1]`` (destinations); must expose ``shape[1]`` when
            ``edge_weight`` is omitted.
        num_walks: int
            Number of walks per start node.
        walk_length: int
            Requested walk length, forwarded to the selected sampler.
        edge_weight: optional
            Per-edge weights, indexed by edge position. Defaults to all ones.
            Only read by the ``"node2vec"`` strategy.
        p: float, optional
            node2vec return parameter.
        q: float, optional
            node2vec in-out parameter.
        num_nodes: int, optional
            Number of nodes; derived with ``maybe_num_nodes`` when omitted.
            Only read by the ``"node2vec"`` strategy.

        Returns
        -------
        list or sampler output
            For ``"node2vec"`` a shuffled list of walks, each a list of ``int``
            node ids. For ``"deepwalk"`` whatever ``rw_sample_by_edge_index``
            returns (not visible here -- verify against ``rw_utils``). An empty
            list for any other model name.

        """
        if self.model == "node2vec":
            # `is None` rather than `== None`: the latter is an elementwise
            # comparison for array-like inputs and cannot be used in an `if`.
            if edge_weight is None:
                edge_weight = tlx.ops.ones(shape=(edge_index.shape[1],), dtype=tlx.float32)
            if num_nodes is None:
                num_nodes = maybe_num_nodes(edge_index)

            src = tlx.convert_to_numpy(edge_index[0])
            dst = tlx.convert_to_numpy(edge_index[1])

            # Adjacency list and (src, dst) -> weight lookup, built in edge order.
            neighbor_dict = defaultdict(list)
            nodes_weight = {}
            for index, (src_node, dst_node) in enumerate(zip(src, dst)):
                neighbor_dict[src_node].append(dst_node)
                nodes_weight[(src_node, dst_node)] = edge_weight[index]

            probs = compute_probabilities(neighbor_dict, edge_index, nodes_weight, p, q, num_nodes)
            return node2vec_generate_random_walks(neighbor_dict, probs, edge_index, num_walks, walk_length)

        if self.model == "deepwalk":
            # Imported lazily, as in the original, so the module can be loaded
            # without rw_utils unless this strategy is actually used.
            from gammagl.loader.rw_utils import rw_sample_by_edge_index
            # Every edge's source node is passed as a start node.
            return rw_sample_by_edge_index(edge_index, edge_index[0, :], walk_length, num_walks)

        # Unknown strategy: keep the historical behaviour of returning no walks.
        return list()
def _unique_source_nodes(edge_index):
    """Return the set of distinct source node ids found in ``edge_index[0]``."""
    return set(tlx.convert_to_numpy(edge_index[0]))


def compute_probabilities(neighbor, edge_index, nodes_weight, p, q, num_nodes):
    """Pre-compute node2vec second-order transition probabilities.

    For every pair (``source_node`` -> ``current_node``) the unnormalised weight
    of stepping on to ``destination`` is the edge weight scaled by

    * ``1 / p`` when ``destination`` is ``source_node`` (stepping back),
    * ``1``     when ``destination`` is also a neighbour of ``source_node``,
    * ``1 / q`` otherwise,

    and the weights are then normalised to sum to one.

    Parameters
    ----------
    neighbor: dict
        Maps a node to the list of its out-neighbours.
    edge_index:
        Edge list; only ``edge_index[0]`` (source nodes) is read.
    nodes_weight: dict
        Maps ``(src, dst)`` to the weight of that edge.
    p: float
        Return parameter.
    q: float
        In-out parameter.
    num_nodes: int
        Node ids ``0 .. num_nodes - 1`` get an (initially empty) entry.

    Returns
    -------
    defaultdict
        ``probs[source]['probabilities'][current]`` is an array of
        probabilities aligned with the order of ``neighbor[current]``.

    """
    probs = defaultdict(dict)
    for node_id in range(num_nodes):
        probs[node_id]['probabilities'] = dict()

    for source_node in _unique_source_nodes(edge_index):
        source_neighbors = neighbor[source_node]
        # Built once per source node so the membership test in the innermost
        # loop is a hash lookup instead of a linear scan of the list.
        source_neighbor_set = set(source_neighbors)

        for current_node in source_neighbors:
            unnormalized = list()
            for destination in neighbor[current_node]:
                weight = tlx.convert_to_numpy(nodes_weight[(current_node, destination)])
                if source_node == destination:
                    scaled = (1 / p) * weight
                elif destination in source_neighbor_set:
                    scaled = 1 * weight
                else:
                    scaled = (1 / q) * weight
                unnormalized.append(scaled)

            probs[source_node]['probabilities'][current_node] = (
                np.asarray(unnormalized) / np.sum(unnormalized)
            )

    return probs


def node2vec_generate_random_walks(neighbor, probs, edge_index, num_walks, walk_length):
    """Generate biased (node2vec) random walks from every source node.

    The first step is drawn uniformly from the start node's neighbours; each
    later step is drawn with the probabilities pre-computed by
    ``compute_probabilities`` for the (previous, current) node pair.

    Parameters
    ----------
    neighbor: dict
        Maps a node to the list of its out-neighbours.
    probs: dict
        Output of ``compute_probabilities``.
    edge_index:
        Edge list; only ``edge_index[0]`` (source nodes) is read.
    num_walks: int
        Number of walks started from each source node.
    walk_length: int
        Target number of nodes per walk. Note that a walk always receives its
        first step when the start node has a neighbour, so walks contain two
        nodes even for ``walk_length < 2``.

    Returns
    -------
    list[list[int]]
        The walks, shuffled in place with ``np.random.shuffle``. A walk is
        shorter than ``walk_length`` when it reaches a node with no neighbours.

    """
    walks = list()
    for start_node in _unique_source_nodes(edge_index):
        for _walk_idx in range(num_walks):
            walk = [int(start_node)]

            walk_options = neighbor[walk[-1]]
            if len(walk_options) == 0:
                # No outgoing edge: no walk can start here, so the remaining
                # repetitions for this start node are skipped as well.
                break
            first_step = np.random.choice(walk_options)
            walk.append(int(first_step))

            for _step in range(walk_length - 2):
                walk_options = neighbor[walk[-1]]
                if len(walk_options) == 0:
                    break
                probabilities = probs[walk[-2]]['probabilities'][walk[-1]]
                if tlx.BACKEND == 'paddle':
                    # Presumably the stored entries are 1-element arrays under
                    # paddle and need flattening -- TODO confirm.
                    probabilities = np.concatenate(probabilities, axis=0)
                next_step = np.random.choice(walk_options, p=probabilities)
                walk.append(int(next_step))

            walks.append(walk)

    np.random.shuffle(walks)
    return walks


def deepwalk_generate_random_walks(neighbor, edge_index, num_walks, walk_length):
    """Generate uniform (DeepWalk) random walks from every source node.

    Parameters
    ----------
    neighbor: dict
        Maps a node to the list of its out-neighbours.
    edge_index:
        Edge list; only ``edge_index[0]`` (source nodes) is read.
    num_walks: int
        Number of passes over the set of source nodes; each pass starts one
        walk per source node.
    walk_length: int
        Maximum number of nodes per walk.

    Returns
    -------
    list[list]
        The walks, shuffled in place with ``np.random.shuffle``. Node ids are
        kept as produced by the inputs (they are not cast to ``int``). A walk
        stops early at a node with no neighbours.

    """
    start_nodes = _unique_source_nodes(edge_index)
    walks = list()
    for _pass_idx in range(num_walks):
        for start_node in start_nodes:
            walk = [start_node]
            for _step in range(walk_length - 1):
                walk_options = neighbor[walk[-1]]
                if len(walk_options) == 0:
                    break
                walk.append(np.random.choice(walk_options))
            walks.append(walk)

    np.random.shuffle(walks)
    return walks