# Source code for gammagl.loader.random_walk_sampler

import tensorlayerx as tlx
import numpy as np
from collections import defaultdict
from gammagl.utils.num_nodes import maybe_num_nodes




# [docs]
class RandomWalk:
    r"""Callable random-walk sampler that dispatches on a model name.

    Parameters
    ----------
    model: str
        Sampling strategy. ``"node2vec"`` runs the biased walk implemented by
        :func:`compute_probabilities` and
        :func:`node2vec_generate_random_walks`; ``"deepwalk"`` delegates to
        ``gammagl.loader.rw_utils.rw_sample_by_edge_index``. Any other value
        makes the call return an empty list.

    """

    def __init__(self, model):
        self.model = model

    def __call__(self, edge_index, num_walks, walk_length, edge_weight=None, p=1.0, q=1.0, num_nodes=None):
        r"""Sample random walks over the graph described by ``edge_index``.

        Parameters
        ----------
        edge_index: tensor
            Edge list indexed as ``edge_index[0]`` (sources) and
            ``edge_index[1]`` (destinations); ``edge_index.shape[1]`` is used
            as the number of edges.
        num_walks: int
            Number of walks to start from each start node.
        walk_length: int
            Maximum number of nodes per walk.
        edge_weight: tensor, optional
            One weight per edge, indexed by edge position. Defaults to a
            ``float32`` tensor of ones.
        p: float, optional
            node2vec return parameter: the weight of stepping back to the
            previous node is scaled by ``1 / p``. Only used for "node2vec".
        q: float, optional
            node2vec in-out parameter: the weight of stepping to a node that
            is not adjacent to the previous node is scaled by ``1 / q``.
            Only used for "node2vec".
        num_nodes: int, optional
            Number of nodes; inferred with ``maybe_num_nodes`` when omitted.

        Returns
        -------
        list
            For "node2vec", a shuffled list of walks, each a list of ``int``
            node ids. For "deepwalk", whatever ``rw_sample_by_edge_index``
            returns. An empty list for any other model name.

        """
        # Identity checks: ``== None`` on a tensor/array is an overloaded
        # (possibly element-wise) comparison and can raise inside ``if``.
        if edge_weight is None:
            edge_weight = tlx.ops.ones(shape=(edge_index.shape[1],), dtype=tlx.float32)
        if num_nodes is None:
            num_nodes = maybe_num_nodes(edge_index)

        if self.model == "node2vec":
            src = tlx.convert_to_numpy(edge_index[0])
            dst = tlx.convert_to_numpy(edge_index[1])

            # Adjacency list and per-edge weight lookup keyed by (src, dst).
            # Only the node2vec path needs them, so they are built here
            # rather than unconditionally.
            neighbor_dict = defaultdict(list)
            nodes_weight = {}
            for index, (src_node, dst_node) in enumerate(zip(src, dst)):
                neighbor_dict[src_node].append(dst_node)
                nodes_weight[(src_node, dst_node)] = edge_weight[index]

            probs = compute_probabilities(neighbor_dict, edge_index, nodes_weight, p, q, num_nodes)
            return node2vec_generate_random_walks(neighbor_dict, probs, edge_index, num_walks, walk_length)

        if self.model == "deepwalk":
            # Imported lazily so the module can be loaded without rw_utils
            # unless the deepwalk sampler is actually used.
            from gammagl.loader.rw_utils import rw_sample_by_edge_index
            return rw_sample_by_edge_index(edge_index, edge_index[0, :], walk_length, num_walks)

        # Unknown model name: keep the historical "no walks" result.
        return []



def compute_probabilities(neighbor, edge_index, nodes_weight, p, q, num_nodes):
    r"""Pre-compute node2vec second-order transition probabilities.

    For every pair ``(source_node, current_node)`` where ``current_node`` is
    listed in ``neighbor[source_node]``, each candidate ``destination`` in
    ``neighbor[current_node]`` gets the edge weight of
    ``(current_node, destination)`` scaled by

    * ``1 / p`` if ``destination`` is ``source_node`` (step back),
    * ``1`` if ``destination`` is also listed in ``neighbor[source_node]``,
    * ``1 / q`` otherwise,

    and the resulting weights are normalised to sum to one.

    Parameters
    ----------
    neighbor: mapping
        Maps a node id to the list of its out-neighbors.
    edge_index: tensor
        Edge list; only ``edge_index[0]`` (the source nodes) is read.
    nodes_weight: dict
        Maps ``(src, dst)`` to the weight of that edge. Each value is passed
        through ``tlx.convert_to_numpy``.
    p: float
        Return parameter.
    q: float
        In-out parameter.
    num_nodes: int
        An (initially empty) entry is created for every id in
        ``range(num_nodes)``.

    Returns
    -------
    defaultdict
        ``probs[source_node]['probabilities'][current_node]`` is an array of
        probabilities aligned with ``neighbor[current_node]``.

    """
    probs = defaultdict(dict)
    for node in range(num_nodes):
        probs[node]['probabilities'] = dict()

    source_nodes = set(tlx.convert_to_numpy(edge_index[0]))

    for source_node in source_nodes:
        source_neighbors = neighbor[source_node]
        # Built once per source node so the membership test in the innermost
        # loop is a hash lookup instead of a linear scan of the list.
        source_neighbor_set = set(source_neighbors)
        # setdefault keeps this working for source ids outside
        # range(num_nodes), which have no pre-created 'probabilities' entry.
        transitions = probs[source_node].setdefault('probabilities', dict())

        for current_node in source_neighbors:
            biased_weights = list()
            for destination in neighbor[current_node]:
                weight = tlx.convert_to_numpy(nodes_weight[(current_node, destination)])
                if source_node == destination:
                    bias = 1 / p
                elif destination in source_neighbor_set:
                    bias = 1
                else:
                    bias = 1 / q
                biased_weights.append(bias * weight)

            transitions[current_node] = np.asarray(biased_weights) / np.sum(biased_weights)

    return probs


def node2vec_generate_random_walks(neighbor, probs, edge_index, num_walks, walk_length):
    r"""Generate biased (node2vec) random walks from every source node.

    Parameters
    ----------
    neighbor: mapping
        Maps a node id to the list of its out-neighbors.
    probs: mapping
        Transition table read as
        ``probs[previous]['probabilities'][current]``; the result is passed
        as ``p`` to ``np.random.choice`` over ``neighbor[current]``.
    edge_index: tensor
        Edge list; the distinct values of ``edge_index[0]`` are the start
        nodes.
    num_walks: int
        Number of walks attempted per start node.
    walk_length: int
        Upper bound on the number of nodes in a walk. The start node and a
        uniformly chosen first step are always added, so a produced walk has
        at least two nodes.

    Returns
    -------
    list
        Walks (lists of ``int`` node ids) in shuffled order. A walk stops
        early when it reaches a node without neighbors; a start node without
        neighbors contributes no walk at all.

    """
    start_nodes = set(tlx.convert_to_numpy(edge_index[0]))

    all_walks = []
    for origin in start_nodes:
        for _ in range(num_walks):
            path = [int(origin)]

            candidates = neighbor[path[-1]]
            if len(candidates) < 1:
                # Nothing to step to: abandon this start node entirely.
                break
            # The first step has no predecessor, so it is drawn uniformly.
            path.append(int(np.random.choice(candidates)))

            # Later steps are biased by the (previous, current) pair.
            while len(path) < walk_length:
                previous, current = path[-2], path[-1]
                candidates = neighbor[current]
                if len(candidates) < 1:
                    break
                step_probs = probs[previous]['probabilities'][current]
                if tlx.BACKEND == 'paddle':
                    # Under the paddle backend the stored entries are joined
                    # into a single array before sampling.
                    step_probs = np.concatenate(step_probs, axis=0)
                path.append(int(np.random.choice(candidates, p=step_probs)))

            all_walks.append(path)

    np.random.shuffle(all_walks)

    return all_walks


def deepwalk_generate_random_walks(neighbor, edge_index, num_walks, walk_length):
    r"""Generate uniform (DeepWalk-style) random walks from every source node.

    Parameters
    ----------
    neighbor: mapping
        Maps a node id to the list of its out-neighbors.
    edge_index: tensor
        Edge list; the distinct values of ``edge_index[0]`` are the start
        nodes.
    num_walks: int
        Number of passes over the start nodes; each pass starts one walk per
        node.
    walk_length: int
        Upper bound on the number of nodes in a walk.

    Returns
    -------
    list
        Walks in shuffled order. Each walk begins with its start node and is
        cut short when it reaches a node without neighbors. Node ids are
        stored as obtained from the converted array and from
        ``np.random.choice``; they are not cast to ``int``.

    """
    start_nodes = set(tlx.convert_to_numpy(edge_index[0]))

    all_walks = []
    for _ in range(num_walks):
        for origin in start_nodes:
            path = [origin]
            # Extend one uniformly sampled neighbor at a time.
            while len(path) < walk_length:
                candidates = neighbor[path[-1]]
                if len(candidates) < 1:
                    break
                path.append(np.random.choice(candidates))
            all_walks.append(path)

    np.random.shuffle(all_walks)

    return all_walks