import json
import os
import os.path as osp
from typing import Callable, List, Optional
import numpy as np
import scipy.sparse as sp
import tensorlayerx as tlx
from gammagl.data import InMemoryDataset, download_url, Graph
# [docs]  (Sphinx viewcode link residue from the HTML export; not code)
class Flickr(InMemoryDataset):
    r"""The Flickr dataset from the `"GraphSAINT: Graph Sampling Based
    Inductive Learning Method" <https://arxiv.org/abs/1907.04931>`_ paper,
    containing descriptions and common properties of images.

    Parameters
    ----------
    root: str, optional
        Root directory where the dataset should be saved.
    transform: callable, optional
        A function/transform that takes in an
        :obj:`gammagl.data.Graph` object and returns a transformed
        version. The data object will be transformed before every access.
        (default: :obj:`None`)
    pre_transform: callable, optional
        A function/transform that takes in
        an :obj:`gammagl.data.Graph` object and returns a
        transformed version. The data object will be transformed before
        being saved to disk. (default: :obj:`None`)
    force_reload: bool, optional
        Whether to re-process the dataset.
        (default: :obj:`False`)

    Tip
    ---
        .. list-table::
            :widths: 10 10 10 10
            :header-rows: 1

            * - #nodes
              - #edges
              - #features
              - #classes
            * - 89,250
              - 899,756
              - 500
              - 7
    """

    # Google Drive direct-download URL template; `{}` receives a file id.
    url = 'https://docs.google.com/uc?export=download&id={}&confirm=t'

    # Google Drive file ids of the four raw files.
    adj_full_id = '1crmsTbd1-2sEXsGwa2IKnIB7Zd3TmUsy'
    feats_id = '1join-XdvX3anJU_MLVtick7MgeAQiWIZ'
    class_map_id = '1uxIkbtg5drHTsKt-PAsZZ4_yJmgFmle9'
    role_id = '1htXCtuktuCW8TR8KiKfrFDAxUgekQoV7'

    def __init__(self, root: Optional[str] = None,
                 transform: Optional[Callable] = None,
                 pre_transform: Optional[Callable] = None,
                 force_reload: bool = False):
        super().__init__(root, transform, pre_transform,
                         force_reload=force_reload)
        # Load the collated graph written by `process()`.
        self.data, self.slices = self.load_data(self.processed_paths[0])

    @property
    def raw_file_names(self) -> List[str]:
        """Names the raw files are stored under inside ``self.raw_dir``."""
        return ['adj_full.npz', 'feats.npy', 'class_map.json', 'role.json']

    @property
    def processed_file_names(self) -> str:
        """Processed file name, prefixed with the active TensorLayerX backend."""
        return tlx.BACKEND + 'data.pt'

    def download(self):
        """Download the four raw files and rename each to its expected name."""
        # Same order and target names as the original one-by-one sequence.
        targets = (
            (self.adj_full_id, 'adj_full.npz'),
            (self.feats_id, 'feats.npy'),
            (self.class_map_id, 'class_map.json'),
            (self.role_id, 'role.json'),
        )
        for file_id, file_name in targets:
            path = download_url(self.url.format(file_id), self.raw_dir)
            os.rename(path, osp.join(self.raw_dir, file_name))

    def process(self):
        """Build the graph from the raw files and save it to the processed path.

        Reads the CSR adjacency, node features, class map and split roles
        from ``self.raw_dir``, assembles a :obj:`gammagl.data.Graph` with
        ``train_mask`` / ``val_mask`` / ``test_mask`` attributes, applies
        ``pre_transform`` if one was given, and saves the collated result.
        """
        # NpzFile keeps the archive open; the context manager closes it once
        # the CSR components have been read into memory.
        with np.load(osp.join(self.raw_dir, 'adj_full.npz')) as f:
            adj = sp.csr_matrix(
                (f['data'], f['indices'], f['indptr']), f['shape'])
        adj = adj.tocoo()
        edge_index = np.array([adj.row, adj.col], dtype=np.int64)

        x = np.load(osp.join(self.raw_dir, 'feats.npy'))
        num_nodes = x.shape[0]

        # Nodes missing from the class map keep the placeholder label -1.
        ys = [-1] * num_nodes
        with open(osp.join(self.raw_dir, 'class_map.json')) as f:
            class_map = json.load(f)
        for key, item in class_map.items():
            ys[int(key)] = item

        with open(osp.join(self.raw_dir, 'role.json')) as f:
            role = json.load(f)

        # `np.bool8` was removed in NumPy 2.0; `np.bool_` is the same dtype
        # and exists in every NumPy version.
        masks = {}
        for split in ('tr', 'va', 'te'):
            mask = np.zeros(num_nodes, dtype=np.bool_)
            mask[role[split]] = True
            masks[split] = mask

        data = Graph(x=x, edge_index=edge_index, y=np.array(ys),
                     to_tensor=True)
        data.train_mask = masks['tr']
        data.val_mask = masks['va']
        data.test_mask = masks['te']

        if self.pre_transform is not None:
            data = self.pre_transform(data)

        self.save_data(self.collate([data]), self.processed_paths[0])