---------------------------------------------- -- Graph Analytics with Greenplum and MADlib ---------------------------------------------- -- CREATE VERTEX TABLE DROP TABLE IF EXISTS vertex; CREATE TABLE vertex( id INTEGER ) DISTRIBUTED RANDOMLY; INSERT INTO vertex VALUES (0), (1), (2), (3), (4), (5), (6); select * from vertex -- CREATE EDGE TABLE DROP TABLE IF EXISTS edge; CREATE TABLE edge( src INTEGER, dest INTEGER, user_id INTEGER ) DISTRIBUTED BY (user_id); INSERT INTO edge VALUES (0, 1, 1), (0, 2, 1), -- user id 1 (0, 4, 1), (1, 2, 1), (1, 3, 1), (2, 3, 1), (2, 5, 1), (2, 6, 1), (3, 0, 1), (4, 0, 1), (5, 6, 1), (6, 3, 1), (0, 1, 2), (0, 2, 2), -- user id 2 (0, 4, 2), (1, 2, 2), (1, 3, 2), (2, 3, 2), (3, 0, 2), (4, 0, 2), (5, 6, 2), (6, 3, 2); select * from edge; -- (1) Compute the PageRank with All IDs DROP TABLE IF EXISTS pagerank_out, pagerank_out_summary; SELECT madlib.pagerank( 'vertex' -- Vertex table , 'id' -- Vertex id column , 'edge' -- Edge table , 'src=src, dest=dest' -- Comma delimited string of edge arguments , 'pagerank_out' -- Output table of RageRank , NULL -- Default damping factor (0.85) ); SELECT * FROM pagerank_out ORDER BY pagerank DESC; -- (2) Compute the PageRank of vertices associated with each user using the grouping feature DROP TABLE IF EXISTS pagerank_gr_out, pagerank_gr_out_summary; SELECT madlib.pagerank( 'vertex' -- Vertex table , 'id' -- Vertex id column , 'edge' -- Edge table , 'src=src, dest=dest' -- Comma delimited string of edge arguments , 'pagerank_gr_out' -- Output table of PageRank , NULL -- Default damping factor (0.85) , NULL -- Default max iterations (100) , 0.00000001 -- Threshold , 'user_id'); -- Grouping column name SELECT * FROM pagerank_gr_out ORDER BY user_id, pagerank DESC; -- (3) Personalized PageRank of vertices {2, 4} DROP TABLE IF EXISTS pagerank_pers_out, pagerank_pers_out_summary; SELECT madlib.pagerank( 'vertex' -- Vertex table , 'id' -- Vertex id column , 'edge' -- Edge table , 'src=src, dest=dest' -- Comma delimited string of edge arguments , 'pagerank_pers_out' -- Output table of PageRank , NULL -- Default damping factor (0.85) , NULL -- Default max iterations (100) , NULL -- Default Threshold (1/number of vertices*1000) , NULL -- No Grouping , '{2, 4}' -- Personalization vertices ); SELECT * FROM pagerank_pers_out ORDER BY pagerank DESC; SELECT * FROM pagerank_pers_out_summary;