Skip to content
Snippets Groups Projects
Commit d06a4179 authored by cs451's avatar cs451
Browse files

multi source sequential pagerank added

parent a06ec81b
Branches master
No related tags found
No related merge requests found
......@@ -14,7 +14,7 @@
* limitations under the License.
*/
package io.bespin.java.mapreduce.pagerank;
package ca.uwaterloo.cs451.a4;
import com.google.common.base.Function;
import edu.uci.ics.jung.algorithms.cluster.WeakComponentClusterer;
......@@ -32,10 +32,11 @@ import java.io.IOException;
import java.io.InputStreamReader;
import java.util.PriorityQueue;
import java.util.Set;
import java.util.ArrayList;
/**
* <p>
* Program that computes personalized PageRank for a graph using the <a
* Program that computes PageRank for a graph using the <a
* href="https://github.com/jrtom/jung">JUNG</a> package. Program takes two command-line
* arguments: the first is a file containing the graph data, and the second is the random jump
* factor (a typical setting is 0.15).
......@@ -49,8 +50,10 @@ import java.util.Set;
* lines will simply contain one token, the source node id).
* </p>
*
* @author Ali Abedi
* @author Jimmy Lin
*/
public class SequentialPersonalizedPageRank {
private SequentialPersonalizedPageRank() {}
......@@ -58,15 +61,18 @@ public class SequentialPersonalizedPageRank {
@Option(name = "-input", metaVar = "[path]", required = true, usage = "input path")
String input;
@Option(name = "-source", metaVar = "[node]", required = true,
usage = "source node (i.e., destination of the random jump)")
String source;
@Option(name = "-sources", metaVar = "[node]", required = true,
usage = "source nodes set. Comma seperated.")
String sources;
@Option(name = "-jump", metaVar = "[num]", usage = "random jump factor")
float alpha = 0.15f;
}
public static void main(String[] argv) throws IOException {
// Keeping track of source nodes
ArrayList sources = new ArrayList();
final Args args = new Args();
CmdLineParser parser = new CmdLineParser(args, ParserProperties.defaults().withUsageWidth(100));
......@@ -78,6 +84,12 @@ public class SequentialPersonalizedPageRank {
System.exit(-1);
}
String[] tmp = args.sources.split(",");
for (int i = 0; i < tmp.length; i++) {
sources.add(tmp[i]);
}
int edgeCnt = 0;
DirectedSparseGraph<String, Integer> graph = new DirectedSparseGraph<>();
......@@ -94,12 +106,15 @@ public class SequentialPersonalizedPageRank {
}
data.close();
if (!graph.containsVertex(args.source)) {
System.err.println("Error: source node not found in the graph!");
System.exit(-1);
for (int i = 0; i < sources.size(); i++){
if (!graph.containsVertex((String) sources.get(i))) {
System.err.println("Error: source node not found in the graph!");
System.exit(-1);
}
}
WeakComponentClusterer<String, Integer> clusterer = new WeakComponentClusterer<>();
Set<Set<String>> components = clusterer.apply(graph);
......@@ -107,16 +122,16 @@ public class SequentialPersonalizedPageRank {
System.out.println("Number of edges: " + graph.getEdgeCount());
System.out.println("Number of nodes: " + graph.getVertexCount());
System.out.println("Random jump factor: " + args.alpha);
System.out.println("Source nodes: " + args.sources);
// Compute personalized PageRank.
PageRankWithPriors<String, Integer> ranker = new PageRankWithPriors<>(graph,
new Function<String, Double>() {
@Override
public Double apply(String vertex) {
return vertex.equals(args.source) ? 1.0 : 0;
return sources.contains(vertex) ? 1.0 / (float) sources.size() : 0;
}
}, args.alpha);
// Note that the Altiscale cluster is still on Java 7, so we don't have lambdas.
ranker.evaluate();
// Use priority queue to sort vertices by PageRank values.
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment