Commit d06a4179 authored by cs451's avatar cs451
Browse files

multi source sequential pagerank added

parent a06ec81b
......@@ -14,7 +14,7 @@
* limitations under the License.
*/
package io.bespin.java.mapreduce.pagerank;
package ca.uwaterloo.cs451.a4;
import com.google.common.base.Function;
import edu.uci.ics.jung.algorithms.cluster.WeakComponentClusterer;
......@@ -32,10 +32,11 @@ import java.io.IOException;
import java.io.InputStreamReader;
import java.util.PriorityQueue;
import java.util.Set;
import java.util.ArrayList;
/**
* <p>
* Program that computes personalized PageRank for a graph using the <a
* Program that computes PageRank for a graph using the <a
* href="https://github.com/jrtom/jung">JUNG</a> package. Program takes three command-line
* options: <code>-input</code> is a file containing the graph data, <code>-sources</code> is a
* comma-separated list of source node ids, and <code>-jump</code> is the random jump factor (a
* typical setting is 0.15).
......@@ -49,8 +50,10 @@ import java.util.Set;
* lines will simply contain one token, the source node id).
* </p>
*
* @author Ali Abedi
* @author Jimmy Lin
*/
public class SequentialPersonalizedPageRank {
private SequentialPersonalizedPageRank() {}
......@@ -58,15 +61,18 @@ public class SequentialPersonalizedPageRank {
/** Input path given with {@code -input}; this option is required. */
@Option(name = "-input", metaVar = "[path]", required = true, usage = "input path")
String input;
/**
 * Id of the single source node given with {@code -source} (the destination of the random
 * jump); this option is required.
 */
@Option(name = "-source", metaVar = "[node]", required = true,
usage = "source node (i.e., destination of the random jump)")
String source;
/**
 * Source node ids given with {@code -sources} as a single comma-separated string (for
 * example {@code 1,5,9}); this option is required. The caller splits the value on
 * {@code ","} to obtain the individual node ids.
 */
@Option(name = "-sources", metaVar = "[node]", required = true,
    usage = "source nodes set. Comma separated.")
String sources;
/**
 * Random jump factor given with {@code -jump}; the option is not marked required and the
 * field is initialized to 0.15. The value is handed to the PageRank ranker.
 */
@Option(name = "-jump", metaVar = "[num]", usage = "random jump factor")
float alpha = 0.15f;
}
public static void main(String[] argv) throws IOException {
// Keeping track of source nodes
ArrayList sources = new ArrayList();
final Args args = new Args();
CmdLineParser parser = new CmdLineParser(args, ParserProperties.defaults().withUsageWidth(100));
......@@ -78,6 +84,12 @@ public class SequentialPersonalizedPageRank {
System.exit(-1);
}
String[] tmp = args.sources.split(",");
for (int i = 0; i < tmp.length; i++) {
sources.add(tmp[i]);
}
int edgeCnt = 0;
DirectedSparseGraph<String, Integer> graph = new DirectedSparseGraph<>();
......@@ -94,12 +106,15 @@ public class SequentialPersonalizedPageRank {
}
data.close();
if (!graph.containsVertex(args.source)) {
System.err.println("Error: source node not found in the graph!");
System.exit(-1);
for (int i = 0; i < sources.size(); i++){
if (!graph.containsVertex((String) sources.get(i))) {
System.err.println("Error: source node not found in the graph!");
System.exit(-1);
}
}
WeakComponentClusterer<String, Integer> clusterer = new WeakComponentClusterer<>();
Set<Set<String>> components = clusterer.apply(graph);
......@@ -107,16 +122,16 @@ public class SequentialPersonalizedPageRank {
System.out.println("Number of edges: " + graph.getEdgeCount());
System.out.println("Number of nodes: " + graph.getVertexCount());
System.out.println("Random jump factor: " + args.alpha);
System.out.println("Source nodes: " + args.sources);
// Compute personalized PageRank.
PageRankWithPriors<String, Integer> ranker = new PageRankWithPriors<>(graph,
new Function<String, Double>() {
@Override
public Double apply(String vertex) {
return vertex.equals(args.source) ? 1.0 : 0;
return sources.contains(vertex) ? 1.0 / (float) sources.size() : 0;
}
}, args.alpha);
// Note that the Altiscale cluster is still on Java 7, so we don't have lambdas.
ranker.evaluate();
// Use priority queue to sort vertices by PageRank values.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment