Commit d06a4179 authored by cs451
Browse files

multi source sequential pagerank added

parent a06ec81b
...@@ -14,7 +14,7 @@ ...@@ -14,7 +14,7 @@
* limitations under the License. * limitations under the License.
*/ */
package io.bespin.java.mapreduce.pagerank; package ca.uwaterloo.cs451.a4;
import com.google.common.base.Function; import com.google.common.base.Function;
import edu.uci.ics.jung.algorithms.cluster.WeakComponentClusterer; import edu.uci.ics.jung.algorithms.cluster.WeakComponentClusterer;
...@@ -32,10 +32,11 @@ import java.io.IOException; ...@@ -32,10 +32,11 @@ import java.io.IOException;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.util.PriorityQueue; import java.util.PriorityQueue;
import java.util.Set; import java.util.Set;
import java.util.ArrayList;
/** /**
* <p> * <p>
* Program that computes personalized PageRank for a graph using the <a * Program that computes PageRank for a graph using the <a
* href="https://github.com/jrtom/jung">JUNG</a> package. Program takes two command-line * href="https://github.com/jrtom/jung">JUNG</a> package. Program takes two command-line
* arguments: the first is a file containing the graph data, and the second is the random jump * arguments: the first is a file containing the graph data, and the second is the random jump
* factor (a typical setting is 0.15). * factor (a typical setting is 0.15).
...@@ -49,8 +50,10 @@ import java.util.Set; ...@@ -49,8 +50,10 @@ import java.util.Set;
* lines will simply contain one token, the source node id). * lines will simply contain one token, the source node id).
* </p> * </p>
* *
* @author Ali Abedi
* @author Jimmy Lin * @author Jimmy Lin
*/ */
public class SequentialPersonalizedPageRank { public class SequentialPersonalizedPageRank {
private SequentialPersonalizedPageRank() {} private SequentialPersonalizedPageRank() {}
...@@ -58,15 +61,18 @@ public class SequentialPersonalizedPageRank { ...@@ -58,15 +61,18 @@ public class SequentialPersonalizedPageRank {
@Option(name = "-input", metaVar = "[path]", required = true, usage = "input path") @Option(name = "-input", metaVar = "[path]", required = true, usage = "input path")
String input; String input;
@Option(name = "-source", metaVar = "[node]", required = true, @Option(name = "-sources", metaVar = "[node]", required = true,
usage = "source node (i.e., destination of the random jump)") usage = "source nodes set. Comma seperated.")
String source; String sources;
@Option(name = "-jump", metaVar = "[num]", usage = "random jump factor") @Option(name = "-jump", metaVar = "[num]", usage = "random jump factor")
float alpha = 0.15f; float alpha = 0.15f;
} }
public static void main(String[] argv) throws IOException { public static void main(String[] argv) throws IOException {
// Keeping track of source nodes
ArrayList sources = new ArrayList();
final Args args = new Args(); final Args args = new Args();
CmdLineParser parser = new CmdLineParser(args, ParserProperties.defaults().withUsageWidth(100)); CmdLineParser parser = new CmdLineParser(args, ParserProperties.defaults().withUsageWidth(100));
...@@ -78,6 +84,12 @@ public class SequentialPersonalizedPageRank { ...@@ -78,6 +84,12 @@ public class SequentialPersonalizedPageRank {
System.exit(-1); System.exit(-1);
} }
String[] tmp = args.sources.split(",");
for (int i = 0; i < tmp.length; i++) {
sources.add(tmp[i]);
}
int edgeCnt = 0; int edgeCnt = 0;
DirectedSparseGraph<String, Integer> graph = new DirectedSparseGraph<>(); DirectedSparseGraph<String, Integer> graph = new DirectedSparseGraph<>();
...@@ -94,12 +106,15 @@ public class SequentialPersonalizedPageRank { ...@@ -94,12 +106,15 @@ public class SequentialPersonalizedPageRank {
} }
data.close(); data.close();
if (!graph.containsVertex(args.source)) { for (int i = 0; i < sources.size(); i++){
System.err.println("Error: source node not found in the graph!"); if (!graph.containsVertex((String) sources.get(i))) {
System.exit(-1); System.err.println("Error: source node not found in the graph!");
System.exit(-1);
}
} }
WeakComponentClusterer<String, Integer> clusterer = new WeakComponentClusterer<>(); WeakComponentClusterer<String, Integer> clusterer = new WeakComponentClusterer<>();
Set<Set<String>> components = clusterer.apply(graph); Set<Set<String>> components = clusterer.apply(graph);
...@@ -107,16 +122,16 @@ public class SequentialPersonalizedPageRank { ...@@ -107,16 +122,16 @@ public class SequentialPersonalizedPageRank {
System.out.println("Number of edges: " + graph.getEdgeCount()); System.out.println("Number of edges: " + graph.getEdgeCount());
System.out.println("Number of nodes: " + graph.getVertexCount()); System.out.println("Number of nodes: " + graph.getVertexCount());
System.out.println("Random jump factor: " + args.alpha); System.out.println("Random jump factor: " + args.alpha);
System.out.println("Source nodes: " + args.sources);
// Compute personalized PageRank. // Compute personalized PageRank.
PageRankWithPriors<String, Integer> ranker = new PageRankWithPriors<>(graph, PageRankWithPriors<String, Integer> ranker = new PageRankWithPriors<>(graph,
new Function<String, Double>() { new Function<String, Double>() {
@Override @Override
public Double apply(String vertex) { public Double apply(String vertex) {
return vertex.equals(args.source) ? 1.0 : 0; return sources.contains(vertex) ? 1.0 / (float) sources.size() : 0;
} }
}, args.alpha); }, args.alpha);
// Note that the Altiscale cluster is still on Java 7, so we don't have lambdas.
ranker.evaluate(); ranker.evaluate();
// Use priority queue to sort vertices by PageRank values. // Use priority queue to sort vertices by PageRank values.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment