multi source sequential pagerank added

d06a4179 · cs451 · a06ec81b · d06a4179
Commit d06a4179 authored 3 years ago by cs451
--- a/src/main/java/io/bespin/java/mapreduce/pagerank/SequentialPersonalizedPageRank.java
+++ b/src/main/java/io/bespin/java/mapreduce/pagerank/SequentialPersonalizedPageRank.java
@@ -14,7 +14,7 @@
 * limitations under the License.
 */

-package io.bespin.java.mapreduce.pagerank;
+package ca.uwaterloo.cs451.a4;

 import com.google.common.base.Function;
 import edu.uci.ics.jung.algorithms.cluster.WeakComponentClusterer;
@@ -32,10 +32,11 @@ import java.io.IOException;
 import java.io.InputStreamReader;
 import java.util.PriorityQueue;
 import java.util.Set;
+import java.util.ArrayList;

 /**
 * <p>
- * Program that computes personalized PageRank for a graph using the <a
+ * Program that computes multi-source personalized PageRank for a graph using the <a
 * href="https://github.com/jrtom/jung">JUNG</a> package. Program takes two command-line
 * arguments: the first is a file containing the graph data, and the second is the random jump
 * factor (a typical setting is 0.15).
@@ -49,8 +50,10 @@ import java.util.Set;
 * lines will simply contain one token, the source node id).
 * </p>
 *
+ * @author Ali Abedi
 * @author Jimmy Lin
 */
+
 public class SequentialPersonalizedPageRank {
  private SequentialPersonalizedPageRank() {}

@@ -58,15 +61,18 @@ public class SequentialPersonalizedPageRank {
    @Option(name = "-input", metaVar = "[path]", required = true, usage = "input path")
    String input;

-    @Option(name = "-source", metaVar = "[node]", required = true,
-        usage = "source node (i.e., destination of the random jump)")
-    String source;
+    @Option(name = "-sources", metaVar = "[node]", required = true,
+        usage = "source nodes set. Comma separated.")
+    String sources;

    @Option(name = "-jump", metaVar = "[num]", usage = "random jump factor")
    float alpha = 0.15f;
  }

+
  public static void main(String[] argv) throws IOException {
+    // Keeping track of source nodes
+    ArrayList sources = new ArrayList();
    final Args args = new Args();
    CmdLineParser parser = new CmdLineParser(args, ParserProperties.defaults().withUsageWidth(100));

@@ -78,6 +84,12 @@ public class SequentialPersonalizedPageRank {
      System.exit(-1);
    }

+    String[] tmp = args.sources.split(",");
+    for (int i = 0; i < tmp.length; i++) {
+      sources.add(tmp[i]);
+    }
+
+
    int edgeCnt = 0;
    DirectedSparseGraph<String, Integer> graph = new DirectedSparseGraph<>();

@@ -94,12 +106,15 @@ public class SequentialPersonalizedPageRank {
    }

    data.close();
-
-    if (!graph.containsVertex(args.source)) {
-      System.err.println("Error: source node not found in the graph!");
-      System.exit(-1);
+    
+    for (int i = 0; i < sources.size(); i++){
+      if (!graph.containsVertex((String) sources.get(i))) {
+        System.err.println("Error: source node not found in the graph!");
+        System.exit(-1);
+      }
    }

+
    WeakComponentClusterer<String, Integer> clusterer = new WeakComponentClusterer<>();
    Set<Set<String>> components = clusterer.apply(graph);

@@ -107,16 +122,16 @@ public class SequentialPersonalizedPageRank {
    System.out.println("Number of edges: " + graph.getEdgeCount());
    System.out.println("Number of nodes: " + graph.getVertexCount());
    System.out.println("Random jump factor: " + args.alpha);
+    System.out.println("Source nodes: " + args.sources);

    // Compute personalized PageRank.
    PageRankWithPriors<String, Integer> ranker = new PageRankWithPriors<>(graph,
        new Function<String, Double>() {
          @Override
          public Double apply(String vertex) {
-            return vertex.equals(args.source) ? 1.0 : 0;
+            return sources.contains(vertex) ? 1.0 / (float) sources.size() : 0;
          }
        }, args.alpha);
-    // Note that the Altiscale cluster is still on Java 7, so we don't have lambdas.
    ranker.evaluate();

    // Use priority queue to sort vertices by PageRank values.