From 76516529e31a5507afc0a1de31b30104a4d7c6ad Mon Sep 17 00:00:00 2001 From: Shahid Date: Wed, 29 Aug 2018 23:39:14 +0530 Subject: [PATCH] [SPARK-25268][GraphX]runParallelPersonalizedPageRank throws serialization Exception mapValues in scala is currently not serializable. To avoid the serialization issue while running pageRank, we need to use map instead of mapValues. --- .../main/scala/org/apache/spark/graphx/lib/PageRank.scala | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala index 96b635f9a144e..1305c059b89ce 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/lib/PageRank.scala @@ -198,9 +198,11 @@ object PageRank extends Logging { val zero = Vectors.sparse(sources.size, List()).asBreeze // map of vid -> vector where for each vid, the _position of vid in source_ is set to 1.0 - val sourcesInitMap = sources.zipWithIndex.toMap.mapValues { i => - Vectors.sparse(sources.size, Array(i), Array(1.0)).asBreeze - } + val sourcesInitMap = sources.zipWithIndex.map { case (vid, i) => + val v = Vectors.sparse(sources.size, Array(i), Array(1.0)).asBreeze + (vid, v) + }.toMap + val sc = graph.vertices.sparkContext val sourcesInitMapBC = sc.broadcast(sourcesInitMap) // Initialize the PageRank graph with each edge attribute having