From 956046d1a451585af8b658be4c1a2df6bf7c958d Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Wed, 7 Oct 2020 18:46:30 -0700 Subject: [PATCH 1/2] add "appended" and "deleted" files in index metadata --- .../com/microsoft/hyperspace/index/IndexLogEntry.scala | 10 +++++++++- .../microsoft/hyperspace/index/IndexLogEntryTest.scala | 8 ++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/src/main/scala/com/microsoft/hyperspace/index/IndexLogEntry.scala b/src/main/scala/com/microsoft/hyperspace/index/IndexLogEntry.scala index 23a8e8980..7d74c56a4 100644 --- a/src/main/scala/com/microsoft/hyperspace/index/IndexLogEntry.scala +++ b/src/main/scala/com/microsoft/hyperspace/index/IndexLogEntry.scala @@ -325,7 +325,7 @@ case class Hdfs(properties: Hdfs.Properties) { val kind = "HDFS" } object Hdfs { - case class Properties(content: Content) + case class Properties(content: Content, deleted: Seq[String] = Nil, appended: Seq[String] = Nil) } // IndexLogEntry-specific Relation that represents the source relation. 
@@ -379,6 +379,14 @@ case class IndexLogEntry( .toSet } + def deletedFiles: Seq[String] = { + relations.head.data.properties.deleted + } + + def appendedFiles: Seq[String] = { + relations.head.data.properties.appended + } + def bucketSpec: BucketSpec = BucketSpec( numBuckets = numBuckets, diff --git a/src/test/scala/com/microsoft/hyperspace/index/IndexLogEntryTest.scala b/src/test/scala/com/microsoft/hyperspace/index/IndexLogEntryTest.scala index 85541c3e4..1deed4c20 100644 --- a/src/test/scala/com/microsoft/hyperspace/index/IndexLogEntryTest.scala +++ b/src/test/scala/com/microsoft/hyperspace/index/IndexLogEntryTest.scala @@ -125,7 +125,9 @@ class IndexLogEntryTest extends SparkFunSuite with SQLHelper with BeforeAndAfter | "kind" : "NoOp", | "properties" : { } | } - | } + | }, + | "deleted" : ["file:/rootpath/f1"], + | "appended" : ["file:/rootpath/f3"] | }, | "kind" : "HDFS" | }, @@ -163,7 +165,9 @@ class IndexLogEntryTest extends SparkFunSuite with SQLHelper with BeforeAndAfter Seq(Relation( Seq("rootpath"), Hdfs(Hdfs.Properties(Content( - Directory("", Seq(FileInfo("f1", 100L, 100L), FileInfo("f2", 200L, 200L)), Seq())))), + Directory("", Seq(FileInfo("f1", 100L, 100L), FileInfo("f2", 200L, 200L)), Seq())), + Seq("file:/rootpath/f1"), + Seq("file:/rootpath/f3"))), "schema", "type", Map())), From f89c94f07a2d7cbf0cdf354d24ba0c6fcc03f870 Mon Sep 17 00:00:00 2001 From: Apoorve Dave Date: Wed, 7 Oct 2020 20:17:02 -0700 Subject: [PATCH 2/2] review comments --- .../hyperspace/index/IndexLogEntry.scala | 16 +++++++++++++--- .../hyperspace/index/IndexLogEntryTest.scala | 8 ++++---- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/main/scala/com/microsoft/hyperspace/index/IndexLogEntry.scala b/src/main/scala/com/microsoft/hyperspace/index/IndexLogEntry.scala index 7d74c56a4..1bbbb0ba4 100644 --- a/src/main/scala/com/microsoft/hyperspace/index/IndexLogEntry.scala +++ b/src/main/scala/com/microsoft/hyperspace/index/IndexLogEntry.scala @@ -325,7 +325,17 
@@ case class Hdfs(properties: Hdfs.Properties) { val kind = "HDFS" } object Hdfs { - case class Properties(content: Content, deleted: Seq[String] = Nil, appended: Seq[String] = Nil) + + /** + * Hdfs file properties. + * @param content Content object representing the Hdfs file-based data source. + * @param appendedFiles Files appended since the last time the derived dataset was updated. + * @param deletedFiles Files deleted since the last time the derived dataset was updated. + */ + case class Properties( + content: Content, + appendedFiles: Seq[String] = Nil, + deletedFiles: Seq[String] = Nil) } // IndexLogEntry-specific Relation that represents the source relation. @@ -380,11 +390,11 @@ case class IndexLogEntry( } def deletedFiles: Seq[String] = { - relations.head.data.properties.deleted + relations.head.data.properties.deletedFiles } def appendedFiles: Seq[String] = { - relations.head.data.properties.appended + relations.head.data.properties.appendedFiles } def bucketSpec: BucketSpec = diff --git a/src/test/scala/com/microsoft/hyperspace/index/IndexLogEntryTest.scala b/src/test/scala/com/microsoft/hyperspace/index/IndexLogEntryTest.scala index 1deed4c20..6669bf0a6 100644 --- a/src/test/scala/com/microsoft/hyperspace/index/IndexLogEntryTest.scala +++ b/src/test/scala/com/microsoft/hyperspace/index/IndexLogEntryTest.scala @@ -126,8 +126,8 @@ class IndexLogEntryTest extends SparkFunSuite with SQLHelper with BeforeAndAfter | "properties" : { } | } | }, - | "deleted" : ["file:/rootpath/f1"], - | "appended" : ["file:/rootpath/f3"] + | "deletedFiles" : ["file:/rootpath/f1"], + | "appendedFiles" : ["file:/rootpath/f3"] | }, | "kind" : "HDFS" | }, @@ -166,8 +166,8 @@ class IndexLogEntryTest extends SparkFunSuite with SQLHelper with BeforeAndAfter Seq("rootpath"), Hdfs(Hdfs.Properties(Content( Directory("", Seq(FileInfo("f1", 100L, 100L), FileInfo("f2", 200L, 200L)), Seq())), - Seq("file:/rootpath/f1"), - Seq("file:/rootpath/f3"))), + Seq("file:/rootpath/f3"), + 
Seq("file:/rootpath/f1"))), "schema", "type", Map())),