diff --git a/src/main/scala/com/microsoft/hyperspace/index/IndexLogEntry.scala b/src/main/scala/com/microsoft/hyperspace/index/IndexLogEntry.scala
index 23a8e8980..1bbbb0ba4 100644
--- a/src/main/scala/com/microsoft/hyperspace/index/IndexLogEntry.scala
+++ b/src/main/scala/com/microsoft/hyperspace/index/IndexLogEntry.scala
@@ -325,7 +325,17 @@ case class Hdfs(properties: Hdfs.Properties) {
   val kind = "HDFS"
 }
 object Hdfs {
-  case class Properties(content: Content)
+
+  /**
+   * Hdfs file properties.
+   * @param content Content object representing Hdfs file based data source.
+   * @param appendedFiles Appended files since the last time derived dataset was updated.
+   * @param deletedFiles Deleted files since the last time derived dataset was updated.
+   */
+  case class Properties(
+      content: Content,
+      appendedFiles: Seq[String] = Nil,
+      deletedFiles: Seq[String] = Nil)
 }
 
 // IndexLogEntry-specific Relation that represents the source relation.
@@ -379,6 +389,22 @@ case class IndexLogEntry(
       .toSet
   }
 
+  /**
+   * Deleted files since the last time derived dataset was updated, read from the
+   * properties of the first entry in `relations`.
+   */
+  def deletedFiles: Seq[String] = {
+    relations.head.data.properties.deletedFiles
+  }
+
+  /**
+   * Appended files since the last time derived dataset was updated, read from the
+   * properties of the first entry in `relations`.
+   */
+  def appendedFiles: Seq[String] = {
+    relations.head.data.properties.appendedFiles
+  }
+
   def bucketSpec: BucketSpec =
     BucketSpec(
       numBuckets = numBuckets,
diff --git a/src/test/scala/com/microsoft/hyperspace/index/IndexLogEntryTest.scala b/src/test/scala/com/microsoft/hyperspace/index/IndexLogEntryTest.scala
index 85541c3e4..6669bf0a6 100644
--- a/src/test/scala/com/microsoft/hyperspace/index/IndexLogEntryTest.scala
+++ b/src/test/scala/com/microsoft/hyperspace/index/IndexLogEntryTest.scala
@@ -125,7 +125,9 @@ class IndexLogEntryTest extends SparkFunSuite with SQLHelper with BeforeAndAfter
         |            "kind" : "NoOp",
         |            "properties" : { }
         |          }
-        |        }
+        |        },
+        |        "deletedFiles" : ["file:/rootpath/f1"],
+        |        "appendedFiles" : ["file:/rootpath/f3"]
         |      },
         |      "kind" : "HDFS"
         |    },
@@ -163,7 +165,9 @@ class IndexLogEntryTest extends SparkFunSuite with SQLHelper with BeforeAndAfter
       Seq(Relation(
         Seq("rootpath"),
         Hdfs(Hdfs.Properties(Content(
-          Directory("", Seq(FileInfo("f1", 100L, 100L), FileInfo("f2", 200L, 200L)), Seq())))),
+          Directory("", Seq(FileInfo("f1", 100L, 100L), FileInfo("f2", 200L, 200L)), Seq())),
+          appendedFiles = Seq("file:/rootpath/f3"),
+          deletedFiles = Seq("file:/rootpath/f1"))),
         "schema",
         "type",
         Map())),