From d3ff47c30a365a1efd7df94612bbdc4194225523 Mon Sep 17 00:00:00 2001 From: Mike Lambert Date: Tue, 16 Mar 2021 23:11:20 -0500 Subject: [PATCH 1/3] Simplify available configuration options, remove archiveDebug --- app/Global.scala | 27 ++++----- app/services/mongodb/MongoDBFileService.scala | 58 +++++++++---------- conf/application.conf | 16 ++--- 3 files changed, 47 insertions(+), 54 deletions(-) diff --git a/app/Global.scala b/app/Global.scala index 24adc32b8..f28527631 100644 --- a/app/Global.scala +++ b/app/Global.scala @@ -64,21 +64,18 @@ object Global extends WithFilters(new GzipFilter(), new Jsonp(), CORSFilter()) w val archiveEnabled = Play.application.configuration.getBoolean("archiveEnabled", false) if (archiveEnabled && archivalTimer == null) { - val archiveDebug = Play.application.configuration.getBoolean("archiveDebug", false) - val interval = if (archiveDebug) { 5 minutes } else { 1 day } - - // Determine time until next midnight - val now = ZonedDateTime.now - val midnight = now.truncatedTo(ChronoUnit.DAYS) - val sinceLastMidnight = Duration.between(midnight, now).getSeconds - val delay = if (archiveDebug) { 10 seconds } else { - (Duration.ofDays(1).getSeconds - sinceLastMidnight) seconds - } - - Logger.info("Starting archival loop - first iteration in " + delay + ", next iteration after " + interval) - archivalTimer = Akka.system.scheduler.schedule(delay, interval) { - Logger.info("Starting auto archive process...") - files.autoArchiveCandidateFiles() + // Set archiveAutoInterval == 0 to disable auto archiving + val archiveAutoInterval = Play.application.configuration.getLong("archiveAutoInterval", 0) + if (archiveAutoInterval > 0) { + val interval = FiniteDuration(archiveAutoInterval, SECONDS) + val archiveAutoDelay = Play.application.configuration.getLong("archiveAutoDelay", 0) + val delay = FiniteDuration(archiveAutoDelay, SECONDS) + + Logger.info("Starting archival loop - first iteration in " + delay + ", next iteration after " + interval) + 
archivalTimer = Akka.system.scheduler.schedule(delay, interval) { + Logger.info("Starting auto archive process...") + files.autoArchiveCandidateFiles() + } } } diff --git a/app/services/mongodb/MongoDBFileService.scala b/app/services/mongodb/MongoDBFileService.scala index 9a9e87fe9..00195b44f 100644 --- a/app/services/mongodb/MongoDBFileService.scala +++ b/app/services/mongodb/MongoDBFileService.scala @@ -4,23 +4,23 @@ import play.api.mvc.Request import services._ import models._ import com.mongodb.casbah.commons.{Imports, MongoDBObject} -import java.text.SimpleDateFormat +import java.text.SimpleDateFormat import _root_.util.{License, Parsers, SearchUtils} import scala.collection.mutable.ListBuffer import Transformation.LidoToCidocConvertion -import java.util.{ArrayList, Calendar} -import java.io._ +import java.util.{ArrayList, Calendar, Date} +import java.io._ import org.apache.commons.io.FileUtils import org.json.JSONObject import play.api.libs.json.{JsValue, Json} import com.mongodb.util.JSON + import java.nio.file.{FileSystems, Files} import java.nio.file.attribute.BasicFileAttributes -import java.time.LocalDateTime - +import java.time.Instant import collection.JavaConverters._ import scala.collection.JavaConversions._ import javax.inject.{Inject, Singleton} @@ -31,8 +31,6 @@ import scala.util.parsing.json.JSONArray import play.api.libs.json.JsArray import models.File import play.api.libs.json.JsObject -import java.util.Date - import com.novus.salat.dao.{ModelCompanion, SalatDAO} import MongoContext.context import play.api.Play._ @@ -40,6 +38,9 @@ import com.mongodb.casbah.Imports._ import models.FileStatus.FileStatus import org.bson.types.ObjectId +import java.time.temporal.ChronoUnit +import scala.concurrent.duration.FiniteDuration + /** * Use mongo for both metadata and blobs. @@ -201,48 +202,41 @@ class MongoDBFileService @Inject() ( * This may be expanded to support per-space configuration in the future. 
* * Reads the following parameters from Clowder configuration: - * - archiveAutoAfterDaysInactive - timeout after which files are considered + * - archiveAutoAfterInactiveCount - timeout after which files are considered * to be candidates for archival (see below) - * - archiveMinimumStorageSize - files below this size (in Bytes) should not be archived + * - archiveAutoAfterInactiveUnit - time unit that should be used for the timeout (see below) + * - archiveAutoAboveMinimumStorageSize - files below this size (in Bytes) should not be archived * - clowder.rabbitmq.clowderurl - the Clowder hostname to pass to the archival extractor * - commKey - the admin key to pass to the archival extractor * * Archival candidates are currently defined as follows: - * - file must be over `archiveMinimumStorageSize` Bytes in size - * - file must be over `archiveAutoAfterDaysInactive` days old + * - file's size must be greater than `archiveAutoAboveMinimumStorageSize` Bytes + * - file's age must be greater than `archiveAutoAfterInactiveCount` * `archiveAutoAfterInactiveUnit` + * (e.g. 
10 days old) * - AND one of the following must be true: * - file has never been downloaded (0 downloads) * OR - * - file has not been downloaded in the past `archiveAutoAfterDaysInactive` days + * - file has not been downloaded in the past `archiveAutoAfterInactiveCount` `archiveAutoAfterInactiveUnit` * * */ def autoArchiveCandidateFiles() = { - val timeout = configuration(play.api.Play.current).getInt("archiveAutoAfterDaysInactive") + val timeout: Option[Long] = configuration(play.api.Play.current).getLong("archiveAutoAfterInactiveCount") timeout match { case None => Logger.info("No archival auto inactivity timeout set - skipping auto archival loop.") - case Some(days) => { - if (days == 0) { + case Some(inactiveTimeout) => { + if (inactiveTimeout == 0) { Logger.info("Archival auto inactivity timeout set to 0 - skipping auto archival loop.") } else { - // DEBUG ONLY: query for files that were uploaded within the past hour - val archiveDebug = configuration(play.api.Play.current).getBoolean("archiveDebug").getOrElse(false) - val oneHourAgo = LocalDateTime.now.minusHours(1).toString + "-00:00" - - // Query for files that haven't been accessed for at least this many days - val daysAgo = LocalDateTime.now.minusDays(days).toString + "-00:00" - val notDownloadedWithinTimeout = if (archiveDebug) { - ("stats.last_downloaded" $gte Parsers.fromISO8601(oneHourAgo)) ++ ("status" $eq FileStatus.PROCESSED.toString) - } else { - ("stats.last_downloaded" $lt Parsers.fromISO8601(daysAgo)) ++ ("status" $eq FileStatus.PROCESSED.toString) - } + val unit = configuration(play.api.Play.current).getString("archiveAutoAfterInactiveUnits").getOrElse("days") + val timeoutAgo = FiniteDuration(inactiveTimeout, unit) + + // Query for files that haven't been accessed for at least this many units + val since = Instant.now().minus(timeoutAgo.length.toLong, ChronoUnit.valueOf(timeoutAgo.unit.toString)).toString + "-00:00" + val notDownloadedWithinTimeout = ("stats.last_downloaded" $lt 
Parsers.fromISO8601(since)) ++ ("status" $eq FileStatus.PROCESSED.toString) // Include files that have never been downloaded, but make sure they are old enough - val neverDownloaded = if (archiveDebug) { - ("stats.downloads" $eq 0) ++ ("uploadDate" $gte Parsers.fromISO8601(oneHourAgo)) ++ ("status" $eq FileStatus.PROCESSED.toString) - } else { - ("stats.downloads" $eq 0) ++ ("uploadDate" $lt Parsers.fromISO8601(daysAgo)) ++ ("status" $eq FileStatus.PROCESSED.toString) - } + val neverDownloaded = ("stats.downloads" $eq 0) ++ ("uploadDate" $lt Parsers.fromISO8601(since)) ++ ("status" $eq FileStatus.PROCESSED.toString) // TODO: How to get host / apiKey / admin internally without a request? val host = configuration(play.api.Play.current).getString("clowder.rabbitmq.clowderurl").getOrElse("http://localhost:9000") @@ -257,7 +251,7 @@ class MongoDBFileService @Inject() ( Logger.info("Archival candidates found: " + matchingFiles.length) // Exclude candidates that do not exceed our minimum file size threshold - val minSize = configuration(play.api.Play.current).getLong("archiveMinimumStorageSize").getOrElse(1000000L) + val minSize = configuration(play.api.Play.current).getLong("archiveAutoAboveMinimumStorageSize").getOrElse(1000000L) // Loop all candidate files and submit each one for archival for (file <- matchingFiles) { diff --git a/conf/application.conf b/conf/application.conf index fcee9659b..898da5fe9 100644 --- a/conf/application.conf +++ b/conf/application.conf @@ -587,18 +587,20 @@ enableUsernamePassword = true # "archive" and "unarchive" should be purely inverse operations, such # that unarchive(archive(x)) == x for any valid input. 
# -# Available archival extractors: -# - ncsa.archival.disk - https://opensource.ncsa.illinois.edu/bitbucket/projects/CATS/repos/extractors-archival-disk/browse -# - ncsa.archival.s3 - https://opensource.ncsa.illinois.edu/bitbucket/projects/CATS/repos/extractors-archival-s3/browse +# See https://github.com/clowder-framework/extractors-archival for available extractors # # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ archiveEnabled=false -archiveDebug=false +archiveAllowUnarchive=false #archiveExtractorId="ncsa.archival.s3" archiveExtractorId="ncsa.archival.disk" -archiveAllowUnarchive=false -archiveAutoAfterDaysInactive=90 -archiveMinimumStorageSize=1000000 + +# NOTE: Setting interval to zero will disable automatic archiving +archiveAutoInterval=0 # in seconds (e.g. 86400 == 24 hours) +archiveAutoDelay=120 # in seconds, delay before the first iteration (e.g. 120 == 2 minutes) +archiveAutoAfterInactiveCount=90 # NOTE: Setting count to zero will disable automatic archiving +archiveAutoAfterInactiveUnits="days" +archiveAutoAboveMinimumStorageSize=1000000 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Configuration file for securesocial From eda9804f3f8bcfd33ccacb5b8e2af29938d12c3c Mon Sep 17 00:00:00 2001 From: Mike Lambert Date: Tue, 16 Mar 2021 23:19:32 -0500 Subject: [PATCH 2/3] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 195c5e5c0..879329477 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/). ### Changed - Consolidated field names sent by the EventSinkService to maxaimize reuse. +- Reworked auto-archival configuration options to make their meanings more clear. 
## 1.15.0 - 2021-03-03 From 35dca63fa87c2f283a6a3d8ccd036f2a07ea646d Mon Sep 17 00:00:00 2001 From: Mike Lambert Date: Tue, 16 Mar 2021 23:47:05 -0500 Subject: [PATCH 3/3] Fix typo Unit -> Units --- app/services/mongodb/MongoDBFileService.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/services/mongodb/MongoDBFileService.scala b/app/services/mongodb/MongoDBFileService.scala index 00195b44f..74b7cc539 100644 --- a/app/services/mongodb/MongoDBFileService.scala +++ b/app/services/mongodb/MongoDBFileService.scala @@ -204,19 +204,19 @@ class MongoDBFileService @Inject() ( * Reads the following parameters from Clowder configuration: * - archiveAutoAfterInactiveCount - timeout after which files are considered * to be candidates for archival (see below) - * - archiveAutoAfterInactiveUnit - time unit that should be used for the timeout (see below) + * - archiveAutoAfterInactiveUnits - time unit that should be used for the timeout (see below) * - archiveAutoAboveMinimumStorageSize - files below this size (in Bytes) should not be archived * - clowder.rabbitmq.clowderurl - the Clowder hostname to pass to the archival extractor * - commKey - the admin key to pass to the archival extractor * * Archival candidates are currently defined as follows: * - file's size must be greater than `archiveAutoAboveMinimumStorageSize` Bytes - * - file's age must be greater than `archiveAutoAfterInactiveCount` * `archiveAutoAfterInactiveUnit` + * - file's age must be greater than `archiveAutoAfterInactiveCount` * `archiveAutoAfterInactiveUnits` * (e.g. 10 days old) * - AND one of the following must be true: * - file has never been downloaded (0 downloads) * OR - * - file has not been downloaded in the past `archiveAutoAfterInactiveCount` `archiveAutoAfterInactiveUnit` + * - file has not been downloaded in the past `archiveAutoAfterInactiveCount` `archiveAutoAfterInactiveUnits` * * */