diff --git a/CHANGELOG.md b/CHANGELOG.md index 0600cbab6..c8721ca45 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/). - Added a `sort` and `order` parameter to `/api/search` endpoint that supports date and numeric field sorting. If only order is specified, created date is used. String fields are not currently supported. - Added a new `/api/deleteindex` admin endpoint that will queue an action to delete an Elasticsearch index (usually prior to a reindex). +### Changed +- Consolidated field names sent by the EventSinkService to maxaimize reuse. +- Reworked auto-archival configuration options to make their meanings more clear. + ## 1.15.1 - 2021-03-12 ### Fixed diff --git a/app/Global.scala b/app/Global.scala index 24adc32b8..f28527631 100644 --- a/app/Global.scala +++ b/app/Global.scala @@ -64,21 +64,18 @@ object Global extends WithFilters(new GzipFilter(), new Jsonp(), CORSFilter()) w val archiveEnabled = Play.application.configuration.getBoolean("archiveEnabled", false) if (archiveEnabled && archivalTimer == null) { - val archiveDebug = Play.application.configuration.getBoolean("archiveDebug", false) - val interval = if (archiveDebug) { 5 minutes } else { 1 day } - - // Determine time until next midnight - val now = ZonedDateTime.now - val midnight = now.truncatedTo(ChronoUnit.DAYS) - val sinceLastMidnight = Duration.between(midnight, now).getSeconds - val delay = if (archiveDebug) { 10 seconds } else { - (Duration.ofDays(1).getSeconds - sinceLastMidnight) seconds - } - - Logger.info("Starting archival loop - first iteration in " + delay + ", next iteration after " + interval) - archivalTimer = Akka.system.scheduler.schedule(delay, interval) { - Logger.info("Starting auto archive process...") - files.autoArchiveCandidateFiles() + // Set archiveAutoInterval == 0 to disable auto archiving + val archiveAutoInterval = Play.application.configuration.getLong("archiveAutoInterval", 0) + if (archiveAutoInterval > 0) { + val interval = FiniteDuration(archiveAutoInterval, SECONDS) + val archiveAutoDelay = Play.application.configuration.getLong("archiveAutoDelay", 0) + val delay = FiniteDuration(archiveAutoDelay, SECONDS) + + Logger.info("Starting archival loop - first iteration in " + delay + ", next iteration after " + interval) + archivalTimer = Akka.system.scheduler.schedule(delay, interval) { + Logger.info("Starting auto archive process...") + files.autoArchiveCandidateFiles() + } } } diff --git a/app/services/mongodb/MongoDBFileService.scala b/app/services/mongodb/MongoDBFileService.scala index 9a9e87fe9..74b7cc539 100644 --- a/app/services/mongodb/MongoDBFileService.scala +++ b/app/services/mongodb/MongoDBFileService.scala @@ -4,23 +4,23 @@ import play.api.mvc.Request import services._ import models._ import com.mongodb.casbah.commons.{Imports, MongoDBObject} -import java.text.SimpleDateFormat +import java.text.SimpleDateFormat import _root_.util.{License, Parsers, SearchUtils} import scala.collection.mutable.ListBuffer import Transformation.LidoToCidocConvertion -import java.util.{ArrayList, Calendar} -import java.io._ +import java.util.{ArrayList, Calendar, Date} +import java.io._ import org.apache.commons.io.FileUtils import org.json.JSONObject import play.api.libs.json.{JsValue, Json} import com.mongodb.util.JSON + import java.nio.file.{FileSystems, Files} import java.nio.file.attribute.BasicFileAttributes -import java.time.LocalDateTime - +import java.time.Instant import collection.JavaConverters._ import scala.collection.JavaConversions._ import javax.inject.{Inject, Singleton} @@ -31,8 +31,6 @@ import scala.util.parsing.json.JSONArray import play.api.libs.json.JsArray import models.File import play.api.libs.json.JsObject -import java.util.Date - import com.novus.salat.dao.{ModelCompanion, SalatDAO} import MongoContext.context import play.api.Play._ @@ -40,6 +38,9 @@ import com.mongodb.casbah.Imports._ import models.FileStatus.FileStatus import org.bson.types.ObjectId +import java.time.temporal.ChronoUnit +import scala.concurrent.duration.FiniteDuration + /** * Use mongo for both metadata and blobs. @@ -201,48 +202,41 @@ class MongoDBFileService @Inject() ( * This may be expanded to support per-space configuration in the future. * * Reads the following parameters from Clowder configuration: - * - archiveAutoAfterDaysInactive - timeout after which files are considered + * - archiveAutoAfterInactiveCount - timeout after which files are considered * to be candidates for archival (see below) - * - archiveMinimumStorageSize - files below this size (in Bytes) should not be archived + * - archiveAutoAfterInactiveUnits - time unit that should be used for the timeout (see below) + * - archiveAutoAboveMinimumStorageSize - files below this size (in Bytes) should not be archived * - clowder.rabbitmq.clowderurl - the Clowder hostname to pass to the archival extractor * - commKey - the admin key to pass to the archival extractor * * Archival candidates are currently defined as follows: - * - file must be over `archiveMinimumStorageSize` Bytes in size - * - file must be over `archiveAutoAfterDaysInactive` days old + * - file's size must be greater than `archiveAutoAboveMinimumStorageSize` Bytes + * - file's age must be greater than `archiveAutoAfterInactiveCount` * `archiveAutoAfterInactiveUnits` + * (e.g. 10 days old) * - AND one of the following must be true: * - file has never been downloaded (0 downloads) * OR - * - file has not been downloaded in the past `archiveAutoAfterDaysInactive` days + * - file has not been downloaded in the past `archiveAutoAfterInactiveCount` `archiveAutoAfterInactiveUnits` * * */ def autoArchiveCandidateFiles() = { - val timeout = configuration(play.api.Play.current).getInt("archiveAutoAfterDaysInactive") + val timeout: Option[Long] = configuration(play.api.Play.current).getLong("archiveAutoAfterInactiveCount") timeout match { case None => Logger.info("No archival auto inactivity timeout set - skipping auto archival loop.") - case Some(days) => { - if (days == 0) { + case Some(inactiveTimeout) => { + if (inactiveTimeout == 0) { Logger.info("Archival auto inactivity timeout set to 0 - skipping auto archival loop.") } else { - // DEBUG ONLY: query for files that were uploaded within the past hour - val archiveDebug = configuration(play.api.Play.current).getBoolean("archiveDebug").getOrElse(false) - val oneHourAgo = LocalDateTime.now.minusHours(1).toString + "-00:00" - - // Query for files that haven't been accessed for at least this many days - val daysAgo = LocalDateTime.now.minusDays(days).toString + "-00:00" - val notDownloadedWithinTimeout = if (archiveDebug) { - ("stats.last_downloaded" $gte Parsers.fromISO8601(oneHourAgo)) ++ ("status" $eq FileStatus.PROCESSED.toString) - } else { - ("stats.last_downloaded" $lt Parsers.fromISO8601(daysAgo)) ++ ("status" $eq FileStatus.PROCESSED.toString) - } + val unit = configuration(play.api.Play.current).getString("archiveAutoAfterInactiveUnits").getOrElse("days") + val timeoutAgo = FiniteDuration(inactiveTimeout, unit) + + // Query for files that haven't been accessed for at least this many units + val since = Instant.now().minus(timeoutAgo.length.toLong, ChronoUnit.valueOf(timeoutAgo.unit.toString)).toString + "-00:00" + val notDownloadedWithinTimeout = ("stats.last_downloaded" $lt Parsers.fromISO8601(since)) ++ ("status" $eq FileStatus.PROCESSED.toString) // Include files that have never been downloaded, but make sure they are old enough - val neverDownloaded = if (archiveDebug) { - ("stats.downloads" $eq 0) ++ ("uploadDate" $gte Parsers.fromISO8601(oneHourAgo)) ++ ("status" $eq FileStatus.PROCESSED.toString) - } else { - ("stats.downloads" $eq 0) ++ ("uploadDate" $lt Parsers.fromISO8601(daysAgo)) ++ ("status" $eq FileStatus.PROCESSED.toString) - } + val neverDownloaded = ("stats.downloads" $eq 0) ++ ("uploadDate" $lt Parsers.fromISO8601(since)) ++ ("status" $eq FileStatus.PROCESSED.toString) // TODO: How to get host / apiKey / admin internally without a request? val host = configuration(play.api.Play.current).getString("clowder.rabbitmq.clowderurl").getOrElse("http://localhost:9000") @@ -257,7 +251,7 @@ class MongoDBFileService @Inject() ( Logger.info("Archival candidates found: " + matchingFiles.length) // Exclude candidates that do not exceed our minimum file size threshold - val minSize = configuration(play.api.Play.current).getLong("archiveMinimumStorageSize").getOrElse(1000000L) + val minSize = configuration(play.api.Play.current).getLong("archiveAutoAboveMinimumStorageSize").getOrElse(1000000L) // Loop all candidate files and submit each one for archival for (file <- matchingFiles) { diff --git a/conf/application.conf b/conf/application.conf index fcee9659b..898da5fe9 100644 --- a/conf/application.conf +++ b/conf/application.conf @@ -587,18 +587,20 @@ enableUsernamePassword = true # "archive" and "unarchive" should be purely inverse operations, such # that unarchive(archive(x)) == x for any valid input. # -# Available archival extractors: -# - ncsa.archival.disk - https://opensource.ncsa.illinois.edu/bitbucket/projects/CATS/repos/extractors-archival-disk/browse -# - ncsa.archival.s3 - https://opensource.ncsa.illinois.edu/bitbucket/projects/CATS/repos/extractors-archival-s3/browse +# See https://github.com/clowder-framework/extractors-archival for available extractors # # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ archiveEnabled=false -archiveDebug=false +archiveAllowUnarchive=false #archiveExtractorId="ncsa.archival.s3" archiveExtractorId="ncsa.archival.disk" -archiveAllowUnarchive=false -archiveAutoAfterDaysInactive=90 -archiveMinimumStorageSize=1000000 + +# NOTE: Setting interval to zero will disable automatic archiving +archiveAutoInterval=0 # in seconds (e.g. 86400 == 24 hours) +archiveAutoDelay=120 # in seconds (e.g. 86400 == 24 hours) +archiveAutoAfterInactiveCount=90 # NOTE: Setting count to zero will disable automatic archiving +archiveAutoAfterInactiveUnits="days" +archiveAutoAboveMinimumStorageSize=1000000 # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Configuration file for securesocial