Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
- Added a `sort` and `order` parameter to `/api/search` endpoint that supports date and numeric field sorting. If only order is specified, created date is used. String fields are not currently supported.
- Added a new `/api/deleteindex` admin endpoint that will queue an action to delete an Elasticsearch index (usually prior to a reindex).

### Changed
- Consolidated field names sent by the EventSinkService to maximize reuse.
- Reworked auto-archival configuration options to make their meanings more clear.

## 1.15.1 - 2021-03-12

### Fixed
Expand Down
27 changes: 12 additions & 15 deletions app/Global.scala
Original file line number Diff line number Diff line change
Expand Up @@ -64,21 +64,18 @@ object Global extends WithFilters(new GzipFilter(), new Jsonp(), CORSFilter()) w

val archiveEnabled = Play.application.configuration.getBoolean("archiveEnabled", false)
if (archiveEnabled && archivalTimer == null) {
val archiveDebug = Play.application.configuration.getBoolean("archiveDebug", false)
val interval = if (archiveDebug) { 5 minutes } else { 1 day }

// Determine time until next midnight
val now = ZonedDateTime.now
val midnight = now.truncatedTo(ChronoUnit.DAYS)
val sinceLastMidnight = Duration.between(midnight, now).getSeconds
val delay = if (archiveDebug) { 10 seconds } else {
(Duration.ofDays(1).getSeconds - sinceLastMidnight) seconds
}

Logger.info("Starting archival loop - first iteration in " + delay + ", next iteration after " + interval)
archivalTimer = Akka.system.scheduler.schedule(delay, interval) {
Logger.info("Starting auto archive process...")
files.autoArchiveCandidateFiles()
// Set archiveAutoInterval == 0 to disable auto archiving
val archiveAutoInterval = Play.application.configuration.getLong("archiveAutoInterval", 0)
if (archiveAutoInterval > 0) {
val interval = FiniteDuration(archiveAutoInterval, SECONDS)
val archiveAutoDelay = Play.application.configuration.getLong("archiveAutoDelay", 0)
val delay = FiniteDuration(archiveAutoDelay, SECONDS)

Logger.info("Starting archival loop - first iteration in " + delay + ", next iteration after " + interval)
archivalTimer = Akka.system.scheduler.schedule(delay, interval) {
Logger.info("Starting auto archive process...")
files.autoArchiveCandidateFiles()
}
}
}

Expand Down
58 changes: 26 additions & 32 deletions app/services/mongodb/MongoDBFileService.scala
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,23 @@ import play.api.mvc.Request
import services._
import models._
import com.mongodb.casbah.commons.{Imports, MongoDBObject}
import java.text.SimpleDateFormat

import java.text.SimpleDateFormat
import _root_.util.{License, Parsers, SearchUtils}

import scala.collection.mutable.ListBuffer
import Transformation.LidoToCidocConvertion
import java.util.{ArrayList, Calendar}
import java.io._

import java.util.{ArrayList, Calendar, Date}
import java.io._
import org.apache.commons.io.FileUtils
import org.json.JSONObject
import play.api.libs.json.{JsValue, Json}
import com.mongodb.util.JSON

import java.nio.file.{FileSystems, Files}
import java.nio.file.attribute.BasicFileAttributes
import java.time.LocalDateTime

import java.time.Instant
import collection.JavaConverters._
import scala.collection.JavaConversions._
import javax.inject.{Inject, Singleton}
Expand All @@ -31,15 +31,16 @@ import scala.util.parsing.json.JSONArray
import play.api.libs.json.JsArray
import models.File
import play.api.libs.json.JsObject
import java.util.Date

import com.novus.salat.dao.{ModelCompanion, SalatDAO}
import MongoContext.context
import play.api.Play._
import com.mongodb.casbah.Imports._
import models.FileStatus.FileStatus
import org.bson.types.ObjectId

import java.time.temporal.ChronoUnit
import scala.concurrent.duration.FiniteDuration


/**
* Use mongo for both metadata and blobs.
Expand Down Expand Up @@ -201,48 +202,41 @@ class MongoDBFileService @Inject() (
* This may be expanded to support per-space configuration in the future.
*
* Reads the following parameters from Clowder configuration:
* - archiveAutoAfterDaysInactive - timeout after which files are considered
 * - archiveAutoAfterInactiveCount - number of time units of inactivity after which files
 * are considered to be candidates for archival (see below)
* - archiveMinimumStorageSize - files below this size (in Bytes) should not be archived
* - archiveAutoAfterInactiveUnits - time unit that should be used for the timeout (see below)
* - archiveAutoAboveMinimumStorageSize - files below this size (in Bytes) should not be archived
* - clowder.rabbitmq.clowderurl - the Clowder hostname to pass to the archival extractor
* - commKey - the admin key to pass to the archival extractor
*
* Archival candidates are currently defined as follows:
* - file must be over `archiveMinimumStorageSize` Bytes in size
* - file must be over `archiveAutoAfterDaysInactive` days old
* - file's size must be greater than `archiveAutoAboveMinimumStorageSize` Bytes
* - file's age must be greater than `archiveAutoAfterInactiveCount` * `archiveAutoAfterInactiveUnits`
* (e.g. 10 days old)
* - AND one of the following must be true:
* - file has never been downloaded (0 downloads)
* OR
* - file has not been downloaded in the past `archiveAutoAfterDaysInactive` days
 * - file has not been downloaded within the past `archiveAutoAfterInactiveCount` `archiveAutoAfterInactiveUnits` (e.g. within the past 10 days)
*
*
*/
def autoArchiveCandidateFiles() = {
val timeout = configuration(play.api.Play.current).getInt("archiveAutoAfterDaysInactive")
val timeout: Option[Long] = configuration(play.api.Play.current).getLong("archiveAutoAfterInactiveCount")
timeout match {
case None => Logger.info("No archival auto inactivity timeout set - skipping auto archival loop.")
case Some(days) => {
if (days == 0) {
case Some(inactiveTimeout) => {
if (inactiveTimeout == 0) {
Logger.info("Archival auto inactivity timeout set to 0 - skipping auto archival loop.")
} else {
// DEBUG ONLY: query for files that were uploaded within the past hour
val archiveDebug = configuration(play.api.Play.current).getBoolean("archiveDebug").getOrElse(false)
val oneHourAgo = LocalDateTime.now.minusHours(1).toString + "-00:00"

// Query for files that haven't been accessed for at least this many days
val daysAgo = LocalDateTime.now.minusDays(days).toString + "-00:00"
val notDownloadedWithinTimeout = if (archiveDebug) {
("stats.last_downloaded" $gte Parsers.fromISO8601(oneHourAgo)) ++ ("status" $eq FileStatus.PROCESSED.toString)
} else {
("stats.last_downloaded" $lt Parsers.fromISO8601(daysAgo)) ++ ("status" $eq FileStatus.PROCESSED.toString)
}
val unit = configuration(play.api.Play.current).getString("archiveAutoAfterInactiveUnits").getOrElse("days")
val timeoutAgo = FiniteDuration(inactiveTimeout, unit)

// Query for files that haven't been accessed for at least this many units
val since = Instant.now().minus(timeoutAgo.length.toLong, ChronoUnit.valueOf(timeoutAgo.unit.toString)).toString + "-00:00"
val notDownloadedWithinTimeout = ("stats.last_downloaded" $lt Parsers.fromISO8601(since)) ++ ("status" $eq FileStatus.PROCESSED.toString)

// Include files that have never been downloaded, but make sure they are old enough
val neverDownloaded = if (archiveDebug) {
("stats.downloads" $eq 0) ++ ("uploadDate" $gte Parsers.fromISO8601(oneHourAgo)) ++ ("status" $eq FileStatus.PROCESSED.toString)
} else {
("stats.downloads" $eq 0) ++ ("uploadDate" $lt Parsers.fromISO8601(daysAgo)) ++ ("status" $eq FileStatus.PROCESSED.toString)
}
val neverDownloaded = ("stats.downloads" $eq 0) ++ ("uploadDate" $lt Parsers.fromISO8601(since)) ++ ("status" $eq FileStatus.PROCESSED.toString)

// TODO: How to get host / apiKey / admin internally without a request?
val host = configuration(play.api.Play.current).getString("clowder.rabbitmq.clowderurl").getOrElse("http://localhost:9000")
Expand All @@ -257,7 +251,7 @@ class MongoDBFileService @Inject() (
Logger.info("Archival candidates found: " + matchingFiles.length)

// Exclude candidates that do not exceed our minimum file size threshold
val minSize = configuration(play.api.Play.current).getLong("archiveMinimumStorageSize").getOrElse(1000000L)
val minSize = configuration(play.api.Play.current).getLong("archiveAutoAboveMinimumStorageSize").getOrElse(1000000L)

// Loop all candidate files and submit each one for archival
for (file <- matchingFiles) {
Expand Down
16 changes: 9 additions & 7 deletions conf/application.conf
Original file line number Diff line number Diff line change
Expand Up @@ -587,18 +587,20 @@ enableUsernamePassword = true
# "archive" and "unarchive" should be purely inverse operations, such
# that unarchive(archive(x)) == x for any valid input.
#
# Available archival extractors:
# - ncsa.archival.disk - https://opensource.ncsa.illinois.edu/bitbucket/projects/CATS/repos/extractors-archival-disk/browse
# - ncsa.archival.s3 - https://opensource.ncsa.illinois.edu/bitbucket/projects/CATS/repos/extractors-archival-s3/browse
# See https://github.com/clowder-framework/extractors-archival for available extractors
#
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
archiveEnabled=false
archiveDebug=false
archiveAllowUnarchive=false
#archiveExtractorId="ncsa.archival.s3"
archiveExtractorId="ncsa.archival.disk"
archiveAllowUnarchive=false
archiveAutoAfterDaysInactive=90
archiveMinimumStorageSize=1000000

# NOTE: Setting interval to zero will disable automatic archiving
archiveAutoInterval=0 # in seconds (e.g. 86400 == 24 hours)
archiveAutoDelay=120 # in seconds (e.g. 86400 == 24 hours)
archiveAutoAfterInactiveCount=90 # NOTE: Setting count to zero will disable automatic archiving
archiveAutoAfterInactiveUnits="days"
archiveAutoAboveMinimumStorageSize=1000000

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Configuration file for securesocial
Expand Down