From 3aca1879aefde1301b27453b536cdcd0b5afb1d2 Mon Sep 17 00:00:00 2001 From: Ohad Dahan Date: Sat, 15 Aug 2020 17:27:06 +0300 Subject: [PATCH 1/5] Run git gc periodically on the crates.io index --- src/config.rs | 3 +++ src/docbuilder/queue.rs | 4 ++++ src/index/mod.rs | 20 +++++++++++++++++++- src/utils/daemon.rs | 8 +++++++- 4 files changed, 33 insertions(+), 2 deletions(-) diff --git a/src/config.rs b/src/config.rs index 936018b6a..d84f12872 100644 --- a/src/config.rs +++ b/src/config.rs @@ -30,6 +30,8 @@ pub struct Config { pub(crate) max_file_size_html: usize, // The most memory that can be used to parse an HTML file pub(crate) max_parse_memory: usize, + // Time between 'git gc --auto' calls in seconds + pub(crate) registry_gc_interval: u64, } impl Config { @@ -61,6 +63,7 @@ impl Config { // LOL HTML only uses as much memory as the size of the start tag! // https://github.com/rust-lang/docs.rs/pull/930#issuecomment-667729380 max_parse_memory: env("DOCSRS_MAX_PARSE_MEMORY", 5 * 1024 * 1024)?, + registry_gc_interval: env("DOCSRS_REGISTRY_GC_INTERVAL", 60 * 60)?, }) } diff --git a/src/docbuilder/queue.rs b/src/docbuilder/queue.rs index ae3bfc76e..d357fd2d6 100644 --- a/src/docbuilder/queue.rs +++ b/src/docbuilder/queue.rs @@ -84,4 +84,8 @@ impl DocBuilder { Ok(processed) } + + pub fn run_git_gc(&self) { + self.index.run_git_gc(); + } } diff --git a/src/index/mod.rs b/src/index/mod.rs index 68e806bcd..1b524c7e5 100644 --- a/src/index/mod.rs +++ b/src/index/mod.rs @@ -1,4 +1,7 @@ -use std::path::{Path, PathBuf}; +use std::{ + path::{Path, PathBuf}, + process::Command, +}; use url::Url; @@ -71,6 +74,21 @@ impl Index { pub fn api(&self) -> &Api { &self.api } + + pub fn run_git_gc(&self) { + let gc = Command::new("git") + .arg("-C") + .arg(&self.path) + .args(&["gc", "--auto"]) + .output(); + if let Err(err) = gc { + log::error!( + "Failed to run `git gc --auto`\nPath: {:#?}\nError:{:#?}", + &self.path, + err + ); + } + } } impl Clone for Index { diff --git a/src/utils/daemon.rs b/src/utils/daemon.rs index 683b9a64b..af8436c94 100644 --- a/src/utils/daemon.rs +++ b/src/utils/daemon.rs @@ -10,16 +10,18 @@ use chrono::{Timelike, Utc}; use failure::Error; use log::{debug, error, info}; use std::thread; -use std::time::Duration; +use std::time::{Duration, Instant}; fn start_registry_watcher(opts: DocBuilderOptions, context: &dyn Context) -> Result<(), Error> { let pool = context.pool()?; let build_queue = context.build_queue()?; + let config = context.config()?; thread::Builder::new() .name("registry index reader".to_string()) .spawn(move || { // space this out to prevent it from clashing against the queue-builder thread on launch thread::sleep(Duration::from_secs(30)); + let mut last_gc = Instant::now(); loop { let mut doc_builder = DocBuilder::new(opts.clone(), pool.clone(), build_queue.clone()); @@ -34,6 +36,10 @@ fn start_registry_watcher(opts: DocBuilderOptions, context: &dyn Context) -> Res } } + if last_gc.elapsed().as_secs() >= config.registry_gc_interval { + doc_builder.run_git_gc(); + last_gc = Instant::now(); + } thread::sleep(Duration::from_secs(60)); } })?; From 3db355adc679ad18240364f76359655f5cf5501f Mon Sep 17 00:00:00 2001 From: ohaddahan Date: Sat, 15 Aug 2020 19:11:30 +0300 Subject: [PATCH 2/5] Update src/index/mod.rs Co-authored-by: Chase Wilson --- src/index/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/index/mod.rs b/src/index/mod.rs index 1b524c7e5..b0b1d0d16 100644 --- a/src/index/mod.rs +++ b/src/index/mod.rs @@ -81,6 +81,7 @@ impl Index { .arg(&self.path) .args(&["gc", "--auto"]) .output(); + if let Err(err) = gc { log::error!( "Failed to run `git gc --auto`\nPath: {:#?}\nError:{:#?}", From 5e9cf66ee0671b80e808d80548c4188d6c2cd942 Mon Sep 17 00:00:00 2001 From: ohaddahan Date: Sat, 15 Aug 2020 19:11:37 +0300 Subject: [PATCH 3/5] Update src/utils/daemon.rs Co-authored-by: Chase Wilson --- src/utils/daemon.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/utils/daemon.rs b/src/utils/daemon.rs index af8436c94..9bed302da 100644 --- a/src/utils/daemon.rs +++ b/src/utils/daemon.rs @@ -16,6 +16,7 @@ fn start_registry_watcher(opts: DocBuilderOptions, context: &dyn Context) -> Res let pool = context.pool()?; let build_queue = context.build_queue()?; let config = context.config()?; + thread::Builder::new() .name("registry index reader".to_string()) .spawn(move || { From bbb5cbe6c1a72b7b350cf9b8b5ac42eaf6bbb40c Mon Sep 17 00:00:00 2001 From: ohaddahan Date: Sat, 15 Aug 2020 19:12:53 +0300 Subject: [PATCH 4/5] Update src/index/mod.rs Co-authored-by: Joshua Nelson --- src/index/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/index/mod.rs b/src/index/mod.rs index b0b1d0d16..c6ca75869 100644 --- a/src/index/mod.rs +++ b/src/index/mod.rs @@ -84,7 +84,7 @@ impl Index { if let Err(err) = gc { log::error!( - "Failed to run `git gc --auto`\nPath: {:#?}\nError:{:#?}", + "failed to run `git gc --auto`\npath: {:#?}\nerror: {:#?}", &self.path, err ); From d4603f5904728b6e7b334a73e25037c029599e98 Mon Sep 17 00:00:00 2001 From: ohaddahan Date: Sat, 15 Aug 2020 19:17:06 +0300 Subject: [PATCH 5/5] Update src/utils/daemon.rs Co-authored-by: Chase Wilson --- src/utils/daemon.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/utils/daemon.rs b/src/utils/daemon.rs index 9bed302da..61a9fa062 100644 --- a/src/utils/daemon.rs +++ b/src/utils/daemon.rs @@ -22,6 +22,7 @@ fn start_registry_watcher(opts: DocBuilderOptions, context: &dyn Context) -> Res .spawn(move || { // space this out to prevent it from clashing against the queue-builder thread on launch thread::sleep(Duration::from_secs(30)); + let mut last_gc = Instant::now(); loop { let mut doc_builder =