From 1a6a0ba0eb4e6af40fd517fdfc542608c79c925b Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Mon, 1 Jul 2019 23:47:54 +0200 Subject: [PATCH 1/2] fixed keyword replacement for single doc bubbles --- server/preprocessing/other-scripts/preprocess.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/server/preprocessing/other-scripts/preprocess.R b/server/preprocessing/other-scripts/preprocess.R index 36048bd21..22bea1e27 100644 --- a/server/preprocessing/other-scripts/preprocess.R +++ b/server/preprocessing/other-scripts/preprocess.R @@ -73,10 +73,11 @@ deduplicate_titles <- function(metadata, list_size) { replace_keywords_if_empty <- function(metadata, stops, service) { missing_subjects = which(lapply(metadata$subject, function(x) {nchar(x)}) <= 1) + if (service == "linkedcat" || service == "linkedcat_authorview") { metadata$subject[missing_subjects] <- metadata$bkl_caption[missing_subjects] } else { - candidates = mapply(paste, metadata$title[missing_subjects]) + candidates = mapply(paste, metadata$title) candidates = lapply(candidates, function(x)paste(removeWords(x, stops), collapse="")) candidates = lapply(candidates, function(x) {gsub("[^[:alpha:]]", " ", x)}) candidates = lapply(candidates, function(x) {gsub(" +", " ", x)}) @@ -93,7 +94,7 @@ replace_keywords_if_empty <- function(metadata, stops, service) { replacement_keywords = lapply(replacement_keywords, FUN = function(x) {paste(unlist(x), collapse=";")}) replacement_keywords = gsub("_", " ", replacement_keywords) - metadata$subject[missing_subjects] <- replacement_keywords + metadata$subject[missing_subjects] <- replacement_keywords[missing_subjects] } return(metadata) } From 832639d1bc7b259c89ec60e2ea2a1a0ca4b6ff4a Mon Sep 17 00:00:00 2001 From: Christopher Kittel Date: Mon, 1 Jul 2019 23:58:08 +0200 Subject: [PATCH 2/2] remove empty line --- server/preprocessing/other-scripts/preprocess.R | 1 - 1 file changed, 1 deletion(-) diff --git a/server/preprocessing/other-scripts/preprocess.R b/server/preprocessing/other-scripts/preprocess.R index 22bea1e27..c9c972272 100644 --- a/server/preprocessing/other-scripts/preprocess.R +++ b/server/preprocessing/other-scripts/preprocess.R @@ -73,7 +73,6 @@ deduplicate_titles <- function(metadata, list_size) { replace_keywords_if_empty <- function(metadata, stops, service) { missing_subjects = which(lapply(metadata$subject, function(x) {nchar(x)}) <= 1) - if (service == "linkedcat" || service == "linkedcat_authorview") { metadata$subject[missing_subjects] <- metadata$bkl_caption[missing_subjects] } else {