From c865ee7caad15c035b4d52ddb40bb61ffd44f6c9 Mon Sep 17 00:00:00 2001 From: Janek Date: Wed, 25 Jan 2017 11:14:16 +0100 Subject: [PATCH 01/20] refactor noncyc fitting --- R/mboostLSS.R | 452 +++++++++++++++++++++----------------------------- 1 file changed, 191 insertions(+), 261 deletions(-) diff --git a/R/mboostLSS.R b/R/mboostLSS.R index 51ca303..f311d31 100644 --- a/R/mboostLSS.R +++ b/R/mboostLSS.R @@ -74,241 +74,7 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), control = boost_control(), weights = NULL, fun = mboost, funchar = "mboost", call = NULL, method, ...){ - - if(method == "cycling") - iBoost_cycling <- function(niter) { - - start <- sapply(fit, mstop) - mvals <- vector("list", length(niter)) - for (j in 1:length(niter)){ - mvals[[j]] <- rep(start[j] + niter[j], max(niter)) - if (niter[j] > 0) - mvals[[j]][1:niter[j]] <- (start[j] + 1):(start[j] + niter[j]) - } - - ENV <- lapply(mods, function(j) environment(fit[[j]]$subset)) - - for (i in 1:max(niter)){ - for (j in mods){ - ## update value of nuisance parameters - ## use response(fitted()) as this is much quicker than fitted(, type = response) - for (k in mods[-j]) - assign(names(fit)[k], families[[k]]@response(fitted(fit[[k]])), - environment(get("ngradient", environment(fit[[j]]$subset)))) - ## update value of u, i.e. compute ngradient with new nuisance parameters - - ENV[[j]][["u"]] <- ENV[[j]][["ngradient"]](ENV[[j]][["y"]], ENV[[j]][["fit"]], - ENV[[j]][["weights"]]) - # same as: - # evalq(u <- ngradient(y, fit, weights), environment(fit[[j]]$subset)) - - ## update j-th component to "m-th" boosting step - fit[[j]][mvals[[j]][i]] - } - if (trace){ - firstRun <- firstRun - ## which is the current risk? 
rev() needed to get the last - ## list element with maximum length - whichRisk <- names(which.max(rev(lapply(lapply(fit, function(x) x$risk()), length)))) - do_trace(current = max(sapply(mvals, function(x) x[i])), - mstart = ifelse(firstRun, 0, max(start)), - mstop = ifelse(firstRun, max(niter) + 1, max(niter)), - risk = fit[[whichRisk]]$risk()) - } - } - return(TRUE) - } - if(method == "outer") - iBoost_outer <- function(niter){ - - #this is the case for boosting from the beginning - if(is.null(attr(fit, "combined_risk")) | niter == 0){ - combined_risk <- vapply(fit, risk, numeric(1)) - } - - best <- which(names(fit) == tail(names(combined_risk), 1)) - - - - ENV <- lapply(mods, function(j) environment(fit[[j]]$subset)) - - for (i in seq_len(niter)){ - - ## update value of nuisance parameters - ## use response(fitted()) as this is much quicker than fitted(, type = response) - for( k in mods[-best]){ - assign(names(fit)[best], families[[best]]@response(fitted(fit[[best]])), - environment(get("ngradient", environment(fit[[k]]$subset)))) - - #evalq(u <- ngradient(y, fit, weights), ENV[[k]]) - ENV[[k]][["u"]] <- ENV[[k]][["ngradient"]](ENV[[k]][["y"]], - ENV[[k]][["fit"]], - ENV[[k]][["weights"]]) - } - - if (funchar == "glmboost") { - lik_risks <- list() - coefs <- list() - - for(i in mods){ - coefs[[i]] <- evalq(environment(get("fit1"))[["est"]](u)/ - environment(get("fit1"))[["sxtx"]], envir = ENV[[i]]) - - all_fitted <- sweep(environment(ENV[[i]][["fit1"]])[["X"]], 2, - coefs[[i]] ,`*`) - - lik_risks[[i]] <- sapply(1:ncol(all_fitted), function(j) - ENV[[i]][["triskfct"]](ENV[[i]][["y"]], - ENV[[i]][["fit"]] + nu[i] * all_fitted[,j])) - } - - #do a mboost step per hand - best <- which.min(vapply(lik_risks, min, - FUN.VALUE = numeric(1), ...)) - - ENV[[best]]$all_fitted <- all_fitted - ENV[[best]]$lik_risks <- lik_risks - ENV[[best]]$best <- best - ENV[[best]]$coefs <- coefs - - evalq({ - xs <- which.min(lik_risks[[best]]) - basses <- list(model = c(coef = 
coefs[[best]][xs], - xselect = xs, - p = length(coefs[[best]])), - fitted = function() { - return(coefs[[best]][xs] * environment(get("fit1"))[["X"]][, xs, drop = FALSE]) - }) - class(basses) <- c("bm_cwlin", "bm_lin", "bm") - fit <- fit + nu * basses$fitted() - u <- get("ngradient")(get("y"), fit, weights) - mrisk[(mstop + 1)] <- get("triskfct")(get("y"), fit) - ens[[(mstop + 1)]] <- basses - xselect[(mstop + 1)] <- xs - nuisance[[(mstop +1)]] <- family@nuisance() - mstop <- mstop + 1}, - envir <- ENV[[best]]) - } - - #all other baselearner - else{ - risks <- list() - for( i in mods){ - risks[[i]] <- evalq({ - ss_new <- lapply(get("blfit", envir = environment(get("basefit"))), - function(x) x(u)) - sapply(ss_new, function(x) get("riskfct")(get("y"), fit + nu * x$fitted(), weights)) - }, envir = ENV[[i]]) - } - - best <- which.min(vapply(risks, min, - FUN.VALUE = numeric(1))) - - evalq({ - ss <- lapply(get("blfit", envir = environment(get("basefit"))), - function(x) x(u)) - xselect[mstop + 1] <- which.min(sapply(ss, function(x) - get("riskfct")(get("y"), fit + nu * x$fitted(), weights))) - fit <- fit + nu * ss[[tail(xselect, 1)]]$fitted() - u <- get("ngradient")(get("y"), fit, weights) - mrisk[(mstop + 1)] <- get("triskfct")(get("y"), fit) - ens[[(mstop + 1)]] <- ss[[tail(xselect, 1)]] - nuisance[[(mstop + 1)]] <- family@nuisance() - mstop <- mstop + 1}, - envir = ENV[[best]]) - - } - - combined_risk[(length(combined_risk) + 1)] <- tail(risk(fit[[best]]), 1) - names(combined_risk)[length(combined_risk)] <- names(fit)[best] - - if (trace){ - do_trace(current = length(combined_risk[combined_risk != 0]), - mstart = ifelse(firstRun, length(fit) + 1, - length(combined_risk[combined_risk != 0])), - mstop = ifelse(firstRun, niter - length(fit), niter), - risk = combined_risk[combined_risk != 0]) - } - } - combined_risk <<- combined_risk - return(TRUE) - } - if(method == "inner" ) - iBoost_inner <- function(niter){ - - - #this is the case for boosting from the beginning - 
if(is.null(attr(fit, "combined_risk")) | niter == 0){ - combined_risk <- vapply(fit, risk, numeric(1)) - } - - best <- which(names(fit) == tail(names(combined_risk), 1)) - - - - ENV <- lapply(mods, function(j) environment(fit[[j]]$subset)) - - for (i in seq_len(niter)){ - - ## update value of nuisance parameters - ## use response(fitted()) as this is much quicker than fitted(, type = response) - for( k in mods[-best]){ - assign(names(fit)[best], families[[best]]@response(fitted(fit[[best]])), - environment(get("ngradient", environment(fit[[k]]$subset)))) - } - - risks <- numeric(length(fit)) - for(b in 1:length(fit)){ - st <- mstop(fit[[b]]) - fit[[b]][st + 1] - #risks[b] <- evalq({riskfct(y, fit, weights)}, envir = ENV[[b]]) - risks[b] <- ENV[[b]][["riskfct"]](ENV[[b]][["y"]], - ENV[[b]][["fit"]], - ENV[[b]][["weights"]]) - fit[[b]][st] - - ## fit[[b]][st] is not enough to reduce the model back to beginning, so - ## so all these values have to be reduced, so that they are calculated - ## correctly the next time - evalq({xselect <- xselect[1:mstop]; - mrisk <- mrisk[1:mstop]; - ens <- ens[1:mstop]; - nuisance <- nuisance[1:mstop]}, - environment(fit[[b]]$subset)) - } - - - - best <- which.min(risks) - - ## update value of u, i.e. 
compute ngradient with new nuisance parameters - ENV[[best]][["u"]] <- ENV[[best]][["ngradient"]](ENV[[best]][["y"]], - ENV[[best]][["fit"]], - ENV[[best]][["weights"]]) - - # same as: - # evalq(u <- ngradient(y, fit, weights), environment(fit[[j]]$subset)) - - - ## update selected component by 1 - fit[[best]][mstop(fit[[best]]) + 1] - - - combined_risk[(length(combined_risk) + 1)] <- tail(risk(fit[[best]]), 1) - names(combined_risk)[length(combined_risk)] <- names(fit)[best] - - if (trace){ - do_trace(current = length(combined_risk[combined_risk != 0]), - mstart = ifelse(firstRun, length(fit) + 1, - length(combined_risk[combined_risk != 0])), - mstop = ifelse(firstRun, niter - length(fit), niter), - risk = combined_risk[combined_risk != 0]) - } - combined_risk <<- combined_risk - } - return(TRUE) - } - + if (length(families) == 0) stop(sQuote("families"), " not specified") @@ -414,28 +180,199 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), mstop = max(mstop), risk = fit[[length(fit)]]$risk()) - #select the correct fitting method - if (method == "cycling") { - if (any(mstop > 1)){ - firstRun <- TRUE - ## actually go for initial mstop iterations! 
- tmp <- iBoost_cycling(mstop - 1) + iBoost <- function(niter, method) { + + if (method %in% c("inner", "outer")) { + ### noncyclical fitting ### + + #this is the case for boosting from the beginning + if (is.null(attr(fit, "combined_risk")) | niter == 0) { + combined_risk <- vapply(fit, risk, numeric(1)) } - } - - else { - if (mstop >= length(fit)){ - firstRun <- TRUE - combined_risk <- NULL - tmp <- ifelse(method == "inner", - iBoost_inner(mstop - length(fit)), - iBoost_outer(mstop - length(fit))) + + best <- which(names(fit) == tail(names(combined_risk), 1)) + } else { + ### cyclical fitting ### + start <- sapply(fit, mstop) + mvals <- vector("list", length(niter)) + for (j in 1:length(niter)) { + mvals[[j]] <- rep(start[j] + niter[j], max(niter)) + if (niter[j] > 0) + mvals[[j]][1:niter[j]] <- (start[j] + 1):(start[j] + niter[j]) } + } + + ENV <- lapply(mods, function(j) environment(fit[[j]]$subset)) + ## main loop starts here ## + for (i in 1:max(niter)){ + if (method %in% c("inner", "outer")) { + ### noncyclical fitting ### + + ## update value of nuisance parameters + ## use response(fitted()) as this is much quicker than fitted(, type = response) + for( k in mods[-best]) { + assign(names(fit)[best], families[[best]]@response(fitted(fit[[best]])), environment(get("ngradient", environment(fit[[k]]$subset)))) + if (method == "outer") + ENV[[k]][["u"]] <- ENV[[k]][["ngradient"]](ENV[[k]][["y"]], ENV[[k]][["fit"]], ENV[[k]][["weights"]]) + } + + ## noncyclical inner method ## + if (method == "inner") { + risks <- numeric(length(fit)) + for(b in 1:length(fit)){ + st <- mstop(fit[[b]]) + fit[[b]][st + 1] + #risks[b] <- evalq({riskfct(y, fit, weights)}, envir = ENV[[b]]) + risks[b] <- ENV[[b]][["riskfct"]](ENV[[b]][["y"]], ENV[[b]][["fit"]], ENV[[b]][["weights"]]) + fit[[b]][st] + + ## fit[[b]][st] is not enough to reduce the model back to beginning, so + ## so all these values have to be reduced, so that they are calculated + ## correctly the next time + evalq({ + 
xselect <- xselect[1:mstop]; + mrisk <- mrisk[1:mstop]; + ens <- ens[1:mstop]; + nuisance <- nuisance[1:mstop]}, + environment(fit[[b]]$subset)) + } + + best <- which.min(risks) + + ## update value of u, i.e. compute ngradient with new nuisance parameters + ENV[[best]][["u"]] <- ENV[[best]][["ngradient"]](ENV[[best]][["y"]], ENV[[best]][["fit"]], ENV[[best]][["weights"]]) + + ## update selected component by 1 + fit[[best]][mstop(fit[[best]]) + 1] + + } else { + ## noncylical outer method ## + if (funchar == "glmboost") { + lik_risks <- list() + coefs <- list() + + for (i in mods) { + coefs[[i]] <- evalq(environment(get("fit1"))[["est"]](u) / environment(get("fit1"))[["sxtx"]], envir = ENV[[i]]) + + all_fitted <- sweep(environment(ENV[[i]][["fit1"]])[["X"]], 2, coefs[[i]] ,`*`) + + lik_risks[[i]] <- sapply(1:ncol(all_fitted), + function(j) ENV[[i]][["triskfct"]](ENV[[i]][["y"]], ENV[[i]][["fit"]] + nu[i] * all_fitted[,j])) + } + + #do a mboost step per hand + best <- which.min(vapply(lik_risks, min, FUN.VALUE = numeric(1), ...)) + + ENV[[best]]$all_fitted <- all_fitted + ENV[[best]]$lik_risks <- lik_risks + ENV[[best]]$best <- best + ENV[[best]]$coefs <- coefs + + evalq({ + xs <- which.min(lik_risks[[best]]) + basses <- list(model = c(coef = coefs[[best]][xs], + xselect = xs, + p = length(coefs[[best]])), + fitted = function() { + return(coefs[[best]][xs] * environment(get("fit1"))[["X"]][, xs, drop = FALSE]) + }) + class(basses) <- c("bm_cwlin", "bm_lin", "bm") + fit <- fit + nu * basses$fitted() + u <- get("ngradient")(get("y"), fit, weights) + mrisk[(mstop + 1)] <- get("triskfct")(get("y"), fit) + ens[[(mstop + 1)]] <- basses + xselect[(mstop + 1)] <- xs + nuisance[[(mstop +1)]] <- family@nuisance() + mstop <- mstop + 1}, + envir <- ENV[[best]]) + } + + #all other baselearner + else{ + risks <- list() + for( i in mods){ + risks[[i]] <- evalq({ss_new <- lapply(get("blfit", envir = environment(get("basefit"))), + function(x) x(u)) + sapply(ss_new, function(x) 
get("riskfct")(get("y"), fit + nu * x$fitted(), weights)) + }, envir = ENV[[i]]) + } + + best <- which.min(vapply(risks, min, + FUN.VALUE = numeric(1))) + + evalq({ + ss <- lapply(get("blfit", envir = environment(get("basefit"))), + function(x) x(u)) + xselect[mstop + 1] <- which.min(sapply(ss, function(x) + get("riskfct")(get("y"), fit + nu * x$fitted(), weights))) + fit <- fit + nu * ss[[tail(xselect, 1)]]$fitted() + u <- get("ngradient")(get("y"), fit, weights) + mrisk[(mstop + 1)] <- get("triskfct")(get("y"), fit) + ens[[(mstop + 1)]] <- ss[[tail(xselect, 1)]] + nuisance[[(mstop + 1)]] <- family@nuisance() + mstop <- mstop + 1}, + envir = ENV[[best]]) + + } + } + + ## update risk list + combined_risk[(length(combined_risk) + 1)] <- tail(risk(fit[[best]]), 1) + names(combined_risk)[length(combined_risk)] <- names(fit)[best] + combined_risk <<- combined_risk + + } else { + ### cyclical fitting ### + for (j in mods){ + ## update value of nuisance parameters + ## use response(fitted()) as this is much quicker than fitted(, type = response) + for (k in mods[-j]) + assign(names(fit)[k], families[[k]]@response(fitted(fit[[k]])), + environment(get("ngradient", environment(fit[[j]]$subset)))) + ## update value of u, i.e. compute ngradient with new nuisance parameters + + ENV[[j]][["u"]] <- ENV[[j]][["ngradient"]](ENV[[j]][["y"]], ENV[[j]][["fit"]], + ENV[[j]][["weights"]]) + # same as: + # evalq(u <- ngradient(y, fit, weights), environment(fit[[j]]$subset)) + + ## update j-th component to "m-th" boosting step + fit[[j]][mvals[[j]][i]] + } + } + + if (trace){ + if (method %in% c("inner", "outer")) { + do_trace(current = length(combined_risk[combined_risk != 0]), + mstart = ifelse(firstRun, length(fit) + 1, + length(combined_risk[combined_risk != 0])), + mstop = ifelse(firstRun, niter - length(fit), niter), + risk = combined_risk[combined_risk != 0]) + } else { + ## which is the current risk? 
rev() needed to get the last + ## list element with maximum length + whichRisk <- names(which.max(rev(lapply(lapply(fit, function(x) x$risk()), length)))) + do_trace(current = max(sapply(mvals, function(x) x[i])), + mstart = ifelse(firstRun, 0, max(start)), + mstop = ifelse(firstRun, max(niter) + 1, max(niter)), + risk = fit[[whichRisk]]$risk()) + } + + } + } + return(TRUE) } + firstRun <- any(mstop > 1) & method == "cycling" | mstop >= length(fit) & method != "cycling" + #number of steps in initialization + red <- ifelse(method == "cycling", 1, length(fit)) + + if (firstRun) + tmp <- iBoost(mstop - red, method = method) + firstRun <- FALSE - class(fit) <- c(paste(funchar, "LSS", sep=""), "mboostLSS") + class(fit) <- c(paste0(funchar, "LSS"), "mboostLSS") if(method != "cycling"){ class(fit) <- c("nc_mboostLSS", class(fit)) } @@ -494,7 +431,7 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), ## set negative values to 0 ## (only applicable if some parameters do not need to be touched inc <- ifelse(i - minStart > 0, i - minStart, 0) - tmp <- iBoost_cycling(inc) + tmp <- iBoost(inc, method = method) } mstop <<- i @@ -559,16 +496,9 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), ## now increase models (when necessary) else if (niter > 0){ - if (method == "inner") { - tmp <- iBoost_inner(niter) - } - else { - tmp <- iBoost_outer(niter) - } - - + tmp <- iBoost(niter, method = method) } - + mstop <<- i } } From 34891bda05346e525f15f6c2e6deae43ff9d2b9a Mon Sep 17 00:00:00 2001 From: Janek Date: Thu, 26 Jan 2017 13:26:21 +0100 Subject: [PATCH 02/20] remove outer version, rename inner to noncyclical --- R/mboostLSS.R | 153 ++++++++++++----------------------------------- man/mboostLSS.Rd | 28 +++++---- 2 files changed, 54 insertions(+), 127 deletions(-) diff --git a/R/mboostLSS.R b/R/mboostLSS.R index f311d31..18627d4 100644 --- a/R/mboostLSS.R +++ b/R/mboostLSS.R @@ -5,7 +5,7 @@ mboostLSS <- function(formula, data = 
list(), families = GaussianLSS(), control = boost_control(), weights = NULL, - method = c("cycling", "inner", "outer"), ...){ + method = c("cyclic", "noncyclic"), ...){ cl <- match.call() if(is.null(cl$families)) @@ -21,7 +21,7 @@ mboostLSS <- function(formula, data = list(), families = GaussianLSS(), glmboostLSS <- function(formula, data = list(), families = GaussianLSS(), control = boost_control(), weights = NULL, - method = c("cycling", "inner", "outer"), ...){ + method = c("cyclic", "noncyclic"), ...){ cl <- match.call() if(is.null(cl$families)) @@ -38,7 +38,7 @@ glmboostLSS <- function(formula, data = list(), families = GaussianLSS(), gamboostLSS <- function(formula, data = list(), families = GaussianLSS(), control = boost_control(), weights = NULL, - method = c("cycling", "inner", "outer"), ...){ + method = c("cyclic", "noncyclic"), ...){ cl <- match.call() if(is.null(cl$families)) @@ -55,7 +55,7 @@ gamboostLSS <- function(formula, data = list(), families = GaussianLSS(), blackboostLSS <- function(formula, data = list(), families = GaussianLSS(), control = boost_control(), weights = NULL, - method = c("cycling", "inner", "outer"), ...){ + method = c("cyclic", "noncyclic"), ...){ cl <- match.call() if(is.null(cl$families)) cl$families <- families @@ -108,10 +108,10 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), mstop <- mstoparg <- control$mstop control$mstop <- 1 - if (method == "cycling") { + if (method == "cyclic") { mstop <- check(mstop, "mstop", names(families)) } else { - #check mstop for inner and outer loss fitting methods + #check mstop for noncyclic fitting method if (length(mstop) != 1 | mstop %% 1 != 0 | mstop < length(families)) stop(sQuote("mstop"), " has to be an integer larger than ", length(families)) @@ -182,7 +182,7 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), iBoost <- function(niter, method) { - if (method %in% c("inner", "outer")) { + if (method == "noncyclic") { ### 
noncyclical fitting ### #this is the case for boosting from the beginning @@ -205,117 +205,42 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), ENV <- lapply(mods, function(j) environment(fit[[j]]$subset)) ## main loop starts here ## for (i in 1:max(niter)){ - if (method %in% c("inner", "outer")) { + if (method == "noncyclic") { ### noncyclical fitting ### ## update value of nuisance parameters ## use response(fitted()) as this is much quicker than fitted(, type = response) for( k in mods[-best]) { assign(names(fit)[best], families[[best]]@response(fitted(fit[[best]])), environment(get("ngradient", environment(fit[[k]]$subset)))) - if (method == "outer") - ENV[[k]][["u"]] <- ENV[[k]][["ngradient"]](ENV[[k]][["y"]], ENV[[k]][["fit"]], ENV[[k]][["weights"]]) } - - ## noncyclical inner method ## - if (method == "inner") { - risks <- numeric(length(fit)) - for(b in 1:length(fit)){ - st <- mstop(fit[[b]]) - fit[[b]][st + 1] - #risks[b] <- evalq({riskfct(y, fit, weights)}, envir = ENV[[b]]) - risks[b] <- ENV[[b]][["riskfct"]](ENV[[b]][["y"]], ENV[[b]][["fit"]], ENV[[b]][["weights"]]) - fit[[b]][st] - - ## fit[[b]][st] is not enough to reduce the model back to beginning, so - ## so all these values have to be reduced, so that they are calculated - ## correctly the next time - evalq({ - xselect <- xselect[1:mstop]; - mrisk <- mrisk[1:mstop]; - ens <- ens[1:mstop]; - nuisance <- nuisance[1:mstop]}, - environment(fit[[b]]$subset)) - } - - best <- which.min(risks) - - ## update value of u, i.e. 
compute ngradient with new nuisance parameters - ENV[[best]][["u"]] <- ENV[[best]][["ngradient"]](ENV[[best]][["y"]], ENV[[best]][["fit"]], ENV[[best]][["weights"]]) - - ## update selected component by 1 - fit[[best]][mstop(fit[[best]]) + 1] - - } else { - ## noncylical outer method ## - if (funchar == "glmboost") { - lik_risks <- list() - coefs <- list() - - for (i in mods) { - coefs[[i]] <- evalq(environment(get("fit1"))[["est"]](u) / environment(get("fit1"))[["sxtx"]], envir = ENV[[i]]) - - all_fitted <- sweep(environment(ENV[[i]][["fit1"]])[["X"]], 2, coefs[[i]] ,`*`) - - lik_risks[[i]] <- sapply(1:ncol(all_fitted), - function(j) ENV[[i]][["triskfct"]](ENV[[i]][["y"]], ENV[[i]][["fit"]] + nu[i] * all_fitted[,j])) - } - - #do a mboost step per hand - best <- which.min(vapply(lik_risks, min, FUN.VALUE = numeric(1), ...)) - - ENV[[best]]$all_fitted <- all_fitted - ENV[[best]]$lik_risks <- lik_risks - ENV[[best]]$best <- best - ENV[[best]]$coefs <- coefs - - evalq({ - xs <- which.min(lik_risks[[best]]) - basses <- list(model = c(coef = coefs[[best]][xs], - xselect = xs, - p = length(coefs[[best]])), - fitted = function() { - return(coefs[[best]][xs] * environment(get("fit1"))[["X"]][, xs, drop = FALSE]) - }) - class(basses) <- c("bm_cwlin", "bm_lin", "bm") - fit <- fit + nu * basses$fitted() - u <- get("ngradient")(get("y"), fit, weights) - mrisk[(mstop + 1)] <- get("triskfct")(get("y"), fit) - ens[[(mstop + 1)]] <- basses - xselect[(mstop + 1)] <- xs - nuisance[[(mstop +1)]] <- family@nuisance() - mstop <- mstop + 1}, - envir <- ENV[[best]]) - } + + risks <- numeric(length(fit)) + for(b in 1:length(fit)){ + st <- mstop(fit[[b]]) + fit[[b]][st + 1] + #risks[b] <- evalq({riskfct(y, fit, weights)}, envir = ENV[[b]]) + risks[b] <- ENV[[b]][["riskfct"]](ENV[[b]][["y"]], ENV[[b]][["fit"]], ENV[[b]][["weights"]]) + fit[[b]][st] - #all other baselearner - else{ - risks <- list() - for( i in mods){ - risks[[i]] <- evalq({ss_new <- lapply(get("blfit", envir = 
environment(get("basefit"))), - function(x) x(u)) - sapply(ss_new, function(x) get("riskfct")(get("y"), fit + nu * x$fitted(), weights)) - }, envir = ENV[[i]]) - } - - best <- which.min(vapply(risks, min, - FUN.VALUE = numeric(1))) - - evalq({ - ss <- lapply(get("blfit", envir = environment(get("basefit"))), - function(x) x(u)) - xselect[mstop + 1] <- which.min(sapply(ss, function(x) - get("riskfct")(get("y"), fit + nu * x$fitted(), weights))) - fit <- fit + nu * ss[[tail(xselect, 1)]]$fitted() - u <- get("ngradient")(get("y"), fit, weights) - mrisk[(mstop + 1)] <- get("triskfct")(get("y"), fit) - ens[[(mstop + 1)]] <- ss[[tail(xselect, 1)]] - nuisance[[(mstop + 1)]] <- family@nuisance() - mstop <- mstop + 1}, - envir = ENV[[best]]) - - } + ## fit[[b]][st] is not enough to reduce the model back to beginning, so + ## so all these values have to be reduced, so that they are calculated + ## correctly the next time + evalq({ + xselect <- xselect[1:mstop]; + mrisk <- mrisk[1:mstop]; + ens <- ens[1:mstop]; + nuisance <- nuisance[1:mstop]}, + environment(fit[[b]]$subset)) } + best <- which.min(risks) + + ## update value of u, i.e. 
compute ngradient with new nuisance parameters + ENV[[best]][["u"]] <- ENV[[best]][["ngradient"]](ENV[[best]][["y"]], ENV[[best]][["fit"]], ENV[[best]][["weights"]]) + + ## update selected component by 1 + fit[[best]][mstop(fit[[best]]) + 1] + ## update risk list combined_risk[(length(combined_risk) + 1)] <- tail(risk(fit[[best]]), 1) names(combined_risk)[length(combined_risk)] <- names(fit)[best] @@ -342,7 +267,7 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), } if (trace){ - if (method %in% c("inner", "outer")) { + if (method == "noncyclic") { do_trace(current = length(combined_risk[combined_risk != 0]), mstart = ifelse(firstRun, length(fit) + 1, length(combined_risk[combined_risk != 0])), @@ -363,9 +288,9 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), return(TRUE) } - firstRun <- any(mstop > 1) & method == "cycling" | mstop >= length(fit) & method != "cycling" + firstRun <- any(mstop > 1 & method == "cyclic" | mstop >= length(fit) & method != "cyclic") #number of steps in initialization - red <- ifelse(method == "cycling", 1, length(fit)) + red <- ifelse(method == "cyclic", 1, length(fit)) if (firstRun) tmp <- iBoost(mstop - red, method = method) @@ -373,7 +298,7 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), firstRun <- FALSE class(fit) <- c(paste0(funchar, "LSS"), "mboostLSS") - if(method != "cycling"){ + if(method != "cyclic"){ class(fit) <- c("nc_mboostLSS", class(fit)) } @@ -382,7 +307,7 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), ### i > mstop needs additional computations ### updates take place in THIS ENVIRONMENT, ### some models are CHANGED! 
- if(method == "cycling") { + if(method == "cyclic") { attr(fit, "subset") <- function(i) { i <- check(i, "mstop", names(families)) @@ -540,7 +465,7 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), attr(fit, "call") <- call attr(fit, "data") <- data attr(fit, "families") <- families - if(method != "cycling") + if(method != "cyclic") attr(fit, "combined_risk") <- function() combined_risk return(fit) diff --git a/man/mboostLSS.Rd b/man/mboostLSS.Rd index bac10ef..b8fafec 100644 --- a/man/mboostLSS.Rd +++ b/man/mboostLSS.Rd @@ -18,13 +18,13 @@ } \usage{ mboostLSS(formula, data = list(), families = GaussianLSS(), - control = boost_control(), weights = NULL, method = c("cycling", "inner", "outer"), ...) + control = boost_control(), weights = NULL, method = c("cyclic", "noncyclic"), ...) glmboostLSS(formula, data = list(), families = GaussianLSS(), - control = boost_control(), weights = NULL, method = c("cycling", "inner", "outer"), ...) + control = boost_control(), weights = NULL, method = c("cyclic", "noncyclic"), ...) gamboostLSS(formula, data = list(), families = GaussianLSS(), - control = boost_control(), weights = NULL, method = c("cycling", "inner", "outer"), ...) + control = boost_control(), weights = NULL, method = c("cyclic", "noncyclic"), ...) blackboostLSS(formula, data = list(), families = GaussianLSS(), - control = boost_control(), weights = NULL,method = c("cycling", "inner", "outer"), ...) + control = boost_control(), weights = NULL,method = c("cyclic", "noncyclic"), ...) ## fit function: mboostLSS_fit(formula, data = list(), families = GaussianLSS(), @@ -58,9 +58,9 @@ mboostLSS_fit(formula, data = list(), families = GaussianLSS(), \item{call}{ used to forward the call from \code{mboostLSS}, \code{glmboostLSS}, \code{gamboostLSS} and \code{blackboostLSS}. 
This argument should not be directly specified by users!} - \item{method}{ fitting method, currently three methods are supported: - \code{"cycling"}, \code{"inner"} and \code{"outer"}. The latter two require a - one dimensional \code{mstop} value.} + \item{method}{ fitting method, currently two methods are supported: + \code{"cyclic"} and \code{"noncyclic"}. The latter requires a one dimensional \code{mstop} + value.} \item{\dots}{Further arguments to be passed to \code{mboostLSS_fit}. In \code{mboostLSS_fit}, \code{\dots} represent further arguments to be passed to \code{\link{mboost}} and \code{\link{mboost_fit}}. So @@ -99,7 +99,7 @@ mboostLSS_fit(formula, data = list(), families = GaussianLSS(), function without \code{LSS}. For further possible arguments see these functions as well as \code{\link{mboost_fit}}. - For \code{method = "cycling"} it is possible to specify one or + For \code{method = "cyclic"} it is possible to specify one or multiple \code{mstop} and \code{nu} values via \code{\link{boost_control}}. In the case of one single value, this value is used for all distribution parameters of the GAMLSS model. @@ -113,12 +113,11 @@ mboostLSS_fit(formula, data = list(), families = GaussianLSS(), specify, e.g., \code{mstop = 100} via \code{\link{boost_control}}. For more-dimensional stopping, one can specify, e.g., \code{mstop = list(mu = 100, sigma = 200)} (see examples). - If \code{method} is set to \code{"inner"} or \code{"outer"}, \code{mstop} has + If \code{method} is set to \code{"noncyclic"}, \code{mstop} has to be a one dimensional integer. Instead of cycling through all distribution - parameters, in each iteration only the best baselearner is used. For - \code{"inner"} one baselearner of every parameter is selected via RSS, the - distribution parameter is then chosen via the loss. For \code{"outer"} the - selection is solely based on the (outer) loss. + parameters, in each iteration only the best baselearner is used. 
One baselearner of every + parameter is selected via RSS, the distribution parameter is then chosen via the loss. + For details on the noncyclic fitting method see Thomas et al. (2016). To (potentially) stabilize the model estimation by standardizing the negative gradients one can use the argument \code{stabilization} of @@ -152,6 +151,9 @@ Statistical Society, Series C (Applied Statistics), 54, 507-554. Buehlmann, P. and Hothorn, T. (2007), Boosting algorithms: Regularization, prediction and model fitting. Statistical Science, 22(4), 477--505. + +Thomas, J., Mayr, A., Bischl, B., Schmid, M., Smith, A., and Hofner, B. (2016), +Stability selection for component-wise gradient boosting in multiple dimensions. arXiv preprint arXiv:1611.10171. } \seealso{ From 993167f528f4b187b383f4eccd5650a37d5064b8 Mon Sep 17 00:00:00 2001 From: Janek Date: Thu, 26 Jan 2017 13:34:13 +0100 Subject: [PATCH 03/20] citation --- inst/CITATION | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/inst/CITATION b/inst/CITATION index 0fa5bd7..2483a85 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -59,4 +59,22 @@ ) +citEntry( + entry= "Article", + title = "Stability selection for component-wise gradient boosting in multiple dimensions", + author = personList(as.person("Janek Thomas"), + as.person("Andreas Mayr"), + as.person("Bernd Bischl"), + as.person("Matthias Schmid"), + as.person("Adam Smith"), + as.person("Benjamin Hofner")), + year = "2016", + header = "To cite the noncyclical fitting method of 'gamboostLSS' use:", + url = "https://arxiv.org/abs/1611.10171", + textVersion = + paste("Thomas, J., Mayr, A., Bischl, B., Schmid, M., Smith, A., and Hofner, B. 
(2016).", + "Stability selection for component-wise gradient boosting in multiple dimensions.", + "arXiv preprint arXiv:1611.10171.")) + + citFooter('\nUse ', sQuote('toBibtex(citation("gamboostLSS"))'), ' to extract BibTeX references.') From ec4b3f356c5ea1e194edcdba3a60a56fcfc3b4e9 Mon Sep 17 00:00:00 2001 From: Janek Date: Thu, 26 Jan 2017 13:36:13 +0100 Subject: [PATCH 04/20] Readme --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 93d7311..7c79fd5 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,10 @@ shape). For installation instructions see below. Instructions on how to use `gamboostLSS` can be found here: -- [gamboostLSS tutorial](http://arxiv.org/pdf/1407.1774v1); This is apreliminary version currently under review. +- [gamboostLSS tutorial](http://arxiv.org/pdf/1407.1774v1); This is a preliminary version currently under review. + +Details on the noncyclical fitting method can be found here: +- [noncyclical fitting](https://arxiv.org/abs/1611.10171); This is a preliminary version currently under review. 
## Issues & Feature Requests From 9585836295cd2a8761a92e798ae5a0254acf845e Mon Sep 17 00:00:00 2001 From: Janek Date: Thu, 26 Jan 2017 13:55:59 +0100 Subject: [PATCH 05/20] slight speed improvements --- R/mboostLSS.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/R/mboostLSS.R b/R/mboostLSS.R index 18627d4..8413d39 100644 --- a/R/mboostLSS.R +++ b/R/mboostLSS.R @@ -218,8 +218,8 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), for(b in 1:length(fit)){ st <- mstop(fit[[b]]) fit[[b]][st + 1] - #risks[b] <- evalq({riskfct(y, fit, weights)}, envir = ENV[[b]]) - risks[b] <- ENV[[b]][["riskfct"]](ENV[[b]][["y"]], ENV[[b]][["fit"]], ENV[[b]][["weights"]]) + risks[b] <- evalq({riskfct(y, fit, weights)}, envir = ENV[[b]]) + #risks[b] <- ENV[[b]][["riskfct"]](ENV[[b]][["y"]], ENV[[b]][["fit"]], ENV[[b]][["weights"]]) fit[[b]][st] ## fit[[b]][st] is not enough to reduce the model back to beginning, so @@ -236,7 +236,8 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), best <- which.min(risks) ## update value of u, i.e. 
compute ngradient with new nuisance parameters - ENV[[best]][["u"]] <- ENV[[best]][["ngradient"]](ENV[[best]][["y"]], ENV[[best]][["fit"]], ENV[[best]][["weights"]]) + evalq({u = ngradient(y, fit, weights)}, ENV[[best]]) + #ENV[[best]][["u"]] <- ENV[[best]][["ngradient"]](ENV[[best]][["y"]], ENV[[best]][["fit"]], ENV[[best]][["weights"]]) ## update selected component by 1 fit[[best]][mstop(fit[[best]]) + 1] From 942b6317f6463720e472b0c71531a29bb852364b Mon Sep 17 00:00:00 2001 From: Janek Date: Thu, 26 Jan 2017 15:52:39 +0100 Subject: [PATCH 06/20] cleanup --- R/cvrisk.nc_mboostLSS.R | 2 +- R/methods.R | 5 +- man/cvrisk.Rd | 9 ++-- man/stabsel.mboostLSS.Rd | 2 +- tests/regtest-noncyclic_fitting.R | 82 ++++--------------------------- 5 files changed, 17 insertions(+), 83 deletions(-) diff --git a/R/cvrisk.nc_mboostLSS.R b/R/cvrisk.nc_mboostLSS.R index 02793e3..0743e34 100644 --- a/R/cvrisk.nc_mboostLSS.R +++ b/R/cvrisk.nc_mboostLSS.R @@ -99,7 +99,7 @@ plot.nc_cvriskLSS <- function(x, type = "lines", main = attr(x, "type"), ...) { if (type != "lines") - warning("Only ", sQuote('type = "lines"'), " supported for non-cyclical fitting") + warning("Only ", sQuote('type = "lines"'), " supported for noncyclical fitting") plot.cvriskLSS(x = x, type = "lines", xlab = xlab, ylab = ylab, ylim = ylim, main = main, ...) 
} diff --git a/R/methods.R b/R/methods.R index aa01e19..f5f3daf 100644 --- a/R/methods.R +++ b/R/methods.R @@ -453,8 +453,7 @@ stabsel.mboostLSS <- function(x, cutoff, q, PFER, if (nsel >= q) break } - #this changes nothing for method = "cycling" but fixes mstop for - #method = "inner" or "outer" + #this changes nothing for method = "cyclic" but fixes mstop for method = "noncyclic" mstop <- check(mstop, "mstop", names(x)) ## complete paths if (any(sapply(xs, length) < mstop)) { @@ -474,7 +473,7 @@ stabsel.mboostLSS <- function(x, cutoff, q, PFER, ret <- unlist(ret) ## compute selection paths - #merging for method cycling + #merging for method cyclic if(!inherits(x, "nc_mboostLSS")){ sequences <- lapply(1:length(xs), function(i) { res <- matrix(FALSE, nrow = length(nms[[i]]), ncol = mstop[[i]]) diff --git a/man/cvrisk.Rd b/man/cvrisk.Rd index 0eb483a..240bf53 100644 --- a/man/cvrisk.Rd +++ b/man/cvrisk.Rd @@ -40,13 +40,13 @@ make.grid(max, length.out = 10, min = NULL, log = TRUE, bootstrap samples. } \item{grid}{ - If the model was fitted with \code{method = "cycling"}, grid is + If the model was fitted with \code{method = "cyclic"}, grid is a matrix of stopping parameters the empirical risk is to be evaluated for. Each row represents a parameter combination. The number of columns must be - equal to the number of parameters of the GAMLSS family. Per default, + equal to the number of parameters of the GAMLSS family. Per default, \code{make.grid(mstop(object))} is used. - Otherwise (i.e., for \code{method = "inner"} or \code{method = "outer"}) grid + Otherwise (i.e., for \code{method = "noncyclic"}) grid is a vector of mstop values. Per default all steps up to the intial stopping iteration, i.e., \code{1:mstop(object)} are used. } @@ -132,8 +132,7 @@ make.grid(max, length.out = 10, min = NULL, log = TRUE, fold to fold. The heatmap shows only the average risk but in a nicer fashion. - For the noncyclical fitting methods (i.e. 
\code{method = "inner"} or - \code{method = "outer"}) only the line plot for exists. + For the \code{method = "noncyclic"} only the line plot for exists. Hofner et al. (2015) provide a detailed description of cross-validation for \code{\link{gamboostLSS}} models and show a diff --git a/man/stabsel.mboostLSS.Rd b/man/stabsel.mboostLSS.Rd index 3f7c8ef..84800a0 100644 --- a/man/stabsel.mboostLSS.Rd +++ b/man/stabsel.mboostLSS.Rd @@ -127,7 +127,7 @@ dat <- data.frame(x1, x2, x3, x4, x5, x6, y) ### linear model with y ~ . for both components: 400 boosting iterations model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, control = boost_control(mstop = 400), - center = TRUE, method = "inner") + center = TRUE, method = "noncyclic") \donttest{### Do not test the following code per default on CRAN as it takes some time to run: diff --git a/tests/regtest-noncyclic_fitting.R b/tests/regtest-noncyclic_fitting.R index 09ffc58..9d8f3c0 100644 --- a/tests/regtest-noncyclic_fitting.R +++ b/tests/regtest-noncyclic_fitting.R @@ -20,36 +20,23 @@ dat <- data.frame(x1, x2, x3, x4, x5, x6, y) #glmboost model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, - control = boost_control(mstop = 3), method = "outer") - -model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, - control = boost_control(mstop = 3), method = "inner") + control = boost_control(mstop = 3), method = "noncyclic") #linear baselearner with bols model <- gamboostLSS(y ~ ., families = NBinomialLSS(), data = dat, - control = boost_control(mstop = 3), method = "outer", - baselearner = "bols") - -model <- gamboostLSS(y ~ ., families = NBinomialLSS(), data = dat, - control = boost_control(mstop = 3), method = "inner", + control = boost_control(mstop = 3), method = "noncyclic", baselearner = "bols") #nonlinear bbs baselearner model <- gamboostLSS(y ~ ., families = NBinomialLSS(), data = dat, - control = boost_control(mstop = 3), method = "outer", + control = boost_control(mstop = 3), method 
= "noncyclic", baselearner = "bbs") -model <- gamboostLSS(y ~ ., families = NBinomialLSS(), data = dat, - control = boost_control(mstop = 3), method = "inner", - baselearner = "bbs") - - - #reducing model and increasing it afterwards should yield the same fit model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, - control = boost_control(mstop = 50), method = "outer") + control = boost_control(mstop = 50), method = "noncyclic") m_co <- coef(model) @@ -60,7 +47,7 @@ stopifnot(all.equal(m_co, coef(model))) model <- gamboostLSS(y ~ ., families = NBinomialLSS(), data = dat, - control = boost_control(mstop = 50), method = "outer", + control = boost_control(mstop = 50), method = "noncyclic", baselearner = "bols") m_co <- coef(model) @@ -72,7 +59,7 @@ stopifnot(all.equal(m_co, coef(model))) model <- gamboostLSS(y ~ ., families = NBinomialLSS(), data = dat, - control = boost_control(mstop = 50), method = "outer", + control = boost_control(mstop = 50), method = "noncyclic", baselearner = "bbs") m_co <- coef(model) @@ -83,32 +70,8 @@ mstop(model) <- 50 stopifnot(all.equal(m_co, coef(model))) -##inner### - -model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, - control = boost_control(mstop = 50), method = "inner") - -m_co <- coef(model) - -mstop(model) <- 5 -mstop(model) <- 50 - -stopifnot(all.equal(m_co, coef(model))) - model <- gamboostLSS(y ~ ., families = NBinomialLSS(), data = dat, - control = boost_control(mstop = 50), method = "inner", - baselearner = "bols") - -m_co <- coef(model) - -mstop(model) <- 5 -mstop(model) <- 50 - -stopifnot(all.equal(m_co, coef(model))) - - -model <- gamboostLSS(y ~ ., families = NBinomialLSS(), data = dat, - control = boost_control(mstop = 50), method = "inner", + control = boost_control(mstop = 50), method = "noncyclic", baselearner = "bbs") m_co <- coef(model) @@ -120,37 +83,10 @@ stopifnot(all.equal(m_co, coef(model))) ## check cvrisk for noncyclic models model <- glmboostLSS(y ~ ., families = NBinomialLSS(), 
data = dat, - control = boost_control(mstop = 3), method = "outer") + control = boost_control(mstop = 3), method = "noncyclic") cvr1 <- cvrisk(model, grid = 1:50, cv(model.weights(model), B = 5)) cvr1 plot(cvr1) risk(model, merge = TRUE) -risk(model, merge = FALSE) - -model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, - control = boost_control(mstop = 3), method = "inner") -cvr2 <- cvrisk(model, grid = 1:50, cv(model.weights(model), B = 5)) -cvr2 -plot(cvr2) - -risk(model, merge = TRUE) -risk(model, merge = FALSE) - -model <- glmboostLSS(y ~ ., families = NBinomialLSS(), data = dat, - control = boost_control(mstop = 3), method = "cycling") -cvr3 <- cvrisk(model, grid = make.grid(c(mu = 25, sigma = 25)), cv(model.weights(model), B = 5)) -cvr3 -plot(cvr3) -plot(cvr3, type = "lines") - -risk(model, merge = TRUE) -risk(model, merge = FALSE) - -par(mfrow = c(2, 2)) -plot(cvr1) -plot(cvr2) -plot(cvr3, type = "heatmap") -plot(cvr3, type = "lines") - - +risk(model, merge = FALSE) \ No newline at end of file From f22530c035f332de281315718ee172a4c7c75f91 Mon Sep 17 00:00:00 2001 From: Janek Date: Thu, 26 Jan 2017 17:27:01 +0100 Subject: [PATCH 07/20] missing , -.- --- inst/CITATION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/CITATION b/inst/CITATION index 2483a85..0df3c04 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -66,7 +66,7 @@ citEntry( as.person("Andreas Mayr"), as.person("Bernd Bischl"), as.person("Matthias Schmid"), - as.person("Adam Smith") + as.person("Adam Smith"), as.person("Benjamin Hofner")), year = "2016", header = "To cite the noncyclical fitting method of 'gamboostLSS' use:", From 371267b3da8a4896ef8bb150cac9925fd7dedd87 Mon Sep 17 00:00:00 2001 From: Janek Date: Thu, 26 Jan 2017 18:50:31 +0100 Subject: [PATCH 08/20] stop destroying the CITATION file... 
--- inst/CITATION | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/CITATION b/inst/CITATION index 0df3c04..2fc3363 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -75,6 +75,7 @@ citEntry( paste("Thomas, J., Mayr, A., Bischl, B., Schmid, M., Smith, A., and Hofner, B. (2016).", "Stability selection for component-wise gradient boosting in multiple dimensions.", "arXiv preprint arXiv:1611.10171.") +) citFooter('\nUse ', sQuote('toBibtex(citation("gamboostLSS"))'), ' to extract BibTeX references.') From 17c9674d59d4907014b6d304e7c1813b3331ef3b Mon Sep 17 00:00:00 2001 From: Janek Date: Mon, 13 Feb 2017 13:34:45 +0100 Subject: [PATCH 09/20] start with mstop=0 --- R/mboostLSS.R | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/R/mboostLSS.R b/R/mboostLSS.R index 8413d39..8a8d004 100644 --- a/R/mboostLSS.R +++ b/R/mboostLSS.R @@ -107,15 +107,10 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), } mstop <- mstoparg <- control$mstop - control$mstop <- 1 - if (method == "cyclic") { + control$mstop <- 0 + if (method == "cyclic") mstop <- check(mstop, "mstop", names(families)) - } else { - #check mstop for noncyclic fitting method - if (length(mstop) != 1 | mstop %% 1 != 0 | mstop < length(families)) - stop(sQuote("mstop"), " has to be an integer larger than ", - length(families)) - } + nu <- control$nu nu <- check(nu, "nu", names(families)) @@ -290,11 +285,9 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), } firstRun <- any(mstop > 1 & method == "cyclic" | mstop >= length(fit) & method != "cyclic") - #number of steps in initialization - red <- ifelse(method == "cyclic", 1, length(fit)) if (firstRun) - tmp <- iBoost(mstop - red, method = method) + tmp <- iBoost(mstop, method = method) firstRun <- FALSE From 7e061b49534ebca65365e7c9fa977cc9bb23baed Mon Sep 17 00:00:00 2001 From: Janek Date: Thu, 16 Feb 2017 16:02:57 +0100 Subject: [PATCH 10/20] fix risk --- R/mboostLSS.R | 8 
++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/mboostLSS.R b/R/mboostLSS.R index 8a8d004..2aedb5f 100644 --- a/R/mboostLSS.R +++ b/R/mboostLSS.R @@ -221,10 +221,10 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), ## so all these values have to be reduced, so that they are calculated ## correctly the next time evalq({ - xselect <- xselect[1:mstop]; - mrisk <- mrisk[1:mstop]; - ens <- ens[1:mstop]; - nuisance <- nuisance[1:mstop]}, + xselect <- xselect[seq_len(mstop)]; + mrisk <- mrisk[seq_len(mstop + 1)]; + ens <- ens[seq_len(mstop)]; + nuisance <- nuisance[seq_len(mstop)]}, environment(fit[[b]]$subset)) } From f6b551c828643a16e4e18625c4e4433f3191e748 Mon Sep 17 00:00:00 2001 From: Janek Date: Thu, 16 Feb 2017 17:21:34 +0100 Subject: [PATCH 11/20] fix subset increase from zero --- R/mboostLSS.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/mboostLSS.R b/R/mboostLSS.R index b8c6701..1fc9b20 100644 --- a/R/mboostLSS.R +++ b/R/mboostLSS.R @@ -382,10 +382,10 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), ## remove additional boosting iterations from environments lapply(fit, function(obj){ - evalq({xselect <- xselect[1:mstop]; - mrisk <- mrisk[1:mstop]; - ens <- ens[1:mstop]; - nuisance <- nuisance[1:mstop]}, + evalq({xselect <- xselect[seq_len(mstop)]; + mrisk <- mrisk[seq_len(mstop + 1)]; + ens <- ens[seq_len(mstop)]; + nuisance <- nuisance[seq_len(mstop)]}, environment(obj$subset)) }) From b41a55627245ca5b9526bdbeafec55bb32c52a2a Mon Sep 17 00:00:00 2001 From: Janek Date: Thu, 16 Feb 2017 18:24:31 +0100 Subject: [PATCH 12/20] fix citation --- inst/CITATION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/CITATION b/inst/CITATION index 2fc3363..5341b0a 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -60,7 +60,7 @@ citEntry( citEntry( - entry= "Article", + entry= "TechReport", title = "Stability selection for component-wise 
gradient boosting in multiple dimensions", author = personList(as.person("Janek Thomas"), as.person("Andreas Mayr"), From 0dd7fe31853b29db050f3fba30f275abd4d0eb51 Mon Sep 17 00:00:00 2001 From: Janek Date: Mon, 20 Feb 2017 15:21:19 +0100 Subject: [PATCH 13/20] fix trace --- R/helpers.R | 20 ++++++++++---------- R/mboostLSS.R | 25 +++++++++---------------- 2 files changed, 19 insertions(+), 26 deletions(-) diff --git a/R/helpers.R b/R/helpers.R index f8e7629..d7a593c 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -119,20 +119,20 @@ rescale_weights <- function(w) { ## helper function in a modified version based on mboost_2.2-3 ## print trace of boosting iterations -do_trace <- function(current, mstart, risk, - linebreak = options("width")$width / 2, mstop = 1000) { - current <- current - mstart +do_trace <- function(current, risk, mstart, + linebreak = round(options("width")$width / 2), mstop = 1000) { + + current <- current - mstart + if (current != mstop) { - if ((current - 1) %/% linebreak == (current - 1) / linebreak) { + if (current %% linebreak == 1) { mchr <- formatC(current + mstart, format = "d", width = nchar(mstop) + 1, big.mark = "'") cat(paste("[", mchr, "] ",sep = "")) - } else { - if ((current %/% linebreak != current / linebreak)) { - cat(".") - } else { - cat(" -- risk:", risk[current + mstart], "\n") - } + } + cat(".") + if (current %% linebreak == 0) { + cat(" -- risk:", risk[current + mstart], "\n") } } else { cat("\nFinal risk:", risk[current + mstart], "\n") diff --git a/R/mboostLSS.R b/R/mboostLSS.R index 1fc9b20..be93567 100644 --- a/R/mboostLSS.R +++ b/R/mboostLSS.R @@ -170,16 +170,13 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), control=control, weights = w, ...)) } - if (trace) - do_trace(current = 1, mstart = 0, - mstop = max(mstop), - risk = fit[[length(fit)]]$risk()) - + iBoost <- function(niter, method) { + start <- sapply(fit, mstop) + if (method == "noncyclic") { ### noncyclical fitting ### - #this is 
the case for boosting from the beginning if (is.null(attr(fit, "combined_risk")) | niter == 0) { combined_risk <- vapply(fit, risk, numeric(1)) @@ -188,7 +185,6 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), best <- which(names(fit) == tail(names(combined_risk), 1)) } else { ### cyclical fitting ### - start <- sapply(fit, mstop) mvals <- vector("list", length(niter)) for (j in 1:length(niter)) { mvals[[j]] <- rep(start[j] + niter[j], max(niter)) @@ -265,19 +261,16 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), if (trace){ if (method == "noncyclic") { - do_trace(current = length(combined_risk[combined_risk != 0]), - mstart = ifelse(firstRun, length(fit) + 1, - length(combined_risk[combined_risk != 0])), - mstop = ifelse(firstRun, niter - length(fit), niter), - risk = combined_risk[combined_risk != 0]) + do_trace(current = length(combined_risk) - length(fit), mstart = sum(start), + risk = combined_risk[-(1:length(fit))], mstop = niter) } else { ## which is the current risk? rev() needed to get the last ## list element with maximum length - whichRisk <- names(which.max(rev(lapply(lapply(fit, function(x) x$risk()), length)))) + whichRisk <- names(which.max(rev(lapply(fit, function(x) length(risk(x)))))) do_trace(current = max(sapply(mvals, function(x) x[i])), - mstart = ifelse(firstRun, 0, max(start)), - mstop = ifelse(firstRun, max(niter) + 1, max(niter)), - risk = fit[[whichRisk]]$risk()) + mstart = max(start), + mstop = max(niter), + risk = risk(fit[[whichRisk]])[-1]) } } From 5b5190d7256f57a85feddd62959648f77fee271d Mon Sep 17 00:00:00 2001 From: Janek Date: Mon, 20 Feb 2017 15:48:17 +0100 Subject: [PATCH 14/20] fix citation... 
again --- inst/CITATION | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/CITATION b/inst/CITATION index 5341b0a..cdc0a33 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -69,6 +69,7 @@ citEntry( as.person("Adam Smith"), as.person("Benjamin Hofner")), year = "2016", + institution = "ArXiv", header = "To cite the noncyclical fitting method of 'gamboostLSS' use:", url = "https://arxiv.org/abs/1611.10171", textVersion = From de4be0dec1c1e9b0fe7205ad9feb7521cb01a73b Mon Sep 17 00:00:00 2001 From: Benjamin Hofner Date: Tue, 28 Feb 2017 13:52:51 +0100 Subject: [PATCH 15/20] make appveyor use github packages --- appveyor.yml | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 181e617..1c78173 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -12,21 +12,13 @@ install: - ps: Bootstrap # Adapt as necessary starting from here - -# environment: -# matrix: -# - TEST_DIR: patch -# - TEST_DIR: pkg - before_build: -# - cp ../travis-tool.sh ./travis-tool.sh -# - cp travis-tool.sh.cmd %TEST_DIR%/travis-tool.sh.cmd -# - cd %TEST_DIR% - bash -c "echo '^travis-tool\.sh\.cmd$' >> .Rbuildignore" -# - dir build_script: - travis-tool.sh install_deps + - travis-tool.sh install_github hofnerb/stabs + - travis-tool.sh install_github boost-R/mboost test_script: - travis-tool.sh run_tests From 50ddb7b036f82b3abeb071d597407beae12634e5 Mon Sep 17 00:00:00 2001 From: Benjamin Hofner Date: Tue, 28 Feb 2017 14:05:55 +0100 Subject: [PATCH 16/20] shorten \usage lines wider than 90 characters --- man/mboostLSS.Rd | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/man/mboostLSS.Rd b/man/mboostLSS.Rd index b8fafec..c6dfb2d 100644 --- a/man/mboostLSS.Rd +++ b/man/mboostLSS.Rd @@ -18,13 +18,17 @@ } \usage{ mboostLSS(formula, data = list(), families = GaussianLSS(), - control = boost_control(), weights = NULL, method = c("cyclic", "noncyclic"), ...) 
+ control = boost_control(), weights = NULL, + method = c("cyclic", "noncyclic"), ...) glmboostLSS(formula, data = list(), families = GaussianLSS(), - control = boost_control(), weights = NULL, method = c("cyclic", "noncyclic"), ...) + control = boost_control(), weights = NULL, + method = c("cyclic", "noncyclic"), ...) gamboostLSS(formula, data = list(), families = GaussianLSS(), - control = boost_control(), weights = NULL, method = c("cyclic", "noncyclic"), ...) + control = boost_control(), weights = NULL, + method = c("cyclic", "noncyclic"), ...) blackboostLSS(formula, data = list(), families = GaussianLSS(), - control = boost_control(), weights = NULL,method = c("cyclic", "noncyclic"), ...) + control = boost_control(), weights = NULL, + method = c("cyclic", "noncyclic"), ...) ## fit function: mboostLSS_fit(formula, data = list(), families = GaussianLSS(), @@ -153,7 +157,8 @@ Buehlmann, P. and Hothorn, T. (2007), Boosting algorithms: Regularization, prediction and model fitting. Statistical Science, 22(4), 477--505. Thomas, J., Mayr, A., Bischl, B., Schmid, M., Smith, A., and Hofner, B. (2016), -Stability selection for component-wise gradient boosting in multiple dimensions. arXiv preprint arXiv:1611.10171. +Stability selection for component-wise gradient boosting in multiple dimensions. +arXiv preprint arXiv:1611.10171. 
} \seealso{ From 019666251f6e372511293f513245b72d36186769 Mon Sep 17 00:00:00 2001 From: Benjamin Hofner Date: Tue, 28 Feb 2017 14:13:05 +0100 Subject: [PATCH 17/20] reindent lines --- R/mboostLSS.R | 216 +++++++++++++++++++++++++------------------------- 1 file changed, 109 insertions(+), 107 deletions(-) diff --git a/R/mboostLSS.R b/R/mboostLSS.R index be93567..65d2a61 100644 --- a/R/mboostLSS.R +++ b/R/mboostLSS.R @@ -74,7 +74,7 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), control = boost_control(), weights = NULL, fun = mboost, funchar = "mboost", call = NULL, method, ...){ - + if (length(families) == 0) stop(sQuote("families"), " not specified") @@ -110,7 +110,7 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), control$mstop <- 0 if (method == "cyclic") mstop <- check(mstop, "mstop", names(families)) - + nu <- control$nu nu <- check(nu, "nu", names(families)) @@ -170,116 +170,118 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), control=control, weights = w, ...)) } - + iBoost <- function(niter, method) { - - start <- sapply(fit, mstop) - - if (method == "noncyclic") { - ### noncyclical fitting ### - #this is the case for boosting from the beginning - if (is.null(attr(fit, "combined_risk")) | niter == 0) { - combined_risk <- vapply(fit, risk, numeric(1)) - } - best <- which(names(fit) == tail(names(combined_risk), 1)) - } else { - ### cyclical fitting ### - mvals <- vector("list", length(niter)) - for (j in 1:length(niter)) { - mvals[[j]] <- rep(start[j] + niter[j], max(niter)) - if (niter[j] > 0) - mvals[[j]][1:niter[j]] <- (start[j] + 1):(start[j] + niter[j]) - } - } - - ENV <- lapply(mods, function(j) environment(fit[[j]]$subset)) - ## main loop starts here ## - for (i in 1:max(niter)){ + start <- sapply(fit, mstop) + if (method == "noncyclic") { - ### noncyclical fitting ### - - ## update value of nuisance parameters - ## use response(fitted()) as this is much 
quicker than fitted(, type = response) - for( k in mods[-best]) { - assign(names(fit)[best], families[[best]]@response(fitted(fit[[best]])), environment(get("ngradient", environment(fit[[k]]$subset)))) - } - - risks <- numeric(length(fit)) - for(b in 1:length(fit)){ - st <- mstop(fit[[b]]) - mstop(fit[[b]]) = st + 1 - risks[b] <- tail(risk(fit[[b]]), 1) - #evalq({riskfct(y, fit, weights)}, envir = ENV[[b]]) - #risks[b] <- ENV[[b]][["riskfct"]](ENV[[b]][["y"]], ENV[[b]][["fit"]], ENV[[b]][["weights"]]) - fit[[b]][st] + ### noncyclical fitting ### + # this is the case for boosting from the beginning + if (is.null(attr(fit, "combined_risk")) | niter == 0) { + combined_risk <- vapply(fit, risk, numeric(1)) + } - ## fit[[b]][st] is not enough to reduce the model back to beginning, so - ## so all these values have to be reduced, so that they are calculated - ## correctly the next time - evalq({ - xselect <- xselect[seq_len(mstop)]; - mrisk <- mrisk[seq_len(mstop + 1)]; - ens <- ens[seq_len(mstop)]; - nuisance <- nuisance[seq_len(mstop)]}, - environment(fit[[b]]$subset)) - } - - best <- which.min(risks) - - ## update value of u, i.e. 
compute ngradient with new nuisance parameters - evalq({u = ngradient(y, fit, weights)}, ENV[[best]]) - #ENV[[best]][["u"]] <- ENV[[best]][["ngradient"]](ENV[[best]][["y"]], ENV[[best]][["fit"]], ENV[[best]][["weights"]]) - - ## update selected component by 1 - fit[[best]][mstop(fit[[best]]) + 1] - - ## update risk list - combined_risk[(length(combined_risk) + 1)] <- tail(risk(fit[[best]]), 1) - names(combined_risk)[length(combined_risk)] <- names(fit)[best] - combined_risk <<- combined_risk - + best <- which(names(fit) == tail(names(combined_risk), 1)) } else { - ### cyclical fitting ### - for (j in mods){ - ## update value of nuisance parameters - ## use response(fitted()) as this is much quicker than fitted(, type = response) - for (k in mods[-j]) - assign(names(fit)[k], families[[k]]@response(fitted(fit[[k]])), - environment(get("ngradient", environment(fit[[j]]$subset)))) - ## update value of u, i.e. compute ngradient with new nuisance parameters - - ENV[[j]][["u"]] <- ENV[[j]][["ngradient"]](ENV[[j]][["y"]], ENV[[j]][["fit"]], - ENV[[j]][["weights"]]) - # same as: - # evalq(u <- ngradient(y, fit, weights), environment(fit[[j]]$subset)) - - ## update j-th component to "m-th" boosting step - fit[[j]][mvals[[j]][i]] - } + ### cyclical fitting ### + mvals <- vector("list", length(niter)) + for (j in 1:length(niter)) { + mvals[[j]] <- rep(start[j] + niter[j], max(niter)) + if (niter[j] > 0) + mvals[[j]][1:niter[j]] <- (start[j] + 1):(start[j] + niter[j]) + } } - if (trace){ - if (method == "noncyclic") { - do_trace(current = length(combined_risk) - length(fit), mstart = sum(start), - risk = combined_risk[-(1:length(fit))], mstop = niter) - } else { - ## which is the current risk? 
rev() needed to get the last - ## list element with maximum length - whichRisk <- names(which.max(rev(lapply(fit, function(x) length(risk(x)))))) - do_trace(current = max(sapply(mvals, function(x) x[i])), - mstart = max(start), - mstop = max(niter), - risk = risk(fit[[whichRisk]])[-1]) - } - + ENV <- lapply(mods, function(j) environment(fit[[j]]$subset)) + ## main loop starts here ## + for (i in 1:max(niter)){ + if (method == "noncyclic") { + ### noncyclical fitting ### + + ## update value of nuisance parameters + ## use response(fitted()) as this is much quicker than fitted(, type = response) + for( k in mods[-best]) { + assign(names(fit)[best], families[[best]]@response(fitted(fit[[best]])), + environment(get("ngradient", environment(fit[[k]]$subset)))) + } + + risks <- numeric(length(fit)) + for(b in 1:length(fit)){ + st <- mstop(fit[[b]]) + mstop(fit[[b]]) = st + 1 + risks[b] <- tail(risk(fit[[b]]), 1) + #evalq({riskfct(y, fit, weights)}, envir = ENV[[b]]) + #risks[b] <- ENV[[b]][["riskfct"]](ENV[[b]][["y"]], ENV[[b]][["fit"]], ENV[[b]][["weights"]]) + fit[[b]][st] + + ## fit[[b]][st] is not enough to reduce the model back to beginning, so + ## so all these values have to be reduced, so that they are calculated + ## correctly the next time + evalq({ + xselect <- xselect[seq_len(mstop)]; + mrisk <- mrisk[seq_len(mstop + 1)]; + ens <- ens[seq_len(mstop)]; + nuisance <- nuisance[seq_len(mstop)] + }, + environment(fit[[b]]$subset)) + } + + best <- which.min(risks) + + ## update value of u, i.e. 
compute ngradient with new nuisance parameters + evalq({u <- ngradient(y, fit, weights)}, ENV[[best]]) + #ENV[[best]][["u"]] <- ENV[[best]][["ngradient"]](ENV[[best]][["y"]], ENV[[best]][["fit"]], ENV[[best]][["weights"]]) + + ## update selected component by 1 + fit[[best]][mstop(fit[[best]]) + 1] + + ## update risk list + combined_risk[(length(combined_risk) + 1)] <- tail(risk(fit[[best]]), 1) + names(combined_risk)[length(combined_risk)] <- names(fit)[best] + combined_risk <<- combined_risk + + } else { + ### cyclical fitting ### + for (j in mods){ + ## update value of nuisance parameters + ## use response(fitted()) as this is much quicker than fitted(, type = response) + for (k in mods[-j]) + assign(names(fit)[k], families[[k]]@response(fitted(fit[[k]])), + environment(get("ngradient", environment(fit[[j]]$subset)))) + ## update value of u, i.e. compute ngradient with new nuisance parameters + + ENV[[j]][["u"]] <- ENV[[j]][["ngradient"]](ENV[[j]][["y"]], ENV[[j]][["fit"]], + ENV[[j]][["weights"]]) + # same as: + # evalq(u <- ngradient(y, fit, weights), environment(fit[[j]]$subset)) + + ## update j-th component to "m-th" boosting step + fit[[j]][mvals[[j]][i]] + } + } + + if (trace){ + if (method == "noncyclic") { + do_trace(current = length(combined_risk) - length(fit), mstart = sum(start), + risk = combined_risk[-(1:length(fit))], mstop = niter) + } else { + ## which is the current risk? 
rev() needed to get the last + ## list element with maximum length + whichRisk <- names(which.max(rev(lapply(fit, function(x) length(risk(x)))))) + do_trace(current = max(sapply(mvals, function(x) x[i])), + mstart = max(start), + mstop = max(niter), + risk = risk(fit[[whichRisk]])[-1]) + } + + } } - } - return(TRUE) + return(TRUE) } if (any(mstop > 0)) - tmp <- iBoost(mstop, method = method) + tmp <- iBoost(mstop, method = method) class(fit) <- c(paste0(funchar, "LSS"), "mboostLSS") if(method != "cyclic"){ @@ -375,10 +377,10 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), ## remove additional boosting iterations from environments lapply(fit, function(obj){ - evalq({xselect <- xselect[seq_len(mstop)]; - mrisk <- mrisk[seq_len(mstop + 1)]; - ens <- ens[seq_len(mstop)]; - nuisance <- nuisance[seq_len(mstop)]}, + evalq({xselect <- xselect[seq_len(mstop)]; + mrisk <- mrisk[seq_len(mstop + 1)]; + ens <- ens[seq_len(mstop)]; + nuisance <- nuisance[seq_len(mstop)]}, environment(obj$subset)) }) @@ -401,9 +403,9 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), ## now increase models (when necessary) else if (niter > 0){ - tmp <- iBoost(niter, method = method) + tmp <- iBoost(niter, method = method) } - + mstop <<- i } } From 86ebeb2a55e37c727d7443a192895bb313d15e87 Mon Sep 17 00:00:00 2001 From: Benjamin Hofner Date: Tue, 28 Feb 2017 14:22:09 +0100 Subject: [PATCH 18/20] initialize combined risk --- R/mboostLSS.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/mboostLSS.R b/R/mboostLSS.R index 65d2a61..984eb1b 100644 --- a/R/mboostLSS.R +++ b/R/mboostLSS.R @@ -174,6 +174,8 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), iBoost <- function(niter, method) { start <- sapply(fit, mstop) + # initialize combined_risk + combined_risk <- NA if (method == "noncyclic") { ### noncyclical fitting ### From 4c2d159f4ecfbddba851eda3e85a3ab67d42e1f0 Mon Sep 17 00:00:00 2001 From: Janek Date: 
Wed, 29 Mar 2017 10:51:43 +0200 Subject: [PATCH 19/20] remove notes --- R/AAA.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/AAA.R b/R/AAA.R index 2692bd9..705b537 100644 --- a/R/AAA.R +++ b/R/AAA.R @@ -3,3 +3,6 @@ ### the variance of the negative gradient options(gamboostLSS_stab_ngrad = FALSE) } + +# get rid of NOTEs in R CMD check for "undefined global functions or variables" +globalVariables(c("ngradient", "y", "combined_risk")) From 6309410fc4538f1a4bac3e15c673185bc9c78d06 Mon Sep 17 00:00:00 2001 From: Janek Date: Wed, 29 Mar 2017 11:39:43 +0200 Subject: [PATCH 20/20] initialize combined_risk in subset --- R/mboostLSS.R | 153 +++++++++++++++++++++++++------------------------- 1 file changed, 76 insertions(+), 77 deletions(-) diff --git a/R/mboostLSS.R b/R/mboostLSS.R index 984eb1b..3e9fd19 100644 --- a/R/mboostLSS.R +++ b/R/mboostLSS.R @@ -4,14 +4,14 @@ ### (glm/gam/m/black)boostLSS functions mboostLSS <- function(formula, data = list(), families = GaussianLSS(), - control = boost_control(), weights = NULL, + control = boost_control(), weights = NULL, method = c("cyclic", "noncyclic"), ...){ - + cl <- match.call() if(is.null(cl$families)) cl$families <- families method <- match.arg(method) - + fit <- mboostLSS_fit(formula = formula, data = data, families = families, control = control, weights = weights, ..., fun = mboost, funchar = "mboost", call = cl, @@ -20,47 +20,47 @@ mboostLSS <- function(formula, data = list(), families = GaussianLSS(), } glmboostLSS <- function(formula, data = list(), families = GaussianLSS(), - control = boost_control(), weights = NULL, + control = boost_control(), weights = NULL, method = c("cyclic", "noncyclic"), ...){ - + cl <- match.call() if(is.null(cl$families)) cl$families <- families method <- match.arg(method) - + fit <- mboostLSS_fit(formula = formula, data = data, families = families, control = control, weights = weights, ..., fun = glmboost, funchar = "glmboost", call = cl, method = method) - + return(fit) } 
gamboostLSS <- function(formula, data = list(), families = GaussianLSS(), - control = boost_control(), weights = NULL, + control = boost_control(), weights = NULL, method = c("cyclic", "noncyclic"), ...){ - + cl <- match.call() if(is.null(cl$families)) cl$families <- families method <- match.arg(method) - + fit <- mboostLSS_fit(formula = formula, data = data, families = families, control = control, weights = weights, ..., fun = gamboost, funchar = "gamboost", call = cl, method = method) - + return(fit) } blackboostLSS <- function(formula, data = list(), families = GaussianLSS(), - control = boost_control(), weights = NULL, + control = boost_control(), weights = NULL, method = c("cyclic", "noncyclic"), ...){ cl <- match.call() if(is.null(cl$families)) cl$families <- families method <- match.arg(method) - + fit <- mboostLSS_fit(formula = formula, data = data, families = families, control = control, weights = weights, ..., fun = blackboost, funchar = "blackboost", call = cl, @@ -74,15 +74,15 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), control = boost_control(), weights = NULL, fun = mboost, funchar = "mboost", call = NULL, method, ...){ - + if (length(families) == 0) stop(sQuote("families"), " not specified") - + if ("offset" %in% names(list(...))) stop("Do not use argument ", sQuote("offset"), ". Please specify offsets via families") ### Use mu in "families" to specify offset in mu-family etc. 
- + if (is.list(formula)){ if (!all(names(formula) %in% names(families)) || length(unique(names(formula))) != length(names(families))) @@ -105,23 +105,23 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), tmp[[i]] <- formula formula <- tmp } - + mstop <- mstoparg <- control$mstop control$mstop <- 0 if (method == "cyclic") mstop <- check(mstop, "mstop", names(families)) - - + + nu <- control$nu nu <- check(nu, "nu", names(families)) - + if (is.list(control$risk) || is.list(control$center) || is.list(control$trace)) stop(sQuote("risk"),", ", sQuote("center"), " and ", sQuote("trace") , " cannot be lists in ", sQuote("boost_control")) - + trace <- control$trace control$trace <- FALSE - + w <- weights if (is.null(weights)){ if (!is.list(response)) { @@ -131,12 +131,12 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), } } weights <- rescale_weights(weights) - + fit <- vector("list", length = length(families)) names(fit) <- names(families) - + mods <- 1:length(fit) - + offset <- vector("list", length = length(mods)) names(offset) <- names(families) for (j in mods){ @@ -170,20 +170,20 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), control=control, weights = w, ...)) } - + iBoost <- function(niter, method) { - + start <- sapply(fit, mstop) # initialize combined_risk combined_risk <- NA - + if (method == "noncyclic") { ### noncyclical fitting ### # this is the case for boosting from the beginning if (is.null(attr(fit, "combined_risk")) | niter == 0) { combined_risk <- vapply(fit, risk, numeric(1)) } - + best <- which(names(fit) == tail(names(combined_risk), 1)) } else { ### cyclical fitting ### @@ -194,20 +194,20 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), mvals[[j]][1:niter[j]] <- (start[j] + 1):(start[j] + niter[j]) } } - + ENV <- lapply(mods, function(j) environment(fit[[j]]$subset)) ## main loop starts here ## for (i in 1:max(niter)){ if (method == 
"noncyclic") { ### noncyclical fitting ### - + ## update value of nuisance parameters ## use response(fitted()) as this is much quicker than fitted(, type = response) for( k in mods[-best]) { - assign(names(fit)[best], families[[best]]@response(fitted(fit[[best]])), + assign(names(fit)[best], families[[best]]@response(fitted(fit[[best]])), environment(get("ngradient", environment(fit[[k]]$subset)))) } - + risks <- numeric(length(fit)) for(b in 1:length(fit)){ st <- mstop(fit[[b]]) @@ -216,9 +216,9 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), #evalq({riskfct(y, fit, weights)}, envir = ENV[[b]]) #risks[b] <- ENV[[b]][["riskfct"]](ENV[[b]][["y"]], ENV[[b]][["fit"]], ENV[[b]][["weights"]]) fit[[b]][st] - + ## fit[[b]][st] is not enough to reduce the model back to beginning, so - ## so all these values have to be reduced, so that they are calculated + ## so all these values have to be reduced, so that they are calculated ## correctly the next time evalq({ xselect <- xselect[seq_len(mstop)]; @@ -228,23 +228,23 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), }, environment(fit[[b]]$subset)) } - + best <- which.min(risks) - + ## update value of u, i.e. 
compute ngradient with new nuisance parameters evalq({u <- ngradient(y, fit, weights)}, ENV[[best]]) #ENV[[best]][["u"]] <- ENV[[best]][["ngradient"]](ENV[[best]][["y"]], ENV[[best]][["fit"]], ENV[[best]][["weights"]]) - + ## update selected component by 1 fit[[best]][mstop(fit[[best]]) + 1] - + ## update risk list combined_risk[(length(combined_risk) + 1)] <- tail(risk(fit[[best]]), 1) names(combined_risk)[length(combined_risk)] <- names(fit)[best] combined_risk <<- combined_risk - + } else { - ### cyclical fitting ### + ### cyclical fitting ### for (j in mods){ ## update value of nuisance parameters ## use response(fitted()) as this is much quicker than fitted(, type = response) @@ -252,20 +252,20 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), assign(names(fit)[k], families[[k]]@response(fitted(fit[[k]])), environment(get("ngradient", environment(fit[[j]]$subset)))) ## update value of u, i.e. compute ngradient with new nuisance parameters - + ENV[[j]][["u"]] <- ENV[[j]][["ngradient"]](ENV[[j]][["y"]], ENV[[j]][["fit"]], ENV[[j]][["weights"]]) # same as: # evalq(u <- ngradient(y, fit, weights), environment(fit[[j]]$subset)) - + ## update j-th component to "m-th" boosting step fit[[j]][mvals[[j]][i]] } } - + if (trace){ if (method == "noncyclic") { - do_trace(current = length(combined_risk) - length(fit), mstart = sum(start), + do_trace(current = length(combined_risk) - length(fit), mstart = sum(start), risk = combined_risk[-(1:length(fit))], mstop = niter) } else { ## which is the current risk? 
rev() needed to get the last @@ -276,20 +276,20 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), mstop = max(niter), risk = risk(fit[[whichRisk]])[-1]) } - + } } return(TRUE) } - + if (any(mstop > 0)) tmp <- iBoost(mstop, method = method) - + class(fit) <- c(paste0(funchar, "LSS"), "mboostLSS") if(method != "cyclic"){ class(fit) <- c("nc_mboostLSS", class(fit)) } - + ### update to a new number of boosting iterations mstop ### i <= mstop means less iterations than current ### i > mstop needs additional computations @@ -297,9 +297,9 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), ### some models are CHANGED! if(method == "cyclic") { attr(fit, "subset") <- function(i) { - + i <- check(i, "mstop", names(families)) - + msf <- mstop(fit) niter <- i - msf if (all(niter == 0)) { @@ -308,22 +308,22 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), } else { minStart <- min(msf[niter != 0], i[niter != 0]) } - + ## check if minStart bigger than mstop of parameters that are not touched #if (length(msf[niter == 0]) > 0 && minStart < min(msf[niter == 0])) #minStart <- min(msf[niter == 0]) - + ## reduce models first (when necessary) if (any(msf > minStart)){ cf <- class(fit) class(fit) <- "list" ## needed to use [] operator for lists - + #cat("processed parameters: ", paste(names(fit[msf > minStart]), # collapse = ", "), "\n") - + lapply(fit[msf > minStart], function(obj) obj$subset(minStart)) - + ## remove additional boosting iterations from environments lapply(fit[msf > minStart], function(obj){ evalq({xselect <- xselect[seq_len(mstop)]; @@ -332,13 +332,13 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), nuisance <- nuisance[seq_len(mstop)]}, environment(obj$subset)) }) - + class(fit) <- cf if (trace) cat("Model first reduced to mstop = ", minStart, ".\n", "Now continue ...\n", sep ="") } - + ## now increase models (when necessary) if (any(i > minStart)){ ## 
set negative values to 0 @@ -346,37 +346,36 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), inc <- ifelse(i - minStart > 0, i - minStart, 0) tmp <- iBoost(inc, method = method) } - + mstop <<- i } - } + } else { attr(fit, "subset") <- function(i) { msf <- sum(mstop(fit)) niter <- i - msf - + ## check if minStart bigger than mstop of parameters that are not touched #if (length(msf[niter == 0]) > 0 && minStart < min(msf[niter == 0])) #minStart <- min(msf[niter == 0]) - + ## reduce models first (when necessary) if (niter < 0 ){ cf <- class(fit) d = length(fit) class(fit) <- "list" ## needed to use [] operator for lists - #cat("processed parameters: ", paste(names(fit[msf > minStart]), # collapse = ", "), "\n") - #reduce the combined risk values + combined_risk <- attr(fit, "combined_risk")() combined_risk <<- attr(fit, "combined_risk")()[seq_len(i + d)] new_stop_value <- table(names(attr(fit, "combined_risk")())) - 1 - - + + for(o in names(new_stop_value)){ fit[[o]]$subset(new_stop_value[o]) } - + ## remove additional boosting iterations from environments lapply(fit, function(obj){ evalq({xselect <- xselect[seq_len(mstop)]; @@ -385,7 +384,7 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), nuisance <- nuisance[seq_len(mstop)]}, environment(obj$subset)) }) - + #update ALL nuisance parameters to last update ENV <- lapply(mods, function(j) environment(fit[[j]]$subset)) for(j in names(new_stop_value)){ @@ -394,24 +393,24 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), environment(get("ngradient", environment(fit[[j]]$subset)))) } } - + for(k in mods){ evalq(u <- get("ngradient")(get("y"), fit, weights), ENV[[k]]) } - - + + class(fit) <- cf } - + ## now increase models (when necessary) else if (niter > 0){ tmp <- iBoost(niter, method = method) } - + mstop <<- i } } - + ## make call in submodels nicer: cl <- call cl[[1]] <- as.name(gsub("LSS", "", cl[[1]])) @@ -421,9 +420,9 @@ 
mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), ## This is not really nice fit[[i]]$call$family <- families[[i]] } - + attr(fit, "(weights)") <- weights ## attach weights used for fitting - + ## update to new weights; just a fresh start attr(fit, "update") <- function(weights = NULL, oobweights = NULL, risk = NULL, trace = NULL, mstop = NULL) { @@ -451,6 +450,6 @@ mboostLSS_fit <- function(formula, data = list(), families = GaussianLSS(), attr(fit, "families") <- families if(method != "cyclic") attr(fit, "combined_risk") <- function() combined_risk - + return(fit) }