diff --git a/DESCRIPTION b/DESCRIPTION index 9ce8f283..40872949 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: CLVTools Title: Tools for Customer Lifetime Value Estimation -Version: 0.11.2 -Date: 2024-12-01 +Version: 0.12.0 +Date: 2025-09-22 Authors@R: c( person(given="Patrick", family="Bachmann", email = "pbachma@ethz.ch", role = c("cre","aut")), person(given="Niels", family="Kuebler", email = "niels.kuebler@uzh.ch", role = "aut"), @@ -50,7 +50,8 @@ Suggests: rmarkdown, xml2, testthat (>= 3.0.0), - lmtest + lmtest, + R.rsp License: GPL-3 URL: https://github.com/bachmannpatrick/CLVTools BugReports: https://github.com/bachmannpatrick/CLVTools/issues @@ -122,6 +123,7 @@ Collate: 'f_interface_clvdata.R' 'f_interface_gg.R' 'f_interface_ggomnbd.R' + 'f_interface_hessian.R' 'f_interface_latentattrition.R' 'f_interface_lrtest.R' 'f_interface_newcustomer.R' @@ -155,6 +157,6 @@ Collate: 'pnbd_dyncov_expectation.R' 'pnbd_dyncov_palive.R' RoxygenNote: 7.3.2 -VignetteBuilder: knitr +VignetteBuilder: knitr, R.rsp Config/testthat/parallel: false Config/testthat/edition: 3 diff --git a/NAMESPACE b/NAMESPACE index 1b983919..6dc2c7d6 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,6 +13,7 @@ S3method(lmtest::lrtest,clv.fitted) S3method(logLik,clv.fitted) S3method(nobs,clv.data) S3method(nobs,clv.fitted) +S3method(numDeriv::hessian,clv.fitted) S3method(plot,clv.data) S3method(plot,clv.fitted.spending) S3method(plot,clv.fitted.transactions) @@ -53,6 +54,7 @@ exportMethods(bgbb) exportMethods(bgnbd) exportMethods(gg) exportMethods(ggomnbd) +exportMethods(hessian) exportMethods(lrtest) exportMethods(plot) exportMethods(pmf) diff --git a/NEWS.md b/NEWS.md index 4f59b64c..51480455 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,18 @@ +# CLVTools 0.12.0 + +### NEW FEATURES +* `clvdata(data.end)`: Add parameter `data.end` to specify a data end beyond the last actual transaction +* `summary()`: Always set `zval` and `pval` to NA for the main model parameters +* `hessian()`: Add method to calculate hessian matrix for already fitted models +* Add 3 new vignettes covering: Advanced modelling techniques, model intuition, and the internal class system + +### BUG FIXES +* Fix CRAN notes: Replace `arma::is_finite()` -> `std::isfinite()` +* Dyncov PNBD: Rename `predicted.CLV` -> `predicted.period.CLV` +* `predict()`: Rename `{predicted, actual}.total.spending` -> `{predicted, actual}.period.spending` + + + # CLVTools 0.11.2 ### NEW FEATURES diff --git a/R/all_generics.R b/R/all_generics.R index 51eb2e86..ed8c56c9 100644 --- a/R/all_generics.R +++ b/R/all_generics.R @@ -245,8 +245,10 @@ setGeneric("clv.data.create.bootstrapping.data", def = function(clv.data, ids){ #' #' @export as.clv.data <- function(x, - date.format="ymd", time.unit="weeks", + date.format="ymd", + time.unit="weeks", estimation.split = NULL, + data.end = NULL, name.id="Id", name.date="Date", name.price="Price", ...){ UseMethod("as.clv.data", x) diff --git a/R/class_clv_data.R b/R/class_clv_data.R index d130740a..79519299 100644 --- a/R/class_clv_data.R +++ b/R/class_clv_data.R @@ -253,7 +253,21 @@ clv.data.make.descriptives <- function(clv.data, ids){ dt.interp <- clv.data.mean.interpurchase.times(clv.data=clv.data, dt.transactions = dt.data) dt.num.trans.by.cust <- dt.data[, .N, by="Id"] + tp.period.start <- switch( + sample.name, + Estimation=clv.time@timepoint.estimation.start, + Holdout=clv.time@timepoint.holdout.start, + Total=clv.time@timepoint.estimation.start) + + tp.period.end <- switch( + sample.name, + Estimation=clv.time@timepoint.estimation.end, + Holdout=clv.time@timepoint.holdout.end, + Total=clv.time@timepoint.holdout.end) + l.desc <- list( + "Period Start" = clv.time.format.timepoint(clv.time=clv.time, timepoint=tp.period.start), + "Period End" = clv.time.format.timepoint(clv.time=clv.time, timepoint=tp.period.end), "Number of customers" = if(sample.name=="Total"){nrow(dt.num.trans.by.cust)}else{"-"}, "First Transaction in period" = clv.time.format.timepoint(clv.time=clv.time, timepoint=dt.data[, min(Date)]), "Last Transaction in period" = clv.time.format.timepoint(clv.time=clv.time, timepoint=dt.data[, max(Date)]), diff --git a/R/class_clv_fitted.R b/R/class_clv_fitted.R index 1a168c95..d3042107 100644 --- a/R/class_clv_fitted.R +++ b/R/class_clv_fitted.R @@ -49,3 +49,63 @@ setClass(Class = "clv.fitted", contains = "VIRTUAL", optimx.hessian = matrix(data = numeric(0)))) + + +clv.fitted.get.LL <- function(clv.fitted){ + + # Calling the LL with the exact same inputs/specification as when the fitting + # is not trivial as there are plenty of options. + # To reproduce it, the exact steps of the estimation are repeated here. + + # Start parameters are not really required, they are just stored as item + # `par` for `optimx()` + final.coefs <- drop(tail(coef(clv.fitted@optimx.estimation.output), n=1)) + + prepared.optimx.args <- clv.controlflow.estimate.prepare.optimx.args( + clv.fitted=clv.fitted, + start.params.all= final.coefs) + + prepared.optimx.args <- clv.model.prepare.optimx.args( + clv.model=clv.fitted@clv.model, + clv.fitted=clv.fitted, + prepared.optimx.args=prepared.optimx.args) + + prepared.optimx.args[["LL.param.names.to.optimx"]] <- names(prepared.optimx.args$par) + + # In the estimation procedure, the user can also supply custom `optimx.args` + # which override `prepared.optimx.args` here. Because optimx is not called + # here, there is no need to add `optimx.args` here. + + # The generated args also contain parameters for optimx. These are not required + # for the LL and need to be removed. This also removes the LL itself (`fn`). + names.optimx.args <- setdiff(formalArgs(optimx), "...") + call.args <- prepared.optimx.args[!(names(prepared.optimx.args) %in% names.optimx.args)] + + # Could save memory as the returned method is a closure and has the environment + # in which is was defined attached. Hence all variables in this method here + # which may be large. However, it can also be useful to have these objects. + + # Wrapper to call the LL with the original args. + # It is preferred to return a method rather than calling it immediately + # because generating the call args may take time. + LL <- function(params){ + req.names <- call.args$LL.param.names.to.optimx + + # Ensure named same as original + if(!(identical(sort(names(params)), sort(req.names)))){ + check_err_msg(paste0( + "'params' has to be named ", + paste(req.names, collapse = ", "), + ". Often, `drop(coef(model@optimx.estimation.output))` is useful.")) + } + + # Ensure name and position are the same as original order. This is required as + # any param input will be re-named by slapping `LL.param.names.to.optimx` on + # them (using names() <- ) in the first interlayer and then further accessed + # by name. + call.args$LL.params <- params[req.names] # bring to correct order + return(do.call(what = prepared.optimx.args$fn, args = call.args)) + } + + return(LL) +} diff --git a/R/class_clv_time.R b/R/class_clv_time.R index 6a32a3a5..204be747 100644 --- a/R/class_clv_time.R +++ b/R/class_clv_time.R @@ -75,10 +75,24 @@ clv.time.has.holdout <- function(clv.time){ # set.sample.periods ------------------------------------------------------------------------ #' @importFrom lubridate period -clv.time.set.sample.periods <- function(clv.time, tp.first.transaction, tp.last.transaction, user.estimation.end){ +clv.time.set.sample.periods <- function(clv.time, tp.first.transaction, tp.last.transaction, user.estimation.end, user.data.end){ tp.estimation.start <- tp.first.transaction + if(is.null(user.data.end)){ + tp.data.end <- tp.last.transaction + }else{ + tp.data.end <- clv.time.convert.user.input.to.timepoint( + clv.time=clv.time, + user.timepoint=user.data.end) + + # Data end may not be before last transaction + if(tp.data.end < tp.last.transaction){ + stop("The given data.end may not be before the last recorded transaction!") + } + } + + if(!is.null(user.estimation.end)){ # specific end @@ -98,24 +112,29 @@ clv.time.set.sample.periods <- function(clv.time, tp.first.transaction, tp.last. user.timepoint=user.estimation.end) } + + # Before the last transaction to ensure there is at least 1 transaction in the holdout period. + # Needed additionally to holdout >=2 periods + if(tp.estimation.end >= tp.last.transaction) + stop("Parameter estimation.split needs to indicate a point before the last transaction!", call. = FALSE) + # Need to be 2 periods because otherwise for days, holdout can be not on estimation.end but still be of length zero # ie 2 periods to still have 1 as holdout - if(tp.estimation.end > tp.last.transaction-clv.time.number.timeunits.to.timeperiod(clv.time, 2L)) - stop("Parameter estimation.split needs to indicate a point at least 2 periods before the last transaction!", call. = FALSE) + if(tp.estimation.end > tp.data.end - clv.time.number.timeunits.to.timeperiod(clv.time, 2L)) + stop("Parameter estimation.split needs to indicate a point in time such that it yields a holdout period of at least 2 time.units!", call. = FALSE) # + 1 day is the same for all because most fine-grained change that Date can do tp.holdout.start <- tp.estimation.end + clv.time.epsilon(clv.time=clv.time) - tp.holdout.end <- tp.last.transaction + tp.holdout.end <- tp.data.end holdout.period.in.tu <- clv.time.interval.in.number.tu(clv.time, interv=interval(start = tp.holdout.start, end = tp.holdout.end)) }else{ - # NULL: no specific end - until end of data (last transaction) - # **TODO: last transaction or full period where last transaction is in? + # NULL: no specific end - until data end - # tp.holdout.start and .end HAVE to be end of estimation period as this is used elsewhere! + # tp.holdout.start/.end and HAVE to be end of estimation period as this is used elsewhere! # ie to ensure prediction.end (with clv.time.get.prediction.table) finds correct end if user gives NULL - tp.estimation.end <- tp.last.transaction + tp.estimation.end <- tp.data.end tp.holdout.start <- tp.estimation.end tp.holdout.end <- tp.estimation.end holdout.period.in.tu <- 0 diff --git a/R/clv_template_controlflow_predict.R b/R/clv_template_controlflow_predict.R index 0b5cd976..cec24662 100644 --- a/R/clv_template_controlflow_predict.R +++ b/R/clv_template_controlflow_predict.R @@ -81,7 +81,9 @@ clv.controlflow.predict.add.uncertainty.estimates <- function(clv.fitted, dt.pre dt.boots[, Id := sub("_BOOTSTRAP_ID_[0-9]+$", "", Id)] # quantiles for each predicted quantity: select only the existing ones - cols.predictions <- c("PAlive", "CET", "DERT", "DECT", "predicted.mean.spending", "predicted.total.spending", "predicted.CLV") + cols.predictions <- c("PAlive", "CET", "DERT", "DECT", + "predicted.mean.spending", "predicted.period.spending", + "predicted.CLV", "predicted.period.CLV") cols.predictions <- cols.predictions[cols.predictions %in% colnames(dt.boots)] # Long-format for easier handling of different prediction columns diff --git a/R/f_clvdata_inputchecks.R b/R/f_clvdata_inputchecks.R index eee0d4f4..42be999f 100644 --- a/R/f_clvdata_inputchecks.R +++ b/R/f_clvdata_inputchecks.R @@ -210,6 +210,31 @@ check_userinput_datanocov_estimationsplit <- function(estimation.split, date.for return(c()) } +#' @importFrom lubridate is.POSIXt is.Date parse_date_time +check_userinput_datanocov_dataend <- function(data.end, date.format){ + + # May be NULL + if(is.null(data.end)) + return(c()) + + if(length(data.end) != 1) + return("data.end must contain exactly one single element!") + + if(anyNA(data.end)) + return("data.end may not contain any NAs!") + + if(!is.character(data.end) + & !is.Date(data.end) + & !is.POSIXt(data.end)) + return("data.end needs to either of type character or date-like (Date or POSIXt)") + + if(is.character(data.end)) + if(anyNA(parse_date_time(x=data.end, quiet=TRUE, orders=date.format))) + return("Please provide a valid data.end to that can be converted with the given date.format!") + + return(c()) +} + #' @importFrom lubridate is.POSIXct check_userinput_datanocov_datatransactions <- function(data.transactions.dt, has.spending){ diff --git a/R/f_generics_clvfitted.R b/R/f_generics_clvfitted.R index 08e05676..7e2a1423 100644 --- a/R/f_generics_clvfitted.R +++ b/R/f_generics_clvfitted.R @@ -35,3 +35,4 @@ setMethod("clv.fitted.estimate.same.specification.on.new.data", signature = "clv new.fitted@call <- cl return(new.fitted) }) + diff --git a/R/f_generics_clvfittedtransactions.R b/R/f_generics_clvfittedtransactions.R index c98fcda8..c4eaf868 100644 --- a/R/f_generics_clvfittedtransactions.R +++ b/R/f_generics_clvfittedtransactions.R @@ -160,15 +160,15 @@ setMethod("clv.controlflow.predict.get.has.actuals", signature(clv.fitted="clv.f # .clv.controlflow.predict.add.actuals --------------------------------------------------------------------------------- setMethod("clv.controlflow.predict.add.actuals", signature(clv.fitted="clv.fitted.transactions"), definition = function(clv.fitted, dt.predictions, has.actuals, verbose, ...){ - actual.total.spending <- i.actual.total.spending <- Price <- Date <- period.first <- period.last <- actual.x <- i.actual.x <- NULL + actual.period.spending <- i.actual.period.spending <- Price <- Date <- period.first <- period.last <- actual.x <- i.actual.x <- NULL # Only if: # - there is a holdout # - the prediction is not beyond holdout # # Data until prediction end - # actual.x: number of transactions - # actual.total.spending: total spending + # actual.x: number of transactions + # actual.period.spending: total spending timepoint.prediction.first <- dt.predictions[1, period.first] timepoint.prediction.last <- dt.predictions[1, period.last] @@ -184,8 +184,8 @@ setMethod("clv.controlflow.predict.add.actuals", signature(clv.fitted="clv.fitte incbounds = TRUE)] if(clv.data.has.spending(clv.fitted@clv.data)){ - dt.actuals <- dt.actuals.transactions[, list(actual.x = .N, - actual.total.spending = sum(Price)), + dt.actuals <- dt.actuals.transactions[, list(actual.x = .N, + actual.period.spending = sum(Price)), keyby="Id"] }else{ dt.actuals <- dt.actuals.transactions[, list(actual.x = .N), keyby="Id"] @@ -195,8 +195,8 @@ setMethod("clv.controlflow.predict.add.actuals", signature(clv.fitted="clv.fitte dt.predictions[is.na(actual.x), actual.x := 0] if(clv.data.has.spending(clv.fitted@clv.data)){ - dt.predictions[dt.actuals, actual.total.spending := i.actual.total.spending, on="Id"] - dt.predictions[is.na(actual.total.spending), actual.total.spending := 0] + dt.predictions[dt.actuals, actual.period.spending := i.actual.period.spending, on="Id"] + dt.predictions[is.na(actual.period.spending), actual.period.spending := 0] } return(dt.predictions) } @@ -205,8 +205,8 @@ setMethod("clv.controlflow.predict.add.actuals", signature(clv.fitted="clv.fitte # .clv.controlflow.predict.post.process.prediction.table ------------------------------------------------------------------------------ setMethod("clv.controlflow.predict.post.process.prediction.table", signature = signature(clv.fitted="clv.fitted.transactions"), function(clv.fitted, dt.predictions, has.actuals, verbose, predict.spending, ...){ - predicted.mean.spending <- i.predicted.mean.spending <- actual.total.spending <- i.actual.total.spending <- predicted.total.spending <- NULL - predicted.CLV <- CET <- DECT <- DERT <- NULL + predicted.mean.spending <- i.predicted.mean.spending <- actual.period.spending <- i.actual.period.spending <- predicted.period.spending <- NULL + predicted.CLV <- predicted.period.CLV <- CET <- DECT <- DERT <- NULL # Predict spending --------------------------------------------------------------------------------------- # depends on content of predict.spending: @@ -224,7 +224,7 @@ setMethod("clv.controlflow.predict.post.process.prediction.table", signature = s dt.predictions[dt.spending, predicted.mean.spending := i.predicted.mean.spending, on = "Id"] # The actual.mean.spending from dt.spending is not added anymore - # actual.total.spending is already in prediction table + # actual.period.spending is already in prediction table return(dt.predictions) } @@ -289,10 +289,10 @@ setMethod("clv.controlflow.predict.post.process.prediction.table", signature = s if("DERT" %in% colnames(dt.predictions)) dt.predictions[, predicted.CLV := DERT * predicted.mean.spending] if("DECT" %in% colnames(dt.predictions)) - dt.predictions[, predicted.CLV := DECT * predicted.mean.spending] + dt.predictions[, predicted.period.CLV := DECT * predicted.mean.spending] - # If spending is predicted, also add predicted.total.spending - dt.predictions[, predicted.total.spending := predicted.mean.spending * CET] + # If spending is predicted, also add predicted.period.spending + dt.predictions[, predicted.period.spending := predicted.mean.spending * CET] } # Present cols in desired order ------------------------------------------------------------ @@ -302,8 +302,8 @@ setMethod("clv.controlflow.predict.post.process.prediction.table", signature = s cols <- c(cols, "actual.x") # cannot determine otherwise alone from has.actuals - if("actual.total.spending" %in% colnames(dt.predictions)) - cols <- c(cols, "actual.total.spending") + if("actual.period.spending" %in% colnames(dt.predictions)) + cols <- c(cols, "actual.period.spending") } if("DERT" %in% colnames(dt.predictions)) @@ -312,10 +312,12 @@ setMethod("clv.controlflow.predict.post.process.prediction.table", signature = s cols <- c(cols, "PAlive", "CET", "DECT") if(do.predict.spending) - cols <- c(cols, c("predicted.mean.spending", "predicted.total.spending")) + cols <- c(cols, c("predicted.mean.spending", "predicted.period.spending")) if("predicted.CLV" %in% colnames(dt.predictions)) cols <- c(cols, "predicted.CLV") + if("predicted.period.CLV" %in% colnames(dt.predictions)) + cols <- c(cols, "predicted.period.CLV") setcolorder(dt.predictions, cols) diff --git a/R/f_interface_clvdata.R b/R/f_interface_clvdata.R index 08d7c6b1..cb5e2446 100644 --- a/R/f_interface_clvdata.R +++ b/R/f_interface_clvdata.R @@ -32,8 +32,17 @@ #' (i.e., "2010-06-17") is indicated with \code{"ymd"}. Other combinations such as \code{"dmy"}, \code{"dym"}, #' \code{"ymd HMS"}, or \code{"HMS dmy"} are possible as well. #' +#' \code{data.end} A point in time beyond the last purchase at which the data should fictionally end. +#' It defines the total time frame in which customers could be observed: The combined estimation and holdout periods. +#' For example, when the last recorded transaction was on "2000-12-29" but customers were actually observed until "2000-12-31". +#' Using \code{data.end="2000-12-31"} without holdout period, +#' the estimation period will be until "2000-12-31" and the prediction period will start on "2001-01-01". +#' Required to be after the last recorded transaction. +#' #' \code{estimation.split} May be specified as either the number of periods since the first transaction or the timepoint -#' (either as character, Date, or POSIXct) at which the estimation period ends. The indicated timepoint itself will be part of the estimation sample. +#' (either as character, Date, or POSIXct) at which the estimation period ends. +#' Required to be before the last transaction. +#' The indicated timepoint itself will be part of the estimation sample. #' If no value is provided or set to \code{NULL}, the whole dataset will used for fitting the model (no holdout sample). #' #' @details ## Aggregation of Transactions @@ -84,6 +93,15 @@ #' time.unit = "w", #' estimation.split = "1997-10-15") #' +#' # Extend data fictionally until 31th Dec 1998 +#' # In this case, this only moves the holdout period and has no effect on the +#' # estimation. +#' clv.data.cdnow <- clvdata(data.transactions = cdnow, +#' date.format="ymd", +#' time.unit = "w", +#' data.end = "1998-12-31", +#' estimation.split = "1997-10-15") +#' #' # summary of the transaction data #' summary(clv.data.cdnow) #' @@ -112,7 +130,7 @@ #' #' #' @export -clvdata <- function(data.transactions, date.format, time.unit, estimation.split=NULL, name.id="Id", name.date="Date", name.price="Price"){ +clvdata <- function(data.transactions, date.format, time.unit, estimation.split=NULL, data.end=NULL, name.id="Id", name.date="Date", name.price="Price"){ # silence CRAN notes Date <- Price <- Id <- x <- previous <- date.first.actual.trans <- NULL @@ -136,6 +154,7 @@ clvdata <- function(data.transactions, date.format, time.unit, estimation.split= err.msg <- c(err.msg, .check_userinput_charactervec(char=date.format, var.name = "date.format", n=1)) err.msg <- c(err.msg, check_userinput_datanocov_estimationsplit(estimation.split=estimation.split, date.format=date.format)) + err.msg <- c(err.msg, check_userinput_datanocov_dataend(data.end=data.end, date.format=date.format)) check_err_msg(err.msg) @@ -208,14 +227,9 @@ clvdata <- function(data.transactions, date.format, time.unit, estimation.split= clv.t <- clv.time.set.sample.periods(clv.time = clv.t, tp.first.transaction = tp.first.transaction, tp.last.transaction = tp.last.transaction, + user.data.end = data.end, user.estimation.end = estimation.split) - if(clv.t@timepoint.estimation.end > dt.trans[, max(Date)]) - stop("Parameter estimation.split needs to indicate a point in the data!", call. = FALSE) - - if(clv.t@estimation.period.in.tu < 1) - stop("Parameter estimation.split needs to be at least 1 time.unit after the start!", call. = FALSE) - # Check if the estimation.split is valid ---------------------------------------- # - estimation period long enough @@ -229,7 +243,7 @@ clvdata <- function(data.transactions, date.format, time.unit, estimation.split= everyones.first.trans <- dt.trans[, list(date.first.actual.trans = min(Date)), by="Id"] date.last.first.trans <- everyones.first.trans[, max(date.first.actual.trans)] if(clv.t@timepoint.estimation.end < date.last.first.trans) - stop("The estimation split is too short! Not all customers of this cohort had their first actual transaction until the specified estimation.split!", call. = F) + stop("The estimation period is too short! Not all customers had their first transaction until the end of the estimation period!", call. = FALSE) diff --git a/R/f_interface_hessian.R b/R/f_interface_hessian.R new file mode 100644 index 00000000..08ae213d --- /dev/null +++ b/R/f_interface_hessian.R @@ -0,0 +1,83 @@ + +#' @name hessian +#' @title Calculate hessian for a fitted model +#' +#' @description Calculate a numerical approximation to the Hessian matrix at +#' the final estimated parameters using \code{numDeriv::hessian}. +#' +#' @param object Fitted model +#' @param method.args List of options forwarded to the numerical approximation +#' method. See \link[numDeriv:hessian]{numDeriv::hessian}. +#' @template template_param_dots +#' +#' @returns The hessian matrix, with column and row names set to the parameter +#' names used to call the LL. +NULL + + +#' @rdname hessian +#' @importFrom numDeriv hessian +#' @importFrom utils tail +#' @importFrom methods formalArgs +#' @exportS3Method numDeriv::hessian +hessian.clv.fitted <- function(object, method.args = list()){ + + # Register for dispatch on a method defined in another package by using + # @exportS3Method which adds `S3method(numDeriv::hessian,clv.fitted)` to NAMESPACE + + # Get final parameters + # To get coefficients at the same scale (log-transformed) and names as when + # estimating the model, get them directly from the optimx output. + # Cannot use coef() as the reported parameters are transformed back and named + # differently. + final.coefs <- drop(tail(coef(object@optimx.estimation.output), n=1)) + + if(anyNA(final.coefs)){ + check_err_msg("Cannot proceed because there are NAs in the estimated coefficients!") + } + + # Get LL + fn.LL <- clv.fitted.get.LL(object) + + # fn.call.LL.named <- function(par){ + # names(par) <- names(final.coefs) + # return(fn.LL(par)) + # } + + # Have to refer to `numDeriv` namespace directly (`::`) as `hessian()` would + # dispatch to `CLVTools::hessian` and fail if `numDeriv` is not attached + H <- numDeriv::hessian( + func=fn.LL, + x=final.coefs, + method="Richardson", + method.args = method.args) + # Names as in optimx (log-scale etc) + colnames(H) <- rownames(H) <- names(final.coefs) + + return(H) +} + + +# In order to be able to use `hessian()` without having `numDeriv` loaded or even +# installed, define and export `hessian()` as a generic in CLVTools. +# The S4 generic is NOT defined with the exact same signature as the +# S3 generic `numDeriv::hessian <- function(func, x, method, method.args, ...){ ... }`. +# +# The numDeriv package exports an S3 generic `hessian()` what masks the generic (whether S3 +# or S4) exported by CLVTools if the numDeriv package is loaded after CLVTools. +# Therefore, define and export also as a S3 method `CLVTools::hessian.clv.fitted`. +# +# ?Methods_for_Nongenerics on dispatching an S4 object to S3 generics method in +# another package: Recommends to define both methods: The S3 method and also supply +# the identical function as the definition of the S4 method. +#' @rdname hessian +#' @exportMethod hessian +setGeneric(name = "hessian", def=function(object, ...) + standardGeneric("hessian")) + + +#' @rdname hessian +#' @include all_generics.R +#' @exportMethod hessian +setMethod("hessian", signature(object="clv.fitted"), definition = hessian.clv.fitted) + diff --git a/R/f_interface_predict_clvfittedtransactions.R b/R/f_interface_predict_clvfittedtransactions.R index e8586259..46adc050 100644 --- a/R/f_interface_predict_clvfittedtransactions.R +++ b/R/f_interface_predict_clvfittedtransactions.R @@ -97,10 +97,11 @@ #' \item{CET}{The Conditional Expected Transactions: The number of transactions expected until prediction.end.} #' \item{DERT or DECT}{Discounted Expected Residual Transactions or Discounted Expected Conditional Transactions for dynamic covariates models} #' \item{actual.x}{Actual number of transactions until prediction.end. Only if there is a holdout period and the prediction ends in it, otherwise not reported.} -#' \item{actual.total.spending}{Actual total spending until prediction.end. Only if there is a holdout period and the prediction ends in it, otherwise not reported.} +#' \item{actual.period.spending}{Actual total spending until prediction.end. Only if there is a holdout period and the prediction ends in it, otherwise not reported.} #' \item{predicted.mean.spending}{The mean spending per transactions as predicted by the spending model.} -#' \item{predicted.total.spending}{The predicted total spending until prediction.end (\code{CET*predicted.mean.spending}).} -#' \item{predicted.CLV}{Customer Lifetime Value based on \code{DERT/DECT} and \code{predicted.mean.spending}.} +#' \item{predicted.period.spending}{The predicted total spending until prediction.end (\code{CET*predicted.mean.spending}).} +#' \item{predicted.CLV}{Customer Lifetime Value based on \code{DERT*predicted.mean.spending}.} +#' \item{predicted.period.CLV}{Customer Lifetime Value until prediction.end based on \code{DECT*predicted.mean.spending}.} #' #' If predicting for new customers (using \code{newcustomer()}), a numeric scalar indicating the expected #' number of transactions is returned instead. diff --git a/R/f_s3generics_clvdata.R b/R/f_s3generics_clvdata.R index 198ccdfd..e12f6402 100644 --- a/R/f_s3generics_clvdata.R +++ b/R/f_s3generics_clvdata.R @@ -291,8 +291,10 @@ subset.clv.data <- function(x, #' @rdname as.clv.data #' @export as.clv.data.data.frame <- function(x, - date.format="ymd", time.unit="weeks", + date.format="ymd", + time.unit="weeks", estimation.split = NULL, + data.end = NULL, name.id="Id", name.date="Date", name.price="Price", ...){ return(clvdata(data.transactions = x, @@ -304,8 +306,10 @@ as.clv.data.data.frame <- function(x, #' @rdname as.clv.data #' @export as.clv.data.data.table <- function(x, - date.format="ymd", time.unit="weeks", + date.format="ymd", + time.unit="weeks", estimation.split = NULL, + data.end = NULL, name.id="Id", name.date="Date", name.price="Price", ...){ return(clvdata(data.transactions = x, diff --git a/R/f_s3generics_clvdata_plot.R b/R/f_s3generics_clvdata_plot.R index 054ecdf7..c6acac54 100644 --- a/R/f_s3generics_clvdata_plot.R +++ b/R/f_s3generics_clvdata_plot.R @@ -95,7 +95,7 @@ #' \item{variable}{"tracking": The number of actual repeat transactions in the period that ends at \code{period.until}.\cr #' "timings": Coordinate (x or y) for which to use the value in this row for.} #' \item{value}{"timings": Date or numeric (stored as string) \cr -#' "tracking": numeric} +#' "tracking": numeric, may be \code{NA} if no repeat-transactions were recorded in this period} #' #' #' @examples @@ -310,10 +310,26 @@ clv.data.plot.tracking <- function(x, prediction.end, cumulative, plot, verbose, dt.dates.expectation[dt.repeat.trans, (label.transactions) := get(label.transactions), on="period.until"] dt.plot <- melt(dt.dates.expectation, id.vars="period.until") - # last period often has NA as it marks the full span of the period - dt.plot <- dt.plot[!is.na(value)] - - # data.table does not print when returned because it is returned directly after last [:=] + # The last period usually is set to NA because the data does not reach to the end of it. + # The last period has to be a full period because of the expectation plot. + # At the same time, the transaction data often ends before the last period (is only a partial period). + # This leads to a much lower number of transactions recorded in the last period + # and a noticeable, hard-to-explain drop at the end. + # Periods for which there are no transactions contain 0 not NA. Only the last + # period may contain NA. + # We remove it to not have it in the data and not raise a warning when plotting. + # + # Since introducing `data.end`, we no loner remove NAs as now there can be many + # periods without transactions and these should be shown (plotted) and known (returned data). + # Instead `geom_line(na.rm=T)` is used to remove them during plotting. + # Returning them helps users who want to create their own plots to plot the + # correct range (total time span of data). + # Alternative: Drop NA but set x-axis scale until holdout.end using + # `+ xlim(c(x@clv.time@timepoint.estimation.start, x@clv.time@timepoint.holdout.end))` + # + # dt.plot <- dt.plot[!is.na(value)] + + # # data.table does not print when returned because it is returned directly after last [:=] # " if a := is used inside a function with no DT[] before the end of the function, then the next # time DT or print(DT) is typed at the prompt, nothing will be printed. A repeated DT or print(DT) # will print. To avoid this: include a DT[] after the last := in your function." diff --git a/R/f_s3generics_clvfitted.R b/R/f_s3generics_clvfitted.R index 8e28df53..f4e193ae 100644 --- a/R/f_s3generics_clvfitted.R +++ b/R/f_s3generics_clvfitted.R @@ -370,6 +370,14 @@ summary.clv.fitted <- function(object, ...){ rownames(res$coefficients) <- names(all.est.params) colnames(res$coefficients) <- c("Estimate","Std. Error", "z-val", "Pr(>|z|)") + # Set z-val and Pvalue of the model parameters to NA as they are by definition always > 0 ("significant") + # Printing "-" would be preferred but + # - setting to "-" would convert the whole matrix to character + # - using `printCoefmat(na.print='-')` would also show "-" for NA parameter estimates + # - therefore would likely require to re-implement `printCoefmat()` to only na.print + # for some params & columns but this is cumbersome as na.print is from the internal `print.default` + res$coefficients[object@clv.model@names.original.params.model, c("z-val", "Pr(>|z|)")] <- NA_real_ + # Optimization ------------------------------------------------------------------- res$estimated.LL <- as.vector(logLik(object)) res$AIC <- AIC(object) diff --git a/R/f_s3generics_clvfittedtransactions_plot.R b/R/f_s3generics_clvfittedtransactions_plot.R index 7217629e..d2b1f454 100644 --- a/R/f_s3generics_clvfittedtransactions_plot.R +++ b/R/f_s3generics_clvfittedtransactions_plot.R @@ -78,7 +78,7 @@ #' \item{period.until}{The timepoint that marks the end (up until and including) of the period to which the data in this row refers.} #' \item{variable}{Type of variable that 'value' refers to. Either "model name" or "Actual" (if \code{transactions=TRUE}).} #' \item{value}{Depending on variable either (Actual) the actual number of repeat transactions in the period that ends at \code{period.until}, -#' or the unconditional expectation for the period that ends on \code{period.until} ("model name").} +#' or the unconditional expectation for the period that ends on \code{period.until} ("model name"). Actuals may be \code{NA} if no transaction was recorded.} #' #' For the PMF plot: #' \item{num.transactions}{The number of repeat transactions in the estimation period (as ordered factor).} @@ -241,7 +241,7 @@ clv.controlflow.plot.tracking.base <- function(dt.plot, clv.data, color.mapping, # Plotting order dt.plot[, variable := factor(variable, levels=names(color.mapping), ordered = TRUE)] - p <- ggplot(data = dt.plot, aes(x=period.until, y=value, colour=variable)) + geom_line() + p <- ggplot(data = dt.plot, aes(x=period.until, y=value, colour=variable)) + geom_line(na.rm = TRUE) # Add holdout line if there is a holdout period if(clv.data.has.holdout(clv.data)){ @@ -382,7 +382,13 @@ clv.fitted.transactions.plot.tracking.get.data <- function(x, prediction.end, cu dt.plot <- melt(dt.dates.expectation, id.vars='period.until') # last period often has NA as it marks the full span of the period - dt.plot <- dt.plot[!is.na(value)] + # The last period usually was NA because of explanations it was a partial + # period. See explanations in `clv.data.plot.tracking`. + # dt.plot <- dt.plot[!is.na(value)] + # Since introducing `data.end`, many periods can be NA. The NAs are now removed + # during plotting (`geom_line(na.rm=T)`). For consistency with plot(clvdata), + # the returned data also keeps the NA. + return(dt.plot) } diff --git a/README.md b/README.md index 1017a5aa..ddfbaa63 100644 --- a/README.md +++ b/README.md @@ -310,7 +310,7 @@ results <- predict(est.pnbd) #> Starting estimation... #> Estimation finished! print(results) -#> Id period.first period.last period.length actual.x actual.total.spending +#> Id period.first period.last period.length actual.x actual.period.spending #> 1: 1 2005-10-11 2006-07-16 39.85714 0 0.00 #> 2: 10 2005-10-11 2006-07-16 39.85714 0 0.00 #> 3: 100 2005-10-11 2006-07-16 39.85714 23 737.53 diff --git a/cran-comments.md b/cran-comments.md index a0598fc3..a3ca625c 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,9 +1,14 @@ # Comment from the authors -This is version 0.11.2 of the CLVTools package. +This is version 0.12.0 of the CLVTools package. The most relevant changes in this version are: -* `newcustomer.spending()`: Predict average spending per transaction for customers without order history -* Improved optimizer defaults +* Add 3 new vignettes covering: Advanced modelling techniques, model intuition, and the internal class system +* Add method `hessian()` to calculate hessian matrix for already fitted models +* Correct significance indicators NA in `summary()` +* Add new parameter to data preparation method +* Renaming prediction output columns +* Fix CRAN notes + diff --git a/man-roxygen/template_examples_nocovmodelinterface.R b/man-roxygen/template_examples_nocovmodelinterface.R index b92de087..df3de503 100644 --- a/man-roxygen/template_examples_nocovmodelinterface.R +++ b/man-roxygen/template_examples_nocovmodelinterface.R @@ -21,7 +21,10 @@ #' # estimated coefs #' coef(apparel.<%=name_model_short%>) #' -#' # summary of the fitted model +#' # summary of the fitted model. +#' # Note that the significance indicators are set to NA on purpose because all +#' # model parameters are by definition strictly positive. A hypothesis test +#' # relative to a null of 0 therefore does not make sense. #' summary(apparel.<%=name_model_short%>) #' #' # predict CLV etc for holdout period diff --git a/man-roxygen/template_params_clvdata.R b/man-roxygen/template_params_clvdata.R index e3cf6454..7e184d4b 100644 --- a/man-roxygen/template_params_clvdata.R +++ b/man-roxygen/template_params_clvdata.R @@ -1,5 +1,6 @@ #' @param date.format Character string that indicates the format of the date variable in the data used. See details. #' @param time.unit What time unit defines a period. May be abbreviated, capitalization is ignored. See details. +#' @param data.end The fictional end of the data, after the last recorded transaction in \code{<%=name_param_trans%>}. See details. #' @param estimation.split Indicates the length of the estimation period. See details. #' @param name.id Column name of the customer id in \code{<%=name_param_trans%>}. #' @param name.date Column name of the transaction date in \code{<%=name_param_trans%>}. diff --git a/man-roxygen/template_summary_clvfitted.R b/man-roxygen/template_summary_clvfitted.R index ca0a1d49..092fad8e 100644 --- a/man-roxygen/template_summary_clvfitted.R +++ b/man-roxygen/template_summary_clvfitted.R @@ -14,6 +14,11 @@ #' (for example if specified in parameter \code{optimx.args}), all information here refers to #' the last method/row of the resulting \code{optimx} object. #' +#' Note that for the main model coefficients (all coefs not for covariates), +#' the significance indicators \code{z-val} and p-values are set to \code{NA} because they are by definition always +#' strictly positive and hypothesis test relative to a null of 0 does not make sense. +#' +#' #' @return This function computes and returns a list of summary information of the fitted model #' given in \code{object}. It returns a list of class \code{summary.clv.no.covariates} that contains the #' following components: diff --git a/man-roxygen/template_summary_data.R b/man-roxygen/template_summary_data.R index bca429a2..7f5ed15e 100644 --- a/man-roxygen/template_summary_data.R +++ b/man-roxygen/template_summary_data.R @@ -16,6 +16,8 @@ #' be limited to a subset of customers. #' \describe{ #' \item{\code{Number of customers}}{Count of individual customers.} +#' \item{\code{Period Start}}{Start of the indicated period.} +#' \item{\code{Period End}}{End of indicated period.} #' \item{\code{First Transaction in period}}{Time point of the first transaction occurring in the indicated period.} #' \item{\code{Last Transaction in period}}{Time point of the last transaction occurring in the indicated period.} #' \item{\code{Total # Transactions}}{Count of transactions occurring in the indicated period.} diff --git a/man/as.clv.data.Rd b/man/as.clv.data.Rd index 297a620f..652068f4 100644 --- a/man/as.clv.data.Rd +++ b/man/as.clv.data.Rd @@ -11,6 +11,7 @@ as.clv.data( date.format = "ymd", time.unit = "weeks", estimation.split = NULL, + data.end = NULL, name.id = "Id", name.date = "Date", name.price = "Price", @@ -22,6 +23,7 @@ as.clv.data( date.format = "ymd", time.unit = "weeks", estimation.split = NULL, + data.end = NULL, name.id = "Id", name.date = "Date", name.price = "Price", @@ -33,6 +35,7 @@ as.clv.data( date.format = "ymd", time.unit = "weeks", estimation.split = NULL, + data.end = NULL, name.id = "Id", name.date = "Date", name.price = "Price", @@ -48,6 +51,8 @@ as.clv.data( \item{estimation.split}{Indicates the length of the estimation period. See details.} +\item{data.end}{The fictional end of the data, after the last recorded transaction in \code{x}. See details.} + \item{name.id}{Column name of the customer id in \code{x}.} \item{name.date}{Column name of the transaction date in \code{x}.} diff --git a/man/bgnbd.Rd b/man/bgnbd.Rd index b9fc4e32..8023a9d6 100644 --- a/man/bgnbd.Rd +++ b/man/bgnbd.Rd @@ -130,7 +130,10 @@ apparel.bgnbd <- bgnbd(clv.data.apparel, # estimated coefs coef(apparel.bgnbd) -# summary of the fitted model +# summary of the fitted model. +# Note that the significance indicators are set to NA on purpose because all +# model parameters are by definition strictly positive. A hypothesis test +# relative to a null of 0 therefore does not make sense. summary(apparel.bgnbd) # predict CLV etc for holdout period diff --git a/man/clvdata.Rd b/man/clvdata.Rd index 6987312e..1eeae7d3 100644 --- a/man/clvdata.Rd +++ b/man/clvdata.Rd @@ -9,6 +9,7 @@ clvdata( date.format, time.unit, estimation.split = NULL, + data.end = NULL, name.id = "Id", name.date = "Date", name.price = "Price" @@ -23,6 +24,8 @@ clvdata( \item{estimation.split}{Indicates the length of the estimation period. See details.} +\item{data.end}{The fictional end of the data, after the last recorded transaction in \code{data.transactions}. See details.} + \item{name.id}{Column name of the customer id in \code{data.transactions}.} \item{name.date}{Column name of the transaction date in \code{data.transactions}.} @@ -62,8 +65,17 @@ and hence all formats it accepts in argument \code{orders} can be used. For exam (i.e., "2010-06-17") is indicated with \code{"ymd"}. Other combinations such as \code{"dmy"}, \code{"dym"}, \code{"ymd HMS"}, or \code{"HMS dmy"} are possible as well. +\code{data.end} A point in time beyond the last purchase at which the data should fictionally end. +It defines the total time frame in which customers could be observed: The combined estimation and holdout periods. +For example, when the last recorded transaction was on "2000-12-29" but customers were actually observed until "2000-12-31". +Using \code{data.end="2000-12-31"} without holdout period, +the estimation period will be until "2000-12-31" and the prediction period will start on "2001-01-01". +Required to be after the last recorded transaction. + \code{estimation.split} May be specified as either the number of periods since the first transaction or the timepoint -(either as character, Date, or POSIXct) at which the estimation period ends. The indicated timepoint itself will be part of the estimation sample. +(either as character, Date, or POSIXct) at which the estimation period ends. +Required to be before the last transaction. +The indicated timepoint itself will be part of the estimation sample. If no value is provided or set to \code{NULL}, the whole dataset will used for fitting the model (no holdout sample). \subsection{Aggregation of Transactions}{ @@ -101,6 +113,15 @@ clv.data.cdnow <- clvdata(data.transactions = cdnow, time.unit = "w", estimation.split = "1997-10-15") +# Extend data fictionally until 31th Dec 1998 +# In this case, this only moves the holdout period and has no effect on the +# estimation. +clv.data.cdnow <- clvdata(data.transactions = cdnow, + date.format="ymd", + time.unit = "w", + data.end = "1998-12-31", + estimation.split = "1997-10-15") + # summary of the transaction data summary(clv.data.cdnow) diff --git a/man/ggomnbd.Rd b/man/ggomnbd.Rd index 6fcbbad0..708082eb 100644 --- a/man/ggomnbd.Rd +++ b/man/ggomnbd.Rd @@ -116,7 +116,10 @@ apparel.ggomnbd <- ggomnbd(clv.data.apparel, # estimated coefs coef(apparel.ggomnbd) -# summary of the fitted model +# summary of the fitted model. +# Note that the significance indicators are set to NA on purpose because all +# model parameters are by definition strictly positive. A hypothesis test +# relative to a null of 0 therefore does not make sense. summary(apparel.ggomnbd) # predict CLV etc for holdout period diff --git a/man/hessian.Rd b/man/hessian.Rd new file mode 100644 index 00000000..16454dbc --- /dev/null +++ b/man/hessian.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/f_interface_hessian.R +\name{hessian} +\alias{hessian} +\alias{hessian.clv.fitted} +\alias{hessian,clv.fitted-method} +\title{Calculate hessian for a fitted model} +\usage{ +\method{hessian}{clv.fitted}(object, method.args = list()) + +hessian(object, ...) + +\S4method{hessian}{clv.fitted}(object, method.args = list()) +} +\arguments{ +\item{object}{Fitted model} + +\item{method.args}{List of options forwarded to the numerical approximation +method. See \link[numDeriv:hessian]{numDeriv::hessian}.} + +\item{...}{Ignored} +} +\value{ +The hessian matrix, with column and row names set to the parameter +names used to call the LL. +} +\description{ +Calculate a numerical approximation to the Hessian matrix at +the final estimated parameters using \code{numDeriv::hessian}. +} diff --git a/man/plot.clv.data.Rd b/man/plot.clv.data.Rd index bac706d1..b871fedd 100644 --- a/man/plot.clv.data.Rd +++ b/man/plot.clv.data.Rd @@ -81,7 +81,7 @@ excluding customers with no repeat-transactions.} \item{variable}{"tracking": The number of actual repeat transactions in the period that ends at \code{period.until}.\cr "timings": Coordinate (x or y) for which to use the value in this row for.} \item{value}{"timings": Date or numeric (stored as string) \cr - "tracking": numeric} + "tracking": numeric, may be \code{NA} if no repeat-transactions were recorded in this period} } \description{ Depending on the value of parameter \code{which}, one of the following plots will be produced. diff --git a/man/plot.clv.fitted.transactions.Rd b/man/plot.clv.fitted.transactions.Rd index 93ac58ad..236374e6 100644 --- a/man/plot.clv.fitted.transactions.Rd +++ b/man/plot.clv.fitted.transactions.Rd @@ -82,7 +82,7 @@ For the Tracking plot: \item{period.until}{The timepoint that marks the end (up until and including) of the period to which the data in this row refers.} \item{variable}{Type of variable that 'value' refers to. Either "model name" or "Actual" (if \code{transactions=TRUE}).} \item{value}{Depending on variable either (Actual) the actual number of repeat transactions in the period that ends at \code{period.until}, -or the unconditional expectation for the period that ends on \code{period.until} ("model name").} +or the unconditional expectation for the period that ends on \code{period.until} ("model name"). Actuals may be \code{NA} if no transaction was recorded.} For the PMF plot: \item{num.transactions}{The number of repeat transactions in the estimation period (as ordered factor).} diff --git a/man/pnbd.Rd b/man/pnbd.Rd index 956570d9..f4a774e4 100644 --- a/man/pnbd.Rd +++ b/man/pnbd.Rd @@ -173,7 +173,10 @@ apparel.pnbd <- pnbd(clv.data.apparel, # estimated coefs coef(apparel.pnbd) -# summary of the fitted model +# summary of the fitted model. +# Note that the significance indicators are set to NA on purpose because all +# model parameters are by definition strictly positive. A hypothesis test +# relative to a null of 0 therefore does not make sense. summary(apparel.pnbd) # predict CLV etc for holdout period diff --git a/man/predict.clv.fitted.transactions.Rd b/man/predict.clv.fitted.transactions.Rd index 0ee14796..3595c22e 100644 --- a/man/predict.clv.fitted.transactions.Rd +++ b/man/predict.clv.fitted.transactions.Rd @@ -64,10 +64,11 @@ An object of class \code{data.table} with columns: \item{CET}{The Conditional Expected Transactions: The number of transactions expected until prediction.end.} \item{DERT or DECT}{Discounted Expected Residual Transactions or Discounted Expected Conditional Transactions for dynamic covariates models} \item{actual.x}{Actual number of transactions until prediction.end. Only if there is a holdout period and the prediction ends in it, otherwise not reported.} -\item{actual.total.spending}{Actual total spending until prediction.end. Only if there is a holdout period and the prediction ends in it, otherwise not reported.} +\item{actual.period.spending}{Actual total spending until prediction.end. Only if there is a holdout period and the prediction ends in it, otherwise not reported.} \item{predicted.mean.spending}{The mean spending per transactions as predicted by the spending model.} -\item{predicted.total.spending}{The predicted total spending until prediction.end (\code{CET*predicted.mean.spending}).} -\item{predicted.CLV}{Customer Lifetime Value based on \code{DERT/DECT} and \code{predicted.mean.spending}.} +\item{predicted.period.spending}{The predicted total spending until prediction.end (\code{CET*predicted.mean.spending}).} +\item{predicted.CLV}{Customer Lifetime Value based on \code{DERT*predicted.mean.spending}.} +\item{predicted.period.CLV}{Customer Lifetime Value until prediction.end based on \code{DECT*predicted.mean.spending}.} If predicting for new customers (using \code{newcustomer()}), a numeric scalar indicating the expected number of transactions is returned instead. diff --git a/man/summary.clv.data.Rd b/man/summary.clv.data.Rd index b5999cfd..e7cb5aa5 100644 --- a/man/summary.clv.data.Rd +++ b/man/summary.clv.data.Rd @@ -60,6 +60,8 @@ for the overall time period (estimation + holdout). By using the \code{ids} argu be limited to a subset of customers. \describe{ \item{\code{Number of customers}}{Count of individual customers.} +\item{\code{Period Start}}{Start of the indicated period.} +\item{\code{Period End}}{End of indicated period.} \item{\code{First Transaction in period}}{Time point of the first transaction occurring in the indicated period.} \item{\code{Last Transaction in period}}{Time point of the last transaction occurring in the indicated period.} \item{\code{Total # Transactions}}{Count of transactions occurring in the indicated period.} diff --git a/man/summary.clv.fitted.Rd b/man/summary.clv.fitted.Rd index e0a3a0a5..566d4596 100644 --- a/man/summary.clv.fitted.Rd +++ b/man/summary.clv.fitted.Rd @@ -69,6 +69,10 @@ Summary method for fitted CLV models that provides statistics about the estimate and information about the optimization process. If multiple optimization methods were used (for example if specified in parameter \code{optimx.args}), all information here refers to the last method/row of the resulting \code{optimx} object. + +Note that for the main model coefficients (all coefs not for covariates), +the significance indicators \code{z-val} and p-values are set to \code{NA} because they are by definition always +strictly positive and hypothesis test relative to a null of 0 does not make sense. } \examples{ \donttest{ diff --git a/src/pnbd_dyncov_LL.cpp b/src/pnbd_dyncov_LL.cpp index 44ea143e..e89271c6 100644 --- a/src/pnbd_dyncov_LL.cpp +++ b/src/pnbd_dyncov_LL.cpp @@ -1,7 +1,7 @@ #include "pnbd_dyncov_LL.h" void Customer::set_real_walk_life(const arma::vec& adj_covdata_real_life, const arma::rowvec& walkinfo_real_life){ - if(arma::is_finite(walkinfo_real_life(0)) && arma::is_finite(walkinfo_real_life(1))){ + if(std::isfinite(walkinfo_real_life(0)) && std::isfinite(walkinfo_real_life(1))){ this->real_walk_life = LifetimeWalk(adj_covdata_real_life, walkinfo_real_life); }else{ this->real_walk_life = EmptyLifetimeWalk(); @@ -570,7 +570,7 @@ double pnbd_dyncov_LL_i_F2_3(const double r, const double alpha_0, const double } // abort immediately, do not waste more loops - if(!arma::is_finite(F2_3)){ + if(!std::isfinite(F2_3)){ return(F2_3); } } @@ -666,7 +666,7 @@ double pnbd_dyncov_LL_i_F2(const double r, const double alpha_0, const double s, a1, b1, A1T, C1T); intermediate_results(9) = F2_1; - if(!arma::is_finite(F2_1)){ + if(!std::isfinite(F2_1)){ return(F2_1); } @@ -676,7 +676,7 @@ double pnbd_dyncov_LL_i_F2(const double r, const double alpha_0, const double s, aT, bT, AkT, CkT); intermediate_results(10) = F2_2; - if(!arma::is_finite(F2_2)){ + if(!std::isfinite(F2_2)){ return(F2_2); } @@ -820,7 +820,7 @@ Rcpp::NumericVector pnbd_dyncov_LL_i(const double r, const double alpha_0, const // advantage of the much simpler case F2==0 because also very small abs(F2) are really relevant for correct results! // double LL = 0; - if(!arma::is_finite(F2)){ + if(!std::isfinite(F2)){ LL = F2; }else{ @@ -1001,7 +1001,7 @@ Rcpp::NumericMatrix pnbd_dyncov_LL_ind(const arma::vec& params, // Check whether customer has real trans walks // Could also check x==0, but saver to look at actual content - if(arma::is_finite(walkinfo_trans_real_from(i))){ + if(std::isfinite(walkinfo_trans_real_from(i))){ // Repeat customer (with real trans walks) arma::uword wi_real_trans_from = static_cast(walkinfo_trans_real_from(i)) - 1; diff --git a/tests/testthat/helper_arrange.R b/tests/testthat/helper_arrange.R index 938e3649..3ea9a1a5 100644 --- a/tests/testthat/helper_arrange.R +++ b/tests/testthat/helper_arrange.R @@ -19,7 +19,12 @@ fct.helper.load.apparelDynCov <- function(){.load.data.locally("apparelDynCov")} -fct.helper.create.clvdata.cdnow <- function(data.cdnow = NULL, estimation.split = 37, name.price = "Price") { +fct.helper.create.clvdata.cdnow <- function( + data.cdnow = NULL, + data.end = NULL, + estimation.split = 37, + name.price = "Price") +{ if (is.null(data.cdnow)) { data.cdnow <- fct.helper.load.cdnow() } @@ -27,6 +32,7 @@ fct.helper.create.clvdata.cdnow <- function(data.cdnow = NULL, estimation.split data.transactions = data.cdnow, date.format = "ymd", time.unit = "w", + data.end = data.end, estimation.split = estimation.split, name.price = name.price ) @@ -35,6 +41,7 @@ fct.helper.create.clvdata.cdnow <- function(data.cdnow = NULL, estimation.split fct.helper.create.clvdata.apparel.nocov <- function( data.apparelTrans = NULL, + data.end = NULL, estimation.split = 104) { if (is.null(data.apparelTrans)) { @@ -45,6 +52,7 @@ fct.helper.create.clvdata.apparel.nocov <- function( data.transactions = data.apparelTrans, date.format = "ymd", time.unit = "W", + data.end = data.end, estimation.split = estimation.split )) } @@ -52,6 +60,7 @@ fct.helper.create.clvdata.apparel.nocov <- function( fct.helper.create.clvdata.apparel.staticcov <- function( data.apparelTrans = NULL, data.apparelStaticCov = NULL, + data.end = NULL, estimation.split = 104, names.cov.life = c("Gender", "Channel"), names.cov.trans = c("Gender", "Channel")) { @@ -64,7 +73,10 @@ fct.helper.create.clvdata.apparel.staticcov <- function( return(SetStaticCovariates( clvdata( - data.transactions = data.apparelTrans, date.format = "ymd", time.unit = "W", + data.transactions = data.apparelTrans, + date.format = "ymd", + time.unit = "W", + data.end = data.end, estimation.split = estimation.split ), data.cov.life = data.apparelStaticCov, @@ -77,6 +89,7 @@ fct.helper.create.clvdata.apparel.staticcov <- function( fct.helper.create.clvdata.apparel.dyncov <- function( data.apparelTrans = NULL, data.apparelDynCov = NULL, + data.end = NULL, estimation.split = 104, names.cov.life = c("High.Season", "Gender", "Channel"), names.cov.trans = c("High.Season", "Gender", "Channel")) { @@ -92,6 +105,7 @@ fct.helper.create.clvdata.apparel.dyncov <- function( data = data.apparelTrans, date.format = "ymd", time.unit = "w", + data.end = data.end, estimation.split = estimation.split )) @@ -110,15 +124,17 @@ fct.helper.create.clvdata.apparel.dyncov <- function( fit.cdnow <- function( data.cdnow = NULL, + data.end = NULL, estimation.split = 37, name.price = 'Price', model = pnbd, - start.params.model = c(), verbose = FALSE, - optimx.args = list()) { + ... + ) { clv.cdnow <- fct.helper.create.clvdata.cdnow( data.cdnow = data.cdnow, + data.end = data.end, estimation.split = estimation.split, name.price=name.price ) @@ -127,9 +143,8 @@ fit.cdnow <- function( what = model, args = list( clv.data = clv.cdnow, - start.params.model = start.params.model, - optimx.args = optimx.args, - verbose = verbose + verbose = verbose, + ... ) )) } @@ -138,17 +153,18 @@ fit.cdnow <- function( fit.apparel.nocov <- function( data.apparelTrans = NULL, + data.end = NULL, estimation.split = 104, model = pnbd, verbose=FALSE, # start.params.model = c(), - # verbose = FALSE, # optimx.args = list() ... ) { clv.data.apparel <- fct.helper.create.clvdata.apparel.nocov( data.apparelTrans = data.apparelTrans, + data.end = data.end, estimation.split = estimation.split ) @@ -165,6 +181,7 @@ fit.apparel.nocov <- function( fit.apparel.static <- function( data.apparelTrans = NULL, data.apparelStaticCov = NULL, + data.end = NULL, estimation.split = 104, names.cov.life = c("Gender", "Channel"), names.cov.trans = c("Gender", "Channel"), @@ -178,6 +195,7 @@ fit.apparel.static <- function( clv.data.apparel.cov <- fct.helper.create.clvdata.apparel.staticcov( data.apparelTrans = data.apparelTrans, data.apparelStaticCov = data.apparelStaticCov, + data.end = data.end, estimation.split = estimation.split, names.cov.life = names.cov.life, names.cov.trans = names.cov.trans @@ -197,6 +215,7 @@ fit.apparel.static <- function( fit.apparel.dyncov <- function( data.apparelTrans = NULL, data.apparelDynCov = NULL, + data.end = NULL, estimation.split = 104, names.cov.life = c("High.Season", "Gender", "Channel"), names.cov.trans = c("High.Season", "Gender", "Channel"), @@ -208,6 +227,7 @@ fit.apparel.dyncov <- function( data.apparelTrans = data.apparelTrans, data.apparelDynCov = data.apparelDynCov, estimation.split = estimation.split, + data.end = data.end, names.cov.life = names.cov.life, names.cov.trans = names.cov.trans ) diff --git a/tests/testthat/helper_s3_fitted_plot.R b/tests/testthat/helper_s3_fitted_plot.R index b9f4005c..936a004b 100644 --- a/tests/testthat/helper_s3_fitted_plot.R +++ b/tests/testthat/helper_s3_fitted_plot.R @@ -120,10 +120,12 @@ fct.testthat.runability.clvfittedtransactions.plot.tracking <- function(clv.fitt test_that("Works with newdata", { skip_on_cran() expect_silent(dt.plot <- plot(clv.fitted, newdata = clv.newdata.nohold, prediction.end=3, plot=FALSE, verbose=FALSE)) - expect_false(anyNA(dt.plot)) + # Since introducing data.end: Actuals after last transaction are kept and contain NA. Only check model + expect_false(anyNA(dt.plot[variable != "Actual"])) expect_warning(dt.plot <- plot(clv.fitted, newdata = clv.newdata.withhold, prediction.end=3, plot=FALSE, verbose=FALSE), regexp = "full holdout") - expect_false(anyNA(dt.plot)) + # Since introducing data.end: Actuals after last transaction are kept and contain NA. Only check model + expect_false(anyNA(dt.plot[variable != "Actual"])) }) test_that("Works for prediction.end in different formats, after holdout", { diff --git a/tests/testthat/helper_s3_fitted_predict.R b/tests/testthat/helper_s3_fitted_predict.R index 76257a90..ac99921e 100644 --- a/tests/testthat/helper_s3_fitted_predict.R +++ b/tests/testthat/helper_s3_fitted_predict.R @@ -13,17 +13,17 @@ fct.testthat.runability.clvfittedtransactions.predict <- function(fitted.transac expect_true(c("actual.x" %in% colnames(dt.pred))) if(clv.data.has.spending(fitted.transactions@clv.data)){ - expect_true(c("actual.total.spending" %in% colnames(dt.pred))) + expect_true(c("actual.period.spending" %in% colnames(dt.pred))) }else{ - expect_false(c("actual.total.spending" %in% colnames(dt.pred))) + expect_false(c("actual.period.spending" %in% colnames(dt.pred))) } }) } # Test - # Sum of actual.total.spending same as sum based on data + # Sum of actual.period.spending same as sum based on data # sum of actual.x same as sum based on data - # actual.total.spending all > 0 + # actual.period.spending all > 0 # actual.transactions all > 0 # predicted CLV is = X*Y if(clv.data.has.spending(fitted.transactions@clv.data)){ @@ -113,14 +113,14 @@ fct.testthat.runability.clvfittedtransactions.predict <- function(fitted.transac expect_true(dt.pred[, is.numeric(actual.x)]) if(clv.data.has.spending(fitted.transactions@clv.data)){ - expect_true("actual.total.spending" %in% colnames(dt.pred)) - expect_true(dt.pred[, is.numeric(actual.total.spending)]) + expect_true("actual.period.spending" %in% colnames(dt.pred)) + expect_true(dt.pred[, is.numeric(actual.period.spending)]) }else{ - expect_false("actual.total.spending" %in% colnames(dt.pred)) + expect_false("actual.period.spending" %in% colnames(dt.pred)) } }else{ expect_false("actual.x" %in% colnames(dt.pred)) - expect_false("actual.total.spending" %in% colnames(dt.pred)) + expect_false("actual.period.spending" %in% colnames(dt.pred)) } }) diff --git a/tests/testthat/helper_testthat_consistency.R b/tests/testthat/helper_testthat_consistency.R index 535cb5d0..38a69f33 100644 --- a/tests/testthat/helper_testthat_consistency.R +++ b/tests/testthat/helper_testthat_consistency.R @@ -72,7 +72,10 @@ fct.testthat.consistency.cov.params.0.predict.same <- function(fitted.nocov, fit fct.compare.prediction.result <- function(dt.pred.nocov, dt.pred.cov){ if(is.dyncov == TRUE){ # DERT unequal to DECT because only predict short period! - expect_silent(data.table::setnames(dt.pred.cov, old="DECT",new = "DERT")) + expect_silent(data.table::setnames( + dt.pred.cov, + old=c("DECT", "predicted.period.CLV"), + new = c("DERT", "predicted.CLV"))) expect_true(isTRUE(all.equal(dt.pred.nocov[, !c("DERT", "predicted.CLV")], dt.pred.cov[, !c("DERT", "predicted.CLV")]))) }else{ diff --git a/tests/testthat/helper_testthat_correctness_clvfitted.R b/tests/testthat/helper_testthat_correctness_clvfitted.R index 45503b06..86cefbcc 100644 --- a/tests/testthat/helper_testthat_correctness_clvfitted.R +++ b/tests/testthat/helper_testthat_correctness_clvfitted.R @@ -3,15 +3,19 @@ fct.testthat.correctness.clvfitted.flawless.results.out.of.the.box <- function(m skip_on_cran() expect_silent(fitted <- do.call(what = method, args = list(clv.data, verbose = FALSE))) expect_silent(res.sum <- summary(fitted)) - # No NAs anywhere - expect_false(any(!is.finite(coef(res.sum)))) # vcov and coef together + + # No NAs in the parameter estimates and SE (checks vcov and coef together) + # There are however NAs in zval and pval as they are on purpose set to NA for the main model params + expect_false(any(!is.finite(coef(res.sum)[, c("Estimate", "Std. Error")]))) + fct.DT.any.non.finite <- function(DT){ return(DT[, any(sapply(.SD, function(x){any(!is.finite(x))})), .SDcols = DT[, sapply(.SD, is.numeric)]]) } expect_false(fct.DT.any.non.finite(predict(fitted, verbose = FALSE))) if(is(fitted, "clv.fitted.transactions")){ - expect_false(fct.DT.any.non.finite(plot(fitted, plot = FALSE, verbose = FALSE))) + # No NAs, except last period may have (partial period that is set to NA on purpose) + expect_false(fct.DT.any.non.finite(plot(fitted, plot = FALSE, verbose = FALSE)[period.until != max(period.until)])) } # KKTs both true expect_true(res.sum$kkt1) diff --git a/tests/testthat/helper_testthat_correctness_clvtime.R b/tests/testthat/helper_testthat_correctness_clvtime.R index b0d0f1c5..ee848177 100644 --- a/tests/testthat/helper_testthat_correctness_clvtime.R +++ b/tests/testthat/helper_testthat_correctness_clvtime.R @@ -27,23 +27,35 @@ fct.helper.clv.time.create.test.objects <- function(with.holdout){ pred.tp.last <- as.Date("2008-09-27") if(with.holdout){ expect_silent(clv.t.hours <-clv.time.set.sample.periods(clv.t.hours, user.estimation.end = 37, + user.data.end = NULL, tp.first.transaction = lubridate::ymd("2005-01-20", tz="UTC"), tp.last.transaction = lubridate::ymd("2008-09-27", tz="UTC"))) expect_silent(clv.t.days <-clv.time.set.sample.periods(clv.t.days, user.estimation.end = 37, + user.data.end = NULL, tp.first.transaction = pred.tp.first, tp.last.transaction = pred.tp.last)) expect_silent(clv.t.weeks <-clv.time.set.sample.periods(clv.t.weeks, user.estimation.end = 37, + user.data.end = NULL, tp.first.transaction = pred.tp.first, tp.last.transaction = pred.tp.last)) expect_silent(clv.t.years <-clv.time.set.sample.periods(clv.t.years, user.estimation.end = 1, + user.data.end = NULL, tp.first.transaction = pred.tp.first, tp.last.transaction = pred.tp.last)) }else{ - expect_silent(clv.t.hours <-clv.time.set.sample.periods(clv.t.hours, user.estimation.end = NULL, + expect_silent(clv.t.hours <-clv.time.set.sample.periods(clv.t.hours, + user.estimation.end = NULL, + user.data.end = NULL, tp.first.transaction = lubridate::ymd("2005-01-20", tz="UTC"), tp.last.transaction = lubridate::ymd("2008-09-27", tz="UTC"))) - expect_silent(clv.t.days <-clv.time.set.sample.periods(clv.t.days, user.estimation.end = NULL, + expect_silent(clv.t.days <-clv.time.set.sample.periods(clv.t.days, + user.estimation.end = NULL, + user.data.end = NULL, tp.first.transaction = pred.tp.first, tp.last.transaction = pred.tp.last)) - expect_silent(clv.t.weeks <-clv.time.set.sample.periods(clv.t.weeks, user.estimation.end = NULL, + expect_silent(clv.t.weeks <-clv.time.set.sample.periods(clv.t.weeks, + user.estimation.end = NULL, + user.data.end = NULL, tp.first.transaction = pred.tp.first, tp.last.transaction = pred.tp.last)) - expect_silent(clv.t.years <-clv.time.set.sample.periods(clv.t.years, user.estimation.end = NULL, + expect_silent(clv.t.years <-clv.time.set.sample.periods(clv.t.years, + user.estimation.end = NULL, + user.data.end = NULL, tp.first.transaction = pred.tp.first, tp.last.transaction = pred.tp.last)) } @@ -96,7 +108,9 @@ fct.testthat.correctness.clvtime.set.sample.periods.estimation.start <- function tp.first <- fct.helper.clv.time.correct.datetype("2018-01-01", clv.t) tp.last <- fct.helper.clv.time.correct.datetype("2019-06-15", clv.t) - expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = NULL, + expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, + user.estimation.end = NULL, + user.data.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last)) expect_equal(clv.t@timepoint.estimation.start, tp.first) @@ -112,7 +126,10 @@ fct.testthat.correctness.clvtime.set.sample.periods.no.estimation.end <- functio tp.last <- fct.helper.clv.time.correct.datetype("2019-06-15", clv.t) # Dates - expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = NULL, tp.first.transaction = tp.first, + expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, + user.estimation.end = NULL, + user.data.end = NULL, + tp.first.transaction = tp.first, tp.last.transaction = tp.last)) expect_equal(clv.t@timepoint.estimation.end, tp.last) expect_equal(clv.t@timepoint.holdout.start, tp.last) @@ -135,7 +152,9 @@ fct.testthat.correctness.clvtime.set.sample.periods.numeric.estimation.end <- fu NULL) stopifnot(!is.null(splitting.end)) - expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = splitting.end, + expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, + user.estimation.end = splitting.end, + user.data.end = NULL, tp.first.transaction =tp.first, tp.last.transaction = tp.last)) expect_equal(clv.t@timepoint.estimation.end, tp.first+lubridate::period(splitting.end, period.type)) @@ -158,7 +177,9 @@ fct.testthat.correctness.clvtime.set.sample.periods.warn.partial.period <- funct NULL) stopifnot(!is.null(splitting.end)) - expect_warning(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = splitting.end, + expect_warning(clv.t <- clv.time.set.sample.periods(clv.t, + user.estimation.end = splitting.end, + user.data.end = NULL, tp.first.transaction =tp.first, tp.last.transaction = tp.last), regexp = "partial periods") @@ -176,22 +197,26 @@ fct.testthat.correctness.clvtime.set.sample.periods.stop.estimation.period.less. # Numeric expect_error(clv.time.set.sample.periods(clv.t, user.estimation.end = 0, + user.data.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last), regexp = "1 time.unit after") expect_error(clv.time.set.sample.periods(clv.t, user.estimation.end = -3, + user.data.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last), regexp = "1 time.unit after") # Date expect_error(clv.time.set.sample.periods(clv.t, user.estimation.end = tp.first, + user.data.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last), regexp = "1 time.unit after") expect_error(clv.time.set.sample.periods(clv.t, user.estimation.end = tp.first-lubridate::days(1), + user.data.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last), regexp = "1 time.unit after") @@ -199,21 +224,25 @@ fct.testthat.correctness.clvtime.set.sample.periods.stop.estimation.period.less. expect_error(clv.time.set.sample.periods(clv.time = clv.t.hours, user.estimation.end = "2018-01-01 00:35:49", + user.data.end = NULL, tp.first.transaction = fct.helper.clv.time.correct.datetype("2018-01-01", clv.t.hours), tp.last.transaction = fct.helper.clv.time.correct.datetype("2025-06-15", clv.t.hours)), regexp = "1 time.unit after") expect_error(clv.time.set.sample.periods(clv.time = clv.t.days, user.estimation.end = "2018-01-01", + user.data.end = NULL, tp.first.transaction = fct.helper.clv.time.correct.datetype("2018-01-01", clv.t.days), tp.last.transaction = fct.helper.clv.time.correct.datetype("2025-06-15", clv.t.days)), regexp = "1 time.unit after") expect_error(clv.time.set.sample.periods(clv.time = clv.t.weeks, user.estimation.end = "2018-01-03", # Wed + user.data.end = NULL, tp.first.transaction = fct.helper.clv.time.correct.datetype("2018-01-01", clv.t.weeks), # Mon tp.last.transaction = fct.helper.clv.time.correct.datetype("2025-06-15", clv.t.weeks)), regexp = "1 time.unit after") expect_error(clv.time.set.sample.periods(clv.time = clv.t.years, user.estimation.end = "2018-12-31", + user.data.end = NULL, tp.first.transaction = fct.helper.clv.time.correct.datetype("2018-01-01", clv.t.years), tp.last.transaction = fct.helper.clv.time.correct.datetype("2025-06-15", clv.t.years)), regexp = "1 time.unit after") @@ -226,32 +255,37 @@ fct.testthat.correctness.clvtime.set.sample.periods.stop.holdout.length.less.2.p tp.first <- fct.helper.clv.time.correct.datetype("2018-01-01", clv.t) tp.last <- fct.helper.clv.time.correct.datetype("2025-06-15", clv.t) expect_error(clv.time.set.sample.periods(clv.t, + user.data.end = NULL, user.estimation.end = tp.last-lubridate::hours(1), tp.first.transaction = tp.first, tp.last.transaction = tp.last), - regexp = "2 periods before") + regexp = "at least 2 time.units") } expect_error(clv.time.set.sample.periods(clv.time = clv.t.hours, user.estimation.end = "2025-06-14 22:40:11", + user.data.end = NULL, tp.first.transaction = fct.helper.clv.time.correct.datetype("2018-01-01", clv.t.hours), tp.last.transaction = fct.helper.clv.time.correct.datetype("2025-06-15", clv.t.hours)), - regexp = "2 periods before") + regexp = "at least 2 time.units") expect_error(clv.time.set.sample.periods(clv.time = clv.t.days, user.estimation.end = "2025-06-14", + user.data.end = NULL, tp.first.transaction = fct.helper.clv.time.correct.datetype("2018-01-01", clv.t.days), tp.last.transaction = fct.helper.clv.time.correct.datetype("2025-06-15", clv.t.days)), - regexp = "2 periods before") + regexp = "at least 2 time.units") expect_error(clv.time.set.sample.periods(clv.time = clv.t.weeks, user.estimation.end = "2025-06-11", # Wed + user.data.end = NULL, tp.first.transaction = fct.helper.clv.time.correct.datetype("2018-01-01", clv.t.weeks), tp.last.transaction = fct.helper.clv.time.correct.datetype("2025-06-13", clv.t.weeks)),# Fr - regexp = "2 periods before") + regexp = "at least 2 time.units") expect_error(clv.time.set.sample.periods(clv.time = clv.t.years, user.estimation.end = "2025-01-01", + user.data.end = NULL, tp.first.transaction = fct.helper.clv.time.correct.datetype("2018-01-01", clv.t.years), tp.last.transaction = fct.helper.clv.time.correct.datetype("2025-06-15", clv.t.years)), - regexp = "2 periods before") + regexp = "at least 2 time.units") } @@ -265,13 +299,17 @@ fct.testthat.correctness.clvtime.set.sample.periods.date.estimation.end <- funct if(is(clv.t, "clv.time.datetime")){ # POSIX dates in transactions - but split with Date (ymd by user) - expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = tp.split, + expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, + user.estimation.end = tp.split, + user.data.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last)) # Split same Date but as posix expect_equal(clv.t@timepoint.estimation.end, as.POSIXct.POSIXlt(as.POSIXlt.Date(tp.split), tz = "UTC")) }else{ - expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = tp.split, + expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, + user.estimation.end = tp.split, + user.data.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last)) expect_equal(clv.t@timepoint.estimation.end, tp.split) @@ -297,6 +335,7 @@ fct.testthat.correctness.clvtime.set.sample.periods.posixct.estimation.end <- fu if(is(clv.t, "clv.time.datetime")){ expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = tp.split, + user.data.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last)) @@ -306,6 +345,7 @@ fct.testthat.correctness.clvtime.set.sample.periods.posixct.estimation.end <- fu # Date transactions - but split with POSIXct (given by user) expect_message(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = tp.split, + user.data.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last), regexp = "is ignored") @@ -331,6 +371,7 @@ fct.testthat.correctness.clvtime.set.sample.periods.char.estimation.end <- funct tp.split <- "2019-07-19 15:36:19" expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = tp.split, + user.data.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last)) @@ -341,6 +382,7 @@ fct.testthat.correctness.clvtime.set.sample.periods.char.estimation.end <- funct tp.split <- "2019-07-19" expect_silent(clv.t <- clv.time.set.sample.periods(clv.t, user.estimation.end = tp.split, + user.data.end = NULL, tp.first.transaction = tp.first, tp.last.transaction = tp.last)) # Split same but as Date @@ -355,6 +397,162 @@ fct.testthat.correctness.clvtime.set.sample.periods.char.estimation.end <- funct } +fct.testthat.correctness.clvtime.set.sample.periods.data.end <- function(){ + + # + clv.t <- clv.time.weeks("ymd") + + test_that("Fail if data.end is before last transaction",{ + expect_error( + clv.time.set.sample.periods( + clv.t, + tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t = clv.t), + tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-12-31", clv.t = clv.t), + user.estimation.end = NULL, + user.data.end = "2000-12-30"), + regexp = "may not be before the last recorded transaction" + ) + + }) + + test_that("Fail if data.end leads to holdout period < 2 periods", { + expect_error( + clv.time.set.sample.periods( + clv.t, + tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t = clv.t), + tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-12-31", clv.t = clv.t), + user.estimation.end = "2000-12-28", + user.data.end = "2001-01-03"), + regexp = "holdout period of at least 2 time.units" + ) + + }) + + + # Not possible anymore since requiring estimation.split to be before last + # transaction and data.end after last transaction + # test_that("Fail if data.end leads to estimation period < 1 period", { + # expect_error( + # clv.time.set.sample.periods( + # clv.t, + # tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t = clv.t), + # tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-01-10", clv.t = clv.t), + # user.estimation.end = "2000-01-05", + # user.data.end = "2000-01-31"), + # regexp = "least 1 time.unit" + # ) + # }) + + # Not possible anymore since requiring estimation.split to be before last + # transaction and data.end after last transaction + # test_that("Fail if data.end is before estimation.split", { + # expect_error( + # clv.time.set.sample.periods( + # clv.t, + # tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t = clv.t), + # tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-12-31", clv.t = clv.t), + # user.estimation.end = "2001-02-15", + # user.data.end = "2001-01-31"), + # regexp = "holdout period of at least 2 time.units" + # ) + # }) + + test_that("Same object when no data.end as when data.end=tp.last.transaction", { + l.args <- list( + clv.time = clv.t, + tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t = clv.t), + tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-12-31", clv.t = clv.t) + ) + + # No holdout + l.args["user.estimation.end"] <- list(NULL) + expect_equal( + do.call(clv.time.set.sample.periods, c(l.args, list(user.data.end = NULL))), + do.call(clv.time.set.sample.periods, c(l.args, list(user.data.end = "2000-12-31"))) + ) + + # With holdout + l.args$user.estimation.end <- "2000-06-15" + expect_equal( + do.call(clv.time.set.sample.periods, c(l.args, list(user.data.end = NULL))), + do.call(clv.time.set.sample.periods, c(l.args, list(user.data.end = "2000-12-31"))) + ) + }) + + + # Not possible anymore since requiring estimation.split to be before last + # transaction and data.end after last transaction + # test_that("estimation.split can be after last transaction if data.end is given", { + # expect_silent( + # clv.time.set.sample.periods( + # clv.t, + # tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t=clv.t), + # tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-12-31", clv.t = clv.t), + # user.estimation.end = "2001-01-10", + # user.data.end = "2001-01-31") + # ) + # }) + + test_that("data.end only moves holdout.end (if estimation.split is before)",{ + clv.t.no.obsend <- clv.time.set.sample.periods( + clv.time = clv.t, + tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t = clv.t), + tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-12-31", clv.t = clv.t), + user.estimation.end = "2000-06-15", + user.data.end = NULL) + + clv.t.with.obsend <- clv.time.set.sample.periods( + clv.time = clv.t, + tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t=clv.t), + tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-12-31", clv.t=clv.t), + user.estimation.end = "2000-06-15", + user.data.end = "2001-02-28") + + expect_true(all( + clv.t.with.obsend@timepoint.estimation.start == clv.t.no.obsend@timepoint.estimation.start, + clv.t.with.obsend@timepoint.estimation.end == clv.t.no.obsend@timepoint.estimation.end, + clv.t.with.obsend@timepoint.holdout.start == clv.t.no.obsend@timepoint.holdout.start, + clv.t.with.obsend@estimation.period.in.tu == clv.t.no.obsend@estimation.period.in.tu + )) + + expect_true(clv.t.with.obsend@timepoint.holdout.end > clv.t.no.obsend@timepoint.holdout.end) + expect_true(clv.t.with.obsend@holdout.period.in.tu > clv.t.no.obsend@holdout.period.in.tu) + }) + + + + test_that("Manually check if yields correct timepoints", { + + + # # With holdout + split after last transaction + clv.holdout <- clv.time.set.sample.periods( + clv.time = clv.t, + tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t = clv.t), + tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-06-15", clv.t = clv.t), + user.estimation.end = "2000-04-04", + user.data.end = "2000-12-31") + + expect_true(clv.holdout@timepoint.estimation.start == "2000-01-01") + expect_true(clv.holdout@timepoint.estimation.end == "2000-04-04") + expect_true(clv.holdout@timepoint.holdout.start == "2000-04-05") + expect_true(clv.holdout@timepoint.holdout.end == "2000-12-31") + + + clv.no.holdout <- clv.time.set.sample.periods( + clv.time = clv.t, + tp.first.transaction = fct.helper.clv.time.correct.datetype("2000-01-01", clv.t = clv.t), + tp.last.transaction = fct.helper.clv.time.correct.datetype("2000-12-28", clv.t=clv.t), + user.estimation.end = NULL, + user.data.end = "2000-12-31") + + expect_true(clv.no.holdout@timepoint.estimation.start == "2000-01-01") + expect_true(clv.no.holdout@timepoint.estimation.end == "2000-12-31") + expect_true(clv.no.holdout@timepoint.holdout.start == "2000-12-31") + expect_true(clv.no.holdout@timepoint.holdout.end == "2000-12-31") + }) +} + + # convert.user.input.to.timepoint ------------------------------------------------------------------------------------- fct.testthat.correctness.clvtime.convert.user.input.chars.to.posixct <- function(clv.t.datetime){ stopifnot(is(clv.t.datetime, "clv.time.datetime")) diff --git a/tests/testthat/helper_testthat_correctness_transactions.R b/tests/testthat/helper_testthat_correctness_transactions.R index 0d0a32f1..5138b5d6 100644 --- a/tests/testthat/helper_testthat_correctness_transactions.R +++ b/tests/testthat/helper_testthat_correctness_transactions.R @@ -296,6 +296,69 @@ fct.testthat.correctness.clvfittedtransactions.staticcov.predict.newcustomer.0.f }) } +fct.testthat.correctness.clvfittedtransactions.data.end.moves.prediction.period <- function(method){ + + test_that("Using data.end moves prediction period",{ + # Only valid for no holdout data + skip_on_cran() + + fitted <- fit.cdnow( + model = method, + estimation.split = NULL, + data.end = "1998-07-15" + ) + + dt.pred <- predict(fitted, prediction.end = "1998-07-30") + + # Prediction period starts first `eps` after data.end + expect_true(dt.pred[1, "period.first"] == "1998-07-16") + # Nothing else changed + expect_true(dt.pred[1, "period.last"] == "1998-07-30") + }) + +} + +fct.testthat.correctness.clvfittedtransactions.nocov.plot.until.data.end <- function(method){ + test_that("Plotting until data.end",{ + + expect_warning(fitted <- fit.cdnow( + model = method, + estimation.split = NULL, + data.end = "1998-12-31", + # PNBD requires NM + optimx.args = list(method="Nelder-Mead", hessian=FALSE, control=list(kkt=FALSE))), + regexp = "Hessian could not be derived") + clv.time <- fitted@clv.data@clv.time + + # Data + expect_silent(dt.plot <- plot(fitted, verbose=FALSE, plot=FALSE)) + dt.after <- dt.plot[period.until > "1998-06-30"] + + # Actuals and expectation are for the same dates (none lost for either (mostly Actual)) + expect_true(all( + dt.plot[variable == "Actual", "period.until"] == dt.plot[variable != "Actual", "period.until"])) + + # Actuals are NA after last transaction but model not + expect_true(dt.after[variable == "Actual", all(is.na(value))]) + expect_false(dt.after[variable != "Actual", any(is.na(value))]) + + # Actuals and expectation are until data.end + expect_true(dt.after[, max(period.until)] >= clv.time@timepoint.holdout.end) + + + # Plotting + # Plots without warnings + expect_silent(p <- plot(fitted, verbose = FALSE, plot = TRUE)) + + # Plot has x-axis limits until data.end + # $x.range: What is really rendered + # $limits: The user set limits + p.xlim <- ggplot2::ggplot_build(p)$layout$panel_params[[1]]$x$limits + expect_true(min(p.xlim) <= clv.time@timepoint.estimation.start) + expect_true(max(p.xlim) >= clv.time@timepoint.holdout.end) + }) +} + fct.testthat.correctness.clvfittedtransactions <- function(name.model, method, correct.start.params.model, correct.params.nocov.coef, correct.LL.nocov, kkt2.true){ @@ -317,6 +380,8 @@ fct.testthat.correctness.clvfittedtransactions <- function(name.model, method, fct.testthat.correctness.clvfittedtransactions.nocov.newdata.fitting.sample.predicting.full.data.equal(method = method, clv.cdnow = clv.cdnow) fct.testhat.correctness.clvfittedtransactions.same.spending.as.independent.spending.model(method = method, clv.data = clv.cdnow) + fct.testthat.correctness.clvfittedtransactions.nocov.plot.until.data.end(method=method) + if(fct.helper.has.pmf(obj.fitted)){ fct.testthat.correctness.clvfittedtransactions.pmf.more.x.more.p(clv.fitted = obj.fitted) fct.testthat.correctness.clvfittedtransactions.pmf.valid.values(clv.fitted = obj.fitted) @@ -326,6 +391,8 @@ fct.testthat.correctness.clvfittedtransactions <- function(name.model, method, # predict(newdata=newcustomer): no cov fct.testthat.correctness.clvfittedtransactions.nocov.predict.newcustomer.0.for.num.periods.eq.0(obj.fitted) + # data.end moves start of prediction period + fct.testthat.correctness.clvfittedtransactions.data.end.moves.prediction.period(method = method) # Static cov data -------------------------------------------------------------------------------------------- # why 100 and not 104??????? diff --git a/tests/testthat/helper_testthat_dyncov.R b/tests/testthat/helper_testthat_dyncov.R index abb8a9d3..46f265e2 100644 --- a/tests/testthat/helper_testthat_dyncov.R +++ b/tests/testthat/helper_testthat_dyncov.R @@ -1,8 +1,8 @@ fct.helper.dyncov.create.longer.dyncov.data <- function(num.additional, data.apparelDynCov){ # Add additional weeks of fake cov data for all Ids - dt.additional.cov <- expand.grid(Id = unique(apparelDynCov$Id), - Cov.Date = seq(from=apparelDynCov[, max(Cov.Date)]+lubridate::weeks(1), + dt.additional.cov <- expand.grid(Id = unique(data.apparelDynCov$Id), + Cov.Date = seq(from=data.apparelDynCov[, max(Cov.Date)]+lubridate::weeks(1), length.out = num.additional, by = "week"), stringsAsFactors = FALSE) setDT(dt.additional.cov) dt.additional.cov[, High.Season := rep(c(0,1,1,0),.N/4)] diff --git a/tests/testthat/helper_testthat_inputchecks_nocov.R b/tests/testthat/helper_testthat_inputchecks_nocov.R index cf46402a..1278ba6a 100644 --- a/tests/testthat/helper_testthat_inputchecks_nocov.R +++ b/tests/testthat/helper_testthat_inputchecks_nocov.R @@ -215,7 +215,7 @@ fct.testthat.inputchecks.cannot.predict.without.spending <- function(method, is. regexp = "there is no spending data") # but works without spending expect_silent(dt.pred <- predict(clv.spending, newdata=clv.cdnow.nospending, predict.spending=FALSE, verbose=FALSE)) - expect_false(any(c("predicted.mean.spending","predicted.CLV") %in% colnames(dt.pred))) + expect_false(any(c("predicted.mean.spending","predicted.CLV", "predicted.period.CLV") %in% colnames(dt.pred))) } }) } diff --git a/tests/testthat/helper_testthat_runability_dynamiccov.R b/tests/testthat/helper_testthat_runability_dynamiccov.R index 595a85a9..38a4c180 100644 --- a/tests/testthat/helper_testthat_runability_dynamiccov.R +++ b/tests/testthat/helper_testthat_runability_dynamiccov.R @@ -36,11 +36,11 @@ fct.testthat.runability.dynamiccov.predict.works <- function(clv.fitted){ fct.testthat.runability.dynamiccov.predict.newdata.works <- function(clv.fitted){ - sample.ids <- unique(apparelTrans$Id)[101:200] - data.apparelTrans <- fct.helper.load.apparelTrans() data.apparelDynCov <- fct.helper.load.apparelDynCov() + sample.ids <- unique(data.apparelTrans$Id)[101:200] + clv.dyncov.sample <- fct.helper.create.clvdata.apparel.dyncov( data.apparelTrans=data.apparelTrans[Id %in% sample.ids], data.apparelDynCov=data.apparelDynCov[Id %in% sample.ids], diff --git a/tests/testthat/test_correctness_clvdata_s3.R b/tests/testthat/test_correctness_clvdata_s3.R index cdd7262b..3e09d1c8 100644 --- a/tests/testthat/test_correctness_clvdata_s3.R +++ b/tests/testthat/test_correctness_clvdata_s3.R @@ -268,6 +268,36 @@ test_that("Always returns a copy of the data", { # plot --------------------------------------------------------------------- +# . tracking --------------------------------------------------------------- +test_that("tracking plot - without data.end: Last period is NA and plots without warnings", { + skip_on_cran() + clv.cdnow <- fct.helper.create.clvdata.cdnow() + + # Last period is NA in cdnow (on purpose because its a partial period and no longer dropped) + dt.plot <- plot(clv.cdnow, which = "tracking", verbose = FALSE, plot=FALSE) + expect_true(dt.plot[period.until==max(period.until), is.na(value)]) + + # Plots without warnings although data contains NA + expect_silent(plot(clv.cdnow, which="tracking", verbose=FALSE, plot=TRUE)) +}) + +test_that("tracking plot - with data.end: NA after last transaction until data.end and plots w/o warning", { + skip_on_cran() + clv.cdnow <- fct.helper.create.clvdata.cdnow(data.end="1998-12-31") + + # All periods after last transaction are NA + dt.plot <- plot(clv.cdnow, which = "tracking", verbose = FALSE, plot=FALSE) + dt.plot.empty <- dt.plot[period.until > "1998-06-30"] + # Data until at least data.end + expect_true(dt.plot.empty[, max(period.until)] >= "1998-12-31") + # They are all NA + expect_true(dt.plot.empty[, all(is.na(value))]) + + # Plots without warnings although data contains NA + expect_silent(plot(clv.cdnow, which="tracking", verbose=FALSE, plot=TRUE)) +}) + + # . frequency --------------------------------------------------------------- test_that("frequency plot - actual trans has no 0", { diff --git a/tests/testthat/test_correctness_clvtime.R b/tests/testthat/test_correctness_clvtime.R index 44d16760..ccfdf7c3 100644 --- a/tests/testthat/test_correctness_clvtime.R +++ b/tests/testthat/test_correctness_clvtime.R @@ -11,6 +11,7 @@ for(clv.t in c(fct.helper.clv.time.create.test.objects(with.holdout = FALSE), # set.sample.periods -------------------------------------------------------------------------------- +# . no data.end ----------------------------------------------------------------------------- for(clv.t in list(clv.time.hours(time.format="ymd HMS"), clv.time.days( time.format="ymd"), clv.time.weeks(time.format="ymd"), @@ -33,6 +34,12 @@ fct.testthat.correctness.clvtime.set.sample.periods.stop.holdout.length.less.2.p clv.t.weeks = clv.time.weeks(time.format="ymd"), clv.t.years = clv.time.years(time.format="ymd")) +# . with data.end -------------------------------------------------------------------------- +fct.testthat.correctness.clvtime.set.sample.periods.data.end() + + + + # convert.user.input.to.timepoint -------------------------------------------------------------------------------- for(clv.t in c(fct.helper.clv.time.create.test.objects(with.holdout = FALSE), fct.helper.clv.time.create.test.objects(with.holdout = TRUE))){ diff --git a/tests/testthat/test_correctness_hessian.R b/tests/testthat/test_correctness_hessian.R new file mode 100644 index 00000000..57975e07 --- /dev/null +++ b/tests/testthat/test_correctness_hessian.R @@ -0,0 +1,134 @@ +skip_on_cran() + +optimx.args <- list(itnmax=100) + +fn.compare.hessian <- function(clv.fitted){ + expect_equal( + hessian(clv.fitted), + clv.fitted@optimx.hessian + ) +} + + +test_that("hessian() produces same result - no cov", { + skip_on_cran() + + for(m in list(pnbd, bgnbd, ggomnbd, gg)){ + fn.compare.hessian( + fit.cdnow(model=m, optimx.args = optimx.args) + ) + } + + # With cor + fn.compare.hessian( + fit.cdnow(model = pnbd, use.cor=TRUE, optimx.args = optimx.args) + ) +}) + + + +test_that("hessian() produces same result - static cov", { + skip_on_cran() + + for(m in list(pnbd, bgnbd, ggomnbd)){ + # Default specification + fn.compare.hessian( + fit.apparel.static(model=m, optimx.args = optimx.args) + ) + + # With constrained covs + fn.compare.hessian( + fit.apparel.static( + model=m, + names.cov.constr = "Gender", + optimx.args = optimx.args) + ) + + # With regularization + fn.compare.hessian( + fit.apparel.static( + model=m, + reg.lambdas = c(life = 10, trans=5), + optimx.args = optimx.args) + ) + + # With constrained covs & regularization + fn.compare.hessian( + fit.apparel.static( + model=m, + names.cov.constr = "Channel", + reg.lambdas = c(life = 10, trans=5), + optimx.args = optimx.args) + ) + } + + + # PNBD only: With cor + fn.compare.hessian( + fit.apparel.static(model = pnbd, use.cor=TRUE, optimx.args = optimx.args) + ) +}) + + +test_that("hessian() produces same result - dyn cov", { + skip_on_cran() + + # Default + fn.compare.hessian( + fit.apparel.dyncov(model = pnbd, optimx.args = optimx.args) + ) + + # With cor + fn.compare.hessian( + fit.apparel.dyncov(model = pnbd, use.cor=TRUE, optimx.args = optimx.args) + ) + + # With constrained covs + fn.compare.hessian( + fit.apparel.dyncov(model = pnbd, names.cov.constr = "Gender", optimx.args = optimx.args) + ) + + # With regularization + fn.compare.hessian( + fit.apparel.dyncov(model = pnbd, reg.lambdas = c(trans=10, life=5), optimx.args = optimx.args) + ) + +}) + + + +test_that("hessian() fails if parameters are non-finite",{ + skip_on_cran() + + p.cdnow <- fit.cdnow(optimx.args=optimx.args) + p.cdnow@optimx.estimation.output[1, "log.r"] <- NA_real_ + + expect_error(hessian(p.cdnow), regexp = "Cannot proceed") +}) + +test_that("Internal clv.fitted.get.LL: Params position and order checked", { + # Indirectly tested for correctness by being used in hessian() + skip_on_cran() + + p.reg.constr <- fit.apparel.static( + model = pnbd, + reg.lambdas = c(trans = 4, life = 9), + names.cov.constr = "Gender", + optimx.args= optimx.args) + + LL.reg.constr <- clv.fitted.get.LL(p.reg.constr) + final.coefs <- drop(coef(p.reg.constr@optimx.estimation.output)) + + # Have to be named + expect_error(LL.reg.constr(setNames(final.coefs, NULL)), regexp = "has to be named") + + # Does not work with extra coefs + expect_error(LL.reg.constr(coef(p.reg.constr)), regexp = "has to be named") + + # Results are independent of order + expect_identical( + LL.reg.constr(sort(final.coefs, decreasing = FALSE)), + LL.reg.constr(sort(final.coefs, decreasing = TRUE)) + ) + +}) diff --git a/tests/testthat/test_inputchecks_clvdata_clvdata.R b/tests/testthat/test_inputchecks_clvdata_clvdata.R index d25ed279..918719f7 100644 --- a/tests/testthat/test_inputchecks_clvdata_clvdata.R +++ b/tests/testthat/test_inputchecks_clvdata_clvdata.R @@ -202,14 +202,10 @@ test_that("Fails with split after last transaction",{ }) test_that("Fails with split in 2 periods before last transaction (ie in last period)",{ - expect_error(clvdata(estimation.split = "1998-06-30",time.unit = "d", data.transactions = cdnow, - date.format="ymd"), regexp = "before the last transaction") expect_error(clvdata(estimation.split = "1998-06-29",time.unit = "d", data.transactions = cdnow, - date.format="ymd"), regexp = "before the last transaction") - expect_error(clvdata(estimation.split = "1998-06-30",time.unit = "w", data.transactions = cdnow, - date.format="ymd"), regexp = "before the last transaction") + date.format="ymd"), regexp = "a holdout period of at least 2 time.units") expect_error(clvdata(estimation.split = "1998-06-21",time.unit = "w", data.transactions = cdnow, - date.format="ymd"), regexp = "before the last transaction") + date.format="ymd"), regexp = "a holdout period of at least 2 time.units") }) test_that("Fails if before all first transactions by customer", { diff --git a/tests/testthat/test_inputchecks_clvdata_setdynamiccov.R b/tests/testthat/test_inputchecks_clvdata_setdynamiccov.R index 7736d58b..17fdf0e4 100644 --- a/tests/testthat/test_inputchecks_clvdata_setdynamiccov.R +++ b/tests/testthat/test_inputchecks_clvdata_setdynamiccov.R @@ -13,22 +13,32 @@ fct.expect.error.setdyncov <- function( names.cov.trans=c("High.Season", "Gender", "Channel"), name.id = "Id", name.date = "Cov.Date", + make.missing = NULL, regexp=NULL){ + if(!is.null(make.missing)){ + e <- environment() + # e[[make.missing]] <- NULL + rm(list=make.missing, envir = e) + } + expect_error(SetDynamicCovariates( clv.data = clv.data, data.cov.life = data.cov.life, names.cov.life = names.cov.life, data.cov.trans = data.cov.trans, - names.cov.trans = names.cov.trans - )) + names.cov.trans = names.cov.trans, + name.id = name.id, + name.date = name.date + ), + regexp = regexp) } # Parameter clv.data --------------------------------------------------------------------------------------- test_that("Fails if not clv.data input", { # missing/NA/NULL - fct.expect.error.setdyncov(clv.data = ) + fct.expect.error.setdyncov(make.missing = "clv.data") fct.expect.error.setdyncov(clv.data = NULL) fct.expect.error.setdyncov(clv.data = NA_real_) @@ -52,14 +62,14 @@ test_that("Fails if already has covariates", { test_that("Fails if is wrong type ", { # data.cov.life - fct.expect.error.setdyncov(data.cov.life = , regexp = "missing") + fct.expect.error.setdyncov(make.missing="data.cov.life", regexp = "not found") fct.expect.error.setdyncov(data.cov.life = NULL, regexp = "type data.frame or data.table") fct.expect.error.setdyncov(data.cov.life = NA, regexp = "type data.frame or data.table") fct.expect.error.setdyncov(data.cov.life = as.list(apparelDynCov), regexp = "type data.frame or data.table") # data.cov.trans - fct.expect.error.setdyncov(data.cov.trans = , regexp = "missing") + fct.expect.error.setdyncov(make.missing = "data.cov.trans", regexp = "not found") fct.expect.error.setdyncov(data.cov.trans = NULL, regexp = "type data.frame or data.table") fct.expect.error.setdyncov(data.cov.trans = NA, regexp = "type data.frame or data.table") fct.expect.error.setdyncov(data.cov.trans = as.list(apparelDynCov), regexp = "type data.frame or data.table") @@ -90,6 +100,18 @@ test_that("Fails if covariate data is to short for all customers",{ fct.expect.error.setdyncov(data.cov.trans = apparelDynCov.tooshort, regexp = "covariate data exactly from") }) +test_that("Fails if covariate data ends before data.end", { + + clv.data.apparel.obsE <- fct.helper.create.clvdata.apparel.nocov( + estimation.split = NULL, + data.end = "2012-12-31") + + fct.expect.error.setdyncov( + clv.data = clv.data.apparel.obsE, + regexp = "There need to be weekly covariate data exactly") + +}) + test_that("Fails if there are Ids in the covariates that are not in the transaction data", { dt.cov.1additional <- data.table::copy(apparelDynCov[Id == "1"]) dt.cov.1additional[, Id := "ABC"] @@ -214,13 +236,13 @@ test_that("Fails for variable with single category", { test_that("Fails if missing/NULL/NA/empty",{ # names.cov.life - fct.expect.error.setdyncov(names.cov.life = , regexp = "missing") + fct.expect.error.setdyncov(make.missing = "names.cov.life", regexp = "not found") fct.expect.error.setdyncov(names.cov.life = NULL, regexp = "may not be NULL") fct.expect.error.setdyncov(names.cov.life = NA_character_, regexp = "any NA") fct.expect.error.setdyncov(names.cov.life = "", regexp = "could not be found") # names.cov.trans - fct.expect.error.setdyncov(names.cov.trans = , regexp = "missing") + fct.expect.error.setdyncov(make.missing = "names.cov.trans", regexp = "not found") fct.expect.error.setdyncov(names.cov.trans = NULL, regexp = "may not be NULL") fct.expect.error.setdyncov(names.cov.trans = NA_character_, regexp = "any NA") fct.expect.error.setdyncov(names.cov.trans = "", regexp = "could not be found") @@ -313,7 +335,7 @@ test_that("Has default argument Id",{ # Parameter name.date --------------------------------------------------------------------------------------- test_that("Fails if NA/NULL", { - fct.expect.error.setdyncov(name.date = "id", regexp = "NULL") + fct.expect.error.setdyncov(name.date = "id", regexp = "could not be found") fct.expect.error.setdyncov(name.date = NA_character_, regexp = "any NA") fct.expect.error.setdyncov(name.date = character(0), regexp = "exactly 1 element") }) diff --git a/tests/testthat/test_runability_clvdata_clvdata.R b/tests/testthat/test_runability_clvdata_clvdata.R index 2750b080..887a5d88 100644 --- a/tests/testthat/test_runability_clvdata_clvdata.R +++ b/tests/testthat/test_runability_clvdata_clvdata.R @@ -249,3 +249,40 @@ test_that("Works when called from as.clv.data()", { expect_silent(as.clv.data(as.data.frame(cdnow))) expect_silent(as.clv.data(as.data.table(cdnow))) }) + +# data.end ---------------------------------------------------------------- + +test_that("Works with data.end with and without holdout",{ + skip_on_cran() + l.args <- list( + data.end = "2000-01-01", + data.transactions = cdnow, + time.unit = "w", + date.format = "ymd" + ) + + l.args["estimation.split"] <- list(NULL) + expect_silent(do.call(clvdata, l.args)) + + l.args$estimation.split <- 37 + expect_silent(do.call(clvdata, l.args)) +}) + +test_that("Works with data.end and time.units hours, days, years", { + l.args <- list( + data.transactions = cdnow, + data.end = "2000-01-01", + estimation.split = NULL, + date.format = "ymd" + ) + + l.args$time.unit <- "days" + expect_silent(do.call(clvdata, l.args)) + + l.args$time.unit <- "year" + expect_silent(do.call(clvdata, l.args)) + + l.args$time.unit <- "hours" + expect_silent(do.call(clvdata, l.args)) +}) + diff --git a/tests/testthat/test_runability_clvdata_s3.R b/tests/testthat/test_runability_clvdata_s3.R index a4259260..0422d72e 100644 --- a/tests/testthat/test_runability_clvdata_s3.R +++ b/tests/testthat/test_runability_clvdata_s3.R @@ -25,9 +25,11 @@ fct.helper.test.runability.clv.data.summary <- function(clv.data){ # warning if inexistent ids expect_warning(summary(clv.data, ids=c(ids, "abczxy")), regexp = "Not all given ids were found") - # id with trans in holdout - expect_silent(id.with.holdout <- clv.data@data.transactions[Date>=clv.data@clv.time@timepoint.holdout.start, head(Id,n=1)]) - expect_silent(summary(clv.data, ids=id.with.holdout)) + if(clv.data.has.holdout(clv.data)){ + # id with trans in holdout + expect_silent(id.with.holdout <- clv.data@data.transactions[Date>=clv.data@clv.time@timepoint.holdout.start, head(Id,n=1)]) + expect_silent(summary(clv.data, ids=id.with.holdout)) + } # id without trans in holdout # any zero-repeater @@ -270,20 +272,6 @@ fct.helper.test.runability.clv.data.others3 <- function(clv.data){ } - - -# Create all combos: {w/, w/o} holdout, {w/, w/o} {static, dynamic} covs - -apparel.holdout <- fct.helper.create.clvdata.apparel.nocov() -apparel.no.holdout <- fct.helper.create.clvdata.apparel.nocov(estimation.split = NULL) - -apparel.holdout.static.cov <- fct.helper.create.clvdata.apparel.staticcov() -apparel.no.holdout.static.cov <- fct.helper.create.clvdata.apparel.staticcov(estimation.split = NULL) - -apparel.holdout.dyn.cov <- fct.helper.create.clvdata.apparel.dyncov() -apparel.no.holdout.dyn.cov <- fct.helper.create.clvdata.apparel.dyncov(estimation.split = NULL) - - fct.helper.test.runability.clv.data.runall <- function(clv.data){ fct.helper.test.runability.clv.data.trackingplot(clv.data) fct.helper.test.runability.clv.data.plotfrequency(clv.data) @@ -295,10 +283,25 @@ fct.helper.test.runability.clv.data.runall <- function(clv.data){ } -fct.helper.test.runability.clv.data.runall(apparel.holdout) -fct.helper.test.runability.clv.data.runall(apparel.no.holdout) -fct.helper.test.runability.clv.data.runall(apparel.holdout.static.cov) -fct.helper.test.runability.clv.data.runall(apparel.no.holdout.static.cov) -fct.helper.test.runability.clv.data.runall(apparel.holdout.dyn.cov) -fct.helper.test.runability.clv.data.runall(apparel.no.holdout.dyn.cov) + +for(fn in list( + fct.helper.create.clvdata.apparel.nocov, + fct.helper.create.clvdata.apparel.staticcov +)){ + # With holdout + fct.helper.test.runability.clv.data.runall(fn()) + # . with data.end + fct.helper.test.runability.clv.data.runall(fn(data.end="2011-01-31")) + + # Without holdout + fct.helper.test.runability.clv.data.runall(fn(estimation.split=NULL)) + # . with data.end + fct.helper.test.runability.clv.data.runall(fn(estimation.split=NULL, data.end="2011-01-31")) + +} + +# data.end would require to prepare transaction data (cut before cov end) +# But dyncovs are not used for any of the s3 methods, therefore skip testing with data.end for dyncov +fct.helper.test.runability.clv.data.runall(fct.helper.create.clvdata.apparel.dyncov()) +fct.helper.test.runability.clv.data.runall(fct.helper.create.clvdata.apparel.dyncov(estimation.split = NULL)) diff --git a/tests/testthat/test_runability_pnbd_dynamiccov.R b/tests/testthat/test_runability_pnbd_dynamiccov.R index d5277fc7..acc99163 100644 --- a/tests/testthat/test_runability_pnbd_dynamiccov.R +++ b/tests/testthat/test_runability_pnbd_dynamiccov.R @@ -93,3 +93,68 @@ test_that("Dyncov works with additional model specifications", { names.cov.constr = "Gender", reg.lambda = c(trans=10, life=10)) }) + + +# With data.end --------------------------------------------------------- + +test_that("Fit, plot, predict work with partially empty estimation/holdout period", { + + covs.life <- c("High.Season", "Gender") + covs.trans <- c("High.Season", "Gender", "Channel") + + apparelTrans.cut.obsE <- apparelTrans[Date < "2010-12-01"] + date.original.trans.max <- apparelTrans[, max(Date)] + + + # fct.helper.runability.dyncov.all.downstream requires holdout + + names.params <- c( + 'r', 'alpha', 's', 'beta', + paste0('life.', covs.life), + paste0('trans.', covs.trans)) + + + # Create object with no transactions in December 2010 but with the observation + # period as originally (2010-12-20) + + # No holdout + expect_silent(fitted.dyncov.noholdout.obsE <- fit.apparel.dyncov( + data.apparelTrans = apparelTrans.cut.obsE, + estimation.split = NULL, + data.end = date.original.trans.max, + names.cov.life = covs.life, + names.cov.trans = covs.trans, + optimx.args=fct.helper.dyncov.get.optimxargs.quickfit(hessian=TRUE) + )) + + .fct.helper.clvfitted.all.s3.except.plot.and.predict( + clv.fitted=fitted.dyncov.noholdout.obsE, + full.names=names.params + ) + + expect_silent(predict(fitted.dyncov.noholdout.obsE, prediction.end=1, verbose=FALSE)) + expect_silent(plot(fitted.dyncov.noholdout.obsE, prediction.end = NULL, verbose=FALSE)) + + + # With holdout + expect_silent(fitted.dyncov.holdout.obsE <- fit.apparel.dyncov( + data.apparelTrans = apparelTrans.cut.obsE, + estimation.split = 104, + data.end = date.original.trans.max, + names.cov.life = covs.life, + names.cov.trans = covs.trans, + optimx.args=fct.helper.dyncov.get.optimxargs.quickfit(hessian=TRUE) + )) + + .fct.helper.clvfitted.all.s3.except.plot.and.predict( + clv.fitted=fitted.dyncov.holdout.obsE, + full.names=names.params + ) + + expect_silent(predict(fitted.dyncov.holdout.obsE, prediction.end=5, verbose=FALSE)) + expect_warning( + plot(fitted.dyncov.holdout.obsE, prediction.end = 5, verbose=FALSE), + regexp = "Not plotting full holdout period" + ) + +}) diff --git a/vignettes/CLVTools.Rmd b/vignettes/CLVTools.Rmd index a9e9cf49..086691d1 100644 --- a/vignettes/CLVTools.Rmd +++ b/vignettes/CLVTools.Rmd @@ -1,5 +1,11 @@ --- title: "Walkthrough for the CLVTools Package" +author: + - "Patrick Bachmann" + - "Patrik Schilter" + - "Jeffrey Näf" + - "Markus Meierer" +date: September 15, 2024 output: pdf_document: latex_engine: xelatex @@ -155,7 +161,7 @@ To execute the model estimation you have the choice between a formula-based inte -Parameter estimates may be reported by either printing the estimated object (i.e. `est.pnbd`) directly in the console or by calling `summary(est.pnbd)` to get a more detailed report including the likelihood value as well as AIC and BIC. Alternatively parameters may be directly extracted using `coef(est.pnbd)`. Also `loglik()`, `confint()` and `vcov()` are available to directly access the Loglikelihood value, confidence intervals for the parameters and to calculate the Variance-Covariance Matrix for the fitted model. For the standard Pareto/NBD model, we get 4 parameters $r, \alpha, s$ and $\beta$. where $r,\alpha$ represent the shape and scale parameter of the gamma distribution that determines the purchase rate and $s,\beta$ of the attrition rate across individual customers. $r/\alpha$ can be interpreted as the mean purchase and $s/\beta$ as the mean attrition rate. A significance level is provided for each parameter estimates. In the case of the apparelTrans dataset we observe a an average purchase rate of $r/\alpha=0.147$ transactions and an average attrition rate of $s/\beta=0.031$ per customer per week. KKT 1 and 2 indicate the Karush-Kuhn-Tucker optimality conditions of the first and second order [@KKT]. If those criteria are not met, the optimizer has probably not arrived at an optimal solution. If this is the case it is usually a good idea to rerun the estimation using alternative starting values. +Parameter estimates may be reported by either printing the estimated object (i.e. `est.pnbd`) directly in the console or by calling `summary(est.pnbd)` to get a more detailed report including the likelihood value as well as AIC and BIC. Alternatively parameters may be directly extracted using `coef(est.pnbd)`. Also `loglik()`, `confint()` and `vcov()` are available to directly access the Loglikelihood value, confidence intervals for the parameters and to calculate the Variance-Covariance Matrix for the fitted model. For the standard Pareto/NBD model, we get 4 parameters $r, \alpha, s$ and $\beta$. where $r,\alpha$ represent the shape and scale parameter of the gamma distribution that determines the purchase rate and $s,\beta$ of the attrition rate across individual customers. $r/\alpha$ can be interpreted as the mean purchase and $s/\beta$ as the mean attrition rate. Note that the significance indicators are set to `NA` for each parameter. The main model parameters are by definition always strictly positive and a hypothesis test relative to a null of 0 therefore does not make sense. In the case of the apparelTrans dataset we observe a an average purchase rate of $r/\alpha=0.147$ transactions and an average attrition rate of $s/\beta=0.031$ per customer per week. KKT 1 and 2 indicate the Karush-Kuhn-Tucker optimality conditions of the first and second order [@KKT]. If those criteria are not met, the optimizer has probably not arrived at an optimal solution. If this is the case it is usually a good idea to rerun the estimation using alternative starting values. ```{r param-summary} @@ -228,7 +234,7 @@ If spending information was provided when initializing the `clvdata`-object, `CL * predicted mean spending estimated by a Gamma/Gamma model [@Colombo1999; @Fader2005c] and * the customer lifetime value (CLV). CLV is calculated as the product of DERT and predicted spending. -If a holdout period is available additionally the true numbers of transactions ("actual.x") and true spending ("actual.total.spending") during the holdout period are reported. +If a holdout period is available additionally the true numbers of transactions ("actual.x") and true spending ("actual.period.spending") during the holdout period are reported. To use the parameter estimates on new data (e.g., an other customer cohort), the argument `newdata` optionally allows to provide a new `clvdata` object. @@ -383,7 +389,7 @@ est.pnbd.dyn <- pnbd(clv.dyn, optimx.args = list(control=list(trace=5))) ``` -To inspect the estimated model we use `summary()`, however all other commands such as `print()`, `coef()`, `loglike()`, `confint()` and `vcov()` are also available. Now, output contains also parameters for the covariates for both processes. Since covariates are added separately for the purchase and the attrition process, there are also separate model parameters for the two processes. These parameters are directly interpretable as rate elasticity of the corresponding factors: A 1% change in a contextual factor $\bf{X}^{P}$ or $\bf{X}^{L}$ changes the purchase or the attrition rate by $\gamma_{purch}\bf{X}^{P}$ or $\gamma_{life}\bf{X}^{L}$ percent, respectively [@Gupta1991]. In the example of the apparel retailer, we observe that female customer purchase significantly more (`trans.Gender=1.42576`). Note, that female customers are coded as 1, male customers as 0. Also customers acquired offline (coded as Channel=1), purchase more (`trans.Channel=0.40304`) and stay longer (`life.Channel=0.9343`). Make sure to check the Karush-Kuhn-Tucker optimality conditions of the first and second order [@KKT] (KKT1 and KKT1) before interpreting the parameters. If those criteria are not met, the optimizer has probably not arrived at an optimal solution. If this is the case it is usually a good idea to rerun the estimation using alternative starting values. +To inspect the estimated model we use `summary()`, however all other commands such as `print()`, `coef()`, `loglike()`, `confint()` and `vcov()` are also available. Now, output contains also parameters for the covariates for both processes. Since covariates are added separately for the purchase and the attrition process, there are also separate model parameters for the two processes. Note that while significance indicators are `NA` for the main model parameters, they are present for the covariate parameters because a hypothesis test relative to a null of 0 does make sense for them. These parameters are directly interpretable as rate elasticity of the corresponding factors: A 1% change in a contextual factor $\bf{X}^{P}$ or $\bf{X}^{L}$ changes the purchase or the attrition rate by $\gamma_{purch}\bf{X}^{P}$ or $\gamma_{life}\bf{X}^{L}$ percent, respectively [@Gupta1991]. In the example of the apparel retailer, we observe that female customer purchase significantly more (`trans.Gender=1.42576`). Note, that female customers are coded as 1, male customers as 0. Also customers acquired offline (coded as Channel=1), purchase more (`trans.Channel=0.40304`) and stay longer (`life.Channel=0.9343`). Make sure to check the Karush-Kuhn-Tucker optimality conditions of the first and second order [@KKT] (KKT1 and KKT1) before interpreting the parameters. If those criteria are not met, the optimizer has probably not arrived at an optimal solution. If this is the case it is usually a good idea to rerun the estimation using alternative starting values. ```{r Cov-summary} diff --git a/vignettes/CLVTools_advanced_techniques.pdf b/vignettes/CLVTools_advanced_techniques.pdf new file mode 100644 index 00000000..5c64d52a Binary files /dev/null and b/vignettes/CLVTools_advanced_techniques.pdf differ diff --git a/vignettes/CLVTools_advanced_techniques.pdf.asis b/vignettes/CLVTools_advanced_techniques.pdf.asis new file mode 100644 index 00000000..3783ab5c --- /dev/null +++ b/vignettes/CLVTools_advanced_techniques.pdf.asis @@ -0,0 +1,10 @@ +%\VignetteIndexEntry{Advanced and Very Advanced Modeling Techniques in CLVTools} +%\VignetteEngine{R.rsp::asis} +%\VignetteEncoding{UTF-8} +%\VignetteKeyword{regularization} +%\VignetteKeyword{correlation} +%\VignetteKeyword{equality constraints} +%\VignetteKeyword{endogenous covariates} +%\VignetteKeyword{hessian} +%\VignetteKeyword{bootstrapping} +%\VignetteKeyword{parameter bootstrapping} diff --git a/vignettes/CLVTools_classes.pdf b/vignettes/CLVTools_classes.pdf new file mode 100644 index 00000000..b6b15c22 Binary files /dev/null and b/vignettes/CLVTools_classes.pdf differ diff --git a/vignettes/CLVTools_classes.pdf.asis b/vignettes/CLVTools_classes.pdf.asis new file mode 100644 index 00000000..f54c2b16 --- /dev/null +++ b/vignettes/CLVTools_classes.pdf.asis @@ -0,0 +1,6 @@ +%\VignetteIndexEntry{Classes in CLVTools} +%\VignetteEngine{R.rsp::asis} +%\VignetteEncoding{UTF-8} +%\VignetteKeyword{CLVTools} +%\VignetteKeyword{classes} +%\VignetteKeyword{clv.fitted} diff --git a/vignettes/CLVTools_intuitive_explanations.pdf b/vignettes/CLVTools_intuitive_explanations.pdf new file mode 100644 index 00000000..496ec48f Binary files /dev/null and b/vignettes/CLVTools_intuitive_explanations.pdf differ diff --git a/vignettes/CLVTools_intuitive_explanations.pdf.asis b/vignettes/CLVTools_intuitive_explanations.pdf.asis new file mode 100644 index 00000000..c797754c --- /dev/null +++ b/vignettes/CLVTools_intuitive_explanations.pdf.asis @@ -0,0 +1,6 @@ +%\VignetteIndexEntry{Probabilistic Models for Analyzing Customer Purchase Behavior: A Primer} +%\VignetteEngine{R.rsp::asis} +%\VignetteEncoding{UTF-8} +%\VignetteKeyword{customer analysis} +%\VignetteKeyword{probabilistic models} +%\VignetteKeyword{intuition} diff --git a/vignettes/internal_docu/internal_docu.Rmd b/vignettes/internal_docu/internal_docu similarity index 100% rename from vignettes/internal_docu/internal_docu.Rmd rename to vignettes/internal_docu/internal_docu diff --git a/vignettes/internal_docu/uml_overview.uxf b/vignettes/internal_docu/uml_overview.uxf new file mode 100644 index 00000000..a710375d --- /dev/null +++ b/vignettes/internal_docu/uml_overview.uxf @@ -0,0 +1,2 @@ +10UMLClass5902019030clv.data +UMLClass5908019030clv.data.static.covariatesUMLClass59014019030clv.data.dynamic.covariatesRelation680403060lt=<<-10;10;10;40Relation6801003060lt=<<-10;10;10;40UMLClass1200010030/clv.time/UMLClass13106010030/clv.time.date/UMLClass131012010030clv.time.weekUMLClass120012010030clv.time.dayUMLClass106012010030clv.time.hourUMLClass10306015030/clv.time.datetime/UMLClass142012010030clv.time.yearRelation13508014060lt=<<-10;10;10;30;120;30;120;40Relation11002017060lt=<<-150;10;150;30;10;30;10;40Relation1100803060lt=<<-10;10;10;40Relation1350803060lt=<<-10;10;10;40Relation12408014060lt=<<-120;10;120;30;10;30;10;40Relation12402014060lt=<<-10;10;10;30;120;30;120;40UMLClass131032021030/clv.fitted/UMLClass80048010030clv.pnbdUMLClass91048010030clv.bgnbdUMLClass109041021030/clv.fitted.transactions/UMLClass102048010030clv.ggomnbdUMLClass125050021030clv.fitted.transactions.static.covUMLClass142058021030clv.fitted.transactions.dynamic.covUMLClass158041021030/clv.fitted.spending/UMLClass164048010030clv.ggUMLClass80058014030clv.pnbd.static.covUMLClass95058014030clv.bgnbd.static.covUMLClass111058014030clv.ggomnbd.static.covRelation141034030090lt=<<-10;10;10;50;280;50;280;70Relation119034025090lt=<<-230;10;230;50;10;50;10;70Relation16804303070lt=<<-10;10;10;50Relation84043031070lt=<<-290;10;290;30;10;30;10;50Relation95043020070lt=<<-180;10;180;30;10;30;10;50Relation10604309070lt=<<-70;10;70;30;10;30;10;50Relation124043014090lt=<<-10;10;10;30;120;30;120;70Relation116052017080lt=<<-150;10;150;40;10;40;10;60Relation101052032080lt=<<-300;10;300;40;10;40;10;60Relation86052047080lt=<<-450;10;450;40;10;40;10;60Relation141052014080lt=<<-10;10;10;40;120;40;120;60UMLClass146063014030clv.pnbd.dynamic.covRelation15206003050lt=<<-10;10;10;30UMLClass97095010030/clv.model/UMLClass800102017030/clv.model.with.correlation/UMLClass1090102017030/clv.model.no.correlation/Relation88097016070lt=<<-140;10;140;30;10;30;10;50Relation101097018070lt=<<-10;10;10;30;160;30;160;50UMLClass800108017030clv.model.pnbd.no.covUMLClass800113017030clv.model.pnbd.static.covUMLClass800118017030clv.model.pnbd.dynamic.covRelation88010403060lt=<<-10;10;10;40Relation88011003050lt=<<-10;10;10;30Relation88011503050lt=<<-10;10;10;30UMLClass1000108017030clv.model.bgnbd.no.covUMLClass1190108017030clv.model.ggomnbd.no.covUMLClass1000113017030clv.model.bgnbd.static.covUMLClass1190113017030clv.model.ggomnbd.static.covRelation1080104012060lt=<<-100;10;100;30;10;30;10;40Relation1170104013060lt=<<-10;10;10;30;110;30;110;40Relation108011003050lt=<<-10;10;10;30Relation127011003050lt=<<-10;10;10;30 \ No newline at end of file