Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: CLVTools
Title: Tools for Customer Lifetime Value Estimation
Version: 0.11.2
Date: 2024-12-01
Version: 0.12.0
Date: 2025-09-22
Authors@R: c(
person(given="Patrick", family="Bachmann", email = "pbachma@ethz.ch", role = c("cre","aut")),
person(given="Niels", family="Kuebler", email = "niels.kuebler@uzh.ch", role = "aut"),
Expand Down Expand Up @@ -50,7 +50,8 @@ Suggests:
rmarkdown,
xml2,
testthat (>= 3.0.0),
lmtest
lmtest,
R.rsp
License: GPL-3
URL: https://github.com/bachmannpatrick/CLVTools
BugReports: https://github.com/bachmannpatrick/CLVTools/issues
Expand Down Expand Up @@ -122,6 +123,7 @@ Collate:
'f_interface_clvdata.R'
'f_interface_gg.R'
'f_interface_ggomnbd.R'
'f_interface_hessian.R'
'f_interface_latentattrition.R'
'f_interface_lrtest.R'
'f_interface_newcustomer.R'
Expand Down Expand Up @@ -155,6 +157,6 @@ Collate:
'pnbd_dyncov_expectation.R'
'pnbd_dyncov_palive.R'
RoxygenNote: 7.3.2
VignetteBuilder: knitr
VignetteBuilder: knitr, R.rsp
Config/testthat/parallel: false
Config/testthat/edition: 3
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ S3method(lmtest::lrtest,clv.fitted)
S3method(logLik,clv.fitted)
S3method(nobs,clv.data)
S3method(nobs,clv.fitted)
S3method(numDeriv::hessian,clv.fitted)
S3method(plot,clv.data)
S3method(plot,clv.fitted.spending)
S3method(plot,clv.fitted.transactions)
Expand Down Expand Up @@ -53,6 +54,7 @@ exportMethods(bgbb)
exportMethods(bgnbd)
exportMethods(gg)
exportMethods(ggomnbd)
exportMethods(hessian)
exportMethods(lrtest)
exportMethods(plot)
exportMethods(pmf)
Expand Down
15 changes: 15 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
# CLVTools 0.12.0

### NEW FEATURES
* `clvdata(data.end)`: Add parameter `data.end` to specify a data end beyond the last actual transaction
* `summary()`: Always set `zval` and `pval` to NA for the main model parameters
* `hessian()`: Add method to calculate hessian matrix for already fitted models
* Add 3 new vignettes covering: Advanced modelling techniques, model intuition, and the internal class system

### BUG FIXES
* Fix CRAN notes: Replace `arma::is_finite()` -> `std::isfinite()`
* Dyncov PNBD: Rename `predicted.CLV` -> `predicted.period.CLV`
* `predict()`: Rename `{predicted, actual}.total.spending` -> `{predicted, actual}.period.spending`



# CLVTools 0.11.2

### NEW FEATURES
Expand Down
4 changes: 3 additions & 1 deletion R/all_generics.R
Original file line number Diff line number Diff line change
Expand Up @@ -245,8 +245,10 @@ setGeneric("clv.data.create.bootstrapping.data", def = function(clv.data, ids){
#'
#' @export
as.clv.data <- function(x,
date.format="ymd", time.unit="weeks",
date.format="ymd",
time.unit="weeks",
estimation.split = NULL,
data.end = NULL,
name.id="Id", name.date="Date", name.price="Price",
...){
UseMethod("as.clv.data", x)
Expand Down
14 changes: 14 additions & 0 deletions R/class_clv_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,21 @@ clv.data.make.descriptives <- function(clv.data, ids){
dt.interp <- clv.data.mean.interpurchase.times(clv.data=clv.data, dt.transactions = dt.data)
dt.num.trans.by.cust <- dt.data[, .N, by="Id"]

tp.period.start <- switch(
sample.name,
Estimation=clv.time@timepoint.estimation.start,
Holdout=clv.time@timepoint.holdout.start,
Total=clv.time@timepoint.estimation.start)

tp.period.end <- switch(
sample.name,
Estimation=clv.time@timepoint.estimation.end,
Holdout=clv.time@timepoint.holdout.end,
Total=clv.time@timepoint.holdout.end)

l.desc <- list(
"Period Start" = clv.time.format.timepoint(clv.time=clv.time, timepoint=tp.period.start),
"Period End" = clv.time.format.timepoint(clv.time=clv.time, timepoint=tp.period.end),
"Number of customers" = if(sample.name=="Total"){nrow(dt.num.trans.by.cust)}else{"-"},
"First Transaction in period" = clv.time.format.timepoint(clv.time=clv.time, timepoint=dt.data[, min(Date)]),
"Last Transaction in period" = clv.time.format.timepoint(clv.time=clv.time, timepoint=dt.data[, max(Date)]),
Expand Down
60 changes: 60 additions & 0 deletions R/class_clv_fitted.R
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,63 @@ setClass(Class = "clv.fitted", contains = "VIRTUAL",
optimx.hessian = matrix(data = numeric(0))))




clv.fitted.get.LL <- function(clv.fitted){

# Calling the LL with the exact same inputs/specification as when the fitting
# is not trivial as there are plenty of options.
# To reproduce it, the exact steps of the estimation are repeated here.

# Start parameters are not really required, they are just stored as item
# `par` for `optimx()`
final.coefs <- drop(tail(coef(clv.fitted@optimx.estimation.output), n=1))

prepared.optimx.args <- clv.controlflow.estimate.prepare.optimx.args(
clv.fitted=clv.fitted,
start.params.all= final.coefs)

prepared.optimx.args <- clv.model.prepare.optimx.args(
clv.model=clv.fitted@clv.model,
clv.fitted=clv.fitted,
prepared.optimx.args=prepared.optimx.args)

prepared.optimx.args[["LL.param.names.to.optimx"]] <- names(prepared.optimx.args$par)

# In the estimation procedure, the user can also supply custom `optimx.args`
# which override `prepared.optimx.args` here. Because optimx is not called
# here, there is no need to add `optimx.args` here.

# The generated args also contain parameters for optimx. These are not required
# for the LL and need to be removed. This also removes the LL itself (`fn`).
names.optimx.args <- setdiff(formalArgs(optimx), "...")
call.args <- prepared.optimx.args[!(names(prepared.optimx.args) %in% names.optimx.args)]

# Could save memory as the returned method is a closure and has the environment
# in which is was defined attached. Hence all variables in this method here
# which may be large. However, it can also be useful to have these objects.

# Wrapper to call the LL with the original args.
# It is preferred to return a method rather than calling it immediately
# because generating the call args may take time.
LL <- function(params){
req.names <- call.args$LL.param.names.to.optimx

# Ensure named same as original
if(!(identical(sort(names(params)), sort(req.names)))){
check_err_msg(paste0(
"'params' has to be named ",
paste(req.names, collapse = ", "),
". Often, `drop(coef(model@optimx.estimation.output))` is useful."))
}

# Ensure name and position are the same as original order. This is required as
# any param input will be re-named by slapping `LL.param.names.to.optimx` on
# them (using names() <- ) in the first interlayer and then further accessed
# by name.
call.args$LL.params <- params[req.names] # bring to correct order
return(do.call(what = prepared.optimx.args$fn, args = call.args))
}

return(LL)
}
35 changes: 27 additions & 8 deletions R/class_clv_time.R
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,24 @@ clv.time.has.holdout <- function(clv.time){

# set.sample.periods ------------------------------------------------------------------------
#' @importFrom lubridate period
clv.time.set.sample.periods <- function(clv.time, tp.first.transaction, tp.last.transaction, user.estimation.end){
clv.time.set.sample.periods <- function(clv.time, tp.first.transaction, tp.last.transaction, user.estimation.end, user.data.end){

tp.estimation.start <- tp.first.transaction

if(is.null(user.data.end)){
tp.data.end <- tp.last.transaction
}else{
tp.data.end <- clv.time.convert.user.input.to.timepoint(
clv.time=clv.time,
user.timepoint=user.data.end)

# Data end may not be before last transaction
if(tp.data.end < tp.last.transaction){
stop("The given data.end may not be before the last recorded transaction!")
}
}


if(!is.null(user.estimation.end)){
# specific end

Expand All @@ -98,24 +112,29 @@ clv.time.set.sample.periods <- function(clv.time, tp.first.transaction, tp.last.
user.timepoint=user.estimation.end)
}


# Before the last transaction to ensure there is at least 1 transaction in the holdout period.
# Needed additionally to holdout >=2 periods
if(tp.estimation.end >= tp.last.transaction)
stop("Parameter estimation.split needs to indicate a point before the last transaction!", call. = FALSE)

# Need to be 2 periods because otherwise for days, holdout can be not on estimation.end but still be of length zero
# ie 2 periods to still have 1 as holdout
if(tp.estimation.end > tp.last.transaction-clv.time.number.timeunits.to.timeperiod(clv.time, 2L))
stop("Parameter estimation.split needs to indicate a point at least 2 periods before the last transaction!", call. = FALSE)
if(tp.estimation.end > tp.data.end - clv.time.number.timeunits.to.timeperiod(clv.time, 2L))
stop("Parameter estimation.split needs to indicate a point in time such that it yields a holdout period of at least 2 time.units!", call. = FALSE)

# + 1 day is the same for all because most fine-grained change that Date can do
tp.holdout.start <- tp.estimation.end + clv.time.epsilon(clv.time=clv.time)
tp.holdout.end <- tp.last.transaction
tp.holdout.end <- tp.data.end
holdout.period.in.tu <- clv.time.interval.in.number.tu(clv.time,
interv=interval(start = tp.holdout.start,
end = tp.holdout.end))
}else{
# NULL: no specific end - until end of data (last transaction)
# **TODO: last transaction or full period where last transaction is in?
# NULL: no specific end - until data end

# tp.holdout.start and .end HAVE to be end of estimation period as this is used elsewhere!
# tp.holdout.start/.end and HAVE to be end of estimation period as this is used elsewhere!
# ie to ensure prediction.end (with clv.time.get.prediction.table) finds correct end if user gives NULL
tp.estimation.end <- tp.last.transaction
tp.estimation.end <- tp.data.end
tp.holdout.start <- tp.estimation.end
tp.holdout.end <- tp.estimation.end
holdout.period.in.tu <- 0
Expand Down
4 changes: 3 additions & 1 deletion R/clv_template_controlflow_predict.R
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,9 @@ clv.controlflow.predict.add.uncertainty.estimates <- function(clv.fitted, dt.pre
dt.boots[, Id := sub("_BOOTSTRAP_ID_[0-9]+$", "", Id)]

# quantiles for each predicted quantity: select only the existing ones
cols.predictions <- c("PAlive", "CET", "DERT", "DECT", "predicted.mean.spending", "predicted.total.spending", "predicted.CLV")
cols.predictions <- c("PAlive", "CET", "DERT", "DECT",
"predicted.mean.spending", "predicted.period.spending",
"predicted.CLV", "predicted.period.CLV")
cols.predictions <- cols.predictions[cols.predictions %in% colnames(dt.boots)]

# Long-format for easier handling of different prediction columns
Expand Down
25 changes: 25 additions & 0 deletions R/f_clvdata_inputchecks.R
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,31 @@ check_userinput_datanocov_estimationsplit <- function(estimation.split, date.for
return(c())
}

#' @importFrom lubridate is.POSIXt is.Date parse_date_time
check_userinput_datanocov_dataend <- function(data.end, date.format){

# May be NULL
if(is.null(data.end))
return(c())

if(length(data.end) != 1)
return("data.end must contain exactly one single element!")

if(anyNA(data.end))
return("data.end may not contain any NAs!")

if(!is.character(data.end)
& !is.Date(data.end)
& !is.POSIXt(data.end))
return("data.end needs to either of type character or date-like (Date or POSIXt)")

if(is.character(data.end))
if(anyNA(parse_date_time(x=data.end, quiet=TRUE, orders=date.format)))
return("Please provide a valid data.end to that can be converted with the given date.format!")

return(c())
}


#' @importFrom lubridate is.POSIXct
check_userinput_datanocov_datatransactions <- function(data.transactions.dt, has.spending){
Expand Down
1 change: 1 addition & 0 deletions R/f_generics_clvfitted.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,4 @@ setMethod("clv.fitted.estimate.same.specification.on.new.data", signature = "clv
new.fitted@call <- cl
return(new.fitted)
})

34 changes: 18 additions & 16 deletions R/f_generics_clvfittedtransactions.R
Original file line number Diff line number Diff line change
Expand Up @@ -160,15 +160,15 @@ setMethod("clv.controlflow.predict.get.has.actuals", signature(clv.fitted="clv.f
# .clv.controlflow.predict.add.actuals ---------------------------------------------------------------------------------
setMethod("clv.controlflow.predict.add.actuals", signature(clv.fitted="clv.fitted.transactions"), definition = function(clv.fitted, dt.predictions, has.actuals, verbose, ...){

actual.total.spending <- i.actual.total.spending <- Price <- Date <- period.first <- period.last <- actual.x <- i.actual.x <- NULL
actual.period.spending <- i.actual.period.spending <- Price <- Date <- period.first <- period.last <- actual.x <- i.actual.x <- NULL

# Only if:
# - there is a holdout
# - the prediction is not beyond holdout
#
# Data until prediction end
# actual.x: number of transactions
# actual.total.spending: total spending
# actual.x: number of transactions
# actual.period.spending: total spending

timepoint.prediction.first <- dt.predictions[1, period.first]
timepoint.prediction.last <- dt.predictions[1, period.last]
Expand All @@ -184,8 +184,8 @@ setMethod("clv.controlflow.predict.add.actuals", signature(clv.fitted="clv.fitte
incbounds = TRUE)]

if(clv.data.has.spending(clv.fitted@clv.data)){
dt.actuals <- dt.actuals.transactions[, list(actual.x = .N,
actual.total.spending = sum(Price)),
dt.actuals <- dt.actuals.transactions[, list(actual.x = .N,
actual.period.spending = sum(Price)),
keyby="Id"]
}else{
dt.actuals <- dt.actuals.transactions[, list(actual.x = .N), keyby="Id"]
Expand All @@ -195,8 +195,8 @@ setMethod("clv.controlflow.predict.add.actuals", signature(clv.fitted="clv.fitte
dt.predictions[is.na(actual.x), actual.x := 0]

if(clv.data.has.spending(clv.fitted@clv.data)){
dt.predictions[dt.actuals, actual.total.spending := i.actual.total.spending, on="Id"]
dt.predictions[is.na(actual.total.spending), actual.total.spending := 0]
dt.predictions[dt.actuals, actual.period.spending := i.actual.period.spending, on="Id"]
dt.predictions[is.na(actual.period.spending), actual.period.spending := 0]
}
return(dt.predictions)
}
Expand All @@ -205,8 +205,8 @@ setMethod("clv.controlflow.predict.add.actuals", signature(clv.fitted="clv.fitte

# .clv.controlflow.predict.post.process.prediction.table ------------------------------------------------------------------------------
setMethod("clv.controlflow.predict.post.process.prediction.table", signature = signature(clv.fitted="clv.fitted.transactions"), function(clv.fitted, dt.predictions, has.actuals, verbose, predict.spending, ...){
predicted.mean.spending <- i.predicted.mean.spending <- actual.total.spending <- i.actual.total.spending <- predicted.total.spending <- NULL
predicted.CLV <- CET <- DECT <- DERT <- NULL
predicted.mean.spending <- i.predicted.mean.spending <- actual.period.spending <- i.actual.period.spending <- predicted.period.spending <- NULL
predicted.CLV <- predicted.period.CLV <- CET <- DECT <- DERT <- NULL

# Predict spending ---------------------------------------------------------------------------------------
# depends on content of predict.spending:
Expand All @@ -224,7 +224,7 @@ setMethod("clv.controlflow.predict.post.process.prediction.table", signature = s
dt.predictions[dt.spending, predicted.mean.spending := i.predicted.mean.spending, on = "Id"]

# The actual.mean.spending from dt.spending is not added anymore
# actual.total.spending is already in prediction table
# actual.period.spending is already in prediction table

return(dt.predictions)
}
Expand Down Expand Up @@ -289,10 +289,10 @@ setMethod("clv.controlflow.predict.post.process.prediction.table", signature = s
if("DERT" %in% colnames(dt.predictions))
dt.predictions[, predicted.CLV := DERT * predicted.mean.spending]
if("DECT" %in% colnames(dt.predictions))
dt.predictions[, predicted.CLV := DECT * predicted.mean.spending]
dt.predictions[, predicted.period.CLV := DECT * predicted.mean.spending]

# If spending is predicted, also add predicted.total.spending
dt.predictions[, predicted.total.spending := predicted.mean.spending * CET]
# If spending is predicted, also add predicted.period.spending
dt.predictions[, predicted.period.spending := predicted.mean.spending * CET]
}

# Present cols in desired order ------------------------------------------------------------
Expand All @@ -302,8 +302,8 @@ setMethod("clv.controlflow.predict.post.process.prediction.table", signature = s
cols <- c(cols, "actual.x")

# cannot determine otherwise alone from has.actuals
if("actual.total.spending" %in% colnames(dt.predictions))
cols <- c(cols, "actual.total.spending")
if("actual.period.spending" %in% colnames(dt.predictions))
cols <- c(cols, "actual.period.spending")
}

if("DERT" %in% colnames(dt.predictions))
Expand All @@ -312,10 +312,12 @@ setMethod("clv.controlflow.predict.post.process.prediction.table", signature = s
cols <- c(cols, "PAlive", "CET", "DECT")

if(do.predict.spending)
cols <- c(cols, c("predicted.mean.spending", "predicted.total.spending"))
cols <- c(cols, c("predicted.mean.spending", "predicted.period.spending"))

if("predicted.CLV" %in% colnames(dt.predictions))
cols <- c(cols, "predicted.CLV")
if("predicted.period.CLV" %in% colnames(dt.predictions))
cols <- c(cols, "predicted.period.CLV")

setcolorder(dt.predictions, cols)

Expand Down
Loading
Loading