diff --git a/NAMESPACE b/NAMESPACE index 35c1eb7c0..c4ba7a2dc 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -59,10 +59,15 @@ export(plot_score_table) export(plot_wis) export(quantile_score) export(quantile_to_interval) +export(rules_binary) +export(rules_point) +export(rules_quantile) +export(rules_sample) export(run_safely) export(sample_to_quantile) export(score) export(se_mean_sample) +export(select_rules) export(set_forecast_unit) export(squared_error) export(summarise_scores) @@ -86,6 +91,7 @@ importFrom(checkmate,assert_logical) importFrom(checkmate,assert_number) importFrom(checkmate,assert_numeric) importFrom(checkmate,assert_string) +importFrom(checkmate,assert_subset) importFrom(checkmate,assert_vector) importFrom(checkmate,check_atomic_vector) importFrom(checkmate,check_data_frame) diff --git a/NEWS.md b/NEWS.md index e77c172c7..a991f3806 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,4 @@ -# scoringutils 1.3 +# scoringutils 2.0.0 This major update and addresses a variety of comments made by reviewers from the Journal of Statistical Software (see preprint of the manuscript [here](https://arxiv.org/abs/2205.07090)). @@ -7,20 +7,21 @@ The update introduces breaking changes. If you want to keep using the older vers ## Package updates - In `score()`, required columns "true_value" and "prediction" were renamed and replaced by required columns "observed" and "predicted". Scoring functions now also use the function arguments "observed" and "predicted" everywhere consistently. - The overall scoring workflow was updated. `score()` is now a generic function that dispatches the correct method based on the forecast type. forecast types currently supported are "binary", "point", "sample" and "quantile" with corresponding classes "forecast_binary", "forecast_point", "forecast_sample" and "forecast_quantile". 
An object of class `forecast_*` can be created using the function `as_forecast()`, which also replaces the previous function `check_forecasts()` (see more information below). -- Scoring functions received a consistent interface and input checks: - - metrics for binary forecasts: +- Scoring rules (functions used for scoring) received a consistent interface and input checks: + - Scoring rules for binary forecasts: - `observed`: factor with exactly 2 levels - `predicted`: numeric, vector with probabilities - - metrics for point forecasts: + - Scoring rules for point forecasts: - `observed`: numeric vector - `predicted`: numeric vector - - metrics for sample-based forecasts: + - Scoring rules for sample-based forecasts: - `observed`: numeric, either a scalar or a vector - `predicted`: numeric, a vector (if `observed` is a scalar) or a matrix (if `observed` is a vector) - - metrics for quantile-based forecasts: + - Scoring rules for quantile-based forecasts: - `observed`: numeric, either a scalar or a vector - `predicted`: numeric, a vector (if `observed` is a scalar) or a matrix (if `observed` is a vector) - `quantile`: numeric, a vector with quantile-levels. Can alternatively be a matrix of the same shape as `predicted`. +- Users can now supply their own scoring rules to `score()` as a list of functions. Default scoring rules can be accessed using the functions `rules_point()`, `rules_sample()`, `rules_quantile()` and `rules_binary()`, which return a list of scoring rules suitable for the respective forecast type. - `check_forecasts()` was replaced by a different workflow. There now is a function, `as_forecast()`, that determines forecast type of the data, constructs a forecasting object and validates it using the function `validate_forecast()` (a generic that dispatches the correct method based on the forecast type). 
Objects of class `forecast_binary`, `forecast_point`, `forecast_sample` and `forecast_quantile` have print methods that fulfill the functionality of `check_forecasts()`. - The functionality for computing pairwise comparisons was now split from `summarise_scores()`. Instead of doing pairwise comparisons as part of summarising scores, a new function, `add_pairwise_comparison()`, was introduced that takes summarised scores as an input and adds pairwise comparisons to it. - `add_coverage()` was reworked completely. It's new purpose is now to add coverage information to the raw forecast data (essentially fulfilling some of the functionality that was previously covered by `score_quantile()`) diff --git a/R/data.R b/R/data.R index d64b55366..c9bf77109 100644 --- a/R/data.R +++ b/R/data.R @@ -195,51 +195,3 @@ #' #' @keywords info "metrics" - -#' Default metrics for binary forecasts. -#' -#' A named list with functions: -#' - "brier_score" = [brier_score()] -#' - "log_score" = [logs_binary()] -#' @keywords info -"metrics_binary" - -#' Default metrics for point forecasts. -#' -#' A named list with functions: -#' - "ae_point" = [ae()][Metrics::ae()] -#' - "se_point" = [se()][Metrics::se()] -#' - "ape" = [ape()][Metrics::ape()] -#' @keywords info -"metrics_point" - -#' Default metrics for sample-based forecasts. -#' -#' A named list with functions: -#' - "mad" = [mad_sample()] -#' - "bias" = [bias_sample()] -#' - "dss" = [dss_sample()] -#' - "crps" = [crps_sample()] -#' - "log_score" = [logs_sample()] -#' - "mad" = [mad_sample()] -#' - "ae_median" = [ae_median_sample()] -#' - "se_mean" = [se_mean_sample()] -#' @keywords info -"metrics_sample" - -#' Default metrics for quantile-based forecasts. 
-#' -#' A named list with functions: -#' - "wis" = [wis] -#' - "overprediction" = [overprediction()] -#' - "underprediction" = [underprediction()] -#' - "dispersion" = [dispersion()] -#' - "bias" = [bias_quantile()] -#' - "coverage_50" = [interval_coverage_quantile()] -#' - "coverage_90" = \(...) \{ -#' run_safely(..., range = 90, fun = [interval_coverage_quantile]) -#' \} -#' - "coverage_deviation" = [interval_coverage_dev_quantile()], -#' - "ae_median" = [ae_median_quantile()] -#' @keywords info -"metrics_quantile" diff --git a/R/default-scoring-rules.R b/R/default-scoring-rules.R new file mode 100644 index 000000000..9dd6fd24d --- /dev/null +++ b/R/default-scoring-rules.R @@ -0,0 +1,169 @@ +#' @title Select Scoring Rules From A List of Possible Scoring Rules +#' @description Helper function to return only the scoring rules selected by +#' the user from a list of possible scoring rules. +#' @param rules A list of scoring rules. +#' @param select A character vector of scoring rules to select from the list. +#' If `select` is `NULL` (the default), all possible scoring rules are returned. +#' @param exclude A character vector of scoring rules to exclude from the list. +#' If `select` is not `NULL`, this argument is ignored. +#' @return A list of scoring rules. 
+#' @keywords metric +#' @importFrom checkmate assert_subset assert_list +#' @export +#' @examples +#' select_rules( +#' rules = rules_binary(), +#' select = "brier_score" +#' ) +#' select_rules( +#' rules = rules_binary(), +#' exclude = "log_score" +#' ) +select_rules <- function(rules, select = NULL, exclude = NULL) { + assert_character(x = c(select, exclude), null.ok = TRUE) + assert_list(rules, names = "named") + allowed <- names(rules) + + if (is.null(select) && is.null(exclude)) { + return(rules) + } else if (is.null(select)) { + assert_subset(exclude, allowed) + select <- allowed[!allowed %in% exclude] + return(rules[select]) + } else { + assert_subset(select, allowed) + return(rules[select]) + } +} + + +#' @title Scoring Rules for Binary Forecasts +#' @description Helper function that returns a named list of default +#' scoring rules suitable for binary forecasts. +#' +#' The default scoring rules are: +#' - "brier_score" = [brier_score()] +#' - "log_score" = [logs_binary()] +#' @inherit select_rules params return +#' @export +#' @keywords metric +#' @examples +#' rules_binary() +#' rules_binary(select = "brier_score") +#' rules_binary(exclude = "log_score") +rules_binary <- function(select = NULL, exclude = NULL) { + all <- list( + brier_score = brier_score, + log_score = logs_binary + ) + selected <- select_rules(all, select, exclude) + return(selected) +} + + +#' @title Scoring Rules for Point Forecasts +#' @description Helper function that returns a named list of default +#' scoring rules suitable for point forecasts. 
+#' +#' The default scoring rules are: +#' - "ae_point" = [ae()][Metrics::ae()] +#' - "se_point" = [se()][Metrics::se()] +#' - "ape" = [ape()][Metrics::ape()] +#' @inherit select_rules params return +#' @export +#' @keywords metric +#' @examples +#' rules_point() +#' rules_point(select = "ape") +rules_point <- function(select = NULL, exclude = NULL) { + all <- list( + ae_point = Metrics::ae, + se_point = Metrics::se, + ape = Metrics::ape + ) + selected <- select_rules(all, select, exclude) + return(selected) +} + + +#' @title Scoring Rules for Sample-Based Forecasts +#' @description Helper function that returns a named list of default +#' scoring rules suitable for forecasts in a sample-based format +#' +#' The default scoring rules are: +#' - "mad" = [mad_sample()] +#' - "bias" = [bias_sample()] +#' - "dss" = [dss_sample()] +#' - "crps" = [crps_sample()] +#' - "log_score" = [logs_sample()] +#' - "mad" = [mad_sample()] +#' - "ae_median" = [ae_median_sample()] +#' - "se_mean" = [se_mean_sample()] +#' @inherit select_rules params return +#' @export +#' @keywords metric +#' @examples +#' rules_sample() +#' rules_sample(select = "mad") +rules_sample <- function(select = NULL, exclude = NULL) { + all <- list( + bias = bias_sample, + dss = dss_sample, + crps = crps_sample, + log_score = logs_sample, + mad = mad_sample, + ae_median = ae_median_sample, + se_mean = se_mean_sample + ) + selected <- select_rules(all, select, exclude) + return(selected) +} + + +#' @title Scoring Rules for Quantile-Based Forecasts +#' @description Helper function that returns a named list of default +#' scoring rules suitable for forecasts in a quantile-based format +#' +#' The default scoring rules are: +#' - "wis" = [wis] +#' - "overprediction" = [overprediction()] +#' - "underprediction" = [underprediction()] +#' - "dispersion" = [dispersion()] +#' - "bias" = [bias_quantile()] +#' - "coverage_50" = [interval_coverage_quantile()] +#' - "coverage_90" = function(...) 
\{ +#' run_safely(..., range = 90, fun = [interval_coverage_quantile]) +#' \} +#' - "coverage_deviation" = [interval_coverage_dev_quantile()], +#' - "ae_median" = [ae_median_quantile()] +#' +#' Note: The `coverage_90` scoring rule is created as a wrapper around +#' [interval_coverage_quantile()], making use of the function [run_safely()]. +#' This construct allows the function to deal with arbitrary arguments in `...`, +#' while making sure that only those that [interval_coverage_quantile()] can +#' accept get passed on to it. `range = 90` is set in the function definition, +#' as passing an argument `range = 90` to [score()] would mean it would also +#' get passed to `coverage_50`. +#' @inherit select_rules params return +#' @export +#' @keywords metric +#' @examples +#' rules_quantile() +#' rules_quantile(select = "wis") +rules_quantile <- function(select = NULL, exclude = NULL) { + all <- list( + wis = wis, + overprediction = overprediction, + underprediction = underprediction, + dispersion = dispersion, + bias = bias_quantile, + coverage_50 = interval_coverage_quantile, + coverage_90 = function(...) { + run_safely(..., range = 90, fun = interval_coverage_quantile) + }, + coverage_deviation = interval_coverage_dev_quantile, + ae_median = ae_median_quantile + ) + selected <- select_rules(all, select, exclude) + return(selected) +} diff --git a/R/score.R b/R/score.R index 5ca049fa7..a26351e78 100644 --- a/R/score.R +++ b/R/score.R @@ -16,8 +16,8 @@ #' @inheritSection forecast_types Forecast unit #' @param data A data.frame or data.table with predicted and observed values. #' @param metrics A named list of scoring functions. Names will be used as -#' column names in the output. See [metrics_point()], [metrics_binary()], -#' `metrics_quantile()`, and [metrics_sample()] for more information on the +#' column names in the output. See [rules_point()], [rules_binary()], +#' [rules_quantile()], and [rules_sample()] for more information on the #' default metrics used. 
#' @param ... additional arguments #' @return A data.table with unsummarised scores. This will generally be @@ -76,7 +76,7 @@ score.default <- function(data, ...) { #' @rdname score #' @export -score.forecast_binary <- function(data, metrics = metrics_binary, ...) { +score.forecast_binary <- function(data, metrics = rules_binary(), ...) { data <- validate_forecast(data) data <- remove_na_observed_predicted(data) metrics <- validate_metrics(metrics) @@ -96,7 +96,7 @@ score.forecast_binary <- function(data, metrics = metrics_binary, ...) { #' @importFrom Metrics se ae ape #' @rdname score #' @export -score.forecast_point <- function(data, metrics = metrics_point, ...) { +score.forecast_point <- function(data, metrics = rules_point(), ...) { data <- validate_forecast(data) data <- remove_na_observed_predicted(data) metrics <- validate_metrics(metrics) @@ -113,7 +113,7 @@ score.forecast_point <- function(data, metrics = metrics_point, ...) { #' @rdname score #' @export -score.forecast_sample <- function(data, metrics = metrics_sample, ...) { +score.forecast_sample <- function(data, metrics = rules_sample(), ...) { data <- validate_forecast(data) data <- remove_na_observed_predicted(data) forecast_unit <- attr(data, "forecast_unit") @@ -150,7 +150,7 @@ score.forecast_sample <- function(data, metrics = metrics_sample, ...) { #' @importFrom data.table `:=` as.data.table rbindlist %like% #' @rdname score #' @export -score.forecast_quantile <- function(data, metrics = metrics_quantile, ...) { +score.forecast_quantile <- function(data, metrics = rules_quantile(), ...) 
{ data <- validate_forecast(data) data <- remove_na_observed_predicted(data) forecast_unit <- attr(data, "forecast_unit") diff --git a/R/utils.R b/R/utils.R index 8e5e8a9e1..680166bc5 100644 --- a/R/utils.R +++ b/R/utils.R @@ -145,7 +145,7 @@ strip_attributes <- function(object, attributes) { #' @title Run a function safely #' @description This is a wrapper function designed to run a function safely -#' when it is not completely clear what arguments coulld be passed to the +#' when it is not completely clear what arguments could be passed to the #' function. #' #' All named arguments in `...` that are not accepted by `fun` are removed. diff --git a/R/z_globalVariables.R b/R/z_globalVariables.R index 89cfc2eab..bcd591290 100644 --- a/R/z_globalVariables.R +++ b/R/z_globalVariables.R @@ -44,10 +44,10 @@ globalVariables(c( "metric", "metrics_select", "metrics", - "metrics_binary", - "metrics_point", - "metrics_quantile", - "metrics_sample", + "rules_binary", + "rules_point", + "rules_quantile", + "rules_sample", "model", "n_obs", "n_obs wis_component_name", diff --git a/data/metrics_binary.rda b/data/metrics_binary.rda deleted file mode 100644 index 9a37a5dda..000000000 Binary files a/data/metrics_binary.rda and /dev/null differ diff --git a/data/metrics_point.rda b/data/metrics_point.rda deleted file mode 100644 index bf6b60e86..000000000 Binary files a/data/metrics_point.rda and /dev/null differ diff --git a/data/metrics_quantile.rda b/data/metrics_quantile.rda deleted file mode 100644 index b5598113d..000000000 Binary files a/data/metrics_quantile.rda and /dev/null differ diff --git a/data/metrics_sample.rda b/data/metrics_sample.rda deleted file mode 100644 index f14e6f7c1..000000000 Binary files a/data/metrics_sample.rda and /dev/null differ diff --git a/inst/create-list-available-forecasts.R b/inst/create-list-available-forecasts.R deleted file mode 100644 index d1ee6cc43..000000000 --- a/inst/create-list-available-forecasts.R +++ /dev/null @@ -1,36 +0,0 @@ 
-metrics_binary <- list( - "brier_score" = brier_score, - "log_score" = logs_binary -) -usethis::use_data(metrics_binary, overwrite = TRUE) - -metrics_point <- list( - "ae_point" = Metrics::ae, - "se_point" = Metrics::se, - "ape" = Metrics::ape -) -usethis::use_data(metrics_point, overwrite = TRUE) - -metrics_sample <- list( - "bias" = bias_sample, - "dss" = dss_sample, - "crps" = crps_sample, - "log_score" = logs_sample, - "mad" = mad_sample, - "ae_median" = ae_median_sample, # not sure we still want these - "se_mean" = se_mean_sample # not sure we still want these -) -usethis::use_data(metrics_sample, overwrite = TRUE) - -metrics_quantile <- list( - "wis" = wis, - "overprediction" = overprediction, - "underprediction" = underprediction, - "dispersion" = dispersion, - "bias" = bias_quantile, - "coverage_50" = interval_coverage_quantile, - "coverage_90" = \(...) {run_safely(..., range = 90, fun = interval_coverage_quantile)}, - "coverage_deviation" = interval_coverage_dev_quantile, - "ae_median" = ae_median_quantile -) -usethis::use_data(metrics_quantile, overwrite = TRUE) diff --git a/man/apply_rules.Rd b/man/apply_rules.Rd index f64ba562f..294033e62 100644 --- a/man/apply_rules.Rd +++ b/man/apply_rules.Rd @@ -10,8 +10,8 @@ apply_rules(data, metrics, ...) \item{data}{A data.frame or data.table with predicted and observed values.} \item{metrics}{A named list of scoring functions. Names will be used as -column names in the output. See \code{\link[=metrics_point]{metrics_point()}}, \code{\link[=metrics_binary]{metrics_binary()}}, -\code{metrics_quantile()}, and \code{\link[=metrics_sample]{metrics_sample()}} for more information on the +column names in the output. 
See \code{\link[=rules_point]{rules_point()}}, \code{\link[=rules_binary]{rules_binary()}}, +\code{\link[=rules_quantile]{rules_quantile()}}, and \code{\link[=rules_sample]{rules_sample()}} for more information on the default metrics used.} \item{...}{additional arguments} diff --git a/man/metrics_binary.Rd b/man/metrics_binary.Rd deleted file mode 100644 index a378be74f..000000000 --- a/man/metrics_binary.Rd +++ /dev/null @@ -1,20 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data.R -\docType{data} -\name{metrics_binary} -\alias{metrics_binary} -\title{Default metrics for binary forecasts.} -\format{ -An object of class \code{list} of length 2. -} -\usage{ -metrics_binary -} -\description{ -A named list with functions: -\itemize{ -\item "brier_score" = \code{\link[=brier_score]{brier_score()}} -\item "log_score" = \code{\link[=logs_binary]{logs_binary()}} -} -} -\keyword{info} diff --git a/man/metrics_point.Rd b/man/metrics_point.Rd deleted file mode 100644 index af194c028..000000000 --- a/man/metrics_point.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data.R -\docType{data} -\name{metrics_point} -\alias{metrics_point} -\title{Default metrics for point forecasts.} -\format{ -An object of class \code{list} of length 3. 
-} -\usage{ -metrics_point -} -\description{ -A named list with functions: -\itemize{ -\item "ae_point" = \link[Metrics:ae]{ae()} -\item "se_point" = \link[Metrics:se]{se()} -\item "ape" = \link[Metrics:ape]{ape()} -} -} -\keyword{info} diff --git a/man/metrics_quantile.Rd b/man/metrics_quantile.Rd deleted file mode 100644 index d62cc1b3c..000000000 --- a/man/metrics_quantile.Rd +++ /dev/null @@ -1,29 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data.R -\docType{data} -\name{metrics_quantile} -\alias{metrics_quantile} -\title{Default metrics for quantile-based forecasts.} -\format{ -An object of class \code{list} of length 9. -} -\usage{ -metrics_quantile -} -\description{ -A named list with functions: -\itemize{ -\item "wis" = \link{wis} -\item "overprediction" = \code{\link[=overprediction]{overprediction()}} -\item "underprediction" = \code{\link[=underprediction]{underprediction()}} -\item "dispersion" = \code{\link[=dispersion]{dispersion()}} -\item "bias" = \code{\link[=bias_quantile]{bias_quantile()}} -\item "coverage_50" = \code{\link[=interval_coverage_quantile]{interval_coverage_quantile()}} -\item "coverage_90" = \(...) \{ -run_safely(..., range = 90, fun = \link{interval_coverage_quantile}) -\} -\item "coverage_deviation" = \code{\link[=interval_coverage_dev_quantile]{interval_coverage_dev_quantile()}}, -\item "ae_median" = \code{\link[=ae_median_quantile]{ae_median_quantile()}} -} -} -\keyword{info} diff --git a/man/metrics_sample.Rd b/man/metrics_sample.Rd deleted file mode 100644 index 5231f4ae7..000000000 --- a/man/metrics_sample.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/data.R -\docType{data} -\name{metrics_sample} -\alias{metrics_sample} -\title{Default metrics for sample-based forecasts.} -\format{ -An object of class \code{list} of length 7. 
-} -\usage{ -metrics_sample -} -\description{ -A named list with functions: -\itemize{ -\item "mad" = \code{\link[=mad_sample]{mad_sample()}} -\item "bias" = \code{\link[=bias_sample]{bias_sample()}} -\item "dss" = \code{\link[=dss_sample]{dss_sample()}} -\item "crps" = \code{\link[=crps_sample]{crps_sample()}} -\item "log_score" = \code{\link[=logs_sample]{logs_sample()}} -\item "mad" = \code{\link[=mad_sample]{mad_sample()}} -\item "ae_median" = \code{\link[=ae_median_sample]{ae_median_sample()}} -\item "se_mean" = \code{\link[=se_mean_sample]{se_mean_sample()}} -} -} -\keyword{info} diff --git a/man/rules_binary.Rd b/man/rules_binary.Rd new file mode 100644 index 000000000..f5c455e35 --- /dev/null +++ b/man/rules_binary.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/default-scoring-rules.R +\name{rules_binary} +\alias{rules_binary} +\title{Scoring Rules for Binary Forecasts} +\usage{ +rules_binary(select = NULL, exclude = NULL) +} +\arguments{ +\item{select}{A character vector of scoring rules to select from the list. +If \code{select} is \code{NULL} (the default), all possible scoring rules are returned.} + +\item{exclude}{A character vector of scoring rules to exclude from the list. +If \code{select} is not \code{NULL}, this argument is ignored.} +} +\value{ +A list of scoring rules. +} +\description{ +Helper function that returns a named list of default +scoring rules suitable for binary forecasts. 
+ +The default scoring rules are: +\itemize{ +\item "brier_score" = \code{\link[=brier_score]{brier_score()}} +\item "log_score" = \code{\link[=logs_binary]{logs_binary()}} +} +} +\examples{ +rules_binary() +rules_binary(select = "brier_score") +rules_binary(exclude = "log_score") +} +\keyword{metric} diff --git a/man/rules_point.Rd b/man/rules_point.Rd new file mode 100644 index 000000000..55f562dc9 --- /dev/null +++ b/man/rules_point.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/default-scoring-rules.R +\name{rules_point} +\alias{rules_point} +\title{Scoring Rules for Point Forecasts} +\usage{ +rules_point(select = NULL, exclude = NULL) +} +\arguments{ +\item{select}{A character vector of scoring rules to select from the list. +If \code{select} is \code{NULL} (the default), all possible scoring rules are returned.} + +\item{exclude}{A character vector of scoring rules to exclude from the list. +If \code{select} is not \code{NULL}, this argument is ignored.} +} +\value{ +A list of scoring rules. +} +\description{ +Helper function that returns a named list of default +scoring rules suitable for point forecasts. + +The default scoring rules are: +\itemize{ +\item "ae_point" = \link[Metrics:ae]{ae()} +\item "se_point" = \link[Metrics:se]{se()} +\item "ape" = \link[Metrics:ape]{ape()} +} +} +\examples{ +rules_point() +rules_point(select = "ape") +} +\keyword{metric} diff --git a/man/rules_quantile.Rd b/man/rules_quantile.Rd new file mode 100644 index 000000000..78b24342b --- /dev/null +++ b/man/rules_quantile.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/default-scoring-rules.R +\name{rules_quantile} +\alias{rules_quantile} +\title{Scoring Rules for Quantile-Based Forecasts} +\usage{ +rules_quantile(select = NULL, exclude = NULL) +} +\arguments{ +\item{select}{A character vector of scoring rules to select from the list. 
+If \code{select} is \code{NULL} (the default), all possible scoring rules are returned.} + +\item{exclude}{A character vector of scoring rules to exclude from the list. +If \code{select} is not \code{NULL}, this argument is ignored.} +} +\value{ +A list of scoring rules. +} +\description{ +Helper function that returns a named list of default +scoring rules suitable for forecasts in a quantile-based format. + +The default scoring rules are: +\itemize{ +\item "wis" = \link{wis} +\item "overprediction" = \code{\link[=overprediction]{overprediction()}} +\item "underprediction" = \code{\link[=underprediction]{underprediction()}} +\item "dispersion" = \code{\link[=dispersion]{dispersion()}} +\item "bias" = \code{\link[=bias_quantile]{bias_quantile()}} +\item "coverage_50" = \code{\link[=interval_coverage_quantile]{interval_coverage_quantile()}} +\item "coverage_90" = function(...) \{ +run_safely(..., range = 90, fun = \link{interval_coverage_quantile}) +\} +\item "coverage_deviation" = \code{\link[=interval_coverage_dev_quantile]{interval_coverage_dev_quantile()}} +\item "ae_median" = \code{\link[=ae_median_quantile]{ae_median_quantile()}} +} + +Note: The \code{coverage_90} scoring rule is created as a wrapper around +\code{\link[=interval_coverage_quantile]{interval_coverage_quantile()}}, making use of the function \code{\link[=run_safely]{run_safely()}}. +This construct allows the function to deal with arbitrary arguments in \code{...}, +while making sure that only those that \code{\link[=interval_coverage_quantile]{interval_coverage_quantile()}} can +accept get passed on to it. \code{range = 90} is set in the function definition, +as passing an argument \code{range = 90} to \code{\link[=score]{score()}} would mean it would also +get passed to \code{coverage_50}. 
+} +\examples{ +rules_quantile() +rules_quantile(select = "wis") +} +\keyword{metric} diff --git a/man/rules_sample.Rd b/man/rules_sample.Rd new file mode 100644 index 000000000..0a6399beb --- /dev/null +++ b/man/rules_sample.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/default-scoring-rules.R +\name{rules_sample} +\alias{rules_sample} +\title{Scoring Rules for Sample-Based Forecasts} +\usage{ +rules_sample(select = NULL, exclude = NULL) +} +\arguments{ +\item{select}{A character vector of scoring rules to select from the list. +If \code{select} is \code{NULL} (the default), all possible scoring rules are returned.} + +\item{exclude}{A character vector of scoring rules to exclude from the list. +If \code{select} is not \code{NULL}, this argument is ignored.} +} +\value{ +A list of scoring rules. +} +\description{ +Helper function that returns a named list of default +scoring rules suitable for forecasts in a sample-based format + +The default scoring rules are: +\itemize{ +\item "bias" = \code{\link[=bias_sample]{bias_sample()}} +\item "dss" = \code{\link[=dss_sample]{dss_sample()}} +\item "crps" = \code{\link[=crps_sample]{crps_sample()}} +\item "log_score" = \code{\link[=logs_sample]{logs_sample()}} +\item "mad" = \code{\link[=mad_sample]{mad_sample()}} +\item "ae_median" = \code{\link[=ae_median_sample]{ae_median_sample()}} +\item "se_mean" = \code{\link[=se_mean_sample]{se_mean_sample()}} +} +} +\examples{ +rules_sample() +rules_sample(select = "mad") +} +\keyword{metric} diff --git a/man/run_safely.Rd index b8c45751f..a4f809040 100644 --- a/man/run_safely.Rd +++ b/man/run_safely.Rd @@ -16,7 +16,7 @@ The result of \code{fun} or \code{NULL} if \code{fun} errors } \description{ This is a wrapper function designed to run a function safely -when it is not completely clear what arguments coulld be passed to the +when it is not completely
clear what arguments could be passed to the function. All named arguments in \code{...} that are not accepted by \code{fun} are removed. diff --git a/man/score.Rd b/man/score.Rd index d317ed64a..b995b847f 100644 --- a/man/score.Rd +++ b/man/score.Rd @@ -13,13 +13,13 @@ score(data, ...) \method{score}{default}(data, ...) -\method{score}{forecast_binary}(data, metrics = metrics_binary, ...) +\method{score}{forecast_binary}(data, metrics = rules_binary(), ...) -\method{score}{forecast_point}(data, metrics = metrics_point, ...) +\method{score}{forecast_point}(data, metrics = rules_point(), ...) -\method{score}{forecast_sample}(data, metrics = metrics_sample, ...) +\method{score}{forecast_sample}(data, metrics = rules_sample(), ...) -\method{score}{forecast_quantile}(data, metrics = metrics_quantile, ...) +\method{score}{forecast_quantile}(data, metrics = rules_quantile(), ...) } \arguments{ \item{data}{A data.frame or data.table with predicted and observed values.} @@ -27,8 +27,8 @@ score(data, ...) \item{...}{additional arguments} \item{metrics}{A named list of scoring functions. Names will be used as -column names in the output. See \code{\link[=metrics_point]{metrics_point()}}, \code{\link[=metrics_binary]{metrics_binary()}}, -\code{metrics_quantile()}, and \code{\link[=metrics_sample]{metrics_sample()}} for more information on the +column names in the output. 
See \code{\link[=rules_point]{rules_point()}}, \code{\link[=rules_binary]{rules_binary()}}, +\code{\link[=rules_quantile]{rules_quantile()}}, and \code{\link[=rules_sample]{rules_sample()}} for more information on the default metrics used.} } \value{ diff --git a/man/select_rules.Rd b/man/select_rules.Rd new file mode 100644 index 000000000..605d7176b --- /dev/null +++ b/man/select_rules.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/default-scoring-rules.R +\name{select_rules} +\alias{select_rules} +\title{Select Scoring Rules From A List of Possible Scoring Rules} +\usage{ +select_rules(rules, select = NULL, exclude = NULL) +} +\arguments{ +\item{rules}{A list of scoring rules.} + +\item{select}{A character vector of scoring rules to select from the list. +If \code{select} is \code{NULL} (the default), all possible scoring rules are returned.} + +\item{exclude}{A character vector of scoring rules to exclude from the list. +If \code{select} is not \code{NULL}, this argument is ignored.} +} +\value{ +A list of scoring rules. +} +\description{ +Helper function to return only the scoring rules selected by +the user from a list of possible scoring rules. 
+} +\examples{ +select_rules( + rules = rules_binary(), + select = "brier_score" +) +select_rules( + rules = rules_binary(), + exclude = "log_score" +) +} +\keyword{metric} diff --git a/tests/testthat/setup.R b/tests/testthat/setup.R index a236f299d..362f52adb 100644 --- a/tests/testthat/setup.R +++ b/tests/testthat/setup.R @@ -3,8 +3,12 @@ library(ggplot2, quietly = TRUE) library(data.table) suppressMessages(library(magrittr)) -metrics_no_cov <- metrics_quantile[!grepl("coverage", names(metrics_quantile))] -metrics_no_cov_no_ae <- metrics_no_cov[!grepl("ae", names(metrics_no_cov))] +metrics_no_cov <- rules_quantile( + exclude = c("coverage_50", "coverage_90", "coverage_deviation") +) +metrics_no_cov_no_ae <- rules_quantile( + exclude = c("coverage_50", "coverage_90", "coverage_deviation", "ae_median") +) # compute scores diff --git a/tests/testthat/test-default-scoring-rules.R b/tests/testthat/test-default-scoring-rules.R new file mode 100644 index 000000000..a53dcbe00 --- /dev/null +++ b/tests/testthat/test-default-scoring-rules.R @@ -0,0 +1,76 @@ +test_that("`select_rules` works as expected", { + + expect_equal( + scoringutils:::select_rules(rules_point(), select = NULL), + rules_point() + ) + + expect_equal( + scoringutils:::select_rules(rules_point(), select = NULL), + scoringutils:::select_rules(rules_point()) + ) + + expect_equal( + names(scoringutils:::select_rules(rules_point(), select = "ape")), + "ape" + ) + + expect_equal( + length(scoringutils:::select_rules(rules_point(), select = NULL, exclude = "ape")), + length(rules_point()) - 1 + ) + + # if both select and exclude are specified, exclude is ignored + expect_equal( + names(scoringutils:::select_rules(rules_point(), select = "ape", exclude = "ape")), + "ape" + ) + + # expect error if possibilities is not a list + expect_error( + scoringutils:::select_rules(rules_point, select = NULL), + "Assertion on 'rules' failed: Must be of type 'list', not 'closure'." 
+ ) +}) + + +test_that("default rules work as expected", { + + expect_true( + all(c( + is.list(rules_point()), + is.list(rules_binary()), + is.list(rules_quantile()), + is.list(rules_sample())) + ) + ) + + expect_equal( + names(rules_point(select = "ape")), + "ape" + ) + + expect_equal( + length(rules_binary(select = NULL, exclude = "brier_score")), + length(rules_binary()) - 1 + ) + + # if both select and exclude are specified, exclude is ignored + expect_equal( + names(scoringutils:::select_rules(rules_quantile(), select = "wis", exclude = "wis")), + "wis" + ) + + # expect error if select is not included in the default possibilities + expect_error( + rules_sample(select = "not-included"), + "Must be a subset of" + ) + + # expect error if exclude is not included in the default possibilities + expect_error( + rules_quantile(exclude = "not-included"), + "Must be a subset of" + ) +}) + diff --git a/tests/testthat/test-score.R b/tests/testthat/test-score.R index ac6249528..3cb960618 100644 --- a/tests/testthat/test-score.R +++ b/tests/testthat/test-score.R @@ -140,18 +140,18 @@ test_that("function produces output for a point case", { ) expect_equal( colnames(eval), - c("model", "target_type",names(metrics_point)) + c("model", "target_type", names(rules_point())) ) }) test_that("Changing metrics names works", { - metrics_test <- metrics_point + metrics_test <- rules_point() names(metrics_test)[1] = "just_testing" eval <- suppressMessages(score(example_point, metrics = metrics_test)) eval_summarised <- summarise_scores(eval, by = "model") expect_equal( colnames(eval_summarised), - c("model", "just_testing", names(metrics_point)[-1]) + c("model", "just_testing", names(rules_point())[-1]) ) }) @@ -174,7 +174,7 @@ test_that("score_quantile correctly handles separate results = FALSE", { nrow(eval) > 1, TRUE ) - expect_true(all(names(metrics_quantile) %in% colnames(eval))) + expect_true(all(names(rules_quantile()) %in% colnames(eval))) })