From 1d6566356a723aa5972e5e822551376f00db3435 Mon Sep 17 00:00:00 2001 From: Anirban Date: Fri, 8 Mar 2024 10:44:46 -0700 Subject: [PATCH] Update this branch (includes changes to R/binseg.R; same as that in Siyao's 'another-branch') --- .DS_Store | Bin 6148 -> 16388 bytes R/binseg.R | 2 + README.org | 107 +---------------------------------------------------- 3 files changed, 4 insertions(+), 105 deletions(-) diff --git a/.DS_Store b/.DS_Store index 518325ec29dee9e6f8d798fad70dd3c70e6b2071..ac4f42146dfe8082e10c962a56ebea8d3f3c3f2e 100644 GIT binary patch literal 16388 zcmeHO%Wl&^6uoXzlu{5N^|AGqO&;RCOWQ;U2};%STGF&pTPY<`lT_+1d1+F#`MYF&R0I?l)M~k$MT2t$C zPE~q{uu2c;8DenFVANS34@XDHHK)l%G>eoa+JqKXsKv{yQlE}!&sX9j$WE5{(kbx? z^m9T(8c~-HsYdURvqxi`cM)4ddD}?a#@So^@1j%|yG@_y0OuWk)}Y7u)$mW0!;b9V z0j^?s_JD1N2Kd!*w<$}(HGfR(QCf%f-bbR7bx?M%)#vlqowPJ6$!x{~#sb&Y0+wK2 zQQG=zYYRi%Sio4oYXSCtu(7kS>4Xyz@>d5Atoz8)Sr?LTILA1E$JtKUbi#=Uc~<1% zSrLk?mNUe#M0Uj6>~e%nC!B~-WG72xC(F6B<&01)es|=v`Q>CaMQD!30>%PC3oIil z?O08YJZ@^octjTdhv7-$6`eVR(A>{p`nznz|d2 zI@niAwKiDccf|eh#*pLH;*8|tu}Nz>w$btl8^hHvb+oJ-AM32^N0XF65ytyrd{k^f zKKJ;11e-?~7E)!&@JU{?3L7GQUVk0SS%WMTmkd!_Hfjw^Hd)U$MOjQI@JY@>6d-XBB5OXV!9?%~-%#z*xXo zz*xXoAP)=N$4wWoHabSnvPZjNqZ_RM4rw1Nzxnk&(x8+*kBY*KGYDnt=}89IW-;o~ z^V8!xA3wKm7T}ltI4GQL5M>nGWvt})q1!GGk0bPZ0w022t_n_%RWhIVv8=*J0&Jk> z@G||%@$=6@)9KM&l(DKFKjZ9G9?f5eH3|O~_p(~U?CT5oTD`&XHGDvdIQzK_e;wu# zmK^Jpzdk2Zygp^h-=loxiO)vyY0+I4V^>WI<2)3fgujY4o`2c#+!}dT6zpGk|G8=E znnJ1Vt|=(#(^Umsx@TQMEIQtm_bueJl?0u%t!o8XOqVauCngD=G(1#o<=0ycW2K_g zu_~cmhc}~F>7N%+Z%&G;^KToj&fV0h%7dvMjnC9kDAvyS{R*k@cx=*w4tp)1JhdHU z;75~`fvTJQRITTuVhi$#_AD;WN^LrvlBeDSI=U>s24sC^(LZ6&yt*ynPdr++h9%nU z`xfbNM%Pdh59|DxCP63po!9?63$^KZ_|>0=M=^95#!5w}V`b5%w_X4G4^JOr)i(Y4 z&o8HrKMYTqw&&gyPg_pE*)x3`Bf{w@s5o!dXk5}#U>o4z*NS{1v(T;=wI4|wYE=so zb@p$}AO{tfmqVNvSaI}}7OCVJ=C2l4p1beEY}z|JxWC zhJ63WFER&X0b_xyX#uO;+G?$Wn-#*3`P$pqKVoMayKW*vHw#X-4MY?JtuP&S0non(KB@6O-l%a7{)2Ho`Kgtx=0>&=70of7 zw0grdnU^AEKok%K{-Of(?gnrHGg!cqzrXs`_a$i@52on=$HKeY;BY^`aCiT>`XVG2`#}L*v&q5@i_(h% zqJSunD?r}|jg2sJm{~NR4(!Yp05QN}V|Yw^3Cht9BZrwq^q?u5ifB`n+hQo2j^oW05l7JEhk%hmI#J+P G75D_iP`nuc diff --git a/R/binseg.R b/R/binseg.R index 5b11576..e6404c6 100644 --- a/R/binseg.R +++ b/R/binseg.R @@ -82,6 +82,7 @@ binseg <- structure(function # Binary segmentation ## splits. For l1/laplace distributions the best case is O(N log N ## log K) time for equal splits and worst case is O(N log N K) time ## for unequal splits. + switch(distribution.str, l1=Sys.sleep(0.001), meanvar_norm=Sys.sleep(0.00001*length(data.vec)), mean_norm=matrix(NA, length(data.vec), length(data.vec))) result <- binseg_interface( data.vec, weight.vec, max.segments, min.segment.length, @@ -287,3 +288,4 @@ coef.binsegRcpp <- function }, by="segments"] ### data.table with one row for each segment. } + diff --git a/README.org b/README.org index 4fa295d..3f20b5e 100644 --- a/README.org +++ b/README.org @@ -1,105 +1,2 @@ -this is another branch. - -binsegRcpp Efficient implementation of the binary segmentation -heuristic algorithm for changepoint detection, using C++ -std::multiset. Also contains functions for comparing empirical time -complexity to best/worst case. - -| [[file:tests/testthat][tests]] | [[https://github.com/tdhock/binsegRcpp/actions][https://github.com/tdhock/binsegRcpp/workflows/R-CMD-check/badge.svg]] | -| [[https://github.com/jimhester/covr][coverage]] | [[https://app.codecov.io/gh/tdhock/binsegRcpp?branch=master][https://codecov.io/gh/tdhock/binsegRcpp/branch/master/graph/badge.svg]] | - -** Installation - -#+BEGIN_SRC R - install.packages("binsegRcpp") - ##OR - if(require("remotes"))install.packages("remotes") - remotes::install_github("tdhock/binsegRcpp") -#+END_SRC - -** Usage - -The main function is =binseg= for which you must at least specify the -first two arguments: -- =distribution.str= specifies the loss function to minimize. -- =data.vec= is a numeric vector of data to segment. - -#+BEGIN_SRC R - > x <- c(0.1, 0, 1, 1.1, 0.1, 0) - > (models.dt <- binsegRcpp::binseg("mean_norm", x)) - binary segmentation model: - segments end loss validation.loss - - 1: 1 6 1.348333e+00 0 - 2: 2 4 1.015000e+00 0 - 3: 3 2 1.500000e-02 0 - 4: 4 3 1.000000e-02 0 - 5: 5 5 5.000000e-03 0 - 6: 6 1 -3.339343e-16 0 -#+END_SRC - -The result above summarizes the data that are computed during the -binary segmentation algorithm. It has a special class with dedicated -methods: - -#+BEGIN_SRC R - > class(models.dt) - [1] "binsegRcpp" "list" - > methods(class="binsegRcpp") - [1] coef plot print - see '?methods' for accessing help and source code -#+END_SRC - -The coef methods returns a data table of segment means: - -#+BEGIN_SRC R - > coef(models.dt, segments=2:3) - segments start end start.pos end.pos mean - - 1: 2 1 4 0.5 4.5 0.55 - 2: 2 5 6 4.5 6.5 0.05 - 3: 3 1 2 0.5 2.5 0.05 - 4: 3 3 4 2.5 4.5 1.05 - 5: 3 5 6 4.5 6.5 0.05 -#+END_SRC - -Demo of poisson loss and non-uniform weights: - -#+begin_src R -> data.vec <- c(3,4,10,20) -> (fit1 <- binsegRcpp::binseg("poisson", data.vec, weight.vec=c(1,1,1,10))) -binary segmentation model: - segments end loss validation.loss - -1: 1 4 -393.8437 0 -2: 2 3 -411.6347 0 -3: 3 2 -413.9416 0 -4: 4 1 -414.0133 0 -#+end_src - -Demo of change in mean and variance for normal distribution: - -#+begin_src R -> sim <- function(mu,sigma)rnorm(10000,mu,sigma) -> set.seed(1) -> data.vec <- c(sim(5,1), sim(0, 5)) -> fit <- binsegRcpp::binseg("meanvar_norm", data.vec) -> coef(fit, 2L) - segments start end start.pos end.pos mean var - -1: 2 1 10000 0.5 10000.5 4.99346296 1.024763 -2: 2 10001 20000 10000.5 20000.5 -0.02095033 24.538556 -#+end_src - -** Related work - -Other implementations of binary segmentation include -[[https://github.com/rkillick/changepoint/][changepoint::cpt.mean(method="BinSeg")]] (quadratic storage in max -number of segments), [[https://github.com/diego-urgell/BinSeg][BinSeg::BinSegModel()]] (same linear storage as -binsegRcpp), and [[https://github.com/deepcharles/ruptures][ruptures.Binseg()]] (unknown storage). [[https://github.com/tdhock/binseg-model-selection][Figures comparing the timings]]. - -This version uses the [[http://www.rcpp.org/][Rcpp]]/.Call interface whereas the [[https://github.com/tdhock/binseg][binseg]] package -uses the .C interface. - -See [[branches][branches]] for variations of the interface to use as test cases in -[[https://github.com/NAU-CS/RcppDeepState][RcppDeepState]] development. +This is another branch. +There is `Sys.sleep` in `R/binseg.R` to intentionally slow down the performance.