From 8667a2f5a355ee7781f1fe5a5fe2fde2e67122c9 Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Thu, 2 Apr 2020 14:00:49 +0100
Subject: [PATCH 1/4] setorderv gets new arg neworder, closes #4012

---
 NEWS.md               | 14 +++++++++++
 R/setkey.R            | 57 ++++++++++++++++++++++++++-----------------
 inst/tests/tests.Rraw | 19 +++++++++++++++
 man/setorder.Rd       | 13 ++++++++--
 src/reorder.c         |  5 ++--
 5 files changed, 82 insertions(+), 26 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 71fd76aa65..59596479dd 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -81,6 +81,20 @@ unit = "s")
 
 14. Added support for `round()` and `trunc()` to extend functionality of `ITime`. `round()` and `trunc()` can be used with argument units: "hours" or "minutes". Thanks to @JensPederM for the suggestion and PR.
 
+15. Function `setorderv` gains a new argument `neworder`, which lets the user supply a custom row ordering directly, [#4012](https://github.com/Rdatatable/data.table/issues/4012).
+
+```r
+DT = data.table(id1 = c("a","b","c","d"), v1 = rnorm(4))
+
+# move first row to the end
+setorderv(DT, neworder = c(2:4,1L))
+DT
+
+# random order
+setorderv(DT, neworder = sample(nrow(DT)))
+DT
+```
+
 ## BUG FIXES
 
 1. A NULL timezone on POSIXct was interpreted by `as.IDate` and `as.ITime` as UTC rather than the session's default timezone (`tz=""`) , [#4085](https://github.com/Rdatatable/data.table/issues/4085).
diff --git a/R/setkey.R b/R/setkey.R
index 334ca1e801..1e130777ce 100644
--- a/R/setkey.R
+++ b/R/setkey.R
@@ -274,30 +274,43 @@ setorder = function(x, ..., na.last=FALSE)
   setorderv(x, cols, order, na.last)
 }
 
-setorderv = function(x, cols = colnames(x), order=1L, na.last=FALSE)
-{
-  if (is.null(cols)) return(x)
+setorderv = function(x, cols = colnames(x), order=1L, na.last=FALSE, neworder) {
   if (!is.data.frame(x)) stop("x must be a data.frame or data.table")
-  na.last = as.logical(na.last)
-  if (is.na(na.last) || !length(na.last)) stop('na.last must be logical TRUE/FALSE')
-  if (!is.character(cols)) stop("cols is not a character vector. Please see further information in ?setorder.")
-  if (!length(cols)) {
-    warning("cols is a character vector of zero length. Use NULL instead, or wrap with suppressWarnings() to avoid this warning.")
-    return(x)
-  }
-  if (!all(nzchar(cols))) stop("cols contains some blanks.")     # TODO: probably I'm checking more than necessary here.. there are checks in 'forderv' as well
-  # remove backticks from cols
-  cols = gsub("`", "", cols, fixed = TRUE)
-  miss = !(cols %chin% colnames(x))
-  if (any(miss)) stop("some columns are not in the data.table: ", paste(cols[miss], collapse=","))
-  if (".xi" %chin% colnames(x)) stop("x contains a column called '.xi'. Conflicts with internal use by data.table.")
-  for (i in cols) {
-    .xi = x[[i]]  # [[ is copy on write, otherwise checking type would be copying each column
-    if (!typeof(.xi) %chin% ORDERING_TYPES) stop("Column '",i,"' is type '",typeof(.xi),"' which is not supported for ordering currently.")
-  }
-  if (!is.character(cols) || length(cols)<1L) stop("Internal error. 'cols' should be character at this point in setkey; please report.") # nocov
+  if (is.null(cols) || !length(x)) return(x)
+  if (!missing(neworder)) {
+    if (!missing(cols))
+      stop("Provide either cols or neworder, not both")
+    if (!missing(order))
+      warning("Argument order is ignored when neworder argument was provided")
+    if (!missing(na.last))
+      warning("Argument na.last is ignored when neworder argument was provided")
+    if (length(neworder) != nrow(x))
+      stop("Provided neworder is a different length than nrow of provided data.table")
+    if (!is.integer(neworder) && is.numeric(neworder))
+      neworder = as.integer(neworder)
+    o = neworder
+  } else {
+    na.last = as.logical(na.last)
+    if (is.na(na.last) || !length(na.last)) stop('na.last must be logical TRUE/FALSE')
+    if (!is.character(cols)) stop("cols is not a character vector. Please see further information in ?setorder.")
+    if (!length(cols)) {
+      warning("cols is a character vector of zero length. Use NULL instead, or wrap with suppressWarnings() to avoid this warning.")
+      return(x)
+    }
+    if (!all(nzchar(cols))) stop("cols contains some blanks.")     # TODO: probably I'm checking more than necessary here.. there are checks in 'forderv' as well
+    # remove backticks from cols
+    cols = gsub("`", "", cols, fixed = TRUE)
+    miss = !(cols %chin% colnames(x))
+    if (any(miss)) stop("some columns are not in the data.table: ", paste(cols[miss], collapse=","))
+    if (".xi" %chin% colnames(x)) stop("x contains a column called '.xi'. Conflicts with internal use by data.table.")
+    for (i in cols) {
+      .xi = x[[i]]  # [[ is copy on write, otherwise checking type would be copying each column
+      if (!typeof(.xi) %chin% ORDERING_TYPES) stop("Column '",i,"' is type '",typeof(.xi),"' which is not supported for ordering currently.")
+    }
+    if (!is.character(cols) || length(cols)<1L) stop("Internal error. 'cols' should be character at this point in setkey; please report.") # nocov
 
-  o = forderv(x, cols, sort=TRUE, retGrp=FALSE, order=order, na.last=na.last)
+    o = forderv(x, cols, sort=TRUE, retGrp=FALSE, order=order, na.last=na.last)
+  }
   if (length(o)) {
     .Call(Creorder, x, o)
     if (is.data.frame(x) & !is.data.table(x)) {
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 7cc6819e8f..2a6f226364 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -16846,3 +16846,22 @@ A = data.table(A=c(complex(real = 1:3, imaginary=c(0, -1, 1)), NaN))
 test(2138.3, rbind(A,B), data.table(A=c(as.character(A$A), B$A)))
 A = data.table(A=as.complex(rep(NA, 5)))
 test(2138.4, rbind(A,B), data.table(A=c(as.character(A$A), B$A)))
+
+# setorderv could take index vector too #4012
+DT = data.table(id1 = c("a","b","c","d"), v1 = rnorm(4))
+d = copy(DT)
+test(2139.01, setorderv(DT, neworder = c(2:4,1L)), d[c(2:4,1L)]) # move first row to the end
+DT = copy(d)
+s = sample(nrow(DT))
+test(2139.02, setorderv(DT, neworder = s), d[s]) # random order
+DT = copy(d)
+test(2139.03, setorderv(DT, order=1L, neworder = 1:4), d, warning = "Argument order is ignored")
+test(2139.04, setorderv(DT, na.last=FALSE, neworder = 1:4), d, warning = "Argument na.last is ignored")
+test(2139.05, setorderv(DT, order=1L, na.last=FALSE, neworder = 1:4), d, warning = c("Argument order is ignored","Argument na.last is ignored"))
+test(2139.06, setorderv(DT, cols="id1", neworder = 1:4), error = "Provide either cols or neworder, not both")
+test(2139.07, setorderv(DT, neworder = 1:3), error = "Provided neworder is a different length than nrow of provided data.table")
+test(2139.08, setorderv(DT, neworder = 1:5), error = "Provided neworder is a different length than nrow of provided data.table")
+test(2139.09, setorderv(DT, neworder = c(1L,1L,2L,3L)), error = "duplicated.")
+test(2139.10, setorderv(DT, neworder = c(1L,2L,3L,NA_integer_)), error = "NA")
+test(2139.11, setorderv(DT, neworder = c(1L,0L,2L,3L)), error = "out of range")
+test(2139.12, setorderv(DT, neworder = c(1L,5L,2L,3L)), error = "out of range")
diff --git a/man/setorder.Rd b/man/setorder.Rd
index 6e7b598427..cd04eed393 100644
--- a/man/setorder.Rd
+++ b/man/setorder.Rd
@@ -28,7 +28,7 @@ Also note that \code{data.table} always reorders in "C-locale" (see Details). To
 
 \usage{
 setorder(x, \dots, na.last=FALSE)
-setorderv(x, cols = colnames(x), order=1L, na.last=FALSE)
+setorderv(x, cols = colnames(x), order=1L, na.last=FALSE, neworder)
 # optimised to use data.table's internal fast order
 # x[order(., na.last=TRUE)]
 }
@@ -48,6 +48,7 @@ when \code{b} is of type \code{character} as well. }
 \code{na.last=NA} is valid only for \code{x[order(., na.last)]} and its
 default is \code{TRUE}. \code{setorder} and \code{setorderv} only accept
 \code{TRUE}/\code{FALSE} with default \code{FALSE}. }
+\item{neworder}{ Integer vector, strict permutation of \code{1:nrow(x)}, no repeats, zeros, NAs, also known as a \emph{shuffle}. }
 }
 \details{
 \code{data.table} implements its own fast radix-based ordering. See the references for some exposition on the concept of radix sort.
@@ -115,7 +116,6 @@ If you require a copy, take a copy first (using \code{DT2 = copy(DT)}). See
   \code{\link{setDF}}, \code{\link{copy}}, \code{\link{setNumericRounding}}
 }
 \examples{
-
 set.seed(45L)
 DT = data.table(A=sample(3, 10, TRUE),
          B=sample(letters[1:3], 10, TRUE), C=sample(10))
@@ -125,6 +125,15 @@ setorder(DT, A, -B)
 
 # same as above, but using setorderv
 setorderv(DT, c("A", "B"), c(1, -1))
+
+# neworder
+DT = data.table(id1 = c("a","b","c","d"), v1 = rnorm(4))
+
+# move first row to the end
+setorderv(DT, neworder = c(2:4,1L))
+
+# random order
+setorderv(DT, neworder = sample(nrow(DT)))
 }
 \keyword{ data }
 
diff --git a/src/reorder.c b/src/reorder.c
index da3784e94d..eb18766198 100644
--- a/src/reorder.c
+++ b/src/reorder.c
@@ -2,7 +2,7 @@
 
 SEXP reorder(SEXP x, SEXP order)
 {
-  // For internal use only by setkey().
+  // For internal use by setkeyv and setorderv
   // 'order' must be a strict permutation of 1:n; i.e. no repeats, zeros, NAs. Also known as a shuffle.
   // If only a small subset in the middle is reordered, the ends are moved in to avoid wasteful work.
   // x may be a vector, or a list of same-length vectors (typically a data.table).
@@ -52,7 +52,8 @@ SEXP reorder(SEXP x, SEXP order)
               i+1, idx[i], length(order));
     // This should run in reasonable time because although 'seen' is random write, it is writing to just 1 byte * nrow
     // which is relatively small and has a good chance of fitting in cache.
-    // A worry mitigated by this check is a user passing their own incorrect ordering using ::: to reach this internal.
+    // A worry mitigated by this check is a user passing their own incorrect ordering using ::: to reach this internal; this has already been seen on Stack Overflow, so it is likely to happen again.
+    // The new neworder argument of setorderv also passes a user-supplied ordering here, so it is likely to hit this check too.
     // This check is once up front, and then idx is applied to all the columns which is where the most time is spent.
   }
 

From 6dfd502dd597fb017e26cbe05d6f82c92ba5a22f Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Sat, 11 Apr 2020 17:27:27 +0100
Subject: [PATCH 2/4] codecov missing test, unfold C code for more

---
 inst/tests/tests.Rraw |  3 ++-
 src/reorder.c         | 36 ++++++++++++++++++++++--------------
 2 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 2a6f226364..7ec1d720f1 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -16861,7 +16861,8 @@ test(2139.05, setorderv(DT, order=1L, na.last=FALSE, neworder = 1:4), d, warning
 test(2139.06, setorderv(DT, cols="id1", neworder = 1:4), error = "Provide either cols or neworder, not both")
 test(2139.07, setorderv(DT, neworder = 1:3), error = "Provided neworder is a different length than nrow of provided data.table")
 test(2139.08, setorderv(DT, neworder = 1:5), error = "Provided neworder is a different length than nrow of provided data.table")
-test(2139.09, setorderv(DT, neworder = c(1L,1L,2L,3L)), error = "duplicated.")
+test(2139.09, setorderv(DT, neworder = c(1L,1L,2L,3L)), error = "duplicated")
 test(2139.10, setorderv(DT, neworder = c(1L,2L,3L,NA_integer_)), error = "NA")
 test(2139.11, setorderv(DT, neworder = c(1L,0L,2L,3L)), error = "out of range")
 test(2139.12, setorderv(DT, neworder = c(1L,5L,2L,3L)), error = "out of range")
+test(2139.13, setorderv(DT, neworder = as.numeric(s)), d[s])
diff --git a/src/reorder.c b/src/reorder.c
index eb18766198..023ff8257f 100644
--- a/src/reorder.c
+++ b/src/reorder.c
@@ -1,7 +1,6 @@
 #include "data.table.h"
 
-SEXP reorder(SEXP x, SEXP order)
-{
+SEXP reorder(SEXP x, SEXP order) {
   // For internal use by setkeyv and setorderv
   // 'order' must be a strict permutation of 1:n; i.e. no repeats, zeros, NAs. Also known as a shuffle.
   // If only a small subset in the middle is reordered, the ends are moved in to avoid wasteful work.
@@ -19,29 +18,39 @@ SEXP reorder(SEXP x, SEXP order)
         error(_("Column %d is length %d which differs from length of column 1 (%d). Invalid data.table."), i+1, length(v), nrow);
       if (SIZEOF(v) > maxSize)
         maxSize=SIZEOF(v);
-      if (ALTREP(v)) SET_VECTOR_ELT(x, i, copyAsPlain(v));
+      if (ALTREP(v))
+        SET_VECTOR_ELT(x, i, copyAsPlain(v));
     }
     copySharedColumns(x); // otherwise two columns which point to the same vector would be reordered and then re-reordered, issues linked in PR#3768
   } else {
     if (SIZEOF(x)!=4 && SIZEOF(x)!=8 && SIZEOF(x)!=16)
       error(_("reorder accepts vectors but this non-VECSXP is type '%s' which isn't yet supported (SIZEOF=%d)"), type2char(TYPEOF(x)), SIZEOF(x));
-    if (ALTREP(x)) error(_("Internal error in reorder.c: cannot reorder an ALTREP vector. Please see NEWS item 2 in v1.11.4 and report this as a bug.")); // # nocov
+    if (ALTREP(x))
+      error(_("Internal error in reorder.c: cannot reorder an ALTREP vector. Please see NEWS item 2 in v1.11.4 and report this as a bug.")); // # nocov
     maxSize = SIZEOF(x);
     nrow = length(x);
     ncol = 1;
   }
-  if (!isInteger(order)) error(_("order must be an integer vector"));
-  if (length(order) != nrow) error(_("nrow(x)[%d]!=length(order)[%d]"),nrow,length(order));
+  if (!isInteger(order))
+    error(_("order must be an integer vector"));
+  if (length(order) != nrow)
+    error(_("nrow(x)[%d]!=length(order)[%d]"),nrow,length(order));
   int nprotect = 0;
-  if (ALTREP(order)) { order=PROTECT(copyAsPlain(order)); nprotect++; }  // TODO: if it's an ALTREP sequence some optimizations are possible rather than expand
+  if (ALTREP(order)) {
+    order=PROTECT(copyAsPlain(order)); nprotect++;
+  }  // TODO: if it's an ALTREP sequence some optimizations are possible rather than expand
 
   const int *restrict idx = INTEGER(order);
   int i=0;
-  while (i<nrow && idx[i] == i+1) ++i;
+  while (i<nrow && idx[i] == i+1)
+    ++i;
   const int start=i;
-  if (start==nrow) { UNPROTECT(nprotect); return R_NilValue; }  // input is 1:n, nothing to do
+  if (start==nrow) {
+    UNPROTECT(nprotect); return R_NilValue;
+  }  // input is 1:n, nothing to do
   i = nrow-1;
-  while (idx[i] == i+1) --i;
+  while (idx[i] == i+1)
+    --i;
   const int end=i, nmid=end-start+1;
 
   uint8_t *seen = (uint8_t *)R_alloc(nmid, sizeof(uint8_t)); // detect duplicates
@@ -99,11 +108,11 @@ SEXP reorder(SEXP x, SEXP order)
   return R_NilValue;
 }
 
-SEXP setcolorder(SEXP x, SEXP o)
-{
+SEXP setcolorder(SEXP x, SEXP o) {
   SEXP names = getAttrib(x, R_NamesSymbol);
   const int ncol=LENGTH(x);
-  if (isNull(names)) error(_("dt passed to setcolorder has no names"));
+  if (isNull(names))
+    error(_("dt passed to setcolorder has no names"));
   if (ncol != LENGTH(names))
     error(_("Internal error: dt passed to setcolorder has %d columns but %d names"), ncol, LENGTH(names));  // # nocov
   SEXP tt = PROTECT(allocVector(VECSXP, 2));
@@ -113,4 +122,3 @@ SEXP setcolorder(SEXP x, SEXP o)
   UNPROTECT(1);
   return R_NilValue;
 }
-

From 5541e9c5204507f3ebe5aa8a7dbc7dae433935cd Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Sat, 11 Apr 2020 17:59:50 +0100
Subject: [PATCH 3/4] address lines revealed by codecov

---
 R/setkey.R            | 2 --
 inst/tests/tests.Rraw | 8 ++++++--
 src/reorder.c         | 2 +-
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/R/setkey.R b/R/setkey.R
index 1e130777ce..a719049ffa 100644
--- a/R/setkey.R
+++ b/R/setkey.R
@@ -284,8 +284,6 @@ setorderv = function(x, cols = colnames(x), order=1L, na.last=FALSE, neworder) {
       warning("Argument order is ignored when neworder argument was provided")
     if (!missing(na.last))
       warning("Argument na.last is ignored when neworder argument was provided")
-    if (length(neworder) != nrow(x))
-      stop("Provided neworder is a different length than nrow of provided data.table")
     if (!is.integer(neworder) && is.numeric(neworder))
       neworder = as.integer(neworder)
     o = neworder
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 7ec1d720f1..1658b8d414 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -13725,6 +13725,9 @@ test(1967.622, setnames(x, 1:2, c("a",NA)), error = "NA in 'new' at positions [2
 test(1967.63, setcolorder(x, c(1, 1)), error = 'Item 2 of order (1) is either NA, out of range [1,2], or is duplicated. The new order must be a strict permutation of 1:n')
 test(1967.64, setcolorder(x, 1+3i), error = 'must be character or numeric')
 test(1967.65, setcolorder(x, 300), error = 'specify non existing column*.*300')
+d = data.table(1:2, 2:3)
+setattr(d, "names", NULL)
+test(1967.651, setcolorder(d, 2:1), error="dt passed to setcolorder has no names")
 
 test(1967.66, rbindlist(list(x), idcol = FALSE), rbindlist(list(x)))
 test(1967.67, rbindlist(list(x), idcol = 1+3i), error = 'idcol must be a logical')
@@ -16859,10 +16862,11 @@ test(2139.03, setorderv(DT, order=1L, neworder = 1:4), d, warning = "Argument or
 test(2139.04, setorderv(DT, na.last=FALSE, neworder = 1:4), d, warning = "Argument na.last is ignored")
 test(2139.05, setorderv(DT, order=1L, na.last=FALSE, neworder = 1:4), d, warning = c("Argument order is ignored","Argument na.last is ignored"))
 test(2139.06, setorderv(DT, cols="id1", neworder = 1:4), error = "Provide either cols or neworder, not both")
-test(2139.07, setorderv(DT, neworder = 1:3), error = "Provided neworder is a different length than nrow of provided data.table")
-test(2139.08, setorderv(DT, neworder = 1:5), error = "Provided neworder is a different length than nrow of provided data.table")
+test(2139.07, setorderv(DT, neworder = 1:3), error = "length must be equal to nrow")
+test(2139.08, setorderv(DT, neworder = 1:5), error = "length must be equal to nrow")
 test(2139.09, setorderv(DT, neworder = c(1L,1L,2L,3L)), error = "duplicated")
 test(2139.10, setorderv(DT, neworder = c(1L,2L,3L,NA_integer_)), error = "NA")
 test(2139.11, setorderv(DT, neworder = c(1L,0L,2L,3L)), error = "out of range")
 test(2139.12, setorderv(DT, neworder = c(1L,5L,2L,3L)), error = "out of range")
 test(2139.13, setorderv(DT, neworder = as.numeric(s)), d[s])
+test(2139.14, setorderv(DT, neworder=c("a","b")), error="must be an integer vector")
diff --git a/src/reorder.c b/src/reorder.c
index 023ff8257f..2042e5bb25 100644
--- a/src/reorder.c
+++ b/src/reorder.c
@@ -34,7 +34,7 @@ SEXP reorder(SEXP x, SEXP order) {
   if (!isInteger(order))
     error(_("order must be an integer vector"));
   if (length(order) != nrow)
-    error(_("nrow(x)[%d]!=length(order)[%d]"),nrow,length(order));
+    error(_("order length must be equal to nrow of x: nrow(x)[%d]!=length(order)[%d]"),nrow,length(order));
   int nprotect = 0;
   if (ALTREP(order)) {
     order=PROTECT(copyAsPlain(order)); nprotect++;

From 3b18044311cbe76e041528fe6c872fd3f3ebeb51 Mon Sep 17 00:00:00 2001
From: jangorecki <j.gorecki@wit.edu.pl>
Date: Mon, 18 May 2020 19:23:05 +0100
Subject: [PATCH 4/4] setorder doc, move most to forder.Rd

---
 man/setorder.Rd | 120 +++++++++---------------------------------------
 1 file changed, 21 insertions(+), 99 deletions(-)

diff --git a/man/setorder.Rd b/man/setorder.Rd
index cd04eed393..2179bb54c4 100644
--- a/man/setorder.Rd
+++ b/man/setorder.Rd
@@ -1,119 +1,41 @@
 \name{setorder}
 \alias{setorder}
 \alias{setorderv}
-\alias{order}
-\alias{fastorder}
-\alias{forder}
-\alias{forderv}
-
 \title{Fast row reordering of a data.table by reference}
 \description{
-In \code{data.table} parlance, all \code{set*} functions change their input
-\emph{by reference}. That is, no copy is made at all, other than temporary
-working memory, which is as large as one column. The only other
-\code{data.table} operator that modifies input by reference is \code{\link{:=}}.
-Check out the \code{See Also} section below for other \code{set*} function
-\code{data.table} provides.
-
-\code{setorder} (and \code{setorderv}) reorders the rows of a \code{data.table}
-based on the columns (and column order) provided. It reorders the table
-\emph{by reference} and is therefore very memory efficient.
-
-Note that queries like \code{x[order(.)]} are optimised internally to use \code{data.table}'s fast order.
-
-Also note that \code{data.table} always reorders in "C-locale" (see Details). To sort by session locale, use \code{x[base::order(.)]}.
-
-\code{bit64::integer64} type is also supported for reordering rows of a \code{data.table}.
+  \code{setorder} (and \code{setorderv}) reorders the rows of a \code{data.table} based on the columns (and column order) provided; \code{setorderv} can alternatively apply a custom row order supplied in \code{neworder}. It reorders the table \emph{by reference} and is therefore very memory efficient.
 }
-
 \usage{
-setorder(x, \dots, na.last=FALSE)
-setorderv(x, cols = colnames(x), order=1L, na.last=FALSE, neworder)
-# optimised to use data.table's internal fast order
-# x[order(., na.last=TRUE)]
+  setorder(x, \dots, na.last=FALSE)
+  setorderv(x, cols = colnames(x), order=1L, na.last=FALSE, neworder)
 }
 \arguments{
-\item{x}{ A \code{data.table}. }
-\item{\dots}{ The columns to sort by. Do not quote column names. If \code{\dots}
-is missing (ex: \code{setorder(x)}), \code{x} is rearranged based on all
-columns in ascending order by default. To sort by a column in descending order
-prefix the symbol \code{"-"} which means "descending" (\emph{not} "negative", in this context), i.e., \code{setorder(x, a, -b, c)}. The \code{-b} works
-when \code{b} is of type \code{character} as well. }
-\item{cols}{ A character vector of column names of \code{x} by which to order. By default, sorts over all columns; \code{cols = NULL} will return \code{x} untouched. Do not add \code{"-"} here. Use \code{order} argument instead. }
-\item{order}{ An integer vector with only possible values of \code{1} and
-\code{-1}, corresponding to ascending and descending order. The length of
-\code{order} must be either \code{1} or equal to that of \code{cols}. If
-\code{length(order) == 1}, it is recycled to \code{length(cols)}. }
-\item{na.last}{ \code{logical}. If \code{TRUE}, missing values in the data are placed last; if \code{FALSE}, they are placed first; if \code{NA} they are removed.
-\code{na.last=NA} is valid only for \code{x[order(., na.last)]} and its
-default is \code{TRUE}. \code{setorder} and \code{setorderv} only accept
-\code{TRUE}/\code{FALSE} with default \code{FALSE}. }
-\item{neworder}{ Integer vector, strict permutation of \code{1:nrow(x)}, no repeats, zeros, NAs, also known as a \emph{shuffle}. }
+  \item{x}{ A \code{data.table}. }
+  \item{\dots}{ The columns to sort by. Do not quote column names. If \code{\dots} is missing (ex: \code{setorder(x)}), \code{x} is rearranged based on all columns in ascending order by default. To sort by a column in descending order prefix the symbol \code{"-"} which means \emph{descending} (not \emph{negative}, in this context), i.e., \code{setorder(x, a, -b, c)}. The \code{-b} works when \code{b} is of type \code{character} as well. }
+  \item{cols}{ A character vector of column names of \code{x} by which to order. By default, sorts over all columns; \code{cols = NULL} will return \code{x} untouched. Do not add \code{"-"} here. Use \code{order} argument instead. }
+  \item{order}{ An integer vector with only possible values of \code{1} and \code{-1}, corresponding to ascending and descending order. The length of \code{order} must be either \code{1} or equal to that of \code{cols}. If \code{length(order) == 1}, it is recycled to \code{length(cols)}. }
+  \item{na.last}{ \code{logical}. If \code{TRUE}, missing values in the data are placed last; if \code{FALSE} (default), they are placed first. }
+  \item{neworder}{ Integer vector giving a custom row order to use instead of an order based on columns. It must be a strict permutation of \code{1:nrow(x)} (no repeats, zeros or NAs), also known as a \emph{shuffle}. }
 }
 \details{
-\code{data.table} implements its own fast radix-based ordering. See the references for some exposition on the concept of radix sort.
+  \code{setorder} accepts unquoted column names (with names preceded with a \code{-} sign for descending order) and reorders \code{data.table} rows
+\emph{by reference}, e.g., \code{setorder(x, a, -b, c)}. We emphasize that this means \emph{descending} and not \emph{negative} because the implementation simply reverses the sort order, as opposed to sorting the opposite of the input (which would be inefficient).
 
-\code{setorder} accepts unquoted column names (with names preceded with a
-\code{-} sign for descending order) and reorders \code{data.table} rows
-\emph{by reference}, for e.g., \code{setorder(x, a, -b, c)}. We emphasize that
-this means "descending" and not "negative" because the implementation simply
-reverses the sort order, as opposed to sorting the opposite of the input
-(which would be inefficient).
+  Note that \code{-b} also works with columns of type \code{character} unlike \code{\link[base]{order}}, which requires \code{-xtfrm(y)} instead (which is slow).
 
-Note that \code{-b} also works with columns of type \code{character} unlike
-\code{\link[base]{order}}, which requires \code{-xtfrm(y)} instead (which is slow).
-\code{setorderv} in turn accepts a character vector of column names and an
-integer vector of column order separately.
+  \code{setorderv} in turn accepts a character vector of column names and an integer vector of column order separately.
 
-Note that \code{\link{setkey}} still requires and will always sort only in
-ascending order, and is different from \code{setorder} in that it additionally
-sets the \code{sorted} attribute.
-
-\code{na.last} argument, by default, is \code{FALSE} for \code{setorder} and
-\code{setorderv} to be consistent with \code{data.table}'s \code{setkey} and
-is \code{TRUE} for \code{x[order(.)]} to be consistent with \code{base::order}.
-Only \code{x[order(.)]} can have \code{na.last = NA} as it is a subset operation
-as opposed to \code{setorder} or \code{setorderv} which reorders the data.table
-by reference.
-
-\code{data.table} always reorders in "C-locale".
-As a consequence, the ordering may be different to that obtained by \code{base::order}.
-In English locales, for example, sorting is case-sensitive in C-locale.
-Thus, sorting \code{c("c", "a", "B")} returns \code{c("B", "a", "c")} in \code{data.table}
- but \code{c("a", "B", "c")} in \code{base::order}. Note this makes no difference in most cases
-of data; both return identical results on ids where only upper-case or lower-case letters are present (\code{"AB123" < "AC234"}
-is true in both), or on country names and other proper nouns which are consistently capitalized.
-For example, neither \code{"America" < "Brazil"} nor
-\code{"america" < "brazil"} are affected since the first letter is consistently
-capitalized.
-
-Using C-locale makes the behaviour of sorting in \code{data.table} more consistent across sessions and locales.
-The behaviour of \code{base::order} depends on assumptions about the locale of the R session.
-In English locales, \code{"america" < "BRAZIL"} is true by default
-but false if you either type \code{Sys.setlocale(locale="C")} or the R session has been started in a C locale
-for you -- which can happen on servers/services since the locale comes from the environment the R session
-was started in. By contrast, \code{"america" < "BRAZIL"} is always \code{FALSE} in \code{data.table} regardless of the way your R session was started.
-
-If \code{setorder} results in reordering of the rows of a keyed \code{data.table},
-then its key will be set to \code{NULL}.
+  Note that \code{\link{setkey}} still requires and will always sort only in ascending order, and is different from \code{setorder} in that it additionally sets the \code{sorted} attribute.
 }
-\value{
-The input is modified by reference, and returned (invisibly) so it can be used
-in compound statements; e.g., \code{setorder(DT,a,-b)[, cumsum(c), by=list(a,b)]}.
-If you require a copy, take a copy first (using \code{DT2 = copy(DT)}). See
-\code{\link{copy}}.
+\note{
+  \code{data.table} always reorders in \emph{C-locale}, see \code{\link{forder}} for details. To reorder by session locale, use \code{setorderv(DT, neworder=base::order(.))}.
 }
-\references{
-  \url{https://en.wikipedia.org/wiki/Radix_sort}\cr
-  \url{https://en.wikipedia.org/wiki/Counting_sort}\cr
-  \url{http://stereopsis.com/radix.html}\cr
-  \url{https://codercorner.com/RadixSortRevisited.htm}\cr
-  \url{https://medium.com/basecs/getting-to-the-root-of-sorting-with-radix-sort-f8e9240d4224}
+\value{
+  The input is modified by reference, and returned (invisibly) so it can be used in compound statements; e.g., \code{setorder(DT,a,-b)[, cumsum(c), by=list(a,b)]}. If you require a copy, take a copy first (using \code{DT2 = copy(DT)}). See \code{\link{copy}}.
+  If \code{setorder} results in reordering of the rows of a keyed \code{data.table}, then its \emph{key} will be set to \code{NULL}.
 }
 \seealso{
-  \code{\link{setkey}}, \code{\link{setcolorder}}, \code{\link{setattr}},
-  \code{\link{setnames}}, \code{\link{set}}, \code{\link{:=}}, \code{\link{setDT}},
-  \code{\link{setDF}}, \code{\link{copy}}, \code{\link{setNumericRounding}}
+  \code{\link{forder}}, \code{\link{setkey}}, \code{\link{setcolorder}}, \code{\link{copy}}
 }
 \examples{
 set.seed(45L)
@@ -133,7 +55,7 @@ DT = data.table(id1 = c("a","b","c","d"), v1 = rnorm(4))
 setorderv(DT, neworder = c(2:4,1L))
 
 # random order
-setorderv(DT, neworder = sample(nrow(DT)))
+setorderv(DT, neworder = sample.int(nrow(DT)))
 }
 \keyword{ data }