Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) {
binary = data.table:::binary
bmerge = data.table:::bmerge
brackify = data.table:::brackify
Ctest_dt_win_snprintf = data.table:::Ctest_dt_win_snprintf
chmatchdup = data.table:::chmatchdup
compactprint = data.table:::compactprint
cube.data.table = data.table:::cube.data.table
Expand Down Expand Up @@ -16910,3 +16911,39 @@ dt = data.table(time = 1:8, v = INT(5,7,6,1,8,4,2,3))
dt[time == 2L, v := 2L]
dt[time == 7L, v := 7L]
test(2140, dt[dt, on=.(time>time, v>v), .N, by=.EACHI], data.table(time=1:8, v=INT(5,2,6,1,8,4,7,3), N=INT(3,5,2,4,0,1,0,0)))

# repeat of test 450 for #4402
test(2141, .Call(Ctest_dt_win_snprintf), NULL)
DT = data.table(a=1:3,b=4:6)
test(2142, rbind(DT,list(c=4L,a=7L)), error="Column 1 ['c'] of item 2 is missing in item 1")
if (.Platform$OS.type=="windows") local({
x = list(
LC_COLLATE = "Chinese (Simplified)_China.936",
LC_CTYPE = "Chinese (Simplified)_China.936",
LC_MONETARY = "Chinese (Simplified)_China.936",
LC_NUMERIC = "C",
LC_TIME = "Chinese (Simplified)_China.936"
)
for (i in seq_along(x)) {
lc = names(x)[[i]]
old = Sys.getlocale(lc)
Sys.setlocale(lc, x[[i]])
on.exit(Sys.setlocale(lc, old), add = TRUE)
}
old = Sys.getenv('LANGUAGE')
Sys.setenv('LANGUAGE' = 'zh_CN')
on.exit({
if (nzchar(old))
Sys.setenv('LANGUAGE' = old)
else
Sys.unsetenv('LANGUAGE')
}, add = TRUE)
# triggered segfault here in #4402, Windows-only under translation.
# test that the argument order changes correctly (the 'item 2' moves to the beginning of the message)
# since the argument order changes in this example (and that was the crash) we don't need to test
# the display of the Chinese characters here. Thanks to @shrektan for all his help on this.
test(2143, rbind(DT,list(c=4L,a=7L)), error="2.*1.*c.*1")
})
# test back to English (the argument order is back to 1,c,2,1)
test(2144, rbind(DT,list(c=4L,a=7L)), error="Column 1 ['c'] of item 2 is missing in item 1")

6 changes: 5 additions & 1 deletion src/data.table.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#include "types.h"
#include "po.h"
#ifdef WIN32 // positional specifiers (%n$) used in translations; #4402
//# define snprintf _sprintf_p // the non-n one in Windows takes n anyway so there's no separate _snprintf_f
# define snprintf dt_win_snprintf // see our snprintf.c; tried and failed to link to _sprintf_p on Windows
#endif
#define sprintf USE_SNPRINTF_NOT_SPRINTF // prevent use of sprintf in data.table source; force us to use n always

Expand Down Expand Up @@ -243,3 +243,7 @@ SEXP testMsgR(SEXP status, SEXP x, SEXP k);
//fifelse.c
SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na);
SEXP fcaseR(SEXP na, SEXP rho, SEXP args);

//snprintf.c
int dt_win_snprintf(char *dest, size_t n, const char *fmt, ...);

1 change: 0 additions & 1 deletion src/dt_stdio.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
#define DT_STDIO_H
#if defined(__MINGW32__) || (defined __MINGW64__)
#define __USE_MINGW_ANSI_STDIO 1
#define _XOPEN_SOURCE 1
#include <stdio.h>
#define PRId64 "lld"
#define PRIu64 "llu"
Expand Down
2 changes: 2 additions & 0 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ SEXP lock();
SEXP unlock();
SEXP islockedR();
SEXP allNAR();
SEXP test_dt_win_snprintf();

// .Externals
SEXP fastmean();
Expand Down Expand Up @@ -211,6 +212,7 @@ R_CallMethodDef callMethods[] = {
{"CfrollapplyR", (DL_FUNC) &frollapplyR, -1},
{"CtestMsgR", (DL_FUNC) &testMsgR, -1},
{"C_allNAR", (DL_FUNC) &allNAR, -1},
{"Ctest_dt_win_snprintf", (DL_FUNC)&test_dt_win_snprintf, -1},
{NULL, NULL, 0}
};

Expand Down
103 changes: 103 additions & 0 deletions src/snprintf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// For translations (#4402) we need positional specifiers (%n$), a non-C99 POSIX extension.
// On Linux and Mac, standard snprintf supports positional specifiers.
// On Windows, we tried many things but just couldn't achieve it. This may be why R uses
// a third party library, trio, on Windows. But R does not expose trio for use by packages.
// So ...
// Rather than require compile flags (such as _XOPEN_SOURCE or POSIX_C_SOURCE), or require
// linking to particular Windows libraries which may be fragile over time depending on
// user's environments, we use the standard C99 features here. To do so, we simulate
// positionals via format massage. Just on Windows, all snprintf calls are replaced with
// this dt_win_snprintf via a #define in data.table.h. The goal of this massage is to be
// as light and minimal as possible.
// In C it is not possible, portably, to reorder a va_list (sadly).
// In C you must past the correct type to va_arg(), so even to navigate va_list you
// must parse and rely on fmt. But we don't want to reimplement all the types and modifiers.
// Hence, reordering the specifiers, passing the va_list to the library, and then
// putting the output strings into the desired order afterwards.
// NB: must be thread-safe

#include "data.table.h"
#include <stdarg.h>
#undef snprintf // on Windows, just in this file, we do want to use the C library's snprintf

int dt_win_snprintf(char *dest, size_t n, const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
const char *ch = strstr(fmt, "%1$");
if (ch==NULL) {
// no positionals present, just pass on to the C library vsnprintf as-is
int ans = vsnprintf(dest, n, fmt, ap);
va_end(ap);
return ans;
}
// Standards say that if one specifier uses position, they all must. Good.
// We will not allow repeats though; must be a permutation.
// As in C, there are few checks; wrong/mismatching positionals will be a crash.
// This is for messages/errors, so time should not be spent on a fast solution.
char *buff = (char *)malloc(n); // not R_alloc as we need to be thread-safe
if (!buff) error("Unable to allocate %d bytes for buffer in dt_win_snprintf", n);
int pos=1;
// Use dest as temp to write the reordered specifiers
char *ch2=dest;
#define NDELIM 2
const char delim[NDELIM+1] = "\x7f\x7f"; // tokenize using 2 DELs
while (ch!=NULL) { // ch is resting on start of %pos$ in fmt
// Find end of %[parameter][flags][width][.precision][length]type
// https://en.wikipedia.org/wiki/Printf_format_string#Syntax
const char *start = strchr(ch, '$')+1; // look for $ since pos could be > 9 or potentially > 99
const char *end = strpbrk(start,"diufFeEgGxXoscpaA"); // last character of specifier
*ch2++ = '%';
strncpy(ch2, start, end-start+1); // write the specifer in order without the n$ part
ch2 += end-start+1;
strcpy(ch2, delim); // includes '\0'
ch2 += NDELIM; // now resting on the '\0'
char posstr[15]; // 15 to avoid C compiler warnings
snprintf(posstr, 15, "%%%d$", ++pos); // snprintf was #undef above, so this is the C library one
ch = strstr(fmt, posstr);
}
int narg = pos-1;
vsnprintf(buff, n, dest, ap); // dest used as tmp here, holds reordered specifiers same order as ap
// All the hard formatting work and va_arg type navigation has now been done by the C library
// Now we just need to put the string results for each argument back into the desired positions
// First create lookups so we can loop through fmt once replacing the specifiers as they appear
const char *arg[narg];
int len[narg];
ch = buff;
for (int i=0; i<narg; ++i) {
arg[i] = ch;
const char *end = strstr(ch, delim);
len[i] = end-ch;
ch = end+NDELIM;
}
ch = fmt;
ch2 = dest;
while (*ch!='\0') {
if (*ch!='%') {*ch2++=*ch++; continue; } // copy non-specifier to the result as-is
if (ch[1]=='%') {*ch2++='%'; ch+=2; continue; } // interpret %% as a single %
if (ch[1]<'1' || ch[1]>'9') error("When positional %n$ is used, all specifiers must include positional");
int pos = atoi(ch+1);
ch = strpbrk(ch,"diufFeEgGxXoscpaA")+1; // move to the end of the specifier
strncpy(ch2, arg[pos-1], len[pos-1]); // write the result of the appropriate argument
ch2 += len[pos-1];
}
*ch2='\0';
free(buff);
va_end(ap);
return ch2-dest;
}

SEXP test_dt_win_snprintf()
{
char buff[50];
dt_win_snprintf(buff, 50, "No pos %d%%%d ok", 42, -84);
if (strcmp(buff, "No pos 42%-84 ok")) error("dt_win_snprintf test 1 failed: %s", buff);
dt_win_snprintf(buff, 50, "With pos %1$d%%%2$d ok", 42, -84);
if (strcmp(buff, "With pos 42%-84 ok")) error("dt_win_snprintf test 2 failed: %s", buff);
dt_win_snprintf(buff, 50, "With pos %2$d%%%1$d ok", 42, -84);
if (strcmp(buff, "With pos -84%42 ok")) error("dt_win_snprintf test 3 failed: %s", buff);
dt_win_snprintf(buff, 50, "%3$s %1$d %4$10s %2$03d$", -99, 12, "hello%2$d", "short");
if (strcmp(buff, "hello%2$d -99 short 012$")) error("dt_win_snprintf test 4 failed: %s", buff);
return R_NilValue;
}