Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
230 changes: 186 additions & 44 deletions src/snprintf.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,86 +18,228 @@

#include "data.table.h"
#include <stdarg.h>
#include <ctype.h> // isdigit
#undef snprintf // on Windows, just in this file, we do want to use the C library's snprintf

/*
 * dt_win_snprintf: snprintf replacement that additionally understands POSIX
 * positional specifiers (%1$d, %2$s, ...) for C libraries that do not.
 *
 * dest  output buffer; always NUL-terminated when n>=1
 * n     size of dest in bytes, including the terminating '\0'
 * fmt   printf-style format; either no specifier is positional or all are
 * ...   the arguments, in %1$, %2$, ... order when positionals are used
 *
 * Returns 0 when n<1 (dest untouched). Without positionals the call is
 * forwarded to the C library vsnprintf and its result is returned. With
 * positionals the return value is the number of characters that would have
 * been written had dest been large enough (C99 snprintf semantics).
 * When fmt is malformed or an allocation fails, a diagnostic message is written
 * into dest and -1 is returned. error() is deliberately not used here because
 * it is not thread-safe; a strange message in dest is better than a crash.
 *
 * Restrictions on positional formats: positions must lie in [1,99], each
 * position may be used once only, and %1$..%k$ must all be present where k is
 * the highest position used (i.e. the positions form a permutation).
 * Formatted arguments are separated internally by two DEL (0x7f) characters,
 * so an argument whose text contains that pair is not supported.
 */
int dt_win_snprintf(char *dest, size_t n, const char *fmt, ...)
{
  if (n<1) return 0;
  // These letters do not appear in flags or length modifiers, just type
  // https://en.wikipedia.org/wiki/Printf_format_string#Syntax
  static const char types[] = "diufFeEgGxXoscpaA";
  const char *strp[99]={NULL};  // strp[i]: start of specifier i+1 just after its "n$"
  int strl[99]={0};             // strl[i]: length of that specifier up to and including the type letter
  int narg=0;                   // highest position seen
  bool posSpec=false, nonPosSpec=false;
  int specAlloc=0;              // total characters of specifiers for alloc

  // Pass 1: validate fmt and record where each positional specifier lives.
  // A plain strstr(fmt, "%1$") is not enough because it would match inside "%%1$".
  const char *ch = fmt;
  while (*ch!='\0') {
    if (*ch!='%') {ch++; continue;}
    if (ch[1]=='%') {ch+=2; continue;}  // %% means literal %
    // Find end of %[parameter][flags][width][.precision][length]type
    const char *end = strpbrk(ch, types);
    if (!end) {
      snprintf(dest, n, "snprintf %-5s does not end with recognized type letter", ch);
      return -1;
    }
    const char *d = ch+1;
    if (*d=='-') d++;  // to give helpful outside-range message for %-1$ too
    while (isdigit((unsigned char)*d)) d++;
    if (*d=='$') {
      posSpec=true;
      const long pos = strtol(ch+1, NULL, 10);  // strtol rather than atoi: defined behaviour on overflow
      if (pos<1 || pos>99) {
        // up to 99 supported here; should not need more than 99 in a message
        snprintf(dest, n, "snprintf %.*s outside range [1,99]", (int)(d-ch+1), ch);
        return -1;
      }
      if (pos>narg) narg=(int)pos;
      if (strp[pos-1]) {
        // no dups allowed: this wrapper could not cope with the same argument
        // formatted differently; e.g. "%1$d %1$5d"
        snprintf(dest, n, "snprintf %%%d$ appears twice", (int)pos);
        return -1;
      }
      strp[pos-1] = d+1;                         // d rests on the '$'
      strl[pos-1] = (int)(end-strp[pos-1]+1);
      specAlloc += strl[pos-1]+1;                // +1 for leading '%'
    } else {
      nonPosSpec=true;
    }
    ch = end+1;
  }
  if (posSpec && nonPosSpec) {
    // Standards state that if one specifier uses position, they all must; good.
    snprintf(dest, n, "snprintf some %%n$ but not all");
    return -1;
  }

  va_list ap;
  va_start(ap, fmt);  // started only now so that the early returns above need no va_end
  if (!posSpec) {
    // no positionals present, just pass on to the C library vsnprintf as-is
    int ans = vsnprintf(dest, n, fmt, ap);
    va_end(ap);
    return ans;
  }

  // Pass 2: build spec = the specifiers without their n$ part, in argument
  // order, each followed by a delimiter, so the C library can do all the
  // formatting and va_arg type navigation in one non-positional call.
  #define NDELIM 2
  const char delim[NDELIM+1] = "\x7f\x7f";  // tokenize temporary using 2 DELs
  int ret = -1;                             // every failure path below returns -1
  char *buff = NULL;                        // result of formatting spec
  specAlloc += narg*NDELIM + 1;             // +1 for final '\0'
  char *spec = malloc((size_t)specAlloc);   // not R_alloc as we need to be thread-safe
  if (!spec) {
    // # nocov start
    snprintf(dest, n, "snprintf: %d byte spec alloc failed", (int)specAlloc);
    goto cleanup;
    // # nocov end
  }
  char *ch2 = spec;
  for (int i=0; i<narg; ++i) {
    if (!strp[i] || strl[i]<1) {
      // if %n$ is present, then %[1:n]$ must all be present
      snprintf(dest, n, "snprintf %%%d$ missing", i+1);
      goto cleanup;
    }
    *ch2++ = '%';
    memcpy(ch2, strp[i], (size_t)strl[i]);  // the specifier without the n$ part
    ch2 += strl[i];
    memcpy(ch2, delim, NDELIM+1);           // includes '\0'
    ch2 += NDELIM;                          // now resting on the '\0'
  }

  buff = malloc(n);
  if (!buff) {
    // # nocov start
    snprintf(dest, n, "snprintf: %d byte buff alloc failed", (int)n);
    goto cleanup;
    // # nocov end
  }
  int res = vsnprintf(buff, n, spec, ap);  // C library does all the (non-positional) hard work here
  if (res<1) {
    // negative is error, cover 0 as error too here (at least one delimiter is always written).
    // Checked before the size comparison so that a negative res is never converted to size_t.
    // # nocov start
    snprintf(dest, n, "snprintf: clib error %d", res);
    goto cleanup;
    // # nocov end
  }
  if ((size_t)res>=n) {
    // n wasn't big enough to hold the delimited result.
    // C99 standard states that vsnprintf returns the size that would be big enough.
    char *bigger = realloc(buff, (size_t)res+1);
    if (!bigger) {
      // # nocov start
      snprintf(dest, n, "snprintf: %d byte buff realloc failed", (int)res+1);
      goto cleanup;
      // # nocov end
    }
    buff = bigger;
    // ap was consumed by the first vsnprintf and is now indeterminate; restart it before reuse
    va_end(ap);
    va_start(ap, fmt);
    int newres = vsnprintf(buff, (size_t)res+1, spec, ap);  // try again
    if (newres!=res) {
      // # nocov start
      snprintf(dest, n, "snprintf: second vsnprintf %d != %d", newres, res);
      goto cleanup;
      // # nocov end
    }
  }

  // Pass 3: reuse strp/strl as lookups into buff (the formatted text of each
  // argument) so fmt can be walked once, replacing specifiers as they appear.
  ch = buff;
  for (int i=0; i<narg; ++i) {
    const char *end = strstr(ch, delim);
    if (!end) {
      // # nocov start
      snprintf(dest, n, "snprintf: field %d delimiter not found", i+1);
      goto cleanup;
      // # nocov end
    }
    strp[i] = ch;
    strl[i] = (int)(end-ch);
    ch = end+NDELIM;
  }
  ch = fmt;
  char *out = dest;
  size_t nc = 0;  // as per C99 standard: how many chars would be written if output isn't curtailed at n-1
  while (*ch!='\0') {
    const size_t space = nc>=n-1 ? 0 : n-1-nc;  // space remaining in dest
    if (*ch!='%') { if (space) *out++=*ch; ch++; nc++; continue; }    // copy non-specifier to the result as-is
    if (ch[1]=='%') { if (space) *out++='%'; ch+=2; nc++; continue; } // interpret %% as a single %
    const long pos = strtol(ch+1, NULL, 10);  // valid position already checked in pass 1
    const size_t len = (size_t)strl[pos-1];
    nc += len;
    const size_t nWrite = len<space ? len : space;  // potentially write part of this field to fill up n
    memcpy(out, strp[pos-1], nWrite);
    out += nWrite;
    ch = strpbrk(ch, types)+1;  // move past the end of the specifier; validity checked in pass 1
  }
  *out = '\0';
  ret = (int)nc;

cleanup:
  free(spec);
  free(buff);
  va_end(ap);
  return ret;
}

/*
 * Self-test for dt_win_snprintf. Each case formats into a local buffer and
 * raises an R error naming the failing test when the result differs from the
 * expected text (or, for test 10, the expected return value).
 * Returns R_NilValue when every case passes.
 */
SEXP test_dt_win_snprintf()
{
  char buff[50];

  // tests 1-3: no positionals, positionals in order, positionals swapped; %% stays a literal %
  dt_win_snprintf(buff, 50, "No pos %d%%%d ok", 42, -84);
  if (strcmp(buff, "No pos 42%-84 ok")) error("dt_win_snprintf test 1 failed: %s", buff);

  dt_win_snprintf(buff, 50, "With pos %1$d%%%2$d ok", 42, -84);
  if (strcmp(buff, "With pos 42%-84 ok")) error("dt_win_snprintf test 2 failed: %s", buff);

  dt_win_snprintf(buff, 50, "With pos %2$d%%%1$d ok", 42, -84);
  if (strcmp(buff, "With pos -84%42 ok")) error("dt_win_snprintf test 3 failed: %s", buff);

  // test 4: flags/width survive reordering (%4$10s right-justifies "short" in 10 columns),
  // a "%2$d" inside an argument is not re-interpreted, and a trailing '$' is literal
  dt_win_snprintf(buff, 50, "%3$s %1$d %4$10s %2$03d$", -99, 12, "hello%2$d", "short");
  if (strcmp(buff, "hello%2$d -99      short 012$")) error("dt_win_snprintf test 4 failed: %s", buff);

  dt_win_snprintf(buff, 50, "%1$d %s", 9, "foo");
  if (strcmp(buff, "snprintf some %n$ but not all")) error("dt_win_snprintf test 5 failed: %s", buff);

  dt_win_snprintf(buff, 50, "%%1$foo%d", 9);  // The %1$f is not a specifier because % is doubled
  if (strcmp(buff, "%1$foo9")) error("dt_win_snprintf test 6 failed: %s", buff);

  dt_win_snprintf(buff, 40, "long format string more than n==%d chopped", 40);  // regular library (no %n$) chops to 39 chars + '\0'
  if (strlen(buff)!=39 || strcmp(buff, "long format string more than n==40 chop")) error("dt_win_snprintf test 7 failed: %s", buff);

  dt_win_snprintf(buff, 40, "long %3$s %2$s more than n==%1$d chopped", 40, "string", "format");  // same with dt_win_snprintf
  if (strlen(buff)!=39 || strcmp(buff, "long format string more than n==40 chop")) error("dt_win_snprintf test 8 failed: %s", buff);

  int res = dt_win_snprintf(buff, 10, "%4$d%2$d%3$d%5$d%1$d", 111, 222, 33, 44, 555);  // fmt longer than n
  if (strlen(buff)!=9 || strcmp(buff, "442223355")) error("dt_win_snprintf test 9 failed: %s", buff);
  if (res!=13) /* should return what would have been written if not chopped */ error("dt_win_snprintf test 10 failed: %d", res);

  // tests 11-12: the message pads the offending specifier to 5 columns with %-5s,
  // so "%l" is followed by three pad spaces and then the separating space
  dt_win_snprintf(buff, 47, "%l", 3);
  if (strlen(buff)!=46 || strcmp(buff, "snprintf %l    does not end with recognized ty")) error("dt_win_snprintf test 11 failed: %s", buff);

  dt_win_snprintf(buff, 19, "%l", 3);
  if (strlen(buff)!=18 || strcmp(buff, "snprintf %l    doe")) error("dt_win_snprintf test 12 failed: %s", buff);

  // tests 13-16: positions outside [1,99]
  dt_win_snprintf(buff, 50, "%1$d == %0$d", 1, 2);
  if (strcmp(buff, "snprintf %0$ outside range [1,99]")) error("dt_win_snprintf test 13 failed: %s", buff);

  dt_win_snprintf(buff, 50, "%1$d == %$d", 1, 2);
  if (strcmp(buff, "snprintf %$ outside range [1,99]")) error("dt_win_snprintf test 14 failed: %s", buff);

  dt_win_snprintf(buff, 50, "%1$d == %100$d", 1, 2);
  if (strcmp(buff, "snprintf %100$ outside range [1,99]")) error("dt_win_snprintf test 15 failed: %s", buff);

  dt_win_snprintf(buff, 50, "%1$d == %-1$d", 1, 2);
  if (strcmp(buff, "snprintf %-1$ outside range [1,99]")) error("dt_win_snprintf test 16 failed: %s", buff);

  // tests 17-19: positions must form a permutation (no gaps, no repeats)
  dt_win_snprintf(buff, 50, "%1$d == %3$d", 1, 2, 3);
  if (strcmp(buff, "snprintf %2$ missing")) error("dt_win_snprintf test 17 failed: %s", buff);

  dt_win_snprintf(buff, 50, "%1$d == %1$d", 42);
  if (strcmp(buff, "snprintf %1$ appears twice")) error("dt_win_snprintf test 18 failed: %s", buff);

  dt_win_snprintf(buff, 50, "%1$d + %3$d - %2$d == %3$d", 1, 1, 2);
  if (strcmp(buff, "snprintf %3$ appears twice")) error("dt_win_snprintf test 19 failed: %s", buff);

  return R_NilValue;
}