diff --git a/NEWS.md b/NEWS.md index ac4b442ace..d53d5e514b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -97,6 +97,8 @@ unit = "s") 8. An `fwrite` error message could include a garbled number and cause test 1737.5 to fail, [#3492](https://github.com/Rdatatable/data.table/issues/3492). Thanks to @QuLogic for debugging the issue on ARMv7hl, and the PR fixing it. +9. `fread` improves handling of very small (<1e-300) or very large (>1e+300) floating point numbers on non-x86 architectures (specifically ppc64le and armv7hl). Thanks to @QuLogic for reporting and fixing, [PR#4165](https://github.com/Rdatatable/data.table/pull/4165). + ## NOTES 1. `as.IDate`, `as.ITime`, `second`, `minute`, and `hour` now recognize UTC equivalents for speed: GMT, GMT-0, GMT+0, GMT0, Etc/GMT, and Etc/UTC, [#4116](https://github.com/Rdatatable/data.table/issues/4116). diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index b2ea9a3554..0ffe3f5bf5 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -2934,8 +2934,9 @@ if (test_bit64) test(1017.1, fread(f), copy(DT)[,A:=as.integer64(A)]) test(1017.2, fread(f, integer64="character"), DT) unlink(f) -# ERANGE errno handled, #106 -test(1018, identical(fread("1.46761e-313\n"), data.table(V1=1.46761e-313))) +# ERANGE errno handled, #106 #4165 +test(1018.1, identical(fread("1.46761e-313\n"), data.table(V1=1.46761e-313))) +test(1018.2, identical(fread("1.46761e+313\n"), data.table(V1=1.46761e+313))) test(1019, fread("A\n1.23456789123456789123456999\n"), data.table(A=1.234567891234568)) # crash assigning to row 0, #2754 diff --git a/src/fread.c b/src/fread.c index 69082c5d95..c94aeac069 100644 --- a/src/fread.c +++ b/src/fread.c @@ -648,9 +648,9 @@ static void StrtoI64(FieldParseContext *ctx) // TODO: review ERANGE checks and tests; that range outside [1.7e-308,1.7e+308] coerces to [0.0,Inf] /* f = "~/data.table/src/freadLookups.h" -cat("const long double pow10lookup[701] = {\n", file=f, append=FALSE) -for (i in (-350):(349)) cat("1.0E",i,"L,\n", sep="", file=f, append=TRUE) -cat("1.0E350L\n};\n", file=f, append=TRUE) +cat("const long double pow10lookup[601] = {\n", file=f, append=FALSE) +for (i in (-300):(299)) cat("1.0E",i,"L,\n", sep="", file=f, append=TRUE) +cat("1.0E300L\n};\n", file=f, append=TRUE) */ @@ -767,11 +767,23 @@ static void parse_double_regular(FieldParseContext *ctx) } e += Eneg? -E : E; } - e += 350; // lookup table is arranged from -350 (0) to +350 (700) - if (e<0 || e>700) goto fail; + if (e<-350 || e>350) goto fail; - double r = (double)((long double)acc * pow10lookup[e]); - *target = neg? -r : r; + long double r = (long double)acc; + if (e < -300 || e > 300) { + // Handle extra precision by pre-multiplying the result by pow(10, extra), + // and then remove extra from e. + // This avoids having to store very small or very large constants that may + // fail to be encoded by the compiler, even though the values can actually + // be stored correctly. + int_fast8_t extra = e < 0 ? e + 300 : e - 300; + r *= pow10lookup[extra + 300]; + e -= extra; + } + e += 300; // lookup table is arranged from -300 (0) to +300 (600) + + r *= pow10lookup[e]; + *target = (double)(neg? -r : r); *(ctx->ch) = ch; return; diff --git a/src/fread.h b/src/fread.h index 91c084127b..60894a873f 100644 --- a/src/fread.h +++ b/src/fread.h @@ -30,7 +30,7 @@ typedef enum { extern int8_t typeSize[NUMTYPE]; extern const char typeName[NUMTYPE][10]; -extern const long double pow10lookup[701]; +extern const long double pow10lookup[601]; extern const uint8_t hexdigits[256]; diff --git a/src/freadLookups.h b/src/freadLookups.h index faeb4ade54..bb736a60ac 100644 --- a/src/freadLookups.h +++ b/src/freadLookups.h @@ -27,57 +27,7 @@ const uint8_t hexdigits[256] = { }; -const long double pow10lookup[701] = { -1.0E-350L, -1.0E-349L, -1.0E-348L, -1.0E-347L, -1.0E-346L, -1.0E-345L, -1.0E-344L, -1.0E-343L, -1.0E-342L, -1.0E-341L, -1.0E-340L, -1.0E-339L, -1.0E-338L, -1.0E-337L, -1.0E-336L, -1.0E-335L, -1.0E-334L, -1.0E-333L, -1.0E-332L, -1.0E-331L, -1.0E-330L, -1.0E-329L, -1.0E-328L, -1.0E-327L, -1.0E-326L, -1.0E-325L, -1.0E-324L, -1.0E-323L, -1.0E-322L, -1.0E-321L, -1.0E-320L, -1.0E-319L, -1.0E-318L, -1.0E-317L, -1.0E-316L, -1.0E-315L, -1.0E-314L, -1.0E-313L, -1.0E-312L, -1.0E-311L, -1.0E-310L, -1.0E-309L, -1.0E-308L, -1.0E-307L, -1.0E-306L, -1.0E-305L, -1.0E-304L, -1.0E-303L, -1.0E-302L, -1.0E-301L, +const long double pow10lookup[601] = { 1.0E-300L, 1.0E-299L, 1.0E-298L, @@ -678,57 +628,7 @@ const long double pow10lookup[701] = { 1.0E297L, 1.0E298L, 1.0E299L, -1.0E300L, -1.0E301L, -1.0E302L, -1.0E303L, -1.0E304L, -1.0E305L, -1.0E306L, -1.0E307L, -1.0E308L, -1.0E309L, -1.0E310L, -1.0E311L, -1.0E312L, -1.0E313L, -1.0E314L, -1.0E315L, -1.0E316L, -1.0E317L, -1.0E318L, -1.0E319L, -1.0E320L, -1.0E321L, -1.0E322L, -1.0E323L, -1.0E324L, -1.0E325L, -1.0E326L, -1.0E327L, -1.0E328L, -1.0E329L, -1.0E330L, -1.0E331L, -1.0E332L, -1.0E333L, -1.0E334L, -1.0E335L, -1.0E336L, -1.0E337L, -1.0E338L, -1.0E339L, -1.0E340L, -1.0E341L, -1.0E342L, -1.0E343L, -1.0E344L, -1.0E345L, -1.0E346L, -1.0E347L, -1.0E348L, -1.0E349L, -1.0E350L +1.0E300L }; #endif