diff --git a/NEWS.md b/NEWS.md index 03dbe3d4b5..6a7c3ba948 100644 --- a/NEWS.md +++ b/NEWS.md @@ -295,6 +295,8 @@ 39. `DT[i, sum(b), by=grp]` (and other optimized-by-group aggregates: `mean`, `var`, `sd`, `median`, `prod`, `min`, `max`, `first`, `last`, `head` and `tail`) could segfault if `i` contained row numbers and one or more were NA, [#1994](https://github.com/Rdatatable/data.table/issues/1994). Thanks to Arun Srinivasan for reporting, and Benjamin Schwendinger for the PR. +40. `identical(fread(text="A\n0.8060667366\n")$A, 0.8060667366)` is now TRUE, [#4461](https://github.com/Rdatatable/data.table/issues/4461). `fread` was using `*10^-n` rather than `/10^n` resulting in `0.80606673659999994` vs `0.80606673660000006`. `fread()` now matches R's parser and `read.table` identically in this respect. Thanks to Gabe Becker for requesting consistency, and Michael Chirico for the PR. + ## NOTES 1. New feature 29 in v1.12.4 (Oct 2019) introduced zero-copy coercion. Our thinking is that requiring you to get the type right in the case of `0` (type double) vs `0L` (type integer) is too inconvenient for you the user. So such coercions happen in `data.table` automatically without warning. Thanks to zero-copy coercion there is no speed penalty, even when calling `set()` many times in a loop, so there's no speed penalty to warn you about either. However, we believe that assigning a character value such as `"2"` into an integer column is more likely to be a user mistake that you would like to be warned about. The type difference (character vs integer) may be the only clue that you have selected the wrong column, or typed the wrong variable to be assigned to that column. For this reason we view character to numeric-like coercion differently and will warn about it. If it is correct, then the warning is intended to nudge you to wrap the RHS with `as.<type>()` so that it is clear to readers of your code that a coercion from character to that type is intended. 
For example : diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index a9eb68ab09..1d39b2d817 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -18107,3 +18107,6 @@ if (base::getRversion() >= "4.1.0") { test(2212.33, EVAL("iris |> DT('setosa', on='Species')"), {tt=droprn(iris[iris$Species=="setosa",]); tt$Species=as.character(tt$Species); tt}) } +# precision powers of 10^(-n), #4461 +test(2213, identical(fread(text="A\n0.8060667366\n")$A, 0.8060667366)) + diff --git a/src/fread.c b/src/fread.c index 7c7836250a..e0a32d3e14 100644 --- a/src/fread.c +++ b/src/fread.c @@ -652,8 +652,8 @@ static void StrtoI64(FieldParseContext *ctx) // TODO: review ERANGE checks and tests; that range outside [1.7e-308,1.7e+308] coerces to [0.0,Inf] /* f = "~/data.table/src/freadLookups.h" -cat("const long double pow10lookup[601] = {\n", file=f, append=FALSE) -for (i in (-300):(299)) cat("1.0E",i,"L,\n", sep="", file=f, append=TRUE) +cat("const long double pow10lookup[301] = {\n", file=f, append=FALSE) +for (i in 0:299) cat("1.0E",i,"L,\n", sep="", file=f, append=TRUE) cat("1.0E300L\n};\n", file=f, append=TRUE) */ @@ -780,12 +780,13 @@ static void parse_double_regular_core(const char **pch, double *target) // fail to be encoded by the compiler, even though the values can actually // be stored correctly. int_fast8_t extra = e < 0 ? e + 300 : e - 300; - r *= pow10lookup[extra + 300]; + r = extra<0 ? r/pow10lookup[-extra] : r*pow10lookup[extra]; e -= extra; } - e += 300; // lookup table is arranged from -300 (0) to +300 (600) - r *= pow10lookup[e]; + // pow10lookup[301] contains 10^(0:300). Storing negative powers there too + // avoids this ternary but is slightly less accurate in some cases, #4461 + r = e < 0 ? r/pow10lookup[-e] : r*pow10lookup[e]; *target = (double)(neg? 
-r : r); *pch = ch; return; diff --git a/src/fread.h b/src/fread.h index c0e9669d01..446da18e4b 100644 --- a/src/fread.h +++ b/src/fread.h @@ -37,7 +37,7 @@ typedef enum { extern int8_t typeSize[NUMTYPE]; extern const char typeName[NUMTYPE][10]; -extern const long double pow10lookup[601]; +extern const long double pow10lookup[301]; extern const uint8_t hexdigits[256]; diff --git a/src/freadLookups.h b/src/freadLookups.h index 80c4861014..103d644da4 100644 --- a/src/freadLookups.h +++ b/src/freadLookups.h @@ -142,307 +142,8 @@ const int32_t cumDaysCycleYears[401] = { 9496, 9862, 10227, 10592, 146097// total days in 400 years }; -const long double pow10lookup[601] = { -1.0E-300L, -1.0E-299L, -1.0E-298L, -1.0E-297L, -1.0E-296L, -1.0E-295L, -1.0E-294L, -1.0E-293L, -1.0E-292L, -1.0E-291L, -1.0E-290L, -1.0E-289L, -1.0E-288L, -1.0E-287L, -1.0E-286L, -1.0E-285L, -1.0E-284L, -1.0E-283L, -1.0E-282L, -1.0E-281L, -1.0E-280L, -1.0E-279L, -1.0E-278L, -1.0E-277L, -1.0E-276L, -1.0E-275L, -1.0E-274L, -1.0E-273L, -1.0E-272L, -1.0E-271L, -1.0E-270L, -1.0E-269L, -1.0E-268L, -1.0E-267L, -1.0E-266L, -1.0E-265L, -1.0E-264L, -1.0E-263L, -1.0E-262L, -1.0E-261L, -1.0E-260L, -1.0E-259L, -1.0E-258L, -1.0E-257L, -1.0E-256L, -1.0E-255L, -1.0E-254L, -1.0E-253L, -1.0E-252L, -1.0E-251L, -1.0E-250L, -1.0E-249L, -1.0E-248L, -1.0E-247L, -1.0E-246L, -1.0E-245L, -1.0E-244L, -1.0E-243L, -1.0E-242L, -1.0E-241L, -1.0E-240L, -1.0E-239L, -1.0E-238L, -1.0E-237L, -1.0E-236L, -1.0E-235L, -1.0E-234L, -1.0E-233L, -1.0E-232L, -1.0E-231L, -1.0E-230L, -1.0E-229L, -1.0E-228L, -1.0E-227L, -1.0E-226L, -1.0E-225L, -1.0E-224L, -1.0E-223L, -1.0E-222L, -1.0E-221L, -1.0E-220L, -1.0E-219L, -1.0E-218L, -1.0E-217L, -1.0E-216L, -1.0E-215L, -1.0E-214L, -1.0E-213L, -1.0E-212L, -1.0E-211L, -1.0E-210L, -1.0E-209L, -1.0E-208L, -1.0E-207L, -1.0E-206L, -1.0E-205L, -1.0E-204L, -1.0E-203L, -1.0E-202L, -1.0E-201L, -1.0E-200L, -1.0E-199L, -1.0E-198L, -1.0E-197L, -1.0E-196L, -1.0E-195L, -1.0E-194L, -1.0E-193L, -1.0E-192L, -1.0E-191L, 
-1.0E-190L, -1.0E-189L, -1.0E-188L, -1.0E-187L, -1.0E-186L, -1.0E-185L, -1.0E-184L, -1.0E-183L, -1.0E-182L, -1.0E-181L, -1.0E-180L, -1.0E-179L, -1.0E-178L, -1.0E-177L, -1.0E-176L, -1.0E-175L, -1.0E-174L, -1.0E-173L, -1.0E-172L, -1.0E-171L, -1.0E-170L, -1.0E-169L, -1.0E-168L, -1.0E-167L, -1.0E-166L, -1.0E-165L, -1.0E-164L, -1.0E-163L, -1.0E-162L, -1.0E-161L, -1.0E-160L, -1.0E-159L, -1.0E-158L, -1.0E-157L, -1.0E-156L, -1.0E-155L, -1.0E-154L, -1.0E-153L, -1.0E-152L, -1.0E-151L, -1.0E-150L, -1.0E-149L, -1.0E-148L, -1.0E-147L, -1.0E-146L, -1.0E-145L, -1.0E-144L, -1.0E-143L, -1.0E-142L, -1.0E-141L, -1.0E-140L, -1.0E-139L, -1.0E-138L, -1.0E-137L, -1.0E-136L, -1.0E-135L, -1.0E-134L, -1.0E-133L, -1.0E-132L, -1.0E-131L, -1.0E-130L, -1.0E-129L, -1.0E-128L, -1.0E-127L, -1.0E-126L, -1.0E-125L, -1.0E-124L, -1.0E-123L, -1.0E-122L, -1.0E-121L, -1.0E-120L, -1.0E-119L, -1.0E-118L, -1.0E-117L, -1.0E-116L, -1.0E-115L, -1.0E-114L, -1.0E-113L, -1.0E-112L, -1.0E-111L, -1.0E-110L, -1.0E-109L, -1.0E-108L, -1.0E-107L, -1.0E-106L, -1.0E-105L, -1.0E-104L, -1.0E-103L, -1.0E-102L, -1.0E-101L, -1.0E-100L, -1.0E-99L, -1.0E-98L, -1.0E-97L, -1.0E-96L, -1.0E-95L, -1.0E-94L, -1.0E-93L, -1.0E-92L, -1.0E-91L, -1.0E-90L, -1.0E-89L, -1.0E-88L, -1.0E-87L, -1.0E-86L, -1.0E-85L, -1.0E-84L, -1.0E-83L, -1.0E-82L, -1.0E-81L, -1.0E-80L, -1.0E-79L, -1.0E-78L, -1.0E-77L, -1.0E-76L, -1.0E-75L, -1.0E-74L, -1.0E-73L, -1.0E-72L, -1.0E-71L, -1.0E-70L, -1.0E-69L, -1.0E-68L, -1.0E-67L, -1.0E-66L, -1.0E-65L, -1.0E-64L, -1.0E-63L, -1.0E-62L, -1.0E-61L, -1.0E-60L, -1.0E-59L, -1.0E-58L, -1.0E-57L, -1.0E-56L, -1.0E-55L, -1.0E-54L, -1.0E-53L, -1.0E-52L, -1.0E-51L, -1.0E-50L, -1.0E-49L, -1.0E-48L, -1.0E-47L, -1.0E-46L, -1.0E-45L, -1.0E-44L, -1.0E-43L, -1.0E-42L, -1.0E-41L, -1.0E-40L, -1.0E-39L, -1.0E-38L, -1.0E-37L, -1.0E-36L, -1.0E-35L, -1.0E-34L, -1.0E-33L, -1.0E-32L, -1.0E-31L, -1.0E-30L, -1.0E-29L, -1.0E-28L, -1.0E-27L, -1.0E-26L, -1.0E-25L, -1.0E-24L, -1.0E-23L, -1.0E-22L, -1.0E-21L, -1.0E-20L, -1.0E-19L, -1.0E-18L, 
-1.0E-17L, -1.0E-16L, -1.0E-15L, -1.0E-14L, -1.0E-13L, -1.0E-12L, -1.0E-11L, -1.0E-10L, -1.0E-9L, -1.0E-8L, -1.0E-7L, -1.0E-6L, -1.0E-5L, -1.0E-4L, -1.0E-3L, -1.0E-2L, -1.0E-1L, + +const long double pow10lookup[301] = { 1.0E0L, 1.0E1L, 1.0E2L,