From 49feabfc8c01175c32b0a715f4aea78a2d9378fc Mon Sep 17 00:00:00 2001 From: Michael Tautschnig Date: Sat, 29 Nov 2025 21:28:34 +0100 Subject: [PATCH 1/2] Fix wrong line number for assertions exceeding flex buffer size When a preprocessed source line exceeds the flex input buffer size (YY_READ_BUF_SIZE, typically 8192 bytes), the scanner's YY_INPUT macro would report an incorrect (off-by-one) line number for tokens on that line. This is because YY_INPUT increments line_no when reading a newline character and then breaks, so the increment happens at the END of a line. For lines fitting in the buffer, the newline is read before flex matches tokens, so line_no is correct. For longer lines, the newline is read in a later YY_INPUT call, after flex has already matched tokens with a line_no that is one too low. The fix overrides YY_INPUT in the ANSI-C scanner to defer the line number increment: instead of incrementing immediately when reading a newline, a flag is set, and the increment happens at the start of the next YY_INPUT call. This ensures line_no is correct when flex matches the first token on any line, regardless of line length. All flex scanners in the codebase were analysed for this bug: Scanner parser.h YY_INPUT Reports lines Affected ansi-c yes (now overridden) yes fixed here assembler yes no no crangler no (own YY_INPUT) no no json yes no no statement-list yes yes latent bug xmllang yes no no The statement-list scanner has the same latent bug but is not fixed in this commit because statement-list lines are unlikely to exceed 8192 bytes in practice. Fixes: #8257 Co-authored-by: Kiro --- .../cbmc/long_assertion_line_number/main.c | 13 ++++++ .../cbmc/long_assertion_line_number/test.desc | 12 ++++++ src/ansi-c/ansi_c_parser.h | 8 +++- src/ansi-c/scanner.l | 40 +++++++++++++++++++ 4 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 regression/cbmc/long_assertion_line_number/main.c create mode 100644 regression/cbmc/long_assertion_line_number/test.desc diff --git a/regression/cbmc/long_assertion_line_number/main.c b/regression/cbmc/long_assertion_line_number/main.c new file mode 100644 index 00000000000..4bc6c9f013d --- /dev/null +++ b/regression/cbmc/long_assertion_line_number/main.c @@ -0,0 +1,13 @@ +#define __CBMC_assert(cond) \ + __CPROVER_assert((cond), "assertion"); \ + __CPROVER_assume(cond) +extern int __VERIFIER_nondet_int(); +int main() +{ + int A[1]; + A[0] = __VERIFIER_nondet_int(); + /* clang-format off */ + __CBMC_assert((A[0] < (-1) && A[0] < ~(4294967294U) && A[0] < 0LL && (A[0] > -214748647LL && ((((A[0] < 20158765) && A[0] > 2147483648U && A[0] > 1929301145 && (A[0] > 1944729020 && A[0] < 0 && A[0] < -1LL && A[0] > 4294967294ULL && A[0] != ~(2147483647U)) && A[0] > ~(4294967294U) && A[0] < ~(4294967295U))) && (A[0] > ~(2147483646U) && (A[0] < 1L && A[0] > 1ULL && A[0] > 1LL && A[0] < ~(-1) && A[0] < 2147483647ULL) && (A[0] < 0U) && (A[0] < 2147483647L && (A[0] < 0U && (A[0] < 1UL)) && (((A[0] > ~(0U) || A[0] < ~(1U)) || A[0] > 1321734560)) && A[0] < 4294967294ULL) && (A[0] < 1UL && ((A[0] > 2147483646U && A[0] <= 1U) && A[0] < 1U) && A[0] > 1U) && A[0] > 1ULL) && A[0] > 1L && (A[0] < 4294967295UL && A[0] < 4294967294UL && A[0] < 1L && (A[0] > ~(2147483647) && A[0] < 2147483647UL && A[0] < 2147483647L && A[0] < 0U) && ((A[0] > ~(1) && (A[0] > 1UL) && (A[0] > 2147483648U) && A[0] <= 2147483647) && A[0] > 1L)) && (A[0] < 4294967294UL && A[0] < ~(4294967294U) && A[0] > -1453357121 && A[0] < 1U) && A[0] < 2147483646U && (A[0] > 1L && (((A[0] < 1UL)) && A[0] < ~(0) && A[0] != ~(-1)) && A[0] < 4294967294ULL) && (A[0] < ~(4294967295U)) && A[0] <= 2147483647LL) && ((A[0] < 0U && ((A[0] > ~(2147483646U) && ((A[0] < ~(-2147483647)) && A[0] > ~(0) && A[0] > -1 && A[0] < -1L && (A[0] < 2147483646LL && (A[0] < -1) && ((A[0] < 2))) && (A[0] < 2147483647L && (A[0] < 2147483646LL && A[0] < 4294967295U) && A[0] > ~(4294967294U))) && ((A[0] > ~(1U)) && ((A[0] < 1U && (A[0] < 0 && A[0] < 2147483647U && (A[0] > 4294967294ULL)) && (A[0] < 2147483647ULL || A[0] < ~(1)))) && A[0] < -1LL && (((A[0] > 4294967295ULL && (((A[0] > 2147483648ULL)) && A[0] >= 4294967295U)) && A[0] < 2147483646ULL && A[0] <= 0L))) && A[0] > 0U && ((A[0] > 552237804 && A[0] > 4294967294UL) && (((A[0] < 0L && (A[0] > -2147483648) && ((((((((A[0] > -2147483647))))) && A[0] >= -2147483648) || (A[0] <= ~(1) && (A[0] >= 4294967295UL))) && A[0] <= 2147483646LL)) && A[0] != 100000) || (A[0] < -1L)) && A[0] != 4294967294U))) && A[0] < 0 && A[0] > ~(1U) && A[0] > -62844098 && A[0] < ~(-2147483647) && A[0] > 0UL && A[0] < 1U && A[0] > 0) && (A[0] < ~(0) && (A[0] > 4294967295UL) && (A[0] < 0UL) && (A[0] > 1ULL && (A[0] > 1UL) && (A[0] > 2056281653) && A[0] < 4294967294UL) && A[0] > 4294967294U && A[0] > 4294967294U && (A[0] > -1) && A[0] <= ~(1U)) && A[0] < 2147483646L && (A[0] > ~(0U) && A[0] > -1LL && (A[0] > ~(2147483647)) && A[0] > -2147483647L && ((A[0] < -2147483648 && (A[0] >= ~(2147483646))))) && A[0] > 4294967295ULL && (A[0] > -2147483648L && (A[0] < ~(1) && A[0] < 2147483646LL) && A[0] > ~(-1)) && ((A[0] < 4294967295UL) || (((A[0] > 4294967295UL && (((A[0] < 0 && ((A[0] < 2147483646ULL) && ((A[0] < 1)))))) && A[0] > ~(0U) && (((A[0] >= -1)) && A[0] > -2147483648)) && A[0] < 2147483646ULL && A[0] != 0L))) && A[0] > 1UL) && A[0] > 4294967294ULL && (A[0] < 2147483646U && A[0] < -2147483647L && A[0] > -1485823147 && A[0] < 1U && A[0] > 1L && (((A[0] < 2147483647ULL && A[0] > 2147483646ULL)))) && A[0] < 4294967294UL && ((A[0] != 2147483647ULL || A[0] > 0))) && (A[0] < ~(1U) && (A[0] < 1ULL && (A[0] > ~(4294967295U) && A[0] > 1U && (A[0] > 2147483646U && A[0] > -1LL && A[0] > ~(1) && A[0] < 2147483646L && A[0] > 1UL && A[0] < 0 && A[0] < -1L && A[0] != ~(1)) && (((((A[0] > -2147483647LL) && A[0] < 1L && (A[0] > -2147483648L) && A[0] < 4294967294ULL && A[0] > -2147483648L)) && A[0] > -82984055 && A[0] > -1 && ((A[0] < 2147483646LL) && A[0] < 2147483646LL) && A[0] < 2147483646UL) && (((A[0] > -2147483647L))) && A[0] < 2147483646 && A[0] < 100000) && A[0] < 1LL && A[0] < ~(2147483648U) && (A[0] >= 1U || A[0] != 2147483647ULL)) && A[0] < 2147483646U && A[0] < 2147483647L) && (A[0] < 4294967294UL && A[0] < 2147483646U && ((A[0] < ~(4294967295U))) && A[0] <= ~(0)) && ((((A[0] < 2147483646ULL && A[0] > -1 && A[0] < ~(2147483648U) && A[0] < 0UL) && A[0] > -2147483647L && A[0] > ~(-1)) && A[0] < 1U && A[0] <= ~(4294967294U)) && A[0] > ~(4294967295U)) && A[0] < 0) && (((A[0] > -390958390)) && (A[0] < 0UL) && (A[0] < ~(2147483648U)) && A[0] <= 4294967294UL) && A[0] >= 0ULL)); + /* clang-format on */ + return 0; +} diff --git a/regression/cbmc/long_assertion_line_number/test.desc b/regression/cbmc/long_assertion_line_number/test.desc new file mode 100644 index 00000000000..140c9c946fc --- /dev/null +++ b/regression/cbmc/long_assertion_line_number/test.desc @@ -0,0 +1,12 @@ +CORE +main.c + +^\[main.assertion.1\] line 10 assertion: FAILURE$ +^VERIFICATION FAILED$ +^EXIT=10$ +^SIGNAL=0$ +-- +^warning: ignoring +-- +This test verifies that CBMC correctly reports line numbers for assertions in +very long expressions. diff --git a/src/ansi-c/ansi_c_parser.h b/src/ansi-c/ansi_c_parser.h index 169a74dc4cf..6e6046ac777 100644 --- a/src/ansi-c/ansi_c_parser.h +++ b/src/ansi-c/ansi_c_parser.h @@ -39,7 +39,8 @@ class ansi_c_parsert:public parsert __float128_is_keyword(false), float16_type(false), bf16_type(false), - fp16_type(false) + fp16_type(false), + last_input_ended_with_newline(false) { // set up global scope scopes.clear(); @@ -74,6 +75,11 @@ class ansi_c_parsert:public parsert bool bf16_type; bool fp16_type; + // tracks whether the last YY_INPUT call ended with a newline, used to + // defer line number increments so that lines exceeding the flex input + // buffer size are counted correctly + bool last_input_ended_with_newline; + typedef ansi_c_identifiert identifiert; typedef ansi_c_scopet scopet; diff --git a/src/ansi-c/scanner.l b/src/ansi-c/scanner.l index f9c7b8674ce..841490f516f 100644 --- a/src/ansi-c/scanner.l +++ b/src/ansi-c/scanner.l @@ -50,6 +50,46 @@ static int isatty(int) { return 0; } extern int yyansi_cdebug; #endif +// Override YY_INPUT from parser.h to defer line-number increments. +// The default YY_INPUT increments line_no when reading '\n' and then +// breaks, which means the increment happens at the END of a line. +// For lines that fit in the flex input buffer this works because the +// entire line (including '\n') is read before flex matches tokens. +// For lines exceeding the buffer, the '\n' is read in a later call, +// so tokens are matched with a line_no that is one too low. +// This version sets a flag when '\n' is read and defers the increment +// to the start of the next YY_INPUT call, ensuring line_no is correct +// regardless of line length. +#undef YY_INPUT +#define YY_INPUT(buf, result, max_size) \ + do { \ + if(PARSER.last_input_ended_with_newline) \ + { \ + PARSER.inc_line_no(); \ + PARSER.last_input_ended_with_newline=false; \ + } \ + for(result=0; result Date: Fri, 6 Mar 2026 23:11:34 +0000 Subject: [PATCH 2/2] Move deferred line-number increment into parsert (global fix) Move the deferred YY_INPUT line-number increment from the ANSI-C scanner override into parser.h so that all flex-based scanners benefit from correct line counting for lines exceeding the input buffer size. Changes: - parser.h: YY_INPUT now defers inc_line_no() to the start of the next call (via last_input_ended_with_newline flag); line_no initialised to 1 instead of 0; the line_no==0 -> 1 special case in source_location() is removed (no longer needed). - scanner.l: remove the per-scanner YY_INPUT override added in the previous commit (now redundant). - ansi_c_parser.h: remove the per-parser flag (now in parsert). - ansi_c_language.cpp: reset line_no to 1 (not 0) between parse passes so that .i files with no newline report line 1. - contracts_wrangler.cpp: drop redundant set_line_no(0) (constructor already initialises to 1). - statement_list_language.cpp: drop set_line_no(0) (same reason). Co-authored-by: Kiro --- src/ansi-c/ansi_c_language.cpp | 2 +- src/ansi-c/ansi_c_parser.h | 8 +-- src/ansi-c/scanner.l | 40 ----------- .../contracts/contracts_wrangler.cpp | 1 - .../statement_list_language.cpp | 1 - src/util/parser.h | 70 +++++++++++-------- 6 files changed, 41 insertions(+), 81 deletions(-) diff --git a/src/ansi-c/ansi_c_language.cpp b/src/ansi-c/ansi_c_language.cpp index 570d62c4db9..dc40fc4225d 100644 --- a/src/ansi-c/ansi_c_language.cpp +++ b/src/ansi-c/ansi_c_language.cpp @@ -93,7 +93,7 @@ bool ansi_c_languaget::parse( if(!result) { - ansi_c_parser.set_line_no(0); + ansi_c_parser.set_line_no(1); ansi_c_parser.set_file(path); ansi_c_parser.in=&i_preprocessed; ansi_c_scanner_init(ansi_c_parser); diff --git a/src/ansi-c/ansi_c_parser.h b/src/ansi-c/ansi_c_parser.h index 6e6046ac777..169a74dc4cf 100644 --- a/src/ansi-c/ansi_c_parser.h +++ b/src/ansi-c/ansi_c_parser.h @@ -39,8 +39,7 @@ class ansi_c_parsert:public parsert __float128_is_keyword(false), float16_type(false), bf16_type(false), - fp16_type(false), - last_input_ended_with_newline(false) + fp16_type(false) { // set up global scope scopes.clear(); @@ -75,11 +74,6 @@ class ansi_c_parsert:public parsert bool bf16_type; bool fp16_type; - // tracks whether the last YY_INPUT call ended with a newline, used to - // defer line number increments so that lines exceeding the flex input - // buffer size are counted correctly - bool last_input_ended_with_newline; - typedef ansi_c_identifiert identifiert; typedef ansi_c_scopet scopet; diff --git a/src/ansi-c/scanner.l b/src/ansi-c/scanner.l index 841490f516f..f9c7b8674ce 100644 --- a/src/ansi-c/scanner.l +++ b/src/ansi-c/scanner.l @@ -50,46 +50,6 @@ static int isatty(int) { return 0; } extern int yyansi_cdebug; #endif -// Override YY_INPUT from parser.h to defer line-number increments. -// The default YY_INPUT increments line_no when reading '\n' and then -// breaks, which means the increment happens at the END of a line. -// For lines that fit in the flex input buffer this works because the -// entire line (including '\n') is read before flex matches tokens. -// For lines exceeding the buffer, the '\n' is read in a later call, -// so tokens are matched with a line_no that is one too low. -// This version sets a flag when '\n' is read and defers the increment -// to the start of the next YY_INPUT call, ensuring line_no is correct -// regardless of line length. -#undef YY_INPUT -#define YY_INPUT(buf, result, max_size) \ - do { \ - if(PARSER.last_input_ended_with_newline) \ - { \ - PARSER.inc_line_no(); \ - PARSER.last_input_ended_with_newline=false; \ - } \ - for(result=0; result"); ansi_c_parser.in = &is; ansi_c_parser.for_has_scope = config.ansi_c.for_has_scope; diff --git a/src/statement-list/statement_list_language.cpp b/src/statement-list/statement_list_language.cpp index d1cc36b7b5b..db871437507 100644 --- a/src/statement-list/statement_list_language.cpp +++ b/src/statement-list/statement_list_language.cpp @@ -62,7 +62,6 @@ bool statement_list_languaget::parse( { statement_list_parsert statement_list_parser{message_handler}; parse_path = path; - statement_list_parser.set_line_no(0); statement_list_parser.set_file(path); statement_list_parser.in = &instream; bool result = statement_list_parser.parse(); diff --git a/src/util/parser.h b/src/util/parser.h index eeec3b469a0..f4a5bf7ab1f 100644 --- a/src/util/parser.h +++ b/src/util/parser.h @@ -33,9 +33,10 @@ class parsert explicit parsert(message_handlert &message_handler) : in(nullptr), log(message_handler), - line_no(0), + line_no(1), previous_line_no(std::numeric_limits::max()), - column(1) + column(1), + last_input_ended_with_newline(false) { } @@ -114,12 +115,7 @@ class parsert if(previous_line_no!=line_no) { previous_line_no=line_no; - - // for the case of a file with no newlines - if(line_no == 0) - _source_location.set_line(1); - else - _source_location.set_line(line_no); + _source_location.set_line(line_no); } return _source_location; @@ -150,6 +146,12 @@ class parsert source_locationt _source_location; unsigned line_no, previous_line_no; unsigned column; + +public: + // Accessed by YY_INPUT macro; tracks whether the last YY_INPUT call + // ended with a newline so that the line-number increment can be + // deferred to the start of the next call. + bool last_input_ended_with_newline; }; exprt &_newstack(parsert &parser, unsigned &x); @@ -161,29 +163,35 @@ exprt &_newstack(parsert &parser, unsigned &x); #define stack_type(x) \ (static_cast(static_cast(PARSER.stack[x]))) -#define YY_INPUT(buf, result, max_size) \ - do { \ - for(result=0; result