From 2cebf24c5b16ba66590dd18776f7166419ce14ad Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sat, 31 May 2025 14:47:36 +0300 Subject: [PATCH 01/20] Validate: report error lines --- tools/validate/validate.cpp | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/tools/validate/validate.cpp b/tools/validate/validate.cpp index 0a7936b50af..18ea2497793 100644 --- a/tools/validate/validate.cpp +++ b/tools/validate/validate.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -80,11 +81,18 @@ void scan_file( size_t tab_characters = 0; size_t trailing_whitespace_lines = 0; + constexpr size_t max_error_lines_per_file = 8; + + array overlength_line_numbers{}; + array tab_characters_line_numbers{}; + array trailing_whitespace_line_numbers{}; + unsigned char prev = '@'; unsigned char previous2 = '@'; unsigned char previous3 = '@'; size_t columns = 0; + size_t line = 1; for (BinaryFile binary_file{filepath}; binary_file.read_next_block(buffer);) { for (const auto& ch : buffer) { @@ -94,13 +102,18 @@ void scan_file( } else { has_cr = true; } + ++line; } else { if (ch == LF) { has_lf = true; + ++line; } } if (ch == '\t') { + if (tab_characters < max_error_lines_per_file) { + tab_characters_line_numbers[disallowed_characters] = line; + } ++tab_characters; } else if (ch == 0xEF || ch == 0xBB || ch == 0xBF) { // 0xEF, 0xBB, and 0xBF are the UTF-8 BOM characters. @@ -112,17 +125,24 @@ void scan_file( ++disallowed_characters; constexpr size_t MaxErrorsForDisallowedCharacters = 10; if (disallowed_characters <= MaxErrorsForDisallowedCharacters) { - validation_failure(any_errors, filepath, "file contains disallowed character 0x{:02X}.", - static_cast(ch)); + validation_failure(any_errors, filepath, + "file contains disallowed character 0x{:02X}. error line {}", static_cast(ch), + line); } } if (ch == CR || ch == LF) { if (prev == ' ' || prev == '\t') { + if (trailing_whitespace_lines < max_error_lines_per_file) { + trailing_whitespace_line_numbers[trailing_whitespace_lines] = line; + } ++trailing_whitespace_lines; } if (columns > max_line_length) { + if (overlength_lines < max_error_lines_per_file) { + overlength_line_numbers[overlength_lines] = line; + } ++overlength_lines; } columns = 0; @@ -164,12 +184,14 @@ void scan_file( } if (tab_policy == TabPolicy::Forbidden && tab_characters != 0) { - validation_failure(any_errors, filepath, "file contains {} tab characters.", tab_characters); + validation_failure(any_errors, filepath, "file contains {} tab characters. Lines (up to {}): {}.", + tab_characters, max_error_lines_per_file, tab_characters_line_numbers | views::take(tab_characters)); } if (trailing_whitespace_lines != 0) { - validation_failure( - any_errors, filepath, "file contains {} lines with trailing whitespace.", trailing_whitespace_lines); + validation_failure(any_errors, filepath, + "file contains {} lines with trailing whitespace. Lines (up to {}): {}.", trailing_whitespace_lines, + max_error_lines_per_file, trailing_whitespace_line_numbers | views::take(trailing_whitespace_lines)); } if (overlength_lines != 0) { @@ -188,8 +210,9 @@ void scan_file( static_assert(ranges::is_sorted(checked_extensions)); if (ranges::binary_search(checked_extensions, filepath.extension().wstring())) { - validation_failure(any_errors, filepath, "file contains {} lines with more than {} columns.", - overlength_lines, max_line_length); + validation_failure(any_errors, filepath, + "file contains {} lines with more than {} columns. Lines (up to {}): {}.", overlength_lines, + max_line_length, max_error_lines_per_file, overlength_line_numbers | views::take(overlength_lines)); } } } From 0f3f6f985259925ef4002736060b4f032641d394 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sat, 31 May 2025 15:57:20 +0300 Subject: [PATCH 02/20] Line and column in the message parameters --- tools/validate/validate.cpp | 86 ++++++++++++++++++++++++------------- 1 file changed, 55 insertions(+), 31 deletions(-) diff --git a/tools/validate/validate.cpp b/tools/validate/validate.cpp index 18ea2497793..92fd826ed52 100644 --- a/tools/validate/validate.cpp +++ b/tools/validate/validate.cpp @@ -55,12 +55,32 @@ class BinaryFile { FILE* m_file{nullptr}; }; + +struct line_and_column_type { + size_t line; + size_t column; +}; + +constexpr line_and_column_type unspecified_line_and_column{1, 1}; + +template <> +struct std::formatter { + constexpr auto parse(std::format_parse_context& ctx) { + return ctx.begin(); + } + + template + auto format(const line_and_column_type& lc, FormatContext& ctx) const { + return std::format_to(ctx.out(), "{}:{}", lc.line, lc.column); + } +}; + template -void validation_failure( - bool& any_errors, const filesystem::path& filepath, format_string...> fmt, Args&&... args) { +void validation_failure(bool& any_errors, const filesystem::path& filepath, format_string...> fmt, + line_and_column_type line_and_column, Args&&... args) { any_errors = true; - print(stderr, "##vso[task.logissue type=error;sourcepath={};linenumber=1;columnnumber=1]Validation failed: ", - filepath.string()); + print(stderr, "##vso[task.logissue type=error;sourcepath={};linenumber={};columnnumber={}]Validation failed: ", + filepath.string(), line_and_column.line, line_and_column.column); println(stderr, fmt, forward(args)...); } @@ -83,9 +103,9 @@ void scan_file( constexpr size_t max_error_lines_per_file = 8; - array overlength_line_numbers{}; - array tab_characters_line_numbers{}; - array trailing_whitespace_line_numbers{}; + array overlength_line_numbers{}; + array tab_characters_line_numbers{}; + array trailing_whitespace_line_numbers{}; unsigned char prev = '@'; unsigned char previous2 = '@'; @@ -112,7 +132,7 @@ void scan_file( if (ch == '\t') { if (tab_characters < max_error_lines_per_file) { - tab_characters_line_numbers[disallowed_characters] = line; + tab_characters_line_numbers[tab_characters] = {line, columns}; } ++tab_characters; } else if (ch == 0xEF || ch == 0xBB || ch == 0xBF) { @@ -125,23 +145,22 @@ void scan_file( ++disallowed_characters; constexpr size_t MaxErrorsForDisallowedCharacters = 10; if (disallowed_characters <= MaxErrorsForDisallowedCharacters) { - validation_failure(any_errors, filepath, - "file contains disallowed character 0x{:02X}. error line {}", static_cast(ch), - line); + validation_failure(any_errors, filepath, "file contains disallowed character 0x{:02X}.", + {line, columns}, static_cast(ch)); } } if (ch == CR || ch == LF) { if (prev == ' ' || prev == '\t') { if (trailing_whitespace_lines < max_error_lines_per_file) { - trailing_whitespace_line_numbers[trailing_whitespace_lines] = line; + trailing_whitespace_line_numbers[trailing_whitespace_lines] = {line, columns}; } ++trailing_whitespace_lines; } if (columns > max_line_length) { if (overlength_lines < max_error_lines_per_file) { - overlength_line_numbers[overlength_lines] = line; + overlength_line_numbers[overlength_lines] = {line, columns}; } ++overlength_lines; } @@ -158,40 +177,44 @@ void scan_file( } if (has_cr) { - validation_failure(any_errors, filepath, "file contains CR line endings (possibly damaged CRLF)."); + validation_failure(any_errors, filepath, "file contains CR line endings (possibly damaged CRLF).", + unspecified_line_and_column); } else if (has_lf && has_crlf) { - validation_failure(any_errors, filepath, "file contains mixed line endings (both LF and CRLF)."); + validation_failure( + any_errors, filepath, "file contains mixed line endings (both LF and CRLF).", unspecified_line_and_column); } else if (has_lf) { - validation_failure(any_errors, filepath, "file contains LF line endings."); + validation_failure(any_errors, filepath, "file contains LF line endings.", unspecified_line_and_column); if (prev != LF) { - validation_failure(any_errors, filepath, "file doesn't end with a newline."); + validation_failure(any_errors, filepath, "file doesn't end with a newline.", unspecified_line_and_column); } else if (previous2 == LF) { - validation_failure(any_errors, filepath, "file ends with multiple newlines."); + validation_failure(any_errors, filepath, "file ends with multiple newlines.", unspecified_line_and_column); } } else if (has_crlf) { if (previous2 != CR || prev != LF) { - validation_failure(any_errors, filepath, "file doesn't end with a newline."); + validation_failure(any_errors, filepath, "file doesn't end with a newline.", unspecified_line_and_column); } else if (previous3 == LF) { - validation_failure(any_errors, filepath, "file ends with multiple newlines."); + validation_failure(any_errors, filepath, "file ends with multiple newlines.", unspecified_line_and_column); } } else { - validation_failure(any_errors, filepath, "file doesn't contain any newlines."); + validation_failure(any_errors, filepath, "file doesn't contain any newlines.", unspecified_line_and_column); } if (has_utf8_bom) { - validation_failure(any_errors, filepath, "file contains UTF-8 BOM characters."); + validation_failure(any_errors, filepath, "file contains UTF-8 BOM characters.", unspecified_line_and_column); } if (tab_policy == TabPolicy::Forbidden && tab_characters != 0) { - validation_failure(any_errors, filepath, "file contains {} tab characters. Lines (up to {}): {}.", - tab_characters, max_error_lines_per_file, tab_characters_line_numbers | views::take(tab_characters)); + validation_failure(any_errors, filepath, "file contains {} tab characters. Lines (up to {}): {}.", + tab_characters_line_numbers[0], tab_characters, max_error_lines_per_file, + tab_characters_line_numbers | views::take(tab_characters)); } if (trailing_whitespace_lines != 0) { validation_failure(any_errors, filepath, - "file contains {} lines with trailing whitespace. Lines (up to {}): {}.", trailing_whitespace_lines, - max_error_lines_per_file, trailing_whitespace_line_numbers | views::take(trailing_whitespace_lines)); + "file contains {} lines with trailing whitespace. Lines (up to {}): {}.", + trailing_whitespace_line_numbers[0], trailing_whitespace_lines, max_error_lines_per_file, + trailing_whitespace_line_numbers | views::take(trailing_whitespace_lines)); } if (overlength_lines != 0) { @@ -211,8 +234,9 @@ void scan_file( if (ranges::binary_search(checked_extensions, filepath.extension().wstring())) { validation_failure(any_errors, filepath, - "file contains {} lines with more than {} columns. Lines (up to {}): {}.", overlength_lines, - max_line_length, max_error_lines_per_file, overlength_line_numbers | views::take(overlength_lines)); + "file contains {} lines with more than {} columns. Lines and columns (up to {}): {}.", + overlength_line_numbers[0], overlength_lines, max_line_length, max_error_lines_per_file, + overlength_line_numbers | views::take(overlength_lines)); } } } @@ -290,11 +314,11 @@ int main() { constexpr size_t maximum_relative_path_length = 120; if (relative_path.size() > maximum_relative_path_length) { validation_failure(any_errors, filepath, "filepath is too long ({} characters; the limit is {}).", - relative_path.size(), maximum_relative_path_length); + unspecified_line_and_column, relative_path.size(), maximum_relative_path_length); } if (relative_path.find(L' ') != wstring::npos) { - validation_failure(any_errors, filepath, "filepath contains spaces."); + validation_failure(any_errors, filepath, "filepath contains spaces.", unspecified_line_and_column); } const wstring extension = filepath.extension().wstring(); @@ -304,7 +328,7 @@ int main() { } if (ranges::binary_search(bad_extensions, extension)) { - validation_failure(any_errors, filepath, "file should not be checked in."); + validation_failure(any_errors, filepath, "file should not be checked in.", unspecified_line_and_column); continue; } From 83fda835887d743b3c8a757d1a9b2716a9048d14 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sat, 31 May 2025 15:59:54 +0300 Subject: [PATCH 03/20] Consistent message --- tools/validate/validate.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/validate/validate.cpp b/tools/validate/validate.cpp index 92fd826ed52..a271dfd401d 100644 --- a/tools/validate/validate.cpp +++ b/tools/validate/validate.cpp @@ -205,14 +205,14 @@ void scan_file( } if (tab_policy == TabPolicy::Forbidden && tab_characters != 0) { - validation_failure(any_errors, filepath, "file contains {} tab characters. Lines (up to {}): {}.", + validation_failure(any_errors, filepath, "file contains {} tab characters. Lines and columns (up to {}): {}.", tab_characters_line_numbers[0], tab_characters, max_error_lines_per_file, tab_characters_line_numbers | views::take(tab_characters)); } if (trailing_whitespace_lines != 0) { validation_failure(any_errors, filepath, - "file contains {} lines with trailing whitespace. Lines (up to {}): {}.", + "file contains {} lines with trailing whitespace. Lines and columns (up to {}): {}.", trailing_whitespace_line_numbers[0], trailing_whitespace_lines, max_error_lines_per_file, trailing_whitespace_line_numbers | views::take(trailing_whitespace_lines)); } From 28a2e1cc04219b9e40105bf19a2fa3846f08b884 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 1 Jun 2025 10:48:06 +0300 Subject: [PATCH 04/20] less disruption via overload --- tools/validate/validate.cpp | 59 +++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/tools/validate/validate.cpp b/tools/validate/validate.cpp index a271dfd401d..ba62d0966f6 100644 --- a/tools/validate/validate.cpp +++ b/tools/validate/validate.cpp @@ -61,8 +61,6 @@ struct line_and_column_type { size_t column; }; -constexpr line_and_column_type unspecified_line_and_column{1, 1}; - template <> struct std::formatter { constexpr auto parse(std::format_parse_context& ctx) { @@ -76,14 +74,20 @@ struct std::formatter { }; template -void validation_failure(bool& any_errors, const filesystem::path& filepath, format_string...> fmt, - line_and_column_type line_and_column, Args&&... args) { +void validation_failure(bool& any_errors, const filesystem::path& filepath, line_and_column_type line_and_column, + format_string...> fmt, Args&&... args) { any_errors = true; print(stderr, "##vso[task.logissue type=error;sourcepath={};linenumber={};columnnumber={}]Validation failed: ", filepath.string(), line_and_column.line, line_and_column.column); println(stderr, fmt, forward(args)...); } +template +void validation_failure( + bool& any_errors, const filesystem::path& filepath, format_string...> fmt, Args&&... args) { + validation_failure(any_errors, filepath, {1, 1}, fmt, forward(args)...); +} + enum class TabPolicy : bool { Forbidden, Allowed }; void scan_file( @@ -145,8 +149,8 @@ void scan_file( ++disallowed_characters; constexpr size_t MaxErrorsForDisallowedCharacters = 10; if (disallowed_characters <= MaxErrorsForDisallowedCharacters) { - validation_failure(any_errors, filepath, "file contains disallowed character 0x{:02X}.", - {line, columns}, static_cast(ch)); + validation_failure(any_errors, filepath, {line, columns}, + "file contains disallowed character 0x{:02X}.", static_cast(ch)); } } @@ -177,43 +181,41 @@ void scan_file( } if (has_cr) { - validation_failure(any_errors, filepath, "file contains CR line endings (possibly damaged CRLF).", - unspecified_line_and_column); + validation_failure(any_errors, filepath, "file contains CR line endings (possibly damaged CRLF)."); } else if (has_lf && has_crlf) { - validation_failure( - any_errors, filepath, "file contains mixed line endings (both LF and CRLF).", unspecified_line_and_column); + validation_failure(any_errors, filepath, "file contains mixed line endings (both LF and CRLF)."); } else if (has_lf) { - validation_failure(any_errors, filepath, "file contains LF line endings.", unspecified_line_and_column); + validation_failure(any_errors, filepath, "file contains LF line endings."); if (prev != LF) { - validation_failure(any_errors, filepath, "file doesn't end with a newline.", unspecified_line_and_column); + validation_failure(any_errors, filepath, "file doesn't end with a newline."); } else if (previous2 == LF) { - validation_failure(any_errors, filepath, "file ends with multiple newlines.", unspecified_line_and_column); + validation_failure(any_errors, filepath, "file ends with multiple newlines."); } } else if (has_crlf) { if (previous2 != CR || prev != LF) { - validation_failure(any_errors, filepath, "file doesn't end with a newline.", unspecified_line_and_column); + validation_failure(any_errors, filepath, "file doesn't end with a newline."); } else if (previous3 == LF) { - validation_failure(any_errors, filepath, "file ends with multiple newlines.", unspecified_line_and_column); + validation_failure(any_errors, filepath, "file ends with multiple newlines."); } } else { - validation_failure(any_errors, filepath, "file doesn't contain any newlines.", unspecified_line_and_column); + validation_failure(any_errors, filepath, "file doesn't contain any newlines."); } if (has_utf8_bom) { - validation_failure(any_errors, filepath, "file contains UTF-8 BOM characters.", unspecified_line_and_column); + validation_failure(any_errors, filepath, "file contains UTF-8 BOM characters."); } if (tab_policy == TabPolicy::Forbidden && tab_characters != 0) { - validation_failure(any_errors, filepath, "file contains {} tab characters. Lines and columns (up to {}): {}.", - tab_characters_line_numbers[0], tab_characters, max_error_lines_per_file, - tab_characters_line_numbers | views::take(tab_characters)); + validation_failure(any_errors, filepath, tab_characters_line_numbers[0], + "file contains {} tab characters. Lines and columns (up to {}): {}.", tab_characters, + max_error_lines_per_file, tab_characters_line_numbers | views::take(tab_characters)); } if (trailing_whitespace_lines != 0) { - validation_failure(any_errors, filepath, + validation_failure(any_errors, filepath, trailing_whitespace_line_numbers[0], "file contains {} lines with trailing whitespace. Lines and columns (up to {}): {}.", - trailing_whitespace_line_numbers[0], trailing_whitespace_lines, max_error_lines_per_file, + trailing_whitespace_lines, max_error_lines_per_file, trailing_whitespace_line_numbers | views::take(trailing_whitespace_lines)); } @@ -233,10 +235,9 @@ void scan_file( static_assert(ranges::is_sorted(checked_extensions)); if (ranges::binary_search(checked_extensions, filepath.extension().wstring())) { - validation_failure(any_errors, filepath, - "file contains {} lines with more than {} columns. Lines and columns (up to {}): {}.", - overlength_line_numbers[0], overlength_lines, max_line_length, max_error_lines_per_file, - overlength_line_numbers | views::take(overlength_lines)); + validation_failure(any_errors, filepath, overlength_line_numbers[0], + "file contains {} lines with more than {} columns. Lines and columns (up to {}): {}.", overlength_lines, + max_line_length, max_error_lines_per_file, overlength_line_numbers | views::take(overlength_lines)); } } } @@ -314,11 +315,11 @@ int main() { constexpr size_t maximum_relative_path_length = 120; if (relative_path.size() > maximum_relative_path_length) { validation_failure(any_errors, filepath, "filepath is too long ({} characters; the limit is {}).", - unspecified_line_and_column, relative_path.size(), maximum_relative_path_length); + relative_path.size(), maximum_relative_path_length); } if (relative_path.find(L' ') != wstring::npos) { - validation_failure(any_errors, filepath, "filepath contains spaces.", unspecified_line_and_column); + validation_failure(any_errors, filepath, "filepath contains spaces."); } const wstring extension = filepath.extension().wstring(); @@ -328,7 +329,7 @@ int main() { } if (ranges::binary_search(bad_extensions, extension)) { - validation_failure(any_errors, filepath, "file should not be checked in.", unspecified_line_and_column); + validation_failure(any_errors, filepath, "file should not be checked in."); continue; } From 4eaf575a40d1cf467f9919816a75125114977ea3 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 1 Jun 2025 10:54:47 +0300 Subject: [PATCH 05/20] Fix off by one error --- tools/validate/validate.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/validate/validate.cpp b/tools/validate/validate.cpp index ba62d0966f6..258543ef1bb 100644 --- a/tools/validate/validate.cpp +++ b/tools/validate/validate.cpp @@ -136,7 +136,7 @@ void scan_file( if (ch == '\t') { if (tab_characters < max_error_lines_per_file) { - tab_characters_line_numbers[tab_characters] = {line, columns}; + tab_characters_line_numbers[tab_characters] = {line, columns + 1}; } ++tab_characters; } else if (ch == 0xEF || ch == 0xBB || ch == 0xBF) { @@ -157,14 +157,14 @@ void scan_file( if (ch == CR || ch == LF) { if (prev == ' ' || prev == '\t') { if (trailing_whitespace_lines < max_error_lines_per_file) { - trailing_whitespace_line_numbers[trailing_whitespace_lines] = {line, columns}; + trailing_whitespace_line_numbers[trailing_whitespace_lines] = {line, columns + 1}; } ++trailing_whitespace_lines; } if (columns > max_line_length) { if (overlength_lines < max_error_lines_per_file) { - overlength_line_numbers[overlength_lines] = {line, columns}; + overlength_line_numbers[overlength_lines] = {line, columns + 1}; } ++overlength_lines; } From c81422255e7c5c1c7f5aac6c25b68f84a404f974 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 1 Jun 2025 11:02:47 +0300 Subject: [PATCH 06/20] -line --- tools/validate/validate.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/validate/validate.cpp b/tools/validate/validate.cpp index 258543ef1bb..af895b00795 100644 --- a/tools/validate/validate.cpp +++ b/tools/validate/validate.cpp @@ -55,7 +55,6 @@ class BinaryFile { FILE* m_file{nullptr}; }; - struct line_and_column_type { size_t line; size_t column; From e4deb7ff8bc9d101c0aad2b5d5fa7ec996d4e87d Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 1 Jun 2025 11:03:10 +0300 Subject: [PATCH 07/20] off by one here too --- tools/validate/validate.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/validate/validate.cpp b/tools/validate/validate.cpp index af895b00795..eadc8705afe 100644 --- a/tools/validate/validate.cpp +++ b/tools/validate/validate.cpp @@ -148,7 +148,7 @@ void scan_file( ++disallowed_characters; constexpr size_t MaxErrorsForDisallowedCharacters = 10; if (disallowed_characters <= MaxErrorsForDisallowedCharacters) { - validation_failure(any_errors, filepath, {line, columns}, + validation_failure(any_errors, filepath, {line, columns + 1}, "file contains disallowed character 0x{:02X}.", static_cast(ch)); } } From 555697f5b9ac9bcaba7864f996795d2a6a9ae2bb Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Thu, 5 Jun 2025 13:03:10 -0700 Subject: [PATCH 08/20] Avoid `std::` qualification. --- tools/validate/validate.cpp | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/tools/validate/validate.cpp b/tools/validate/validate.cpp index eadc8705afe..62040d9a7ae 100644 --- a/tools/validate/validate.cpp +++ b/tools/validate/validate.cpp @@ -60,17 +60,19 @@ struct line_and_column_type { size_t column; }; -template <> -struct std::formatter { - constexpr auto parse(std::format_parse_context& ctx) { - return ctx.begin(); - } +namespace std { + template <> + struct formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } - template - auto format(const line_and_column_type& lc, FormatContext& ctx) const { - return std::format_to(ctx.out(), "{}:{}", lc.line, lc.column); - } -}; + template + auto format(const line_and_column_type& lc, FormatContext& ctx) const { + return format_to(ctx.out(), "{}:{}", lc.line, lc.column); + } + }; +} // namespace std template void validation_failure(bool& any_errors, const filesystem::path& filepath, line_and_column_type line_and_column, From ed4bb5e3f8e5c31b872d6d669c6e7ae7b8c8d25a Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Thu, 5 Jun 2025 13:04:50 -0700 Subject: [PATCH 09/20] Consistently take `const line_and_column_type&`. --- tools/validate/validate.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/validate/validate.cpp b/tools/validate/validate.cpp index 62040d9a7ae..b0ba892b233 100644 --- a/tools/validate/validate.cpp +++ b/tools/validate/validate.cpp @@ -75,11 +75,11 @@ namespace std { } // namespace std template -void validation_failure(bool& any_errors, const filesystem::path& filepath, line_and_column_type line_and_column, +void validation_failure(bool& any_errors, const filesystem::path& filepath, const line_and_column_type& lc, format_string...> fmt, Args&&... args) { any_errors = true; print(stderr, "##vso[task.logissue type=error;sourcepath={};linenumber={};columnnumber={}]Validation failed: ", - filepath.string(), line_and_column.line, line_and_column.column); + filepath.string(), lc.line, lc.column); println(stderr, fmt, forward(args)...); } From 0981f3af28a9e059f612a1b54e9a712c3c47edbe Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Thu, 5 Jun 2025 13:09:00 -0700 Subject: [PATCH 10/20] `line_and_column_type` => `line_and_column` --- tools/validate/validate.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/validate/validate.cpp b/tools/validate/validate.cpp index b0ba892b233..e802fbc5333 100644 --- a/tools/validate/validate.cpp +++ b/tools/validate/validate.cpp @@ -55,27 +55,27 @@ class BinaryFile { FILE* m_file{nullptr}; }; -struct line_and_column_type { +struct line_and_column { size_t line; size_t column; }; namespace std { template <> - struct formatter { + struct formatter { constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } template - auto format(const line_and_column_type& lc, FormatContext& ctx) const { + auto format(const line_and_column& lc, FormatContext& ctx) const { return format_to(ctx.out(), "{}:{}", lc.line, lc.column); } }; } // namespace std template -void validation_failure(bool& any_errors, const filesystem::path& filepath, const line_and_column_type& lc, +void validation_failure(bool& any_errors, const filesystem::path& filepath, const line_and_column& lc, format_string...> fmt, Args&&... args) { any_errors = true; print(stderr, "##vso[task.logissue type=error;sourcepath={};linenumber={};columnnumber={}]Validation failed: ", @@ -108,9 +108,9 @@ void scan_file( constexpr size_t max_error_lines_per_file = 8; - array overlength_line_numbers{}; - array tab_characters_line_numbers{}; - array trailing_whitespace_line_numbers{}; + array overlength_line_numbers{}; + array tab_characters_line_numbers{}; + array trailing_whitespace_line_numbers{}; unsigned char prev = '@'; unsigned char previous2 = '@'; From 49956126059aac3c12717ac1a28ec287ca186674 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Thu, 5 Jun 2025 13:14:21 -0700 Subject: [PATCH 11/20] Track 0-based `lines` while reading the file. --- tools/validate/validate.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tools/validate/validate.cpp b/tools/validate/validate.cpp index e802fbc5333..d9ee06e05ff 100644 --- a/tools/validate/validate.cpp +++ b/tools/validate/validate.cpp @@ -117,7 +117,7 @@ void scan_file( unsigned char previous3 = '@'; size_t columns = 0; - size_t line = 1; + size_t lines = 0; for (BinaryFile binary_file{filepath}; binary_file.read_next_block(buffer);) { for (const auto& ch : buffer) { @@ -127,17 +127,17 @@ void scan_file( } else { has_cr = true; } - ++line; + ++lines; } else { if (ch == LF) { has_lf = true; - ++line; + ++lines; } } if (ch == '\t') { if (tab_characters < max_error_lines_per_file) { - tab_characters_line_numbers[tab_characters] = {line, columns + 1}; + tab_characters_line_numbers[tab_characters] = {lines + 1, columns + 1}; } ++tab_characters; } else if (ch == 0xEF || ch == 0xBB || ch == 0xBF) { @@ -150,7 +150,7 @@ void scan_file( ++disallowed_characters; constexpr size_t MaxErrorsForDisallowedCharacters = 10; if (disallowed_characters <= MaxErrorsForDisallowedCharacters) { - validation_failure(any_errors, filepath, {line, columns + 1}, + validation_failure(any_errors, filepath, {lines + 1, columns + 1}, "file contains disallowed character 0x{:02X}.", static_cast(ch)); } } @@ -158,14 +158,14 @@ void scan_file( if (ch == CR || ch == LF) { if (prev == ' ' || prev == '\t') { if (trailing_whitespace_lines < max_error_lines_per_file) { - trailing_whitespace_line_numbers[trailing_whitespace_lines] = {line, columns + 1}; + trailing_whitespace_line_numbers[trailing_whitespace_lines] = {lines + 1, columns + 1}; } ++trailing_whitespace_lines; } if (columns > max_line_length) { if (overlength_lines < max_error_lines_per_file) { - overlength_line_numbers[overlength_lines] = {line, columns + 1}; + overlength_line_numbers[overlength_lines] = {lines + 1, columns + 1}; } ++overlength_lines; } From af61ed6e8d31d7bc188a2b481cf890336a38896d Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Thu, 5 Jun 2025 13:20:16 -0700 Subject: [PATCH 12/20] `tab_characters_line_numbers` => `tab_character_line_numbers` --- tools/validate/validate.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/validate/validate.cpp b/tools/validate/validate.cpp index d9ee06e05ff..f6eb3e0e8a9 100644 --- a/tools/validate/validate.cpp +++ b/tools/validate/validate.cpp @@ -109,7 +109,7 @@ void scan_file( constexpr size_t max_error_lines_per_file = 8; array overlength_line_numbers{}; - array tab_characters_line_numbers{}; + array tab_character_line_numbers{}; array trailing_whitespace_line_numbers{}; unsigned char prev = '@'; @@ -137,7 +137,7 @@ void scan_file( if (ch == '\t') { if (tab_characters < max_error_lines_per_file) { - tab_characters_line_numbers[tab_characters] = {lines + 1, columns + 1}; + tab_character_line_numbers[tab_characters] = {lines + 1, columns + 1}; } ++tab_characters; } else if (ch == 0xEF || ch == 0xBB || ch == 0xBF) { @@ -208,9 +208,9 @@ void scan_file( } if (tab_policy == TabPolicy::Forbidden && tab_characters != 0) { - validation_failure(any_errors, filepath, tab_characters_line_numbers[0], + validation_failure(any_errors, filepath, tab_character_line_numbers[0], "file contains {} tab characters. Lines and columns (up to {}): {}.", tab_characters, - max_error_lines_per_file, tab_characters_line_numbers | views::take(tab_characters)); + max_error_lines_per_file, tab_character_line_numbers | views::take(tab_characters)); } if (trailing_whitespace_lines != 0) { From f675d570cd0ae2f3f4ed728667cb2a4d19528b7f Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Thu, 5 Jun 2025 13:22:20 -0700 Subject: [PATCH 13/20] `typename` => `class` --- tools/validate/validate.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/validate/validate.cpp b/tools/validate/validate.cpp index f6eb3e0e8a9..aadc540768c 100644 --- a/tools/validate/validate.cpp +++ b/tools/validate/validate.cpp @@ -67,7 +67,7 @@ namespace std { return ctx.begin(); } - template + template auto format(const line_and_column& lc, FormatContext& ctx) const { return format_to(ctx.out(), "{}:{}", lc.line, lc.column); } From bc37e18949d5c454f4497f5dccf9f3726283fd76 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Thu, 5 Jun 2025 13:25:41 -0700 Subject: [PATCH 14/20] Add DMIs for `line_and_column`. 1 is the "default" / "don't care" value used below. --- tools/validate/validate.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/validate/validate.cpp b/tools/validate/validate.cpp index aadc540768c..a986e2aaaa7 100644 --- a/tools/validate/validate.cpp +++ b/tools/validate/validate.cpp @@ -56,8 +56,8 @@ class BinaryFile { }; struct line_and_column { - size_t line; - size_t column; + size_t line = 1; + size_t column = 1; }; namespace std { From be185f5e21265ec4f66a94f7e33755ed529e4b49 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Thu, 5 Jun 2025 15:40:18 -0700 Subject: [PATCH 15/20] Consistently mention lines before columns. --- tools/validate/validate.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/validate/validate.cpp b/tools/validate/validate.cpp index a986e2aaaa7..9f369bea1ad 100644 --- a/tools/validate/validate.cpp +++ b/tools/validate/validate.cpp @@ -116,8 +116,8 @@ void scan_file( unsigned char previous2 = '@'; unsigned char previous3 = '@'; - size_t columns = 0; size_t lines = 0; + size_t columns = 0; for (BinaryFile binary_file{filepath}; binary_file.read_next_block(buffer);) { for (const auto& ch : buffer) { From 9a413167538d0f642da44bbf51817bb5fec99f1d Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Thu, 5 Jun 2025 15:51:25 -0700 Subject: [PATCH 16/20] Use the new limit/pattern for `disallowed_characters`. --- tools/validate/validate.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tools/validate/validate.cpp b/tools/validate/validate.cpp index 9f369bea1ad..159c40ad1f6 100644 --- a/tools/validate/validate.cpp +++ b/tools/validate/validate.cpp @@ -147,12 +147,11 @@ void scan_file( } else if (ch != CR && ch != LF && !(ch >= 0x20 && ch <= 0x7E)) { // [0x20, 0x7E] are the printable characters, including the space character. // https://en.wikipedia.org/wiki/ASCII#Printable_characters - ++disallowed_characters; - constexpr size_t MaxErrorsForDisallowedCharacters = 10; - if (disallowed_characters <= MaxErrorsForDisallowedCharacters) { + if (disallowed_characters < max_error_lines_per_file) { validation_failure(any_errors, filepath, {lines + 1, columns + 1}, "file contains disallowed character 0x{:02X}.", static_cast(ch)); } + ++disallowed_characters; } if (ch == CR || ch == LF) { From b68d4b3b3e0c54f7b61e4262c4fa03792597a857 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Thu, 5 Jun 2025 15:58:23 -0700 Subject: [PATCH 17/20] Also increment lines when file ends with CR. --- tools/validate/validate.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/validate/validate.cpp b/tools/validate/validate.cpp index 159c40ad1f6..222f1443e72 100644 --- a/tools/validate/validate.cpp +++ b/tools/validate/validate.cpp @@ -178,6 +178,7 @@ void scan_file( if (prev == CR) { // file ends with CR has_cr = true; + ++lines; } if (has_cr) { From a37437269a232e29b5c3da8f799fa326d17a9254 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Fri, 6 Jun 2025 08:33:19 -0700 Subject: [PATCH 18/20] meow_line_numbers => meow_locations --- tools/validate/validate.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tools/validate/validate.cpp b/tools/validate/validate.cpp index 222f1443e72..2d16a7446fb 100644 --- a/tools/validate/validate.cpp +++ b/tools/validate/validate.cpp @@ -108,9 +108,9 @@ void scan_file( constexpr size_t max_error_lines_per_file = 8; - array overlength_line_numbers{}; - array tab_character_line_numbers{}; - array trailing_whitespace_line_numbers{}; + array overlength_locations{}; + array tab_character_locations{}; + array trailing_whitespace_locations{}; unsigned char prev = '@'; unsigned char previous2 = '@'; @@ -137,7 +137,7 @@ void scan_file( if (ch == '\t') { if (tab_characters < max_error_lines_per_file) { - tab_character_line_numbers[tab_characters] = {lines + 1, columns + 1}; + tab_character_locations[tab_characters] = {lines + 1, columns + 1}; } ++tab_characters; } else if (ch == 0xEF || ch == 0xBB || ch == 0xBF) { @@ -157,14 +157,14 @@ void scan_file( if (ch == CR || ch == LF) { if (prev == ' ' || prev == '\t') { if (trailing_whitespace_lines < max_error_lines_per_file) { - trailing_whitespace_line_numbers[trailing_whitespace_lines] = {lines + 1, columns + 1}; + trailing_whitespace_locations[trailing_whitespace_lines] = {lines + 1, columns + 1}; } ++trailing_whitespace_lines; } if (columns > max_line_length) { if (overlength_lines < max_error_lines_per_file) { - overlength_line_numbers[overlength_lines] = {lines + 1, columns + 1}; + overlength_locations[overlength_lines] = {lines + 1, columns + 1}; } ++overlength_lines; } @@ -208,16 +208,16 @@ void scan_file( } if (tab_policy == TabPolicy::Forbidden && tab_characters != 0) { - validation_failure(any_errors, filepath, tab_character_line_numbers[0], + validation_failure(any_errors, filepath, tab_character_locations[0], "file contains {} tab characters. Lines and columns (up to {}): {}.", tab_characters, - max_error_lines_per_file, tab_character_line_numbers | views::take(tab_characters)); + max_error_lines_per_file, tab_character_locations | views::take(tab_characters)); } if (trailing_whitespace_lines != 0) { - validation_failure(any_errors, filepath, trailing_whitespace_line_numbers[0], + validation_failure(any_errors, filepath, trailing_whitespace_locations[0], "file contains {} lines with trailing whitespace. Lines and columns (up to {}): {}.", trailing_whitespace_lines, max_error_lines_per_file, - trailing_whitespace_line_numbers | views::take(trailing_whitespace_lines)); + trailing_whitespace_locations | views::take(trailing_whitespace_lines)); } if (overlength_lines != 0) { @@ -236,9 +236,9 @@ void scan_file( static_assert(ranges::is_sorted(checked_extensions)); if (ranges::binary_search(checked_extensions, filepath.extension().wstring())) { - validation_failure(any_errors, filepath, overlength_line_numbers[0], + validation_failure(any_errors, filepath, overlength_locations[0], "file contains {} lines with more than {} columns. Lines and columns (up to {}): {}.", overlength_lines, - max_line_length, max_error_lines_per_file, overlength_line_numbers | views::take(overlength_lines)); + max_line_length, max_error_lines_per_file, overlength_locations | views::take(overlength_lines)); } } } From 3013086a31be161b13acbcc64a841ce990bf1c02 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Fri, 6 Jun 2025 08:51:50 -0700 Subject: [PATCH 19/20] Use the array technique for disallowed characters. --- tools/validate/validate.cpp | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/tools/validate/validate.cpp b/tools/validate/validate.cpp index 2d16a7446fb..4a0da809fc1 100644 --- a/tools/validate/validate.cpp +++ b/tools/validate/validate.cpp @@ -60,6 +60,11 @@ struct line_and_column { size_t column = 1; }; +struct character_line_column { + unsigned char ch = '?'; + line_and_column lc; +}; + namespace std { template <> struct formatter { @@ -72,6 +77,18 @@ namespace std { return format_to(ctx.out(), "{}:{}", lc.line, lc.column); } }; + + template <> + struct formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + + template + auto format(const character_line_column& clc, FormatContext& ctx) const { + return format_to(ctx.out(), "0x{:02X}@{}", static_cast(clc.ch), clc.lc); + } + }; } // namespace std template @@ -111,6 +128,7 @@ void scan_file( array overlength_locations{}; array tab_character_locations{}; array trailing_whitespace_locations{}; + array disallowed_character_locations{}; unsigned char prev = '@'; unsigned char previous2 = '@'; @@ -148,8 +166,7 @@ void scan_file( // [0x20, 0x7E] are the printable characters, including the space character. // https://en.wikipedia.org/wiki/ASCII#Printable_characters if (disallowed_characters < max_error_lines_per_file) { - validation_failure(any_errors, filepath, {lines + 1, columns + 1}, - "file contains disallowed character 0x{:02X}.", static_cast(ch)); + disallowed_character_locations[disallowed_characters] = {ch, {lines + 1, columns + 1}}; } ++disallowed_characters; } @@ -241,6 +258,12 @@ void scan_file( max_line_length, max_error_lines_per_file, overlength_locations | views::take(overlength_lines)); } } + + if (disallowed_characters != 0) { + validation_failure(any_errors, filepath, disallowed_character_locations[0].lc, + "file contains {} disallowed characters. Locations (up to {}): {}.", disallowed_characters, + max_error_lines_per_file, disallowed_character_locations | views::take(disallowed_characters)); + } } int main() { From 4cd3065fd078c73a9284ca34844b59f5346bd081 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Fri, 6 Jun 2025 10:38:00 -0700 Subject: [PATCH 20/20] Spaces. The final frontiers. --- tools/validate/validate.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/validate/validate.cpp b/tools/validate/validate.cpp index 4a0da809fc1..55ed9a2d266 100644 --- a/tools/validate/validate.cpp +++ b/tools/validate/validate.cpp @@ -86,7 +86,7 @@ namespace std { template auto format(const character_line_column& clc, FormatContext& ctx) const { - return format_to(ctx.out(), "0x{:02X}@{}", static_cast(clc.ch), clc.lc); + return format_to(ctx.out(), "0x{:02X} @ {}", static_cast(clc.ch), clc.lc); } }; } // namespace std