-
Notifications
You must be signed in to change notification settings - Fork 54
Closed
Labels
Description
- When using
#if 0 ... #endifit properly writes out a#linedirective that accounts for the whitespace. - However, when using
#ifdef NOT_DEFINED ... #endif, no#linedirective is emitted, leading to incorrect line numbering by subsequent compiler's accounting (compiler's reconstruct line numbers via properly emitted#linedirectives).
Example code that is preprocessed:
// This should be line 2
// A comment, followed by if with whitespace (line 5)
#if 0
// A lot of white space
#endif
// This should be line 13
// A comment, followed by ifdef with whitespace (line 15)
#ifdef NOT_DEFINED
// A lot of white space
#endif
// This should be line 24
Above preprocessed (with line numbering for your convenience).
/path/to/source/source.cxx:1 |
/path/to/source/source.cxx:2 |// This should be line 2
/path/to/source/source.cxx:3 |
/path/to/source/source.cxx:4 |
/path/to/source/source.cxx:5 |// A comment, followed by if with whitespace (line 5)
/path/to/source/source.cxx: |#line 13 "/path/to/source/source.cxx"
/path/to/source/source.cxx:13|// This should be line 13
/path/to/source/source.cxx:14|
/path/to/source/source.cxx:15|// A comment, followed by ifdef with whitespace (line 15)
/path/to/source/source.cxx:16|
/path/to/source/source.cxx:17|// This should be line 24
/path/to/source/source.cxx:18|
/path/to/source/source.cxx:19|
/path/to/source/source.cxx:20| Full code to reproduce (you can pretty much ignore everything outside of main(), is just for the nice debug output):
/*
reset && g++-11 \
/mnt/y/dump/code/gigaclip-project-2020/boost-wave-ifdef-whitespace.cxx \
--std=c++17 -I$HOME/boost/ -L$HOME/boost/stage/lib/ -lboost_wave \
-lboost_thread -lboost_filesystem
LD_LIBRARY_PATH=$HOME/boost/stage/lib/ ./a.out
*/
#include <filesystem>
#include <functional>
#include <iostream>
#include <map>
#include <optional>
#include <set>
#include <sstream>
#include <string>
#include <vector>
#include "boost/algorithm/string/classification.hpp"
#include "boost/algorithm/string/join.hpp"
#include "boost/algorithm/string/predicate.hpp"
#include "boost/algorithm/string/split.hpp"
#include "boost/core/span.hpp"
#include "boost/lexical_cast.hpp"
#include "boost/range/iterator_range.hpp"
#include "boost/wave.hpp"
#include "boost/wave/cpp_context.hpp"
#include "boost/wave/cpp_exceptions.hpp"
#include "boost/wave/cpplexer/cpp_lex_iterator.hpp"
#include "boost/wave/language_support.hpp"
template <typename Token>
struct ContextPolicy
: boost::wave::context_policies::default_preprocessing_hooks {
// typedef boost::wave::context_policies::eat_whitespace<Token> base_type;
typedef boost::wave::context_policies::default_preprocessing_hooks base_type;
// template <typename ContextT, typename TokenT>
// bool may_skip_whitespace(ContextT const& ctx, TokenT& token,
// bool& skipped_newline) {
// skipped_newline = false;
// return false;
// }
};
std::string LeftPad(const std::string& value, std::size_t max_length);
std::string RightPad(const std::string& value, std::size_t max_length);
void StripLeadingDoubleQuotes(std::string* str);
void StripTrailingDoubleQuotes(std::string* str);
void StripDoubleQuotes(std::string* value);
bool ParseLineDirective(const std::string& line, std::size_t* line_num,
std::string* file_name);
using TranslateFilename = std::function<std::string(const std::string& name)>;
// Format a preprocessed file with file names and line numbers for debugging
// view.
//
// preprocessed_source: The preprocessed file, which is expected to contain the
// proper `#line` directives, so that this function can recover the original
// file names and line numbers.
//
// translate: This translates the file names (or paths) in the preprocessed
// source files to whatever you want to be displayed. This is useful for
// displaying only the file name (instead of the entire path), or, for languages
// like glsl which use numbers instead of file names/paths, you can translate
// the numbers back to human readable file names. If the nullopt is used, then
// then no translation will take place.
//
// untranslated_source_file_name: The file name of the main source file.
std::string NumberTheLines(const std::string preprocessed_source,
std::optional<TranslateFilename> translate,
const std::string& untranslated_source_file_name);
template <typename WaveException>
void PrettyPrintWaveException(const WaveException& e);
int main() {
try {
typedef boost::wave::cpplexer::lex_iterator<
boost::wave::cpplexer::lex_token<>>
LexIterator;
typedef ContextPolicy<typename LexIterator::token_type> ContextPolicyT;
typedef boost::wave::context<
/*Iterator=*/std::string::const_iterator,
/*LexIterator=*/LexIterator,
/*InputPolicy=*/
boost::wave::iteration_context_policies::load_file_to_string,
/*ContextPolicy=*/ContextPolicyT>
context_type;
std::string source_path = "/path/to/source/source.cxx";
std::string source = R"source(
// This should be line 2
// A comment, followed by if with whitespace (line 5)
#if 0
// A lot of white space
#endif
// This should be line 13
// A comment, followed by ifdef with whitespace (line 15)
#ifdef NOT_DEFINED
// A lot of white space
#endif
// This should be line 24
)source";
ContextPolicyT ctx_policy;
context_type ctx(/*first_=*/source.begin(),
/*last_=*/source.end(),
/*fname=*/source_path.c_str(),
/*ctx_policy=*/ctx_policy);
ctx.set_language(/*language=*/boost::wave::language_support::support_c99);
ctx.set_language(
/*language=*/boost::wave::enable_preserve_comments(ctx.get_language()));
ctx.set_language(
/*language=*/boost::wave::enable_insert_whitespace(ctx.get_language()));
ctx.set_language(
/*language=*/boost::wave::enable_single_line(ctx.get_language()));
ctx.set_language(/*language=*/boost::wave::enable_emit_line_directives(
ctx.get_language()));
ctx.set_language(/*language=*/boost::wave::enable_emit_pragma_directives(
ctx.get_language()));
context_type::iterator_type first = ctx.begin();
context_type::iterator_type last = ctx.end();
std::ostringstream sstr;
while (first != last) {
sstr << (*first).get_value();
++first;
}
std::string preprocessed_source = sstr.str();
std::cout << NumberTheLines(/*preprocessed_source=*/preprocessed_source,
/*translate=*/std::nullopt,
/*untranslated_source_file_name=*/source_path)
<< std::endl;
} catch (const boost::wave::cpp_exception& e) {
PrettyPrintWaveException(e);
return 1;
} catch (const boost::wave::cpplexer::cpplexer_exception& e) {
PrettyPrintWaveException(e);
return 1;
}
return 0;
}
std::string NumberTheLines(const std::string preprocessed_source,
std::optional<TranslateFilename> translate,
const std::string& untranslated_source_file_name) {
// TODO: paste an example of a few lines.
// line => (line string).
std::vector<std::string> lines;
// TODO: allow user defined newline.
boost::split(lines, preprocessed_source, boost::is_any_of("\n"));
// Unpack the optional into something usable:
TranslateFilename translate_;
if (translate) {
translate_ = *translate;
} else {
translate_ = [](const auto& file_name) { return file_name; };
}
// This will act as a cache for the translated (displayed) file names.
//
// (file name) => (displayed file names).
std::map<std::string, std::string> translated_file_names;
// Cache the main source file name.
translated_file_names[untranslated_source_file_name] =
translate_(untranslated_source_file_name);
// line => (file name).
std::vector<std::string> line_filenames;
// Track maximum displayed file name width, so the output can be properly
// padded.
std::size_t max_line_file_width =
translated_file_names[untranslated_source_file_name].size();
// line => (line number).
std::vector<std::size_t> line_numbers;
// Track maximum displayed line number width, so the output can be properly
// padded.
std::size_t max_line_num_width = 0;
// file => (current line number).
std::map<std::string, std::size_t> file2current_line;
// Track the line number for the main source file.
file2current_line[untranslated_source_file_name] = 1;
// Track which file we are in as we iterate the lines. We start with the main
// source file name.
std::string current_file_name = untranslated_source_file_name;
// Iterate through all the lines in the preprocessed file.
for (const std::string& line : lines) {
if (boost::algorithm::starts_with(line, "#line")) {
// TODO: paste an example of a line
std::size_t line_num = 0u;
std::string file_name;
bool success = ParseLineDirective(/*line=*/line, /*line_num=*/&line_num,
/*file_name=*/&file_name);
assert(success);
if (file2current_line.count(file_name) > 0) {
assert(line_num >= file2current_line[file_name]);
}
current_file_name = file_name;
file2current_line[file_name] = line_num;
// Append the line number and file name for this line.
// Line number of size_t(-1) marks a #line directive which has no position
// in the original file.
line_numbers.push_back(std::size_t(-1));
line_filenames.push_back(file_name);
if (translated_file_names.count(file_name) == 0) {
std::string translated_file_name = translate_(file_name);
translated_file_names[file_name] = translated_file_name;
max_line_file_width =
std::max(max_line_file_width, translated_file_name.size());
}
} else {
std::size_t line_num = file2current_line[current_file_name]++;
line_numbers.push_back(line_num);
line_filenames.push_back(current_file_name);
max_line_num_width = std::max(max_line_num_width,
std::to_string(line_numbers.back()).size());
}
}
// This is for debugging, to ensure we never output the same line twice.
//
// {(file name, line number)}
std::set<std::tuple<std::string, std::size_t>> seen;
// This will hold the formatted output lines.
std::vector<std::string> numbered_lines;
// Now iterate through the lines once more to construct the formatted lines.
for (std::size_t i = 0; i < lines.size(); ++i) {
std::string file_name = line_filenames.at(i);
std::string displayed_file_name = translated_file_names.at(file_name);
std::size_t line_num = line_numbers.at(i);
std::string displayed_line_num = std::to_string(line_num);
const std::string& line = lines.at(i);
// If the line number is this special size_t(-1), then we know that this
// line is a `#line` directive, so don't display it.
if (line_num == std::size_t(-1)) {
displayed_line_num = "";
} else {
assert(seen.count(std::make_tuple(file_name, line_num)) == 0);
seen.insert(std::make_tuple(file_name, line_num));
}
displayed_file_name = LeftPad(/*value=*/displayed_file_name,
/*max_length=*/max_line_file_width);
displayed_line_num = RightPad(/*value=*/displayed_line_num,
/*max_length=*/max_line_num_width);
numbered_lines.push_back(displayed_file_name + ":" + displayed_line_num +
"|" + line);
}
// TODO: allow user defined newline.
return boost::algorithm::join(numbered_lines, "\n");
}
template <typename WaveException>
void PrettyPrintWaveException(const WaveException& e) {
std::string severity = boost::wave::util::get_severity(e.get_severity());
std::cerr << "Error during preprocessing"
<< "\n e.what(): " << e.what()
<< "\n e.description(): " << e.description()
<< "\n e.get_errorcode(): " << e.get_errorcode()
<< "\n e.get_severity(): " << e.get_severity()
<< "\n e.get_severity(): " << severity
<< "\n e.is_recoverable(): " << e.is_recoverable()
<< "\n e.file_name(): " << e.file_name()
<< "\n e.file_name(): " << e.file_name() << ":" << e.line_no()
<< "\n e.line_no(): " << e.line_no()
<< "\n e.column_no(): " << e.column_no() << std::endl;
}
std::string LeftPad(const std::string& value, std::size_t max_length) {
assert(value.size() <= max_length);
std::size_t padding_length = max_length - value.size();
std::string padding = std::string(/*count=*/padding_length, /*ch=*/' ');
return padding + value;
}
std::string RightPad(const std::string& value, std::size_t max_length) {
assert(value.size() <= max_length);
std::size_t padding_length = max_length - value.size();
std::string padding = std::string(/*count=*/padding_length, /*ch=*/' ');
return value + padding;
}
void StripLeadingDoubleQuotes(std::string* str) {
auto it = std::find_if_not(str->begin(), str->end(),
[](char c) { return c == '"'; });
str->erase(str->begin(), it);
}
void StripTrailingDoubleQuotes(std::string* str) {
auto it = std::find_if_not(str->rbegin(), str->rend(),
[](char c) { return c == '"'; });
str->erase(str->rend() - it);
}
void StripDoubleQuotes(std::string* value) {
StripTrailingDoubleQuotes(value);
StripLeadingDoubleQuotes(value);
}
bool ParseLineDirective(const std::string& line, std::size_t* line_num,
std::string* file_name) {
if (!boost::algorithm::starts_with(line, "#line")) {
return false;
}
// TODO: paste an example of a line
std::vector<std::string> line_parts;
// TODO: Probably handle quotes better.
boost::split(line_parts, line, boost::is_any_of(" "));
assert(line_parts.size() >= 3u);
assert(line_parts.at(0) == "#line");
*line_num = boost::lexical_cast<std::size_t>(line_parts.at(1));
boost::span<const std::string> file_name_parts(&line_parts.at(2),
line_parts.size() - 2u);
*file_name = boost::algorithm::join(file_name_parts, "\n");
StripDoubleQuotes(/*value=*/file_name);
return true;
}