File renamed without changes.
File renamed without changes.
File renamed without changes.
142 changes: 105 additions & 37 deletions src/resinator/cli.zig → lib/compiler/resinator/cli.zig

Large diffs are not rendered by default.

src/resinator/code_pages.zig → lib/compiler/resinator/code_pages.zig
@@ -279,6 +279,9 @@ pub const CodePage = enum(u16) {
pub const Utf8 = struct {
/// Implements decoding with rejection of ill-formed UTF-8 sequences based on section
/// D92 of Chapter 3 of the Unicode standard (Table 3-7 specifically).
///
/// Note: This does not match "U+FFFD Substitution of Maximal Subparts", but instead
/// matches the behavior of the Windows RC compiler.
pub const WellFormedDecoder = struct {
/// Like std.unicode.utf8ByteSequenceLength, but:
/// - Rejects non-well-formed first bytes, i.e. C0-C1, F5-FF
@@ -347,9 +350,6 @@ pub const Utf8 = struct {
// Only include the byte in the invalid sequence if it's in the range
// of a continuation byte. All other values should not be included in the
// invalid sequence.
//
// Note: This is how the Windows RC compiler handles this, this may not
// be the correct-as-according-to-the-Unicode-standard way to do it.
if (isContinuationByte(byte)) len += 1;
return .{ .value = Codepoint.invalid, .byte_len = len };
}
@@ -437,6 +437,19 @@ test "codepointAt invalid utf8" {
}, CodePage.utf8.codepointAt(1, invalid_utf8).?);
try std.testing.expectEqual(@as(?Codepoint, null), CodePage.windows1252.codepointAt(2, invalid_utf8));
}

{
// encoded high surrogate
const invalid_utf8 = "\xED\xA0\xBD";
try std.testing.expectEqual(Codepoint{
.value = Codepoint.invalid,
.byte_len = 2,
}, CodePage.utf8.codepointAt(0, invalid_utf8).?);
try std.testing.expectEqual(Codepoint{
.value = Codepoint.invalid,
.byte_len = 1,
}, CodePage.utf8.codepointAt(2, invalid_utf8).?);
}
}
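
Not part of the diff: a minimal sketch of how a caller might walk a string with the codepointAt API used in the tests above, assuming this file is code_pages.zig and that CodePage.codepointAt takes an index and a byte slice and returns an optional Codepoint. It reproduces the high-surrogate behavior the new test exercises.

const std = @import("std");
const code_pages = @import("code_pages.zig");
const CodePage = code_pages.CodePage;
const Codepoint = code_pages.Codepoint;

// Counts how many invalid sequences the decoder reports in `bytes`.
fn countInvalidSequences(code_page: CodePage, bytes: []const u8) usize {
    var count: usize = 0;
    var i: usize = 0;
    while (code_page.codepointAt(i, bytes)) |codepoint| {
        if (codepoint.value == Codepoint.invalid) count += 1;
        i += codepoint.byte_len;
    }
    return count;
}

test "encoded high surrogate is reported as two invalid sequences" {
    // "\xED\xA0\xBD" encodes a UTF-16 high surrogate; per the Windows RC
    // compiler behavior documented above, it decodes as a 2-byte invalid
    // sequence followed by a 1-byte invalid sequence.
    try std.testing.expectEqual(@as(usize, 2), countInvalidSequences(.utf8, "\xED\xA0\xBD"));
}
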

test "codepointAt utf8 encoded" {
48 changes: 33 additions & 15 deletions src/resinator/comments.zig → lib/compiler/resinator/comments.zig
@@ -22,7 +22,7 @@ const formsLineEndingPair = @import("source_mapping.zig").formsLineEndingPair;

/// `buf` must be at least as long as `source`
/// In-place transformation is supported (i.e. `source` and `buf` can be the same slice)
pub fn removeComments(source: []const u8, buf: []u8, source_mappings: ?*SourceMappings) []u8 {
pub fn removeComments(source: []const u8, buf: []u8, source_mappings: ?*SourceMappings) ![]u8 {
std.debug.assert(buf.len >= source.len);
var result = UncheckedSliceWriter{ .slice = buf };
const State = enum {
@@ -85,7 +85,7 @@ pub fn removeComments(source: []const u8, buf: []u8, source_mappings: ?*SourceMa
else => {},
},
.multiline_comment => switch (c) {
'\r' => handleMultilineCarriageReturn(source, &line_handler, index, &result, source_mappings),
'\r' => try handleMultilineCarriageReturn(source, &line_handler, index, &result, source_mappings),
'\n' => {
_ = line_handler.incrementLineNumber(index);
result.write(c);
@@ -95,7 +95,7 @@ pub fn removeComments(source: []const u8, buf: []u8, source_mappings: ?*SourceMa
},
.multiline_comment_end => switch (c) {
'\r' => {
handleMultilineCarriageReturn(source, &line_handler, index, &result, source_mappings);
try handleMultilineCarriageReturn(source, &line_handler, index, &result, source_mappings);
// We only want to treat this as a newline if it's part of a CRLF pair. If it's
// not, then we still want to stay in .multiline_comment_end, so that e.g. `*<\r>/` still
// functions as a `*/` comment ending. Kinda crazy, but that's how the Win32 implementation works.
@@ -184,13 +184,21 @@ inline fn handleMultilineCarriageReturn(
index: usize,
result: *UncheckedSliceWriter,
source_mappings: ?*SourceMappings,
) void {
) !void {
// This is a dumb way to go about this, but basically we want to determine
// if this is part of a distinct CRLF or LFCR pair. This function call will detect
// LFCR pairs correctly since the function we're in will only be called on CR,
// but will not detect CRLF pairs since it only looks at the line ending before the
// CR. So, if the first check fails, we do a second (forward) check to detect a CRLF
// that is not part of another pair.
const is_lfcr_pair = line_handler.currentIndexFormsLineEndingPair(index);
const is_crlf_pair = !is_lfcr_pair and formsLineEndingPair(source, '\r', index + 1);
// Note: Bare \r within a multiline comment should *not* be treated as a line ending for the
// purposes of removing comments, but *should* be treated as a line ending for the
// purposes of line counting/source mapping
_ = line_handler.incrementLineNumber(index);
// So only write the \r if it's part of a CRLF pair
if (formsLineEndingPair(source, '\r', index + 1)) {
// So only write the \r if it's part of a CRLF/LFCR pair
if (is_lfcr_pair or is_crlf_pair) {
result.write('\r');
}
// And otherwise, we want to collapse the source mapping so that we can still know which
@@ -200,15 +208,15 @@ inline fn handleMultilineCarriageReturn(
// the next collapse acts on the first of the collapsed line numbers
line_handler.line_number -= 1;
if (source_mappings) |mappings| {
mappings.collapse(line_handler.line_number, 1);
try mappings.collapse(line_handler.line_number, 1);
}
}
}
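
A hypothetical standalone helper (not from resinator) illustrating the pair check described in the comment above: a CR inside a multiline comment is only written through when it sits directly next to an LF, forming a CRLF or LFCR pair. The real implementation additionally ensures the neighboring LF is not already claimed by another pair.

const std = @import("std");

// Returns true if the '\r' at `index` is directly adjacent to a '\n',
// forming a CRLF (forward) or LFCR (backward) pair.
fn crFormsPair(source: []const u8, index: usize) bool {
    std.debug.assert(source[index] == '\r');
    const crlf = index + 1 < source.len and source[index + 1] == '\n';
    const lfcr = index > 0 and source[index - 1] == '\n';
    return crlf or lfcr;
}

test "bare CR vs CRLF/LFCR pairs" {
    try std.testing.expect(crFormsPair("a\r\nb", 1)); // CRLF: keep the \r
    try std.testing.expect(crFormsPair("a\n\rb", 2)); // LFCR: keep the \r
    try std.testing.expect(!crFormsPair("a\rb", 1)); // bare \r: dropped
}
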

pub fn removeCommentsAlloc(allocator: Allocator, source: []const u8, source_mappings: ?*SourceMappings) ![]u8 {
const buf = try allocator.alloc(u8, source.len);
errdefer allocator.free(buf);
const result = removeComments(source, buf, source_mappings);
const result = try removeComments(source, buf, source_mappings);
return allocator.realloc(buf, result.len);
}
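
A minimal caller-side sketch (assumed usage, not part of the diff): since removeComments now returns an error union, removeCommentsAlloc propagates the failure with try. The input/output pair mirrors the "unfinished multiline comment" test added below.

const std = @import("std");
const comments = @import("comments.zig");

test "removeCommentsAlloc usage" {
    const allocator = std.testing.allocator;
    // An unterminated /* comment is stripped, but the newline inside it is
    // still written through (newlines are preserved for line counting).
    const result = try comments.removeCommentsAlloc(allocator, "unfinished/*\n", null);
    defer allocator.free(result);
    try std.testing.expectEqualStrings("unfinished\n", result);
}
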

@@ -252,6 +260,16 @@ test "line comments retain newlines" {
try testRemoveComments("\r\n", "//comment\r\n");
}

test "unfinished multiline comment" {
try testRemoveComments(
\\unfinished
\\
,
\\unfinished/*
\\
);
}

test "crazy" {
try testRemoveComments(
\\blah"/*som*/\""BLAH
@@ -321,20 +339,20 @@ test "remove comments with mappings" {
var mut_source = "blah/*\rcommented line*\r/blah".*;
var mappings = SourceMappings{};
_ = try mappings.files.put(allocator, "test.rc");
try mappings.set(allocator, 1, .{ .start_line = 1, .end_line = 1, .filename_offset = 0 });
try mappings.set(allocator, 2, .{ .start_line = 2, .end_line = 2, .filename_offset = 0 });
try mappings.set(allocator, 3, .{ .start_line = 3, .end_line = 3, .filename_offset = 0 });
try mappings.set(1, 1, 0);
try mappings.set(2, 2, 0);
try mappings.set(3, 3, 0);
defer mappings.deinit(allocator);

const result = removeComments(&mut_source, &mut_source, &mappings);
const result = try removeComments(&mut_source, &mut_source, &mappings);

try std.testing.expectEqualStrings("blahblah", result);
try std.testing.expectEqual(@as(usize, 1), mappings.mapping.items.len);
try std.testing.expectEqual(@as(usize, 3), mappings.mapping.items[0].end_line);
try std.testing.expectEqual(@as(usize, 1), mappings.end_line);
try std.testing.expectEqual(@as(usize, 3), mappings.getCorrespondingSpan(1).?.end_line);
}

test "in place" {
var mut_source = "blah /* comment */ blah".*;
const result = removeComments(&mut_source, &mut_source, null);
const result = try removeComments(&mut_source, &mut_source, null);
try std.testing.expectEqualStrings("blah blah", result);
}
107 changes: 78 additions & 29 deletions src/resinator/compile.zig → lib/compiler/resinator/compile.zig
@@ -321,10 +321,7 @@ pub const Compiler = struct {

return buf.toOwnedSlice();
},
else => {
std.debug.print("unexpected filename token type: {}\n", .{literal_node.token});
unreachable; // no other token types should be in a filename literal node
},
else => unreachable, // no other token types should be in a filename literal node
}
},
.binary_expression => {
@@ -404,6 +401,72 @@ pub const Compiler = struct {
return first_error orelse error.FileNotFound;
}

pub fn parseDlgIncludeString(self: *Compiler, token: Token) ![]u8 {
// For the purposes of parsing, we want to strip the L prefix
// if it exists since we want escaped integers to be limited to
// their ascii string range.
//
// We keep track of whether or not there was an L prefix, though,
// since there's more weirdness to come.
var bytes = self.sourceBytesForToken(token);
var was_wide_string = false;
if (bytes.slice[0] == 'L' or bytes.slice[0] == 'l') {
was_wide_string = true;
bytes.slice = bytes.slice[1..];
}

var buf = try std.ArrayList(u8).initCapacity(self.allocator, bytes.slice.len);
errdefer buf.deinit();

var iterative_parser = literals.IterativeStringParser.init(bytes, .{
.start_column = token.calculateColumn(self.source, 8, null),
.diagnostics = .{ .diagnostics = self.diagnostics, .token = token },
});

// No real idea what's going on here, but this matches the rc.exe behavior
while (try iterative_parser.next()) |parsed| {
const c = parsed.codepoint;
switch (was_wide_string) {
true => {
switch (c) {
0...0x7F, 0xA0...0xFF => try buf.append(@intCast(c)),
0x80...0x9F => {
if (windows1252.bestFitFromCodepoint(c)) |_| {
try buf.append(@intCast(c));
} else {
try buf.append('?');
}
},
else => {
if (windows1252.bestFitFromCodepoint(c)) |best_fit| {
try buf.append(best_fit);
} else if (c < 0x10000 or c == code_pages.Codepoint.invalid) {
try buf.append('?');
} else {
try buf.appendSlice("??");
}
},
}
},
false => {
if (parsed.from_escaped_integer) {
try buf.append(@truncate(c));
} else {
if (windows1252.bestFitFromCodepoint(c)) |best_fit| {
try buf.append(best_fit);
} else if (c < 0x10000 or c == code_pages.Codepoint.invalid) {
try buf.append('?');
} else {
try buf.appendSlice("??");
}
}
},
}
}

return buf.toOwnedSlice();
}

pub fn writeResourceExternal(self: *Compiler, node: *Node.ResourceExternal, writer: anytype) !void {
// Init header with data size zero for now, will need to fill it in later
var header = try self.resourceHeader(node.id, node.type, .{});
@@ -414,13 +477,16 @@ pub const Compiler = struct {
// DLGINCLUDE has special handling that doesn't actually need the file to exist
if (maybe_predefined_type != null and maybe_predefined_type.? == .DLGINCLUDE) {
const filename_token = node.filename.cast(.literal).?.token;
const parsed_filename = try self.parseQuotedStringAsAsciiString(filename_token);
const parsed_filename = try self.parseDlgIncludeString(filename_token);
defer self.allocator.free(parsed_filename);

// NUL within the parsed string acts as a terminator
const parsed_filename_terminated = std.mem.sliceTo(parsed_filename, 0);

header.applyMemoryFlags(node.common_resource_attributes, self.source);
header.data_size = @intCast(parsed_filename.len + 1);
header.data_size = @intCast(parsed_filename_terminated.len + 1);
try header.write(writer, .{ .diagnostics = self.diagnostics, .token = node.id });
try writer.writeAll(parsed_filename);
try writer.writeAll(parsed_filename_terminated);
try writer.writeByte(0);
try writeDataPadding(writer, header.data_size);
return;
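
Not part of the diff: a small illustration of the NUL-termination rule noted above, using std.mem.sliceTo the same way the DLGINCLUDE path does. The filename value here is made up.

const std = @import("std");

test "embedded NUL terminates a parsed DLGINCLUDE string" {
    // Everything after the first 0 byte is ignored when the resource data is written.
    const parsed: []const u8 = "include.h\x00ignored";
    const terminated = std.mem.sliceTo(parsed, 0);
    try std.testing.expectEqualStrings("include.h", terminated);
}
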
@@ -1141,21 +1207,15 @@ pub const Compiler = struct {
errdefer self.allocator.free(parsed_string);
return .{ .wide_string = parsed_string };
},
else => {
std.debug.print("unexpected token in literal node: {}\n", .{literal_node.token});
unreachable; // no other token types should be in a data literal node
},
else => unreachable, // no other token types should be in a data literal node
}
},
.binary_expression, .grouped_expression => {
const result = evaluateNumberExpression(expression_node, self.source, self.input_code_pages);
return .{ .number = result };
},
.not_expression => unreachable,
else => {
std.debug.print("{}\n", .{expression_node.id});
@panic("TODO: evaluateDataExpression");
},
else => unreachable,
}
}

@@ -1669,6 +1729,7 @@ pub const Compiler = struct {
};
}

// We know the data_buffer len is limited to u32 max.
const data_size: u32 = @intCast(data_buffer.items.len);
var header = try self.resourceHeader(node.id, node.type, .{
.data_size = data_size,
@@ -1966,6 +2027,7 @@ pub const Compiler = struct {
try data_writer.writeInt(u16, 1, .little);
try data_writer.writeInt(u16, button_width.asWord(), .little);
try data_writer.writeInt(u16, button_height.asWord(), .little);
// Number of buttons is guaranteed by the parser to be within maxInt(u16).
try data_writer.writeInt(u16, @as(u16, @intCast(node.buttons.len)), .little);

for (node.buttons) |button_or_sep| {
@@ -2806,19 +2868,6 @@ pub const Compiler = struct {
);
}

/// Helper that calls parseQuotedStringAsAsciiString with the relevant context
/// Resulting slice is allocated by `self.allocator`.
pub fn parseQuotedStringAsAsciiString(self: *Compiler, token: Token) ![]u8 {
return literals.parseQuotedStringAsAsciiString(
self.allocator,
self.sourceBytesForToken(token),
.{
.start_column = token.calculateColumn(self.source, 8, null),
.diagnostics = .{ .diagnostics = self.diagnostics, .token = token },
},
);
}

fn addErrorDetails(self: *Compiler, details: ErrorDetails) Allocator.Error!void {
try self.diagnostics.append(details);
}
@@ -3356,7 +3405,7 @@ test "StringTable" {
}
break :ids buf;
};
var prng = std.Random.DefaultPrng.init(0);
var prng = std.rand.DefaultPrng.init(0);
var random = prng.random();
random.shuffle(u16, &ids);
