From 0f92cb4d5ee836ef69a8ad3619a31ae7a0ba9a2a Mon Sep 17 00:00:00 2001 From: TsunamiNoAi Date: Wed, 1 Apr 2026 14:50:52 -0400 Subject: [PATCH 1/5] feat: enhance YAML parser to preserve quoted numeric strings as strings --- src/markdown/frontmatter.zig | 139 +++++++++++++++++++++--------- src/markdown/frontmatter_test.zig | 34 +++++++- 2 files changed, 128 insertions(+), 45 deletions(-) diff --git a/src/markdown/frontmatter.zig b/src/markdown/frontmatter.zig index 0af04bd..26c84ba 100644 --- a/src/markdown/frontmatter.zig +++ b/src/markdown/frontmatter.zig @@ -14,6 +14,7 @@ const JsonValue = std.json.Value; const tomlz = @import("tomlz"); const Yaml = @import("yaml").Yaml; +const Tree = @import("yaml").Tree; const FrontMatter = @This(); @@ -27,8 +28,16 @@ original: Origin, /// Lazily initialised on first mutation; freed by `deinit()`. set_arena: ?std.heap.ArenaAllocator = null, -const Origin = union(Kind) { +const YamlOrigin = struct { yaml: Yaml, + /// Arena owning all JsonValue container and string memory produced by + /// `treeNodeToJson`. Must NOT call `deinitJsonValue` on `.root` for + /// this variant — the arena frees everything at once. + arena: std.heap.ArenaAllocator, +}; + +const Origin = union(Kind) { + yaml: YamlOrigin, toml: tomlz.Table, /// `std.json.Parsed` owns all json memory in an arena; we must NOT call /// `deinitJsonValue` on `.root` for this variant. @@ -60,9 +69,12 @@ pub fn init(alloc: Allocator, source: []const u8, input_kind: Kind) !FrontMatter else => return err, }; if (y.docs.items.len == 0) return error.EmptyDocument; - orig = .{ .yaml = y }; - const doc = y.docs.items[0]; - break :blk try yamlNodeToJson(alloc, doc); + var arena = std.heap.ArenaAllocator.init(alloc); + errdefer arena.deinit(); + const tree = y.tree.?; + const value = try treeNodeToJson(arena.allocator(), tree, tree.docs[0]); + orig = .{ .yaml = .{ .yaml = y, .arena = arena } }; + break :blk value; }, .toml => blk: { const doc = try tomlz.parser.parse(alloc, source); @@ -94,8 +106,9 @@ pub fn init(alloc: Allocator, source: []const u8, input_kind: Kind) !FrontMatter pub fn deinit(self: *FrontMatter) void { switch (self.original) { .yaml => |*o| { - deinitJsonValue(self.allocator, &self.root); - o.deinit(self.allocator); + // Arena owns all JsonValue memory — no deinitJsonValue needed. + o.arena.deinit(); + o.yaml.deinit(self.allocator); }, .toml => |*o| { deinitJsonValue(self.allocator, &self.root); @@ -492,9 +505,11 @@ test { defer y.deinit(alloc); try y.load(alloc); - const doc = y.docs.items[0]; - var json_value = try yamlNodeToJson(alloc, doc); - defer deinitJsonValue(alloc, &json_value); + var arena = std.heap.ArenaAllocator.init(alloc); + defer arena.deinit(); + const tree = y.tree.?; + const json_value = try treeNodeToJson(arena.allocator(), tree, tree.docs[0]); + _ = json_value; // var buf: [1024]u8 = undefined; // var fbs = std.io.fixedBufferStream(&buf); @@ -570,48 +585,90 @@ fn mergeJsonValue(alloc: Allocator, base: *std.json.Value, overlay: std.json.Val } } -/// Recursively convert a `zig-yaml` value tree into `std.json.Value`. -pub fn yamlNodeToJson(allocator: std.mem.Allocator, node: Yaml.Value) !JsonValue { - switch (node) { - .map => |m| { +/// Recursively convert a `zig-yaml` Tree node into `std.json.Value`, +/// preserving quoting information: `.string_value` (quoted) nodes always +/// produce `.string`; `.value` (unquoted) nodes are coerced to int/float +/// where possible. +fn treeNodeToJson(allocator: Allocator, tree: Tree, node_index: Tree.Node.Index) !JsonValue { + switch (tree.nodeTag(node_index)) { + .doc => { + const inner = tree.nodeData(node_index).maybe_node.unwrap() orelse + return JsonValue{ .null = {} }; + return treeNodeToJson(allocator, tree, inner); + }, + .doc_with_directive => { + const inner = tree.nodeData(node_index).doc_with_directive.maybe_node.unwrap() orelse + return JsonValue{ .null = {} }; + return treeNodeToJson(allocator, tree, inner); + }, + .map_single => { var object = JsonValue{ .object = .init(allocator) }; - var iter = m.iterator(); - while (iter.next()) |entry| { - const key = entry.key_ptr; - const value = try yamlNodeToJson(allocator, entry.value_ptr.*); - try object.object.put(key.*, value); + const entry = tree.nodeData(node_index).map; + const key = try allocator.dupe(u8, tree.rawString(entry.key, entry.key)); + const val = if (entry.maybe_node.unwrap()) |vn| + try treeNodeToJson(allocator, tree, vn) + else + JsonValue{ .null = {} }; + try object.object.put(key, val); + return object; + }, + .map_many => { + var object = JsonValue{ .object = .init(allocator) }; + const extra_index = tree.nodeData(node_index).extra; + const map = tree.extraData(Tree.Map, extra_index); + var extra_end = map.end; + for (0..map.data.map_len) |_| { + const entry = tree.extraData(Tree.Map.Entry, extra_end); + extra_end = entry.end; + const key = try allocator.dupe(u8, tree.rawString(entry.data.key, entry.data.key)); + const val = if (entry.data.maybe_node.unwrap()) |vn| + try treeNodeToJson(allocator, tree, vn) + else + JsonValue{ .null = {} }; + try object.object.put(key, val); } return object; }, - .list => |l| { + .list_empty => return JsonValue{ .array = .init(allocator) }, + .list_one => { var list = JsonValue{ .array = .init(allocator) }; - for (l) |val| { - const value = try yamlNodeToJson(allocator, val); - try list.array.append(value); - } + const idx = tree.nodeData(node_index).node; + try list.array.append(try treeNodeToJson(allocator, tree, idx)); return list; }, - .scalar => |s| { - const value = blk: { - break :blk JsonValue{ .float = std.fmt.parseFloat(f32, s) catch { - break :blk JsonValue{ .integer = std.fmt.parseInt(u32, s, 10) catch { - break :blk JsonValue{ .string = s }; - } }; - } }; - }; - return value; + .list_two => { + var list = JsonValue{ .array = .init(allocator) }; + const l = tree.nodeData(node_index).list; + try list.array.append(try treeNodeToJson(allocator, tree, l.el1)); + try list.array.append(try treeNodeToJson(allocator, tree, l.el2)); + return list; }, - - .boolean => |b| { - return JsonValue{ .bool = b }; + .list_many => { + var list = JsonValue{ .array = .init(allocator) }; + const extra_index = tree.nodeData(node_index).extra; + const l = tree.extraData(Tree.List, extra_index); + var extra_end = l.end; + for (0..l.data.list_len) |_| { + const elem = tree.extraData(Tree.List.Entry, extra_end); + extra_end = elem.end; + try list.array.append(try treeNodeToJson(allocator, tree, elem.data.node)); + } + return list; + }, + .string_value => { + // Quoted scalar — always a string, never coerced to a number. + const raw = tree.nodeData(node_index).string.slice(tree); + return JsonValue{ .string = raw }; }, - .empty => { - return JsonValue{ .null = {} }; + .value => { + // Unquoted scalar — coerce to int, float, or keep as string. + const raw = tree.nodeScope(node_index).rawString(tree); + return JsonValue{ .integer = std.fmt.parseInt(i64, raw, 10) catch { + return JsonValue{ .float = std.fmt.parseFloat(f64, raw) catch { + return JsonValue{ .string = try allocator.dupe(u8, raw) }; + } }; + } }; }, - // else => |u| { - // std.debug.print("Unsuported type: {}\n", .{u}); - // return error.UnsupportedYamlType; - // }, } } diff --git a/src/markdown/frontmatter_test.zig b/src/markdown/frontmatter_test.zig index eaad50c..c3a10fd 100644 --- a/src/markdown/frontmatter_test.zig +++ b/src/markdown/frontmatter_test.zig @@ -71,13 +71,39 @@ test "frontmatter: YAML integer and negative values" { const count = fm.get("count"); try tst.expect(count != null); - try tst.expect(count.? == .float); - try tst.expectApproxEqAbs(@as(f64, 42), count.?.float, 0.001); + try tst.expect(count.? == .integer); + try tst.expectEqual(@as(i64, 42), count.?.integer); const neg = fm.get("negative"); try tst.expect(neg != null); - try tst.expect(neg.? == .float); - try tst.expectApproxEqAbs(@as(f64, -7), neg.?.float, 0.001); + try tst.expect(neg.? == .integer); + try tst.expectEqual(@as(i64, -7), neg.?.integer); +} + +test "frontmatter: YAML quoted numeric strings stay as strings" { + const alloc = tst.allocator; + const source = + \\version: "1" + \\weight: "42" + \\tag: "007" + ; + var fm = try FrontMatter.init(alloc, source, .yaml); + defer fm.deinit(); + + const version = fm.get("version"); + try tst.expect(version != null); + try tst.expect(version.? == .string); + try tst.expectEqualStrings("1", version.?.string); + + const weight = fm.get("weight"); + try tst.expect(weight != null); + try tst.expect(weight.? == .string); + try tst.expectEqualStrings("42", weight.?.string); + + const tag = fm.get("tag"); + try tst.expect(tag != null); + try tst.expect(tag.? == .string); + try tst.expectEqualStrings("007", tag.?.string); } test "frontmatter: YAML boolean values" { From bde282788fd2fd5a12737628f3512170892f77fe Mon Sep 17 00:00:00 2001 From: Ben Craton Date: Wed, 1 Apr 2026 15:36:00 -0400 Subject: [PATCH 2/5] Update src/markdown/frontmatter.zig Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- src/markdown/frontmatter.zig | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/markdown/frontmatter.zig b/src/markdown/frontmatter.zig index 26c84ba..eb878bd 100644 --- a/src/markdown/frontmatter.zig +++ b/src/markdown/frontmatter.zig @@ -30,9 +30,10 @@ set_arena: ?std.heap.ArenaAllocator = null, const YamlOrigin = struct { yaml: Yaml, - /// Arena owning all JsonValue container and string memory produced by - /// `treeNodeToJson`. Must NOT call `deinitJsonValue` on `.root` for - /// this variant — the arena frees everything at once. + /// Arena owning all JsonValue container memory produced by `treeNodeToJson`. + /// Some string values may alias `yaml` / `source` memory instead of being + /// duplicated into this arena. Must NOT call `deinitJsonValue` on `.root` + /// for this variant — the arena frees all container allocations at once. arena: std.heap.ArenaAllocator, }; From 623f4a8fad41e07e73a1f5050eb7e0bbe7115ece Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 1 Apr 2026 19:43:58 +0000 Subject: [PATCH 3/5] Fix YAML round-trip: quote numeric strings and restore empty-as-null Agent-Logs-Url: https://github.com/sc2in/zigmark/sessions/8d11af91-f8da-453b-a5a3-8148aa6a507f Co-authored-by: tsunaminoai <3594035+tsunaminoai@users.noreply.github.com> --- src/markdown/frontmatter.zig | 6 ++++++ src/markdown/frontmatter_test.zig | 27 +++++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/src/markdown/frontmatter.zig b/src/markdown/frontmatter.zig index eb878bd..c8bb6fe 100644 --- a/src/markdown/frontmatter.zig +++ b/src/markdown/frontmatter.zig @@ -664,6 +664,7 @@ fn treeNodeToJson(allocator: Allocator, tree: Tree, node_index: Tree.Node.Index) .value => { // Unquoted scalar — coerce to int, float, or keep as string. const raw = tree.nodeScope(node_index).rawString(tree); + if (raw.len == 0) return JsonValue{ .null = {} }; return JsonValue{ .integer = std.fmt.parseInt(i64, raw, 10) catch { return JsonValue{ .float = std.fmt.parseFloat(f64, raw) catch { return JsonValue{ .string = try allocator.dupe(u8, raw) }; @@ -1017,6 +1018,11 @@ fn yamlNeedsQuote(s: []const u8) bool { } } if (s[s.len - 1] == ':') return true; + // Quote strings that would be coerced to a non-string type on re-read. + switch (inferValue(s)) { + .integer, .float => return true, + else => {}, + } return false; } diff --git a/src/markdown/frontmatter_test.zig b/src/markdown/frontmatter_test.zig index c3a10fd..70882e6 100644 --- a/src/markdown/frontmatter_test.zig +++ b/src/markdown/frontmatter_test.zig @@ -591,6 +591,33 @@ test "frontmatter: serialize YAML round-trip" { } } +test "frontmatter: YAML quoted numeric string survives round-trip" { + const alloc = tst.allocator; + const input = + \\--- + \\tag: "007" + \\version: "1" + \\--- + \\# Content + ; + var fm = try FrontMatter.initFromMarkdown(alloc, input); + defer fm.deinit(); + + // Quoted numerics must arrive as strings before serialization. + try tst.expectEqualStrings("007", fm.get("tag").?.string); + try tst.expectEqualStrings("1", fm.get("version").?.string); + + const out = try fm.serialize(alloc); + defer alloc.free(out); + + var fm2 = try FrontMatter.initFromMarkdown(alloc, out); + defer fm2.deinit(); + + // Must still be strings after serialization + re-parse. + try tst.expectEqualStrings("007", fm2.get("tag").?.string); + try tst.expectEqualStrings("1", fm2.get("version").?.string); +} + test "frontmatter: serialize YAML nested and array" { const alloc = tst.allocator; const source = From 195290cb64ff9e7475637f5d1d48aabbc2e59936 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 1 Apr 2026 20:02:18 +0000 Subject: [PATCH 4/5] Align inferValue with treeNodeToJson float coercion (fixes scientific notation round-trip) Agent-Logs-Url: https://github.com/sc2in/zigmark/sessions/2c047116-1248-491b-b7f9-fe0a79778218 Co-authored-by: tsunaminoai <3594035+tsunaminoai@users.noreply.github.com> --- src/markdown/frontmatter.zig | 13 +++++-------- src/markdown/frontmatter_test.zig | 3 +++ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/markdown/frontmatter.zig b/src/markdown/frontmatter.zig index c8bb6fe..c68a70a 100644 --- a/src/markdown/frontmatter.zig +++ b/src/markdown/frontmatter.zig @@ -858,21 +858,18 @@ pub const FieldArg = struct { /// Infer the JSON type of a raw string value (no allocation required). /// -/// Type precedence: +/// Type precedence (mirrors `treeNodeToJson` unquoted-scalar coercion): /// 1. `"true"` / `"false"` → `.bool` /// 2. `"null"` → `.null` -/// 3. Valid integer (no `.` in string) → `.integer` -/// 4. Valid float (has `.` and parses) → `.float` +/// 3. Valid integer → `.integer` +/// 4. Valid float (including scientific notation such as `1e3`) → `.float` /// 5. Everything else → `.string` (aliases `raw`) pub fn inferValue(raw: []const u8) std.json.Value { if (std.mem.eql(u8, raw, "true")) return .{ .bool = true }; if (std.mem.eql(u8, raw, "false")) return .{ .bool = false }; if (std.mem.eql(u8, raw, "null")) return .{ .null = {} }; - if (std.mem.indexOfScalar(u8, raw, '.') == null) { - if (std.fmt.parseInt(i64, raw, 10)) |n| return .{ .integer = n } else |_| {} - } else { - if (std.fmt.parseFloat(f64, raw)) |f| return .{ .float = f } else |_| {} - } + if (std.fmt.parseInt(i64, raw, 10)) |n| return .{ .integer = n } else |_| {} + if (std.fmt.parseFloat(f64, raw)) |f| return .{ .float = f } else |_| {} return .{ .string = raw }; } diff --git a/src/markdown/frontmatter_test.zig b/src/markdown/frontmatter_test.zig index 70882e6..c67efcd 100644 --- a/src/markdown/frontmatter_test.zig +++ b/src/markdown/frontmatter_test.zig @@ -597,6 +597,7 @@ test "frontmatter: YAML quoted numeric string survives round-trip" { \\--- \\tag: "007" \\version: "1" + \\sci: "1e3" \\--- \\# Content ; @@ -606,6 +607,7 @@ test "frontmatter: YAML quoted numeric string survives round-trip" { // Quoted numerics must arrive as strings before serialization. try tst.expectEqualStrings("007", fm.get("tag").?.string); try tst.expectEqualStrings("1", fm.get("version").?.string); + try tst.expectEqualStrings("1e3", fm.get("sci").?.string); const out = try fm.serialize(alloc); defer alloc.free(out); @@ -616,6 +618,7 @@ test "frontmatter: YAML quoted numeric string survives round-trip" { // Must still be strings after serialization + re-parse. try tst.expectEqualStrings("007", fm2.get("tag").?.string); try tst.expectEqualStrings("1", fm2.get("version").?.string); + try tst.expectEqualStrings("1e3", fm2.get("sci").?.string); } test "frontmatter: serialize YAML nested and array" { From 9ba5047686d5863e35af8bcf77390a53f0bf512b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 1 Apr 2026 20:18:47 +0000 Subject: [PATCH 5/5] Narrow inferValue docstring: bool/null handling is additional, not in treeNodeToJson Agent-Logs-Url: https://github.com/sc2in/zigmark/sessions/bec0d6f6-29c0-46e0-8496-a3fb365909f0 Co-authored-by: tsunaminoai <3594035+tsunaminoai@users.noreply.github.com> --- src/markdown/frontmatter.zig | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/markdown/frontmatter.zig b/src/markdown/frontmatter.zig index c68a70a..64c7dac 100644 --- a/src/markdown/frontmatter.zig +++ b/src/markdown/frontmatter.zig @@ -858,7 +858,8 @@ pub const FieldArg = struct { /// Infer the JSON type of a raw string value (no allocation required). /// -/// Type precedence (mirrors `treeNodeToJson` unquoted-scalar coercion): +/// Type precedence (numeric coercion mirrors `treeNodeToJson`; bool/null +/// handling is additional and not present in `treeNodeToJson`): /// 1. `"true"` / `"false"` → `.bool` /// 2. `"null"` → `.null` /// 3. Valid integer → `.integer`