Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
160 changes: 111 additions & 49 deletions src/markdown/frontmatter.zig
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ const JsonValue = std.json.Value;

const tomlz = @import("tomlz");
const Yaml = @import("yaml").Yaml;
const Tree = @import("yaml").Tree;
Comment thread
tsunaminoai marked this conversation as resolved.

const FrontMatter = @This();

Expand All @@ -27,8 +28,17 @@ original: Origin,
/// Lazily initialised on first mutation; freed by `deinit()`.
set_arena: ?std.heap.ArenaAllocator = null,

const Origin = union(Kind) {
/// Origin payload for YAML input: the parsed document together with the
/// arena that backs the JSON tree converted from it.
const YamlOrigin = struct {
/// Parsed YAML document. Kept for the lifetime of the front matter because
/// string values in the JSON tree may alias its memory (see `arena`).
/// Released with `yaml.deinit(allocator)` in `FrontMatter.deinit`, after
/// the arena has been torn down.
yaml: Yaml,
/// Arena owning all JsonValue container memory produced by `treeNodeToJson`.
/// Some string values may alias `yaml` / `source` memory instead of being
/// duplicated into this arena. Must NOT call `deinitJsonValue` on `.root`
/// for this variant — the arena frees all container allocations at once.
arena: std.heap.ArenaAllocator,
};

const Origin = union(Kind) {
yaml: YamlOrigin,
toml: tomlz.Table,
/// `std.json.Parsed` owns all json memory in an arena; we must NOT call
/// `deinitJsonValue` on `.root` for this variant.
Expand Down Expand Up @@ -60,9 +70,12 @@ pub fn init(alloc: Allocator, source: []const u8, input_kind: Kind) !FrontMatter
else => return err,
};
if (y.docs.items.len == 0) return error.EmptyDocument;
orig = .{ .yaml = y };
const doc = y.docs.items[0];
break :blk try yamlNodeToJson(alloc, doc);
var arena = std.heap.ArenaAllocator.init(alloc);
errdefer arena.deinit();
const tree = y.tree.?;
const value = try treeNodeToJson(arena.allocator(), tree, tree.docs[0]);
orig = .{ .yaml = .{ .yaml = y, .arena = arena } };
break :blk value;
},
.toml => blk: {
const doc = try tomlz.parser.parse(alloc, source);
Expand Down Expand Up @@ -94,8 +107,9 @@ pub fn init(alloc: Allocator, source: []const u8, input_kind: Kind) !FrontMatter
pub fn deinit(self: *FrontMatter) void {
switch (self.original) {
.yaml => |*o| {
deinitJsonValue(self.allocator, &self.root);
o.deinit(self.allocator);
// Arena owns all JsonValue memory — no deinitJsonValue needed.
o.arena.deinit();
o.yaml.deinit(self.allocator);
},
.toml => |*o| {
deinitJsonValue(self.allocator, &self.root);
Expand Down Expand Up @@ -492,9 +506,11 @@ test {
defer y.deinit(alloc);

try y.load(alloc);
const doc = y.docs.items[0];
var json_value = try yamlNodeToJson(alloc, doc);
defer deinitJsonValue(alloc, &json_value);
var arena = std.heap.ArenaAllocator.init(alloc);
defer arena.deinit();
const tree = y.tree.?;
const json_value = try treeNodeToJson(arena.allocator(), tree, tree.docs[0]);
_ = json_value;

// var buf: [1024]u8 = undefined;
// var fbs = std.io.fixedBufferStream(&buf);
Expand Down Expand Up @@ -570,48 +586,91 @@ fn mergeJsonValue(alloc: Allocator, base: *std.json.Value, overlay: std.json.Val
}
}

/// Recursively convert a `zig-yaml` Tree node into `std.json.Value`,
/// preserving quoting information: `.string_value` (quoted) nodes always
/// produce `.string`; `.value` (unquoted) nodes are coerced to int/float
/// where possible.
///
/// Parameters:
///   - `allocator`: receives every container allocation (objects, arrays),
///     every duplicated map key, and every duplicated unquoted string. The
///     function never frees on its own, so callers are expected to pass an
///     arena allocator and release everything at once.
///   - `tree`: the parsed YAML tree that `node_index` belongs to.
///   - `node_index`: the node to convert.
///
/// Returns the converted value. Missing document bodies, missing map values
/// and empty unquoted scalars become `.null`. Quoted scalars are returned
/// without copying, so the result must not outlive `tree`'s backing memory.
/// No `true` / `false` / `null` keyword recognition happens here; such
/// unquoted scalars come back as `.string`.
///
/// Errors: allocation failure from `allocator`.
fn treeNodeToJson(allocator: Allocator, tree: Tree, node_index: Tree.Node.Index) !JsonValue {
    switch (tree.nodeTag(node_index)) {
        .doc => {
            const inner = tree.nodeData(node_index).maybe_node.unwrap() orelse
                return JsonValue{ .null = {} };
            return treeNodeToJson(allocator, tree, inner);
        },
        .doc_with_directive => {
            const inner = tree.nodeData(node_index).doc_with_directive.maybe_node.unwrap() orelse
                return JsonValue{ .null = {} };
            return treeNodeToJson(allocator, tree, inner);
        },
        .map_single => {
            // Single-entry maps store their entry inline in the node data.
            var object = JsonValue{ .object = .init(allocator) };
            const entry = tree.nodeData(node_index).map;
            const key = try allocator.dupe(u8, tree.rawString(entry.key, entry.key));
            const val = if (entry.maybe_node.unwrap()) |vn|
                try treeNodeToJson(allocator, tree, vn)
            else
                JsonValue{ .null = {} };
            try object.object.put(key, val);
            return object;
        },
        .map_many => {
            // Larger maps live in the tree's extra data: a header followed by
            // `map_len` entries, each reporting where the next one starts.
            var object = JsonValue{ .object = .init(allocator) };
            const extra_index = tree.nodeData(node_index).extra;
            const map = tree.extraData(Tree.Map, extra_index);
            var extra_end = map.end;
            for (0..map.data.map_len) |_| {
                const entry = tree.extraData(Tree.Map.Entry, extra_end);
                extra_end = entry.end;
                const key = try allocator.dupe(u8, tree.rawString(entry.data.key, entry.data.key));
                const val = if (entry.data.maybe_node.unwrap()) |vn|
                    try treeNodeToJson(allocator, tree, vn)
                else
                    JsonValue{ .null = {} };
                try object.object.put(key, val);
            }
            return object;
        },
        .list_empty => return JsonValue{ .array = .init(allocator) },
        .list_one => {
            var list = JsonValue{ .array = .init(allocator) };
            const idx = tree.nodeData(node_index).node;
            try list.array.append(try treeNodeToJson(allocator, tree, idx));
            return list;
        },
        .list_two => {
            var list = JsonValue{ .array = .init(allocator) };
            const l = tree.nodeData(node_index).list;
            try list.array.append(try treeNodeToJson(allocator, tree, l.el1));
            try list.array.append(try treeNodeToJson(allocator, tree, l.el2));
            return list;
        },
        .list_many => {
            // Same extra-data layout as `.map_many`: header, then `list_len` entries.
            var list = JsonValue{ .array = .init(allocator) };
            const extra_index = tree.nodeData(node_index).extra;
            const l = tree.extraData(Tree.List, extra_index);
            var extra_end = l.end;
            for (0..l.data.list_len) |_| {
                const elem = tree.extraData(Tree.List.Entry, extra_end);
                extra_end = elem.end;
                try list.array.append(try treeNodeToJson(allocator, tree, elem.data.node));
            }
            return list;
        },
        .string_value => {
            // Quoted scalar — always a string, never coerced to a number.
            const raw = tree.nodeData(node_index).string.slice(tree);
            return JsonValue{ .string = raw };
        },
        .value => {
            // Unquoted scalar — coerce to int, float, or keep as string.
            const raw = tree.nodeScope(node_index).rawString(tree);
            if (raw.len == 0) return JsonValue{ .null = {} };
            // Integers are tried first so `42` stays integral instead of
            // becoming `42.0`; same order as `inferValue`.
            if (std.fmt.parseInt(i64, raw, 10)) |n| return JsonValue{ .integer = n } else |_| {}
            if (std.fmt.parseFloat(f64, raw)) |f| return JsonValue{ .float = f } else |_| {}
            return JsonValue{ .string = try allocator.dupe(u8, raw) };
        },
    }
}

Expand Down Expand Up @@ -799,21 +858,19 @@ pub const FieldArg = struct {

/// Infer the JSON type of a raw string value (no allocation required).
///
/// Type precedence (numeric coercion mirrors `treeNodeToJson`; bool/null
/// handling is additional and not present in `treeNodeToJson`):
///   1. `"true"` / `"false"` → `.bool`
///   2. `"null"` → `.null`
///   3. Valid integer → `.integer`
///   4. Valid float (including scientific notation such as `1e3`) → `.float`
///   5. Everything else → `.string` (aliases `raw`)
///
/// The returned `.string` variant borrows `raw`; the caller must keep `raw`
/// alive for as long as the value is used.
pub fn inferValue(raw: []const u8) std.json.Value {
    if (std.mem.eql(u8, raw, "true")) return .{ .bool = true };
    if (std.mem.eql(u8, raw, "false")) return .{ .bool = false };
    if (std.mem.eql(u8, raw, "null")) return .{ .null = {} };
    // Integers are tried first so "42" stays integral instead of becoming
    // 42.0. No '.' pre-check is needed: `parseInt` already rejects any input
    // containing one, and floats without a '.' (e.g. "1e3") must still reach
    // `parseFloat`.
    if (std.fmt.parseInt(i64, raw, 10)) |n| return .{ .integer = n } else |_| {}
    if (std.fmt.parseFloat(f64, raw)) |f| return .{ .float = f } else |_| {}
    return .{ .string = raw };
}

Expand Down Expand Up @@ -959,6 +1016,11 @@ fn yamlNeedsQuote(s: []const u8) bool {
}
}
if (s[s.len - 1] == ':') return true;
// Quote strings that would be coerced to a non-string type on re-read.
switch (inferValue(s)) {
.integer, .float => return true,
else => {},
}
Comment thread
tsunaminoai marked this conversation as resolved.
return false;
}

Expand Down
64 changes: 60 additions & 4 deletions src/markdown/frontmatter_test.zig
Original file line number Diff line number Diff line change
Expand Up @@ -71,13 +71,39 @@ test "frontmatter: YAML integer and negative values" {

const count = fm.get("count");
try tst.expect(count != null);
try tst.expect(count.? == .float);
try tst.expectApproxEqAbs(@as(f64, 42), count.?.float, 0.001);
try tst.expect(count.? == .integer);
try tst.expectEqual(@as(i64, 42), count.?.integer);

const neg = fm.get("negative");
try tst.expect(neg != null);
try tst.expect(neg.? == .float);
try tst.expectApproxEqAbs(@as(f64, -7), neg.?.float, 0.001);
try tst.expect(neg.? == .integer);
try tst.expectEqual(@as(i64, -7), neg.?.integer);
}

test "frontmatter: YAML quoted numeric strings stay as strings" {
    const alloc = tst.allocator;
    const source =
        \\version: "1"
        \\weight: "42"
        \\tag: "007"
    ;
    var fm = try FrontMatter.init(alloc, source, .yaml);
    defer fm.deinit();

    // Each pair is (key, expected string payload). Every value is quoted in
    // the fixture, so none of them may be coerced to a number.
    const expectations = .{
        .{ "version", "1" },
        .{ "weight", "42" },
        .{ "tag", "007" },
    };
    inline for (expectations) |pair| {
        const found = fm.get(pair[0]);
        try tst.expect(found != null);
        try tst.expect(found.? == .string);
        try tst.expectEqualStrings(pair[1], found.?.string);
    }
}

test "frontmatter: YAML boolean values" {
Expand Down Expand Up @@ -565,6 +591,36 @@ test "frontmatter: serialize YAML round-trip" {
}
}

test "frontmatter: YAML quoted numeric string survives round-trip" {
    const alloc = tst.allocator;
    // Fixture: three quoted scalars that would each parse as a number if the
    // quotes were lost ("007" and "1" as integers, "1e3" as a float).
    const input =
        \\---
        \\tag: "007"
        \\version: "1"
        \\sci: "1e3"
        \\---
        \\# Content
    ;
    var fm = try FrontMatter.initFromMarkdown(alloc, input);
    defer fm.deinit();

    // Quoted numerics must arrive as strings before serialization.
    try tst.expectEqualStrings("007", fm.get("tag").?.string);
    try tst.expectEqualStrings("1", fm.get("version").?.string);
    try tst.expectEqualStrings("1e3", fm.get("sci").?.string);

    // The serialized output is owned by this test and freed on exit.
    const out = try fm.serialize(alloc);
    defer alloc.free(out);

    var fm2 = try FrontMatter.initFromMarkdown(alloc, out);
    defer fm2.deinit();

    // Must still be strings after serialization + re-parse.
    try tst.expectEqualStrings("007", fm2.get("tag").?.string);
    try tst.expectEqualStrings("1", fm2.get("version").?.string);
    try tst.expectEqualStrings("1e3", fm2.get("sci").?.string);
}

test "frontmatter: serialize YAML nested and array" {
const alloc = tst.allocator;
const source =
Expand Down
Loading