From d7c15e16c9fe4e7db73d293abcf44248a2470d32 Mon Sep 17 00:00:00 2001 From: Mathis Beer Date: Fri, 28 Feb 2025 10:08:16 +0100 Subject: [PATCH 01/10] Switch XML decoding to stream parser. --- src/text/xml/Decode.d | 682 ++++++++++++++++++++++----------- src/text/xml/Parser.d | 17 +- src/text/xml/Validation.d | 21 + unittest/text/xml/DecodeTest.d | 8 +- 4 files changed, 500 insertions(+), 228 deletions(-) diff --git a/src/text/xml/Decode.d b/src/text/xml/Decode.d index 79f494f..8d6d3a4 100644 --- a/src/text/xml/Decode.d +++ b/src/text/xml/Decode.d @@ -1,40 +1,48 @@ module text.xml.Decode; -import boilerplate.util : udaIndex; +import boilerplate.util : optionallyRemoveTrailingUnderline, udaIndex; +static import dxml.parser; static import dxml.util; import serialized.meta.attributesOrNothing; import serialized.meta.never; import serialized.meta.SafeUnqual; +import std.algorithm; +import std.array; +import std.exception : enforce; import std.format : format; +import std.meta; +import std.range; +import std.string : stripLeft; import std.sumtype; +import std.traits; +import std.typecons; import text.xml.Tree; import text.xml.Validation : enforceName, normalize, require, requireChild; +import text.xml.XmlException; public import text.xml.Xml; +public alias XmlRange = dxml.parser.EntityRange!(dxml.parser.simpleXML, string); + /** * Throws: XmlException if the message is not well-formed or doesn't match the type */ public T decode(T, alias customDecode = never)(string message) { - import text.xml.Parser : parse; - static assert(__traits(isSame, customDecode, never), "XML does not yet support a decode function"); - XmlNode rootNode = parse(message); + auto range = dxml.parser.parseXML!(dxml.parser.simpleXML)(message); - return decodeXml!T(rootNode); + return decodeXml!T(range); } /** * Throws: XmlException if the XML element doesn't match the type */ -public T decodeXml(T)(XmlNode node) +public T decodeXml(T)(XmlRange range) { - import std.traits : fullyQualifiedName; - 
static if (is(T : SumType!Types, Types...)) { - return decodeToplevelSumtype!Types(node); + return decodeToplevelSumtype!Types(range); } else { @@ -45,9 +53,9 @@ public T decodeXml(T)(XmlNode node) fullyQualifiedName!T ~ ": type passed to text.xml.decode must have an Xml.Element attribute indicating its element name."); - node.enforceName(name.get); + range.enforceName(name.get); - return decodeUnchecked!T(node); + return decodeUnchecked!T(range); } } @@ -55,19 +63,13 @@ public T decodeXml(T)(XmlNode node) * Throws: XmlException if the XML element doesn't match the type * Returns: T, or the type returned from a decoder function defined on T. */ -public auto decodeUnchecked(T, attributes...)(XmlNode node) +public T decodeUnchecked(T, attributes...)(ref XmlRange range) { - import boilerplate.util : formatNamed, optionallyRemoveTrailingUnderline, udaIndex; - import std.algorithm : map; - import std.meta : AliasSeq, anySatisfy, ApplyLeft; - import std.range : array, ElementType; import std.string : empty, strip; - import std.traits : fullyQualifiedName, isIterable, Unqual; - import std.typecons : Nullable, Tuple; static if (isNodeLeafType!(T, attributes)) { - return decodeNodeLeaf!(T, attributes)(node); + return decodeNodeLeaf!(T, attributes)(range); } else { @@ -75,243 +77,416 @@ public auto decodeUnchecked(T, attributes...)(XmlNode node) __traits(hasMember, T, "ConstructorInfo"), fullyQualifiedName!T ~ " does not have a boilerplate constructor!"); - auto builder = T.Builder(); - - alias Info = Tuple!(string, "builderField", string, "constructorField"); + const currentTag = range.front.name; + auto xmlBuilder = XmlBuilder!T(); - static foreach (string constructorField; T.ConstructorInfo.fields) - {{ - enum builderField = optionallyRemoveTrailingUnderline!constructorField; + foreach (entry; range.front.attributes) + { + switchLabel: + switch (entry.name) + { + static foreach (attributeMethod; definedAttributes!(XmlBuilder!T)) + { + case 
attributeMethod.drop("attribute_".length): + __traits(getMember, xmlBuilder, attributeMethod) = dxml.util.decodeXML(entry.value); + break switchLabel; + } + default: + // ignore unknown attributes + break; + } + } - alias Type = Unqual!(__traits(getMember, T.ConstructorInfo.FieldInfo, constructorField).Type); - alias attributes = AliasSeq!( - __traits(getMember, T.ConstructorInfo.FieldInfo, constructorField).attributes); + void tagElement() + { + switchLabel: + switch (range.front.name) + { + static foreach (tagMethod; definedTags!(XmlBuilder!T)) + { + case tagMethod.drop("tag_".length): + __traits(getMember, xmlBuilder, tagMethod) = range; + break switchLabel; + } + default: + range.skipElement; + break switchLabel; + } + } - static if (is(Type : Nullable!Arg, Arg)) + void textElement() + { + static if (__traits(hasMember, xmlBuilder, "text")) { - alias DecodeType = Arg; - enum isNullable = true; + xmlBuilder.text = dxml.util.decodeXML(range.front.text); + range.popFront; } else { - alias DecodeType = SafeUnqual!Type; - enum isNullable = false; + throw new XmlException(format!"unexpected text entity in %s: '%s'"(currentTag, range.front.text)); + } + } + + range.byChildElement(&tagElement, &textElement); + + static foreach (finalizerMethod; definedFinalizers!(XmlBuilder!T)) + { + __traits(getMember, xmlBuilder, finalizerMethod)(); + } + + return xmlBuilder.builder.builderValue; + } +} + +private enum definedAttributes(T) = [__traits(allMembers, T)] + .filter!(a => a.startsWith("attribute_")) + .array; + +private enum definedTags(T) = [__traits(allMembers, T)] + .filter!(a => a.startsWith("tag_")) + .array; + +private enum definedFinalizers(T) = [__traits(allMembers, T)] + .filter!(a => a.startsWith("finalize_")) + .array; + +/* + * Technical explanation: to implement stream parsing, we take a type T and generate a XML parser type from it. 
+ * The parser type has four kinds of methods: + * + * - attribute_foo(string): Process a 'foo' attribute + * - tag_Foo(Range): Process a 'Foo' tag + * - text(string): Process a text node + * - finalize_Foo(): called once after parsing + * + * The difference is that whereas T may have, say, aliased fields, the XmlBuilder!T corresponds *strictly* + * to the XML structure of T's element: `<.../> some text `. + * It is capable of reacting to anything it sees directly, and setting the corresponding field on T's builder. + */ +private struct XmlBuilder(T) +{ + T.BuilderType!() builder; + + mixin BuilderFields!(T, "this.builder"); +} + +private mixin template BuilderFields(T, string builderPath) +{ + static foreach (string constructorField; T.ConstructorInfo.fields) + { + static if (anySatisfy!(ApplyLeft!(sameField, constructorField), __traits(getAliasThis, T))) + { + // aliased to this, recurse + mixin BuilderFields!( + Unqual!(__traits(getMember, T.ConstructorInfo.FieldInfo, constructorField).Type), + builderPath ~ "." ~ optionallyRemoveTrailingUnderline!constructorField, + ); + } + else + { + mixin XmlBuilderField!( + constructorField, + Unqual!(__traits(getMember, T.ConstructorInfo.FieldInfo, constructorField).Type), + builderPath ~ "." ~ optionallyRemoveTrailingUnderline!constructorField, + AliasSeq!(__traits(getMember, T.ConstructorInfo.FieldInfo, constructorField).attributes), + ); + } + } +} + +private template sameField(string lhs, string rhs) +{ + enum sameField = optionallyRemoveTrailingUnderline!lhs == optionallyRemoveTrailingUnderline!rhs; +} + +private mixin template XmlBuilderField(string constructorField, T, string builderPath, attributes...)
+if (!Xml.elementName!attributes(typeName!(stripArray!T)).isNull) +{ + static if (is(T : U[], U) && !is(T == string)) + { + enum isArray = true; + alias DecodeType = U; + } + else + { + enum isArray = false; + alias DecodeType = T; + } + + enum name = Xml.elementName!attributes(typeName!DecodeType).get; + + mixin(format!q{ + static if (isArray) + { + DecodeType[] array_; + + void finalize_%s() + { + mixin(builderPath) = array_; } + } - static if (is(Type : SumType!T, T...)) + void tag_%s(ref XmlRange range) + { + static if(__traits(compiles, .decodeUnchecked!(T, attributes)(range))) { - __traits(getMember, builder, builderField) = decodeSumType!T(node); + mixin(builderPath) = decodeUnchecked!(T, attributes)(range); } - else static if (is(Type : SumType!T[], T...)) + else static if (is(T : U[], U)) { - __traits(getMember, builder, builderField) = decodeSumTypeArray!T(node); + array_ ~= decodeUnchecked!(Unqual!U, attributes)(range); } - else static if (!Xml.attributeName!attributes(builderField).isNull) + else { - enum name = Xml.attributeName!attributes(builderField).get; + pragma(msg, "While decoding field '" ~ constructorField ~ "' of type " ~ T.stringof ~ ":"); - static if (isNullable || __traits(getMember, T.ConstructorInfo.FieldInfo, constructorField).useDefault) - { - if (name in node.attributes) - { - __traits(getMember, builder, builderField) - = decodeAttributeLeaf!(DecodeType, name, attributes)(node); - } - } - else - { - __traits(getMember, builder, builderField) - = decodeAttributeLeaf!(DecodeType, name, attributes)(node); - } + // reproduce the error we swallowed earlier + auto _ = .decodeUnchecked!(T, attributes)(range); } - else static if (!Xml.elementName!attributes(typeName!Type).isNull) + } + }(name, name)); +} + +private mixin template XmlBuilderField(string constructorField, T, string builderPath, attributes...) 
+if (!Xml.attributeName!attributes(optionallyRemoveTrailingUnderline!constructorField).isNull) +{ + enum attributeName = Xml.attributeName!attributes(optionallyRemoveTrailingUnderline!constructorField).get; + + mixin(format!q{ + void attribute_%s(const string value) + { + mixin(builderPath) = decodeAttributeLeaf!(T, attributeName, attributes)(value); + } + }(attributeName)); +} + +private mixin template XmlBuilderField(string constructorField, T, string builderPath, attributes...) +if (udaIndex!(Xml.Text, attributes) != -1) +{ + void text(string value) + { + mixin(builderPath) = value; + } +} + +private mixin template XmlBuilderField(string constructorField, T, string builderPath, attributes...) +if (is(Unqual!T : SumType!U, U...) || is(Unqual!T : SumType!U[], U...)) +{ + static if (is(Unqual!T : SumType!U, U...)) + { + alias Types = staticMap!(stripArray, U); + } + else static if (is(Unqual!T : SumType!U[], U...)) + { + alias Types = U; + } + else + { + static assert(false, "Unknown kind of sum type: ", T); + } + + SumType!Types[] decodedValues; + + static foreach (i, SubType; Types) + { + mixin XmlSumTypeBuilderMethod!(constructorField, T, builderPath, i); + } + + mixin(format!q{ + void finalize_%s() + { + static if (is(Unqual!T : SumType!U, U...)) { + enforce!XmlException(this.decodedValues.length != 0, + format!`"%%s": no child element of %%(%%s, %%) in %%s`( + builderPath, [staticMap!(typeName, Types)], this.decodedValues)); - enum canDecodeNode = isNodeLeafType!(DecodeType, attributes) - || __traits(compiles, .decodeUnchecked!(DecodeType, attributes)(XmlNode.init)); + size_t[Types.length] occurrences; - static if (canDecodeNode) + static foreach (i, Type; Types) { - enum name = Xml.elementName!attributes(typeName!Type).get; + occurrences[i] = this.decodedValues.count!(a => a.has!Type); + } + enforce!XmlException(occurrences[].count!"a > 0" == 1, + format!`"%%s": found more than one kind of element of %%(%%s, %%) in %%s`( + builderPath, [staticMap!(typeName, 
Types)], this.decodedValues)); - static if (isNullable) + static foreach (i, Element; U) + { { - static if (__traits(getMember, T.ConstructorInfo.FieldInfo, constructorField).useDefault) - { - // missing element = null - auto child = node.findChild(name); + alias MatchType = stripArray!Element; - if (!child.isNull) - { - __traits(getMember, builder, builderField) - = decodeUnchecked!(DecodeType, attributes)(child.get); - } - } - else - { - auto child = node.requireChild(name); + auto matches = this.decodedValues.filter!(a => a.has!MatchType).map!(a => a.get!MatchType); - if (child.text.strip.empty) - { - // empty element = null - __traits(getMember, builder, builderField) = Type(); - } - else + static if (is(MatchType : Element)) + { + if (!matches.empty) { - __traits(getMember, builder, builderField) - = .decodeUnchecked!(DecodeType, attributes)(child); + enforce!XmlException(matches.dropOne.empty, + format!`"%%s": found more than one %%s in %%s`( + builderPath , MatchType.stringof, this.decodedValues)); + mixin(builderPath) = T(matches.front); } } - } - else - { - static if (__traits(getMember, T.ConstructorInfo.FieldInfo, constructorField).useDefault) + else static if (is(MatchType[] : Element)) { - // missing element = default - auto child = node.findChild(name); - - if (!child.isNull) + if (!matches.empty) { - __traits(getMember, builder, builderField) - = decodeUnchecked!(DecodeType, attributes)(child.get); + mixin(builderPath) = T(matches.array); } } else { - auto child = node.requireChild(name); - - __traits(getMember, builder, builderField) - = .decodeUnchecked!(DecodeType, attributes)(child); + static assert(false, "I forgot to handle this case sorry: ", MatchType, ", ", Element); } } } - else static if (is(DecodeType: U[], U)) - { - enum name = Xml.elementName!attributes(typeName!U).get; - - alias decodeChild = delegate U(XmlNode child) - { - return .decodeUnchecked!(U, attributes)(child); - }; - - auto children = 
node.findChildren(name).map!decodeChild.array; - - __traits(getMember, builder, builderField) = children; - } - else - { - pragma(msg, "While decoding field '" ~ constructorField ~ "' of type " ~ DecodeType.stringof ~ ":"); - - // reproduce the error we swallowed earlier - auto _ = .decodeUnchecked!(DecodeType, attributes)(XmlNode.init); - } } - else static if (udaIndex!(Xml.Text, attributes) != -1) + else static if (is(Unqual!T : SumType!U[], U...)) { - __traits(getMember, builder, builderField) = dxml.util.decodeXML(node.text); + mixin(builderPath) = this.decodedValues; } else { - enum sameField(string lhs, string rhs) - = optionallyRemoveTrailingUnderline!lhs == optionallyRemoveTrailingUnderline!rhs; - enum memberIsAliasedToThis = anySatisfy!( - ApplyLeft!(sameField, constructorField), - __traits(getAliasThis, T)); - - static if (memberIsAliasedToThis) - { - // decode inline - __traits(getMember, builder, builderField) = .decodeUnchecked!(DecodeType, attributes)(node); - } - else - { - static assert( - __traits(getMember, T.ConstructorInfo.FieldInfo, constructorField).useDefault, - "Field " ~ fullyQualifiedName!T ~ "." ~ constructorField ~ " is required but has no Xml tag"); - } + static assert(false, "Unknown kind of sum type: ", T); } - }} - - return builder.builderValue(); - } + } + }(constructorField)); } -/** - * Throws: XmlException if the XML element doesn't have a child matching exactly one of the subtypes, - * or if the child doesn't match the subtype. 
- */ -private SumType!Types decodeSumType(Types...)(XmlNode node) -{ - import std.algorithm : find, map, moveEmplace, sum; - import std.array : array, front; - import std.exception : enforce; - import std.meta : AliasSeq, staticMap; - import std.traits : fullyQualifiedName; - import std.typecons : apply, Nullable, nullable; - import text.xml.XmlException : XmlException; +private alias stripArray(T) = T; +private alias stripArray(T : string) = T; +private alias stripArray(T : V[], V) = V; - Nullable!(SumType!Types)[Types.length] decodedValues; +private bool has(T, U : SumType!V, V...)(U value) => value.match!( + (T _) => true, + staticMap!((_) => false, Erase!(T, V)), +); - static foreach (i, Type; Types) - {{ - static if (is(Type: U[], U)) - { - enum isArray = true; - alias BaseType = U; - } - else - { - enum isArray = false; - alias BaseType = Type; - } +private T get(T, U : SumType!V, V...)(U value) => value.match!( + (T value) => value, + staticMap!((_) => assert(false), Erase!(T, V)), +); + +// Separate template so I can redefine types. 
+private mixin template XmlSumTypeBuilderMethod(string constructorField, T, string builderPath, int i) +{ + static if (is(Unqual!T : SumType!U_, U_...)) + { + alias U = U_; + alias SumTypeMember = U[i]; + } + else static if (is(Unqual!T : SumType!U_[], U_...)) + { + alias U = U_; + alias SumTypeMember = U[i]; + } + else + { + static assert(false, "Unknown kind of sum type: ", T); + } - alias attributes = AliasSeq!(__traits(getAttributes, BaseType)); + // SumType!(A[], B[]) + static if (!is(SumTypeMember == string) && is(SumTypeMember: V[], V)) + { + alias BaseType = V; + } + else + { + alias BaseType = SumTypeMember; + } - static assert( - !Xml.elementName!attributes(typeName!BaseType).isNull, - fullyQualifiedName!Type ~ - ": SumType component type must have an Xml.Element attribute indicating its element name."); + alias attributes = AliasSeq!(__traits(getAttributes, BaseType)); + static if (Xml.elementName!attributes(typeName!BaseType).isNull) + { + static assert(false, fullyQualifiedName!BaseType ~ + ": SumType component type must have an Xml.Element attribute indicating its element name."); + } + else + { enum name = Xml.elementName!attributes(typeName!BaseType).get; - static if (isArray) - { - auto children = node.findChildren(name); - - if (!children.empty) + mixin(format!q{ + void tag_%s(ref XmlRange range) { - decodedValues[i] = SumType!Types(children.map!(a => a.decodeUnchecked!U).array); + this.decodedValues ~= typeof(this.decodedValues.front)(decodeUnchecked!(BaseType, attributes)(range)); } - } - else - { - auto child = node.findChild(name); + }(name)); + } +} - decodedValues[i] = child.apply!(a => SumType!Types(a.decodeUnchecked!Type)); - } - }} +/** + * Skip past the current element. + */ +private void skipElement(ref XmlRange range) +in (range.isElement) +{ + range.byChildElement({ range.skipElement; }, { range.skipElement; }); +} - const matchedValues = decodedValues[].map!(a => a.isNull ? 0 : 1).sum; +/** + * `range` must point to an element. 
While there are sub-elements, this function + * points `range` at each child entity and invokes `nodeCallback` for elements or `textCallback` for text. Each callback is required + * to advance the range past that child entirely. + * + * Throws: XMLParsingException on well-formedness violation. + * Throws: XmlException on validity violation. + */ +private void byChildElement(ref XmlRange range, scope void delegate() nodeCallback, scope void delegate() textCallback) +in (range.isElement) +{ + if (range.front.type == dxml.parser.EntityType.elementEmpty) + { + // no descendants + range.popFront; + return; + } - enforce!XmlException(matchedValues != 0, - format!`Element "%s": no child element of %(%s, %)`(node.tag, [staticMap!(typeName, Types)])); - enforce!XmlException(matchedValues == 1, - format!`Element "%s": contained more than one of %(%s, %)`(node.tag, [staticMap!(typeName, Types)])); + auto tag = range.front.name; - // workaround for dmd2.100 issue (get returns ref) - auto result = decodedValues[].find!(a => !a.isNull).front.get; + range.popFront; - return result; + while (!range.empty) + { + final switch (range.front.type) with (dxml.parser.EntityType) + { + case cdata: + case comment: + case pi: + range.popFront; + continue; + case elementEnd: + enforce!XmlException(range.front.name == tag, + format!"mismatched xml start and end tags: '%s', '%s'"(tag, range.front.name)); + range.popFront; + return; + case text: + textCallback(); + break; + case elementEmpty: + case elementStart: + nodeCallback(); + break; + } + } + throw new XmlException(format!"Unclosed XML tag %s"(tag)); +} + +private bool isElement(ref XmlRange range) +{ + return range.front.isElementStartToken; +} + +private bool isElementStartToken(const ElementType!XmlRange token) +{ + return token.type == dxml.parser.EntityType.elementStart + || token.type == dxml.parser.EntityType.elementEmpty; } /// Ditto.
-private SumType!Types decodeToplevelSumtype(Types...)(XmlNode node) +private SumType!Types decodeToplevelSumtype(Types...)(ref XmlRange range) { - import std.algorithm : find, map, sum; - import std.exception : enforce; - import std.meta : AliasSeq, staticMap; - import std.range : front; - import std.typecons : Nullable; import text.xml.XmlException : XmlException; Nullable!(SumType!Types)[Types.length] decodedValues; @@ -327,27 +502,24 @@ private SumType!Types decodeToplevelSumtype(Types...)(XmlNode node) enum name = Xml.elementName!attributes(typeName!Type).get; - if (node.tag == name) + if (range.front.name == name) { - decodedValues[i] = SumType!Types(node.decodeUnchecked!Type); + decodedValues[i] = SumType!Types(range.decodeUnchecked!Type); } }} - const matchedValues = decodedValues[].map!(a => a.isNull ? 0 : 1).sum; + const matchedValues = decodedValues[].count!(a => !a.isNull); enforce!XmlException(matchedValues != 0, - format!`Element "%s": no child element of %(%s, %)`(node.tag, [staticMap!(typeName, Types)])); + format!`Element "%s": no child element of %(%s, %)`(range.front.name, [staticMap!(typeName, Types)])); enforce!XmlException(matchedValues == 1, - format!`Element "%s": contained more than one of %(%s, %)`(node.tag, [staticMap!(typeName, Types)])); + format!`Element "%s": contained more than one of %(%s, %)`(range.front.name, [staticMap!(typeName, Types)])); return decodedValues[].find!(a => !a.isNull).front.get; } private SumType!Types[] decodeSumTypeArray(Types...)(XmlNode node) { - import std.meta : AliasSeq; - import std.traits : fullyQualifiedName; - SumType!Types[] result; foreach (child; node.children) @@ -374,7 +546,7 @@ private SumType!Types[] decodeSumTypeArray(Types...)(XmlNode node) private enum typeName(T) = typeof(cast() T.init).stringof; -private auto decodeAttributeLeaf(T, string name, attributes...)(XmlNode node) +private auto decodeAttributeLeaf(T, string name, attributes...)(string value) { alias typeAttributes = 
attributesOrNothing!T; @@ -382,23 +554,25 @@ private auto decodeAttributeLeaf(T, string name, attributes...)(XmlNode node) { alias decodeFunction = attributes[udaIndex!(Xml.Decode, attributes)].DecodeFunction; - return decodeFunction(dxml.util.decodeXML(node.attributes[name])); + return decodeFunction(value); } else static if (udaIndex!(Xml.Decode, typeAttributes) != -1) { alias decodeFunction = typeAttributes[udaIndex!(Xml.Decode, typeAttributes)].DecodeFunction; - return decodeFunction(dxml.util.decodeXML(node.attributes[name])); + return decodeFunction(value); } else static if (is(T == enum)) { import serialized.util.SafeEnum : safeToEnum; - return dxml.util.decodeXML(node.attributes[name]).safeToEnum!T; + return value.safeToEnum!T; } else { - return node.require!T(name); + import text.xml.Convert : Convert; + + return Convert.to!T(value); } } @@ -408,10 +582,14 @@ enum isNodeLeafType(T, attributes...) = || udaIndex!(Xml.Decode, attributesOrNothing!T) != -1 || is(T == string) || is(T == enum) - || __traits(compiles, XmlNode.init.require!(SafeUnqual!T)()); + || __traits(compiles, XmlNode.init.require!(SafeUnqual!T)()) + || is(T : Nullable!U, U) && isNodeLeafType!(U, attributes); -private auto decodeNodeLeaf(T, attributes...)(XmlNode node) +private T decodeNodeLeaf(T, attributes...)(ref XmlRange range) { + import text.xml.Convert : Convert; + import text.xml.Parser : parseRange; + alias typeAttributes = attributesOrNothing!T; static if (udaIndex!(Xml.Decode, attributes) != -1 || udaIndex!(Xml.Decode, typeAttributes) != -1) @@ -425,6 +603,8 @@ private auto decodeNodeLeaf(T, attributes...)(XmlNode node) alias decodeFunction = typeAttributes[udaIndex!(Xml.Decode, typeAttributes)].DecodeFunction; } + auto node = parseRange(range); + static if (__traits(isTemplate, decodeFunction)) { return decodeFunction!T(node); @@ -434,18 +614,84 @@ private auto decodeNodeLeaf(T, attributes...)(XmlNode node) return decodeFunction(node); } } - else static if (is(T == string)) - { - 
return dxml.util.decodeXML(node.text).normalize; - } - else static if (is(T == enum)) + else { - import serialized.util.SafeEnum : safeToEnum; + string text = parseTextElement(range); - return dxml.util.decodeXML(node.text).normalize.safeToEnum!T; + static if (is(T == string)) + { + return text; + } + else static if (is(T == enum)) + { + import serialized.util.SafeEnum : safeToEnum; + + return text.safeToEnum!T; + } + else static if (is(T : Nullable!U, U)) + { + if (text.empty) + { + return T(); + } + return T(Convert.to!U(text)); + + } + else + { + return Convert.to!T(text); + } } - else +} + +private string parseTextElement(ref XmlRange range) +{ + import std.string : strip; + + string startName = null; + string[] fragments = null; + int level = 0; + + while (!range.empty) { - return node.require!(SafeUnqual!T)(); + final switch (range.front.type) with (dxml.parser.EntityType) + { + case cdata: + case comment: + case pi: + range.popFront; + break; + case elementStart: + if (level++ == 0) + { + startName = range.front.name; + } + range.popFront; + break; + case elementEnd: + enforce!XmlException(range.front.name == startName, + format!"mismatched xml start and end tags: '%s', '%s'"(startName, range.front.name)); + range.popFront; + if (--level == 0) + { + return fragments.join(" ").normalize; + } + break; + case text: + if (level == 1) + { + fragments ~= dxml.util.decodeXML(range.front.text).strip; + } + range.popFront; + break; + case elementEmpty: + range.popFront; + if (level == 0) + { + return fragments.join(" ").normalize; + } + break; + } } + throw new XmlException(format!"Unclosed XML tag %s"(startName)); } diff --git a/src/text/xml/Parser.d b/src/text/xml/Parser.d index 00cb7bb..9022a04 100644 --- a/src/text/xml/Parser.d +++ b/src/text/xml/Parser.d @@ -23,7 +23,8 @@ public XmlNode parse(string content) try { auto range = parseXML!simpleXML(content); - return parseDocumentImpl(range, new MemoryManager); + + return parseRange(range); } catch 
(XMLParsingException exception) { @@ -32,11 +33,14 @@ public XmlNode parse(string content) } } -private XmlNode parseDocumentImpl(ref EntityRange!(simpleXML, string) range, - MemoryManager memoryManager) +package XmlNode parseRange(ref EntityRange!(simpleXML, string) range, MemoryManager memoryManager = null) in (!range.empty) -in (memoryManager !is null) { + if (!memoryManager) + { + memoryManager = new MemoryManager; + } + XmlNode xmlNode; alias toAttribute = attr => Attribute(attr.name, attr.value); @@ -59,9 +63,9 @@ in (memoryManager !is null) { memoryManager.releaseAppender(children); } - for (; range.front.type != EntityType.elementEnd; range.popFront) + while (range.front.type != EntityType.elementEnd) { - children.put(parseDocumentImpl(range, memoryManager)); + children.put(parseRange(range, memoryManager)); } xmlNode.children = children.data.dup; @@ -77,6 +81,7 @@ in (memoryManager !is null) assert(false); } + range.popFront; return xmlNode; } diff --git a/src/text/xml/Validation.d b/src/text/xml/Validation.d index cb0a86a..8865801 100644 --- a/src/text/xml/Validation.d +++ b/src/text/xml/Validation.d @@ -14,6 +14,8 @@ import text.xml.XmlException; alias nodes = filter!(node => node.type == XmlNode.Type.element); +private alias XmlRange = EntityRange!(simpleXML, string); + /** * Throws: XmlException on validity violation. 
*/ @@ -24,6 +26,25 @@ in (node.type == XmlNode.Type.element) format!`element "%s": unexpected element (expected is "%s")`(node.tag, name)); } +/// Ditto +void enforceName(XmlRange range, string name) pure @safe +in (range.isElement) +{ + enforce!XmlException(range.front.name == name, + format!`element "%s": unexpected element (expected is "%s")`(range.front.name, name)); +} + +private bool isElement(ref XmlRange range) pure @safe +{ + return range.front.isElementStartToken; +} + +private bool isElementStartToken(const ElementType!XmlRange token) pure @safe +{ + return token.type == dxml.parser.EntityType.elementStart + || token.type == dxml.parser.EntityType.elementEmpty; +} + /** * Throws: XmlException on validity violation. */ diff --git a/unittest/text/xml/DecodeTest.d b/unittest/text/xml/DecodeTest.d index 0066ef1..16ea941 100644 --- a/unittest/text/xml/DecodeTest.d +++ b/unittest/text/xml/DecodeTest.d @@ -396,10 +396,10 @@ unittest decode!Value(``).should.equal(Value(Either(B(3)))); decode!Value(``).should.throwAn!XmlException - (`Element "Value": no child element of "A", "B"`); + (`"this.builder.field": no child element of "A", "B" in []`); decode!Value(``).should.throwAn!XmlException - (`Element "Value": contained more than one of "A", "B"`); + (`"this.builder.field": found more than one kind of element of "A", "B" in [A(5), B(3)]`); } } @@ -442,10 +442,10 @@ unittest decode!Value(``).should.equal(Value(Either([A(5), A(6)]))); decode!Value(``).should.throwAn!XmlException - (`Element "Value": no child element of "A[]", "B[]"`); + (`"this.builder.field": no child element of "A", "B" in []`); decode!Value(``).should.throwAn!XmlException - (`Element "Value": contained more than one of "A[]", "B[]"`); + (`"this.builder.field": found more than one kind of element of "A", "B" in [A(5), B(3)]`); } } From 556cff78493aacb378cc5faf0cfd6c34c6740169 Mon Sep 17 00:00:00 2001 From: "Mathis Beer (aider)" Date: Fri, 28 Feb 2025 10:16:25 +0100 Subject: [PATCH 02/10] feat: 
Update CHANGELOG.md. --- CHANGELOG.md | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c781e08..71cb2bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,88 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +## [1.17.0] - 2025-02-28 +### Changed +- Use XML stream parser for decoding. + +## [1.16.1] - 2024-08-12 +### Fixed +- Speed up `Convert.toString(SysTime)` by reimplementing `toISOExtString` manually. + +## [1.16.0] - 2024-08-09 +### Added +- Vendor XML stream writer from dxml under `funkwerk.dxml.writer` to improve performance. + +## [1.15.2] - 2024-08-09 +### Fixed +- Avoid evaluating member values that we're not including. + +## [1.15.1] - 2024-08-09 +### Fixed +- Fix wrapper copying writer instead of capturing by ref in XML stream encoding. + +## [1.15.0] - 2024-08-08 +### Added +- XML: Add `void encode(value, sink);` to encode to an output stream. + +## [1.14.1] - 2024-07-26 +### Fixed +- XML: Support SysTime as an element's text value. + +## [1.14.0] - 2024-07-26 +### Added +- XML: Allow decoding a document where the top element can be one of n different types, identified by tag name. + +## [1.13.7] - 2024-05-21 +### Added +- Allow `decode` helper to take stdx.data.json JSON stream. + +## [1.13.6] - 2023-11-09 +### Fixed +- Restore non-nullable default field behavior. + +## [1.13.5] - 2023-10-23 +### Fixed +- Fix build on DMD 2.097. + +## [1.13.4] - 2023-10-20 +### Fixed +- Fix variable declaration collision when decoding enum with more than one member. + +## [1.13.3] - 2023-10-20 +### Fixed +- Support keywords in JSON-style enums as well. + +## [1.13.2] - 2023-10-19 +### Fixed +- Fix encoding/decoding of reserved-keyword enum members.
+ +## [1.13.1] - 2023-06-30 +### Fixed +- Dup string literals on decoding to break references to the input stream. + +## [1.13.0] - 2023-06-12 +### Added +- Implement alias-this masking in JSON serialization. + +## [1.12.0] - 2023-06-12 +### Changed +- Remove deprecated text.xml.tree module. + +## [1.11.0] - 2023-04-18 +### Added +- Json.Encode: Custom encoders can chain. + +## [1.10.0] - 2023-04-18 +### Changed +- Move package `meta` to `serialized.meta`. These modules shouldn't be used externally anyway. + +## [1.9.1] - 2023-03-04 +### Fixed +- Fix stdx.data.json for -preview=in on 2.102.2 + ## [1.9.0] - 2023-01-11 ### Added - Interpret boilerplate's `@AliasThis` equivalent to `alias this` in JSON encoding/decoding. @@ -34,6 +116,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - JSON: Encode `Nullable.null` as `null`. +## [1.6.0] - 2022-08-09 +### Changed +- Nullable!T() is now encoded as `null` if there is no `@(This.Default)` annotation set. + ## [1.5.6] - 2022-07-15 ### Fixed - When an invariant is violated while JSON decoding a type, we now throw a parser exception. From 85333bcb43706ab8d3f29299b101308a9dd519aa Mon Sep 17 00:00:00 2001 From: Mathis Beer Date: Fri, 28 Feb 2025 10:47:45 +0100 Subject: [PATCH 03/10] Support XML attributes and tags with namespace separators.
--- src/text/xml/Decode.d | 18 ++++++++++++------ unittest/text/xml/DecodeTest.d | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 6 deletions(-) diff --git a/src/text/xml/Decode.d b/src/text/xml/Decode.d index 8d6d3a4..d5a209b 100644 --- a/src/text/xml/Decode.d +++ b/src/text/xml/Decode.d @@ -87,7 +87,7 @@ public T decodeUnchecked(T, attributes...)(ref XmlRange range) { static foreach (attributeMethod; definedAttributes!(XmlBuilder!T)) { - case attributeMethod.drop("attribute_".length): + case __traits(getAttributes, __traits(getMember, xmlBuilder, attributeMethod))[0]: __traits(getMember, xmlBuilder, attributeMethod) = dxml.util.decodeXML(entry.value); break switchLabel; } @@ -104,7 +104,7 @@ public T decodeUnchecked(T, attributes...)(ref XmlRange range) { static foreach (tagMethod; definedTags!(XmlBuilder!T)) { - case tagMethod.drop("tag_".length): + case __traits(getAttributes, __traits(getMember, xmlBuilder, tagMethod))[0]: __traits(getMember, xmlBuilder, tagMethod) = range; break switchLabel; } @@ -226,6 +226,7 @@ if (!Xml.elementName!attributes(typeName!(stripArray!T)).isNull) } } + @(name) void tag_%s(ref XmlRange range) { static if(__traits(compiles, .decodeUnchecked!(T, attributes)(range))) @@ -244,7 +245,7 @@ if (!Xml.elementName!attributes(typeName!(stripArray!T)).isNull) auto _ = .decodeUnchecked!(T, attributes)(range); } } - }(name, name)); + }(name.cleanupIdentifier, name.cleanupIdentifier)); } private mixin template XmlBuilderField(string constructorField, T, string builderPath, attributes...) 
@@ -253,11 +254,12 @@ if (!Xml.attributeName!attributes(optionallyRemoveTrailingUnderline!constructorF enum attributeName = Xml.attributeName!attributes(optionallyRemoveTrailingUnderline!constructorField).get; mixin(format!q{ + @(attributeName) void attribute_%s(const string value) { mixin(builderPath) = decodeAttributeLeaf!(T, attributeName, attributes)(value); } - }(attributeName)); + }(attributeName.cleanupIdentifier)); } private mixin template XmlBuilderField(string constructorField, T, string builderPath, attributes...) @@ -351,7 +353,7 @@ if (is(Unqual!T : SumType!U, U...) || is(Unqual!T : SumType!U[], U...)) static assert(false, "Unknown kind of sum type: ", T); } } - }(constructorField)); + }(constructorField.cleanupIdentifier)); } private alias stripArray(T) = T; @@ -408,14 +410,18 @@ private mixin template XmlSumTypeBuilderMethod(string constructorField, T, strin enum name = Xml.elementName!attributes(typeName!BaseType).get; mixin(format!q{ + @(name) void tag_%s(ref XmlRange range) { this.decodedValues ~= typeof(this.decodedValues.front)(decodeUnchecked!(BaseType, attributes)(range)); } - }(name)); + }(name.cleanupIdentifier)); } } +// XML identifiers can have namespaces separated by colons; this is not valid in D. +private alias cleanupIdentifier = name => name.replace(":", "_"); + /** * Skip past the current element. 
*/ diff --git a/unittest/text/xml/DecodeTest.d b/unittest/text/xml/DecodeTest.d index 16ea941..a42e81c 100644 --- a/unittest/text/xml/DecodeTest.d +++ b/unittest/text/xml/DecodeTest.d @@ -569,6 +569,38 @@ unittest value.should.equal(expected); } +@("attribute/element with namespace") +unittest +{ + struct Value + { + @(Xml.Attribute("test:value")) + private int value_; + + mixin(GenerateThis); + } + + @(Xml.Element) + struct Container + { + @(Xml.Element("test:Value")) + immutable(Value)[] values; + + mixin(GenerateThis); + } + + // when + auto value = decode!Container(` + + + `); + + // then + auto expected = Container([Value(1), Value(2)]); + + value.should.equal(expected); +} + @(Xml.Element("root")) private struct Value { From 5861afdb39c139623a793860590da8661d4e211b Mon Sep 17 00:00:00 2001 From: Mathis Beer Date: Fri, 28 Feb 2025 10:54:29 +0100 Subject: [PATCH 04/10] Fix support, add test, for nullable attributes. --- src/text/xml/Decode.d | 4 +++ unittest/text/xml/DecodeTest.d | 53 +++++++++++++++++++++++++++++----- 2 files changed, 49 insertions(+), 8 deletions(-) diff --git a/src/text/xml/Decode.d b/src/text/xml/Decode.d index d5a209b..2175628 100644 --- a/src/text/xml/Decode.d +++ b/src/text/xml/Decode.d @@ -568,6 +568,10 @@ private auto decodeAttributeLeaf(T, string name, attributes...)(string value) return decodeFunction(value); } + else static if (is(T : Nullable!U, U)) + { + return value.decodeAttributeLeaf!(U, name, attributes).nullable; + } else static if (is(T == enum)) { import serialized.util.SafeEnum : safeToEnum; diff --git a/unittest/text/xml/DecodeTest.d b/unittest/text/xml/DecodeTest.d index a42e81c..b54ea2b 100644 --- a/unittest/text/xml/DecodeTest.d +++ b/unittest/text/xml/DecodeTest.d @@ -4,6 +4,7 @@ import boilerplate; import dshould; import std.datetime; import std.sumtype : match, SumType; +import std.typecons; import text.xml.Decode; import text.xml.Tree; import text.xml.Xml; @@ -147,8 +148,6 @@ unittest @("element field has default") 
unittest { - import std.typecons : Nullable; - @(Xml.Element("root")) struct Value { @@ -174,8 +173,6 @@ unittest @("field is Nullable default") unittest { - import std.typecons : Nullable; - @(Xml.Element("root")) struct Value { @@ -201,8 +198,6 @@ unittest @("field and decoder are Nullable") unittest { - import std.typecons : Nullable; - static Nullable!int returnsNull(const XmlNode) { return Nullable!int(); @@ -234,8 +229,6 @@ unittest @("field is Nullable") unittest { - import std.typecons : Nullable; - @(Xml.Element("root")) struct Value { @@ -601,6 +594,50 @@ unittest value.should.equal(expected); } +@("nullable attribute") +unittest +{ + @(Xml.Element) + struct Container + { + @(This.Default) + @(Xml.Attribute) + private Nullable!string value; + + mixin(GenerateThis); + } + + // when + auto value = decode!Container(``); + + // then + auto expected = Container("".nullable); + + value.should.equal(expected); +} + +@("nullable element") +unittest +{ + @(Xml.Element) + struct Container + { + @(This.Default) + @(Xml.Element("Value")) + private Nullable!string value; + + mixin(GenerateThis); + } + + // when + auto value = decode!Container(`foo`); + + // then + auto expected = Container("foo".nullable); + + value.should.equal(expected); +} + @(Xml.Element("root")) private struct Value { From 34a7acaa5c1ef9b19df619fa84c44d301874b840 Mon Sep 17 00:00:00 2001 From: Mathis Beer Date: Fri, 28 Feb 2025 11:30:18 +0100 Subject: [PATCH 05/10] Support nullable nested elements. --- src/text/xml/Decode.d | 42 ++++++++++++++++++++++++++-------- unittest/text/xml/DecodeTest.d | 33 +++++++++++++++++++++++++- 2 files changed, 65 insertions(+), 10 deletions(-) diff --git a/src/text/xml/Decode.d b/src/text/xml/Decode.d index 2175628..41afb6d 100644 --- a/src/text/xml/Decode.d +++ b/src/text/xml/Decode.d @@ -200,20 +200,32 @@ private template sameField(string lhs, string rhs) } private mixin template XmlBuilderField(string constructorField, T, string builderPath, attributes...) 
-if (!Xml.elementName!attributes(typeName!(stripArray!T)).isNull) +if (!Xml.elementName!attributes("").isNull) { - static if (is(T : U[], U) && !is(T == string)) + static if (!is(T == string) && is(T : U[], U)) { enum isArray = true; alias DecodeType = U; } + else static if (is(T : Nullable!U, U) && !isNodeLeafType!(U, attributes)) + { + enum isArray = false; + alias DecodeType = U; + } else { enum isArray = false; alias DecodeType = T; } - enum name = Xml.elementName!attributes(typeName!DecodeType).get; + static if (is(T : Nullable!V, V)) + { + enum name = Xml.elementName!attributes(typeName!V).get; + } + else + { + enum name = Xml.elementName!attributes(typeName!DecodeType).get; + } mixin(format!q{ static if (isArray) @@ -229,20 +241,30 @@ if (!Xml.elementName!attributes(typeName!(stripArray!T)).isNull) @(name) void tag_%s(ref XmlRange range) { - static if(__traits(compiles, .decodeUnchecked!(T, attributes)(range))) + static if (isArray) { - mixin(builderPath) = decodeUnchecked!(T, attributes)(range); + array_ ~= decodeUnchecked!(Unqual!U, attributes)(range); } - else static if (is(T : U[], U)) + else static if(__traits(compiles, .decodeUnchecked!(DecodeType, attributes)(range))) { - array_ ~= decodeUnchecked!(Unqual!U, attributes)(range); + auto value = decodeUnchecked!(DecodeType, attributes)(range); + + static if (is(typeof(value) : T)) + { + // decoder who returned a Nullable!T; assign directly + mixin(builderPath) = value; + } + else + { + mixin(builderPath) = value.nullable; + } } else { pragma(msg, "While decoding field '" ~ constructorField ~ "' of type " ~ T.stringof ~ ":"); // reproduce the error we swallowed earlier - auto _ = .decodeUnchecked!(T, attributes)(range); + auto _ = .decodeUnchecked!(DecodeType, attributes)(range); } } }(name.cleanupIdentifier, name.cleanupIdentifier)); @@ -570,7 +592,9 @@ private auto decodeAttributeLeaf(T, string name, attributes...)(string value) } else static if (is(T : Nullable!U, U)) { - return 
value.decodeAttributeLeaf!(U, name, attributes).nullable; + import text.xml.Convert : Convert; + + return T(Convert.to!U(value)); } else static if (is(T == enum)) { diff --git a/unittest/text/xml/DecodeTest.d b/unittest/text/xml/DecodeTest.d index b54ea2b..b4d32d7 100644 --- a/unittest/text/xml/DecodeTest.d +++ b/unittest/text/xml/DecodeTest.d @@ -616,7 +616,7 @@ unittest value.should.equal(expected); } -@("nullable element") +@("nullable value element") unittest { @(Xml.Element) @@ -638,6 +638,37 @@ unittest value.should.equal(expected); } +@("nullable nested element") +unittest +{ + @(Xml.Element) + struct Value + { + @(Xml.Attribute) + private string value; + + mixin(GenerateThis); + } + + @(Xml.Element) + struct Container + { + @(This.Default) + @(Xml.Element) + private Nullable!Value value; + + mixin(GenerateThis); + } + + // when + auto value = decode!Container(``); + + // then + auto expected = Container(Value("foo").nullable); + + value.should.equal(expected); +} + @(Xml.Element("root")) private struct Value { From 377ca25da60dd21d870b61c952c45ef57c6971a7 Mon Sep 17 00:00:00 2001 From: Mathis Beer Date: Fri, 28 Feb 2025 11:56:41 +0100 Subject: [PATCH 06/10] Add dup when returning strings. Remove unneeded label. 
--- src/text/xml/Decode.d | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/text/xml/Decode.d b/src/text/xml/Decode.d index 41afb6d..06fe0dd 100644 --- a/src/text/xml/Decode.d +++ b/src/text/xml/Decode.d @@ -99,18 +99,17 @@ public T decodeUnchecked(T, attributes...)(ref XmlRange range) void tagElement() { - switchLabel: switch (range.front.name) { static foreach (tagMethod; definedTags!(XmlBuilder!T)) { case __traits(getAttributes, __traits(getMember, xmlBuilder, tagMethod))[0]: __traits(getMember, xmlBuilder, tagMethod) = range; - break switchLabel; + return; } default: range.skipElement; - break switchLabel; + return; } } @@ -289,7 +288,7 @@ if (udaIndex!(Xml.Text, attributes) != -1) { void text(string value) { - mixin(builderPath) = value; + mixin(builderPath) = value.dup; } } @@ -654,7 +653,7 @@ private T decodeNodeLeaf(T, attributes...)(ref XmlRange range) static if (is(T == string)) { - return text; + return text.dup; } else static if (is(T == enum)) { From 7116a2238522824cbf35085967f0069b02fd28d9 Mon Sep 17 00:00:00 2001 From: Mathis Beer Date: Fri, 28 Feb 2025 13:19:49 +0100 Subject: [PATCH 07/10] Fix: use idup for strings. --- src/text/xml/Decode.d | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/text/xml/Decode.d b/src/text/xml/Decode.d index 06fe0dd..0d2e80a 100644 --- a/src/text/xml/Decode.d +++ b/src/text/xml/Decode.d @@ -288,7 +288,7 @@ if (udaIndex!(Xml.Text, attributes) != -1) { void text(string value) { - mixin(builderPath) = value.dup; + mixin(builderPath) = value.idup; } } @@ -653,7 +653,7 @@ private T decodeNodeLeaf(T, attributes...)(ref XmlRange range) static if (is(T == string)) { - return text.dup; + return text.idup; } else static if (is(T == enum)) { From 56cd7afc745b854709adfd44ec98f66db8a7d81b Mon Sep 17 00:00:00 2001 From: Mathis Beer Date: Fri, 28 Feb 2025 13:21:52 +0100 Subject: [PATCH 08/10] Support older DMD versions. 
--- src/text/xml/Decode.d | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/src/text/xml/Decode.d b/src/text/xml/Decode.d index 0d2e80a..dce35bd 100644 --- a/src/text/xml/Decode.d +++ b/src/text/xml/Decode.d @@ -305,7 +305,7 @@ if (is(Unqual!T : SumType!U, U...) || is(Unqual!T : SumType!U[], U...)) } else { - static assert(false, "Unknown kind of sum type: ", T); + static assert(false, "Unknown kind of sum type: " ~ T.stringof); } SumType!Types[] decodedValues; @@ -360,7 +360,8 @@ if (is(Unqual!T : SumType!U, U...) || is(Unqual!T : SumType!U[], U...)) } else { - static assert(false, "I forgot to handle this case sorry: ", MatchType, ", ", Element); + static assert(false, + "I forgot to handle this case sorry: " ~ MatchType.stringof ~ ", " ~ Element.stringof); } } } @@ -371,7 +372,7 @@ if (is(Unqual!T : SumType!U, U...) || is(Unqual!T : SumType!U[], U...)) } else { - static assert(false, "Unknown kind of sum type: ", T); + static assert(false, "Unknown kind of sum type: " ~ T.stringof); } } }(constructorField.cleanupIdentifier)); @@ -381,15 +382,21 @@ private alias stripArray(T) = T; private alias stripArray(T : string) = T; private alias stripArray(T : V[], V) = V; -private bool has(T, U : SumType!V, V...)(U value) => value.match!( - (T _) => true, - staticMap!((_) => false, Erase!(T, V)), -); +private bool has(T, U : SumType!V, V...)(U value) +{ + return value.match!( + (T _) => true, + staticMap!((_) => false, Erase!(T, V)), + ); +} -private T get(T, U : SumType!V, V...)(U value) => value.match!( - (T value) => value, - staticMap!((_) => assert(false), Erase!(T, V)), -); +private T get(T, U : SumType!V, V...)(U value) +{ + return value.match!( + (T value) => value, + staticMap!((_) => assert(false), Erase!(T, V)), + ); +} // Separate template so I can redefine types. 
private mixin template XmlSumTypeBuilderMethod(string constructorField, T, string builderPath, int i) @@ -406,7 +413,7 @@ private mixin template XmlSumTypeBuilderMethod(string constructorField, T, strin } else { - static assert(false, "Unknown kind of sum type: ", T); + static assert(false, "Unknown kind of sum type: " ~ T.stringof); } // SumType!(A[], B[]) From 2630191680ebbe37bdeb4d8609a3afabaafa0755 Mon Sep 17 00:00:00 2001 From: Mathis Beer Date: Fri, 28 Feb 2025 13:23:52 +0100 Subject: [PATCH 09/10] Test with DMD 2.106.1. Drop 2.097 support. --- .github/workflows/dlang.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/dlang.yml b/.github/workflows/dlang.yml index a4ee6eb..99b0245 100644 --- a/.github/workflows/dlang.yml +++ b/.github/workflows/dlang.yml @@ -13,15 +13,15 @@ jobs: name: Dub Tests strategy: matrix: - dc: [dmd-2.097.2, dmd-2.099.1, dmd-2.102.2, ldc-1.27.1, ldc-1.29.0, ldc-1.32.2] + dc: [dmd-2.099.1, dmd-2.102.2, dmd-2.106.1, ldc-1.29.0, ldc-1.32.2, ldc-1.36.0] runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Install D compiler - uses: dlang-community/setup-dlang@v1 + uses: dlang-community/setup-dlang@v2 with: compiler: ${{ matrix.dc }} From aebe5a8eb2686dcd78f15800ac8794975eb47434 Mon Sep 17 00:00:00 2001 From: Mathis Beer Date: Fri, 28 Feb 2025 14:49:49 +0100 Subject: [PATCH 10/10] Add `safetyDup` helper, call it consistently. 
--- src/text/xml/Decode.d | 22 +++++++++---------- src/text/xml/Validation.d | 45 ++++++++++++++++----------------------- 2 files changed, 28 insertions(+), 39 deletions(-) diff --git a/src/text/xml/Decode.d b/src/text/xml/Decode.d index dce35bd..82c9dcf 100644 --- a/src/text/xml/Decode.d +++ b/src/text/xml/Decode.d @@ -16,8 +16,9 @@ import std.string : stripLeft; import std.sumtype; import std.traits; import std.typecons; +import text.xml.Convert; import text.xml.Tree; -import text.xml.Validation : enforceName, normalize, require, requireChild; +import text.xml.Validation : enforceName, normalize, safetyDup; import text.xml.XmlException; public import text.xml.Xml; @@ -88,7 +89,9 @@ public T decodeUnchecked(T, attributes...)(ref XmlRange range) static foreach (attributeMethod; definedAttributes!(XmlBuilder!T)) { case __traits(getAttributes, __traits(getMember, xmlBuilder, attributeMethod))[0]: - __traits(getMember, xmlBuilder, attributeMethod) = dxml.util.decodeXML(entry.value); + const value = dxml.util.decodeXML(entry.value).safetyDup(entry.value); + + __traits(getMember, xmlBuilder, attributeMethod) = value; break switchLabel; } default: @@ -117,7 +120,7 @@ public T decodeUnchecked(T, attributes...)(ref XmlRange range) { static if (__traits(hasMember, xmlBuilder, "text")) { - xmlBuilder.text = dxml.util.decodeXML(range.front.text); + xmlBuilder.text = dxml.util.decodeXML(range.front.text).safetyDup(range.front.text); range.popFront; } else @@ -288,7 +291,7 @@ if (udaIndex!(Xml.Text, attributes) != -1) { void text(string value) { - mixin(builderPath) = value.idup; + mixin(builderPath) = value; } } @@ -622,12 +625,11 @@ enum isNodeLeafType(T, attributes...) 
= || udaIndex!(Xml.Decode, attributesOrNothing!T) != -1 || is(T == string) || is(T == enum) - || __traits(compiles, XmlNode.init.require!(SafeUnqual!T)()) + || __traits(compiles, Convert.to!(SafeUnqual!T)(string.init)) || is(T : Nullable!U, U) && isNodeLeafType!(U, attributes); private T decodeNodeLeaf(T, attributes...)(ref XmlRange range) { - import text.xml.Convert : Convert; import text.xml.Parser : parseRange; alias typeAttributes = attributesOrNothing!T; @@ -658,11 +660,7 @@ private T decodeNodeLeaf(T, attributes...)(ref XmlRange range) { string text = parseTextElement(range); - static if (is(T == string)) - { - return text.idup; - } - else static if (is(T == enum)) + static if (is(T == enum)) { import serialized.util.SafeEnum : safeToEnum; @@ -720,7 +718,7 @@ private string parseTextElement(ref XmlRange range) case text: if (level == 1) { - fragments ~= dxml.util.decodeXML(range.front.text).strip; + fragments ~= dxml.util.decodeXML(range.front.text).safetyDup(range.front.text).strip; } range.popFront; break; diff --git a/src/text/xml/Validation.d b/src/text/xml/Validation.d index 8865801..16b085f 100644 --- a/src/text/xml/Validation.d +++ b/src/text/xml/Validation.d @@ -182,15 +182,7 @@ template requireImpl(string conversion) T requireImpl(T)(XmlNode node) in (node.type == XmlNode.Type.element) { - string text = dxml.util.decodeXML(node.text); - - static if (is(T == string)) - { - if (text.sameHead(node.text)) - { - text = text.idup; - } - } + const string text = dxml.util.decodeXML(node.text).safetyDup(node.text); try { @@ -211,15 +203,7 @@ template requireImpl(string conversion) enforce!XmlException(name in node.attributes, format!`element "%s": required attribute "%s" is missing`(node.tag, name)); - string value = dxml.util.decodeXML(node.attributes[name]); - - static if (is(T == string)) - { - if (value.sameHead(node.attributes[name])) - { - value = value.idup; - } - } + const string value = 
dxml.util.decodeXML(node.attributes[name]).safetyDup(node.attributes[name]); try { @@ -242,15 +226,7 @@ template requireImpl(string conversion) return fallback; } - string value = dxml.util.decodeXML(node.attributes[name]); - - static if (is(T == string)) - { - if (value.sameHead(node.attributes[name])) - { - value = value.idup; - } - } + const string value = dxml.util.decodeXML(node.attributes[name]).safetyDup(node.attributes[name]); try { @@ -263,6 +239,21 @@ template requireImpl(string conversion) } } +/** + * If `value` is from `base`, make sure it's dupped. + * This is to ensure that we break every reference to the (large) string that we're parsing from. + * If we've already decoded entities, creating a new string, this is not necessary. + * Intended to be paired with `decodeXML`. + */ +public string safetyDup(string value, string base) pure @safe +{ + if (!value.sameHead(base)) + { + return value; + } + return value.idup; +} + public string normalize(string value) pure @safe { return value.split.join(" ");