From 865e79f68d16b8709a9c4f280af251a94c223b0b Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Mon, 16 Mar 2026 02:45:14 +0200 Subject: [PATCH 1/9] feat(xmldsig): add URI dereference for Reference elements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - UriReferenceResolver with configurable ID attribute names (ID/Id/id + custom) - Empty URI → entire document without comments (XMLDSig §4.3.3.2) - Bare-name #id → element subtree by ID attribute - #xpointer(/) → document root with comments - #xpointer(id('...')) → element subtree (XPointer form) - NodeSet type with subtree inclusion/exclusion for transform pipeline - TransformData enum (NodeSet/Binary) for transform chain data flow Closes #8 --- src/xmldsig/mod.rs | 11 + src/xmldsig/types.rs | 202 +++++++++++++++ src/xmldsig/uri.rs | 538 +++++++++++++++++++++++++++++++++++++++ tests/uri_integration.rs | 222 ++++++++++++++++ 4 files changed, 973 insertions(+) create mode 100644 src/xmldsig/types.rs create mode 100644 src/xmldsig/uri.rs create mode 100644 tests/uri_integration.rs diff --git a/src/xmldsig/mod.rs b/src/xmldsig/mod.rs index 04a4156..0a33452 100644 --- a/src/xmldsig/mod.rs +++ b/src/xmldsig/mod.rs @@ -1,3 +1,14 @@ //! XML Digital Signatures (XMLDSig). //! //! Implements [XML Signature Syntax and Processing](https://www.w3.org/TR/xmldsig-core1/). +//! +//! ## Current Status +//! +//! - URI dereference: same-document references (`""`, `#id`, `#xpointer(/)`) +//! - ID attribute resolution with configurable attribute names +//! - Node set types for the transform pipeline + +pub mod types; +pub mod uri; + +pub use types::{NodeSet, TransformData, TransformError}; diff --git a/src/xmldsig/types.rs b/src/xmldsig/types.rs new file mode 100644 index 0000000..b9001dc --- /dev/null +++ b/src/xmldsig/types.rs @@ -0,0 +1,202 @@ +//! Core types for the XMLDSig transform pipeline. +//! +//! These types flow between URI dereference, transforms, and digest computation. +//! +//! These types are consumed by URI dereference, the transform chain (P1-014, +//! P1-015), and reference processing (P1-018). + +use std::collections::HashSet; + +use roxmltree::{Document, Node, NodeId}; + +// roxmltree 0.21 uses `Node<'a, 'input: 'a>`. We tie both lifetimes together +// with a single `'a` by requiring `'input = 'a` at every use site (`Node<'a, 'a>`). +// This is safe because our NodeSet borrows the Document which owns the input. + +/// Data flowing between transforms in the verification/signing pipeline. +/// +/// Transforms consume and produce either a node set (XML-level) or raw bytes +/// (after canonicalization or base64 decode). +pub enum TransformData<'a> { + /// A set of nodes from the parsed XML document. + NodeSet(NodeSet<'a>), + /// Raw bytes (e.g., after canonicalization). + Binary(Vec), +} + +impl std::fmt::Debug for TransformData<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::NodeSet(_) => f.debug_tuple("NodeSet").field(&"...").finish(), + Self::Binary(b) => f.debug_tuple("Binary").field(&b.len()).finish(), + } + } +} + +impl<'a> TransformData<'a> { + /// Convert to `NodeSet`, returning an error if this is `Binary` data. + pub fn into_node_set(self) -> Result, TransformError> { + match self { + Self::NodeSet(ns) => Ok(ns), + Self::Binary(_) => Err(TransformError::TypeMismatch { + expected: "NodeSet", + got: "Binary", + }), + } + } + + /// Convert to binary bytes, returning an error if this is a `NodeSet`. + pub fn into_binary(self) -> Result, TransformError> { + match self { + Self::Binary(b) => Ok(b), + Self::NodeSet(_) => Err(TransformError::TypeMismatch { + expected: "Binary", + got: "NodeSet", + }), + } + } +} + +/// A set of nodes from a roxmltree document. +/// +/// Represents "which nodes are included" for canonicalization and transforms. +/// Two modes: +/// - **Whole document**: `included` is `None`, meaning all nodes are in the set +/// (minus any in `excluded`). +/// - **Subset**: `included` is `Some(ids)`, meaning only those node IDs are in +/// the set (minus any in `excluded`). +pub struct NodeSet<'a> { + /// Reference to the parsed document. + doc: &'a Document<'a>, + /// If `None`, all nodes are included. If `Some`, only these nodes. + included: Option>, + /// Nodes explicitly excluded (e.g., `` subtree for enveloped transform). + excluded: HashSet, + /// Whether comment nodes are included. For empty URI dereference (whole + /// document), comments are excluded per XMLDSig spec. + with_comments: bool, +} + +impl<'a> NodeSet<'a> { + /// Create a node set representing the entire document without comments. + /// + /// Per XMLDSig §4.3.3.2: "An empty URI [...] is a reference to the document + /// [...] and the comment nodes are not included." + pub fn entire_document_without_comments(doc: &'a Document<'a>) -> Self { + Self { + doc, + included: None, + excluded: HashSet::new(), + with_comments: false, + } + } + + /// Create a node set representing the entire document with comments. + /// + /// Used for `#xpointer(/)` which, unlike empty URI, includes comment nodes. + pub fn entire_document_with_comments(doc: &'a Document<'a>) -> Self { + Self { + doc, + included: None, + excluded: HashSet::new(), + with_comments: true, + } + } + + /// Create a node set representing an element and all its descendants + /// (including attributes, namespaces, and text nodes), with comments. + pub fn subtree(doc: &'a Document<'a>, element: Node<'a, 'a>) -> Self { + let mut ids = HashSet::new(); + collect_subtree_ids(element, &mut ids); + Self { + doc, + included: Some(ids), + excluded: HashSet::new(), + with_comments: true, + } + } + + /// Reference to the underlying document. + pub fn document(&self) -> &'a Document<'a> { + self.doc + } + + /// Check whether a node is in this set. + pub fn contains(&self, node: Node<'_, '_>) -> bool { + let id = node.id(); + + // Check exclusion first + if self.excluded.contains(&id) { + return false; + } + + // Filter comments if not included + if !self.with_comments && node.is_comment() { + return false; + } + + // Check inclusion + match &self.included { + None => true, + Some(ids) => ids.contains(&id), + } + } + + /// Exclude a node and all its descendants from this set. + pub fn exclude_subtree(&mut self, node: Node<'_, '_>) { + collect_subtree_ids(node, &mut self.excluded); + } + + /// Whether comments are included in this node set. + pub fn with_comments(&self) -> bool { + self.with_comments + } +} + +/// Collect a node and all its descendants into a set of `NodeId`s. +fn collect_subtree_ids(node: Node<'_, '_>, ids: &mut HashSet) { + ids.insert(node.id()); + for child in node.children() { + collect_subtree_ids(child, ids); + } + // roxmltree models attributes as children only for elements, + // but they're accessible via node.attributes(). For node-set + // membership, we need to track the element ID — the C14N + // serializer checks element membership and then serializes + // all its attributes. Individual attribute NodeIds are not + // needed because roxmltree doesn't expose them as separate nodes + // in the tree traversal. +} + +/// Errors during transform processing. +#[derive(Debug, thiserror::Error)] +pub enum TransformError { + /// Data type mismatch between transforms. + #[error("type mismatch: expected {expected}, got {got}")] + TypeMismatch { + /// Expected data type. + expected: &'static str, + /// Actual data type. + got: &'static str, + }, + + /// URI dereference failed. + #[error("URI dereference failed: {0}")] + UriDeref(String), + + /// Element not found by ID. + #[error("element not found by ID: {0}")] + ElementNotFound(String), + + /// Unsupported URI scheme or format. + #[error("unsupported URI: {0}")] + UnsupportedUri(String), + + /// Unsupported transform algorithm. + #[error("unsupported transform: {0}")] + UnsupportedTransform(String), + + /// Canonicalization error during transform. + #[error("C14N error: {0}")] + C14n(String), +} diff --git a/src/xmldsig/uri.rs b/src/xmldsig/uri.rs new file mode 100644 index 0000000..b3745e2 --- /dev/null +++ b/src/xmldsig/uri.rs @@ -0,0 +1,538 @@ +//! URI dereference for XMLDSig `` elements. +//! +//! Implements same-document URI resolution per +//! [XMLDSig §4.3.3.2](https://www.w3.org/TR/xmldsig-core1/#sec-Same-Document): +//! +//! - **Empty URI** (`""` or absent): the entire document, excluding comments. +//! - **Bare-name `#id`**: the element whose ID attribute matches `id`, as a subtree. +//! +//! External URIs (http://, file://, etc.) are not supported — only same-document +//! references are needed for SAML signature verification. + +use std::collections::HashMap; + +use roxmltree::{Document, Node}; + +use super::types::{NodeSet, TransformData, TransformError}; + +/// Default ID attribute names to scan when building the ID index. +/// +/// These cover the most common conventions: +/// - `ID` — SAML 2.0 (``) +/// - `Id` — XMLDSig (``) +/// - `id` — general XML +const DEFAULT_ID_ATTRS: &[&str] = &["ID", "Id", "id"]; + +/// Resolves same-document URI references against a parsed XML document. +/// +/// Builds a `HashMap<&str, Node>` index on construction for O(1) fragment +/// lookups. Supports caller-provided ID attribute names (important for SAML +/// which uses `ID` rather than the xml:id mechanism). +/// +/// # Example +/// +/// ``` +/// # fn main() -> Result<(), Box> { +/// use xml_sec::xmldsig::uri::UriReferenceResolver; +/// +/// let xml = r#"content"#; +/// let doc = roxmltree::Document::parse(xml)?; +/// let resolver = UriReferenceResolver::new(&doc); +/// +/// assert!(resolver.has_id("abc")); +/// assert_eq!(resolver.id_count(), 1); +/// # Ok(()) +/// # } +/// ``` +pub struct UriReferenceResolver<'a> { + doc: &'a Document<'a>, + /// ID → element node mapping for O(1) fragment lookups. + id_map: HashMap<&'a str, Node<'a, 'a>>, +} + +impl<'a> UriReferenceResolver<'a> { + /// Build a resolver with default ID attribute names (`ID`, `Id`, `id`). + pub fn new(doc: &'a Document<'a>) -> Self { + Self::with_id_attrs(doc, DEFAULT_ID_ATTRS) + } + + /// Build a resolver with custom ID attribute names. + /// + /// The defaults (`ID`, `Id`, `id`) are always included. Additional names + /// are appended. Pass an empty slice to use only the defaults. + pub fn with_id_attrs(doc: &'a Document<'a>, extra_attrs: &[&str]) -> Self { + let mut id_map = HashMap::new(); + + // Merge default + extra attribute names, dedup + let mut attr_names: Vec<&str> = DEFAULT_ID_ATTRS.to_vec(); + for name in extra_attrs { + if !attr_names.contains(name) { + attr_names.push(name); + } + } + + // Scan all elements for ID attributes + for node in doc.descendants() { + if node.is_element() { + for attr_name in &attr_names { + if let Some(value) = node.attribute(*attr_name) { + // First occurrence wins (per XML spec, IDs should be unique) + id_map.entry(value).or_insert(node); + } + } + } + } + + Self { doc, id_map } + } + + /// Dereference a URI string to a [`TransformData`]. + /// + /// # URI forms + /// + /// | URI | Result | + /// |-----|--------| + /// | `""` (empty) | Entire document, comments excluded | + /// | `"#foo"` | Subtree rooted at element with ID `foo` | + /// | other | `Err(UnsupportedUri)` | + pub fn dereference(&self, uri: &str) -> Result, TransformError> { + if uri.is_empty() { + // Empty URI = entire document without comments + // XMLDSig §4.3.3.2: "the reference is to the document [...], + // and the comment nodes are not included" + Ok(TransformData::NodeSet( + NodeSet::entire_document_without_comments(self.doc), + )) + } else if let Some(fragment) = uri.strip_prefix('#') { + self.dereference_fragment(fragment) + } else { + Err(TransformError::UnsupportedUri(uri.to_string())) + } + } + + /// Resolve a URI fragment (the part after `#`). + /// + /// Handles: + /// - `xpointer(/)` → entire document (with comments, per XPointer spec) + /// - `xpointer(id('foo'))` → element by ID (equivalent to bare-name `#foo`) + /// - bare name `foo` → element by ID attribute + fn dereference_fragment(&self, fragment: &str) -> Result, TransformError> { + if fragment == "xpointer(/)" { + // XPointer root: entire document WITH comments (unlike empty URI). + // Per XMLDSig §4.3.3.3: "the XPointer expression [...] includes + // comment nodes" + Ok(TransformData::NodeSet( + NodeSet::entire_document_with_comments(self.doc), + )) + } else if let Some(id) = parse_xpointer_id(fragment) { + // xpointer(id('foo')) → same as bare-name #foo + self.resolve_id(id) + } else { + // Bare-name fragment: #foo → element by ID + self.resolve_id(fragment) + } + } + + /// Look up an element by its ID attribute value and return a subtree node set. + fn resolve_id(&self, id: &str) -> Result, TransformError> { + match self.id_map.get(id) { + Some(&element) => Ok(TransformData::NodeSet(NodeSet::subtree(self.doc, element))), + None => Err(TransformError::ElementNotFound(id.to_string())), + } + } + + /// Check if an ID is registered in the resolver's index. + pub fn has_id(&self, id: &str) -> bool { + self.id_map.contains_key(id) + } + + /// Get the number of registered IDs. + pub fn id_count(&self) -> usize { + self.id_map.len() + } +} + +/// Parse `xpointer(id('value'))` or `xpointer(id("value"))` and return the ID value. +/// Returns `None` if the fragment doesn't match this pattern. +fn parse_xpointer_id(fragment: &str) -> Option<&str> { + let inner = fragment.strip_prefix("xpointer(id(")?.strip_suffix("))")?; + + // Strip single or double quotes + if (inner.starts_with('\'') && inner.ends_with('\'')) + || (inner.starts_with('"') && inner.ends_with('"')) + { + Some(&inner[1..inner.len() - 1]) + } else { + None + } +} + +#[cfg(test)] +#[allow(clippy::unwrap_used)] +mod tests { + use super::*; + + #[test] + fn empty_uri_returns_whole_document() { + let xml = "text"; + let doc = Document::parse(xml).unwrap(); + let resolver = UriReferenceResolver::new(&doc); + + let data = resolver.dereference("").unwrap(); + let node_set = data.into_node_set().unwrap(); + + // Whole document: root and child should be in the set + let root = doc.root_element(); + assert!(node_set.contains(root)); + let child = root.first_child().unwrap(); + assert!(node_set.contains(child)); + } + + #[test] + fn empty_uri_excludes_comments() { + let xml = ""; + let doc = Document::parse(xml).unwrap(); + let resolver = UriReferenceResolver::new(&doc); + + let data = resolver.dereference("").unwrap(); + let node_set = data.into_node_set().unwrap(); + + // Comment should be excluded + for node in doc.descendants() { + if node.is_comment() { + assert!( + !node_set.contains(node), + "comment should be excluded for empty URI" + ); + } + } + // Element should still be included + assert!(node_set.contains(doc.root_element())); + } + + #[test] + fn fragment_uri_resolves_by_id_attr() { + let xml = r#"contentother"#; + let doc = Document::parse(xml).unwrap(); + let resolver = UriReferenceResolver::new(&doc); + + let data = resolver.dereference("#abc").unwrap(); + let node_set = data.into_node_set().unwrap(); + + // The element with ID="abc" and its children should be in the set + let abc_elem = doc + .descendants() + .find(|n| n.attribute("ID") == Some("abc")) + .unwrap(); + assert!(node_set.contains(abc_elem)); + + // The text child "content" should also be in the set + let text_child = abc_elem.first_child().unwrap(); + assert!(node_set.contains(text_child)); + + // The root element should NOT be in the set (subtree only) + assert!(!node_set.contains(doc.root_element())); + + // The element with ID="def" should NOT be in the set + let def_elem = doc + .descendants() + .find(|n| n.attribute("ID") == Some("def")) + .unwrap(); + assert!(!node_set.contains(def_elem)); + } + + #[test] + fn fragment_uri_resolves_lowercase_id() { + let xml = r#"text"#; + let doc = Document::parse(xml).unwrap(); + let resolver = UriReferenceResolver::new(&doc); + + let data = resolver.dereference("#lower").unwrap(); + let node_set = data.into_node_set().unwrap(); + + let elem = doc + .descendants() + .find(|n| n.attribute("id") == Some("lower")) + .unwrap(); + assert!(node_set.contains(elem)); + } + + #[test] + fn fragment_uri_resolves_mixed_case_id() { + let xml = r#""#; + let doc = Document::parse(xml).unwrap(); + let resolver = UriReferenceResolver::new(&doc); + + assert!(resolver.has_id("sig1")); + let data = resolver.dereference("#sig1").unwrap(); + assert!(data.into_node_set().is_ok()); + } + + #[test] + fn fragment_uri_not_found() { + let xml = "text"; + let doc = Document::parse(xml).unwrap(); + let resolver = UriReferenceResolver::new(&doc); + + let result = resolver.dereference("#nonexistent"); + assert!(result.is_err()); + match result.unwrap_err() { + TransformError::ElementNotFound(id) => assert_eq!(id, "nonexistent"), + other => panic!("expected ElementNotFound, got: {other:?}"), + } + } + + #[test] + fn unsupported_external_uri() { + let xml = ""; + let doc = Document::parse(xml).unwrap(); + let resolver = UriReferenceResolver::new(&doc); + + let result = resolver.dereference("http://example.com/doc.xml"); + assert!(result.is_err()); + match result.unwrap_err() { + TransformError::UnsupportedUri(uri) => { + assert_eq!(uri, "http://example.com/doc.xml") + } + other => panic!("expected UnsupportedUri, got: {other:?}"), + } + } + + #[test] + fn custom_id_attr_name() { + // roxmltree stores `wsu:Id` with local name "Id" — already in DEFAULT_ID_ATTRS. + // Test with a truly custom attribute name instead. + let xml = r#"data"#; + let doc = Document::parse(xml).unwrap(); + + // Default resolver doesn't know about "myid" + let resolver_default = UriReferenceResolver::new(&doc); + assert!(!resolver_default.has_id("custom1")); + + // Custom resolver with "myid" added + let resolver_custom = UriReferenceResolver::with_id_attrs(&doc, &["myid"]); + assert!(resolver_custom.has_id("custom1")); + + let data = resolver_custom.dereference("#custom1").unwrap(); + assert!(data.into_node_set().is_ok()); + } + + #[test] + fn namespaced_id_attr_found_by_local_name() { + // roxmltree strips prefix: `wsu:Id` → local name "Id", which is in DEFAULT_ID_ATTRS + let xml = + r#"data"#; + let doc = Document::parse(xml).unwrap(); + + let resolver = UriReferenceResolver::new(&doc); + assert!(resolver.has_id("ts1")); + } + + #[test] + fn id_count_reports_unique_ids() { + let xml = r#""#; + let doc = Document::parse(xml).unwrap(); + let resolver = UriReferenceResolver::new(&doc); + + // 4 elements with ID-like attributes + assert_eq!(resolver.id_count(), 4); + } + + #[test] + fn first_id_wins_for_duplicates() { + // If two elements have the same ID value, first occurrence wins + let xml = r#"firstsecond"#; + let doc = Document::parse(xml).unwrap(); + let resolver = UriReferenceResolver::new(&doc); + + let data = resolver.dereference("#dup").unwrap(); + let node_set = data.into_node_set().unwrap(); + + // Should resolve to the first element + let first = doc + .descendants() + .find(|n| n.is_element() && n.has_tag_name("a")) + .unwrap(); + assert!(node_set.contains(first)); + } + + #[test] + fn node_set_exclude_subtree() { + let xml = r#"yesno"#; + let doc = Document::parse(xml).unwrap(); + let resolver = UriReferenceResolver::new(&doc); + + let data = resolver.dereference("").unwrap(); + let mut node_set = data.into_node_set().unwrap(); + + // Find and exclude the subtree + let remove_elem = doc + .descendants() + .find(|n| n.is_element() && n.has_tag_name("remove")) + .unwrap(); + node_set.exclude_subtree(remove_elem); + + // should still be in the set + let keep_elem = doc + .descendants() + .find(|n| n.is_element() && n.has_tag_name("keep")) + .unwrap(); + assert!(node_set.contains(keep_elem)); + + // and its children should be excluded + assert!(!node_set.contains(remove_elem)); + let deep_elem = doc + .descendants() + .find(|n| n.is_element() && n.has_tag_name("deep")) + .unwrap(); + assert!(!node_set.contains(deep_elem)); + } + + #[test] + fn subtree_includes_comments() { + // Subtree dereference (via #id) includes comments, unlike empty URI + let xml = r#""#; + let doc = Document::parse(xml).unwrap(); + let resolver = UriReferenceResolver::new(&doc); + + let data = resolver.dereference("#x").unwrap(); + let node_set = data.into_node_set().unwrap(); + + for node in doc.descendants() { + if node.is_comment() { + assert!( + node_set.contains(node), + "comment should be included in #id subtree" + ); + } + } + } + + #[test] + fn xpointer_root_returns_whole_document_with_comments() { + let xml = ""; + let doc = Document::parse(xml).unwrap(); + let resolver = UriReferenceResolver::new(&doc); + + let data = resolver.dereference("#xpointer(/)").unwrap(); + let node_set = data.into_node_set().unwrap(); + + // Unlike empty URI, xpointer(/) includes comments + for node in doc.descendants() { + if node.is_comment() { + assert!( + node_set.contains(node), + "comment should be included for #xpointer(/)" + ); + } + } + assert!(node_set.contains(doc.root_element())); + } + + #[test] + fn xpointer_id_single_quotes() { + let xml = r#"content"#; + let doc = Document::parse(xml).unwrap(); + let resolver = UriReferenceResolver::new(&doc); + + let data = resolver.dereference("#xpointer(id('abc'))").unwrap(); + let node_set = data.into_node_set().unwrap(); + + let elem = doc + .descendants() + .find(|n| n.attribute("ID") == Some("abc")) + .unwrap(); + assert!(node_set.contains(elem)); + } + + #[test] + fn xpointer_id_double_quotes() { + let xml = r#"content"#; + let doc = Document::parse(xml).unwrap(); + let resolver = UriReferenceResolver::new(&doc); + + let data = resolver.dereference(r#"#xpointer(id("xyz"))"#).unwrap(); + let node_set = data.into_node_set().unwrap(); + + let elem = doc + .descendants() + .find(|n| n.attribute("ID") == Some("xyz")) + .unwrap(); + assert!(node_set.contains(elem)); + } + + #[test] + fn xpointer_id_not_found() { + let xml = ""; + let doc = Document::parse(xml).unwrap(); + let resolver = UriReferenceResolver::new(&doc); + + let result = resolver.dereference("#xpointer(id('missing'))"); + assert!(result.is_err()); + match result.unwrap_err() { + TransformError::ElementNotFound(id) => assert_eq!(id, "missing"), + other => panic!("expected ElementNotFound, got: {other:?}"), + } + } + + #[test] + fn parse_xpointer_id_variants() { + // Valid forms + assert_eq!(super::parse_xpointer_id("xpointer(id('foo'))"), Some("foo")); + assert_eq!( + super::parse_xpointer_id(r#"xpointer(id("bar"))"#), + Some("bar") + ); + + // Invalid forms + assert_eq!(super::parse_xpointer_id("xpointer(/)"), None); + assert_eq!(super::parse_xpointer_id("xpointer(id(foo))"), None); // no quotes + assert_eq!(super::parse_xpointer_id("not-xpointer"), None); + assert_eq!(super::parse_xpointer_id(""), None); + } + + #[test] + fn saml_style_document() { + // Realistic SAML-like structure + let xml = r#" + + user@example.com + + + + + "#; + + let doc = Document::parse(xml).unwrap(); + let resolver = UriReferenceResolver::new(&doc); + + // Should find all three IDs + assert!(resolver.has_id("_resp1")); + assert!(resolver.has_id("_assert1")); + assert!(resolver.has_id("sig1")); + assert_eq!(resolver.id_count(), 3); + + // Dereference the assertion + let data = resolver.dereference("#_assert1").unwrap(); + let node_set = data.into_node_set().unwrap(); + + // Assertion element should be in the set + let assertion = doc + .descendants() + .find(|n| n.attribute("ID") == Some("_assert1")) + .unwrap(); + assert!(node_set.contains(assertion)); + + // Subject (child of assertion) should be in the set + let subject = assertion + .children() + .find(|n| n.is_element() && n.has_tag_name("Subject")) + .unwrap(); + assert!(node_set.contains(subject)); + + // Response (parent) should NOT be in the set + assert!(!node_set.contains(doc.root_element())); + } +} diff --git a/tests/uri_integration.rs b/tests/uri_integration.rs new file mode 100644 index 0000000..c29c578 --- /dev/null +++ b/tests/uri_integration.rs @@ -0,0 +1,222 @@ +//! Integration tests: URI dereference → NodeSet → C14N canonicalization. +//! +//! Verifies that dereferencing a URI produces a NodeSet that, when used as +//! a predicate for C14N, produces the correct canonical output. + +use xml_sec::c14n::{canonicalize, C14nAlgorithm, C14nMode}; +use xml_sec::xmldsig::uri::UriReferenceResolver; + +// ─── Helpers ──────────────────────────────────────────────────────────────── + +/// Dereference `uri`, build a C14N predicate from the resulting NodeSet, +/// canonicalize with inclusive C14N 1.0, and return the canonical string. +fn deref_and_canonicalize(xml: &str, uri: &str) -> String { + let doc = roxmltree::Document::parse(xml).expect("parse"); + let resolver = UriReferenceResolver::new(&doc); + + let data = resolver.dereference(uri).expect("dereference"); + let node_set = data.into_node_set().expect("into_node_set"); + + let algo = C14nAlgorithm::new(C14nMode::Inclusive1_0, false); + let predicate = |n: roxmltree::Node| node_set.contains(n); + let mut output = Vec::new(); + canonicalize(&doc, Some(&predicate), &algo, &mut output).expect("canonicalize"); + String::from_utf8(output).expect("utf8") +} + +/// Same but with comments enabled (for xpointer(/) which includes comments). +fn deref_and_canonicalize_with_comments(xml: &str, uri: &str) -> String { + let doc = roxmltree::Document::parse(xml).expect("parse"); + let resolver = UriReferenceResolver::new(&doc); + + let data = resolver.dereference(uri).expect("dereference"); + let node_set = data.into_node_set().expect("into_node_set"); + + let algo = C14nAlgorithm::new(C14nMode::Inclusive1_0, true); + let predicate = |n: roxmltree::Node| node_set.contains(n); + let mut output = Vec::new(); + canonicalize(&doc, Some(&predicate), &algo, &mut output).expect("canonicalize"); + String::from_utf8(output).expect("utf8") +} + +// ─── Empty URI: whole document without comments ───────────────────────────── + +#[test] +fn empty_uri_canonicalizes_whole_document() { + let xml = r#""#; + let result = deref_and_canonicalize(xml, ""); + // C14N sorts attributes and expands empty elements + assert_eq!(result, r#""#); +} + +#[test] +fn empty_uri_strips_comments() { + let xml = "text"; + let result = deref_and_canonicalize(xml, ""); + // Comment should be stripped for empty URI dereference + assert_eq!(result, "text"); +} + +#[test] +fn empty_uri_with_namespaces() { + let xml = r#""#; + let result = deref_and_canonicalize(xml, ""); + // Inclusive C14N: root declares both ns, child suppresses redundant redeclarations + assert_eq!( + result, + r#""# + ); +} + +// ─── #id: subtree by ID ───────────────────────────────────────────────────── + +#[test] +fn fragment_id_canonicalizes_subtree_only() { + let xml = r#"skiptextskip"#; + let result = deref_and_canonicalize(xml, "#t1"); + // Only the target subtree should appear; root, before, after excluded + assert_eq!( + result, + r#"text"# + ); +} + +#[test] +fn fragment_id_includes_comments_in_subtree() { + // Unlike empty URI, #id subtrees include comments + let xml = r#""#; + let result = deref_and_canonicalize_with_comments(xml, "#x"); + assert_eq!( + result, + r#""# + ); +} + +#[test] +fn fragment_id_inherits_ancestor_namespaces() { + // When canonicalizing a subtree, inclusive C14N emits in-scope namespaces + // from ancestor elements even though those ancestors are not in the node set + let xml = + r#""#; + let result = deref_and_canonicalize(xml, "#sub"); + // ns:item declares xmlns:ns (inherited from root ancestor outside subset). + // ns:child suppresses redundant redeclaration since parent ns:item already declared it. + assert_eq!( + result, + r#""# + ); +} + +// ─── #xpointer(/) : whole document WITH comments ─────────────────────────── + +#[test] +fn xpointer_root_includes_comments() { + let xml = ""; + let result = deref_and_canonicalize_with_comments(xml, "#xpointer(/)"); + // xpointer(/) includes comments, unlike empty URI + assert_eq!(result, ""); +} + +#[test] +fn xpointer_root_vs_empty_uri_comment_difference() { + let xml = ""; + + let empty_uri = deref_and_canonicalize(xml, ""); + let xpointer_root = deref_and_canonicalize_with_comments(xml, "#xpointer(/)"); + + // Empty URI: comments stripped + assert_eq!(empty_uri, ""); + // xpointer(/): comments preserved + assert_eq!( + xpointer_root, + "" + ); +} + +// ─── #xpointer(id('...')) : equivalent to bare-name ──────────────────────── + +#[test] +fn xpointer_id_canonicalizes_same_as_bare_name() { + let xml = r#"data"#; + + let bare_name = deref_and_canonicalize(xml, "#abc"); + let xpointer = deref_and_canonicalize(xml, "#xpointer(id('abc'))"); + + assert_eq!(bare_name, xpointer); +} + +// ─── SAML-like scenario ───────────────────────────────────────────────────── + +#[test] +fn saml_assertion_subtree_canonicalization() { + // Realistic SAML: dereference assertion by ID, canonicalize the subtree + let xml = r#" + + user@example.com + +"#; + + let result = deref_and_canonicalize(xml, "#_a1"); + + // Assertion subtree with inherited namespace declarations + assert!( + result.contains("xmlns:saml=\"urn:oasis:names:tc:SAML:2.0:assertion\""), + "should inherit saml namespace from ancestor: {result}" + ); + assert!( + result.contains("user@example.com"), + "should include Subject child: {result}" + ); + // Response element should NOT appear + assert!( + !result.contains("samlp:Response"), + "Response should not be in subtree: {result}" + ); +} + +#[test] +fn saml_enveloped_signature_exclusion() { + // Simulate enveloped signature: dereference whole doc, then exclude Signature subtree + // This is what P1-014 (enveloped transform) will do, but we can test the + // NodeSet.exclude_subtree() + C14N combination here + let xml = r#" + data + + digest + +"#; + + let doc = roxmltree::Document::parse(xml).expect("parse"); + let resolver = UriReferenceResolver::new(&doc); + + let data = resolver.dereference("").expect("dereference"); + let mut node_set = data.into_node_set().expect("into_node_set"); + + // Exclude the Signature subtree (simulating enveloped-signature transform) + let sig_elem = doc + .descendants() + .find(|n| n.is_element() && n.has_tag_name("Signature")) + .expect("Signature element"); + node_set.exclude_subtree(sig_elem); + + let algo = C14nAlgorithm::new(C14nMode::Inclusive1_0, false); + let predicate = |n: roxmltree::Node| node_set.contains(n); + let mut output = Vec::new(); + canonicalize(&doc, Some(&predicate), &algo, &mut output).expect("canonicalize"); + let result = String::from_utf8(output).expect("utf8"); + + // Signature and its children should be gone + assert!( + !result.contains("Signature"), + "Signature should be excluded: {result}" + ); + assert!( + !result.contains("SignedInfo"), + "SignedInfo should be excluded: {result}" + ); + // Assertion should remain + assert!( + result.contains("data"), + "Assertion should remain: {result}" + ); +} From 67abda9632855b5fbb5a7febad94927dc7e6e5d0 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Mon, 16 Mar 2026 04:06:54 +0200 Subject: [PATCH 2/9] fix(xmldsig): harden URI dereference error handling and DoS safety - Return UnsupportedUri for unrecognized XPointer expressions instead of falling through to ID lookup (misclassified errors) - Replace recursive collect_subtree_ids with iterative stack traversal to prevent stack overflow on deeply nested attacker-controlled XML - Document all supported URI forms in dereference() doc comment - Add unit test for unsupported XPointer expressions --- src/xmldsig/types.rs | 12 +++++++++--- src/xmldsig/uri.rs | 31 +++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/src/xmldsig/types.rs b/src/xmldsig/types.rs index b9001dc..187fc17 100644 --- a/src/xmldsig/types.rs +++ b/src/xmldsig/types.rs @@ -154,10 +154,16 @@ impl<'a> NodeSet<'a> { } /// Collect a node and all its descendants into a set of `NodeId`s. +/// +/// Uses an explicit stack instead of recursion to avoid stack overflow +/// on deeply nested XML (attacker-controlled input in SAML contexts). fn collect_subtree_ids(node: Node<'_, '_>, ids: &mut HashSet) { - ids.insert(node.id()); - for child in node.children() { - collect_subtree_ids(child, ids); + let mut stack = vec![node]; + while let Some(current) = stack.pop() { + ids.insert(current.id()); + for child in current.children() { + stack.push(child); + } } // roxmltree models attributes as children only for elements, // but they're accessible via node.attributes(). For node-set diff --git a/src/xmldsig/uri.rs b/src/xmldsig/uri.rs index b3745e2..e532344 100644 --- a/src/xmldsig/uri.rs +++ b/src/xmldsig/uri.rs @@ -94,6 +94,8 @@ impl<'a> UriReferenceResolver<'a> { /// |-----|--------| /// | `""` (empty) | Entire document, comments excluded | /// | `"#foo"` | Subtree rooted at element with ID `foo` | + /// | `"#xpointer(/)"` | Entire document, comments included | + /// | `"#xpointer(id('foo'))"` | Subtree rooted at element with ID `foo` | /// | other | `Err(UnsupportedUri)` | pub fn dereference(&self, uri: &str) -> Result, TransformError> { if uri.is_empty() { @@ -127,6 +129,9 @@ impl<'a> UriReferenceResolver<'a> { } else if let Some(id) = parse_xpointer_id(fragment) { // xpointer(id('foo')) → same as bare-name #foo self.resolve_id(id) + } else if fragment.starts_with("xpointer(") { + // Any other XPointer expression is unsupported + Err(TransformError::UnsupportedUri(format!("#{fragment}"))) } else { // Bare-name fragment: #foo → element by ID self.resolve_id(fragment) @@ -298,6 +303,32 @@ mod tests { } } + #[test] + fn unsupported_xpointer_expression() { + // XPointer expressions other than xpointer(/) and xpointer(id(...)) + // should return UnsupportedUri, not fall through to ID lookup + let xml = ""; + let doc = Document::parse(xml).unwrap(); + let resolver = UriReferenceResolver::new(&doc); + + let result = resolver.dereference("#xpointer(foo())"); + assert!(result.is_err()); + match result.unwrap_err() { + TransformError::UnsupportedUri(uri) => { + assert_eq!(uri, "#xpointer(foo())") + } + other => panic!("expected UnsupportedUri, got: {other:?}"), + } + + // Generic XPointer with XPath should also be unsupported + let result = resolver.dereference("#xpointer(//element)"); + assert!(result.is_err()); + assert!(matches!( + result.unwrap_err(), + TransformError::UnsupportedUri(_) + )); + } + #[test] fn custom_id_attr_name() { // roxmltree stores `wsu:Id` with local name "Id" — already in DEFAULT_ID_ATTRS. From eac825f55d56adefad499cc43056f3c271ac11ac Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Mon, 16 Mar 2026 04:33:53 +0200 Subject: [PATCH 3/9] fix(xmldsig): reject duplicate IDs, guard foreign nodes, reject empty fragment - Remove duplicate IDs from index to prevent signature-wrapping attacks (ambiguous IDs fail with ElementNotFound instead of picking arbitrarily) - Guard NodeSet.contains() and exclude_subtree() against cross-document nodes via pointer comparison (prevents NodeId collisions) - Reject bare "#" (empty fragment) as UnsupportedUri - Add xpointer(id('...')) to module-level doc comment - Clarify with_id_attrs() doc: extra_attrs appends, does not replace - Add code comment explaining intentional lack of percent-decoding --- src/xmldsig/mod.rs | 2 +- src/xmldsig/types.rs | 16 ++++++++ src/xmldsig/uri.rs | 90 ++++++++++++++++++++++++++++++++++++-------- 3 files changed, 91 insertions(+), 17 deletions(-) diff --git a/src/xmldsig/mod.rs b/src/xmldsig/mod.rs index 0a33452..c68dd30 100644 --- a/src/xmldsig/mod.rs +++ b/src/xmldsig/mod.rs @@ -4,7 +4,7 @@ //! //! ## Current Status //! -//! - URI dereference: same-document references (`""`, `#id`, `#xpointer(/)`) +//! - URI dereference: same-document references (`""`, `#id`, `#xpointer(/)`, `#xpointer(id('...'))`) //! - ID attribute resolution with configurable attribute names //! - Node set types for the transform pipeline diff --git a/src/xmldsig/types.rs b/src/xmldsig/types.rs index 187fc17..610cb17 100644 --- a/src/xmldsig/types.rs +++ b/src/xmldsig/types.rs @@ -122,7 +122,17 @@ impl<'a> NodeSet<'a> { } /// Check whether a node is in this set. + /// + /// Returns `false` for nodes from a different document than this set's + /// owning document (prevents cross-document NodeId collisions). pub fn contains(&self, node: Node<'_, '_>) -> bool { + // Guard: reject nodes from a different document. NodeIds are + // per-document indices — the same index from another document + // would reference a completely different node. + if !std::ptr::eq(node.document() as *const _, self.doc as *const _) { + return false; + } + let id = node.id(); // Check exclusion first @@ -143,7 +153,13 @@ impl<'a> NodeSet<'a> { } /// Exclude a node and all its descendants from this set. + /// + /// No-op for nodes from a different document. pub fn exclude_subtree(&mut self, node: Node<'_, '_>) { + // Guard: only exclude nodes from our document + if !std::ptr::eq(node.document() as *const _, self.doc as *const _) { + return; + } collect_subtree_ids(node, &mut self.excluded); } diff --git a/src/xmldsig/uri.rs b/src/xmldsig/uri.rs index e532344..11dfdb8 100644 --- a/src/xmldsig/uri.rs +++ b/src/xmldsig/uri.rs @@ -9,6 +9,7 @@ //! External URIs (http://, file://, etc.) are not supported — only same-document //! references are needed for SAML signature verification. +use std::collections::hash_map::Entry; use std::collections::HashMap; use roxmltree::{Document, Node}; @@ -56,10 +57,10 @@ impl<'a> UriReferenceResolver<'a> { Self::with_id_attrs(doc, DEFAULT_ID_ATTRS) } - /// Build a resolver with custom ID attribute names. + /// Build a resolver scanning additional ID attribute names beyond the defaults. /// - /// The defaults (`ID`, `Id`, `id`) are always included. Additional names - /// are appended. Pass an empty slice to use only the defaults. + /// The defaults (`ID`, `Id`, `id`) are always included; `extra_attrs` + /// adds to them (does not replace). Pass an empty slice to use only defaults. pub fn with_id_attrs(doc: &'a Document<'a>, extra_attrs: &[&str]) -> Self { let mut id_map = HashMap::new(); @@ -76,8 +77,18 @@ impl<'a> UriReferenceResolver<'a> { if node.is_element() { for attr_name in &attr_names { if let Some(value) = node.attribute(*attr_name) { - // First occurrence wins (per XML spec, IDs should be unique) - id_map.entry(value).or_insert(node); + // Duplicate IDs are invalid per XML spec and can enable + // signature-wrapping attacks. Remove the entry so that + // lookups for ambiguous IDs fail with ElementNotFound + // rather than silently picking an arbitrary node. + match id_map.entry(value) { + Entry::Vacant(v) => { + v.insert(node); + } + Entry::Occupied(o) => { + o.remove(); + } + } } } } @@ -106,6 +117,10 @@ impl<'a> UriReferenceResolver<'a> { NodeSet::entire_document_without_comments(self.doc), )) } else if let Some(fragment) = uri.strip_prefix('#') { + // Note: we intentionally do NOT percent-decode the fragment. + // XMLDSig ID values are XML Name tokens (no spaces/special chars), + // and real-world SAML never uses percent-encoded fragments. + // xmlsec1 also passes fragments through without decoding. self.dereference_fragment(fragment) } else { Err(TransformError::UnsupportedUri(uri.to_string())) @@ -119,6 +134,11 @@ impl<'a> UriReferenceResolver<'a> { /// - `xpointer(id('foo'))` → element by ID (equivalent to bare-name `#foo`) /// - bare name `foo` → element by ID attribute fn dereference_fragment(&self, fragment: &str) -> Result, TransformError> { + if fragment.is_empty() { + // Bare "#" is not a valid same-document reference + return Err(TransformError::UnsupportedUri("#".to_string())); + } + if fragment == "xpointer(/)" { // XPointer root: entire document WITH comments (unlike empty URI). // Per XMLDSig §4.3.3.3: "the XPointer expression [...] includes @@ -175,6 +195,7 @@ fn parse_xpointer_id(fragment: &str) -> Option<&str> { #[cfg(test)] #[allow(clippy::unwrap_used)] mod tests { + use super::super::types::NodeSet; use super::*; #[test] @@ -329,6 +350,43 @@ mod tests { )); } + #[test] + fn empty_fragment_rejected() { + // Bare "#" (empty fragment) is not a valid same-document reference + let xml = ""; + let doc = Document::parse(xml).unwrap(); + let resolver = UriReferenceResolver::new(&doc); + + let result = resolver.dereference("#"); + assert!(result.is_err()); + match result.unwrap_err() { + TransformError::UnsupportedUri(uri) => assert_eq!(uri, "#"), + other => panic!("expected UnsupportedUri, got: {other:?}"), + } + } + + #[test] + fn foreign_document_node_rejected() { + // NodeSet.contains() must reject nodes from a different document + let xml1 = ""; + let xml2 = ""; + let doc1 = Document::parse(xml1).unwrap(); + let doc2 = Document::parse(xml2).unwrap(); + + let node_set = NodeSet::entire_document_without_comments(&doc1); + + // Node from doc2 should NOT be in doc1's node set + let foreign_node = doc2.root_element(); + assert!( + !node_set.contains(foreign_node), + "foreign document node should be rejected" + ); + + // Node from doc1 should be in the set + let own_node = doc1.root_element(); + assert!(node_set.contains(own_node)); + } + #[test] fn custom_id_attr_name() { // roxmltree stores `wsu:Id` with local name "Id" — already in DEFAULT_ID_ATTRS. @@ -370,21 +428,21 @@ mod tests { } #[test] - fn first_id_wins_for_duplicates() { - // If two elements have the same ID value, first occurrence wins + fn duplicate_ids_are_rejected() { + // Duplicate IDs are removed from the index to prevent signature-wrapping + // attacks — lookups for ambiguous IDs fail instead of picking arbitrarily. let xml = r#"firstsecond"#; let doc = Document::parse(xml).unwrap(); let resolver = UriReferenceResolver::new(&doc); - let data = resolver.dereference("#dup").unwrap(); - let node_set = data.into_node_set().unwrap(); - - // Should resolve to the first element - let first = doc - .descendants() - .find(|n| n.is_element() && n.has_tag_name("a")) - .unwrap(); - assert!(node_set.contains(first)); + // "dup" appears twice → removed from index + assert!(!resolver.has_id("dup")); + let result = resolver.dereference("#dup"); + assert!(result.is_err()); + assert!(matches!( + result.unwrap_err(), + TransformError::ElementNotFound(_) + )); } #[test] From 793bb8f03dcbb841fdfd49cc2d3a24a7bd9b4c7c Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Mon, 16 Mar 2026 04:54:56 +0200 Subject: [PATCH 4/9] fix(xmldsig): prevent duplicate ID re-insertion on 3+ occurrences - Track duplicate IDs in a HashSet so they are permanently excluded from the index (fixes bypass where 3rd occurrence re-inserts via Entry::Vacant after 2nd occurrence removed the entry) - Add xpointer(/) and xpointer(id('...')) to uri.rs module-level docs - Add test for triple-duplicate ID rejection --- src/xmldsig/uri.rs | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/xmldsig/uri.rs b/src/xmldsig/uri.rs index 11dfdb8..eca8d17 100644 --- a/src/xmldsig/uri.rs +++ b/src/xmldsig/uri.rs @@ -5,12 +5,14 @@ //! //! - **Empty URI** (`""` or absent): the entire document, excluding comments. //! - **Bare-name `#id`**: the element whose ID attribute matches `id`, as a subtree. +//! - **`#xpointer(/)`**: the entire document, including comments. +//! - **`#xpointer(id('id'))` / `#xpointer(id("id"))`**: element by ID (equivalent to bare-name). //! //! External URIs (http://, file://, etc.) are not supported — only same-document //! references are needed for SAML signature verification. use std::collections::hash_map::Entry; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use roxmltree::{Document, Node}; @@ -63,6 +65,9 @@ impl<'a> UriReferenceResolver<'a> { /// adds to them (does not replace). Pass an empty slice to use only defaults. pub fn with_id_attrs(doc: &'a Document<'a>, extra_attrs: &[&str]) -> Self { let mut id_map = HashMap::new(); + // Track IDs seen more than once so they are never reinserted + // after being removed (handles 3+ occurrences correctly). + let mut duplicate_ids: HashSet<&'a str> = HashSet::new(); // Merge default + extra attribute names, dedup let mut attr_names: Vec<&str> = DEFAULT_ID_ATTRS.to_vec(); @@ -77,6 +82,11 @@ impl<'a> UriReferenceResolver<'a> { if node.is_element() { for attr_name in &attr_names { if let Some(value) = node.attribute(*attr_name) { + // Skip IDs already marked as duplicate + if duplicate_ids.contains(value) { + continue; + } + // Duplicate IDs are invalid per XML spec and can enable // signature-wrapping attacks. Remove the entry so that // lookups for ambiguous IDs fail with ElementNotFound @@ -87,6 +97,7 @@ impl<'a> UriReferenceResolver<'a> { } Entry::Occupied(o) => { o.remove(); + duplicate_ids.insert(value); } } } @@ -445,6 +456,18 @@ mod tests { )); } + #[test] + fn triple_duplicate_ids_stay_rejected() { + // Verify that 3+ occurrences don't re-insert (the HashSet tracks + // permanently removed IDs so Entry::Vacant after remove doesn't re-add) + let xml = r#"123"#; + let doc = Document::parse(xml).unwrap(); + let resolver = UriReferenceResolver::new(&doc); + + assert!(!resolver.has_id("dup")); + assert!(resolver.dereference("#dup").is_err()); + } + #[test] fn node_set_exclude_subtree() { let xml = r#"yesno"#; From 85c3e605b117652d8c356418c08908c5cb0e6345 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Mon, 16 Mar 2026 05:06:34 +0200 Subject: [PATCH 5/9] fix(xmldsig): safe xpointer quote parsing, same-element dedup guard - Use strip_prefix/strip_suffix instead of panicking slice in parse_xpointer_id (prevents panic on malformed input like `id(')`) - Only mark ID as duplicate when mapped to a *different* element (same element with both ID="x" and Id="x" is not ambiguous) --- src/xmldsig/uri.rs | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/src/xmldsig/uri.rs b/src/xmldsig/uri.rs index eca8d17..9633902 100644 --- a/src/xmldsig/uri.rs +++ b/src/xmldsig/uri.rs @@ -96,8 +96,14 @@ impl<'a> UriReferenceResolver<'a> { v.insert(node); } Entry::Occupied(o) => { - o.remove(); - duplicate_ids.insert(value); + // Only treat as duplicate if a *different* element + // maps the same ID value. The same element can + // expose the same value via multiple scanned attrs + // (e.g., both `ID="x"` and `Id="x"`). + if o.get().id() != node.id() { + o.remove(); + duplicate_ids.insert(value); + } } } } @@ -193,11 +199,12 @@ impl<'a> UriReferenceResolver<'a> { fn parse_xpointer_id(fragment: &str) -> Option<&str> { let inner = fragment.strip_prefix("xpointer(id(")?.strip_suffix("))")?; - // Strip single or double quotes - if (inner.starts_with('\'') && inner.ends_with('\'')) - || (inner.starts_with('"') && inner.ends_with('"')) - { - Some(&inner[1..inner.len() - 1]) + // Strip single or double quotes using safe helpers to avoid panics + // on malformed input (e.g., `xpointer(id('))` where inner is `'`) + if let Some(stripped) = inner.strip_prefix('\'').and_then(|s| s.strip_suffix('\'')) { + Some(stripped) + } else if let Some(stripped) = inner.strip_prefix('"').and_then(|s| s.strip_suffix('"')) { + Some(stripped) } else { None } @@ -601,6 +608,23 @@ mod tests { assert_eq!(super::parse_xpointer_id("xpointer(id(foo))"), None); // no quotes assert_eq!(super::parse_xpointer_id("not-xpointer"), None); assert_eq!(super::parse_xpointer_id(""), None); + + // Malformed: single quote char — must not panic (was slicing bug) + assert_eq!(super::parse_xpointer_id("xpointer(id('))"), None); + assert_eq!(super::parse_xpointer_id(r#"xpointer(id("))"#), None); + } + + #[test] + fn same_element_multiple_id_attrs_not_duplicate() { + // An element with both ID="x" and Id="x" should NOT be treated as + // duplicate — it's the same element exposing the same value via + // different scanned attribute names. + let xml = r#"data"#; + let doc = Document::parse(xml).unwrap(); + let resolver = UriReferenceResolver::new(&doc); + + assert!(resolver.has_id("x")); + assert!(resolver.dereference("#x").is_ok()); } #[test] From bc5c7039a1b33143a23049fd9c507188e8e26566 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Mon, 16 Mar 2026 05:52:18 +0200 Subject: [PATCH 6/9] docs(xmldsig): clarify NodeSet::subtree doc re attribute/namespace tracking --- src/xmldsig/types.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/xmldsig/types.rs b/src/xmldsig/types.rs index 610cb17..8bce6a9 100644 --- a/src/xmldsig/types.rs +++ b/src/xmldsig/types.rs @@ -103,8 +103,14 @@ impl<'a> NodeSet<'a> { } } - /// Create a node set representing an element and all its descendants - /// (including attributes, namespaces, and text nodes), with comments. + /// Create a node set rooted at `element`, containing that element and all + /// of its descendant nodes (elements, text, and, for this constructor, + /// comment nodes). + /// + /// Note: in `roxmltree`, attributes and namespaces are not separate nodes + /// and therefore are not tracked individually in this `NodeSet`. During + /// canonicalization, any attributes and namespace declarations belonging to + /// the included elements are serialized as part of those elements. pub fn subtree(doc: &'a Document<'a>, element: Node<'a, 'a>) -> Self { let mut ids = HashSet::new(); collect_subtree_ids(element, &mut ids); From cd6297835d80478142e5bce75cca1892cbffad95 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Mon, 16 Mar 2026 10:45:39 +0200 Subject: [PATCH 7/9] fix(xmldsig): reject empty xpointer id, document local-name matching - Reject xpointer(id('')) as UnsupportedUri (empty string is not a valid XML Name, consistent with bare '#' rejection) - Document roxmltree local-name attribute matching in with_id_attrs() - Clarify element-only NodeId tracking in collect_subtree_ids --- src/xmldsig/types.rs | 14 +++++++------- src/xmldsig/uri.rs | 25 +++++++++++++++++++++++++ 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/src/xmldsig/types.rs b/src/xmldsig/types.rs index 8bce6a9..93863e7 100644 --- a/src/xmldsig/types.rs +++ b/src/xmldsig/types.rs @@ -187,13 +187,13 @@ fn collect_subtree_ids(node: Node<'_, '_>, ids: &mut HashSet) { stack.push(child); } } - // roxmltree models attributes as children only for elements, - // but they're accessible via node.attributes(). For node-set - // membership, we need to track the element ID — the C14N - // serializer checks element membership and then serializes - // all its attributes. Individual attribute NodeIds are not - // needed because roxmltree doesn't expose them as separate nodes - // in the tree traversal. + // In roxmltree, attributes and namespaces are not nodes and do not + // appear in `children()` traversal; they're accessed via + // node.attributes(). For node-set membership we therefore track only + // element NodeIds. During C14N, the serializer checks whether an + // element is in the node set and then serializes all of that element's + // attributes/namespaces as part of the element, so separate attribute + // identifiers are unnecessary. } /// Errors during transform processing. diff --git a/src/xmldsig/uri.rs b/src/xmldsig/uri.rs index 9633902..d35543b 100644 --- a/src/xmldsig/uri.rs +++ b/src/xmldsig/uri.rs @@ -63,6 +63,12 @@ impl<'a> UriReferenceResolver<'a> { /// /// The defaults (`ID`, `Id`, `id`) are always included; `extra_attrs` /// adds to them (does not replace). Pass an empty slice to use only defaults. + /// + /// Attribute names are matched using `roxmltree`'s *local-name* view of + /// attributes: any namespace prefix is stripped before comparison. For + /// example, an attribute written as `wsu:Id="..."` in the XML is seen as + /// simply `Id` by `roxmltree`, so callers **must** pass `"Id"`, not + /// `"wsu:Id"` or `"{namespace}Id"`. pub fn with_id_attrs(doc: &'a Document<'a>, extra_attrs: &[&str]) -> Self { let mut id_map = HashMap::new(); // Track IDs seen more than once so they are never reinserted @@ -165,6 +171,10 @@ impl<'a> UriReferenceResolver<'a> { )) } else if let Some(id) = parse_xpointer_id(fragment) { // xpointer(id('foo')) → same as bare-name #foo + // Reject empty parsed ID (e.g., xpointer(id(''))) — not a valid XML Name + if id.is_empty() { + return Err(TransformError::UnsupportedUri(format!("#{fragment}"))); + } self.resolve_id(id) } else if fragment.starts_with("xpointer(") { // Any other XPointer expression is unsupported @@ -594,6 +604,21 @@ mod tests { } } + #[test] + fn xpointer_id_empty_value_rejected() { + // xpointer(id('')) parses to empty string — reject as UnsupportedUri + let xml = ""; + let doc = Document::parse(xml).unwrap(); + let resolver = UriReferenceResolver::new(&doc); + + let result = resolver.dereference("#xpointer(id(''))"); + assert!(result.is_err()); + assert!(matches!( + result.unwrap_err(), + TransformError::UnsupportedUri(_) + )); + } + #[test] fn parse_xpointer_id_variants() { // Valid forms From ce155a8bd2fbdd47429c78bcc4726277a4dd32b6 Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Mon, 16 Mar 2026 13:42:19 +0200 Subject: [PATCH 8/9] refactor(xmldsig): remove unused UriDeref error variant --- src/xmldsig/types.rs | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/xmldsig/types.rs b/src/xmldsig/types.rs index 93863e7..6ed17fe 100644 --- a/src/xmldsig/types.rs +++ b/src/xmldsig/types.rs @@ -208,10 +208,6 @@ pub enum TransformError { got: &'static str, }, - /// URI dereference failed. - #[error("URI dereference failed: {0}")] - UriDeref(String), - /// Element not found by ID. #[error("element not found by ID: {0}")] ElementNotFound(String), From f468dd42c7b15b4c5e49f0af149750bbc79c2f9c Mon Sep 17 00:00:00 2001 From: Dmitry Prudnikov Date: Mon, 16 Mar 2026 14:18:18 +0200 Subject: [PATCH 9/9] refactor(xmldsig): derive doc from element in NodeSet::subtree, consolidate test helpers - Remove redundant doc parameter from NodeSet::subtree(), derive from element.document() to enforce same-document invariant at construction - Correct collect_subtree_ids inline note: tracks all children() nodes (elements, text, comments, PIs), not only elements - Consolidate deref_and_canonicalize test helpers into shared impl --- src/xmldsig/types.rs | 15 ++++++++------- src/xmldsig/uri.rs | 2 +- tests/uri_integration.rs | 20 ++++++++------------ 3 files changed, 17 insertions(+), 20 deletions(-) diff --git a/src/xmldsig/types.rs b/src/xmldsig/types.rs index 6ed17fe..8513bd6 100644 --- a/src/xmldsig/types.rs +++ b/src/xmldsig/types.rs @@ -111,11 +111,11 @@ impl<'a> NodeSet<'a> { /// and therefore are not tracked individually in this `NodeSet`. During /// canonicalization, any attributes and namespace declarations belonging to /// the included elements are serialized as part of those elements. - pub fn subtree(doc: &'a Document<'a>, element: Node<'a, 'a>) -> Self { + pub fn subtree(element: Node<'a, 'a>) -> Self { let mut ids = HashSet::new(); collect_subtree_ids(element, &mut ids); Self { - doc, + doc: element.document(), included: Some(ids), excluded: HashSet::new(), with_comments: true, @@ -189,11 +189,12 @@ fn collect_subtree_ids(node: Node<'_, '_>, ids: &mut HashSet) { } // In roxmltree, attributes and namespaces are not nodes and do not // appear in `children()` traversal; they're accessed via - // node.attributes(). For node-set membership we therefore track only - // element NodeIds. During C14N, the serializer checks whether an - // element is in the node set and then serializes all of that element's - // attributes/namespaces as part of the element, so separate attribute - // identifiers are unnecessary. + // node.attributes(). We therefore track the NodeIds of all descendant + // nodes reachable via `children()` (elements, text, comments, + // processing instructions, etc.). During C14N, the serializer checks + // whether an element is in the node set and then serializes all of + // that element's attributes/namespaces as part of the element, so + // separate attribute/namespace identifiers are unnecessary. } /// Errors during transform processing. diff --git a/src/xmldsig/uri.rs b/src/xmldsig/uri.rs index d35543b..539cf83 100644 --- a/src/xmldsig/uri.rs +++ b/src/xmldsig/uri.rs @@ -188,7 +188,7 @@ impl<'a> UriReferenceResolver<'a> { /// Look up an element by its ID attribute value and return a subtree node set. fn resolve_id(&self, id: &str) -> Result, TransformError> { match self.id_map.get(id) { - Some(&element) => Ok(TransformData::NodeSet(NodeSet::subtree(self.doc, element))), + Some(&element) => Ok(TransformData::NodeSet(NodeSet::subtree(element))), None => Err(TransformError::ElementNotFound(id.to_string())), } } diff --git a/tests/uri_integration.rs b/tests/uri_integration.rs index c29c578..b4776fd 100644 --- a/tests/uri_integration.rs +++ b/tests/uri_integration.rs @@ -11,28 +11,24 @@ use xml_sec::xmldsig::uri::UriReferenceResolver; /// Dereference `uri`, build a C14N predicate from the resulting NodeSet, /// canonicalize with inclusive C14N 1.0, and return the canonical string. fn deref_and_canonicalize(xml: &str, uri: &str) -> String { - let doc = roxmltree::Document::parse(xml).expect("parse"); - let resolver = UriReferenceResolver::new(&doc); - - let data = resolver.dereference(uri).expect("dereference"); - let node_set = data.into_node_set().expect("into_node_set"); - - let algo = C14nAlgorithm::new(C14nMode::Inclusive1_0, false); - let predicate = |n: roxmltree::Node| node_set.contains(n); - let mut output = Vec::new(); - canonicalize(&doc, Some(&predicate), &algo, &mut output).expect("canonicalize"); - String::from_utf8(output).expect("utf8") + deref_and_canonicalize_impl(xml, uri, false) } /// Same but with comments enabled (for xpointer(/) which includes comments). fn deref_and_canonicalize_with_comments(xml: &str, uri: &str) -> String { + deref_and_canonicalize_impl(xml, uri, true) +} + +/// Shared implementation for dereferencing and canonicalizing, parameterized +/// by whether comments should be included. +fn deref_and_canonicalize_impl(xml: &str, uri: &str, with_comments: bool) -> String { let doc = roxmltree::Document::parse(xml).expect("parse"); let resolver = UriReferenceResolver::new(&doc); let data = resolver.dereference(uri).expect("dereference"); let node_set = data.into_node_set().expect("into_node_set"); - let algo = C14nAlgorithm::new(C14nMode::Inclusive1_0, true); + let algo = C14nAlgorithm::new(C14nMode::Inclusive1_0, with_comments); let predicate = |n: roxmltree::Node| node_set.contains(n); let mut output = Vec::new(); canonicalize(&doc, Some(&predicate), &algo, &mut output).expect("canonicalize");