From 865e79f68d16b8709a9c4f280af251a94c223b0b Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Mon, 16 Mar 2026 02:45:14 +0200
Subject: [PATCH 1/9] feat(xmldsig): add URI dereference for Reference elements
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- UriReferenceResolver with configurable ID attribute names (ID/Id/id + custom)
- Empty URI → entire document without comments (XMLDSig §4.3.3.2)
- Bare-name #id → element subtree by ID attribute
- #xpointer(/) → document root with comments
- #xpointer(id('...')) → element subtree (XPointer form)
- NodeSet type with subtree inclusion/exclusion for transform pipeline
- TransformData enum (NodeSet/Binary) for transform chain data flow

Closes #8
---
 src/xmldsig/mod.rs       |  11 +
 src/xmldsig/types.rs     | 202 +++++++++++++++
 src/xmldsig/uri.rs       | 538 +++++++++++++++++++++++++++++++++++++++
 tests/uri_integration.rs | 222 ++++++++++++++++
 4 files changed, 973 insertions(+)
 create mode 100644 src/xmldsig/types.rs
 create mode 100644 src/xmldsig/uri.rs
 create mode 100644 tests/uri_integration.rs
diff --git a/src/xmldsig/mod.rs b/src/xmldsig/mod.rs
index 04a4156..0a33452 100644
--- a/src/xmldsig/mod.rs
+++ b/src/xmldsig/mod.rs
@@ -1,3 +1,14 @@
 //! XML Digital Signatures (XMLDSig).
 //!
 //! Implements [XML Signature Syntax and Processing](https://www.w3.org/TR/xmldsig-core1/).
+//!
+//! ## Current Status
+//!
+//! - URI dereference: same-document references (`""`, `#id`, `#xpointer(/)`)
+//! - ID attribute resolution with configurable attribute names
+//! - Node set types for the transform pipeline
+
+pub mod types;
+pub mod uri;
+
+pub use types::{NodeSet, TransformData, TransformError};
diff --git a/src/xmldsig/types.rs b/src/xmldsig/types.rs
new file mode 100644
index 0000000..b9001dc
--- /dev/null
+++ b/src/xmldsig/types.rs
@@ -0,0 +1,202 @@
+//! Core types for the XMLDSig transform pipeline.
+//!
+//! These types flow between URI dereference, transforms, and digest computation.
+//!
+//! These types are consumed by URI dereference, the transform chain (P1-014,
+//! P1-015), and reference processing (P1-018).
+
+use std::collections::HashSet;
+
+use roxmltree::{Document, Node, NodeId};
+
+// roxmltree 0.21 uses `Node<'a, 'input: 'a>`. We tie both lifetimes together
+// with a single `'a` by requiring `'input = 'a` at every use site (`Node<'a, 'a>`).
+// This is safe because our NodeSet borrows the Document which owns the input.
+
+/// Data flowing between transforms in the verification/signing pipeline.
+///
+/// Transforms consume and produce either a node set (XML-level) or raw bytes
+/// (after canonicalization or base64 decode).
+pub enum TransformData<'a> {
+    /// A set of nodes from the parsed XML document.
+    NodeSet(NodeSet<'a>),
+    /// Raw bytes (e.g., after canonicalization).
+    Binary(Vec<u8>),
+}
+
+impl std::fmt::Debug for TransformData<'_> {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Self::NodeSet(_) => f.debug_tuple("NodeSet").field(&"...").finish(),
+            Self::Binary(b) => f.debug_tuple("Binary").field(&b.len()).finish(),
+        }
+    }
+}
+
+impl<'a> TransformData<'a> {
+    /// Convert to `NodeSet`, returning an error if this is `Binary` data.
+    pub fn into_node_set(self) -> Result<NodeSet<'a>, TransformError> {
+        match self {
+            Self::NodeSet(ns) => Ok(ns),
+            Self::Binary(_) => Err(TransformError::TypeMismatch {
+                expected: "NodeSet",
+                got: "Binary",
+            }),
+        }
+    }
+
+    /// Convert to binary bytes, returning an error if this is a `NodeSet`.
+    pub fn into_binary(self) -> Result<Vec<u8>, TransformError> {
+        match self {
+            Self::Binary(b) => Ok(b),
+            Self::NodeSet(_) => Err(TransformError::TypeMismatch {
+                expected: "Binary",
+                got: "NodeSet",
+            }),
+        }
+    }
+}
+
+/// A set of nodes from a roxmltree document.
+///
+/// Represents "which nodes are included" for canonicalization and transforms.
+/// Two modes:
+/// - **Whole document**: `included` is `None`, meaning all nodes are in the set
+///   (minus any in `excluded`).
+/// - **Subset**: `included` is `Some(ids)`, meaning only those node IDs are in
+///   the set (minus any in `excluded`).
+pub struct NodeSet<'a> {
+    /// Reference to the parsed document.
+    doc: &'a Document<'a>,
+    /// If `None`, all nodes are included. If `Some`, only these nodes.
+    included: Option<HashSet<NodeId>>,
+    /// Nodes explicitly excluded (e.g., `<Signature>` subtree for enveloped transform).
+    excluded: HashSet<NodeId>,
+    /// Whether comment nodes are included. For empty URI dereference (whole
+    /// document), comments are excluded per XMLDSig spec.
+    with_comments: bool,
+}
+
+impl<'a> NodeSet<'a> {
+    /// Create a node set representing the entire document without comments.
+    ///
+    /// Per XMLDSig §4.3.3.2: "An empty URI [...] is a reference to the document
+    /// [...] and the comment nodes are not included."
+    pub fn entire_document_without_comments(doc: &'a Document<'a>) -> Self {
+        Self {
+            doc,
+            included: None,
+            excluded: HashSet::new(),
+            with_comments: false,
+        }
+    }
+
+    /// Create a node set representing the entire document with comments.
+    ///
+    /// Used for `#xpointer(/)` which, unlike empty URI, includes comment nodes.
+    pub fn entire_document_with_comments(doc: &'a Document<'a>) -> Self {
+        Self {
+            doc,
+            included: None,
+            excluded: HashSet::new(),
+            with_comments: true,
+        }
+    }
+
+    /// Create a node set representing an element and all its descendants
+    /// (including attributes, namespaces, and text nodes), with comments.
+    pub fn subtree(doc: &'a Document<'a>, element: Node<'a, 'a>) -> Self {
+        let mut ids = HashSet::new();
+        collect_subtree_ids(element, &mut ids);
+        Self {
+            doc,
+            included: Some(ids),
+            excluded: HashSet::new(),
+            with_comments: true,
+        }
+    }
+
+    /// Reference to the underlying document.
+    pub fn document(&self) -> &'a Document<'a> {
+        self.doc
+    }
+
+    /// Check whether a node is in this set.
+    pub fn contains(&self, node: Node<'_, '_>) -> bool {
+        let id = node.id();
+
+        // Check exclusion first
+        if self.excluded.contains(&id) {
+            return false;
+        }
+
+        // Filter comments if not included
+        if !self.with_comments && node.is_comment() {
+            return false;
+        }
+
+        // Check inclusion
+        match &self.included {
+            None => true,
+            Some(ids) => ids.contains(&id),
+        }
+    }
+
+    /// Exclude a node and all its descendants from this set.
+    pub fn exclude_subtree(&mut self, node: Node<'_, '_>) {
+        collect_subtree_ids(node, &mut self.excluded);
+    }
+
+    /// Whether comments are included in this node set.
+    pub fn with_comments(&self) -> bool {
+        self.with_comments
+    }
+}
+
+/// Collect a node and all its descendants into a set of `NodeId`s.
+fn collect_subtree_ids(node: Node<'_, '_>, ids: &mut HashSet<NodeId>) {
+    ids.insert(node.id());
+    for child in node.children() {
+        collect_subtree_ids(child, ids);
+    }
+    // roxmltree models attributes as children only for elements,
+    // but they're accessible via node.attributes(). For node-set
+    // membership, we need to track the element ID — the C14N
+    // serializer checks element membership and then serializes
+    // all its attributes. Individual attribute NodeIds are not
+    // needed because roxmltree doesn't expose them as separate nodes
+    // in the tree traversal.
+}
+
+/// Errors during transform processing.
+#[derive(Debug, thiserror::Error)]
+pub enum TransformError {
+    /// Data type mismatch between transforms.
+    #[error("type mismatch: expected {expected}, got {got}")]
+    TypeMismatch {
+        /// Expected data type.
+        expected: &'static str,
+        /// Actual data type.
+        got: &'static str,
+    },
+
+    /// URI dereference failed.
+    #[error("URI dereference failed: {0}")]
+    UriDeref(String),
+
+    /// Element not found by ID.
+    #[error("element not found by ID: {0}")]
+    ElementNotFound(String),
+
+    /// Unsupported URI scheme or format.
+    #[error("unsupported URI: {0}")]
+    UnsupportedUri(String),
+
+    /// Unsupported transform algorithm.
+    #[error("unsupported transform: {0}")]
+    UnsupportedTransform(String),
+
+    /// Canonicalization error during transform.
+    #[error("C14N error: {0}")]
+    C14n(String),
+}
diff --git a/src/xmldsig/uri.rs b/src/xmldsig/uri.rs
new file mode 100644
index 0000000..b3745e2
--- /dev/null
+++ b/src/xmldsig/uri.rs
@@ -0,0 +1,538 @@
+//! URI dereference for XMLDSig `<Reference>` elements.
+//!
+//! Implements same-document URI resolution per
+//! [XMLDSig §4.3.3.2](https://www.w3.org/TR/xmldsig-core1/#sec-Same-Document):
+//!
+//! - **Empty URI** (`""` or absent): the entire document, excluding comments.
+//! - **Bare-name `#id`**: the element whose ID attribute matches `id`, as a subtree.
+//!
+//! External URIs (http://, file://, etc.) are not supported — only same-document
+//! references are needed for SAML signature verification.
+
+use std::collections::HashMap;
+
+use roxmltree::{Document, Node};
+
+use super::types::{NodeSet, TransformData, TransformError};
+
+/// Default ID attribute names to scan when building the ID index.
+///
+/// These cover the most common conventions:
+/// - `ID` — SAML 2.0 (`<saml:Assertion ID="...">`)
+/// - `Id` — XMLDSig (`<ds:Signature Id="...">`)
+/// - `id` — general XML
+const DEFAULT_ID_ATTRS: &[&str] = &["ID", "Id", "id"];
+
+/// Resolves same-document URI references against a parsed XML document.
+///
+/// Builds a `HashMap<&str, Node>` index on construction for O(1) fragment
+/// lookups. Supports caller-provided ID attribute names (important for SAML
+/// which uses `ID` rather than the xml:id mechanism).
+///
+/// # Example
+///
+/// ```
+/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
+/// use xml_sec::xmldsig::uri::UriReferenceResolver;
+///
+/// let xml = r#"<root><item ID="abc">content</item></root>"#;
+/// let doc = roxmltree::Document::parse(xml)?;
+/// let resolver = UriReferenceResolver::new(&doc);
+///
+/// assert!(resolver.has_id("abc"));
+/// assert_eq!(resolver.id_count(), 1);
+/// # Ok(())
+/// # }
+/// ```
+pub struct UriReferenceResolver<'a> {
+    doc: &'a Document<'a>,
+    /// ID → element node mapping for O(1) fragment lookups.
+    id_map: HashMap<&'a str, Node<'a, 'a>>,
+}
+
+impl<'a> UriReferenceResolver<'a> {
+    /// Build a resolver with default ID attribute names (`ID`, `Id`, `id`).
+    pub fn new(doc: &'a Document<'a>) -> Self {
+        Self::with_id_attrs(doc, DEFAULT_ID_ATTRS)
+    }
+
+    /// Build a resolver with custom ID attribute names.
+    ///
+    /// The defaults (`ID`, `Id`, `id`) are always included. Additional names
+    /// are appended. Pass an empty slice to use only the defaults.
+    pub fn with_id_attrs(doc: &'a Document<'a>, extra_attrs: &[&str]) -> Self {
+        let mut id_map = HashMap::new();
+
+        // Merge default + extra attribute names, dedup
+        let mut attr_names: Vec<&str> = DEFAULT_ID_ATTRS.to_vec();
+        for name in extra_attrs {
+            if !attr_names.contains(name) {
+                attr_names.push(name);
+            }
+        }
+
+        // Scan all elements for ID attributes
+        for node in doc.descendants() {
+            if node.is_element() {
+                for attr_name in &attr_names {
+                    if let Some(value) = node.attribute(*attr_name) {
+                        // First occurrence wins (per XML spec, IDs should be unique)
+                        id_map.entry(value).or_insert(node);
+                    }
+                }
+            }
+        }
+
+        Self { doc, id_map }
+    }
+
+    /// Dereference a URI string to a [`TransformData`].
+    ///
+    /// # URI forms
+    ///
+    /// | URI | Result |
+    /// |-----|--------|
+    /// | `""` (empty) | Entire document, comments excluded |
+    /// | `"#foo"` | Subtree rooted at element with ID `foo` |
+    /// | other | `Err(UnsupportedUri)` |
+    pub fn dereference(&self, uri: &str) -> Result<TransformData<'a>, TransformError> {
+        if uri.is_empty() {
+            // Empty URI = entire document without comments
+            // XMLDSig §4.3.3.2: "the reference is to the document [...],
+            // and the comment nodes are not included"
+            Ok(TransformData::NodeSet(
+                NodeSet::entire_document_without_comments(self.doc),
+            ))
+        } else if let Some(fragment) = uri.strip_prefix('#') {
+            self.dereference_fragment(fragment)
+        } else {
+            Err(TransformError::UnsupportedUri(uri.to_string()))
+        }
+    }
+
+    /// Resolve a URI fragment (the part after `#`).
+    ///
+    /// Handles:
+    /// - `xpointer(/)` → entire document (with comments, per XPointer spec)
+    /// - `xpointer(id('foo'))` → element by ID (equivalent to bare-name `#foo`)
+    /// - bare name `foo` → element by ID attribute
+    fn dereference_fragment(&self, fragment: &str) -> Result<TransformData<'a>, TransformError> {
+        if fragment == "xpointer(/)" {
+            // XPointer root: entire document WITH comments (unlike empty URI).
+            // Per XMLDSig §4.3.3.3: "the XPointer expression [...] includes
+            // comment nodes"
+            Ok(TransformData::NodeSet(
+                NodeSet::entire_document_with_comments(self.doc),
+            ))
+        } else if let Some(id) = parse_xpointer_id(fragment) {
+            // xpointer(id('foo')) → same as bare-name #foo
+            self.resolve_id(id)
+        } else {
+            // Bare-name fragment: #foo → element by ID
+            self.resolve_id(fragment)
+        }
+    }
+
+    /// Look up an element by its ID attribute value and return a subtree node set.
+    fn resolve_id(&self, id: &str) -> Result<TransformData<'a>, TransformError> {
+        match self.id_map.get(id) {
+            Some(&element) => Ok(TransformData::NodeSet(NodeSet::subtree(self.doc, element))),
+            None => Err(TransformError::ElementNotFound(id.to_string())),
+        }
+    }
+
+    /// Check if an ID is registered in the resolver's index.
+    pub fn has_id(&self, id: &str) -> bool {
+        self.id_map.contains_key(id)
+    }
+
+    /// Get the number of registered IDs.
+    pub fn id_count(&self) -> usize {
+        self.id_map.len()
+    }
+}
+
+/// Parse `xpointer(id('value'))` or `xpointer(id("value"))` and return the ID value.
+/// Returns `None` if the fragment doesn't match this pattern.
+fn parse_xpointer_id(fragment: &str) -> Option<&str> {
+    let inner = fragment.strip_prefix("xpointer(id(")?.strip_suffix("))")?;
+
+    // Strip single or double quotes
+    if (inner.starts_with('\'') && inner.ends_with('\''))
+        || (inner.starts_with('"') && inner.ends_with('"'))
+    {
+        Some(&inner[1..inner.len() - 1])
+    } else {
+        None
+    }
+}
+
+#[cfg(test)]
+#[allow(clippy::unwrap_used)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn empty_uri_returns_whole_document() {
+        let xml = "<root><child>text</child></root>";
+        let doc = Document::parse(xml).unwrap();
+        let resolver = UriReferenceResolver::new(&doc);
+
+        let data = resolver.dereference("").unwrap();
+        let node_set = data.into_node_set().unwrap();
+
+        // Whole document: root and child should be in the set
+        let root = doc.root_element();
+        assert!(node_set.contains(root));
+        let child = root.first_child().unwrap();
+        assert!(node_set.contains(child));
+    }
+
+    #[test]
+    fn empty_uri_excludes_comments() {
+        let xml = "<root><!-- comment --><child/></root>";
+        let doc = Document::parse(xml).unwrap();
+        let resolver = UriReferenceResolver::new(&doc);
+
+        let data = resolver.dereference("").unwrap();
+        let node_set = data.into_node_set().unwrap();
+
+        // Comment should be excluded
+        for node in doc.descendants() {
+            if node.is_comment() {
+                assert!(
+                    !node_set.contains(node),
+                    "comment should be excluded for empty URI"
+                );
+            }
+        }
+        // Element should still be included
+        assert!(node_set.contains(doc.root_element()));
+    }
+
+    #[test]
+    fn fragment_uri_resolves_by_id_attr() {
+        let xml = r#"<root><item ID="abc">content</item><item ID="def">other</item></root>"#;
+        let doc = Document::parse(xml).unwrap();
+        let resolver = UriReferenceResolver::new(&doc);
+
+        let data = resolver.dereference("#abc").unwrap();
+        let node_set = data.into_node_set().unwrap();
+
+        // The element with ID="abc" and its children should be in the set
+        let abc_elem = doc
+            .descendants()
+            .find(|n| n.attribute("ID") == Some("abc"))
+            .unwrap();
+        assert!(node_set.contains(abc_elem));
+
+        // The text child "content" should also be in the set
+        let text_child = abc_elem.first_child().unwrap();
+        assert!(node_set.contains(text_child));
+
+        // The root element should NOT be in the set (subtree only)
+        assert!(!node_set.contains(doc.root_element()));
+
+        // The element with ID="def" should NOT be in the set
+        let def_elem = doc
+            .descendants()
+            .find(|n| n.attribute("ID") == Some("def"))
+            .unwrap();
+        assert!(!node_set.contains(def_elem));
+    }
+
+    #[test]
+    fn fragment_uri_resolves_lowercase_id() {
+        let xml = r#"<root><item id="lower">text</item></root>"#;
+        let doc = Document::parse(xml).unwrap();
+        let resolver = UriReferenceResolver::new(&doc);
+
+        let data = resolver.dereference("#lower").unwrap();
+        let node_set = data.into_node_set().unwrap();
+
+        let elem = doc
+            .descendants()
+            .find(|n| n.attribute("id") == Some("lower"))
+            .unwrap();
+        assert!(node_set.contains(elem));
+    }
+
+    #[test]
+    fn fragment_uri_resolves_mixed_case_id() {
+        let xml = r#"<root><ds:Signature Id="sig1" xmlns:ds="http://www.w3.org/2000/09/xmldsig#"/></root>"#;
+        let doc = Document::parse(xml).unwrap();
+        let resolver = UriReferenceResolver::new(&doc);
+
+        assert!(resolver.has_id("sig1"));
+        let data = resolver.dereference("#sig1").unwrap();
+        assert!(data.into_node_set().is_ok());
+    }
+
+    #[test]
+    fn fragment_uri_not_found() {
+        let xml = "<root><child>text</child></root>";
+        let doc = Document::parse(xml).unwrap();
+        let resolver = UriReferenceResolver::new(&doc);
+
+        let result = resolver.dereference("#nonexistent");
+        assert!(result.is_err());
+        match result.unwrap_err() {
+            TransformError::ElementNotFound(id) => assert_eq!(id, "nonexistent"),
+            other => panic!("expected ElementNotFound, got: {other:?}"),
+        }
+    }
+
+    #[test]
+    fn unsupported_external_uri() {
+        let xml = "<root/>";
+        let doc = Document::parse(xml).unwrap();
+        let resolver = UriReferenceResolver::new(&doc);
+
+        let result = resolver.dereference("http://example.com/doc.xml");
+        assert!(result.is_err());
+        match result.unwrap_err() {
+            TransformError::UnsupportedUri(uri) => {
+                assert_eq!(uri, "http://example.com/doc.xml")
+            }
+            other => panic!("expected UnsupportedUri, got: {other:?}"),
+        }
+    }
+
+    #[test]
+    fn custom_id_attr_name() {
+        // roxmltree stores `wsu:Id` with local name "Id" — already in DEFAULT_ID_ATTRS.
+        // Test with a truly custom attribute name instead.
+        let xml = r#"<root><elem myid="custom1">data</elem></root>"#;
+        let doc = Document::parse(xml).unwrap();
+
+        // Default resolver doesn't know about "myid"
+        let resolver_default = UriReferenceResolver::new(&doc);
+        assert!(!resolver_default.has_id("custom1"));
+
+        // Custom resolver with "myid" added
+        let resolver_custom = UriReferenceResolver::with_id_attrs(&doc, &["myid"]);
+        assert!(resolver_custom.has_id("custom1"));
+
+        let data = resolver_custom.dereference("#custom1").unwrap();
+        assert!(data.into_node_set().is_ok());
+    }
+
+    #[test]
+    fn namespaced_id_attr_found_by_local_name() {
+        // roxmltree strips prefix: `wsu:Id` → local name "Id", which is in DEFAULT_ID_ATTRS
+        let xml =
+            r#"<root><elem wsu:Id="ts1" xmlns:wsu="http://example.com/wsu">data</elem></root>"#;
+        let doc = Document::parse(xml).unwrap();
+
+        let resolver = UriReferenceResolver::new(&doc);
+        assert!(resolver.has_id("ts1"));
+    }
+
+    #[test]
+    fn id_count_reports_unique_ids() {
+        let xml = r#"<root ID="r1"><a ID="a1"/><b Id="b1"/><c id="c1"/></root>"#;
+        let doc = Document::parse(xml).unwrap();
+        let resolver = UriReferenceResolver::new(&doc);
+
+        // 4 elements with ID-like attributes
+        assert_eq!(resolver.id_count(), 4);
+    }
+
+    #[test]
+    fn first_id_wins_for_duplicates() {
+        // If two elements have the same ID value, first occurrence wins
+        let xml = r#"<root><a ID="dup">first</a><b ID="dup">second</b></root>"#;
+        let doc = Document::parse(xml).unwrap();
+        let resolver = UriReferenceResolver::new(&doc);
+
+        let data = resolver.dereference("#dup").unwrap();
+        let node_set = data.into_node_set().unwrap();
+
+        // Should resolve to the first <a> element
+        let first = doc
+            .descendants()
+            .find(|n| n.is_element() && n.has_tag_name("a"))
+            .unwrap();
+        assert!(node_set.contains(first));
+    }
+
+    #[test]
+    fn node_set_exclude_subtree() {
+        let xml = r#"<root><keep>yes</keep><remove><deep>no</deep></remove></root>"#;
+        let doc = Document::parse(xml).unwrap();
+        let resolver = UriReferenceResolver::new(&doc);
+
+        let data = resolver.dereference("").unwrap();
+        let mut node_set = data.into_node_set().unwrap();
+
+        // Find and exclude the <remove> subtree
+        let remove_elem = doc
+            .descendants()
+            .find(|n| n.is_element() && n.has_tag_name("remove"))
+            .unwrap();
+        node_set.exclude_subtree(remove_elem);
+
+        // <keep> should still be in the set
+        let keep_elem = doc
+            .descendants()
+            .find(|n| n.is_element() && n.has_tag_name("keep"))
+            .unwrap();
+        assert!(node_set.contains(keep_elem));
+
+        // <remove> and its children should be excluded
+        assert!(!node_set.contains(remove_elem));
+        let deep_elem = doc
+            .descendants()
+            .find(|n| n.is_element() && n.has_tag_name("deep"))
+            .unwrap();
+        assert!(!node_set.contains(deep_elem));
+    }
+
+    #[test]
+    fn subtree_includes_comments() {
+        // Subtree dereference (via #id) includes comments, unlike empty URI
+        let xml = r#"<root><item ID="x"><!-- comment --><child/></item></root>"#;
+        let doc = Document::parse(xml).unwrap();
+        let resolver = UriReferenceResolver::new(&doc);
+
+        let data = resolver.dereference("#x").unwrap();
+        let node_set = data.into_node_set().unwrap();
+
+        for node in doc.descendants() {
+            if node.is_comment() {
+                assert!(
+                    node_set.contains(node),
+                    "comment should be included in #id subtree"
+                );
+            }
+        }
+    }
+
+    #[test]
+    fn xpointer_root_returns_whole_document_with_comments() {
+        let xml = "<root><!-- comment --><child/></root>";
+        let doc = Document::parse(xml).unwrap();
+        let resolver = UriReferenceResolver::new(&doc);
+
+        let data = resolver.dereference("#xpointer(/)").unwrap();
+        let node_set = data.into_node_set().unwrap();
+
+        // Unlike empty URI, xpointer(/) includes comments
+        for node in doc.descendants() {
+            if node.is_comment() {
+                assert!(
+                    node_set.contains(node),
+                    "comment should be included for #xpointer(/)"
+                );
+            }
+        }
+        assert!(node_set.contains(doc.root_element()));
+    }
+
+    #[test]
+    fn xpointer_id_single_quotes() {
+        let xml = r#"<root><item ID="abc">content</item></root>"#;
+        let doc = Document::parse(xml).unwrap();
+        let resolver = UriReferenceResolver::new(&doc);
+
+        let data = resolver.dereference("#xpointer(id('abc'))").unwrap();
+        let node_set = data.into_node_set().unwrap();
+
+        let elem = doc
+            .descendants()
+            .find(|n| n.attribute("ID") == Some("abc"))
+            .unwrap();
+        assert!(node_set.contains(elem));
+    }
+
+    #[test]
+    fn xpointer_id_double_quotes() {
+        let xml = r#"<root><item ID="xyz">content</item></root>"#;
+        let doc = Document::parse(xml).unwrap();
+        let resolver = UriReferenceResolver::new(&doc);
+
+        let data = resolver.dereference(r#"#xpointer(id("xyz"))"#).unwrap();
+        let node_set = data.into_node_set().unwrap();
+
+        let elem = doc
+            .descendants()
+            .find(|n| n.attribute("ID") == Some("xyz"))
+            .unwrap();
+        assert!(node_set.contains(elem));
+    }
+
+    #[test]
+    fn xpointer_id_not_found() {
+        let xml = "<root/>";
+        let doc = Document::parse(xml).unwrap();
+        let resolver = UriReferenceResolver::new(&doc);
+
+        let result = resolver.dereference("#xpointer(id('missing'))");
+        assert!(result.is_err());
+        match result.unwrap_err() {
+            TransformError::ElementNotFound(id) => assert_eq!(id, "missing"),
+            other => panic!("expected ElementNotFound, got: {other:?}"),
+        }
+    }
+
+    #[test]
+    fn parse_xpointer_id_variants() {
+        // Valid forms
+        assert_eq!(super::parse_xpointer_id("xpointer(id('foo'))"), Some("foo"));
+        assert_eq!(
+            super::parse_xpointer_id(r#"xpointer(id("bar"))"#),
+            Some("bar")
+        );
+
+        // Invalid forms
+        assert_eq!(super::parse_xpointer_id("xpointer(/)"), None);
+        assert_eq!(super::parse_xpointer_id("xpointer(id(foo))"), None); // no quotes
+        assert_eq!(super::parse_xpointer_id("not-xpointer"), None);
+        assert_eq!(super::parse_xpointer_id(""), None);
+    }
+
+    #[test]
+    fn saml_style_document() {
+        // Realistic SAML-like structure
+        let xml = r#"<samlp:Response xmlns:samlp="urn:oasis:names:tc:SAML:2.0:protocol"
+                                     xmlns:saml="urn:oasis:names:tc:SAML:2.0:assertion"
+                                     ID="_resp1">
+            <saml:Assertion ID="_assert1">
+                <saml:Subject>user@example.com</saml:Subject>
+            </saml:Assertion>
+            <ds:Signature xmlns:ds="http://www.w3.org/2000/09/xmldsig#" Id="sig1">
+                <ds:SignedInfo/>
+            </ds:Signature>
+        </samlp:Response>"#;
+
+        let doc = Document::parse(xml).unwrap();
+        let resolver = UriReferenceResolver::new(&doc);
+
+        // Should find all three IDs
+        assert!(resolver.has_id("_resp1"));
+        assert!(resolver.has_id("_assert1"));
+        assert!(resolver.has_id("sig1"));
+        assert_eq!(resolver.id_count(), 3);
+
+        // Dereference the assertion
+        let data = resolver.dereference("#_assert1").unwrap();
+        let node_set = data.into_node_set().unwrap();
+
+        // Assertion element should be in the set
+        let assertion = doc
+            .descendants()
+            .find(|n| n.attribute("ID") == Some("_assert1"))
+            .unwrap();
+        assert!(node_set.contains(assertion));
+
+        // Subject (child of assertion) should be in the set
+        let subject = assertion
+            .children()
+            .find(|n| n.is_element() && n.has_tag_name("Subject"))
+            .unwrap();
+        assert!(node_set.contains(subject));
+
+        // Response (parent) should NOT be in the set
+        assert!(!node_set.contains(doc.root_element()));
+    }
+}
diff --git a/tests/uri_integration.rs b/tests/uri_integration.rs
new file mode 100644
index 0000000..c29c578
--- /dev/null
+++ b/tests/uri_integration.rs
@@ -0,0 +1,222 @@
+//! Integration tests: URI dereference → NodeSet → C14N canonicalization.
+//!
+//! Verifies that dereferencing a URI produces a NodeSet that, when used as
+//! a predicate for C14N, produces the correct canonical output.
+
+use xml_sec::c14n::{canonicalize, C14nAlgorithm, C14nMode};
+use xml_sec::xmldsig::uri::UriReferenceResolver;
+
+// ─── Helpers ────────────────────────────────────────────────────────────────
+
+/// Dereference `uri`, build a C14N predicate from the resulting NodeSet,
+/// canonicalize with inclusive C14N 1.0, and return the canonical string.
+fn deref_and_canonicalize(xml: &str, uri: &str) -> String {
+    let doc = roxmltree::Document::parse(xml).expect("parse");
+    let resolver = UriReferenceResolver::new(&doc);
+
+    let data = resolver.dereference(uri).expect("dereference");
+    let node_set = data.into_node_set().expect("into_node_set");
+
+    let algo = C14nAlgorithm::new(C14nMode::Inclusive1_0, false);
+    let predicate = |n: roxmltree::Node| node_set.contains(n);
+    let mut output = Vec::new();
+    canonicalize(&doc, Some(&predicate), &algo, &mut output).expect("canonicalize");
+    String::from_utf8(output).expect("utf8")
+}
+
+/// Same but with comments enabled (for xpointer(/) which includes comments).
+fn deref_and_canonicalize_with_comments(xml: &str, uri: &str) -> String {
+    let doc = roxmltree::Document::parse(xml).expect("parse");
+    let resolver = UriReferenceResolver::new(&doc);
+
+    let data = resolver.dereference(uri).expect("dereference");
+    let node_set = data.into_node_set().expect("into_node_set");
+
+    let algo = C14nAlgorithm::new(C14nMode::Inclusive1_0, true);
+    let predicate = |n: roxmltree::Node| node_set.contains(n);
+    let mut output = Vec::new();
+    canonicalize(&doc, Some(&predicate), &algo, &mut output).expect("canonicalize");
+    String::from_utf8(output).expect("utf8")
+}
+
+// ─── Empty URI: whole document without comments ─────────────────────────────
+
+#[test]
+fn empty_uri_canonicalizes_whole_document() {
+    let xml = r#"<root b="2" a="1"><child/></root>"#;
+    let result = deref_and_canonicalize(xml, "");
+    // C14N sorts attributes and expands empty elements
+    assert_eq!(result, r#"<root a="1" b="2"><child></child></root>"#);
+}
+
+#[test]
+fn empty_uri_strips_comments() {
+    let xml = "<root><!-- comment --><child>text</child></root>";
+    let result = deref_and_canonicalize(xml, "");
+    // Comment should be stripped for empty URI dereference
+    assert_eq!(result, "<root><child>text</child></root>");
+}
+
+#[test]
+fn empty_uri_with_namespaces() {
+    let xml = r#"<root xmlns:a="http://a" xmlns:b="http://b"><a:child b:attr="val"/></root>"#;
+    let result = deref_and_canonicalize(xml, "");
+    // Inclusive C14N: root declares both ns, child suppresses redundant redeclarations
+    assert_eq!(
+        result,
+        r#"<root xmlns:a="http://a" xmlns:b="http://b"><a:child b:attr="val"></a:child></root>"#
+    );
+}
+
+// ─── #id: subtree by ID ─────────────────────────────────────────────────────
+
+#[test]
+fn fragment_id_canonicalizes_subtree_only() {
+    let xml = r#"<root><before>skip</before><target ID="t1"><inner a="1">text</inner></target><after>skip</after></root>"#;
+    let result = deref_and_canonicalize(xml, "#t1");
+    // Only the target subtree should appear; root, before, after excluded
+    assert_eq!(
+        result,
+        r#"<target ID="t1"><inner a="1">text</inner></target>"#
+    );
+}
+
+#[test]
+fn fragment_id_includes_comments_in_subtree() {
+    // Unlike empty URI, #id subtrees include comments
+    let xml = r#"<root><item ID="x"><!-- keep this --><child/></item></root>"#;
+    let result = deref_and_canonicalize_with_comments(xml, "#x");
+    assert_eq!(
+        result,
+        r#"<item ID="x"><!-- keep this --><child></child></item>"#
+    );
+}
+
+#[test]
+fn fragment_id_inherits_ancestor_namespaces() {
+    // When canonicalizing a subtree, inclusive C14N emits in-scope namespaces
+    // from ancestor elements even though those ancestors are not in the node set
+    let xml =
+        r#"<root xmlns:ns="http://example.com"><ns:item ID="sub"><ns:child/></ns:item></root>"#;
+    let result = deref_and_canonicalize(xml, "#sub");
+    // ns:item declares xmlns:ns (inherited from root ancestor outside subset).
+    // ns:child suppresses redundant redeclaration since parent ns:item already declared it.
+    assert_eq!(
+        result,
+        r#"<ns:item xmlns:ns="http://example.com" ID="sub"><ns:child></ns:child></ns:item>"#
+    );
+}
+
+// ─── #xpointer(/) : whole document WITH comments ───────────────────────────
+
+#[test]
+fn xpointer_root_includes_comments() {
+    let xml = "<root><!-- visible --><child/></root>";
+    let result = deref_and_canonicalize_with_comments(xml, "#xpointer(/)");
+    // xpointer(/) includes comments, unlike empty URI
+    assert_eq!(result, "<root><!-- visible --><child></child></root>");
+}
+
+#[test]
+fn xpointer_root_vs_empty_uri_comment_difference() {
+    let xml = "<root><!-- comment --><child/></root>";
+
+    let empty_uri = deref_and_canonicalize(xml, "");
+    let xpointer_root = deref_and_canonicalize_with_comments(xml, "#xpointer(/)");
+
+    // Empty URI: comments stripped
+    assert_eq!(empty_uri, "<root><child></child></root>");
+    // xpointer(/): comments preserved
+    assert_eq!(
+        xpointer_root,
+        "<root><!-- comment --><child></child></root>"
+    );
+}
+
+// ─── #xpointer(id('...')) : equivalent to bare-name ────────────────────────
+
+#[test]
+fn xpointer_id_canonicalizes_same_as_bare_name() {
+    let xml = r#"<root><item ID="abc"><child>data</child></item></root>"#;
+
+    let bare_name = deref_and_canonicalize(xml, "#abc");
+    let xpointer = deref_and_canonicalize(xml, "#xpointer(id('abc'))");
+
+    assert_eq!(bare_name, xpointer);
+}
+
+// ─── SAML-like scenario ─────────────────────────────────────────────────────
+
+#[test]
+fn saml_assertion_subtree_canonicalization() {
+    // Realistic SAML: dereference assertion by ID, canonicalize the subtree
+    let xml = r#"<samlp:Response xmlns:samlp="urn:oasis:names:tc:SAML:2.0:protocol" xmlns:saml="urn:oasis:names:tc:SAML:2.0:assertion" ID="_resp1">
+  <saml:Assertion ID="_a1">
+    <saml:Subject>user@example.com</saml:Subject>
+  </saml:Assertion>
+</samlp:Response>"#;
+
+    let result = deref_and_canonicalize(xml, "#_a1");
+
+    // Assertion subtree with inherited namespace declarations
+    assert!(
+        result.contains("xmlns:saml=\"urn:oasis:names:tc:SAML:2.0:assertion\""),
+        "should inherit saml namespace from ancestor: {result}"
+    );
+    assert!(
+        result.contains("<saml:Subject>user@example.com</saml:Subject>"),
+        "should include Subject child: {result}"
+    );
+    // Response element should NOT appear
+    assert!(
+        !result.contains("samlp:Response"),
+        "Response should not be in subtree: {result}"
+    );
+}
+
+#[test]
+fn saml_enveloped_signature_exclusion() {
+    // Simulate enveloped signature: dereference whole doc, then exclude Signature subtree
+    // This is what P1-014 (enveloped transform) will do, but we can test the
+    // NodeSet.exclude_subtree() + C14N combination here
+    let xml = r#"<Response ID="_r1">
+  <Assertion>data</Assertion>
+  <Signature Id="sig1">
+    <SignedInfo>digest</SignedInfo>
+  </Signature>
+</Response>"#;
+
+    let doc = roxmltree::Document::parse(xml).expect("parse");
+    let resolver = UriReferenceResolver::new(&doc);
+
+    let data = resolver.dereference("").expect("dereference");
+    let mut node_set = data.into_node_set().expect("into_node_set");
+
+    // Exclude the Signature subtree (simulating enveloped-signature transform)
+    let sig_elem = doc
+        .descendants()
+        .find(|n| n.is_element() && n.has_tag_name("Signature"))
+        .expect("Signature element");
+    node_set.exclude_subtree(sig_elem);
+
+    let algo = C14nAlgorithm::new(C14nMode::Inclusive1_0, false);
+    let predicate = |n: roxmltree::Node| node_set.contains(n);
+    let mut output = Vec::new();
+    canonicalize(&doc, Some(&predicate), &algo, &mut output).expect("canonicalize");
+    let result = String::from_utf8(output).expect("utf8");
+
+    // Signature and its children should be gone
+    assert!(
+        !result.contains("Signature"),
+        "Signature should be excluded: {result}"
+    );
+    assert!(
+        !result.contains("SignedInfo"),
+        "SignedInfo should be excluded: {result}"
+    );
+    // Assertion should remain
+    assert!(
+        result.contains("<Assertion>data</Assertion>"),
+        "Assertion should remain: {result}"
+    );
+}

From 67abda9632855b5fbb5a7febad94927dc7e6e5d0 Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Mon, 16 Mar 2026 04:06:54 +0200
Subject: [PATCH 2/9] fix(xmldsig): harden URI dereference error handling and
 DoS safety

- Return UnsupportedUri for unrecognized XPointer expressions instead
  of falling through to ID lookup (misclassified errors)
- Replace recursive collect_subtree_ids with iterative stack traversal
  to prevent stack overflow on deeply nested attacker-controlled XML
- Document all supported URI forms in dereference() doc comment
- Add unit test for unsupported XPointer expressions
---
 src/xmldsig/types.rs | 12 +++++++++---
 src/xmldsig/uri.rs   | 31 +++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/src/xmldsig/types.rs b/src/xmldsig/types.rs
index b9001dc..187fc17 100644
--- a/src/xmldsig/types.rs
+++ b/src/xmldsig/types.rs
@@ -154,10 +154,16 @@ impl<'a> NodeSet<'a> {
 }
 
 /// Collect a node and all its descendants into a set of `NodeId`s.
+///
+/// Uses an explicit stack instead of recursion to avoid stack overflow
+/// on deeply nested XML (attacker-controlled input in SAML contexts).
 fn collect_subtree_ids(node: Node<'_, '_>, ids: &mut HashSet<NodeId>) {
-    ids.insert(node.id());
-    for child in node.children() {
-        collect_subtree_ids(child, ids);
+    let mut stack = vec![node];
+    while let Some(current) = stack.pop() {
+        ids.insert(current.id());
+        for child in current.children() {
+            stack.push(child);
+        }
     }
     // roxmltree models attributes as children only for elements,
     // but they're accessible via node.attributes(). For node-set
diff --git a/src/xmldsig/uri.rs b/src/xmldsig/uri.rs
index b3745e2..e532344 100644
--- a/src/xmldsig/uri.rs
+++ b/src/xmldsig/uri.rs
@@ -94,6 +94,8 @@ impl<'a> UriReferenceResolver<'a> {
     /// |-----|--------|
     /// | `""` (empty) | Entire document, comments excluded |
     /// | `"#foo"` | Subtree rooted at element with ID `foo` |
+    /// | `"#xpointer(/)"` | Entire document, comments included |
+    /// | `"#xpointer(id('foo'))"` | Subtree rooted at element with ID `foo` |
     /// | other | `Err(UnsupportedUri)` |
     pub fn dereference(&self, uri: &str) -> Result<TransformData<'a>, TransformError> {
         if uri.is_empty() {
@@ -127,6 +129,9 @@ impl<'a> UriReferenceResolver<'a> {
         } else if let Some(id) = parse_xpointer_id(fragment) {
             // xpointer(id('foo')) → same as bare-name #foo
             self.resolve_id(id)
+        } else if fragment.starts_with("xpointer(") {
+            // Any other XPointer expression is unsupported
+            Err(TransformError::UnsupportedUri(format!("#{fragment}")))
         } else {
             // Bare-name fragment: #foo → element by ID
             self.resolve_id(fragment)
@@ -298,6 +303,32 @@ mod tests {
         }
     }
 
+    #[test]
+    fn unsupported_xpointer_expression() {
+        // XPointer expressions other than xpointer(/) and xpointer(id(...))
+        // should return UnsupportedUri, not fall through to ID lookup
+        let xml = "<root/>";
+        let doc = Document::parse(xml).unwrap();
+        let resolver = UriReferenceResolver::new(&doc);
+
+        let result = resolver.dereference("#xpointer(foo())");
+        assert!(result.is_err());
+        match result.unwrap_err() {
+            TransformError::UnsupportedUri(uri) => {
+                assert_eq!(uri, "#xpointer(foo())")
+            }
+            other => panic!("expected UnsupportedUri, got: {other:?}"),
+        }
+
+        // Generic XPointer with XPath should also be unsupported
+        let result = resolver.dereference("#xpointer(//element)");
+        assert!(result.is_err());
+        assert!(matches!(
+            result.unwrap_err(),
+            TransformError::UnsupportedUri(_)
+        ));
+    }
+
     #[test]
     fn custom_id_attr_name() {
         // roxmltree stores `wsu:Id` with local name "Id" — already in DEFAULT_ID_ATTRS.

From eac825f55d56adefad499cc43056f3c271ac11ac Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Mon, 16 Mar 2026 04:33:53 +0200
Subject: [PATCH 3/9] fix(xmldsig): reject duplicate IDs, guard foreign nodes,
 reject empty fragment

- Remove duplicate IDs from index to prevent signature-wrapping attacks
  (ambiguous IDs fail with ElementNotFound instead of picking arbitrarily)
- Guard NodeSet.contains() and exclude_subtree() against cross-document
  nodes via pointer comparison (prevents NodeId collisions)
- Reject bare "#" (empty fragment) as UnsupportedUri
- Add xpointer(id('...')) to module-level doc comment
- Clarify with_id_attrs() doc: extra_attrs appends, does not replace
- Add code comment explaining intentional lack of percent-decoding
---
 src/xmldsig/mod.rs   |  2 +-
 src/xmldsig/types.rs | 16 ++++++++
 src/xmldsig/uri.rs   | 90 ++++++++++++++++++++++++++++++++++++--------
 3 files changed, 91 insertions(+), 17 deletions(-)

diff --git a/src/xmldsig/mod.rs b/src/xmldsig/mod.rs
index 0a33452..c68dd30 100644
--- a/src/xmldsig/mod.rs
+++ b/src/xmldsig/mod.rs
@@ -4,7 +4,7 @@
 //!
 //! ## Current Status
 //!
-//! - URI dereference: same-document references (`""`, `#id`, `#xpointer(/)`)
+//! - URI dereference: same-document references (`""`, `#id`, `#xpointer(/)`, `#xpointer(id('...'))`)
 //! - ID attribute resolution with configurable attribute names
 //! - Node set types for the transform pipeline
 
diff --git a/src/xmldsig/types.rs b/src/xmldsig/types.rs
index 187fc17..610cb17 100644
--- a/src/xmldsig/types.rs
+++ b/src/xmldsig/types.rs
@@ -122,7 +122,17 @@ impl<'a> NodeSet<'a> {
     }
 
     /// Check whether a node is in this set.
+    ///
+    /// Returns `false` for nodes from a different document than this set's
+    /// owning document (prevents cross-document NodeId collisions).
     pub fn contains(&self, node: Node<'_, '_>) -> bool {
+        // Guard: reject nodes from a different document. NodeIds are
+        // per-document indices — the same index from another document
+        // would reference a completely different node.
+        if !std::ptr::eq(node.document() as *const _, self.doc as *const _) {
+            return false;
+        }
+
         let id = node.id();
 
         // Check exclusion first
@@ -143,7 +153,13 @@ impl<'a> NodeSet<'a> {
     }
 
     /// Exclude a node and all its descendants from this set.
+    ///
+    /// No-op for nodes from a different document.
     pub fn exclude_subtree(&mut self, node: Node<'_, '_>) {
+        // Guard: only exclude nodes from our document
+        if !std::ptr::eq(node.document() as *const _, self.doc as *const _) {
+            return;
+        }
         collect_subtree_ids(node, &mut self.excluded);
     }
 
diff --git a/src/xmldsig/uri.rs b/src/xmldsig/uri.rs
index e532344..11dfdb8 100644
--- a/src/xmldsig/uri.rs
+++ b/src/xmldsig/uri.rs
@@ -9,6 +9,7 @@
 //! External URIs (http://, file://, etc.) are not supported — only same-document
 //! references are needed for SAML signature verification.
 
+use std::collections::hash_map::Entry;
 use std::collections::HashMap;
 
 use roxmltree::{Document, Node};
@@ -56,10 +57,10 @@ impl<'a> UriReferenceResolver<'a> {
         Self::with_id_attrs(doc, DEFAULT_ID_ATTRS)
     }
 
-    /// Build a resolver with custom ID attribute names.
+    /// Build a resolver scanning additional ID attribute names beyond the defaults.
     ///
-    /// The defaults (`ID`, `Id`, `id`) are always included. Additional names
-    /// are appended. Pass an empty slice to use only the defaults.
+    /// The defaults (`ID`, `Id`, `id`) are always included; `extra_attrs`
+    /// adds to them (does not replace). Pass an empty slice to use only defaults.
     pub fn with_id_attrs(doc: &'a Document<'a>, extra_attrs: &[&str]) -> Self {
         let mut id_map = HashMap::new();
 
@@ -76,8 +77,18 @@ impl<'a> UriReferenceResolver<'a> {
             if node.is_element() {
                 for attr_name in &attr_names {
                     if let Some(value) = node.attribute(*attr_name) {
-                        // First occurrence wins (per XML spec, IDs should be unique)
-                        id_map.entry(value).or_insert(node);
+                        // Duplicate IDs are invalid per XML spec and can enable
+                        // signature-wrapping attacks. Remove the entry so that
+                        // lookups for ambiguous IDs fail with ElementNotFound
+                        // rather than silently picking an arbitrary node.
+                        match id_map.entry(value) {
+                            Entry::Vacant(v) => {
+                                v.insert(node);
+                            }
+                            Entry::Occupied(o) => {
+                                o.remove();
+                            }
+                        }
                     }
                 }
             }
@@ -106,6 +117,10 @@ impl<'a> UriReferenceResolver<'a> {
                 NodeSet::entire_document_without_comments(self.doc),
             ))
         } else if let Some(fragment) = uri.strip_prefix('#') {
+            // Note: we intentionally do NOT percent-decode the fragment.
+            // XMLDSig ID values are XML Name tokens (no spaces/special chars),
+            // and real-world SAML never uses percent-encoded fragments.
+            // xmlsec1 also passes fragments through without decoding.
             self.dereference_fragment(fragment)
         } else {
             Err(TransformError::UnsupportedUri(uri.to_string()))
@@ -119,6 +134,11 @@ impl<'a> UriReferenceResolver<'a> {
     /// - `xpointer(id('foo'))` → element by ID (equivalent to bare-name `#foo`)
     /// - bare name `foo` → element by ID attribute
     fn dereference_fragment(&self, fragment: &str) -> Result<TransformData<'a>, TransformError> {
+        if fragment.is_empty() {
+            // Bare "#" is not a valid same-document reference
+            return Err(TransformError::UnsupportedUri("#".to_string()));
+        }
+
         if fragment == "xpointer(/)" {
             // XPointer root: entire document WITH comments (unlike empty URI).
             // Per XMLDSig §4.3.3.3: "the XPointer expression [...] includes
@@ -175,6 +195,7 @@ fn parse_xpointer_id(fragment: &str) -> Option<&str> {
 #[cfg(test)]
 #[allow(clippy::unwrap_used)]
 mod tests {
+    use super::super::types::NodeSet;
     use super::*;
 
     #[test]
@@ -329,6 +350,43 @@ mod tests {
         ));
     }
 
+    #[test]
+    fn empty_fragment_rejected() {
+        // Bare "#" (empty fragment) is not a valid same-document reference
+        let xml = "<root/>";
+        let doc = Document::parse(xml).unwrap();
+        let resolver = UriReferenceResolver::new(&doc);
+
+        let result = resolver.dereference("#");
+        assert!(result.is_err());
+        match result.unwrap_err() {
+            TransformError::UnsupportedUri(uri) => assert_eq!(uri, "#"),
+            other => panic!("expected UnsupportedUri, got: {other:?}"),
+        }
+    }
+
+    #[test]
+    fn foreign_document_node_rejected() {
+        // NodeSet.contains() must reject nodes from a different document
+        let xml1 = "<root><child/></root>";
+        let xml2 = "<other><item/></other>";
+        let doc1 = Document::parse(xml1).unwrap();
+        let doc2 = Document::parse(xml2).unwrap();
+
+        let node_set = NodeSet::entire_document_without_comments(&doc1);
+
+        // Node from doc2 should NOT be in doc1's node set
+        let foreign_node = doc2.root_element();
+        assert!(
+            !node_set.contains(foreign_node),
+            "foreign document node should be rejected"
+        );
+
+        // Node from doc1 should be in the set
+        let own_node = doc1.root_element();
+        assert!(node_set.contains(own_node));
+    }
+
     #[test]
     fn custom_id_attr_name() {
         // roxmltree stores `wsu:Id` with local name "Id" — already in DEFAULT_ID_ATTRS.
@@ -370,21 +428,21 @@ mod tests {
     }
 
     #[test]
-    fn first_id_wins_for_duplicates() {
-        // If two elements have the same ID value, first occurrence wins
+    fn duplicate_ids_are_rejected() {
+        // Duplicate IDs are removed from the index to prevent signature-wrapping
+        // attacks — lookups for ambiguous IDs fail instead of picking arbitrarily.
         let xml = r#"<root><a ID="dup">first</a><b ID="dup">second</b></root>"#;
         let doc = Document::parse(xml).unwrap();
         let resolver = UriReferenceResolver::new(&doc);
 
-        let data = resolver.dereference("#dup").unwrap();
-        let node_set = data.into_node_set().unwrap();
-
-        // Should resolve to the first <a> element
-        let first = doc
-            .descendants()
-            .find(|n| n.is_element() && n.has_tag_name("a"))
-            .unwrap();
-        assert!(node_set.contains(first));
+        // "dup" appears twice → removed from index
+        assert!(!resolver.has_id("dup"));
+        let result = resolver.dereference("#dup");
+        assert!(result.is_err());
+        assert!(matches!(
+            result.unwrap_err(),
+            TransformError::ElementNotFound(_)
+        ));
     }
 
     #[test]

From 793bb8f03dcbb841fdfd49cc2d3a24a7bd9b4c7c Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Mon, 16 Mar 2026 04:54:56 +0200
Subject: [PATCH 4/9] fix(xmldsig): prevent duplicate ID re-insertion on 3+
 occurrences

- Track duplicate IDs in a HashSet so they are permanently excluded
  from the index (fixes bypass where 3rd occurrence re-inserts via
  Entry::Vacant after 2nd occurrence removed the entry)
- Add xpointer(/) and xpointer(id('...')) to uri.rs module-level docs
- Add test for triple-duplicate ID rejection
---
 src/xmldsig/uri.rs | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/src/xmldsig/uri.rs b/src/xmldsig/uri.rs
index 11dfdb8..eca8d17 100644
--- a/src/xmldsig/uri.rs
+++ b/src/xmldsig/uri.rs
@@ -5,12 +5,14 @@
 //!
 //! - **Empty URI** (`""` or absent): the entire document, excluding comments.
 //! - **Bare-name `#id`**: the element whose ID attribute matches `id`, as a subtree.
+//! - **`#xpointer(/)`**: the entire document, including comments.
+//! - **`#xpointer(id('id'))` / `#xpointer(id("id"))`**: element by ID (equivalent to bare-name).
 //!
 //! External URIs (http://, file://, etc.) are not supported — only same-document
 //! references are needed for SAML signature verification.
 
 use std::collections::hash_map::Entry;
-use std::collections::HashMap;
+use std::collections::{HashMap, HashSet};
 
 use roxmltree::{Document, Node};
 
@@ -63,6 +65,9 @@ impl<'a> UriReferenceResolver<'a> {
     /// adds to them (does not replace). Pass an empty slice to use only defaults.
     pub fn with_id_attrs(doc: &'a Document<'a>, extra_attrs: &[&str]) -> Self {
         let mut id_map = HashMap::new();
+        // Track IDs seen more than once so they are never reinserted
+        // after being removed (handles 3+ occurrences correctly).
+        let mut duplicate_ids: HashSet<&'a str> = HashSet::new();
 
         // Merge default + extra attribute names, dedup
         let mut attr_names: Vec<&str> = DEFAULT_ID_ATTRS.to_vec();
@@ -77,6 +82,11 @@ impl<'a> UriReferenceResolver<'a> {
             if node.is_element() {
                 for attr_name in &attr_names {
                     if let Some(value) = node.attribute(*attr_name) {
+                        // Skip IDs already marked as duplicate
+                        if duplicate_ids.contains(value) {
+                            continue;
+                        }
+
                         // Duplicate IDs are invalid per XML spec and can enable
                         // signature-wrapping attacks. Remove the entry so that
                         // lookups for ambiguous IDs fail with ElementNotFound
@@ -87,6 +97,7 @@ impl<'a> UriReferenceResolver<'a> {
                             }
                             Entry::Occupied(o) => {
                                 o.remove();
+                                duplicate_ids.insert(value);
                             }
                         }
                     }
@@ -445,6 +456,18 @@ mod tests {
         ));
     }
 
+    #[test]
+    fn triple_duplicate_ids_stay_rejected() {
+        // Verify that 3+ occurrences don't re-insert (the HashSet tracks
+        // permanently removed IDs so Entry::Vacant after remove doesn't re-add)
+        let xml = r#"<root><a ID="dup">1</a><b ID="dup">2</b><c ID="dup">3</c></root>"#;
+        let doc = Document::parse(xml).unwrap();
+        let resolver = UriReferenceResolver::new(&doc);
+
+        assert!(!resolver.has_id("dup"));
+        assert!(resolver.dereference("#dup").is_err());
+    }
+
     #[test]
     fn node_set_exclude_subtree() {
         let xml = r#"<root><keep>yes</keep><remove><deep>no</deep></remove></root>"#;

From 85c3e605b117652d8c356418c08908c5cb0e6345 Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Mon, 16 Mar 2026 05:06:34 +0200
Subject: [PATCH 5/9] fix(xmldsig): safe xpointer quote parsing, same-element
 dedup guard

- Use strip_prefix/strip_suffix instead of panicking slice in
  parse_xpointer_id (prevents panic on malformed input like `id(')`)
- Only mark ID as duplicate when mapped to a *different* element
  (same element with both ID="x" and Id="x" is not ambiguous)
---
 src/xmldsig/uri.rs | 38 +++++++++++++++++++++++++++++++-------
 1 file changed, 31 insertions(+), 7 deletions(-)

diff --git a/src/xmldsig/uri.rs b/src/xmldsig/uri.rs
index eca8d17..9633902 100644
--- a/src/xmldsig/uri.rs
+++ b/src/xmldsig/uri.rs
@@ -96,8 +96,14 @@ impl<'a> UriReferenceResolver<'a> {
                                 v.insert(node);
                             }
                             Entry::Occupied(o) => {
-                                o.remove();
-                                duplicate_ids.insert(value);
+                                // Only treat as duplicate if a *different* element
+                                // maps the same ID value. The same element can
+                                // expose the same value via multiple scanned attrs
+                                // (e.g., both `ID="x"` and `Id="x"`).
+                                if o.get().id() != node.id() {
+                                    o.remove();
+                                    duplicate_ids.insert(value);
+                                }
                             }
                         }
                     }
@@ -193,11 +199,12 @@ impl<'a> UriReferenceResolver<'a> {
 fn parse_xpointer_id(fragment: &str) -> Option<&str> {
     let inner = fragment.strip_prefix("xpointer(id(")?.strip_suffix("))")?;
 
-    // Strip single or double quotes
-    if (inner.starts_with('\'') && inner.ends_with('\''))
-        || (inner.starts_with('"') && inner.ends_with('"'))
-    {
-        Some(&inner[1..inner.len() - 1])
+    // Strip single or double quotes using safe helpers to avoid panics
+    // on malformed input (e.g., `xpointer(id('))` where inner is `'`)
+    if let Some(stripped) = inner.strip_prefix('\'').and_then(|s| s.strip_suffix('\'')) {
+        Some(stripped)
+    } else if let Some(stripped) = inner.strip_prefix('"').and_then(|s| s.strip_suffix('"')) {
+        Some(stripped)
     } else {
         None
     }
@@ -601,6 +608,23 @@ mod tests {
         assert_eq!(super::parse_xpointer_id("xpointer(id(foo))"), None); // no quotes
         assert_eq!(super::parse_xpointer_id("not-xpointer"), None);
         assert_eq!(super::parse_xpointer_id(""), None);
+
+        // Malformed: single quote char — must not panic (was slicing bug)
+        assert_eq!(super::parse_xpointer_id("xpointer(id('))"), None);
+        assert_eq!(super::parse_xpointer_id(r#"xpointer(id("))"#), None);
+    }
+
+    #[test]
+    fn same_element_multiple_id_attrs_not_duplicate() {
+        // An element with both ID="x" and Id="x" should NOT be treated as
+        // duplicate — it's the same element exposing the same value via
+        // different scanned attribute names.
+        let xml = r#"<root><item ID="x" Id="x">data</item></root>"#;
+        let doc = Document::parse(xml).unwrap();
+        let resolver = UriReferenceResolver::new(&doc);
+
+        assert!(resolver.has_id("x"));
+        assert!(resolver.dereference("#x").is_ok());
     }
 
     #[test]

From bc5c7039a1b33143a23049fd9c507188e8e26566 Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Mon, 16 Mar 2026 05:52:18 +0200
Subject: [PATCH 6/9] docs(xmldsig): clarify NodeSet::subtree doc re
 attribute/namespace tracking

---
 src/xmldsig/types.rs | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/xmldsig/types.rs b/src/xmldsig/types.rs
index 610cb17..8bce6a9 100644
--- a/src/xmldsig/types.rs
+++ b/src/xmldsig/types.rs
@@ -103,8 +103,14 @@ impl<'a> NodeSet<'a> {
         }
     }
 
-    /// Create a node set representing an element and all its descendants
-    /// (including attributes, namespaces, and text nodes), with comments.
+    /// Create a node set rooted at `element`, containing that element and all
+    /// of its descendant nodes (elements, text, and, for this constructor,
+    /// comment nodes).
+    ///
+    /// Note: in `roxmltree`, attributes and namespaces are not separate nodes
+    /// and therefore are not tracked individually in this `NodeSet`. During
+    /// canonicalization, any attributes and namespace declarations belonging to
+    /// the included elements are serialized as part of those elements.
     pub fn subtree(doc: &'a Document<'a>, element: Node<'a, 'a>) -> Self {
         let mut ids = HashSet::new();
         collect_subtree_ids(element, &mut ids);

From cd6297835d80478142e5bce75cca1892cbffad95 Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Mon, 16 Mar 2026 10:45:39 +0200
Subject: [PATCH 7/9] fix(xmldsig): reject empty xpointer id, document
 local-name matching

- Reject xpointer(id('')) as UnsupportedUri (empty string is not a
  valid XML Name, consistent with bare '#' rejection)
- Document roxmltree local-name attribute matching in with_id_attrs()
- Clarify element-only NodeId tracking in collect_subtree_ids
---
 src/xmldsig/types.rs | 14 +++++++-------
 src/xmldsig/uri.rs   | 25 +++++++++++++++++++++++++
 2 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/src/xmldsig/types.rs b/src/xmldsig/types.rs
index 8bce6a9..93863e7 100644
--- a/src/xmldsig/types.rs
+++ b/src/xmldsig/types.rs
@@ -187,13 +187,13 @@ fn collect_subtree_ids(node: Node<'_, '_>, ids: &mut HashSet<NodeId>) {
             stack.push(child);
         }
     }
-    // roxmltree models attributes as children only for elements,
-    // but they're accessible via node.attributes(). For node-set
-    // membership, we need to track the element ID — the C14N
-    // serializer checks element membership and then serializes
-    // all its attributes. Individual attribute NodeIds are not
-    // needed because roxmltree doesn't expose them as separate nodes
-    // in the tree traversal.
+    // In roxmltree, attributes and namespaces are not nodes and do not
+    // appear in `children()` traversal; they're accessed via
+    // node.attributes(). For node-set membership we therefore track only
+    // element NodeIds. During C14N, the serializer checks whether an
+    // element is in the node set and then serializes all of that element's
+    // attributes/namespaces as part of the element, so separate attribute
+    // identifiers are unnecessary.
 }
 
 /// Errors during transform processing.
diff --git a/src/xmldsig/uri.rs b/src/xmldsig/uri.rs
index 9633902..d35543b 100644
--- a/src/xmldsig/uri.rs
+++ b/src/xmldsig/uri.rs
@@ -63,6 +63,12 @@ impl<'a> UriReferenceResolver<'a> {
     ///
     /// The defaults (`ID`, `Id`, `id`) are always included; `extra_attrs`
     /// adds to them (does not replace). Pass an empty slice to use only defaults.
+    ///
+    /// Attribute names are matched using `roxmltree`'s *local-name* view of
+    /// attributes: any namespace prefix is stripped before comparison. For
+    /// example, an attribute written as `wsu:Id="..."` in the XML is seen as
+    /// simply `Id` by `roxmltree`, so callers **must** pass `"Id"`, not
+    /// `"wsu:Id"` or `"{namespace}Id"`.
     pub fn with_id_attrs(doc: &'a Document<'a>, extra_attrs: &[&str]) -> Self {
         let mut id_map = HashMap::new();
         // Track IDs seen more than once so they are never reinserted
@@ -165,6 +171,10 @@ impl<'a> UriReferenceResolver<'a> {
             ))
         } else if let Some(id) = parse_xpointer_id(fragment) {
             // xpointer(id('foo')) → same as bare-name #foo
+            // Reject empty parsed ID (e.g., xpointer(id(''))) — not a valid XML Name
+            if id.is_empty() {
+                return Err(TransformError::UnsupportedUri(format!("#{fragment}")));
+            }
             self.resolve_id(id)
         } else if fragment.starts_with("xpointer(") {
             // Any other XPointer expression is unsupported
@@ -594,6 +604,21 @@ mod tests {
         }
     }
 
+    #[test]
+    fn xpointer_id_empty_value_rejected() {
+        // xpointer(id('')) parses to empty string — reject as UnsupportedUri
+        let xml = "<root/>";
+        let doc = Document::parse(xml).unwrap();
+        let resolver = UriReferenceResolver::new(&doc);
+
+        let result = resolver.dereference("#xpointer(id(''))");
+        assert!(result.is_err());
+        assert!(matches!(
+            result.unwrap_err(),
+            TransformError::UnsupportedUri(_)
+        ));
+    }
+
     #[test]
     fn parse_xpointer_id_variants() {
         // Valid forms

From ce155a8bd2fbdd47429c78bcc4726277a4dd32b6 Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Mon, 16 Mar 2026 13:42:19 +0200
Subject: [PATCH 8/9] refactor(xmldsig): remove unused UriDeref error variant

---
 src/xmldsig/types.rs | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/src/xmldsig/types.rs b/src/xmldsig/types.rs
index 93863e7..6ed17fe 100644
--- a/src/xmldsig/types.rs
+++ b/src/xmldsig/types.rs
@@ -208,10 +208,6 @@ pub enum TransformError {
         got: &'static str,
     },
 
-    /// URI dereference failed.
-    #[error("URI dereference failed: {0}")]
-    UriDeref(String),
-
     /// Element not found by ID.
     #[error("element not found by ID: {0}")]
     ElementNotFound(String),

From f468dd42c7b15b4c5e49f0af149750bbc79c2f9c Mon Sep 17 00:00:00 2001
From: Dmitry Prudnikov <mail@polaz.com>
Date: Mon, 16 Mar 2026 14:18:18 +0200
Subject: [PATCH 9/9] refactor(xmldsig): derive doc from element in
 NodeSet::subtree, consolidate test helpers

- Remove redundant doc parameter from NodeSet::subtree(), derive from
  element.document() to enforce same-document invariant at construction
- Correct collect_subtree_ids inline note: tracks all children() nodes
  (elements, text, comments, PIs), not only elements
- Consolidate deref_and_canonicalize test helpers into shared impl
---
 src/xmldsig/types.rs     | 15 ++++++++-------
 src/xmldsig/uri.rs       |  2 +-
 tests/uri_integration.rs | 20 ++++++++------------
 3 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/src/xmldsig/types.rs b/src/xmldsig/types.rs
index 6ed17fe..8513bd6 100644
--- a/src/xmldsig/types.rs
+++ b/src/xmldsig/types.rs
@@ -111,11 +111,11 @@ impl<'a> NodeSet<'a> {
     /// and therefore are not tracked individually in this `NodeSet`. During
     /// canonicalization, any attributes and namespace declarations belonging to
     /// the included elements are serialized as part of those elements.
-    pub fn subtree(doc: &'a Document<'a>, element: Node<'a, 'a>) -> Self {
+    pub fn subtree(element: Node<'a, 'a>) -> Self {
         let mut ids = HashSet::new();
         collect_subtree_ids(element, &mut ids);
         Self {
-            doc,
+            doc: element.document(),
             included: Some(ids),
             excluded: HashSet::new(),
             with_comments: true,
@@ -189,11 +189,12 @@ fn collect_subtree_ids(node: Node<'_, '_>, ids: &mut HashSet<NodeId>) {
     }
     // In roxmltree, attributes and namespaces are not nodes and do not
     // appear in `children()` traversal; they're accessed via
-    // node.attributes(). For node-set membership we therefore track only
-    // element NodeIds. During C14N, the serializer checks whether an
-    // element is in the node set and then serializes all of that element's
-    // attributes/namespaces as part of the element, so separate attribute
-    // identifiers are unnecessary.
+    // node.attributes(). We therefore track the NodeIds of all descendant
+    // nodes reachable via `children()` (elements, text, comments,
+    // processing instructions, etc.). During C14N, the serializer checks
+    // whether an element is in the node set and then serializes all of
+    // that element's attributes/namespaces as part of the element, so
+    // separate attribute/namespace identifiers are unnecessary.
 }
 
 /// Errors during transform processing.
diff --git a/src/xmldsig/uri.rs b/src/xmldsig/uri.rs
index d35543b..539cf83 100644
--- a/src/xmldsig/uri.rs
+++ b/src/xmldsig/uri.rs
@@ -188,7 +188,7 @@ impl<'a> UriReferenceResolver<'a> {
     /// Look up an element by its ID attribute value and return a subtree node set.
     fn resolve_id(&self, id: &str) -> Result<TransformData<'a>, TransformError> {
         match self.id_map.get(id) {
-            Some(&element) => Ok(TransformData::NodeSet(NodeSet::subtree(self.doc, element))),
+            Some(&element) => Ok(TransformData::NodeSet(NodeSet::subtree(element))),
             None => Err(TransformError::ElementNotFound(id.to_string())),
         }
     }
diff --git a/tests/uri_integration.rs b/tests/uri_integration.rs
index c29c578..b4776fd 100644
--- a/tests/uri_integration.rs
+++ b/tests/uri_integration.rs
@@ -11,28 +11,24 @@ use xml_sec::xmldsig::uri::UriReferenceResolver;
 /// Dereference `uri`, build a C14N predicate from the resulting NodeSet,
 /// canonicalize with inclusive C14N 1.0, and return the canonical string.
 fn deref_and_canonicalize(xml: &str, uri: &str) -> String {
-    let doc = roxmltree::Document::parse(xml).expect("parse");
-    let resolver = UriReferenceResolver::new(&doc);
-
-    let data = resolver.dereference(uri).expect("dereference");
-    let node_set = data.into_node_set().expect("into_node_set");
-
-    let algo = C14nAlgorithm::new(C14nMode::Inclusive1_0, false);
-    let predicate = |n: roxmltree::Node| node_set.contains(n);
-    let mut output = Vec::new();
-    canonicalize(&doc, Some(&predicate), &algo, &mut output).expect("canonicalize");
-    String::from_utf8(output).expect("utf8")
+    deref_and_canonicalize_impl(xml, uri, false)
 }
 
 /// Same but with comments enabled (for xpointer(/) which includes comments).
 fn deref_and_canonicalize_with_comments(xml: &str, uri: &str) -> String {
+    deref_and_canonicalize_impl(xml, uri, true)
+}
+
+/// Shared implementation for dereferencing and canonicalizing, parameterized
+/// by whether comments should be included.
+fn deref_and_canonicalize_impl(xml: &str, uri: &str, with_comments: bool) -> String {
     let doc = roxmltree::Document::parse(xml).expect("parse");
     let resolver = UriReferenceResolver::new(&doc);
 
     let data = resolver.dereference(uri).expect("dereference");
     let node_set = data.into_node_set().expect("into_node_set");
 
-    let algo = C14nAlgorithm::new(C14nMode::Inclusive1_0, true);
+    let algo = C14nAlgorithm::new(C14nMode::Inclusive1_0, with_comments);
     let predicate = |n: roxmltree::Node| node_set.contains(n);
     let mut output = Vec::new();
     canonicalize(&doc, Some(&predicate), &algo, &mut output).expect("canonicalize");