-
Notifications
You must be signed in to change notification settings - Fork 0
feat(xmldsig): add URI dereference for Reference elements #9
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
10 commits
Select commit
Hold shift + click to select a range
865e79f
feat(xmldsig): add URI dereference for Reference elements
polaz 67abda9
fix(xmldsig): harden URI dereference error handling and DoS safety
polaz 04be06a
Merge branch 'main' into feat/#8-uri-dereference
polaz eac825f
fix(xmldsig): reject duplicate IDs, guard foreign nodes, reject empty…
polaz 793bb8f
fix(xmldsig): prevent duplicate ID re-insertion on 3+ occurrences
polaz 85c3e60
fix(xmldsig): safe xpointer quote parsing, same-element dedup guard
polaz bc5c703
docs(xmldsig): clarify NodeSet::subtree doc re attribute/namespace tr…
polaz cd62978
fix(xmldsig): reject empty xpointer id, document local-name matching
polaz ce155a8
refactor(xmldsig): remove unused UriDeref error variant
polaz f468dd4
refactor(xmldsig): derive doc from element in NodeSet::subtree, conso…
polaz File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,14 @@ | ||
| //! XML Digital Signatures (XMLDSig). | ||
| //! | ||
| //! Implements [XML Signature Syntax and Processing](https://www.w3.org/TR/xmldsig-core1/). | ||
| //! | ||
| //! ## Current Status | ||
| //! | ||
| //! - URI dereference: same-document references (`""`, `#id`, `#xpointer(/)`, `#xpointer(id('...'))`) | ||
| //! - ID attribute resolution with configurable attribute names | ||
| //! - Node set types for the transform pipeline | ||
|
|
||
| pub mod types; | ||
| pub mod uri; | ||
|
|
||
| pub use types::{NodeSet, TransformData, TransformError}; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,227 @@ | ||
| //! Core types for the XMLDSig transform pipeline. | ||
| //! | ||
| //! These types flow between URI dereference, the transform chain (P1-014, | ||
| //! P1-015), and reference processing (P1-018), ending in digest computation. | ||
|
|
||
| use std::collections::HashSet; | ||
|
|
||
| use roxmltree::{Document, Node, NodeId}; | ||
|
|
||
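| // roxmltree 0.21 uses `Node<'a, 'input: 'a>`. We tie both lifetimes together | ||
| // with a single `'a` by requiring `'input = 'a` at every use site (`Node<'a, 'a>`). | ||
| // This is sound because `NodeSet` only holds a shared borrow of the `Document` | ||
| // (`&'a Document<'a>`), and the input text the document was parsed from has to | ||
| // outlive that borrow in any case. | ||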
|
|
||
| /// Data flowing between transforms in the verification/signing pipeline. | ||
| /// | ||
| /// Each transform consumes and produces either a node set (XML-level) or raw | ||
| /// bytes (after canonicalization or base64 decode). Use | ||
| /// [`TransformData::into_node_set`] or [`TransformData::into_binary`] to unwrap | ||
| /// the value when a specific representation is required. | ||
| pub enum TransformData<'a> { | ||
| /// A set of nodes borrowed from the parsed XML document. | ||
| NodeSet(NodeSet<'a>), | ||
| /// Raw bytes (e.g., after canonicalization). | ||
| Binary(Vec<u8>), | ||
| } | ||
|
|
||
| impl std::fmt::Debug for TransformData<'_> { | ||
|     /// Write a compact summary instead of the full payload: a node set is | ||
|     /// rendered with the placeholder `"..."`, binary data with its length in | ||
|     /// bytes. | ||
|     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | ||
|         match self { | ||
|             Self::NodeSet(_) => { | ||
|                 let mut summary = f.debug_tuple("NodeSet"); | ||
|                 summary.field(&"..."); | ||
|                 summary.finish() | ||
|             } | ||
|             Self::Binary(bytes) => { | ||
|                 let byte_count = bytes.len(); | ||
|                 let mut summary = f.debug_tuple("Binary"); | ||
|                 summary.field(&byte_count); | ||
|                 summary.finish() | ||
|             } | ||
|         } | ||
|     } | ||
| } | ||
|
|
||
| impl<'a> TransformData<'a> { | ||
|     /// Unwrap the node-set payload. | ||
|     /// | ||
|     /// # Errors | ||
|     /// | ||
|     /// Returns [`TransformError::TypeMismatch`] if this value holds `Binary` | ||
|     /// data. | ||
|     pub fn into_node_set(self) -> Result<NodeSet<'a>, TransformError> { | ||
|         if let Self::NodeSet(nodes) = self { | ||
|             return Ok(nodes); | ||
|         } | ||
|         Err(TransformError::TypeMismatch { | ||
|             expected: "NodeSet", | ||
|             got: "Binary", | ||
|         }) | ||
|     } | ||
|
|
||
|     /// Unwrap the raw byte payload. | ||
|     /// | ||
|     /// # Errors | ||
|     /// | ||
|     /// Returns [`TransformError::TypeMismatch`] if this value holds a | ||
|     /// `NodeSet`. | ||
|     pub fn into_binary(self) -> Result<Vec<u8>, TransformError> { | ||
|         if let Self::Binary(bytes) = self { | ||
|             return Ok(bytes); | ||
|         } | ||
|         Err(TransformError::TypeMismatch { | ||
|             expected: "Binary", | ||
|             got: "NodeSet", | ||
|         }) | ||
|     } | ||
| } | ||
|
|
||
| /// A set of nodes from a roxmltree document. | ||
| /// | ||
| /// Represents "which nodes are included" for canonicalization and transforms. | ||
| /// Two modes: | ||
| /// - **Whole document**: `included` is `None`, meaning all nodes are in the set | ||
| /// (minus any in `excluded`). | ||
| /// - **Subset**: `included` is `Some(ids)`, meaning only those node IDs are in | ||
| /// the set (minus any in `excluded`). | ||
| /// | ||
| /// In both modes `excluded` takes precedence over `included`, and comment nodes | ||
| /// are additionally filtered out when `with_comments` is `false`. | ||
| pub struct NodeSet<'a> { | ||
| /// Reference to the parsed document that every `NodeId` below belongs to. | ||
| doc: &'a Document<'a>, | ||
| /// If `None`, all nodes are included. If `Some`, only these nodes. | ||
| included: Option<HashSet<NodeId>>, | ||
| /// Nodes explicitly excluded (e.g., `<Signature>` subtree for enveloped transform). | ||
| excluded: HashSet<NodeId>, | ||
| /// Whether comment nodes are included. For empty URI dereference (whole | ||
| /// document), comments are excluded per XMLDSig spec. | ||
| with_comments: bool, | ||
| } | ||
|
|
||
| impl<'a> NodeSet<'a> { | ||
|     /// Build the node set for an empty-URI reference: every node of `doc` | ||
|     /// except comment nodes. | ||
|     /// | ||
|     /// Per XMLDSig §4.3.3.2: "An empty URI [...] is a reference to the document | ||
|     /// [...] and the comment nodes are not included." | ||
|     pub fn entire_document_without_comments(doc: &'a Document<'a>) -> Self { | ||
|         Self::whole_document(doc, false) | ||
|     } | ||
|
|
||
|     /// Build the node set for `#xpointer(/)`: every node of `doc`, comment | ||
|     /// nodes included (unlike the empty URI). | ||
|     pub fn entire_document_with_comments(doc: &'a Document<'a>) -> Self { | ||
|         Self::whole_document(doc, true) | ||
|     } | ||
|
|
||
|     /// Build the node set consisting of `element` plus all of its descendant | ||
|     /// nodes (elements, text, and, for this constructor, comment nodes). The | ||
|     /// owning document is taken from `element` itself. | ||
|     /// | ||
|     /// Note: in `roxmltree`, attributes and namespaces are not separate nodes | ||
|     /// and therefore are not tracked individually in this `NodeSet`. During | ||
|     /// canonicalization, any attributes and namespace declarations belonging to | ||
|     /// the included elements are serialized as part of those elements. | ||
|     pub fn subtree(element: Node<'a, 'a>) -> Self { | ||
|         let mut subtree_ids = HashSet::new(); | ||
|         collect_subtree_ids(element, &mut subtree_ids); | ||
|         Self { | ||
|             with_comments: true, | ||
|             excluded: HashSet::new(), | ||
|             included: Some(subtree_ids), | ||
|             doc: element.document(), | ||
|         } | ||
|     } | ||
|
|
||
|     /// The document this node set refers to. | ||
|     pub fn document(&self) -> &'a Document<'a> { | ||
|         self.doc | ||
|     } | ||
|
|
||
|     /// Report whether `node` belongs to this set. | ||
|     /// | ||
|     /// Always `false` for a node that comes from a different document than the | ||
|     /// one this set was built on (prevents cross-document `NodeId` collisions). | ||
|     pub fn contains(&self, node: Node<'_, '_>) -> bool { | ||
|         // NodeIds are per-document indices: the same index taken from another | ||
|         // document would name a completely different node. | ||
|         if !self.owns(node) { | ||
|             return false; | ||
|         } | ||
|
|
||
|         let id = node.id(); | ||
|         let explicitly_excluded = self.excluded.contains(&id); | ||
|         let filtered_comment = node.is_comment() && !self.with_comments; | ||
|         if explicitly_excluded || filtered_comment { | ||
|             return false; | ||
|         } | ||
|
|
||
|         // `None` means "whole document", so every remaining node qualifies. | ||
|         self.included | ||
|             .as_ref() | ||
|             .map_or(true, |ids| ids.contains(&id)) | ||
|     } | ||
|
|
||
|     /// Remove `node` and all of its descendants from this set. | ||
|     /// | ||
|     /// Does nothing when `node` belongs to a different document. | ||
|     pub fn exclude_subtree(&mut self, node: Node<'_, '_>) { | ||
|         if self.owns(node) { | ||
|             collect_subtree_ids(node, &mut self.excluded); | ||
|         } | ||
|     } | ||
|
|
||
|     /// Whether comment nodes count as members of this node set. | ||
|     pub fn with_comments(&self) -> bool { | ||
|         self.with_comments | ||
|     } | ||
|
|
||
|     /// Shared constructor for the two whole-document variants: no explicit | ||
|     /// inclusion list, nothing excluded, comments governed by `with_comments`. | ||
|     fn whole_document(doc: &'a Document<'a>, with_comments: bool) -> Self { | ||
|         Self { | ||
|             doc, | ||
|             included: None, | ||
|             excluded: HashSet::new(), | ||
|             with_comments, | ||
|         } | ||
|     } | ||
|
|
||
|     /// `true` when `node` was produced by the very `Document` instance this | ||
|     /// set borrows (compared by address, not by content). | ||
|     fn owns(&self, node: Node<'_, '_>) -> bool { | ||
|         std::ptr::eq(node.document() as *const _, self.doc as *const _) | ||
|     } | ||
| } | ||
|
|
||
| /// Collect a node and all its descendants into a set of `NodeId`s. | ||
| /// | ||
| /// IDs are added to whatever `ids` already contains. The walk is driven by | ||
| /// roxmltree's `Node::descendants()` iterator, which yields the node itself | ||
| /// followed by its descendants, so this function does not recurse and cannot | ||
| /// overflow the call stack on deeply nested XML (attacker-controlled input in | ||
| /// SAML contexts). | ||
| /// | ||
| /// In roxmltree, attributes and namespaces are not nodes and do not appear in | ||
| /// tree traversal; they're accessed via `node.attributes()`. We therefore track | ||
| /// only the `NodeId`s of tree nodes (elements, text, comments, processing | ||
| /// instructions, etc.). During C14N, the serializer checks whether an element | ||
| /// is in the node set and then serializes all of that element's | ||
| /// attributes/namespaces as part of the element, so separate | ||
| /// attribute/namespace identifiers are unnecessary. | ||
| fn collect_subtree_ids(node: Node<'_, '_>, ids: &mut HashSet<NodeId>) { | ||
|     ids.extend(node.descendants().map(|descendant| descendant.id())); | ||
| } | ||
|
|
||
| /// Errors during transform processing. | ||
| /// | ||
| /// `Display` messages are generated by `thiserror` from the `#[error]` | ||
| /// attribute on each variant. | ||
| #[derive(Debug, thiserror::Error)] | ||
| pub enum TransformError { | ||
| /// Data type mismatch between transforms, e.g. a node set was required but | ||
| /// binary data was supplied. | ||
| #[error("type mismatch: expected {expected}, got {got}")] | ||
| TypeMismatch { | ||
| /// Name of the expected data type. | ||
| expected: &'static str, | ||
| /// Name of the data type that was actually supplied. | ||
| got: &'static str, | ||
| }, | ||
|
|
||
| /// Element not found by ID. The payload is presumably the ID value that | ||
| /// failed to resolve (verify against the code that constructs it). | ||
| #[error("element not found by ID: {0}")] | ||
| ElementNotFound(String), | ||
|
|
||
| /// Unsupported URI scheme or format. The payload is presumably the | ||
| /// offending URI (verify against the code that constructs it). | ||
| #[error("unsupported URI: {0}")] | ||
| UnsupportedUri(String), | ||
|
|
||
| /// Unsupported transform algorithm. | ||
| #[error("unsupported transform: {0}")] | ||
| UnsupportedTransform(String), | ||
|
|
||
| /// Canonicalization error during transform; the payload carries the | ||
| /// error description as text. | ||
| #[error("C14N error: {0}")] | ||
| C14n(String), | ||
| } | ||
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.