diff --git a/dash/Cargo.toml b/dash/Cargo.toml index a6574f853..22d697b9b 100644 --- a/dash/Cargo.toml +++ b/dash/Cargo.toml @@ -67,7 +67,7 @@ serde_json = "1.0.140" serde_test = "1.0.177" serde_derive = "1.0.219" secp256k1 = { features = [ "recovery", "rand", "hashes" ], version="0.30.0" } -bincode = { version = "2.0.1" } +bincode = { version = "2.0.1", features = ["serde"] } assert_matches = "1.5.0" dashcore = { path = ".", features = ["core-block-hash-use-x11", "message_verification", "quorum_validation", "signer"] } criterion = "0.5" diff --git a/dash/src/hash_types.rs b/dash/src/hash_types.rs index 89cee78e9..78ea92e1a 100644 --- a/dash/src/hash_types.rs +++ b/dash/src/hash_types.rs @@ -389,3 +389,117 @@ mod newtypes { } } } + +#[cfg(all(test, feature = "serde"))] +mod tests { + use super::*; + use serde_derive::{Deserialize, Serialize}; + + /// Regression test for the bug where hash newtypes' `Deserialize` errored + /// with "bad hex string length 32 (expected 64)" (or similar) when an + /// hash-bearing struct was wrapped by an internally-tagged enum and + /// round-tripped through serde's intermediate `ContentDeserializer`. + /// `ContentDeserializer` always reports `is_human_readable() == true`, + /// so a value originally produced by a non-human-readable encoder ends up + /// replayed into the HR branch as raw bytes — which the previous + /// string-only `HexVisitor` rejected because `from_str` saw 32 UTF-8 chars + /// instead of the expected 64-char hex form. + #[test] + fn serde_round_trip_through_internally_tagged_enum() { + #[derive(Debug, PartialEq, Serialize, Deserialize)] + struct WithTxid { + txid: Txid, + } + + #[derive(Debug, PartialEq, Serialize, Deserialize)] + #[serde(tag = "type")] + enum Tagged { + A(WithTxid), + } + + let original = Tagged::A(WithTxid { + txid: "5df6e0e2761359d30a8275058e299fcc0381534545f55cf43e41983f5d4c9456" + .parse() + .unwrap(), + }); + + // Round-trip through serde_json::Value forces serde to buffer the + // value into a Content tree, then replay it through + // ContentDeserializer when resolving the internally-tagged enum + // variant. The canonical HR form serializes a hash as a hex string, + // so this exercises the visit_str path through ContentDeserializer. + let value = serde_json::to_value(&original).unwrap(); + let restored: Tagged = serde_json::from_value(value).unwrap(); + assert_eq!(original, restored); + + // Hand-build the array-of-numbers form of the Txid inside the tagged + // enum and deserialize from it. This routes through `ContentDeserializer` + // (because of the internally-tagged enum) and exercises the + // `visit_seq` path on the unified visitor — the exact shape produced + // downstream when a non-human-readable encoder hands hash bytes to a + // tagged-enum-bearing context. Before the fix the visitor only had + // string/bytes-disjoint visitors and rejected this shape. + let raw_txid_bytes: [u8; 32] = [ + 0x56, 0x94, 0x4c, 0x5d, 0x3f, 0x98, 0x41, 0x3e, 0xf4, 0x5c, 0xf5, 0x45, 0x45, 0x53, + 0x81, 0x03, 0xcc, 0x9f, 0x29, 0x8e, 0x05, 0x75, 0x82, 0x0a, 0xd3, 0x59, 0x13, 0x76, + 0xe2, 0xe0, 0xf6, 0x5d, + ]; + let arr_value = serde_json::Value::Array( + raw_txid_bytes.iter().map(|b| serde_json::Value::Number((*b).into())).collect(), + ); + let map_form = serde_json::json!({ + "type": "A", + "txid": arr_value, + }); + let from_arr: Tagged = serde_json::from_value(map_form).unwrap(); + assert_eq!(original, from_arr); + + // The canonical HR string form must still deserialize, so existing + // JSON producers do not break. + let from_string: Txid = serde_json::from_str( + "\"5df6e0e2761359d30a8275058e299fcc0381534545f55cf43e41983f5d4c9456\"", + ) + .unwrap(); + assert_eq!( + from_string, + "5df6e0e2761359d30a8275058e299fcc0381534545f55cf43e41983f5d4c9456" + .parse::() + .unwrap(), + ); + + // Plain bincode (non-human-readable) round-trip of a `Txid` must + // still succeed via the byte-shape branch — guards against breaking + // the `visit_seq` path used by length-prefixed sequence formats. + let raw: Txid = + "5df6e0e2761359d30a8275058e299fcc0381534545f55cf43e41983f5d4c9456".parse().unwrap(); + let cfg = bincode::config::standard(); + let bytes = bincode::serde::encode_to_vec(raw, cfg).unwrap(); + let (decoded, _): (Txid, _) = bincode::serde::decode_from_slice(&bytes, cfg).unwrap(); + assert_eq!(raw, decoded); + } + + /// 20-byte hash (PubkeyHash) goes through the same path. Smaller hash + /// length exercises a different `raw_len_bytes` / `hex_len_bytes` + /// disambiguation in the visitor. + #[test] + fn serde_round_trip_through_internally_tagged_enum_pubkey_hash() { + #[derive(Debug, PartialEq, Serialize, Deserialize)] + struct WithPubkeyHash { + pkh: PubkeyHash, + } + + #[derive(Debug, PartialEq, Serialize, Deserialize)] + #[serde(tag = "type")] + enum Tagged { + A(WithPubkeyHash), + } + + let original = Tagged::A(WithPubkeyHash { + pkh: PubkeyHash::from_hex("e8b43025641eea4fd21190f01bd870ef90f1a8b1").unwrap(), + }); + + let value = serde_json::to_value(&original).unwrap(); + let restored: Tagged = serde_json::from_value(value).unwrap(); + assert_eq!(original, restored); + } +} diff --git a/hashes/src/serde_macros.rs b/hashes/src/serde_macros.rs index 0cf1eecd2..2f098c6db 100644 --- a/hashes/src/serde_macros.rs +++ b/hashes/src/serde_macros.rs @@ -23,60 +23,120 @@ pub mod serde_details { use core::{fmt, ops, str}; use crate::Error; - struct HexVisitor(PhantomData); use serde::{de, Deserializer, Serializer}; - impl<'de, ValueT> de::Visitor<'de> for HexVisitor + /// Single visitor that accepts every shape a hash can arrive in: an ASCII + /// hex string (`visit_str`), a UTF-8 byte slice that decodes as hex + /// (`visit_bytes` of length `2*N` — note `N` is in BYTES per the macro, + /// see `serde_impl!` invocation in `internal_macros.rs`), a raw byte + /// slice of the hash's length-in-bytes (`visit_bytes` of length `N`), + /// or a length-prefixed sequence of `u8` from non-self-describing + /// formats (`visit_seq`, used by bincode). + /// + /// Required to interoperate with serde's `ContentDeserializer`, the + /// format-agnostic intermediate buffer serde uses to dispatch + /// internally-tagged enums (`#[serde(tag = "...")]`), `flatten`, and + /// untagged enums. `ContentDeserializer` always reports + /// `is_human_readable() == true` regardless of the upstream format. This + /// is intentional in serde's source — see long-standing upstream issues; + /// the maintainers consider it working-as-intended and the recommended + /// pattern is **"don't branch on `is_human_readable()` for shape dispatch + /// — accept any shape."** A value originally written by a + /// non-human-readable encoder (raw bytes) can therefore be replayed into + /// the human-readable branch as bytes / a byte-buf and must be accepted + /// there. See the regression tests in + /// `dash/src/hash_types.rs::serde_round_trip_through_internally_tagged_enum`. + struct AnyShapeVisitor(PhantomData); + + impl<'de, ValueT> de::Visitor<'de> for AnyShapeVisitor where - ValueT: FromStr, + ValueT: SerdeHash, ::Err: fmt::Display, { type Value = ValueT; fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter.write_str("an ASCII hex string") + formatter.write_str("an ASCII hex string or a byte string of the hash length") } - fn visit_bytes(self, v: &[u8]) -> Result + fn visit_str(self, v: &str) -> Result where E: de::Error, { - if let Ok(hex) = str::from_utf8(v) { - Self::Value::from_str(hex).map_err(E::custom) - } else { - Err(E::invalid_value(de::Unexpected::Bytes(v), &self)) - } + Self::Value::from_str(v).map_err(E::custom) } - fn visit_str(self, v: &str) -> Result + fn visit_borrowed_str(self, v: &'de str) -> Result where E: de::Error, { Self::Value::from_str(v).map_err(E::custom) } - } - - struct BytesVisitor(PhantomData); - - impl<'de, ValueT> de::Visitor<'de> for BytesVisitor - where - ValueT: SerdeHash, - ::Err: fmt::Display, - { - type Value = ValueT; - fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result { - formatter.write_str("a bytestring") + fn visit_bytes(self, v: &[u8]) -> Result + where + E: de::Error, + { + // Disambiguate by length. A correctly-sized raw hash byte string + // is exactly `N` bytes (`N` is in bytes per the macro — see + // `serde_impl!` invocation in `internal_macros.rs`); a hex-encoded + // form of that hash is `2*N` ASCII bytes. Any other length is + // rejected. + let raw_len_bytes = ValueT::N; + let hex_len_bytes = raw_len_bytes * 2; + if v.len() == raw_len_bytes { + SerdeHash::from_slice_delegated(v) + .map_err(|_| E::invalid_length(v.len(), &stringify!(N))) + } else if v.len() == hex_len_bytes { + if let Ok(hex) = str::from_utf8(v) { + Self::Value::from_str(hex).map_err(E::custom) + } else { + Err(E::invalid_value(de::Unexpected::Bytes(v), &self)) + } + } else { + Err(E::invalid_length(v.len(), &self)) + } } - fn visit_bytes(self, v: &[u8]) -> Result + fn visit_borrowed_bytes(self, v: &'de [u8]) -> Result where E: de::Error, { - SerdeHash::from_slice_delegated(v).map_err(|_| { - // from_slice only errors on incorrect length - E::invalid_length(v.len(), &stringify!(N)) - }) + self.visit_bytes(v) + } + + fn visit_seq(self, mut seq: A) -> Result + where + A: de::SeqAccess<'de>, + { + // Used by bincode and any non-self-describing format that emits a + // length-prefixed sequence of u8. Use a stack buffer sized to fit + // the largest hash this trait services (sha512 / Hmac at + // 64 bytes) so we keep `no_std`-compatible (no `Vec`/`alloc`). + // Bumping `MAX_HASH_BYTES` is only needed if a wider digest type + // is added — `debug_assert!` catches that in tests. + const MAX_HASH_BYTES: usize = 64; + let raw_len_bytes = ValueT::N; + debug_assert!( + raw_len_bytes <= MAX_HASH_BYTES, + "hash byte-length {} exceeds AnyShapeVisitor stack buffer ({}); bump MAX_HASH_BYTES", + raw_len_bytes, + MAX_HASH_BYTES, + ); + let mut buf = [0u8; MAX_HASH_BYTES]; + let mut len: usize = 0; + while let Some(b) = seq.next_element::()? { + if len == raw_len_bytes { + return Err(de::Error::invalid_length(len + 1, &stringify!(N))); + } + buf[len] = b; + len += 1; + } + if len != raw_len_bytes { + return Err(de::Error::invalid_length(len, &stringify!(N))); + } + SerdeHash::from_slice_delegated(&buf[..len]) + .map_err(|_| de::Error::invalid_length(len, &stringify!(N))) } } @@ -90,7 +150,7 @@ pub mod serde_details { + ops::Index, ::Err: fmt::Display, { - /// Size, in bits, of the hash. + /// Size of the hash, in bytes. const N: usize; /// Helper function to turn a deserialized slice into the correct hash type. @@ -106,11 +166,22 @@ pub mod serde_details { } /// Do serde deserialization. + /// + /// Uses a single visitor that accepts every shape a hash can arrive + /// in (ASCII hex string, raw byte slice, length-prefixed `u8` + /// sequence). The HR branch dispatches via `deserialize_any` to + /// handle both true human-readable deserializers (where the visitor + /// receives `visit_str`) and serde's `ContentDeserializer` (which + /// reports `is_human_readable() == true` even when wrapping bytes + /// from a non-HR source — internally-tagged enums, `flatten`, and + /// untagged enums all route through it). The non-HR branch keeps + /// `deserialize_bytes` because bincode is non-self-describing and + /// does not support `deserialize_any`. fn deserialize<'de, D: Deserializer<'de>>(d: D) -> Result { if d.is_human_readable() { - d.deserialize_str(HexVisitor::(PhantomData)) + d.deserialize_any(AnyShapeVisitor::(PhantomData)) } else { - d.deserialize_bytes(BytesVisitor::(PhantomData)) + d.deserialize_bytes(AnyShapeVisitor::(PhantomData)) } } }