Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dash/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ serde_json = "1.0.140"
serde_test = "1.0.177"
serde_derive = "1.0.219"
secp256k1 = { features = [ "recovery", "rand", "hashes" ], version="0.30.0" }
bincode = { version = "2.0.1" }
bincode = { version = "2.0.1", features = ["serde"] }
assert_matches = "1.5.0"
dashcore = { path = ".", features = ["core-block-hash-use-x11", "message_verification", "quorum_validation", "signer"] }
criterion = "0.5"
Expand Down
114 changes: 114 additions & 0 deletions dash/src/hash_types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -389,3 +389,117 @@ mod newtypes {
}
}
}

#[cfg(all(test, feature = "serde"))]
mod tests {
use super::*;
use serde_derive::{Deserialize, Serialize};

/// Regression test for the bug where hash newtypes' `Deserialize` errored
/// with "bad hex string length 32 (expected 64)" (or similar) when an
/// hash-bearing struct was wrapped by an internally-tagged enum and
/// round-tripped through serde's intermediate `ContentDeserializer`.
/// `ContentDeserializer` always reports `is_human_readable() == true`,
/// so a value originally produced by a non-human-readable encoder ends up
/// replayed into the HR branch as raw bytes — which the previous
/// string-only `HexVisitor` rejected because `from_str` saw 32 UTF-8 chars
/// instead of the expected 64-char hex form.
#[test]
fn serde_round_trip_through_internally_tagged_enum() {
#[derive(Debug, PartialEq, Serialize, Deserialize)]
struct WithTxid {
txid: Txid,
}

#[derive(Debug, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type")]
enum Tagged {
A(WithTxid),
}

let original = Tagged::A(WithTxid {
txid: "5df6e0e2761359d30a8275058e299fcc0381534545f55cf43e41983f5d4c9456"
.parse()
.unwrap(),
});

// Round-trip through serde_json::Value forces serde to buffer the
// value into a Content tree, then replay it through
// ContentDeserializer when resolving the internally-tagged enum
// variant. The canonical HR form serializes a hash as a hex string,
// so this exercises the visit_str path through ContentDeserializer.
let value = serde_json::to_value(&original).unwrap();
let restored: Tagged = serde_json::from_value(value).unwrap();
assert_eq!(original, restored);

// Hand-build the array-of-numbers form of the Txid inside the tagged
// enum and deserialize from it. This routes through `ContentDeserializer`
// (because of the internally-tagged enum) and exercises the
// `visit_seq` path on the unified visitor — the exact shape produced
// downstream when a non-human-readable encoder hands hash bytes to a
// tagged-enum-bearing context. Before the fix the visitor only had
// string/bytes-disjoint visitors and rejected this shape.
let raw_txid_bytes: [u8; 32] = [
0x56, 0x94, 0x4c, 0x5d, 0x3f, 0x98, 0x41, 0x3e, 0xf4, 0x5c, 0xf5, 0x45, 0x45, 0x53,
0x81, 0x03, 0xcc, 0x9f, 0x29, 0x8e, 0x05, 0x75, 0x82, 0x0a, 0xd3, 0x59, 0x13, 0x76,
0xe2, 0xe0, 0xf6, 0x5d,
];
let arr_value = serde_json::Value::Array(
raw_txid_bytes.iter().map(|b| serde_json::Value::Number((*b).into())).collect(),
);
let map_form = serde_json::json!({
"type": "A",
"txid": arr_value,
});
let from_arr: Tagged = serde_json::from_value(map_form).unwrap();
assert_eq!(original, from_arr);

// The canonical HR string form must still deserialize, so existing
// JSON producers do not break.
let from_string: Txid = serde_json::from_str(
"\"5df6e0e2761359d30a8275058e299fcc0381534545f55cf43e41983f5d4c9456\"",
)
.unwrap();
assert_eq!(
from_string,
"5df6e0e2761359d30a8275058e299fcc0381534545f55cf43e41983f5d4c9456"
.parse::<Txid>()
.unwrap(),
);

// Plain bincode (non-human-readable) round-trip of a `Txid` must
// still succeed via the byte-shape branch — guards against breaking
// the `visit_seq` path used by length-prefixed sequence formats.
let raw: Txid =
"5df6e0e2761359d30a8275058e299fcc0381534545f55cf43e41983f5d4c9456".parse().unwrap();
let cfg = bincode::config::standard();
let bytes = bincode::serde::encode_to_vec(raw, cfg).unwrap();
let (decoded, _): (Txid, _) = bincode::serde::decode_from_slice(&bytes, cfg).unwrap();
assert_eq!(raw, decoded);
}

/// 20-byte hash (PubkeyHash) goes through the same path. Smaller hash
/// length exercises a different `raw_len_bytes` / `hex_len_bytes`
/// disambiguation in the visitor.
#[test]
fn serde_round_trip_through_internally_tagged_enum_pubkey_hash() {
#[derive(Debug, PartialEq, Serialize, Deserialize)]
struct WithPubkeyHash {
pkh: PubkeyHash,
}

#[derive(Debug, PartialEq, Serialize, Deserialize)]
#[serde(tag = "type")]
enum Tagged {
A(WithPubkeyHash),
}

let original = Tagged::A(WithPubkeyHash {
pkh: PubkeyHash::from_hex("e8b43025641eea4fd21190f01bd870ef90f1a8b1").unwrap(),
});

let value = serde_json::to_value(&original).unwrap();
let restored: Tagged = serde_json::from_value(value).unwrap();
assert_eq!(original, restored);
}
}
133 changes: 102 additions & 31 deletions hashes/src/serde_macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,60 +23,120 @@ pub mod serde_details {
use core::{fmt, ops, str};

use crate::Error;
struct HexVisitor<ValueT>(PhantomData<ValueT>);
use serde::{de, Deserializer, Serializer};

impl<'de, ValueT> de::Visitor<'de> for HexVisitor<ValueT>
/// Single visitor that accepts every shape a hash can arrive in: an ASCII
/// hex string (`visit_str`), a UTF-8 byte slice that decodes as hex
/// (`visit_bytes` of length `2*N` — note `N` is in BYTES per the macro,
/// see `serde_impl!` invocation in `internal_macros.rs`), a raw byte
/// slice of the hash's length-in-bytes (`visit_bytes` of length `N`),
/// or a length-prefixed sequence of `u8` from non-self-describing
/// formats (`visit_seq`, used by bincode).
///
/// Required to interoperate with serde's `ContentDeserializer`, the
/// format-agnostic intermediate buffer serde uses to dispatch
/// internally-tagged enums (`#[serde(tag = "...")]`), `flatten`, and
/// untagged enums. `ContentDeserializer` always reports
/// `is_human_readable() == true` regardless of the upstream format. This
/// is intentional in serde's source — see long-standing upstream issues;
/// the maintainers consider it working-as-intended and the recommended
/// pattern is **"don't branch on `is_human_readable()` for shape dispatch
/// — accept any shape."** A value originally written by a
/// non-human-readable encoder (raw bytes) can therefore be replayed into
/// the human-readable branch as bytes / a byte-buf and must be accepted
/// there. See the regression tests in
/// `dash/src/hash_types.rs::serde_round_trip_through_internally_tagged_enum`.
struct AnyShapeVisitor<ValueT>(PhantomData<ValueT>);

impl<'de, ValueT> de::Visitor<'de> for AnyShapeVisitor<ValueT>
where
ValueT: FromStr,
ValueT: SerdeHash,
<ValueT as FromStr>::Err: fmt::Display,
{
type Value = ValueT;

fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("an ASCII hex string")
formatter.write_str("an ASCII hex string or a byte string of the hash length")
}

fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where
E: de::Error,
{
if let Ok(hex) = str::from_utf8(v) {
Self::Value::from_str(hex).map_err(E::custom)
} else {
Err(E::invalid_value(de::Unexpected::Bytes(v), &self))
}
Self::Value::from_str(v).map_err(E::custom)
}

fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
fn visit_borrowed_str<E>(self, v: &'de str) -> Result<Self::Value, E>
where
E: de::Error,
{
Self::Value::from_str(v).map_err(E::custom)
}
}

struct BytesVisitor<ValueT>(PhantomData<ValueT>);

impl<'de, ValueT> de::Visitor<'de> for BytesVisitor<ValueT>
where
ValueT: SerdeHash,
<ValueT as FromStr>::Err: fmt::Display,
{
type Value = ValueT;

fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("a bytestring")
fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
where
E: de::Error,
{
// Disambiguate by length. A correctly-sized raw hash byte string
// is exactly `N` bytes (`N` is in bytes per the macro — see
// `serde_impl!` invocation in `internal_macros.rs`); a hex-encoded
// form of that hash is `2*N` ASCII bytes. Any other length is
// rejected.
let raw_len_bytes = ValueT::N;
let hex_len_bytes = raw_len_bytes * 2;
if v.len() == raw_len_bytes {
SerdeHash::from_slice_delegated(v)
.map_err(|_| E::invalid_length(v.len(), &stringify!(N)))
} else if v.len() == hex_len_bytes {
if let Ok(hex) = str::from_utf8(v) {
Self::Value::from_str(hex).map_err(E::custom)
} else {
Err(E::invalid_value(de::Unexpected::Bytes(v), &self))
}
} else {
Err(E::invalid_length(v.len(), &self))
}
}

fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
fn visit_borrowed_bytes<E>(self, v: &'de [u8]) -> Result<Self::Value, E>
where
E: de::Error,
{
SerdeHash::from_slice_delegated(v).map_err(|_| {
// from_slice only errors on incorrect length
E::invalid_length(v.len(), &stringify!(N))
})
self.visit_bytes(v)
}

fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
where
A: de::SeqAccess<'de>,
{
// Used by bincode and any non-self-describing format that emits a
// length-prefixed sequence of u8. Use a stack buffer sized to fit
// the largest hash this trait services (sha512 / Hmac<sha512> at
// 64 bytes) so we keep `no_std`-compatible (no `Vec`/`alloc`).
// Bumping `MAX_HASH_BYTES` is only needed if a wider digest type
// is added — `debug_assert!` catches that in tests.
const MAX_HASH_BYTES: usize = 64;
let raw_len_bytes = ValueT::N;
debug_assert!(
raw_len_bytes <= MAX_HASH_BYTES,
"hash byte-length {} exceeds AnyShapeVisitor stack buffer ({}); bump MAX_HASH_BYTES",
raw_len_bytes,
MAX_HASH_BYTES,
);
let mut buf = [0u8; MAX_HASH_BYTES];
let mut len: usize = 0;
while let Some(b) = seq.next_element::<u8>()? {
if len == raw_len_bytes {
return Err(de::Error::invalid_length(len + 1, &stringify!(N)));
}
buf[len] = b;
len += 1;
}
if len != raw_len_bytes {
return Err(de::Error::invalid_length(len, &stringify!(N)));
}
SerdeHash::from_slice_delegated(&buf[..len])
.map_err(|_| de::Error::invalid_length(len, &stringify!(N)))
}
}

Expand All @@ -90,7 +150,7 @@ pub mod serde_details {
+ ops::Index<ops::RangeFull, Output = [u8]>,
<Self as FromStr>::Err: fmt::Display,
{
/// Size, in bits, of the hash.
/// Size of the hash, in bytes.
const N: usize;

/// Helper function to turn a deserialized slice into the correct hash type.
Expand All @@ -106,11 +166,22 @@ pub mod serde_details {
}

/// Do serde deserialization.
///
/// Uses a single visitor that accepts every shape a hash can arrive
/// in (ASCII hex string, raw byte slice, length-prefixed `u8`
/// sequence). The HR branch dispatches via `deserialize_any` to
/// handle both true human-readable deserializers (where the visitor
/// receives `visit_str`) and serde's `ContentDeserializer` (which
/// reports `is_human_readable() == true` even when wrapping bytes
/// from a non-HR source — internally-tagged enums, `flatten`, and
/// untagged enums all route through it). The non-HR branch keeps
/// `deserialize_bytes` because bincode is non-self-describing and
/// does not support `deserialize_any`.
fn deserialize<'de, D: Deserializer<'de>>(d: D) -> Result<Self, D::Error> {
if d.is_human_readable() {
d.deserialize_str(HexVisitor::<Self>(PhantomData))
d.deserialize_any(AnyShapeVisitor::<Self>(PhantomData))
} else {
d.deserialize_bytes(BytesVisitor::<Self>(PhantomData))
d.deserialize_bytes(AnyShapeVisitor::<Self>(PhantomData))
}
}
}
Expand Down
Loading