diff --git a/Cargo.lock b/Cargo.lock index 6551b5a..6482e4e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -361,6 +361,14 @@ dependencies = [ "zip", ] +[[package]] +name = "flutterdec-serwalker" +version = "0.1.0-alpha.2" +dependencies = [ + "goblin", + "paste", +] + [[package]] name = "foldhash" version = "0.1.5" @@ -564,6 +572,12 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + [[package]] name = "plain" version = "0.2.3" diff --git a/Cargo.toml b/Cargo.toml index ce04ace..a529bc5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,7 @@ members = [ "crates/flutterdec-disasm-arm64", "crates/flutterdec-ir", "crates/flutterdec-decompiler", + "crates/flutterdec-serwalker", ] resolver = "2" diff --git a/crates/flutterdec-serwalker/Cargo.toml b/crates/flutterdec-serwalker/Cargo.toml new file mode 100644 index 0000000..1885862 --- /dev/null +++ b/crates/flutterdec-serwalker/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "flutterdec-serwalker" +version.workspace = true +edition.workspace = true +license.workspace = true + +[dependencies] +goblin.workspace = true +paste = "1.0.15" diff --git a/crates/flutterdec-serwalker/src/cluster/mod.rs b/crates/flutterdec-serwalker/src/cluster/mod.rs new file mode 100644 index 0000000..c856999 --- /dev/null +++ b/crates/flutterdec-serwalker/src/cluster/mod.rs @@ -0,0 +1,98 @@ +use crate::constants::{ClassId, ClassId::*, SIGNED_M, UNSIGNED_M}; +use crate::raw_object::*; +use crate::stream::Stream; +use crate::DECLARE_FIXED_LENGTH_CLUSTER; +use crate::DECLARE_VARIABLE_LENGTH_CLUSTER; +use crate::FFI_TYPES_LIST; + +type Smi = i32; + +pub trait Cluster { + fn is_fixed_len(&self) -> bool; + fn read_alloc(&mut self, last_ref_id: &mut u64, 
stream: &mut Stream) -> usize; + fn read_fill(&mut self, stream: &mut Stream) -> usize; +} + +pub fn read_smi(stream: &mut Stream) -> Smi { + let raw_smi = stream.read_modified_leb128(SIGNED_M); // smis are always written as signed numbers + + raw_smi as Smi +} + +macro_rules! FFI_CASE_PATTERN { + ( $( $ffi_type:ident ),* ) => { + $( $ffi_type )|* + }; +} + +pub fn decide_cluster(class_id: ClassId) -> Result, &'static str> { + match class_id { + // we assume compressed pointers, it supports only Android for now... + IllegalCid => Err("Not a supported class (illegal class)..."), + FFI_TYPES_LIST!(FFI_CASE_PATTERN) => Err("To do..."), + _ => Err("Not a supported class..."), + } +} + +// These are the objects that call ReadAllocFixedSize during deserialization, +// whose fill cluster size is uniquely determined by sizeof(Object) * num_of_objects +// and alloc cluster size is tags (MULEB128) + num_of_objects (MULEB128) + +DECLARE_FIXED_LENGTH_CLUSTER!(TypeParameters<'a>, |_self, last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(PatchClass<'a>, |_self, last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(Function<'a>, |_self, last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(ClosureData<'a>, |_self, last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(FfiTrampolineData<'a>, |_self, last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(Field<'a>, |_self, last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(Script<'a>, |_self, last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(Library<'a>, |_self, last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(Namespace<'a>, |_self, last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(KernelProgramInfo<'a>, |_self, last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(UnlinkedCall, |_self, last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(ICData, |_self, last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(MegamorphicCache, |_self, 
last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(SubtypeTestCache, |_self, last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(LoadingUnit<'a>, |_self, last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(LanguageError, |_self, last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(UnhandledException, |_self, last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(LibraryPrefix, |_self, last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(Type<'a>, |_self, last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(FunctionType<'a>, |_self, last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(RecordType, |_self, last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(TypeParameter<'a>, |_self, last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(Closure<'a>, |_self, last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(Double, |_self, last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(Int32x4, |_self, last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(GrowableObjectArray<'a>, |_self, last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(TypedDataView<'a>, |_self, last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(ExternalTypedData, |_self, last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(StackTrace, |_self, last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(RegExp, |_self, last_ref_id, stream| { 1 }); +DECLARE_FIXED_LENGTH_CLUSTER!(WeakProperty, |_self, last_ref_id, stream| { 1 }); + +DECLARE_VARIABLE_LENGTH_CLUSTER!(Map); +DECLARE_VARIABLE_LENGTH_CLUSTER!(Set); +DECLARE_VARIABLE_LENGTH_CLUSTER!(Instance<'a>); +DECLARE_VARIABLE_LENGTH_CLUSTER!(TypedData<'a>); +DECLARE_VARIABLE_LENGTH_CLUSTER!(Class<'a>); +DECLARE_VARIABLE_LENGTH_CLUSTER!(TypeArguments<'a>); +DECLARE_VARIABLE_LENGTH_CLUSTER!(Code<'a>); +DECLARE_VARIABLE_LENGTH_CLUSTER!(ObjectPool); +DECLARE_VARIABLE_LENGTH_CLUSTER!(ExceptionHandlers<'a>); 
// crates/flutterdec-serwalker/src/constants.rs

use std::mem::size_of;

/// Magic number that opens every snapshot.
pub const MAGIC_BYTES: u32 = 0xdcdcf5f5;

// The header is read as a u32 magic number followed by a u64 length and a
// u64 kind, which gives the 20 bytes stated below.
pub const SNAPSHOT_MAGIC_NUMBER_SZ: usize = size_of::<u32>();
pub const SNAPSHOT_LEN_SZ: usize = size_of::<u64>();
pub const SNAPSHOT_KIND_SZ: usize = size_of::<u64>();

pub const SNAPSHOT_HEADER_SZ: usize = SNAPSHOT_MAGIC_NUMBER_SZ // 20 bytes of header
    + SNAPSHOT_LEN_SZ
    + SNAPSHOT_KIND_SZ;

pub const MAX_CLUSTER_NUM: usize = 67usize;

pub const UNSIGNED_END_OF_DATA_BYTE: u8 = 0x80u8; // last byte
pub const UNSIGNED_MAX_DATA_PER_BYTE: u8 = 0x7fu8; // more bytes to follow (for both)

pub const SIGNED_END_OF_DATA_BYTE: u8 = 0xc0u8; // last byte

/// Marker subtracted from the final byte of a signed modified-LEB128 number.
pub const SIGNED_M: u8 = SIGNED_END_OF_DATA_BYTE;
/// Marker subtracted from the final byte of an unsigned modified-LEB128 number.
pub const UNSIGNED_M: u8 = UNSIGNED_END_OF_DATA_BYTE;

pub const DATA_BITS_PER_BYTE: usize = 7usize;

pub const SMI_SHIFT: usize = 1usize;

/// Length of the version hash at the start of the version/features string.
pub const VERSION_HASH_LENGTH: usize = 32usize;

// Generates the `ClassId` enum together with a checked `u32 -> ClassId`
// conversion. Values are taken as `literal` fragments because they are used
// both as enum discriminants and as match patterns.
macro_rules! DEFINE_CLASS_ID {
    ( $( $name:ident = $val:literal ),* ) => {
        #[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
        #[repr(u32)]
        pub enum ClassId {
            #[default]
            IllegalCid = 0,
            $( $name = $val, )*
        }

        impl ::std::convert::TryFrom<u32> for ClassId {
            type Error = &'static str;

            /// Maps a raw class id onto its variant; unknown ids are an error.
            fn try_from(value: u32) -> Result<Self, Self::Error> {
                match value {
                    0 => Ok(ClassId::IllegalCid),
                    $( $val => Ok(ClassId::$name), )*
                    _ => Err("Invalid ClassId"),
                }
            }
        }
    };
}

DEFINE_CLASS_ID! {
    NativePointer = 1,
    FreeListElement = 2,
    ForwardingCorpse = 3,
    ObjectCid = 4,
    ClassCid = 5,
    PatchClassCid = 6,
    FunctionCid = 7,
    TypeParametersCid = 8,
    ClosureDataCid = 9,
    FfiTrampolineDataCid = 10,
    FieldCid = 11,
    ScriptCid = 12,
    LibraryCid = 13,
    NamespaceCid = 14,
    KernelProgramInfoCid = 15,
    WeakSerializationReferenceCid = 16,
    WeakArrayCid = 17,
    CodeCid = 18,
    BytecodeCid = 19,
    InstructionsCid = 20,
    InstructionsSectionCid = 21,
    InstructionsTableCid = 22,
    ObjectPoolCid = 23,
    PcDescriptorsCid = 24,
    CodeSourceMapCid = 25,
    CompressedStackMapsCid = 26,
    LocalVarDescriptorsCid = 27,
    ExceptionHandlersCid = 28,
    ContextCid = 29,
    ContextScopeCid = 30,
    SentinelCid = 31,
    SingleTargetCacheCid = 32,
    MonomorphicSmiableCallCid = 33,
    CallSiteDataCid = 34,
    UnlinkedCallCid = 35,
    ICDataCid = 36,
    MegamorphicCacheCid = 37,
    SubtypeTestCacheCid = 38,
    LoadingUnitCid = 39,
    ErrorCid = 40,
    ApiErrorCid = 41,
    LanguageErrorCid = 42,
    UnhandledExceptionCid = 43,
    UnwindErrorCid = 44,
    InstanceCid = 45,
    LibraryPrefixCid = 46,
    TypeArgumentsCid = 47,
    AbstractTypeCid = 48,
    TypeCid = 49,
    FunctionTypeCid = 50,
    RecordTypeCid = 51,
    TypeParameterCid = 52,
    FinalizerBaseCid = 53,
    FinalizerCid = 54,
    NativeFinalizerCid = 55,
    FinalizerEntryCid = 56,
    ClosureCid = 57,
    NumberCid = 58,
    IntegerCid = 59,
    SmiCid = 60,
    MintCid = 61,
    DoubleCid = 62,
    BoolCid = 63,
    Float32x4Cid = 64,
    Int32x4Cid = 65,
    Float64x2Cid = 66,
    RecordCid = 67,
    TypedDataBaseCid = 68,
    TypedDataCid = 69,
    ExternalTypedDataCid = 70,
    TypedDataViewCid = 71,
    PointerCid = 72,
    DynamicLibraryCid = 73,
    CapabilityCid = 74,
    ReceivePortCid = 75,
    SendPortCid = 76,
    StackTraceCid = 77,
    SuspendStateCid = 78,
    RegExpCid = 79,
    WeakPropertyCid = 80,
    WeakReferenceCid = 81,
    MirrorReferenceCid = 82,
    FutureOrCid = 83,
    UserTagCid = 84,
    TransferableTypedDataCid = 85,
    MapCid = 86,
    ConstMapCid = 87,
    SetCid = 88,
    ConstSetCid = 89,
    ArrayCid = 90,
    ImmutableArrayCid = 91,
    GrowableObjectArrayCid = 92,
    _StringCid = 93,
    OneByteStringCid = 94,
    TwoByteStringCid = 95,
    FfiNativeFunctionCid = 96,
    FfiInt8Cid = 97,
    FfiInt16Cid = 98,
    FfiInt32Cid = 99,
    FfiInt64Cid = 100,
    FfiUint8Cid = 101,
    FfiUint16Cid = 102,
    FfiUint32Cid = 103,
    FfiUint64Cid = 104,
    FfiFloatCid = 105,
    FfiDoubleCid = 106,
    FfiVoidCid = 107,
    FfiHandleCid = 108,
    FfiBoolCid = 109,
    FfiNativeTypeCid = 110,
    FfiStructCid = 111,
    TypedDataInt8ArrayCid = 112,
    TypedDataInt8ArrayViewCid = 113,
    ExternalTypedDataInt8ArrayCid = 114,
    UnmodifiableTypedDataInt8ArrayViewCid = 115,
    TypedDataUint8ArrayCid = 116,
    TypedDataUint8ArrayViewCid = 117,
    ExternalTypedDataUint8ArrayCid = 118,
    UnmodifiableTypedDataUint8ArrayViewCid = 119,
    TypedDataUint8ClampedArrayCid = 120,
    TypedDataUint8ClampedArrayViewCid = 121,
    ExternalTypedDataUint8ClampedArrayCid = 122,
    UnmodifiableTypedDataUint8ClampedArrayViewCid = 123,
    TypedDataInt16ArrayCid = 124,
    TypedDataInt16ArrayViewCid = 125,
    ExternalTypedDataInt16ArrayCid = 126,
    UnmodifiableTypedDataInt16ArrayViewCid = 127,
    TypedDataUint16ArrayCid = 128,
    TypedDataUint16ArrayViewCid = 129,
    ExternalTypedDataUint16ArrayCid = 130,
    UnmodifiableTypedDataUint16ArrayViewCid = 131,
    TypedDataInt32ArrayCid = 132,
    TypedDataInt32ArrayViewCid = 133,
    ExternalTypedDataInt32ArrayCid = 134,
    UnmodifiableTypedDataInt32ArrayViewCid = 135,
    TypedDataUint32ArrayCid = 136,
    TypedDataUint32ArrayViewCid = 137,
    ExternalTypedDataUint32ArrayCid = 138,
    UnmodifiableTypedDataUint32ArrayViewCid = 139,
    TypedDataInt64ArrayCid = 140,
    TypedDataInt64ArrayViewCid = 141,
    ExternalTypedDataInt64ArrayCid = 142,
    UnmodifiableTypedDataInt64ArrayViewCid = 143,
    TypedDataUint64ArrayCid = 144,
    TypedDataUint64ArrayViewCid = 145,
    ExternalTypedDataUint64ArrayCid = 146,
    UnmodifiableTypedDataUint64ArrayViewCid = 147,
    TypedDataFloat32ArrayCid = 148,
    TypedDataFloat32ArrayViewCid = 149,
    ExternalTypedDataFloat32ArrayCid = 150,
    UnmodifiableTypedDataFloat32ArrayViewCid = 151,
    TypedDataFloat64ArrayCid = 152,
    TypedDataFloat64ArrayViewCid = 153,
    ExternalTypedDataFloat64ArrayCid = 154,
    UnmodifiableTypedDataFloat64ArrayViewCid = 155,
    TypedDataFloat32x4ArrayCid = 156,
    TypedDataFloat32x4ArrayViewCid = 157,
    ExternalTypedDataFloat32x4ArrayCid = 158,
    UnmodifiableTypedDataFloat32x4ArrayViewCid = 159,
    TypedDataInt32x4ArrayCid = 160,
    TypedDataInt32x4ArrayViewCid = 161,
    ExternalTypedDataInt32x4ArrayCid = 162,
    UnmodifiableTypedDataInt32x4ArrayViewCid = 163,
    TypedDataFloat64x2ArrayCid = 164,
    TypedDataFloat64x2ArrayViewCid = 165,
    ExternalTypedDataFloat64x2ArrayCid = 166,
    UnmodifiableTypedDataFloat64x2ArrayViewCid = 167,
    ByteDataViewCid = 168,
    UnmodifiableByteDataViewCid = 169,
    ByteBufferCid = 170,
    NullCid = 171,
    DynamicCid = 172,
    VoidCid = 173,
    NeverCid = 174,
    NumPredefinedCids = 175
}

/// Passes the comma-separated list of FFI class ids to the `$callback` macro.
#[macro_export]
macro_rules! FFI_TYPES_LIST {
    ($callback:ident) => {
        $callback! {
            FfiNativeFunctionCid,
            FfiInt8Cid,
            FfiInt16Cid,
            FfiInt32Cid,
            FfiInt64Cid,
            FfiUint8Cid,
            FfiUint16Cid,
            FfiUint32Cid,
            FfiUint64Cid,
            FfiFloatCid,
            FfiDoubleCid,
            FfiVoidCid,
            FfiHandleCid,
            FfiBoolCid,
            FfiNativeTypeCid,
            FfiStructCid
        }
    };
}

/*
NOTE(review): disabled size constants; the type arguments of `size_of` were
already missing in the text this was recovered from, so they are left as found.

pub const NUM_BASE_OBJECTS_SZ: usize = size_of::();
pub const NUM_OBJECTS_SZ: usize = size_of::();
pub const NUM_CLUSTERS_SZ: usize = size_of::();

pub const INSTR_TABLE_LEN_SZ: usize = size_of::();
pub const INSTR_TABLE_OFFSET_SZ: usize = size_of::();

pub const CLUSTER_TAGS_SZ: usize = size_of::();
pub const CLUSTER_OBJ_COUNT_SZ: usize = size_of::();

pub const OBJECT_STORE_ENTRY_SIZE: usize = size_of::();
*/
// crates/flutterdec-serwalker/src/lib.rs
// Crate root: only declares the crate's modules.
mod constants;
mod utils;

mod cluster;
mod raw_object;
mod stream;

mod snapshot;

// crates/flutterdec-serwalker/src/raw_object/mod.rs
// Plain data structs that hold the fields of deserialized objects.
// Every struct derives `Default`.

// NOTE(review): the original comment here said "Using i64 for Smi fields to be
// decompressed", but the alias is i32 — confirm which width is intended.
type Smi = i32;

// --- Fixed-Size Objects with defined fields ---

#[derive(Default)]
pub struct Mint {
    pub value: i64,
}

#[derive(Default)]
pub struct Double {
    pub value: f64,
}

#[derive(Default)]
pub struct TypeArguments<'a> {
    pub instantiations: Option<&'a mut Array<'a>>, // ArrayPtr
    pub length: Smi,                               // Smi
    pub hash: Smi,                                 // Smi
    pub nullability: Smi,                          // Smi
}

#[derive(Default)]
pub struct TypeParameter<'a> {
    pub owner: Option<&'a mut Object<'a>>, // ObjectPtr
    pub base: i16,
    pub index: i16,
}

#[derive(Default)]
pub struct Type<'a> {
    pub arguments: Option<&'a mut TypeArguments<'a>>, // TypeArgumentsPtr
}

#[derive(Default)]
pub struct TypeParameters<'a> {
    pub names: Option<&'a mut Array<'a>>,            // ArrayPtr
    pub flags: Option<&'a mut Array<'a>>,            // ArrayPtr
    pub bounds: Option<&'a mut TypeArguments<'a>>,   // TypeArgumentsPtr
    pub defaults: Option<&'a mut TypeArguments<'a>>, // TypeArgumentsPtr
}

#[derive(Default)]
pub struct PatchClass<'a> {
    pub wrapped_class: Option<&'a mut Class<'a>>, // ClassPtr
    pub script: Option<&'a mut Script<'a>>,       // ScriptPtr
    pub kernel_program_info: Option<&'a mut KernelProgramInfo<'a>>, // KernelProgramInfoPtr
}

#[derive(Default)]
pub struct ClosureData<'a> {
    pub context_scope: Option<&'a mut ContextScope>,   // ContextScopePtr
    pub parent_function: Option<&'a mut Function<'a>>, // FunctionPtr
    pub closure: Option<&'a mut Closure<'a>>,          // ClosurePtr
    pub packed_fields: u32,
}

#[derive(Default)]
pub struct FfiTrampolineData<'a> {
    pub signature_type: Option<&'a mut Type<'a>>,      // TypePtr
    pub c_signature: Option<&'a mut FunctionType<'a>>, // FunctionTypePtr
    pub callback_target: Option<&'a mut Function<'a>>, // FunctionPtr
    pub callback_exceptional_return: Option<&'a mut Instance<'a>>, // InstancePtr
    pub ffi_function_kind: u8,
    pub callback_id: i32,
}

#[derive(Default)]
pub struct Field<'a> {
    pub name: _String,                                      // StringPtr -> Raw String
    pub owner: Option<&'a mut Object<'a>>,                  // ObjectPtr
    pub type_field: Option<&'a mut AbstractType<'a>>,       // AbstractTypePtr
    pub initializer_function: Option<&'a mut Function<'a>>, // FunctionPtr
    pub host_offset_or_field_id: Smi,                       // Smi
    pub guarded_list_length: Smi,                           // Smi
    pub exact_type: Option<&'a mut AbstractType<'a>>,       // AbstractTypePtr
    pub dependent_code: Option<&'a mut WeakArray<'a>>,      // WeakArrayPtr
    pub kernel_offset: i32,
    pub guarded_list_length_in_object_offset: i8,
    pub static_type_exactness_state: i8,
    pub target_offset: i32,
    pub kind_bits: u32,
}

#[derive(Default)]
pub struct Namespace<'a> {
    pub target: Option<&'a mut Library<'a>>,   // LibraryPtr
    pub show_names: Option<&'a mut Array<'a>>, // ArrayPtr
    pub hide_names: Option<&'a mut Array<'a>>, // ArrayPtr
    pub owner: Option<&'a mut Library<'a>>,    // LibraryPtr
}

#[derive(Default)]
pub struct KernelProgramInfo<'a> {
    pub kernel_component: Option<&'a mut TypedDataBase<'a>>, // TypedDataBasePtr
    pub string_offsets: Option<&'a mut TypedData<'a>>,       // TypedDataPtr
    pub string_data: Option<&'a mut TypedDataView<'a>>,      // TypedDataViewPtr
    pub canonical_names: Option<&'a mut TypedData<'a>>,      // TypedDataPtr
    pub metadata_payloads: Option<&'a mut TypedDataView<'a>>, // TypedDataViewPtr
    pub metadata_mappings: Option<&'a mut TypedDataView<'a>>, // TypedDataViewPtr
    pub scripts: Option<&'a mut Array<'a>>,                  // ArrayPtr
    pub constants: Option<&'a mut Array<'a>>,                // ArrayPtr
    pub constants_table: Option<&'a mut TypedDataView<'a>>,  // TypedDataViewPtr
    pub libraries_cache: Option<&'a mut Array<'a>>,          // ArrayPtr
    pub classes_cache: Option<&'a mut Array<'a>>,            // ArrayPtr
}

#[derive(Default)]
pub struct ExceptionHandlers<'a> {
    pub handled_types_data: Option<&'a mut Array<'a>>, // ArrayPtr
    pub packed_fields: u32,
}

#[derive(Default)]
pub struct Context<'a> {
    pub parent: Option<&'a mut Context<'a>>, // ContextPtr
    pub num_variables: i32,
}

#[derive(Default)]
pub struct UnlinkedCall {
    pub can_patch_to_monomorphic: bool,
}

#[derive(Default)]
pub struct _String {
    // The leading underscore keeps this type from clashing with Rust's own `String`.
    pub hash: Smi,   // Smi
    pub length: Smi, // Smi
    pub inner_string: String,
}

#[derive(Default)]
pub struct Class<'a> {
    pub name: _String,                                       // StringPtr -> Raw String
    pub user_name: _String,                                  // StringPtr -> Raw String
    pub functions: Option<&'a mut Array<'a>>,                // ArrayPtr
    pub functions_hash_table: Option<&'a mut Array<'a>>,     // ArrayPtr
    pub fields: Option<&'a mut Array<'a>>,                   // ArrayPtr
    pub offset_in_words_to_field: Option<&'a mut Array<'a>>, // ArrayPtr
    pub interfaces: Option<&'a mut Array<'a>>,               // ArrayPtr
    pub script: Option<&'a mut Script<'a>>,                  // ScriptPtr
    pub library: Option<&'a mut Library<'a>>,                // LibraryPtr
    pub type_parameters: Option<&'a mut TypeParameters<'a>>, // TypeParametersPtr
    pub super_type: Option<&'a mut Type<'a>>,                // TypePtr
    pub constants: Option<&'a mut Array<'a>>,                // ArrayPtr
    pub declaration_type: Option<&'a mut Type<'a>>,          // TypePtr
    pub invocation_dispatcher_cache: Option<&'a mut Array<'a>>, // ArrayPtr
    pub direct_implementors: Option<&'a mut GrowableObjectArray<'a>>, // GrowableObjectArrayPtr
    pub direct_subclasses: Option<&'a mut GrowableObjectArray<'a>>, // GrowableObjectArrayPtr
    pub declaration_instance_type_arguments: Option<&'a mut TypeArguments<'a>>, // TypeArgumentsPtr
    pub allocation_stub: Option<&'a mut Code<'a>>,           // CodePtr
    pub dependent_code: Option<&'a mut WeakArray<'a>>,       // WeakArrayPtr
    pub num_type_arguments: i16,
    pub num_native_fields: u16,
    pub state_bits: u32,
    pub host_instance_size_in_words: i32,
    pub host_type_arguments_field_offset_in_words: i32,
    pub host_next_field_offset_in_words: i32,
    pub target_instance_size_in_words: i32,
    pub target_type_arguments_field_offset_in_words: i32,
    pub target_next_field_offset_in_words: i32,
    pub kernel_offset: i32,
}

#[derive(Default)]
pub struct Function<'a> {
    pub name: _String,                                         // StringPtr -> Raw String
    pub owner: Option<&'a mut Object<'a>>,                     // ObjectPtr
    pub signature: Option<&'a mut FunctionType<'a>>,           // FunctionTypePtr
    pub data: Option<&'a mut Object<'a>>,                      // ObjectPtr
    pub ic_data_array_or_bytecode: Option<&'a mut Object<'a>>, // ObjectPtr
    pub code: Option<&'a mut Code<'a>>,                        // CodePtr
    pub positional_parameter_names: Option<&'a mut Array<'a>>, // ArrayPtr
    pub unoptimized_code: Option<&'a mut Code<'a>>,            // CodePtr
    pub bitmap: u64,
    pub kernel_offset: i32,
    pub kind_tag: u32,
}

#[derive(Default)]
pub struct Library<'a> {
    pub name: _String,                                         // StringPtr -> Raw String
    pub url: _String,                                          // StringPtr -> Raw String
    pub private_key: _String,                                  // StringPtr -> Raw String
    pub dictionary: Option<&'a mut Array<'a>>,                 // ArrayPtr
    pub metadata: Option<&'a mut Array<'a>>,                   // ArrayPtr
    pub toplevel_class: Option<&'a mut Class<'a>>,             // ClassPtr
    pub used_scripts: Option<&'a mut GrowableObjectArray<'a>>, // GrowableObjectArrayPtr
    pub loading_unit: Option<&'a mut LoadingUnit<'a>>,         // LoadingUnitPtr
    pub imports: Option<&'a mut Array<'a>>,                    // ArrayPtr
    pub exports: Option<&'a mut Array<'a>>,                    // ArrayPtr
    pub dependencies: Option<&'a mut Array<'a>>,               // ArrayPtr
    pub kernel_program_info: Option<&'a mut KernelProgramInfo<'a>>, // KernelProgramInfoPtr
    pub loaded_scripts: Option<&'a mut Array<'a>>,             // ArrayPtr
    pub num_imports: u16,
    pub load_state: i8,
    pub flags: u8,
    pub kernel_library_index: i32,
}

#[derive(Default)]
pub struct ContextScope {
    pub num_variables: i32,
    pub is_implicit: bool,
}

// Fieldless classes
// These classes either have no additional payload fields beyond the standard
// instance headers or their payloads are purely variable-length or dynamically
// read/overlaid on top of a byte stream

#[derive(Default)]
pub struct CodeSourceMap;

#[derive(Default)]
pub struct CompressedStackMaps;

#[derive(Default)]
pub struct PcDescriptors;

#[derive(Default)]
pub struct ObjectPool;

// Placeholder structs for references used above that aren't defined yet.
// The file wouldn't compile without them; for now they remain unimplemented.
// `PhantomData` lets each placeholder carry the `'a` lifetime without data.
#[derive(Default)]
pub struct Array<'a> {
    _marker: std::marker::PhantomData<&'a ()>,
}
#[derive(Default)]
pub struct Object<'a> {
    _marker: std::marker::PhantomData<&'a ()>,
}
#[derive(Default)]
pub struct AbstractType<'a> {
    _marker: std::marker::PhantomData<&'a ()>,
}
#[derive(Default)]
pub struct FunctionType<'a> {
    _marker: std::marker::PhantomData<&'a ()>,
}
#[derive(Default)]
pub struct Script<'a> {
    _marker: std::marker::PhantomData<&'a ()>,
}
#[derive(Default)]
pub struct Closure<'a> {
    _marker: std::marker::PhantomData<&'a ()>,
}
#[derive(Default)]
pub struct Instance<'a> {
    _marker: std::marker::PhantomData<&'a ()>,
}
#[derive(Default)]
pub struct WeakArray<'a> {
    _marker: std::marker::PhantomData<&'a ()>,
}
#[derive(Default)]
pub struct TypedDataBase<'a> {
    _marker: std::marker::PhantomData<&'a ()>,
}
#[derive(Default)]
pub struct TypedData<'a> {
    _marker: std::marker::PhantomData<&'a ()>,
}
#[derive(Default)]
pub struct TypedDataView<'a> {
    _marker: std::marker::PhantomData<&'a ()>,
}
#[derive(Default)]
pub struct GrowableObjectArray<'a> {
    _marker: std::marker::PhantomData<&'a ()>,
}
#[derive(Default)]
pub struct Code<'a> {
    _marker: std::marker::PhantomData<&'a ()>,
}
#[derive(Default)]
pub struct LoadingUnit<'a> {
    _marker: std::marker::PhantomData<&'a ()>,
}

// Unit structs: names only, no fields yet.
#[derive(Default)]
pub struct ICData;
#[derive(Default)]
pub struct MegamorphicCache;
#[derive(Default)]
pub struct SubtypeTestCache;
#[derive(Default)]
pub struct LanguageError;
#[derive(Default)]
pub struct UnhandledException;
#[derive(Default)]
pub struct LibraryPrefix;
#[derive(Default)]
pub struct RecordType;
#[derive(Default)]
pub struct Int32x4;
#[derive(Default)]
pub struct ExternalTypedData;
#[derive(Default)]
pub struct StackTrace;
#[derive(Default)]
pub struct RegExp;
#[derive(Default)]
pub struct WeakProperty;
#[derive(Default)]
pub struct Map;
#[derive(Default)]
pub struct Set;
#[derive(Default)]
pub struct Float32x4;
#[derive(Default)]
pub struct Float64x2;
#[derive(Default)]
pub struct ConstMap;
#[derive(Default)]
pub struct ConstSet;
#[derive(Default)]
pub struct Record;
#[derive(Default)]
pub struct ImmutableArray;
#[derive(Default)]
pub struct OneByteString;
#[derive(Default)]
pub struct TwoByteString;
+pub struct Float32x4; +#[derive(Default)] +pub struct Float64x2; +#[derive(Default)] +pub struct ConstMap; +#[derive(Default)] +pub struct ConstSet; +#[derive(Default)] +pub struct Record; +#[derive(Default)] +pub struct ImmutableArray; +#[derive(Default)] +pub struct OneByteString; +#[derive(Default)] +pub struct TwoByteString; diff --git a/crates/flutterdec-serwalker/src/snapshot.rs b/crates/flutterdec-serwalker/src/snapshot.rs new file mode 100644 index 0000000..c4cbe5c --- /dev/null +++ b/crates/flutterdec-serwalker/src/snapshot.rs @@ -0,0 +1,134 @@ +use std::collections::HashMap; + +use crate::cluster::{decide_cluster, Cluster}; +use crate::constants::{self, ClassId, MAGIC_BYTES, UNSIGNED_M}; +use crate::stream::Stream; +use crate::utils::{decode_tags, DecodedTags}; + +#[derive(Default)] +enum SnapshotKind +// pulled straight out of the C++ def +{ + Full, + FullCore, + FullJIT, + FullAOT, // Full + AOT code, this is the one we care about, as th + Module, + #[default] + None, + Invalid, +} + +impl TryFrom for SnapshotKind { + type Error = &'static str; + + fn try_from(value: u64) -> Result { + match value { + 0 => Ok(SnapshotKind::Full), + 1 => Ok(SnapshotKind::FullCore), + 2 => Ok(SnapshotKind::FullJIT), + 3 => Ok(SnapshotKind::FullAOT), + 4 => Ok(SnapshotKind::Module), + 5 => Ok(SnapshotKind::None), + 6 => Ok(SnapshotKind::Invalid), + _ => Err("Invalid snapshot kind... 
Either headers are corrupt, or this is not a snapshot at all."), // Handle invalid snapshot kidjns + } + } +} + +#[derive(Default)] +pub struct DataSnapshot { + // this array will contain mutable references to all clusters, and it will be indexed using the class id + clusters: HashMap>, // thus each cluster must have its own UNIQUE class id + cluster_order: Vec, // used in the fill step to know which cluster's read_fill function to call + + magic_bytes: u32, + size: u64, + kind: SnapshotKind, + + version_hash: String, + features: String, + + num_base_objects: u64, + num_objects: u64, + num_clusters: u64, + + instr_table_len: usize, + instr_table_offset: usize, + + start_of_alloc_area: usize, + start_of_fill_area: usize, + + end_of_alloc_area: usize, + end_of_fill_area: usize, +} + +impl DataSnapshot { + fn parse_version_and_features(&mut self, stream: &mut Stream) { + let mut version_and_features = stream.read_c_string(); + + self.features = version_and_features.split_off(constants::VERSION_HASH_LENGTH); // returns (str[hash_len..]) + self.version_hash = version_and_features; + } + + fn parse_header(&mut self, stream: &mut Stream) { + self.magic_bytes = stream.read_u32(); + + if self.magic_bytes != MAGIC_BYTES { + panic!("Not a snapshot...") + } + + self.size = stream.read_u64(); + self.kind = SnapshotKind::try_from(stream.read_u64()).unwrap(); + + self.parse_version_and_features(stream); + + self.num_base_objects = stream.read_modified_leb128(UNSIGNED_M); + self.num_objects = stream.read_modified_leb128(UNSIGNED_M); + self.num_clusters = stream.read_modified_leb128(UNSIGNED_M); + + self.instr_table_len = stream.read_modified_leb128(UNSIGNED_M) as usize; + self.instr_table_offset = stream.read_modified_leb128(UNSIGNED_M) as usize; + } + + fn parse_clusters(&mut self, stream: &mut Stream) { + let mut curr_ref_id: u64 = 0; // all objects are numbered starting from 0 + + self.start_of_alloc_area = stream.get_current_pos(); + for _cluster_idx in 0..self.num_clusters { 
+ let tags: u32 = stream.read_modified_leb128(UNSIGNED_M) as u32; + let decoded_tags: DecodedTags = decode_tags(tags); + let cid = decoded_tags.get_cid(); + + let mut cluster = + decide_cluster(cid).expect("Couldn't find cluster implementation for class {cid}"); + + cluster.read_alloc(&mut curr_ref_id, stream); + self.clusters.insert(cid, cluster); // hashmap takes ownership of box + self.cluster_order.push(cid); + } + self.end_of_alloc_area = stream.get_current_pos(); + + self.start_of_fill_area = stream.get_current_pos(); + for cluster_idx in 0..self.num_clusters { + let cid = self.cluster_order[cluster_idx as usize]; + let cluster_wrapper = self.clusters.get_mut(&cid); + + let cluster = cluster_wrapper.unwrap(); // this should never panic + (*cluster).read_fill(stream); + } + self.end_of_fill_area = stream.get_current_pos(); + } + + fn parse_roots(&mut self, stream: &mut Stream) {} +} + +pub fn parse_snapshot(stream: &mut Stream) -> DataSnapshot { + let mut snapshot = DataSnapshot::default(); + + println!("Now parsing the snapshot..."); + snapshot.parse_header(stream); + snapshot.parse_clusters(stream); + + snapshot +} diff --git a/crates/flutterdec-serwalker/src/stream.rs b/crates/flutterdec-serwalker/src/stream.rs new file mode 100644 index 0000000..35681ac --- /dev/null +++ b/crates/flutterdec-serwalker/src/stream.rs @@ -0,0 +1,155 @@ +use crate::constants::{DATA_BITS_PER_BYTE, UNSIGNED_END_OF_DATA_BYTE, UNSIGNED_MAX_DATA_PER_BYTE}; +pub struct Stream<'a> { + byte_stream: &'a [u8], + curr_stream_offset: usize, +} + +impl<'a> Stream<'a> { + fn seek(&mut self, pos: usize) // might be useful? + { + if self.byte_stream.len() > pos && pos >= 0 { + self.curr_stream_offset = pos; + } + } + + pub fn advance_pos(&mut self, num_bytes: usize) { + self.curr_stream_offset += num_bytes; + } + + pub fn get_current_pos(&self) -> usize { + self.curr_stream_offset + } + /* + Reads a modified uleb from the current stream offset. + + Dart uses a modified LEB128 format. 
The normal format uses bytes with their MSb set in order + to signify that there are more bytes ahead, and the last byte has its MSb unset, whereas Dart's + implementation does the opposite. The "continuation" bit on each byte is 0, and the last byte + has its MSb set. + */ + pub fn read_modified_leb128(&mut self, sign_marker: u8) -> u64 // 8 bytes should be enough for anything... + { + let mut idx: u8 = 0; + + let first_byte = self.byte_stream[self.curr_stream_offset]; + if first_byte > UNSIGNED_MAX_DATA_PER_BYTE + // if the first byte has its MSb set + { + self.advance_pos(1); + // wrapping_sub mimics C++ unsigned underflow, giving us perfect sign-extension + // for negative numbers, while behaving normally for positive numbers. gotta get used to this :) + return (first_byte as u64).wrapping_sub(sign_marker as u64); + } + + let mut read_num: u64 = 0; + let mut byte: u8; + + loop { + byte = self.byte_stream[self.curr_stream_offset + idx as usize]; + if byte & UNSIGNED_END_OF_DATA_BYTE == UNSIGNED_END_OF_DATA_BYTE { + break; + } // final byte + read_num |= (byte as u64) << (idx as usize * DATA_BITS_PER_BYTE); + idx += 1; + } + + self.advance_pos((idx + 1) as usize); // advance the stream position + + // Same wrapping trick for the final byte + let final_chunk = (byte as u64).wrapping_sub(sign_marker as u64); + read_num |= final_chunk << (idx as usize * DATA_BITS_PER_BYTE); + + read_num + } + + pub fn read_u64(&mut self) -> u64 { + let u64_size = std::mem::size_of::(); + let num_slice = + &self.byte_stream[self.curr_stream_offset..self.curr_stream_offset + u64_size]; + + let converted_slice: [u8; 8] = num_slice.try_into().expect("Slice wasn't 8 bytes long..."); + + self.advance_pos(u64_size); + + u64::from_le_bytes(converted_slice) + } + + pub fn read_u32(&mut self) -> u32 { + let u32_size = std::mem::size_of::(); + let num_slice = + &self.byte_stream[self.curr_stream_offset..self.curr_stream_offset + u32_size]; + + let converted_slice: [u8; 4] = 
num_slice.try_into().expect("Slice wasn't 4 bytes long..."); + + self.advance_pos(u32_size); + + u32::from_le_bytes(converted_slice) + } + + /* + Panics if it isn't possible to create a stream from the utf-8 representation stored in + the byte slice. It shouldn't happen, so the best possible outcome is to assume some + logic mistake has been made and end the application. It should be a good thing to change this to an + unwrap_or_else so that we can also print the stream offset and cluster where this error occurred, in order + to have static debug info. + */ + pub fn read_c_string(&mut self) -> String { + let first_nullbyte_pos = self.byte_stream[self.curr_stream_offset..] + .iter() + .position(|&b| b == 0x00) + .expect( + "Reading a string until the end of the stream? Something definitely went wrong...", + ); + + let raw_str = &self.byte_stream + [self.curr_stream_offset..self.curr_stream_offset + first_nullbyte_pos]; + self.advance_pos(raw_str.len() + 1); + + String::from_utf8(raw_str.to_vec()) + .expect("Couldn't turn null-terminated UTF-8 bytes into a String.") // it should be horrible if for some reason a string just isn't there + } + + // read a non null-terminated string given a length + pub fn read_string(&mut self, len: usize) -> String { + let final_pos = self.curr_stream_offset + len; + let raw_str = &self.byte_stream[self.curr_stream_offset..final_pos]; + + self.advance_pos(len); + + String::from_utf8(raw_str.to_vec()).expect("Couldn't turn UTF-8 bytes into a String.") + } + + /* + Complex object types (i.e, object types that contain other object types) + point to other objects through refids, which is essentially the core + mechanism of Dart's serialization/deserialization process, allowing the + reconstruction of all objects from the snapshot into the heap. 
+ */ + pub fn read_ref_id(&mut self) -> u32 { + let mut idx: usize = 0; + let mut byte: i8 = self.byte_stream[self.curr_stream_offset + idx] as i8; + let mut ref_id: i32 = 0; // as far as I know, ref_ids are up to 2^28, so 32 bits is good enough + + if byte < 0 { + ref_id += byte as i32; + self.advance_pos(1); + return (ref_id + 128) as u32; + } + + loop { + ref_id = ref_id << 7; + ref_id += byte as i32; + idx += 1; + + if byte < 0 { + break; + } + + byte = self.byte_stream[self.curr_stream_offset + idx] as i8; + } + + self.advance_pos(idx); + + (ref_id + 128) as u32 // ref_ids are always unsigned + } +} diff --git a/crates/flutterdec-serwalker/src/utils.rs b/crates/flutterdec-serwalker/src/utils.rs new file mode 100644 index 0000000..4fe79b1 --- /dev/null +++ b/crates/flutterdec-serwalker/src/utils.rs @@ -0,0 +1,141 @@ +use paste::paste; + +use crate::constants::ClassId; + +#[macro_export] +macro_rules! DECLARE_FIXED_LENGTH_CLUSTER { + ($name:ident $(<$lt:lifetime>)?, |$_self:ident, $last_ref_id:ident, $stream:ident| $fill_impl:block) => { + ::paste::paste! { // this is ugly, but the language doesn't support identifier concatenation + pub struct [<$name Cluster>] $(<$lt>)? + { + tags: u32, + obj_count: u64, + + start_of_fill: usize, + start_of_alloc: usize, + + end_of_fill: usize, + end_of_alloc: usize, + + first_ref_id: u32, + + objs: Vec)? >> // a pair (ref_id, object) + } + + impl $(<$lt>)? Cluster for [<$name Cluster>] $(<$lt>)? // optional lifetime parameter + { + fn read_alloc(&mut self, last_ref_id: &mut u64, stream: &mut Stream) -> usize // read tags and count + { + self.start_of_alloc = stream.get_current_pos(); + self.first_ref_id = *last_ref_id as u32; // later used to index the objs vector + + self.obj_count = stream.read_modified_leb128(UNSIGNED_M); + + for _obj_idx in 0..self.obj_count + { + self.objs.push(Box::<$name $(<$lt>)? 
>::default()); + } + + *last_ref_id = *last_ref_id + self.obj_count; + self.end_of_alloc = stream.get_current_pos(); + + self.end_of_alloc - self.start_of_alloc + } + + fn read_fill(&mut self, stream: &mut Stream) -> usize + { + self.start_of_fill = stream.get_current_pos(); + + let $_self = self; + let $stream = stream; + + let fill_size = $fill_impl; + + $_self.end_of_fill = $stream.get_current_pos(); + fill_size + } + + fn is_fixed_len(&self) -> bool + { + true + } + } + + } + } +} + +#[macro_export] +macro_rules! DECLARE_VARIABLE_LENGTH_CLUSTER { + ($name:ident $(<$lt:lifetime>)?) => { + ::paste::paste! { + pub struct [<$name Cluster>] $(<$lt>)? + { + tags: u32, + obj_count: u64, + + start_of_fill: usize, + start_of_alloc: usize, + + end_of_fill: usize, + end_of_alloc: usize, + + first_ref_id: u32, + + objs: Vec)? >> // a pair (ref_id, object) + } + } + } +} + +pub struct DecodedTags { + class_id: ClassId, + is_deeply_immutable: bool, // we don't really care about this for now, at least + is_canonical: bool, +} + +impl DecodedTags { + pub fn new(cid: ClassId, immut: bool, canonical: bool) -> Self { + Self { + class_id: cid, + is_deeply_immutable: immut, + is_canonical: canonical, + } + } + + pub fn get_cid(&self) -> ClassId { + self.class_id + } + + pub fn is_deeply_immutable(&self) -> bool { + self.is_deeply_immutable + } + + pub fn is_canonical(&self) -> bool { + self.is_canonical + } +} + +macro_rules! DECODE_IS_CID { + ($tags:expr) => { + ClassId::try_from(($tags >> 12) & 0xFFFFF) + }; +} +macro_rules! DECODE_IS_DEEPLY_IMMUTABLE { + ($tags:expr) => { + (($tags >> 7) & 0x1) == 1 + }; +} +macro_rules! 
DECODE_IS_CANONICAL { + ($tags:expr) => { + (($tags >> 1) & 0x1) == 1 + }; +} + +pub fn decode_tags(tags: u32) -> DecodedTags { + let class_id: ClassId = DECODE_IS_CID!(tags).unwrap(); + let is_deeply_immutable: bool = DECODE_IS_DEEPLY_IMMUTABLE!(tags); + let is_canonical: bool = DECODE_IS_CANONICAL!(tags); + + DecodedTags::new(class_id, is_deeply_immutable, is_canonical) +} diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 73cb934..3fe7418 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,3 +1,3 @@ [toolchain] channel = "stable" -components = ["rustfmt", "clippy"] +components = ["rustfmt", "clippy", "rust-analyzer"]