diff --git a/src/filters/fb_network.rs b/src/filters/fb_network.rs index 505caa17..9c8be08d 100644 --- a/src/filters/fb_network.rs +++ b/src/filters/fb_network.rs @@ -8,24 +8,37 @@ use crate::regex_manager::RegexManager; use crate::request::Request; use crate::filters::flatbuffer_generated::fb; + /// A list of string parts that can be matched against a URL. -pub(crate) struct FlatPatterns<'a> { - patterns: Option>>, +pub(crate) enum FlatPatterns<'a> { + /// No patterns to match + Empty, + /// Memory-usage optimization - ~95% of filters have <= 1 pattern. Special-casing avoids the + /// need to hold an extra pointer and vector length. + Single(&'a str), + /// More than 1 pattern to match + Multi(flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>>), } impl<'a> FlatPatterns<'a> { #[inline(always)] pub fn new( - patterns: Option>>, + single_pattern: Option<&'a str>, + multi_patterns: Option>>, ) -> Self { - Self { patterns } + if let Some(single_pattern) = single_pattern { + FlatPatterns::Single(single_pattern) + } else if let Some(patterns) = multi_patterns { + FlatPatterns::Multi(patterns) + } else { + FlatPatterns::Empty + } } #[inline(always)] pub fn iter(&self) -> FlatPatternsIterator<'_> { FlatPatternsIterator { patterns: self, - len: self.patterns.map_or(0, |d| d.len()), index: 0, } } @@ -34,7 +47,6 @@ impl<'a> FlatPatterns<'a> { /// Iterator over [FlatPatterns]. pub(crate) struct FlatPatternsIterator<'a> { patterns: &'a FlatPatterns<'a>, - len: usize, index: usize, } @@ -43,21 +55,37 @@ impl<'a> Iterator for FlatPatternsIterator<'a> { #[inline(always)] fn next(&mut self) -> Option { - self.patterns.patterns.and_then(|fi| { - if self.index < self.len { - self.index += 1; - Some(fi.get(self.index - 1)) - } else { - None + match &self.patterns { + FlatPatterns::Empty => None, + FlatPatterns::Single(s) => { + if self.index == 0 { + self.index += 1; + Some(*s) + } else { + None + } + } + FlatPatterns::Multi(v) => { + if self.index < v.len() { + let result = v.get(self.index); + self.index += 1; + Some(result) + } else { + None + } } - }) + } } } impl ExactSizeIterator for FlatPatternsIterator<'_> { #[inline(always)] fn len(&self) -> usize { - self.len + match &self.patterns { + FlatPatterns::Empty => 0, + FlatPatterns::Single(_) => 1_usize.saturating_sub(self.index), + FlatPatterns::Multi(v) => v.len().saturating_sub(self.index), + } } } @@ -123,7 +151,10 @@ impl<'a> FlatNetworkFilter<'a> { #[inline(always)] pub fn patterns(&self) -> FlatPatterns<'_> { - FlatPatterns::new(self.fb_filter.patterns()) + FlatPatterns::new( + self.fb_filter.single_pattern(), + self.fb_filter.multi_patterns(), + ) } #[inline(always)] diff --git a/src/filters/fb_network_builder.rs b/src/filters/fb_network_builder.rs index 6f54ae8b..d07cdbdf 100644 --- a/src/filters/fb_network_builder.rs +++ b/src/filters/fb_network_builder.rs @@ -81,15 +81,22 @@ impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for &NetworkFilter { .as_ref() .map(|s| builder.create_string(s)); - let patterns = if network_filter.filter.iter().len() > 0 { - let offsets: Vec> = network_filter - .filter - .iter() - .map(|s| builder.create_string(s)) - .collect(); - Some(FlatSerialize::serialize(offsets, builder)) + let mut filter_iter = network_filter.filter.iter(); + let filter_count = filter_iter.len(); + + // Use single_pattern for the common case of 0 or 1 patterns to avoid + // the overhead of a FlatBuffers vector (extra table + offset indirection). + let (single_pattern, multi_patterns) = if filter_count <= 1 { + let single = filter_iter.next().map(|s| builder.create_string(s)); + (single, None) } else { - None + ( + None, + Some(FlatSerialize::serialize( + filter_iter.collect::>(), + builder, + )), + ) }; let raw_line = network_filter @@ -101,7 +108,8 @@ impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for &NetworkFilter { builder.raw_builder(), &fb::NetworkFilterArgs { mask: network_filter.mask.bits(), - patterns, + single_pattern, + multi_patterns, modifier_option, opt_domains, opt_not_domains, diff --git a/src/filters/network.rs b/src/filters/network.rs index fc2e0847..c5a45d53 100644 --- a/src/filters/network.rs +++ b/src/filters/network.rs @@ -331,8 +331,8 @@ impl ExactSizeIterator for FilterPartIterator<'_> { fn len(&self) -> usize { match self.filter_part { FilterPart::Empty => 0, - FilterPart::Simple(_) => 1, - FilterPart::AnyOf(vec) => vec.len(), + FilterPart::Simple(_) => 1usize.saturating_sub(self.index), + FilterPart::AnyOf(vec) => vec.len().saturating_sub(self.index), } } } diff --git a/src/flatbuffers/fb_network_filter.fbs b/src/flatbuffers/fb_network_filter.fbs index f36bde07..599847d0 100644 --- a/src/flatbuffers/fb_network_filter.fbs +++ b/src/flatbuffers/fb_network_filter.fbs @@ -18,7 +18,11 @@ table NetworkFilter { opt_domains: [uint32]; opt_not_domains: [uint32]; - patterns: [string]; + // A union-like storage for pattern. Prefer |single_pattern| where + // possible for performance concerns. + single_pattern: string; + multi_patterns: [string]; + modifier_option: string; hostname: string; diff --git a/src/flatbuffers/fb_network_filter_generated.rs b/src/flatbuffers/fb_network_filter_generated.rs index a90e295e..df1cc3fb 100644 --- a/src/flatbuffers/fb_network_filter_generated.rs +++ b/src/flatbuffers/fb_network_filter_generated.rs @@ -38,11 +38,12 @@ pub mod fb { pub const VT_MASK: flatbuffers::VOffsetT = 4; pub const VT_OPT_DOMAINS: flatbuffers::VOffsetT = 6; pub const VT_OPT_NOT_DOMAINS: flatbuffers::VOffsetT = 8; - pub const VT_PATTERNS: flatbuffers::VOffsetT = 10; - pub const VT_MODIFIER_OPTION: flatbuffers::VOffsetT = 12; - pub const VT_HOSTNAME: flatbuffers::VOffsetT = 14; - pub const VT_TAG: flatbuffers::VOffsetT = 16; - pub const VT_RAW_LINE: flatbuffers::VOffsetT = 18; + pub const VT_SINGLE_PATTERN: flatbuffers::VOffsetT = 10; + pub const VT_MULTI_PATTERNS: flatbuffers::VOffsetT = 12; + pub const VT_MODIFIER_OPTION: flatbuffers::VOffsetT = 14; + pub const VT_HOSTNAME: flatbuffers::VOffsetT = 16; + pub const VT_TAG: flatbuffers::VOffsetT = 18; + pub const VT_RAW_LINE: flatbuffers::VOffsetT = 20; #[inline] pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { @@ -71,8 +72,11 @@ pub mod fb { if let Some(x) = args.modifier_option { builder.add_modifier_option(x); } - if let Some(x) = args.patterns { - builder.add_patterns(x); + if let Some(x) = args.multi_patterns { + builder.add_multi_patterns(x); + } + if let Some(x) = args.single_pattern { + builder.add_single_pattern(x); } if let Some(x) = args.opt_not_domains { builder.add_opt_not_domains(x); @@ -88,8 +92,9 @@ pub mod fb { let mask = self.mask(); let opt_domains = self.opt_domains().map(|x| x.into_iter().collect()); let opt_not_domains = self.opt_not_domains().map(|x| x.into_iter().collect()); - let patterns = self - .patterns() + let single_pattern = self.single_pattern().map(|x| x.to_string()); + let multi_patterns = self + .multi_patterns() .map(|x| x.iter().map(|s| s.to_string()).collect()); let modifier_option = self.modifier_option().map(|x| x.to_string()); let hostname = self.hostname().map(|x| x.to_string()); @@ -99,7 +104,8 @@ pub mod fb { mask, opt_domains, opt_not_domains, - patterns, + single_pattern, + multi_patterns, modifier_option, hostname, tag, @@ -148,7 +154,19 @@ pub mod fb { } } #[inline] - pub fn patterns( + pub fn single_pattern(&self) -> Option<&'a str> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>( + NetworkFilter::VT_SINGLE_PATTERN, + None, + ) + } + } + #[inline] + pub fn multi_patterns( &self, ) -> Option>> { // Safety: @@ -157,7 +175,7 @@ pub mod fb { unsafe { self._tab.get::>, - >>(NetworkFilter::VT_PATTERNS, None) + >>(NetworkFilter::VT_MULTI_PATTERNS, None) } } #[inline] @@ -223,9 +241,14 @@ pub mod fb { Self::VT_OPT_NOT_DOMAINS, false, )? + .visit_field::>( + "single_pattern", + Self::VT_SINGLE_PATTERN, + false, + )? .visit_field::>, - >>("patterns", Self::VT_PATTERNS, false)? + >>("multi_patterns", Self::VT_MULTI_PATTERNS, false)? .visit_field::>( "modifier_option", Self::VT_MODIFIER_OPTION, @@ -250,7 +273,8 @@ pub mod fb { pub mask: u32, pub opt_domains: Option>>, pub opt_not_domains: Option>>, - pub patterns: Option< + pub single_pattern: Option>, + pub multi_patterns: Option< flatbuffers::WIPOffset>>, >, pub modifier_option: Option>, @@ -265,7 +289,8 @@ pub mod fb { mask: 540221439, opt_domains: None, opt_not_domains: None, - patterns: None, + single_pattern: None, + multi_patterns: None, modifier_option: None, hostname: None, tag: None, @@ -305,15 +330,22 @@ pub mod fb { ); } #[inline] - pub fn add_patterns( + pub fn add_single_pattern(&mut self, single_pattern: flatbuffers::WIPOffset<&'b str>) { + self.fbb_.push_slot_always::>( + NetworkFilter::VT_SINGLE_PATTERN, + single_pattern, + ); + } + #[inline] + pub fn add_multi_patterns( &mut self, - patterns: flatbuffers::WIPOffset< + multi_patterns: flatbuffers::WIPOffset< flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, >, ) { self.fbb_.push_slot_always::>( - NetworkFilter::VT_PATTERNS, - patterns, + NetworkFilter::VT_MULTI_PATTERNS, + multi_patterns, ); } #[inline] @@ -365,7 +397,8 @@ pub mod fb { ds.field("mask", &self.mask()); ds.field("opt_domains", &self.opt_domains()); ds.field("opt_not_domains", &self.opt_not_domains()); - ds.field("patterns", &self.patterns()); + ds.field("single_pattern", &self.single_pattern()); + ds.field("multi_patterns", &self.multi_patterns()); ds.field("modifier_option", &self.modifier_option()); ds.field("hostname", &self.hostname()); ds.field("tag", &self.tag()); @@ -379,7 +412,8 @@ pub mod fb { pub mask: u32, pub opt_domains: Option>, pub opt_not_domains: Option>, - pub patterns: Option>, + pub single_pattern: Option, + pub multi_patterns: Option>, pub modifier_option: Option, pub hostname: Option, pub tag: Option, @@ -391,7 +425,8 @@ pub mod fb { mask: 540221439, opt_domains: None, opt_not_domains: None, - patterns: None, + single_pattern: None, + multi_patterns: None, modifier_option: None, hostname: None, tag: None, @@ -407,7 +442,8 @@ pub mod fb { let mask = self.mask; let opt_domains = self.opt_domains.as_ref().map(|x| _fbb.create_vector(x)); let opt_not_domains = self.opt_not_domains.as_ref().map(|x| _fbb.create_vector(x)); - let patterns = self.patterns.as_ref().map(|x| { + let single_pattern = self.single_pattern.as_ref().map(|x| _fbb.create_string(x)); + let multi_patterns = self.multi_patterns.as_ref().map(|x| { let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); _fbb.create_vector(&w) }); @@ -421,7 +457,8 @@ pub mod fb { mask, opt_domains, opt_not_domains, - patterns, + single_pattern, + multi_patterns, modifier_option, hostname, tag, diff --git a/tests/unit/engine.rs b/tests/unit/engine.rs index a68c9a4b..c579aac1 100644 --- a/tests/unit/engine.rs +++ b/tests/unit/engine.rs @@ -183,7 +183,7 @@ mod tests { fn deserialization_generate_simple() { let mut engine = Engine::from_rules(["ad-banner"], Default::default()); let data = engine.serialize().to_vec(); - const EXPECTED_HASH: u64 = 4588487935723956783; + const EXPECTED_HASH: u64 = 16556115079021991714; assert_eq!(hash(&data), EXPECTED_HASH, "{HASH_MISMATCH_MSG}"); engine.deserialize(&data).unwrap(); } @@ -193,7 +193,7 @@ mod tests { let mut engine = Engine::from_rules(["ad-banner$tag=abc"], Default::default()); engine.use_tags(&["abc"]); let data = engine.serialize().to_vec(); - const EXPECTED_HASH: u64 = 7781508779107365248; + const EXPECTED_HASH: u64 = 4864047469838009851; assert_eq!(hash(&data), EXPECTED_HASH, "{HASH_MISMATCH_MSG}"); engine.deserialize(&data).unwrap(); } @@ -237,9 +237,9 @@ mod tests { ); } let expected_hash: u64 = if cfg!(feature = "css-validation") { - 1278886013448413771 + 10068943306137168952 } else { - 6460458707531433656 + 6262681569722546672 }; assert_eq!(hash(&data), expected_hash, "{HASH_MISMATCH_MSG}");