From de8b7353a5d59a5213ae185c271c0e5fa64a7dc2 Mon Sep 17 00:00:00 2001 From: Mikhail Atuchin Date: Mon, 11 May 2026 14:34:04 +0400 Subject: [PATCH 1/5] Add single/multi_pattern --- src/filters/fb_network.rs | 57 +++++++++---- src/filters/fb_network_builder.rs | 26 ++++-- src/flatbuffers/fb_network_filter.fbs | 4 +- .../fb_network_filter_generated.rs | 85 +++++++++++++------ tests/unit/engine.rs | 8 +- 5 files changed, 127 insertions(+), 53 deletions(-) diff --git a/src/filters/fb_network.rs b/src/filters/fb_network.rs index 505caa17..daf36d11 100644 --- a/src/filters/fb_network.rs +++ b/src/filters/fb_network.rs @@ -8,24 +8,33 @@ use crate::regex_manager::RegexManager; use crate::request::Request; use crate::filters::flatbuffer_generated::fb; + /// A list of string parts that can be matched against a URL. -pub(crate) struct FlatPatterns<'a> { - patterns: Option>>, +pub(crate) enum FlatPatterns<'a> { + Empty, + Single(&'a str), + Multi(flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>>), } impl<'a> FlatPatterns<'a> { #[inline(always)] pub fn new( - patterns: Option>>, + single_pattern: Option<&'a str>, + multi_patterns: Option>>, ) -> Self { - Self { patterns } + if let Some(single_pattern) = single_pattern { + FlatPatterns::Single(single_pattern) + } else if let Some(patterns) = multi_patterns { + FlatPatterns::Multi(patterns) + } else { + FlatPatterns::Empty + } } #[inline(always)] pub fn iter(&self) -> FlatPatternsIterator<'_> { FlatPatternsIterator { patterns: self, - len: self.patterns.map_or(0, |d| d.len()), index: 0, } } @@ -34,7 +43,6 @@ impl<'a> FlatPatterns<'a> { /// Iterator over [FlatPatterns]. pub(crate) struct FlatPatternsIterator<'a> { patterns: &'a FlatPatterns<'a>, - len: usize, index: usize, } @@ -43,21 +51,37 @@ impl<'a> Iterator for FlatPatternsIterator<'a> { #[inline(always)] fn next(&mut self) -> Option { - self.patterns.patterns.and_then(|fi| { - if self.index < self.len { - self.index += 1; - Some(fi.get(self.index - 1)) - } else { - None + match &self.patterns { + FlatPatterns::Empty => None, + FlatPatterns::Single(s) => { + if self.index == 0 { + self.index += 1; + Some(*s) + } else { + None + } + } + FlatPatterns::Multi(v) => { + if self.index < v.len() { + let result = v.get(self.index); + self.index += 1; + Some(result) + } else { + None + } } - }) + } } } impl ExactSizeIterator for FlatPatternsIterator<'_> { #[inline(always)] fn len(&self) -> usize { - self.len + match &self.patterns { + FlatPatterns::Empty => 0, + FlatPatterns::Single(_) => 1_usize.saturating_sub(self.index), + FlatPatterns::Multi(v) => v.len().saturating_sub(self.index), + } } } @@ -123,7 +147,10 @@ impl<'a> FlatNetworkFilter<'a> { #[inline(always)] pub fn patterns(&self) -> FlatPatterns<'_> { - FlatPatterns::new(self.fb_filter.patterns()) + FlatPatterns::new( + self.fb_filter.single_pattern(), + self.fb_filter.multi_patterns(), + ) } #[inline(always)] diff --git a/src/filters/fb_network_builder.rs b/src/filters/fb_network_builder.rs index 6f54ae8b..d07cdbdf 100644 --- a/src/filters/fb_network_builder.rs +++ b/src/filters/fb_network_builder.rs @@ -81,15 +81,22 @@ impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for &NetworkFilter { .as_ref() .map(|s| builder.create_string(s)); - let patterns = if network_filter.filter.iter().len() > 0 { - let offsets: Vec> = network_filter - .filter - .iter() - .map(|s| builder.create_string(s)) - .collect(); - Some(FlatSerialize::serialize(offsets, builder)) + let mut filter_iter = network_filter.filter.iter(); + let filter_count = filter_iter.len(); + + // Use single_pattern for the common case of 0 or 1 patterns to avoid + // the overhead of a FlatBuffers vector (extra table + offset indirection). + let (single_pattern, multi_patterns) = if filter_count <= 1 { + let single = filter_iter.next().map(|s| builder.create_string(s)); + (single, None) } else { - None + ( + None, + Some(FlatSerialize::serialize( + filter_iter.collect::>(), + builder, + )), + ) }; let raw_line = network_filter @@ -101,7 +108,8 @@ impl<'a> FlatSerialize<'a, EngineFlatBuilder<'a>> for &NetworkFilter { builder.raw_builder(), &fb::NetworkFilterArgs { mask: network_filter.mask.bits(), - patterns, + single_pattern, + multi_patterns, modifier_option, opt_domains, opt_not_domains, diff --git a/src/flatbuffers/fb_network_filter.fbs b/src/flatbuffers/fb_network_filter.fbs index f36bde07..615c1dda 100644 --- a/src/flatbuffers/fb_network_filter.fbs +++ b/src/flatbuffers/fb_network_filter.fbs @@ -18,7 +18,9 @@ table NetworkFilter { opt_domains: [uint32]; opt_not_domains: [uint32]; - patterns: [string]; + single_pattern: string; + multi_patterns: [string]; + modifier_option: string; hostname: string; diff --git a/src/flatbuffers/fb_network_filter_generated.rs b/src/flatbuffers/fb_network_filter_generated.rs index a90e295e..df1cc3fb 100644 --- a/src/flatbuffers/fb_network_filter_generated.rs +++ b/src/flatbuffers/fb_network_filter_generated.rs @@ -38,11 +38,12 @@ pub mod fb { pub const VT_MASK: flatbuffers::VOffsetT = 4; pub const VT_OPT_DOMAINS: flatbuffers::VOffsetT = 6; pub const VT_OPT_NOT_DOMAINS: flatbuffers::VOffsetT = 8; - pub const VT_PATTERNS: flatbuffers::VOffsetT = 10; - pub const VT_MODIFIER_OPTION: flatbuffers::VOffsetT = 12; - pub const VT_HOSTNAME: flatbuffers::VOffsetT = 14; - pub const VT_TAG: flatbuffers::VOffsetT = 16; - pub const VT_RAW_LINE: flatbuffers::VOffsetT = 18; + pub const VT_SINGLE_PATTERN: flatbuffers::VOffsetT = 10; + pub const VT_MULTI_PATTERNS: flatbuffers::VOffsetT = 12; + pub const VT_MODIFIER_OPTION: flatbuffers::VOffsetT = 14; + pub const VT_HOSTNAME: flatbuffers::VOffsetT = 16; + pub const VT_TAG: flatbuffers::VOffsetT = 18; + pub const VT_RAW_LINE: flatbuffers::VOffsetT = 20; #[inline] pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { @@ -71,8 +72,11 @@ pub mod fb { if let Some(x) = args.modifier_option { builder.add_modifier_option(x); } - if let Some(x) = args.patterns { - builder.add_patterns(x); + if let Some(x) = args.multi_patterns { + builder.add_multi_patterns(x); + } + if let Some(x) = args.single_pattern { + builder.add_single_pattern(x); } if let Some(x) = args.opt_not_domains { builder.add_opt_not_domains(x); @@ -88,8 +92,9 @@ pub mod fb { let mask = self.mask(); let opt_domains = self.opt_domains().map(|x| x.into_iter().collect()); let opt_not_domains = self.opt_not_domains().map(|x| x.into_iter().collect()); - let patterns = self - .patterns() + let single_pattern = self.single_pattern().map(|x| x.to_string()); + let multi_patterns = self + .multi_patterns() .map(|x| x.iter().map(|s| s.to_string()).collect()); let modifier_option = self.modifier_option().map(|x| x.to_string()); let hostname = self.hostname().map(|x| x.to_string()); @@ -99,7 +104,8 @@ pub mod fb { mask, opt_domains, opt_not_domains, - patterns, + single_pattern, + multi_patterns, modifier_option, hostname, tag, @@ -148,7 +154,19 @@ pub mod fb { } } #[inline] - pub fn patterns( + pub fn single_pattern(&self) -> Option<&'a str> { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { + self._tab.get::>( + NetworkFilter::VT_SINGLE_PATTERN, + None, + ) + } + } + #[inline] + pub fn multi_patterns( &self, ) -> Option>> { // Safety: @@ -157,7 +175,7 @@ pub mod fb { unsafe { self._tab.get::>, - >>(NetworkFilter::VT_PATTERNS, None) + >>(NetworkFilter::VT_MULTI_PATTERNS, None) } } #[inline] @@ -223,9 +241,14 @@ pub mod fb { Self::VT_OPT_NOT_DOMAINS, false, )? + .visit_field::>( + "single_pattern", + Self::VT_SINGLE_PATTERN, + false, + )? .visit_field::>, - >>("patterns", Self::VT_PATTERNS, false)? + >>("multi_patterns", Self::VT_MULTI_PATTERNS, false)? .visit_field::>( "modifier_option", Self::VT_MODIFIER_OPTION, @@ -250,7 +273,8 @@ pub mod fb { pub mask: u32, pub opt_domains: Option>>, pub opt_not_domains: Option>>, - pub patterns: Option< + pub single_pattern: Option>, + pub multi_patterns: Option< flatbuffers::WIPOffset>>, >, pub modifier_option: Option>, @@ -265,7 +289,8 @@ pub mod fb { mask: 540221439, opt_domains: None, opt_not_domains: None, - patterns: None, + single_pattern: None, + multi_patterns: None, modifier_option: None, hostname: None, tag: None, @@ -305,15 +330,22 @@ pub mod fb { ); } #[inline] - pub fn add_patterns( + pub fn add_single_pattern(&mut self, single_pattern: flatbuffers::WIPOffset<&'b str>) { + self.fbb_.push_slot_always::>( + NetworkFilter::VT_SINGLE_PATTERN, + single_pattern, + ); + } + #[inline] + pub fn add_multi_patterns( &mut self, - patterns: flatbuffers::WIPOffset< + multi_patterns: flatbuffers::WIPOffset< flatbuffers::Vector<'b, flatbuffers::ForwardsUOffset<&'b str>>, >, ) { self.fbb_.push_slot_always::>( - NetworkFilter::VT_PATTERNS, - patterns, + NetworkFilter::VT_MULTI_PATTERNS, + multi_patterns, ); } #[inline] @@ -365,7 +397,8 @@ pub mod fb { ds.field("mask", &self.mask()); ds.field("opt_domains", &self.opt_domains()); ds.field("opt_not_domains", &self.opt_not_domains()); - ds.field("patterns", &self.patterns()); + ds.field("single_pattern", &self.single_pattern()); + ds.field("multi_patterns", &self.multi_patterns()); ds.field("modifier_option", &self.modifier_option()); ds.field("hostname", &self.hostname()); ds.field("tag", &self.tag()); @@ -379,7 +412,8 @@ pub mod fb { pub mask: u32, pub opt_domains: Option>, pub opt_not_domains: Option>, - pub patterns: Option>, + pub single_pattern: Option, + pub multi_patterns: Option>, pub modifier_option: Option, pub hostname: Option, pub tag: Option, @@ -391,7 +425,8 @@ pub mod fb { mask: 540221439, opt_domains: None, opt_not_domains: None, - patterns: None, + single_pattern: None, + multi_patterns: None, modifier_option: None, hostname: None, tag: None, @@ -407,7 +442,8 @@ pub mod fb { let mask = self.mask; let opt_domains = self.opt_domains.as_ref().map(|x| _fbb.create_vector(x)); let opt_not_domains = self.opt_not_domains.as_ref().map(|x| _fbb.create_vector(x)); - let patterns = self.patterns.as_ref().map(|x| { + let single_pattern = self.single_pattern.as_ref().map(|x| _fbb.create_string(x)); + let multi_patterns = self.multi_patterns.as_ref().map(|x| { let w: Vec<_> = x.iter().map(|s| _fbb.create_string(s)).collect(); _fbb.create_vector(&w) }); @@ -421,7 +457,8 @@ pub mod fb { mask, opt_domains, opt_not_domains, - patterns, + single_pattern, + multi_patterns, modifier_option, hostname, tag, diff --git a/tests/unit/engine.rs b/tests/unit/engine.rs index a68c9a4b..ec54ccc2 100644 --- a/tests/unit/engine.rs +++ b/tests/unit/engine.rs @@ -183,7 +183,7 @@ mod tests { fn deserialization_generate_simple() { let mut engine = Engine::from_rules(["ad-banner"], Default::default()); let data = engine.serialize().to_vec(); - const EXPECTED_HASH: u64 = 4588487935723956783; + const EXPECTED_HASH: u64 = 16556115079021991714; assert_eq!(hash(&data), EXPECTED_HASH, "{HASH_MISMATCH_MSG}"); engine.deserialize(&data).unwrap(); } @@ -193,7 +193,7 @@ mod tests { let mut engine = Engine::from_rules(["ad-banner$tag=abc"], Default::default()); engine.use_tags(&["abc"]); let data = engine.serialize().to_vec(); - const EXPECTED_HASH: u64 = 7781508779107365248; + const EXPECTED_HASH: u64 = 4864047469838009851; assert_eq!(hash(&data), EXPECTED_HASH, "{HASH_MISMATCH_MSG}"); engine.deserialize(&data).unwrap(); } @@ -237,9 +237,9 @@ mod tests { ); } let expected_hash: u64 = if cfg!(feature = "css-validation") { - 1278886013448413771 + 3693393726253099709 } else { - 6460458707531433656 + 13077492059152698660 }; assert_eq!(hash(&data), expected_hash, "{HASH_MISMATCH_MSG}"); From a55a96b2a856c0b0c404aa864445eec5f305edbe Mon Sep 17 00:00:00 2001 From: Mikhail Atuchin Date: Tue, 12 May 2026 20:58:17 +0400 Subject: [PATCH 2/5] Fix FilterPartIterator bug --- src/filters/network.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/filters/network.rs b/src/filters/network.rs index fc2e0847..c5a45d53 100644 --- a/src/filters/network.rs +++ b/src/filters/network.rs @@ -331,8 +331,8 @@ impl ExactSizeIterator for FilterPartIterator<'_> { fn len(&self) -> usize { match self.filter_part { FilterPart::Empty => 0, - FilterPart::Simple(_) => 1, - FilterPart::AnyOf(vec) => vec.len(), + FilterPart::Simple(_) => 1usize.saturating_sub(self.index), + FilterPart::AnyOf(vec) => vec.len().saturating_sub(self.index), } } } From e804e5c197be285dcf60cd15c1a4dad7262bc087 Mon Sep 17 00:00:00 2001 From: Anton Lazarev Date: Tue, 12 May 2026 14:43:31 -0700 Subject: [PATCH 3/5] document the need for `Single` pattern --- src/filters/fb_network.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/filters/fb_network.rs b/src/filters/fb_network.rs index daf36d11..9c8be08d 100644 --- a/src/filters/fb_network.rs +++ b/src/filters/fb_network.rs @@ -11,8 +11,12 @@ use crate::filters::flatbuffer_generated::fb; /// A list of string parts that can be matched against a URL. pub(crate) enum FlatPatterns<'a> { + /// No patterns to match Empty, + /// Memory-usage optimization - ~95% of filters have <= 1 pattern. Special-casing avoids the + /// need to hold an extra pointer and vector length. Single(&'a str), + /// More than 1 pattern to match Multi(flatbuffers::Vector<'a, flatbuffers::ForwardsUOffset<&'a str>>), } From d561d8e054469ccb0a2430826a399d3c6390c1f5 Mon Sep 17 00:00:00 2001 From: Mikhail Atuchin Date: Wed, 13 May 2026 01:50:09 +0400 Subject: [PATCH 4/5] add a comment to .fbs --- src/flatbuffers/fb_network_filter.fbs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/flatbuffers/fb_network_filter.fbs b/src/flatbuffers/fb_network_filter.fbs index 615c1dda..599847d0 100644 --- a/src/flatbuffers/fb_network_filter.fbs +++ b/src/flatbuffers/fb_network_filter.fbs @@ -18,6 +18,8 @@ table NetworkFilter { opt_domains: [uint32]; opt_not_domains: [uint32]; + // A union-like storage for pattern. Prefer |single_pattern| where + // possible for performance concerns. single_pattern: string; multi_patterns: [string]; From e2f4c6f51c057f29dd9cdcab1348ba1bbfac1436 Mon Sep 17 00:00:00 2001 From: Mikhail Atuchin Date: Wed, 13 May 2026 01:52:43 +0400 Subject: [PATCH 5/5] update test expectations after rebase --- tests/unit/engine.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/engine.rs b/tests/unit/engine.rs index ec54ccc2..c579aac1 100644 --- a/tests/unit/engine.rs +++ b/tests/unit/engine.rs @@ -237,9 +237,9 @@ mod tests { ); } let expected_hash: u64 = if cfg!(feature = "css-validation") { - 3693393726253099709 + 10068943306137168952 } else { - 13077492059152698660 + 6262681569722546672 }; assert_eq!(hash(&data), expected_hash, "{HASH_MISMATCH_MSG}");