From f2d9d9f4787ec02420115be2b8c974ce9ac5fa05 Mon Sep 17 00:00:00 2001 From: Olli Saarikivi Date: Wed, 13 Jul 2022 09:07:25 -0700 Subject: [PATCH 1/4] Fix NFA mode backtracking simulation --- .../Symbolic/SymbolicRegexMatcher.Automata.cs | 20 ++++++++-- .../Symbolic/SymbolicRegexMatcher.cs | 40 ++++++++++++++----- 2 files changed, 47 insertions(+), 13 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Automata.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Automata.cs index 9912da4da8ef39..d5ad959377763b 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Automata.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Automata.cs @@ -50,6 +50,7 @@ private enum ContextIndependentState : byte IsDeadend = 2, IsNullable = 4, CanBeNullable = 8, + SimulatesBacktracking = 16, } /// @@ -154,7 +155,7 @@ private Span GetDeltasFor(MatchingState state) /// Get context-independent information for the given state. [MethodImpl(MethodImplOptions.AggressiveInlining)] - private (bool IsInitial, bool IsDeadend, bool IsNullable, bool CanBeNullable) GetStateInfo(int stateId) + private (bool IsInitial, bool IsDeadend, bool IsNullable, bool CanBeNullable, bool SimulatesBacktracking) GetStateInfo(int stateId) { Debug.Assert(stateId > 0); @@ -162,7 +163,8 @@ private Span GetDeltasFor(MatchingState state) return ((info & ContextIndependentState.IsInitial) != 0, (info & ContextIndependentState.IsDeadend) != 0, (info & ContextIndependentState.IsNullable) != 0, - (info & ContextIndependentState.CanBeNullable) != 0); + (info & ContextIndependentState.CanBeNullable) != 0, + (info & ContextIndependentState.SimulatesBacktracking) != 0); } /// @@ -205,13 +207,18 @@ private MatchingState GetOrCreateState_NoLock(SymbolicRegexNode node ArrayResizeAndVolatilePublish(ref _stateInfo, newsize); } _stateArray[state.Id] = state; - _stateInfo[state.Id] = BuildStateInfo(state.Id, isInitialState, state.IsDeadend(Solver), state.Node.IsNullable, state.Node.CanBeNullable); + _stateInfo[state.Id] = BuildStateInfo(state.Id, + isInitialState, + state.IsDeadend(Solver), + state.Node.IsNullable, + state.Node.CanBeNullable, + state.Node.Kind != SymbolicRegexNodeKind.DisableBacktrackingSimulation); } return state; // Assign the context-independent information for the given state - static ContextIndependentState BuildStateInfo(int stateId, bool isInitial, bool isDeadend, bool isNullable, bool canBeNullable) + static ContextIndependentState BuildStateInfo(int stateId, bool isInitial, bool isDeadend, bool isNullable, bool canBeNullable, bool simulatesBacktracking) { Debug.Assert(stateId > 0); Debug.Assert(!isNullable || canBeNullable); @@ -237,6 +244,11 @@ static ContextIndependentState BuildStateInfo(int stateId, bool isInitial, bool } } + if (simulatesBacktracking) + { + info |= ContextIndependentState.SimulatesBacktracking; + } + return info; } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs index b84df67463f680..7e5831d9acc6d7 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs @@ -523,7 +523,7 @@ private bool FindEndPositionDeltas(ReadOnlySpan input, int i, next.Update(index, targetStateId, newRegisters); int coreStateId = GetCoreStateId(targetStateId); - (bool isInitial, bool isDeadend, bool isNullable, bool canBeNullable) = GetStateInfo(coreStateId); + (bool isInitial, bool isDeadend, bool isNullable, bool canBeNullable, _) = GetStateInfo(coreStateId); Debug.Assert(!isDeadend); if (isNullable || (canBeNullable && GetState(coreStateId).IsNullableFor(GetCharKind(input, i + 1)))) @@ -950,7 +950,8 @@ private interface IStateHandler public static abstract int ExtractNullableCoreStateId(SymbolicRegexMatcher matcher, in CurrentState state, ReadOnlySpan input, int pos); public static abstract int FixedLength(SymbolicRegexMatcher matcher, in CurrentState state, uint nextCharKind); public static abstract bool TryTakeTransition(SymbolicRegexMatcher matcher, ref CurrentState state, int mintermId); - public static abstract (bool IsInitial, bool IsDeadend, bool IsNullable, bool CanBeNullable) GetStateInfo(SymbolicRegexMatcher matcher, in CurrentState state); + public static abstract (bool IsInitial, bool IsDeadend, bool IsNullable, bool CanBeNullable, bool SimulatesBacktracking) + GetStateInfo(SymbolicRegexMatcher matcher, in CurrentState state); } /// An for operating over instances configured as DFA states. @@ -1009,7 +1010,7 @@ public static bool TryTakeTransition(SymbolicRegexMatcher matcher, ref Cur /// - whether this state may be contextually nullable /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static (bool IsInitial, bool IsDeadend, bool IsNullable, bool CanBeNullable) GetStateInfo(SymbolicRegexMatcher matcher, in CurrentState state) + public static (bool IsInitial, bool IsDeadend, bool IsNullable, bool CanBeNullable, bool SimulatesBacktracking) GetStateInfo(SymbolicRegexMatcher matcher, in CurrentState state) => matcher.GetStateInfo(state.DfaStateId); } @@ -1100,10 +1101,13 @@ public static bool TryTakeTransition(SymbolicRegexMatcher matcher, ref Cur foreach (int nextState in GetNextStates(sourceStates.Values[0].Key, mintermId, matcher)) { nextStates.Add(nextState, out _); + // Nothing is required for backtracking simulation here, since there's just one state so the + // transition itself already handles it. } } else { + uint nextCharKind = matcher.GetPositionKind(mintermId); // We have multiple source states, so we need to potentially dedup across each of // their next states. For each source state, get its next states, adding each into // our set (which exists purely for deduping purposes), and if we successfully added @@ -1114,6 +1118,12 @@ public static bool TryTakeTransition(SymbolicRegexMatcher matcher, ref Cur { nextStates.Add(nextState, out _); } + // To simulate backtracking, if a source state is nullable then no further transitions are taken + // as the backtracking engines would prefer the match ending here. + int coreStateId = matcher.GetCoreStateId(sourceState.Key); + (_, _, bool isNullable, bool canBeNullable, bool simulatesBacktracking) = matcher.GetStateInfo(coreStateId); + if (simulatesBacktracking && (isNullable || (canBeNullable && matcher.GetState(coreStateId).IsNullableFor(nextCharKind)))) + break; } } @@ -1145,8 +1155,8 @@ static int[] GetNextStates(int sourceState, int mintermId, SymbolicRegexMatcher< /// can transition back to a DFA state. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static (bool IsInitial, bool IsDeadend, bool IsNullable, bool CanBeNullable) GetStateInfo(SymbolicRegexMatcher matcher, in CurrentState state) => - (false, state.NfaState!.NfaStateSet.Count == 0, IsNullable(matcher, in state), CanBeNullable(matcher, in state)); + public static (bool IsInitial, bool IsDeadend, bool IsNullable, bool CanBeNullable, bool SimulatesBacktracking) GetStateInfo(SymbolicRegexMatcher matcher, in CurrentState state) => + (false, state.NfaState!.NfaStateSet.Count == 0, IsNullable(matcher, in state), CanBeNullable(matcher, in state), SimulatesBacktracking(matcher, in state)); /// Check if any underlying core state is unconditionally nullable. public static bool IsNullable(SymbolicRegexMatcher matcher, in CurrentState state) @@ -1176,9 +1186,21 @@ public static bool CanBeNullable(SymbolicRegexMatcher matcher, in CurrentS return false; } + /// + /// Check if the underlying core states have backtracking disabled. + /// That will be true for either all or none of them. + /// + private static bool SimulatesBacktracking(SymbolicRegexMatcher matcher, in CurrentState state) + { + List> stateEntries = state.NfaState!.NfaStateSet.Values; + return stateEntries.Count > 0 ? + matcher.GetStateInfo(matcher.GetCoreStateId(stateEntries[0].Key)).SimulatesBacktracking : + false; + } + #if DEBUG - /// Undo a previous call to . - public static void UndoTransition(ref CurrentState state) + /// Undo a previous call to . + public static void UndoTransition(ref CurrentState state) { Debug.Assert(state.DfaStateId < 0, $"Expected negative {nameof(state.DfaStateId)}."); Debug.Assert(state.NfaState is not null, $"Expected non-null {nameof(state.NfaState)}."); From bb9c3e243fe004b34a4d334505596a15da52c9bd Mon Sep 17 00:00:00 2001 From: Olli Saarikivi Date: Wed, 13 Jul 2022 14:22:10 -0700 Subject: [PATCH 2/4] Refactor to StateFlags --- .../src/System.Text.RegularExpressions.csproj | 1 + .../Symbolic/MatchingState.cs | 37 ++++++++ .../RegularExpressions/Symbolic/StateFlags.cs | 41 +++++++++ .../Symbolic/SymbolicRegexMatcher.Automata.cs | 71 +--------------- .../Symbolic/SymbolicRegexMatcher.Dgml.cs | 2 +- .../Symbolic/SymbolicRegexMatcher.Sample.cs | 6 +- .../Symbolic/SymbolicRegexMatcher.cs | 85 +++++++------------ ....Text.RegularExpressions.Unit.Tests.csproj | 1 + 8 files changed, 120 insertions(+), 124 deletions(-) create mode 100644 src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/StateFlags.cs diff --git a/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj b/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj index 94490e85c60c9b..8bab1fced79b63 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj +++ b/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj @@ -70,6 +70,7 @@ + diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/MatchingState.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/MatchingState.cs index 27a4223eeccf53..7ece9a91265b81 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/MatchingState.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/MatchingState.cs @@ -104,6 +104,43 @@ internal bool IsNullableFor(uint nextCharKind) return Node.IsNullableFor(context); } + /// + /// Builds a with the relevant flags set. + /// + /// a solver for + /// whether this state is an initial state + /// the flags for this matching state + internal StateFlags BuildStateFlags(ISolver solver, bool isInitial) + { + StateFlags info = 0; + + if (isInitial) + { + info |= StateFlags.IsInitialFlag; + } + + if (IsDeadend(solver)) + { + info |= StateFlags.IsDeadendFlag; + } + + if (Node.CanBeNullable) + { + info |= StateFlags.CanBeNullableFlag; + if (Node.IsNullable) + { + info |= StateFlags.IsNullableFlag; + } + } + + if (Node.Kind != SymbolicRegexNodeKind.DisableBacktrackingSimulation) + { + info |= StateFlags.SimulatesBacktrackingFlag; + } + + return info; + } + public override bool Equals(object? obj) => obj is MatchingState s && PrevCharKind == s.PrevCharKind && Node.Equals(s.Node); diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/StateFlags.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/StateFlags.cs new file mode 100644 index 00000000000000..0b89497e460ea7 --- /dev/null +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/StateFlags.cs @@ -0,0 +1,41 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Runtime.CompilerServices; + +namespace System.Text.RegularExpressions.Symbolic +{ + /// + /// These flags provide context-independent information available for every state. They provide a fast way to evaluate + /// conditions in the inner matching loops of . The matcher caches one of these + /// for every state, for which they are created by . + /// In DFA mode the cached flags are used directly, while in NFA mode the + /// handles aggregating the flags in the state set. + /// + [Flags] + internal enum StateFlags : byte + { + IsInitialFlag = 1, + IsDeadendFlag = 2, + IsNullableFlag = 4, + CanBeNullableFlag = 8, + SimulatesBacktrackingFlag = 16, + } + + /// + /// These extension methods for make checking for the presence of flags more concise. + /// + internal static class StateFlagsExtensions + { + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static bool IsInitial(this StateFlags info) => info.HasFlag(StateFlags.IsInitialFlag); + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static bool IsDeadend(this StateFlags info) => info.HasFlag(StateFlags.IsDeadendFlag); + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static bool IsNullable(this StateFlags info) => info.HasFlag(StateFlags.IsNullableFlag); + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static bool CanBeNullable(this StateFlags info) => info.HasFlag(StateFlags.CanBeNullableFlag); + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static bool SimulatesBacktracking(this StateFlags info) => info.HasFlag(StateFlags.SimulatesBacktrackingFlag); + } +} diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Automata.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Automata.cs index d5ad959377763b..b1092ad3c0e39d 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Automata.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Automata.cs @@ -40,18 +40,7 @@ internal sealed partial class SymbolicRegexMatcher /// Maps state IDs to context-independent information for all states in . /// The first valid entry is at index 1. /// - private ContextIndependentState[] _stateInfo; - - /// Context-independent information available for every state. - [Flags] - private enum ContextIndependentState : byte - { - IsInitial = 1, - IsDeadend = 2, - IsNullable = 4, - CanBeNullable = 8, - SimulatesBacktracking = 16, - } + private StateFlags[] _stateFlagsArray; /// /// The transition function for DFA mode. @@ -153,20 +142,6 @@ private Span GetDeltasFor(MatchingState state) return _nfaDelta.AsSpan(nfaState << _mintermsLog, numMinterms); } - /// Get context-independent information for the given state. - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private (bool IsInitial, bool IsDeadend, bool IsNullable, bool CanBeNullable, bool SimulatesBacktracking) GetStateInfo(int stateId) - { - Debug.Assert(stateId > 0); - - ContextIndependentState info = _stateInfo[stateId]; - return ((info & ContextIndependentState.IsInitial) != 0, - (info & ContextIndependentState.IsDeadend) != 0, - (info & ContextIndependentState.IsNullable) != 0, - (info & ContextIndependentState.CanBeNullable) != 0, - (info & ContextIndependentState.SimulatesBacktracking) != 0); - } - /// /// Create a state with given node and previous character context. /// @@ -204,53 +179,13 @@ private MatchingState GetOrCreateState_NoLock(SymbolicRegexNode node int newsize = _stateArray.Length * 2; ArrayResizeAndVolatilePublish(ref _stateArray, newsize); ArrayResizeAndVolatilePublish(ref _dfaDelta, newsize << _mintermsLog); - ArrayResizeAndVolatilePublish(ref _stateInfo, newsize); + ArrayResizeAndVolatilePublish(ref _stateFlagsArray, newsize); } _stateArray[state.Id] = state; - _stateInfo[state.Id] = BuildStateInfo(state.Id, - isInitialState, - state.IsDeadend(Solver), - state.Node.IsNullable, - state.Node.CanBeNullable, - state.Node.Kind != SymbolicRegexNodeKind.DisableBacktrackingSimulation); + _stateFlagsArray[state.Id] = state.BuildStateFlags(Solver, isInitialState); } return state; - - // Assign the context-independent information for the given state - static ContextIndependentState BuildStateInfo(int stateId, bool isInitial, bool isDeadend, bool isNullable, bool canBeNullable, bool simulatesBacktracking) - { - Debug.Assert(stateId > 0); - Debug.Assert(!isNullable || canBeNullable); - - ContextIndependentState info = 0; - - if (isInitial) - { - info |= ContextIndependentState.IsInitial; - } - - if (isDeadend) - { - info |= ContextIndependentState.IsDeadend; - } - - if (canBeNullable) - { - info |= ContextIndependentState.CanBeNullable; - if (isNullable) - { - info |= ContextIndependentState.IsNullable; - } - } - - if (simulatesBacktracking) - { - info |= ContextIndependentState.SimulatesBacktracking; - } - - return info; - } } /// diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Dgml.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Dgml.cs index 157fd7d332db92..6c4dee6866d98c 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Dgml.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Dgml.cs @@ -35,7 +35,7 @@ public override void SaveDGML(TextWriter writer, int maxLabelLength) string nodeDgmlView = $"{(info == string.Empty ? info : $"Previous: {info} ")}{(deriv == string.Empty ? "()" : deriv)}"; writer.WriteLine(" ", state.Id, nodeDgmlView); - if (GetStateInfo(state.Id).IsInitial) + if (_stateFlagsArray[state.Id].IsInitial()) { writer.WriteLine(" "); } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Sample.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Sample.cs index dc62647080b0e9..3bc9ce76859242 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Sample.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Sample.cs @@ -79,11 +79,11 @@ public override IEnumerable SampleMatches(int k, int randomseed) // Gather the possible endings for satisfying nullability possibleEndings.Clear(); - if (SymbolicRegexMatcher.NfaStateHandler.CanBeNullable(this, in statesWrapper)) + StateFlags flags = SymbolicRegexMatcher.NfaStateHandler.GetStateFlags(this, in statesWrapper); + if (flags.CanBeNullable()) { // Unconditionally final state or end of the input due to \Z anchor for example - if (SymbolicRegexMatcher.NfaStateHandler.IsNullable(this, in statesWrapper) || - SymbolicRegexMatcher.NfaStateHandler.IsNullableFor(this, in statesWrapper, CharKind.BeginningEnd)) + if (flags.IsNullable() || SymbolicRegexMatcher.NfaStateHandler.IsNullableFor(this, in statesWrapper, CharKind.BeginningEnd)) { possibleEndings.Add(""); } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs index 7e5831d9acc6d7..18ec6e2e7fe054 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs @@ -1,4 +1,4 @@ -// Licensed to the .NET Foundation under one or more agreements. +// Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. using System.Collections.Generic; @@ -173,7 +173,7 @@ private SymbolicRegexMatcher(SymbolicRegexBuilder builder, SymbolicRegexNo // Initialization for fields in SymbolicRegexMatcher.Automata.cs _stateArray = new MatchingState[InitialDfaStateCapacity]; - _stateInfo = new ContextIndependentState[InitialDfaStateCapacity]; + _stateFlagsArray = new StateFlags[InitialDfaStateCapacity]; _dfaDelta = new int[InitialDfaStateCapacity << _mintermsLog]; // Initialize a lookup array for the character kinds of each minterm ID. This includes one "special" minterm @@ -523,11 +523,11 @@ private bool FindEndPositionDeltas(this, input, ref state, ref pos)) { @@ -538,7 +538,7 @@ private bool FindEndPositionDeltas(this, in state, positionId, isNullable, canBeNullable)) + if (TNullabilityHandler.IsNullableAt(this, in state, positionId, info.IsNullable(), info.CanBeNullable())) { endPos = pos; endStateId = TStateHandler.ExtractNullableCoreStateId(this, in state, input, pos); @@ -652,20 +652,20 @@ private bool FindStartPositionDeltas(this, in state, positionId, isNullable, canBeNullable)) + if (TNullabilityHandler.IsNullableAt(this, in state, positionId, flags.IsNullable(), flags.CanBeNullable())) { lastStart = pos; } // If we are past the start threshold or if the state is a dead end, bail; we should have already // found a valid starting location. - if (pos <= startThreshold || isDeadend) + if (pos <= startThreshold || flags.IsDeadend()) { Debug.Assert(lastStart != -1); return true; @@ -750,10 +750,10 @@ private Registers FindSubcaptures(ReadOnlySpan input, int i, next.Update(index, targetStateId, newRegisters); int coreStateId = GetCoreStateId(targetStateId); - (bool isInitial, bool isDeadend, bool isNullable, bool canBeNullable, _) = GetStateInfo(coreStateId); - Debug.Assert(!isDeadend); + StateFlags flags = _stateFlagsArray[coreStateId]; + Debug.Assert(!flags.IsDeadend()); - if (isNullable || (canBeNullable && GetState(coreStateId).IsNullableFor(GetCharKind(input, i + 1)))) + if (flags.IsNullable() || (flags.CanBeNullable() && GetState(coreStateId).IsNullableFor(GetCharKind(input, i + 1)))) { // No lower priority transitions from this or other source states are taken because the // backtracking engines would return the match ending here. @@ -950,8 +950,7 @@ private interface IStateHandler public static abstract int ExtractNullableCoreStateId(SymbolicRegexMatcher matcher, in CurrentState state, ReadOnlySpan input, int pos); public static abstract int FixedLength(SymbolicRegexMatcher matcher, in CurrentState state, uint nextCharKind); public static abstract bool TryTakeTransition(SymbolicRegexMatcher matcher, ref CurrentState state, int mintermId); - public static abstract (bool IsInitial, bool IsDeadend, bool IsNullable, bool CanBeNullable, bool SimulatesBacktracking) - GetStateInfo(SymbolicRegexMatcher matcher, in CurrentState state); + public static abstract StateFlags GetStateFlags(SymbolicRegexMatcher matcher, in CurrentState state); } /// An for operating over instances configured as DFA states. @@ -1010,8 +1009,8 @@ public static bool TryTakeTransition(SymbolicRegexMatcher matcher, ref Cur /// - whether this state may be contextually nullable /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static (bool IsInitial, bool IsDeadend, bool IsNullable, bool CanBeNullable, bool SimulatesBacktracking) GetStateInfo(SymbolicRegexMatcher matcher, in CurrentState state) - => matcher.GetStateInfo(state.DfaStateId); + public static StateFlags GetStateFlags(SymbolicRegexMatcher matcher, in CurrentState state) + => matcher._stateFlagsArray[state.DfaStateId]; } /// An for operating over instances configured as NFA states. @@ -1121,9 +1120,12 @@ public static bool TryTakeTransition(SymbolicRegexMatcher matcher, ref Cur // To simulate backtracking, if a source state is nullable then no further transitions are taken // as the backtracking engines would prefer the match ending here. int coreStateId = matcher.GetCoreStateId(sourceState.Key); - (_, _, bool isNullable, bool canBeNullable, bool simulatesBacktracking) = matcher.GetStateInfo(coreStateId); - if (simulatesBacktracking && (isNullable || (canBeNullable && matcher.GetState(coreStateId).IsNullableFor(nextCharKind)))) + StateFlags flags = matcher._stateFlagsArray[coreStateId]; + if (flags.SimulatesBacktracking() && + (flags.IsNullable() || (flags.CanBeNullable() && matcher.GetState(coreStateId).IsNullableFor(nextCharKind)))) + { break; + } } } @@ -1155,47 +1157,26 @@ static int[] GetNextStates(int sourceState, int mintermId, SymbolicRegexMatcher< /// can transition back to a DFA state. /// [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static (bool IsInitial, bool IsDeadend, bool IsNullable, bool CanBeNullable, bool SimulatesBacktracking) GetStateInfo(SymbolicRegexMatcher matcher, in CurrentState state) => - (false, state.NfaState!.NfaStateSet.Count == 0, IsNullable(matcher, in state), CanBeNullable(matcher, in state), SimulatesBacktracking(matcher, in state)); - - /// Check if any underlying core state is unconditionally nullable. - public static bool IsNullable(SymbolicRegexMatcher matcher, in CurrentState state) + public static StateFlags GetStateFlags(SymbolicRegexMatcher matcher, in CurrentState state) { - foreach (ref KeyValuePair nfaState in CollectionsMarshal.AsSpan(state.NfaState!.NfaStateSet.Values)) + if (state.NfaState!.NfaStateSet.Count == 0) { - if (matcher.GetStateInfo(matcher.GetCoreStateId(nfaState.Key)).IsNullable) - { - return true; - } + // In NFA state sets dead ends are never included. Instead an empty set of states represents a dead end. + return StateFlags.IsDeadendFlag; } - - return false; - } - - /// Check if any underlying core state can be nullable in some context. - public static bool CanBeNullable(SymbolicRegexMatcher matcher, in CurrentState state) - { - foreach (ref KeyValuePair nfaState in CollectionsMarshal.AsSpan(state.NfaState!.NfaStateSet.Values)) + else { - if (matcher.GetStateInfo(matcher.GetCoreStateId(nfaState.Key)).CanBeNullable) + // Build the flags for the set of states by taking a bitwise Or of all the per-state flags and then + // masking out the irrelevant ones. This works because IsNullable and CanBeNullable should be true if + // they are true for any state in the set; SimulatesBacktracking is true for all the states if + // it is true for any state (since it is a phase-wide property); and all other flags are masked out. + StateFlags flags = 0; + foreach (ref KeyValuePair nfaState in CollectionsMarshal.AsSpan(state.NfaState!.NfaStateSet.Values)) { - return true; + flags |= matcher._stateFlagsArray[matcher.GetCoreStateId(nfaState.Key)]; } + return flags & (StateFlags.IsNullableFlag | StateFlags.CanBeNullableFlag | StateFlags.SimulatesBacktrackingFlag); } - - return false; - } - - /// - /// Check if the underlying core states have backtracking disabled. - /// That will be true for either all or none of them. - /// - private static bool SimulatesBacktracking(SymbolicRegexMatcher matcher, in CurrentState state) - { - List> stateEntries = state.NfaState!.NfaStateSet.Values; - return stateEntries.Count > 0 ? - matcher.GetStateInfo(matcher.GetCoreStateId(stateEntries[0].Key)).SimulatesBacktracking : - false; } #if DEBUG diff --git a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/System.Text.RegularExpressions.Unit.Tests.csproj b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/System.Text.RegularExpressions.Unit.Tests.csproj index 8c45a0c5adbc7e..684b36c6d3d4c0 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/System.Text.RegularExpressions.Unit.Tests.csproj +++ b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/System.Text.RegularExpressions.Unit.Tests.csproj @@ -56,6 +56,7 @@ + From 774dd55e4cabf7d24a03a5dcc2ec78d0d894534e Mon Sep 17 00:00:00 2001 From: Olli Saarikivi Date: Wed, 13 Jul 2022 16:10:20 -0700 Subject: [PATCH 3/4] Fix bug in timeout check --- .../Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs index 18ec6e2e7fe054..c8ca33aced5b64 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs @@ -453,8 +453,8 @@ private int FindEndPosition(input, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate) : - FindEndPositionDeltas(input, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate); + FindEndPositionDeltas(inputForInnerLoop, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate) : + FindEndPositionDeltas(inputForInnerLoop, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate); // If the inner loop indicates that the search finished (for example due to reaching a deadend state) or // there is no more input available, then the whole search is done. From bd9eac2fbfa1f18fceb952de332b0e21c457aaca Mon Sep 17 00:00:00 2001 From: Olli Saarikivi Date: Thu, 14 Jul 2022 13:56:19 -0700 Subject: [PATCH 4/4] Changes from review --- .../RegularExpressions/Symbolic/StateFlags.cs | 15 +++------ .../Symbolic/SymbolicRegexMatcher.cs | 32 ++++++++++--------- 2 files changed, 22 insertions(+), 25 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/StateFlags.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/StateFlags.cs index 0b89497e460ea7..cd859350352fff 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/StateFlags.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/StateFlags.cs @@ -27,15 +27,10 @@ internal enum StateFlags : byte /// internal static class StateFlagsExtensions { - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool IsInitial(this StateFlags info) => info.HasFlag(StateFlags.IsInitialFlag); - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool IsDeadend(this StateFlags info) => info.HasFlag(StateFlags.IsDeadendFlag); - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool IsNullable(this StateFlags info) => info.HasFlag(StateFlags.IsNullableFlag); - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool CanBeNullable(this StateFlags info) => info.HasFlag(StateFlags.CanBeNullableFlag); - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static bool SimulatesBacktracking(this StateFlags info) => info.HasFlag(StateFlags.SimulatesBacktrackingFlag); + internal static bool IsInitial(this StateFlags info) => (info & StateFlags.IsInitialFlag) != 0; + internal static bool IsDeadend(this StateFlags info) => (info & StateFlags.IsDeadendFlag) != 0; + internal static bool IsNullable(this StateFlags info) => (info & StateFlags.IsNullableFlag) != 0; + internal static bool CanBeNullable(this StateFlags info) => (info & StateFlags.CanBeNullableFlag) != 0; + internal static bool SimulatesBacktracking(this StateFlags info) => (info & StateFlags.SimulatesBacktrackingFlag) != 0; } } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs index c8ca33aced5b64..70390343c3405c 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs @@ -523,11 +523,11 @@ private bool FindEndPositionDeltas(this, input, ref state, ref pos)) { @@ -538,7 +538,7 @@ private bool FindEndPositionDeltas(this, in state, positionId, info.IsNullable(), info.CanBeNullable())) + if (TNullabilityHandler.IsNullableAt(this, in state, positionId, flags)) { endPos = pos; endStateId = TStateHandler.ExtractNullableCoreStateId(this, in state, input, pos); @@ -658,7 +658,7 @@ private bool FindStartPositionDeltas(this, in state, positionId, flags.IsNullable(), flags.CanBeNullable())) + if (TNullabilityHandler.IsNullableAt(this, in state, positionId, flags)) { lastStart = pos; } @@ -1106,17 +1106,18 @@ public static bool TryTakeTransition(SymbolicRegexMatcher matcher, ref Cur } else { - uint nextCharKind = matcher.GetPositionKind(mintermId); // We have multiple source states, so we need to potentially dedup across each of // their next states. For each source state, get its next states, adding each into // our set (which exists purely for deduping purposes), and if we successfully added // to the set, then add the known-unique state to the destination list. + uint nextCharKind = matcher.GetPositionKind(mintermId); foreach (ref KeyValuePair sourceState in CollectionsMarshal.AsSpan(sourceStates.Values)) { foreach (int nextState in GetNextStates(sourceState.Key, mintermId, matcher)) { nextStates.Add(nextState, out _); } + // To simulate backtracking, if a source state is nullable then no further transitions are taken // as the backtracking engines would prefer the match ending here. int coreStateId = matcher.GetCoreStateId(sourceState.Key); @@ -1159,7 +1160,8 @@ static int[] GetNextStates(int sourceState, int mintermId, SymbolicRegexMatcher< [MethodImpl(MethodImplOptions.AggressiveInlining)] public static StateFlags GetStateFlags(SymbolicRegexMatcher matcher, in CurrentState state) { - if (state.NfaState!.NfaStateSet.Count == 0) + SparseIntMap stateSet = state.NfaState!.NfaStateSet; + if (stateSet.Count == 0) { // In NFA state sets dead ends are never included. Instead an empty set of states represents a dead end. return StateFlags.IsDeadendFlag; @@ -1171,7 +1173,7 @@ public static StateFlags GetStateFlags(SymbolicRegexMatcher matcher, in Cu // they are true for any state in the set; SimulatesBacktracking is true for all the states if // it is true for any state (since it is a phase-wide property); and all other flags are masked out. StateFlags flags = 0; - foreach (ref KeyValuePair nfaState in CollectionsMarshal.AsSpan(state.NfaState!.NfaStateSet.Values)) + foreach (ref KeyValuePair nfaState in CollectionsMarshal.AsSpan(stateSet.Values)) { flags |= matcher._stateFlagsArray[matcher.GetCoreStateId(nfaState.Key)]; } @@ -1180,8 +1182,8 @@ public static StateFlags GetStateFlags(SymbolicRegexMatcher matcher, in Cu } #if DEBUG - /// Undo a previous call to . - public static void UndoTransition(ref CurrentState state) + /// Undo a previous call to . + public static void UndoTransition(ref CurrentState state) { Debug.Assert(state.DfaStateId < 0, $"Expected negative {nameof(state.DfaStateId)}."); Debug.Assert(state.NfaState is not null, $"Expected non-null {nameof(state.NfaState)}."); @@ -1287,7 +1289,7 @@ public static bool TryFindNextStartingPosition(SymbolicRegexMatche /// private interface INullabilityHandler { - public static abstract bool IsNullableAt(SymbolicRegexMatcher matcher, in CurrentState state, int positionId, bool isNullable, bool canBeNullable) + public static abstract bool IsNullableAt(SymbolicRegexMatcher matcher, in CurrentState state, int positionId, StateFlags flags) where TStateHandler : struct, IStateHandler; } @@ -1297,11 +1299,11 @@ public static abstract bool IsNullableAt(SymbolicRegexMatcher(SymbolicRegexMatcher matcher, in CurrentState state, int positionId, bool isNullable, bool canBeNullable) + public static bool IsNullableAt(SymbolicRegexMatcher matcher, in CurrentState state, int positionId, StateFlags flags) where TStateHandler : struct, IStateHandler { Debug.Assert(!matcher._pattern._info.ContainsSomeAnchor); - return isNullable; + return flags.IsNullable(); } } @@ -1311,10 +1313,10 @@ public static bool IsNullableAt(SymbolicRegexMatcher matche private readonly struct FullNullabilityHandler : INullabilityHandler { [MethodImpl(MethodImplOptions.AggressiveInlining)] - public static bool IsNullableAt(SymbolicRegexMatcher matcher, in CurrentState state, int positionId, bool isNullable, bool canBeNullable) + public static bool IsNullableAt(SymbolicRegexMatcher matcher, in CurrentState state, int positionId, StateFlags flags) where TStateHandler : struct, IStateHandler { - return isNullable || (canBeNullable && TStateHandler.IsNullableFor(matcher, in state, matcher.GetPositionKind(positionId))); + return flags.IsNullable() || (flags.CanBeNullable() && TStateHandler.IsNullableFor(matcher, in state, matcher.GetPositionKind(positionId))); } } }