From a66d957f0b2b0a6331f08fc306c5222aeedf75c5 Mon Sep 17 00:00:00 2001 From: hudclark Date: Tue, 27 Apr 2021 15:40:15 -0400 Subject: [PATCH 1/3] Enable custom regex engine use. Allow library users to customize the regular expression engine used during fingerprint matching. Custom regex engines can be configured by implementing the RecogPatternMatcher interface. This feature is desirable as Java's regex engine is susceptible to catastrophic backtracking. --- .../java/com/rapid7/recog/RecogMatcher.java | 46 ++++++---- .../com/rapid7/recog/parser/RecogParser.java | 32 ++++++- .../pattern/JavaRegexRecogPatternMatcher.java | 89 +++++++++++++++++++ .../pattern/RecogPatternMatchResult.java | 34 +++++++ .../recog/pattern/RecogPatternMatcher.java | 32 +++++++ .../recog/CustomPatternMatcherTest.java | 72 +++++++++++++++ 6 files changed, 289 insertions(+), 16 deletions(-) create mode 100644 src/main/java/com/rapid7/recog/pattern/JavaRegexRecogPatternMatcher.java create mode 100644 src/main/java/com/rapid7/recog/pattern/RecogPatternMatchResult.java create mode 100644 src/main/java/com/rapid7/recog/pattern/RecogPatternMatcher.java create mode 100644 src/test/java/com/rapid7/recog/CustomPatternMatcherTest.java diff --git a/src/main/java/com/rapid7/recog/RecogMatcher.java b/src/main/java/com/rapid7/recog/RecogMatcher.java index af5bc24..c78e6e1 100644 --- a/src/main/java/com/rapid7/recog/RecogMatcher.java +++ b/src/main/java/com/rapid7/recog/RecogMatcher.java @@ -1,5 +1,8 @@ package com.rapid7.recog; +import com.rapid7.recog.pattern.JavaRegexRecogPatternMatcher; +import com.rapid7.recog.pattern.RecogPatternMatchResult; +import com.rapid7.recog.pattern.RecogPatternMatcher; import java.io.Serializable; import java.util.HashMap; import java.util.HashSet; @@ -21,8 +24,7 @@ */ public class RecogMatcher implements Serializable { - /** The regular expression pattern to match. 
*/ - private Pattern pattern; + private final RecogPatternMatcher matcher; /** "Constant" values always matched as parameters. Key is the name, value is the value. */ private Map values; @@ -45,8 +47,23 @@ public class RecogMatcher implements Serializable { /** Optional examples that illustrate the matcher (or that can be used to test the matcher). */ private Set examples; + /** + * Creates a new RecogMatcher using a {@link JavaRegexRecogPatternMatcher} to + * match fingerprint values. + * + * @param pattern The regular expression pattern to match fingerprint values against. + */ public RecogMatcher(Pattern pattern) { - this.pattern = requireNonNull(pattern); + this(new JavaRegexRecogPatternMatcher(pattern)); + } + + /** + * Creates a RecogMatcher with the specified {@link RecogPatternMatcher}. + * + * @param matcher The {@link RecogPatternMatcher} to use when matching fingerprint values. + */ + public RecogMatcher(RecogPatternMatcher matcher) { + this.matcher = matcher; values = new HashMap<>(); positionalParameters = new HashMap<>(); namedParameters = new HashSet<>(); @@ -111,7 +128,7 @@ public boolean matches(String input) { if (input == null) return false; else - return pattern.matcher(input).find(); + return matcher.matches(input); } /** @@ -129,19 +146,19 @@ public Map match(String input) { if (input == null) return null; - Matcher matcher = pattern.matcher(input); - if (matcher.find()) { + RecogPatternMatchResult result = matcher.match(input); + if (result != null) { Map values = new HashMap<>(); values.putAll(this.values); // parse positional parameters for the groups specified for (Entry parameter : positionalParameters.entrySet()) - if (parameter.getValue() <= matcher.groupCount()) - values.put(parameter.getKey(), matcher.group(parameter.getValue())); + if (parameter.getValue() <= result.groupCount()) + values.put(parameter.getKey(), result.group(parameter.getValue())); for (String parameter : namedParameters) { try { - values.put(parameter, 
matcher.group(parameter)); + values.put(parameter, result.group(parameter)); } catch (IllegalArgumentException exception) { // the group with the name doesn't exist, ignore it } @@ -199,7 +216,7 @@ public RecogMatcher addParam(String name) { } public String getPattern() { - return pattern.pattern(); + return matcher.getPattern(); } /** @@ -221,9 +238,9 @@ public static Pattern pattern(String regex, int... flags) { @Override public String toString() { return new StringJoiner(", ", RecogMatcher.class.getSimpleName() + "[", "]") - .add("Pattern=" + pattern.pattern()) + .add("Pattern=" + matcher.getPattern()) .add("Description=" + description) - .add("Flags=" + pattern.flags()) + .add("Flags=" + matcher.getFlags()) .add("Positional Parameters=" + positionalParameters) .add("Named Parameters=" + namedParameters) .add("Values=" + values) @@ -233,7 +250,7 @@ public String toString() { @Override public int hashCode() { - return Objects.hash(pattern, values, positionalParameters); + return Objects.hash(matcher, values, positionalParameters); } @Override @@ -244,8 +261,7 @@ else if (!(obj instanceof RecogMatcher)) return false; else { RecogMatcher other = (RecogMatcher) obj; - return Objects.equals(pattern.flags(), other.pattern.flags()) - && Objects.equals(pattern.pattern(), other.pattern.pattern()) + return Objects.equals(matcher, other.matcher) && Objects.equals(values, other.values) && Objects.equals(positionalParameters, other.positionalParameters) && Objects.equals(namedParameters, other.namedParameters); diff --git a/src/main/java/com/rapid7/recog/parser/RecogParser.java b/src/main/java/com/rapid7/recog/parser/RecogParser.java index b3613c2..f095074 100644 --- a/src/main/java/com/rapid7/recog/parser/RecogParser.java +++ b/src/main/java/com/rapid7/recog/parser/RecogParser.java @@ -2,6 +2,8 @@ import com.rapid7.recog.RecogMatcher; import com.rapid7.recog.RecogMatchers; +import com.rapid7.recog.pattern.JavaRegexRecogPatternMatcher; +import 
com.rapid7.recog.pattern.RecogPatternMatcher; import java.io.File; import java.io.FileReader; import java.io.IOException; @@ -27,8 +29,24 @@ */ public class RecogParser { + /** + * Factory used to create the underlying {@link RecogPatternMatcher} used + * when matching inputs against regular expressions. + */ + interface PatternMatcherFactory { + RecogPatternMatcher create(String pattern, int flags); + } + + /** + * The default {@link PatternMatcherFactory} uses the java.util.regex package to evaluate + * regular expressions. + */ + public static final PatternMatcherFactory DEFAULT_PATTERN_MATCHER_FACTORY = + (pattern, flags) -> new JavaRegexRecogPatternMatcher(Pattern.compile(pattern, flags)); + private static final Logger LOGGER = LoggerFactory.getLogger(RecogParser.class); private final boolean strictMode; + private final PatternMatcherFactory patternMatcherFactory; /** * Constructs a parser to parser with non-strict (lenient) parsing mode. @@ -44,7 +62,19 @@ public RecogParser() { * encountered, {@code false} otherwise. */ public RecogParser(boolean strictMode) { + this(strictMode, DEFAULT_PATTERN_MATCHER_FACTORY); + } + + /** + * Constructs a parser with the specified strictness mode and {@link PatternMatcherFactory}. + * + * @param strictMode {@code true} if the parser should throw exceptions when any error is + * encountered, {@code false} otherwise. + * @param patternMatcherFactory The {@link PatternMatcherFactory} to be used during parsing. 
+ */ + public RecogParser(boolean strictMode, PatternMatcherFactory patternMatcherFactory) { this.strictMode = strictMode; + this.patternMatcherFactory = patternMatcherFactory; } /** @@ -117,7 +147,7 @@ public RecogMatchers parse(Reader reader, String name) int regexFlags = parseFlags(fingerprint.getAttribute("flags")); // construct a pattern - RecogMatcher fingerprintPattern = new RecogMatcher(Pattern.compile(pattern, regexFlags)); + RecogMatcher fingerprintPattern = new RecogMatcher(patternMatcherFactory.create(pattern, regexFlags)); // description (optional) NodeList description = fingerprint.getElementsByTagName("description"); diff --git a/src/main/java/com/rapid7/recog/pattern/JavaRegexRecogPatternMatcher.java b/src/main/java/com/rapid7/recog/pattern/JavaRegexRecogPatternMatcher.java new file mode 100644 index 0000000..192c2a6 --- /dev/null +++ b/src/main/java/com/rapid7/recog/pattern/JavaRegexRecogPatternMatcher.java @@ -0,0 +1,89 @@ +package com.rapid7.recog.pattern; + +import java.util.Objects; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import static java.util.Objects.requireNonNull; + +/** + * An implementation of {@link RecogPatternMatcher} that uses java.util.regex.* + * packages to match fingerprint values against fingerprint patterns. + * Matching of the patterns specified is performed using a sub-sequence or "partial" + * match. See {@link Matcher#find()} vs {@link Matcher#matches()}. 
+ */ +public class JavaRegexRecogPatternMatcher implements RecogPatternMatcher { + + private static class JavaRegexRecogPatternMatchResult implements RecogPatternMatchResult { + private final Matcher matcher; + + JavaRegexRecogPatternMatchResult(Matcher matcher) { + this.matcher = matcher; + } + + @Override + public int groupCount() { + return matcher.groupCount(); + } + + @Override + public String group(int group) { + return matcher.group(group); + } + + @Override + public String group(String group) { + return matcher.group(group); + } + } + + /** + * The regular expression pattern to match. + */ + private final Pattern pattern; + + public JavaRegexRecogPatternMatcher(Pattern pattern) { + this.pattern = requireNonNull(pattern); + } + + @Override + public String getPattern() { + return pattern.pattern(); + } + + @Override + public int getFlags() { + return pattern.flags(); + } + + @Override + public boolean matches(String input) { + return input != null && pattern.matcher(input).find(); + } + + @Override + public RecogPatternMatchResult match(String input) { + if (input == null) { + return null; + } + Matcher matcher = pattern.matcher(input); + return matcher.find() ? 
+ new JavaRegexRecogPatternMatchResult(matcher) : null; + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } else if (!(other instanceof JavaRegexRecogPatternMatcher)) { + return false; + } else { + JavaRegexRecogPatternMatcher that = (JavaRegexRecogPatternMatcher) other; + return Objects.equals(getPattern(), that.getPattern()) + && Objects.equals(getFlags(), that.getFlags()); + } + } + + @Override + public int hashCode() { + return Objects.hash(pattern); + } +} diff --git a/src/main/java/com/rapid7/recog/pattern/RecogPatternMatchResult.java b/src/main/java/com/rapid7/recog/pattern/RecogPatternMatchResult.java new file mode 100644 index 0000000..f9b76e6 --- /dev/null +++ b/src/main/java/com/rapid7/recog/pattern/RecogPatternMatchResult.java @@ -0,0 +1,34 @@ +package com.rapid7.recog.pattern; + +/** + * The result of a match operation. + */ +public interface RecogPatternMatchResult { + + /** + * Returns the number of capturing groups in this result. + */ + int groupCount(); + + /** + * Returns the input captured by the indexed group. + * + * @param index The index of the capturing group. Group indexes start at one. + * @return The input captured by the group at the specified index, or {@code null} + * if there is no matching input for this group. + * @throws IndexOutOfBoundsException if the index is less than 1 or greater than + * the value returned by {@code groupCount()}. + */ + String group(int index); + + /** + * Returns the input captured by the named group. + * + * @param name The name of the capturing group. + * @return Input captured by the named group or {@code null} if there is no + * matching input for this group. + * @throws IllegalArgumentException if there is no group with this name. 
+ */ + String group(String name); + +} diff --git a/src/main/java/com/rapid7/recog/pattern/RecogPatternMatcher.java b/src/main/java/com/rapid7/recog/pattern/RecogPatternMatcher.java new file mode 100644 index 0000000..d206c6a --- /dev/null +++ b/src/main/java/com/rapid7/recog/pattern/RecogPatternMatcher.java @@ -0,0 +1,32 @@ +package com.rapid7.recog.pattern; + +/** + * Performs matching of input values against a regular expression that supports grouped parameter + * extraction. + */ +public interface RecogPatternMatcher { + + /** The regex pattern this matcher matches. */ + String getPattern(); + + int getFlags(); + + /** + * Returns whether this matcher matches the specified input fingerprint value. + * + * @param input The fingerprint to test this matcher against. May be {@code null}. + * @return {@code true} if the input is non-{@code null} and matches the fingerprint matcher + * pattern. + */ + boolean matches(String input); + + /** + * Matches the regular expression against the specified input. + * + * @param input The fingerprint to match. May be {@code null}. 
+ * @return {@code null} if the input does not match the pattern, otherwise a non-{@code null} + * {@link RecogPatternMatchResult} + */ + RecogPatternMatchResult match(String input); + +} diff --git a/src/test/java/com/rapid7/recog/CustomPatternMatcherTest.java b/src/test/java/com/rapid7/recog/CustomPatternMatcherTest.java new file mode 100644 index 0000000..91db8d9 --- /dev/null +++ b/src/test/java/com/rapid7/recog/CustomPatternMatcherTest.java @@ -0,0 +1,72 @@ +package com.rapid7.recog; + +import com.rapid7.recog.pattern.RecogPatternMatchResult; +import com.rapid7.recog.pattern.RecogPatternMatcher; +import java.util.AbstractMap; +import java.util.Map; +import org.junit.jupiter.api.Test; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.hasSize; + +public class CustomPatternMatcherTest { + + private static class EchoPatternMatcher implements RecogPatternMatcher { + + @Override + public String getPattern() { + return null; + } + + @Override + public int getFlags() { + return 0; + } + + @Override + public boolean matches(String input) { + return true; + } + + @Override + public RecogPatternMatchResult match(String input) { + return new RecogPatternMatchResult() { + @Override + public int groupCount() { + return Integer.MAX_VALUE; + } + + @Override + public String group(int index) { + return "group: " + index; + } + + @Override + public String group(String name) { + return "group: " + name; + } + }; + } + } + + @Test + public void customMatcherTest() { + // given + RecogPatternMatcher patternMatcher = new EchoPatternMatcher(); + RecogMatcher matcher = new RecogMatcher(patternMatcher) + .addParam(1, "1") + .addParam(2, "2") + .addParam("name"); + + // when + Map matches = matcher.match("arbitrary text input"); + + // then + assertThat(matches.entrySet(), hasSize(3)); + assertThat(matches.entrySet(), containsInAnyOrder( + new AbstractMap.SimpleEntry<>("1", "group: 1"), + 
new AbstractMap.SimpleEntry<>("2", "group: 2"), + new AbstractMap.SimpleEntry<>("name", "group: name") + )); + } +} From ece5f6611dd1da3869991e3d0b9b73a14a5afbb9 Mon Sep 17 00:00:00 2001 From: hudclark Date: Tue, 4 May 2021 09:16:14 -0400 Subject: [PATCH 2/3] Mention custom pattern matchers in README. Update the README to include documentation about the default regular expression engine used in recog-java and provide an example of how users can override this behavior. --- README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/README.md b/README.md index 7b2052d..393fa0f 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,22 @@ List matchResults = recog.fingerprint("Apache HTTPD 6.5"); // draw the rest of the owl... ``` +#### Configuring Pattern Matching + +By default, recog-java uses Java's standard regular expression package, `java.util.regex`. To use a different implementation, users can supply their own `RecogPatternMatcher` implementation: + +```java +import com.rapid7.recog.pattern.RecogPatternMatcher; + +public class CustomPatternMatcher implements RecogPatternMatcher { + // custom implementation... +} + +RecogPatternMatcher patternMatcher = new CustomPatternMatcher("^Apache HTTPD (?<version>.*)$"); +RecogMatcher matcher = new RecogMatcher(patternMatcher); +Map<String, String> results = matcher.match("Apache HTTPD 6.5"); +``` + ## Differences from Ruby implementation This library is not yet at a 1:1 parity with the original [rapid7/recog](https://github.com/rapid7/recog) Ruby implementation. From a894f91ef8e316becb060ac5cef5276b31a45295 Mon Sep 17 00:00:00 2001 From: hudclark Date: Tue, 4 May 2021 11:18:46 -0400 Subject: [PATCH 3/3] Update RecogParser#PatternMatcherFactory to be public. Previously, this interface was package-private and didn't support library users supplying their own factories. 
--- src/main/java/com/rapid7/recog/parser/RecogParser.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/rapid7/recog/parser/RecogParser.java b/src/main/java/com/rapid7/recog/parser/RecogParser.java index f095074..c29c6c2 100644 --- a/src/main/java/com/rapid7/recog/parser/RecogParser.java +++ b/src/main/java/com/rapid7/recog/parser/RecogParser.java @@ -33,7 +33,7 @@ public class RecogParser { * Factory used to create the underlying {@link RecogPatternMatcher} used * when matching inputs against regular expressions. */ - interface PatternMatcherFactory { + public interface PatternMatcherFactory { RecogPatternMatcher create(String pattern, int flags); }