diff --git a/README.md b/README.md index 7b2052d..393fa0f 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,22 @@ List matchResults = recog.fingerprint("Apache HTTPD 6.5"); // draw the rest of the owl... ``` +#### Configuring Pattern Matching + +By default, recog-java uses Java's standard regular expression package, `java.util.regex`. To use a different implementation, users can implement their own `RecogPatternMatcher` instance: + +```java +import com.rapid7.recog.pattern.RecogPatternMatcher; + +public class CustomPatternMatcher implements RecogPatternMatcher { + // custom implementation... +} + +RecogPatternMatcher patternMatcher = new CustomPatternMatcher("^Apache HTTPD (?<version>.*)$"); +RecogMatcher matcher = new RecogMatcher(patternMatcher); +Map<String, String> results = matcher.match("Apache HTTPD 6.5"); +``` + ## Differences from Ruby implementation This library is not yet at a 1:1 parity with the original [rapid7/recog](https://github.com/rapid7/recog) Ruby implementation. diff --git a/src/main/java/com/rapid7/recog/RecogMatcher.java b/src/main/java/com/rapid7/recog/RecogMatcher.java index af5bc24..c78e6e1 100644 --- a/src/main/java/com/rapid7/recog/RecogMatcher.java +++ b/src/main/java/com/rapid7/recog/RecogMatcher.java @@ -1,5 +1,8 @@ package com.rapid7.recog; +import com.rapid7.recog.pattern.JavaRegexRecogPatternMatcher; +import com.rapid7.recog.pattern.RecogPatternMatchResult; +import com.rapid7.recog.pattern.RecogPatternMatcher; import java.io.Serializable; import java.util.HashMap; import java.util.HashSet; @@ -21,8 +24,7 @@ */ public class RecogMatcher implements Serializable { - /** The regular expression pattern to match. */ - private Pattern pattern; + private final RecogPatternMatcher matcher; /** "Constant" values always matched as parameters. Key is the name, value is the value. */ private Map values; @@ -45,8 +47,23 @@ public class RecogMatcher implements Serializable { /** Optional examples that illustrate the matcher (or that can be used to test the matcher). 
*/ private Set examples; + /** + * Creates a new RecogMatcher using a {@link JavaRegexRecogPatternMatcher} to + * match fingerprint values. + * + * @param pattern The regular expression pattern to match fingerprint values against. + */ public RecogMatcher(Pattern pattern) { - this.pattern = requireNonNull(pattern); + this(new JavaRegexRecogPatternMatcher(pattern)); + } + + /** + * Creates a RecogMatcher with the specified {@link RecogPatternMatcher}. + * + * @param matcher The {@link RecogPatternMatcher} to use when matching fingerprint values. Must not be {@code null}. + */ + public RecogMatcher(RecogPatternMatcher matcher) { + this.matcher = requireNonNull(matcher); values = new HashMap<>(); positionalParameters = new HashMap<>(); namedParameters = new HashSet<>(); @@ -111,7 +128,7 @@ public boolean matches(String input) { if (input == null) return false; else - return pattern.matcher(input).find(); + return matcher.matches(input); } /** @@ -129,19 +146,19 @@ public Map match(String input) { if (input == null) return null; - Matcher matcher = pattern.matcher(input); - if (matcher.find()) { + RecogPatternMatchResult result = matcher.match(input); + if (result != null) { Map values = new HashMap<>(); values.putAll(this.values); // parse positional parameters for the groups specified for (Entry parameter : positionalParameters.entrySet()) - if (parameter.getValue() <= matcher.groupCount()) - values.put(parameter.getKey(), matcher.group(parameter.getValue())); + if (parameter.getValue() <= result.groupCount()) + values.put(parameter.getKey(), result.group(parameter.getValue())); for (String parameter : namedParameters) { try { - values.put(parameter, matcher.group(parameter)); + values.put(parameter, result.group(parameter)); } catch (IllegalArgumentException exception) { // the group with the name doesn't exist, ignore it } @@ -199,7 +216,7 @@ public RecogMatcher addParam(String name) { } public String getPattern() { - return pattern.pattern(); + return matcher.getPattern(); } /** @@ -221,9 +238,9 @@ public 
static Pattern pattern(String regex, int... flags) { @Override public String toString() { return new StringJoiner(", ", RecogMatcher.class.getSimpleName() + "[", "]") - .add("Pattern=" + pattern.pattern()) + .add("Pattern=" + matcher.getPattern()) .add("Description=" + description) - .add("Flags=" + pattern.flags()) + .add("Flags=" + matcher.getFlags()) .add("Positional Parameters=" + positionalParameters) .add("Named Parameters=" + namedParameters) .add("Values=" + values) @@ -233,7 +250,7 @@ public String toString() { @Override public int hashCode() { - return Objects.hash(pattern, values, positionalParameters); + return Objects.hash(matcher, values, positionalParameters); } @Override @@ -244,8 +261,7 @@ else if (!(obj instanceof RecogMatcher)) return false; else { RecogMatcher other = (RecogMatcher) obj; - return Objects.equals(pattern.flags(), other.pattern.flags()) - && Objects.equals(pattern.pattern(), other.pattern.pattern()) + return Objects.equals(matcher, other.matcher) && Objects.equals(values, other.values) && Objects.equals(positionalParameters, other.positionalParameters) && Objects.equals(namedParameters, other.namedParameters); diff --git a/src/main/java/com/rapid7/recog/parser/RecogParser.java b/src/main/java/com/rapid7/recog/parser/RecogParser.java index b3613c2..c29c6c2 100644 --- a/src/main/java/com/rapid7/recog/parser/RecogParser.java +++ b/src/main/java/com/rapid7/recog/parser/RecogParser.java @@ -2,6 +2,8 @@ import com.rapid7.recog.RecogMatcher; import com.rapid7.recog.RecogMatchers; +import com.rapid7.recog.pattern.JavaRegexRecogPatternMatcher; +import com.rapid7.recog.pattern.RecogPatternMatcher; import java.io.File; import java.io.FileReader; import java.io.IOException; @@ -27,8 +29,24 @@ */ public class RecogParser { + /** + * Factory used to create the underlying {@link RecogPatternMatcher} used + * when matching inputs against regular expressions. 
+ */ + public interface PatternMatcherFactory { + RecogPatternMatcher create(String pattern, int flags); + } + + /** + * The default {@link PatternMatcherFactory} uses java.util.regex.* packages to evaluate + * regular expressions. + */ + public static final PatternMatcherFactory DEFAULT_PATTERN_MATCHER_FACTORY = + (pattern, flags) -> new JavaRegexRecogPatternMatcher(Pattern.compile(pattern, flags)); + private static final Logger LOGGER = LoggerFactory.getLogger(RecogParser.class); private final boolean strictMode; + private final PatternMatcherFactory patternMatcherFactory; /** * Constructs a parser to parser with non-strict (lenient) parsing mode. @@ -44,7 +62,19 @@ public RecogParser() { * encountered, {@code false} otherwise. */ public RecogParser(boolean strictMode) { + this(strictMode, DEFAULT_PATTERN_MATCHER_FACTORY); + } + + /** + * Constructs a parser with the specified strictness mode and {@link PatternMatcherFactory}. + * + * @param strictMode {@code true} if the parser should throw exceptions when any error is + * encountered, {@code false} otherwise. + * @param patternMatcherFactory The {@link PatternMatcherFactory} to be used during parsing. 
+ */ + public RecogParser(boolean strictMode, PatternMatcherFactory patternMatcherFactory) { this.strictMode = strictMode; + this.patternMatcherFactory = patternMatcherFactory; } /** @@ -117,7 +147,7 @@ public RecogMatchers parse(Reader reader, String name) int regexFlags = parseFlags(fingerprint.getAttribute("flags")); // construct a pattern - RecogMatcher fingerprintPattern = new RecogMatcher(Pattern.compile(pattern, regexFlags)); + RecogMatcher fingerprintPattern = new RecogMatcher(patternMatcherFactory.create(pattern, regexFlags)); // description (optional) NodeList description = fingerprint.getElementsByTagName("description"); diff --git a/src/main/java/com/rapid7/recog/pattern/JavaRegexRecogPatternMatcher.java b/src/main/java/com/rapid7/recog/pattern/JavaRegexRecogPatternMatcher.java new file mode 100644 index 0000000..192c2a6 --- /dev/null +++ b/src/main/java/com/rapid7/recog/pattern/JavaRegexRecogPatternMatcher.java @@ -0,0 +1,89 @@ +package com.rapid7.recog.pattern; + +import java.util.Objects; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import static java.util.Objects.requireNonNull; + +/** + * An implementation of {@link RecogPatternMatcher} that uses java.util.regex.* + * packages to match fingerprint values against fingerprint patterns. + * Matching of the patterns specified is performed using a sub-sequence or "partial" + * match. See {@link Matcher#find()} vs {@link Matcher#matches()}. 
+ */ +public class JavaRegexRecogPatternMatcher implements RecogPatternMatcher { + + private static class JavaRegexRecogPatternMatchResult implements RecogPatternMatchResult { + private final Matcher matcher; + + JavaRegexRecogPatternMatchResult(Matcher matcher) { + this.matcher = matcher; + } + + @Override + public int groupCount() { + return matcher.groupCount(); + } + + @Override + public String group(int group) { + return matcher.group(group); + } + + @Override + public String group(String group) { + return matcher.group(group); + } + } + + /** + * The regular expression pattern to match. + */ + private final Pattern pattern; + + public JavaRegexRecogPatternMatcher(Pattern pattern) { + this.pattern = requireNonNull(pattern); + } + + @Override + public String getPattern() { + return pattern.pattern(); + } + + @Override + public int getFlags() { + return pattern.flags(); + } + + @Override + public boolean matches(String input) { + return input != null && pattern.matcher(input).find(); + } + + @Override + public RecogPatternMatchResult match(String input) { + if (input == null) { + return null; + } + Matcher matcher = pattern.matcher(input); + return matcher.find() ? 
new JavaRegexRecogPatternMatchResult(matcher) : null; + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } else if (!(other instanceof JavaRegexRecogPatternMatcher)) { + return false; + } else { + JavaRegexRecogPatternMatcher that = (JavaRegexRecogPatternMatcher) other; + return Objects.equals(getPattern(), that.getPattern()) + && Objects.equals(getFlags(), that.getFlags()); + } + } + + @Override + public int hashCode() { + return Objects.hash(getPattern(), getFlags()); // hash the same fields equals() compares; Pattern itself only has identity hashCode + } +} diff --git a/src/main/java/com/rapid7/recog/pattern/RecogPatternMatchResult.java b/src/main/java/com/rapid7/recog/pattern/RecogPatternMatchResult.java new file mode 100644 index 0000000..f9b76e6 --- /dev/null +++ b/src/main/java/com/rapid7/recog/pattern/RecogPatternMatchResult.java @@ -0,0 +1,34 @@ +package com.rapid7.recog.pattern; + +/** + * The result of a match operation. + */ +public interface RecogPatternMatchResult { + + /** + * Returns the number of capturing groups in this result. + */ + int groupCount(); + + /** + * Returns the input captured by the indexed group. + * + * @param index The index of the capturing group. Group indexes start at one. + * @return The input captured by the group at the specified index, or {@code null} + * if there is no matching input for this group. + * @throws IndexOutOfBoundsException if the index is less than 1 or greater than + * that returned by {@code groupCount()}. + */ + String group(int index); + + /** + * Returns the input captured by the named group. + * + * @param name The name of the capturing group. + * @return Input captured by the named group or {@code null} if there is no + * matching input for this group. + * @throws IllegalArgumentException if there is no group with this name. 
+ */ + String group(String name); + +} diff --git a/src/main/java/com/rapid7/recog/pattern/RecogPatternMatcher.java b/src/main/java/com/rapid7/recog/pattern/RecogPatternMatcher.java new file mode 100644 index 0000000..d206c6a --- /dev/null +++ b/src/main/java/com/rapid7/recog/pattern/RecogPatternMatcher.java @@ -0,0 +1,32 @@ +package com.rapid7.recog.pattern; + +/** + * Performs matching of input values against a regular expression that supports grouped parameter + * extraction. + */ +public interface RecogPatternMatcher { + + /** The regex pattern this matcher matches. */ + String getPattern(); + + int getFlags(); + + /** + * Returns whether this matcher matches the specified input fingerprint value. + * + * @param input The fingerprint to test this matcher against. May be {@code null}. + * @return {@code true} if the input is non-{@code null} and matches the fingerprint matcher + * pattern. + */ + boolean matches(String input); + + /** + * Matches the regular expression against the specified input. + * + * @param input The fingerprint to match. May be {@code null}. 
+ * @return {@code null} if the input does not match the pattern, otherwise a non-{@code null} + * {@link RecogPatternMatchResult} + */ + RecogPatternMatchResult match(String input); + +} diff --git a/src/test/java/com/rapid7/recog/CustomPatternMatcherTest.java b/src/test/java/com/rapid7/recog/CustomPatternMatcherTest.java new file mode 100644 index 0000000..91db8d9 --- /dev/null +++ b/src/test/java/com/rapid7/recog/CustomPatternMatcherTest.java @@ -0,0 +1,72 @@ +package com.rapid7.recog; + +import com.rapid7.recog.pattern.RecogPatternMatchResult; +import com.rapid7.recog.pattern.RecogPatternMatcher; +import java.util.AbstractMap; +import java.util.Map; +import org.junit.jupiter.api.Test; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsInAnyOrder; +import static org.hamcrest.Matchers.hasSize; + +public class CustomPatternMatcherTest { + + private static class EchoPatternMatcher implements RecogPatternMatcher { + + @Override + public String getPattern() { + return null; + } + + @Override + public int getFlags() { + return 0; + } + + @Override + public boolean matches(String input) { + return true; + } + + @Override + public RecogPatternMatchResult match(String input) { + return new RecogPatternMatchResult() { + @Override + public int groupCount() { + return Integer.MAX_VALUE; + } + + @Override + public String group(int index) { + return "group: " + index; + } + + @Override + public String group(String name) { + return "group: " + name; + } + }; + } + } + + @Test + public void customMatcherTest() { + // given + RecogPatternMatcher patternMatcher = new EchoPatternMatcher(); + RecogMatcher matcher = new RecogMatcher(patternMatcher) + .addParam(1, "1") + .addParam(2, "2") + .addParam("name"); + + // when + Map matches = matcher.match("arbitrary text input"); + + // then + assertThat(matches.entrySet(), hasSize(3)); + assertThat(matches.entrySet(), containsInAnyOrder( + new AbstractMap.SimpleEntry<>("1", "group: 1"), + 
new AbstractMap.SimpleEntry<>("2", "group: 2"), + new AbstractMap.SimpleEntry<>("name", "group: name") + )); + } +}