From 0966a64b2c5bde7b5aac434ee61c296e0fe67462 Mon Sep 17 00:00:00 2001 From: Simeon Widdis Date: Sat, 5 Apr 2025 11:33:46 -0700 Subject: [PATCH 1/4] Move everything to benchmarks + fix regex --- BenchmarkRunner.java | 72 +++++++++++++++++++++++++++++++++++++++++ SearchByContains.java | 32 ++++-------------- SearchByManualLoop.java | 36 +++++---------------- SearchByRegex.java | 35 ++++++-------------- 4 files changed, 95 insertions(+), 80 deletions(-) create mode 100644 BenchmarkRunner.java diff --git a/BenchmarkRunner.java b/BenchmarkRunner.java new file mode 100644 index 0000000..a923f55 --- /dev/null +++ b/BenchmarkRunner.java @@ -0,0 +1,72 @@ +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; +import java.lang.reflect.Method; +import java.util.*; + +public class BenchmarkRunner { + + private static final int WARMUP_RUNS = 2; + private static final int MEASURED_RUNS = 5; + + static List load() throws IOException { + ArrayList lines = new ArrayList(1000000); + + try (BufferedReader br = new BufferedReader(new FileReader("test_data.out"))) { + String line = br.readLine(); + while (line != null) { + while (!line.endsWith("\"")) { + String nextLine = br.readLine(); + if (nextLine == null) { + break; + } + line = line + nextLine; + } + if (line.startsWith("\"")) { + line = line.substring(1); + } + if (line.endsWith("\"")) { + line = line.substring(0, line.length() - 1); + } + lines.add(line); + line = br.readLine(); + } + } + + return lines; + } + + public static void main(String[] args) throws Exception { + List classNames = Arrays.asList( + "SearchByContains", + "SearchByManualLoop", + "SearchByRegex" + ); + + List data = load(); + + for (String className : classNames) { + System.out.println("Benchmarking " + className + "..."); + + Class clazz = Class.forName(className); + Method mainMethod = clazz.getMethod("count", List.class); + Integer count = 0; + + for (int i = 0; i < WARMUP_RUNS; i++) { + count = (Integer) mainMethod.invoke(null, data); + } + + long totalTime = 0; + for (int i = 0; i < MEASURED_RUNS; i++) { + long start = System.nanoTime(); + mainMethod.invoke(null, data); + long end = System.nanoTime(); + totalTime += (end - start); + } + + double avgMs = totalTime / MEASURED_RUNS / 1_000_000.0; + System.out.printf("Average time: %.3f ms%nCount: %d%n", avgMs, count); + System.out.println(); + } + } +} diff --git a/SearchByContains.java b/SearchByContains.java index 6b0e2a0..d8574d1 100644 --- a/SearchByContains.java +++ b/SearchByContains.java @@ -1,34 +1,14 @@ -import java.io.*; +import java.util.*; public class SearchByContains { - - public static void main(String [] args) throws Exception { + public static int count(List data) { int count = 0; - try (BufferedReader br = new BufferedReader(new FileReader("test_data.out"))) { - String line = br.readLine(); - while (line != null) { - while (!line.endsWith("\"")) { - String nextLine = br.readLine(); - if (nextLine == null) { - break; - } - line = line + nextLine; - } - if (line.startsWith("\"")) { - line = line.substring(1); - } - if (line.endsWith("\"")) { - line = line.substring(0, line.length() - 1); - } - if (line.contains("\"") || line.contains(",") || line.contains("\r") || + for (String line : data) { + if (line.contains("\"") || line.contains(",") || line.contains("\r") || line.contains("\n")) { - count++; - } - line = br.readLine(); + count++; } } - System.out.println("Search By Four 'contains' calls."); - System.out.println("Counted " + count + " lines that would have needed to be quoted."); + return count; } - } diff --git a/SearchByManualLoop.java b/SearchByManualLoop.java index 0970a85..a1d5f74 100644 --- a/SearchByManualLoop.java +++ b/SearchByManualLoop.java @@ -1,4 +1,3 @@ -import java.io.*; import java.util.*; public class SearchByManualLoop { @@ -8,36 +7,17 @@ public class SearchByManualLoop { static final int cpCr = "\r".codePointAt(0); static final int cpNl = "\n".codePointAt(0); - public static void main(String [] args) throws Exception { + public static int count(List data) { int count = 0; - try (BufferedReader br = new BufferedReader(new FileReader("test_data.out"))) { - String line = br.readLine(); - while (line != null) { - while (!line.endsWith("\"")) { - String nextLine = br.readLine(); - if (nextLine == null) { - break; - } - line = line + nextLine; + for (String line : data) { + for (int i = 0; i < line.length(); i++) { + int c = line.codePointAt(i); + if (c == cpQuote || c == cpComma || c == cpCr || c == cpNl) { + count++; + break; } - if (line.startsWith("\"")) { - line = line.substring(1); - } - if (line.endsWith("\"")) { - line = line.substring(0, line.length() - 1); - } - for (int i = 0; i < line.length(); i++) { - int c = line.codePointAt(i); - if (c == cpQuote || c == cpComma || c == cpCr || c == cpNl) { - count++; - break; - } - } - line = br.readLine(); } } - System.out.println("Search By Manual Loop:"); - System.out.println("Counted " + count + " lines that would have needed to be quoted."); + return count; } - } diff --git a/SearchByRegex.java b/SearchByRegex.java index dbc4bfe..648b548 100644 --- a/SearchByRegex.java +++ b/SearchByRegex.java @@ -1,33 +1,16 @@ -import java.io.*; +import java.util.List; +import java.util.regex.*; public class SearchByRegex { - - public static void main(String [] args) throws Exception { + public static int count(List data) { + Pattern regex = Pattern.compile("[\",\\r\\n]"); int count = 0; - try (BufferedReader br = new BufferedReader(new FileReader("test_data.out"))) { - String line = br.readLine(); - while (line != null) { - while (!line.endsWith("\"")) { - String nextLine = br.readLine(); - if (nextLine == null) { - break; - } - line = line + nextLine; - } - if (line.startsWith("\"")) { - line = line.substring(1); - } - if (line.endsWith("\"")) { - line = line.substring(0, line.length() - 1); - } - if (line.matches("^[^\",\r\n]*[\",\r\n].*")) { - count++; - } - line = br.readLine(); + for (String line : data) { + Matcher matcher = regex.matcher(line); + if (matcher.find()) { + count++; } } - System.out.println("Search by Regex:"); - System.out.println("Counted " + count + " lines that would have needed to be quoted."); + return count; } - } From 229ff1e2ce310b9173a0224bc6f4d7ac4df9a55c Mon Sep 17 00:00:00 2001 From: Simeon Widdis Date: Sat, 5 Apr 2025 11:39:42 -0700 Subject: [PATCH 2/4] Clean up benchmark script --- MakeTestData.java | 2 ++ run_benchmark.sh | 25 ++++++++----------------- 2 files changed, 10 insertions(+), 17 deletions(-) diff --git a/MakeTestData.java b/MakeTestData.java index a65c1d0..6c72ad8 100644 --- a/MakeTestData.java +++ b/MakeTestData.java @@ -19,6 +19,8 @@ public class MakeTestData { public static void main(String [] args) throws Exception { Random random = new Random(); + random.setSeed(0); // Reproducibility + StringBuilder sb = new StringBuilder(); try(FileWriter fw = new FileWriter("test_data.out")) { for (int i = 0; i < NUM_WORDS; i++) { diff --git a/run_benchmark.sh b/run_benchmark.sh index f68221a..68f3182 100755 --- a/run_benchmark.sh +++ b/run_benchmark.sh @@ -3,26 +3,17 @@ main() { cat < Date: Sat, 5 Apr 2025 11:49:45 -0700 Subject: [PATCH 3/4] Manual loop: use char and foreach instead of codepoints --- SearchByManualLoop.java | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/SearchByManualLoop.java b/SearchByManualLoop.java index a1d5f74..8dbc2f8 100644 --- a/SearchByManualLoop.java +++ b/SearchByManualLoop.java @@ -2,16 +2,15 @@ public class SearchByManualLoop { - static final int cpQuote = "\"".codePointAt(0); - static final int cpComma = ",".codePointAt(0); - static final int cpCr = "\r".codePointAt(0); - static final int cpNl = "\n".codePointAt(0); + static final char cpQuote = '\"'; + static final char cpComma = ','; + static final char cpCr = '\r'; + static final char cpNl = '\n'; public static int count(List data) { int count = 0; for (String line : data) { - for (int i = 0; i < line.length(); i++) { - int c = line.codePointAt(i); + for (char c : line.toCharArray()) { if (c == cpQuote || c == cpComma || c == cpCr || c == cpNl) { count++; break; From 1350279e86fefbd2498660e523723a0d92afc1d3 Mon Sep 17 00:00:00 2001 From: Simeon Widdis Date: Sat, 5 Apr 2025 11:53:12 -0700 Subject: [PATCH 4/4] New method: search by Lookup Table --- BenchmarkRunner.java | 3 ++- SearchByArray.java | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 SearchByArray.java diff --git a/BenchmarkRunner.java b/BenchmarkRunner.java index a923f55..1bb0a1a 100644 --- a/BenchmarkRunner.java +++ b/BenchmarkRunner.java @@ -40,7 +40,8 @@ public static void main(String[] args) throws Exception { List classNames = Arrays.asList( "SearchByContains", "SearchByManualLoop", - "SearchByRegex" + "SearchByRegex", + "SearchByArray" ); List data = load(); diff --git a/SearchByArray.java b/SearchByArray.java new file mode 100644 index 0000000..f90c19c --- /dev/null +++ b/SearchByArray.java @@ -0,0 +1,24 @@ +import java.util.*; + +public class SearchByArray { + public static int count(List data) { + int count = 0; + + // Lookup tables my beloved + boolean[] match = new boolean[256]; + match[','] = true; + match['\n'] = true; + match['\r'] = true; + match['\"'] = true; + + for (String line : data) { + for (char c : line.toCharArray()) { + if (c < 256 && match[c]) { + count++; + break; + } + } + } + return count; + } +}