From f8de5aab0e4d9688d0fd78076a05bdeb9a8f10e3 Mon Sep 17 00:00:00 2001
From: Kenzie Schmoll <kenzieschmoll@google.com>
Date: Thu, 7 Dec 2023 18:12:09 -0800
Subject: [PATCH 1/9] Bump to web_benchmarks 1.0.0

---
 packages/devtools_app/benchmark/README.md     |  2 +-
 .../benchmark/devtools_benchmarks_test.dart   | 79 ++++++++++---------
 .../benchmark/run_benchmarks.dart             | 30 -------
 .../benchmark/scripts/run_benchmarks.dart     | 79 +++++++++++++++++++
 packages/devtools_app/pubspec.yaml            |  2 +-
 5 files changed, 122 insertions(+), 70 deletions(-)
 delete mode 100644 packages/devtools_app/benchmark/run_benchmarks.dart
 create mode 100644 packages/devtools_app/benchmark/scripts/run_benchmarks.dart
diff --git a/packages/devtools_app/benchmark/README.md b/packages/devtools_app/benchmark/README.md
index 6e4ca0995bc..5e983a1d0fe 100644
--- a/packages/devtools_app/benchmark/README.md
+++ b/packages/devtools_app/benchmark/README.md
@@ -18,7 +18,7 @@ See the "benchmark-performance" and "benchmark-size" jobs.
 
 To run the performance benchmark tests locally, run:
 ```sh
-dart run run_benchmarks.dart
+dart run benchmark/scripts/run_benchmarks.dart
 ```
 
 To run the test that verifies we can run benchmark tests, run:
diff --git a/packages/devtools_app/benchmark/devtools_benchmarks_test.dart b/packages/devtools_app/benchmark/devtools_benchmarks_test.dart
index a64ebd3c7c5..14e0a2286ca 100644
--- a/packages/devtools_app/benchmark/devtools_benchmarks_test.dart
+++ b/packages/devtools_app/benchmark/devtools_benchmarks_test.dart
@@ -30,57 +30,60 @@ final valueList = <String>[
 /// Tests that the DevTools web benchmarks are run and reported correctly.
 void main() {
   test(
-    'Can run a web benchmark',
+    'Can run web benchmarks',
     () async {
-      stdout.writeln('Starting web benchmark tests ...');
+      await _runBenchmarks();
+    },
+    timeout: Timeout.none,
+  );
 
-      final taskResult = await serveWebBenchmark(
-        benchmarkAppDirectory: projectRootDirectory(),
-        entryPoint: 'benchmark/test_infra/client.dart',
-        useCanvasKit: true,
-        treeShakeIcons: false,
-        initialPage: benchmarkInitialPage,
-      );
 
-      stdout.writeln('Web benchmark tests finished.');
+  // TODO(kenz): add tests that verify performance meets some expected threshold
+}
 
-      expect(
-        taskResult.scores.keys,
-        hasLength(DevToolsBenchmark.values.length),
-      );
+Future<void> _runBenchmarks({bool useWasm = false}) async {
+  stdout.writeln('Starting web benchmark tests ...');
+  final taskResult = await serveWebBenchmark(
+    benchmarkAppDirectory: projectRootDirectory(),
+    entryPoint: 'benchmark/test_infra/client.dart',
+    compilationOptions: CompilationOptions(useWasm: useWasm),
+    treeShakeIcons: false,
+    initialPage: benchmarkInitialPage,
+  );
+  stdout.writeln('Web benchmark tests finished.');
 
-      for (final benchmarkName in DevToolsBenchmark.values.map((e) => e.id)) {
-        expect(
-          taskResult.scores[benchmarkName],
-          hasLength(metricList.length * valueList.length + 1),
-        );
+  expect(
+    taskResult.scores.keys,
+    hasLength(DevToolsBenchmark.values.length),
+  );
 
-        for (final metricName in metricList) {
-          for (final valueName in valueList) {
-            expect(
-              taskResult.scores[benchmarkName]?.where(
-                (score) => score.metric == '$metricName.$valueName',
-              ),
-              hasLength(1),
-            );
-          }
-        }
+  for (final benchmarkName in DevToolsBenchmark.values.map((e) => e.id)) {
+    expect(
+      taskResult.scores[benchmarkName],
+      hasLength(metricList.length * valueList.length + 1),
+    );
 
+    for (final metricName in metricList) {
+      for (final valueName in valueList) {
         expect(
           taskResult.scores[benchmarkName]?.where(
-            (score) => score.metric == 'totalUiFrame.average',
+            (score) => score.metric == '$metricName.$valueName',
           ),
           hasLength(1),
         );
       }
+    }
 
-      expect(
-        const JsonEncoder.withIndent('  ').convert(taskResult.toJson()),
-        isA<String>(),
-      );
-    },
-    timeout: Timeout.none,
-  );
+    expect(
+      taskResult.scores[benchmarkName]?.where(
+        (score) => score.metric == 'totalUiFrame.average',
+      ),
+      hasLength(1),
+    );
+  }
 
-  // TODO(kenz): add tests that verify performance meets some expected threshold
+  expect(
+    const JsonEncoder.withIndent('  ').convert(taskResult.toJson()),
+    isA<String>(),
+  );
 }
diff --git a/packages/devtools_app/benchmark/run_benchmarks.dart b/packages/devtools_app/benchmark/run_benchmarks.dart
deleted file mode 100644
index 1da34fde834..00000000000
--- a/packages/devtools_app/benchmark/run_benchmarks.dart
+++ /dev/null
@@ -1,30 +0,0 @@
-// Copyright 2023 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-import 'dart:convert' show JsonEncoder;
-import 'dart:io';
-
-import 'package:web_benchmarks/server.dart';
-
-import 'test_infra/common.dart';
-import 'test_infra/project_root_directory.dart';
-
-/// Runs the DevTools web benchmarks and reports the benchmark data.
-Future<void> main() async {
-  stdout.writeln('Starting web benchmark tests ...');
-
-  final taskResult = await serveWebBenchmark(
-    benchmarkAppDirectory: projectRootDirectory(),
-    entryPoint: 'benchmark/test_infra/client.dart',
-    useCanvasKit: true,
-    treeShakeIcons: false,
-    initialPage: benchmarkInitialPage,
-  );
-
-  stdout
-    ..writeln('Web benchmark tests finished.')
-    ..writeln('==== Results ====')
-    ..writeln(const JsonEncoder.withIndent('  ').convert(taskResult.toJson()))
-    ..writeln('==== End of results ====');
-}
diff --git a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
new file mode 100644
index 00000000000..bec8a6b8a8b
--- /dev/null
+++ b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
@@ -0,0 +1,79 @@
+// Copyright 2023 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+import 'dart:convert';
+import 'dart:io';
+
+import 'package:args/args.dart';
+import 'package:web_benchmarks/server.dart';
+
+import '../test_infra/common.dart';
+import '../test_infra/project_root_directory.dart';
+import 'compare_benchmarks.dart';
+import 'utils.dart';
+
+/// Runs the DevTools web benchmarks and reports the benchmark data.
+Future<void> main(List<String> args) async {
+  final benchmarkArgs = BenchmarkArgs(args);
+
+  stdout.writeln('Starting web benchmark tests ...');
+  final taskResult = await serveWebBenchmark(
+    benchmarkAppDirectory: projectRootDirectory(),
+    entryPoint: 'benchmark/test_infra/client.dart',
+    compilationOptions: CompilationOptions(useWasm: benchmarkArgs.useWasm),
+    treeShakeIcons: false,
+    initialPage: benchmarkInitialPage,
+    headless: !benchmarkArgs.useBrowser, // --browser opts out of headless.
+  );
+  stdout.writeln('Web benchmark tests finished.');
+
+  final resultsAsMap = taskResult.toJson();
+  final resultsAsJsonString =
+      const JsonEncoder.withIndent('  ').convert(resultsAsMap);
+
+  if (benchmarkArgs.saveToFileLocation != null) {
+    final location = Uri.parse(benchmarkArgs.saveToFileLocation!);
+    File.fromUri(location)
+      ..createSync()
+      ..writeAsStringSync(resultsAsJsonString);
+  }
+
+  stdout
+    ..writeln('==== Results ====')
+    ..writeln(resultsAsJsonString)
+    ..writeln('==== End of results ====')
+    ..writeln();
+}
+
+class BenchmarkArgs {
+  BenchmarkArgs(List<String> args) {
+    argParser = _buildArgParser();
+    argResults = argParser.parse(args);
+  }
+  late final ArgParser argParser;
+
+  late final ArgResults argResults;
+
+  bool get useBrowser => argResults[_browserFlag];
+
+  bool get useWasm => argResults[_wasmFlag];
+
+  static const _browserFlag = 'browser';
+
+  static const _wasmFlag = 'wasm';
+
+  /// Builds an arg parser for DevTools integration tests.
+  static ArgParser _buildArgParser() {
+    return ArgParser()
+      ..addFlag(
+        _browserFlag,
+        help: 'Runs the benchmark tests in browser mode (not headless mode).',
+      )
+      ..addFlag(
+        _wasmFlag,
+        help: 'Runs the benchmark tests with dart2wasm',
+      );
+  }
+}
+
diff --git a/packages/devtools_app/pubspec.yaml b/packages/devtools_app/pubspec.yaml
index 4d8762c98b2..1b839ad86a1 100644
--- a/packages/devtools_app/pubspec.yaml
+++ b/packages/devtools_app/pubspec.yaml
@@ -79,7 +79,7 @@ dev_dependencies:
   mockito: ^5.4.1
   stager: ^1.0.1
   test: ^1.21.1
-  web_benchmarks: ^0.1.0+10
+  web_benchmarks: ^1.0.0
   webkit_inspection_protocol: ">=0.5.0 <2.0.0"
 
 flutter:

From 2822ffcb2be50a57583b9d394edf8e0c9958df7a Mon Sep 17 00:00:00 2001
From: Kenzie Schmoll <kenzieschmoll@google.com>
Date: Thu, 7 Dec 2023 18:13:01 -0800
Subject: [PATCH 2/9] remove imports

---
 packages/devtools_app/benchmark/scripts/run_benchmarks.dart | 2 --
 1 file changed, 2 deletions(-)

diff --git a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
index bec8a6b8a8b..7f5b09aedc5 100644
--- a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
+++ b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
@@ -10,8 +10,6 @@ import 'package:web_benchmarks/server.dart';
 
 import '../test_infra/common.dart';
 import '../test_infra/project_root_directory.dart';
-import 'compare_benchmarks.dart';
-import 'utils.dart';
 
 /// Runs the DevTools web benchmarks and reports the benchmark data.
 Future<void> main(List<String> args) async {

From 5b54bb4ca3b2086cebb9d1ca8c0725a0f119a261 Mon Sep 17 00:00:00 2001
From: Kenzie Schmoll <kenzieschmoll@google.com>
Date: Thu, 7 Dec 2023 18:15:17 -0800
Subject: [PATCH 3/9] doc

---
 packages/devtools_app/benchmark/scripts/run_benchmarks.dart | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
index 7f5b09aedc5..2e787747309 100644
--- a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
+++ b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
@@ -12,6 +12,12 @@ import '../test_infra/common.dart';
 import '../test_infra/project_root_directory.dart';
 
 /// Runs the DevTools web benchmarks and reports the benchmark data.
+/// 
+/// Arguments:
+/// * --browser - runs the benchmark tests in the browser (non-headless mode)
+/// * --wasm - runs the benchmark tests with the dart2wasm compiler
+/// 
+/// See [BenchmarkArgs].
 Future<void> main(List<String> args) async {
   final benchmarkArgs = BenchmarkArgs(args);
 

From 6d5f36e86f2ab62f75985a62b706abd7800247e6 Mon Sep 17 00:00:00 2001
From: Kenzie Schmoll <kenzieschmoll@google.com>
Date: Thu, 7 Dec 2023 18:19:20 -0800
Subject: [PATCH 4/9] comment

---
 packages/devtools_app/benchmark/scripts/run_benchmarks.dart | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
index 2e787747309..989fd080410 100644
--- a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
+++ b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
@@ -67,7 +67,7 @@ class BenchmarkArgs {
 
   static const _wasmFlag = 'wasm';
 
-  /// Builds an arg parser for DevTools integration tests.
+  /// Builds an arg parser for DevTools benchmarks.
   static ArgParser _buildArgParser() {
     return ArgParser()
       ..addFlag(
@@ -80,4 +80,3 @@ class BenchmarkArgs {
       );
   }
 }
-

From 2954f2d9a397d4cc76542af645453285257478bd Mon Sep 17 00:00:00 2001
From: Kenzie Schmoll <kenzieschmoll@google.com>
Date: Thu, 7 Dec 2023 18:39:56 -0800
Subject: [PATCH 5/9] formatting

---
 packages/devtools_app/benchmark/scripts/run_benchmarks.dart | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
index 989fd080410..b6eebce16f7 100644
--- a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
+++ b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
@@ -12,11 +12,11 @@ import '../test_infra/common.dart';
 import '../test_infra/project_root_directory.dart';
 
 /// Runs the DevTools web benchmarks and reports the benchmark data.
-/// 
+///
 /// Arguments:
 /// * --browser - runs the benchmark tests in the browser (non-headless mode)
 /// * --wasm - runs the benchmark tests with the dart2wasm compiler
-/// 
+///
 /// See [BenchmarkArgs].
 Future<void> main(List<String> args) async {
   final benchmarkArgs = BenchmarkArgs(args);
@@ -55,6 +55,7 @@ class BenchmarkArgs {
     argParser = _buildArgParser();
     argResults = argParser.parse(args);
   }
+
   late final ArgParser argParser;
 
   late final ArgResults argResults;

From 79cb9a6d7b89aa62f860ebba2b95c03f39648c37 Mon Sep 17 00:00:00 2001
From: Kenzie Schmoll <kenzieschmoll@google.com>
Date: Thu, 7 Dec 2023 18:42:30 -0800
Subject: [PATCH 6/9] Add the ability to compare benchmark runs

---
 packages/devtools_app/benchmark/README.md     |  22 ++-
 .../benchmark/scripts/compare_benchmarks.dart | 126 ++++++++++++++++++
 .../benchmark/scripts/run_benchmarks.dart     |  42 ++++++
 .../benchmark/scripts/utils.dart.dart         |  14 ++
 4 files changed, 203 insertions(+), 1 deletion(-)
 create mode 100644 packages/devtools_app/benchmark/scripts/compare_benchmarks.dart
 create mode 100644 packages/devtools_app/benchmark/scripts/utils.dart.dart

diff --git a/packages/devtools_app/benchmark/README.md b/packages/devtools_app/benchmark/README.md
index 5e983a1d0fe..5083e98ecc8 100644
--- a/packages/devtools_app/benchmark/README.md
+++ b/packages/devtools_app/benchmark/README.md
@@ -39,4 +39,24 @@ The size benchmark must be ran by itself because it actually modifies the
 `devtools_app/build` folder to create and measure the release build web bundle size.
 If this test is ran while other tests are running, it can affect the measurements
 that the size benchmark test takes, and it can affect the DevTools build that
-the other running tests are using with.
+the other running tests are using.
+
+## Adding a new benchmark test or test case
+
+The tests are defined by "automators", which live in the `benchmark/test_infra/automators`
+directory. To add a new test or test case, either modify an existing automator or add
+a new one for a new screen. Follow existing examples in that directory for guidance.
+
+## Comparing two benchmark test runs
+
+In order to compare two different benchmark runs, you first need to run the benchmark
+tests and save the results to a file:
+```sh
+dart run benchmark/scripts/run_benchmarks.dart --save-to-file=baseline.json
+dart run benchmark/scripts/run_benchmarks.dart --save-to-file=test.json
+```
+
+Then, to compare the benchmarks and calculate deltas, run:
+```sh
+dart run benchmark/scripts/compare_benchmarks.dart baseline.json test.json
+```
diff --git a/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart b/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart
new file mode 100644
index 00000000000..314640dfe61
--- /dev/null
+++ b/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart
@@ -0,0 +1,126 @@
+// Copyright 2023 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+import 'dart:convert';
+import 'dart:io';
+
+import 'package:collection/collection.dart';
+import 'package:web_benchmarks/server.dart';
+
+import 'utils.dart';
+
+/// Compares two sets of web benchmarks and calculates the delta between each
+/// matching metric.
+void main(List<String> args) {
+  if (args.length != 2) {
+    throw Exception(
+      'Expected 2 arguments (<baseline-file>, <test-file>), but instead there '
+      'were ${args.length}.',
+    );
+  }
+
+  final baselineSource = args[0];
+  final testSource = args[1];
+
+  stdout
+    ..writeln('Comparing the following benchmark results:')
+    ..writeln('    "$testSource" (test)')
+    ..writeln('    "$baselineSource" (baseline)');
+
+  final baselineFile = checkFileExists(baselineSource);
+  final testFile = checkFileExists(testSource);
+  if (baselineFile == null || testFile == null) {
+    if (baselineFile == null) {
+      throw Exception('Cannot find baseline file $baselineSource');
+    }
+    if (testFile == null) {
+      throw Exception('Cannot find test file $testSource');
+    }
+  }
+
+  final baselineResults =
+      BenchmarkResults.parse(jsonDecode(baselineFile.readAsStringSync()));
+  final testResults =
+      BenchmarkResults.parse(jsonDecode(testFile.readAsStringSync()));
+  compareBenchmarks(
+    baselineResults,
+    testResults,
+    baselineSource: baselineSource,
+  );
+}
+
+void compareBenchmarks(
+  BenchmarkResults baseline,
+  BenchmarkResults test, {
+  required String baselineSource,
+}) {
+  stdout.writeln('Starting baseline comparison...');
+
+  for (final benchmarkName in test.scores.keys) {
+    stdout.writeln('Comparing metrics for benchmark "$benchmarkName".');
+
+    // Lookup this benchmark in the baseline.
+    final baselineScores = baseline.scores[benchmarkName];
+    if (baselineScores == null) {
+      stdout.writeln(
+        'Baseline does not contain results for benchmark "$benchmarkName".',
+      );
+      continue;
+    }
+
+    final testScores = test.scores[benchmarkName]!;
+
+    for (final score in testScores) {
+      // Lookup this metric in the baseline.
+      final baselineScore =
+          baselineScores.firstWhereOrNull((s) => s.metric == score.metric);
+      if (baselineScore == null) {
+        stdout.writeln(
+          'Baseline does not contain metric "${score.metric}" for '
+          'benchmark "$benchmarkName".',
+        );
+        continue;
+      }
+
+      // Add the delta to the [testMetric].
+      _benchmarkDeltas[score] = (score.value - baselineScore.value).toDouble();
+      // score.deltaFromBaseline = score.value - baselineScore.value;
+    }
+  }
+  stdout.writeln('Baseline comparison finished.');
+
+  stdout
+    ..writeln('==== Comparison with baseline $baselineSource ====')
+    ..writeln(
+      const JsonEncoder.withIndent('  ').convert(test.toJsonWithDeltas()),
+    )
+    ..writeln('==== End of baseline comparison ====');
+}
+
+Expando<double> _benchmarkDeltas = Expando<double>();
+
+extension ScoreDeltaExtension on BenchmarkScore {
+  double? get deltaFromBaseline => _benchmarkDeltas[this];
+}
+
+extension ResultDeltaExtension on BenchmarkResults {
+  Map<String, List<Map<String, dynamic>>> toJsonWithDeltas() {
+    return scores.map<String, List<Map<String, dynamic>>>(
+      (String benchmarkName, List<BenchmarkScore> scores) {
+        return MapEntry<String, List<Map<String, dynamic>>>(
+          benchmarkName,
+          scores.map<Map<String, dynamic>>(
+            (BenchmarkScore score) {
+              final delta = _benchmarkDeltas[score];
+              return <String, dynamic>{
+                ...score.toJson(),
+                if (delta != null) 'delta': delta,
+              };
+            },
+          ).toList(),
+        );
+      },
+    );
+  }
+}
diff --git a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
index b6eebce16f7..b9920ec9c85 100644
--- a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
+++ b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
@@ -10,6 +10,8 @@ import 'package:web_benchmarks/server.dart';
 
 import '../test_infra/common.dart';
 import '../test_infra/project_root_directory.dart';
+import 'compare_benchmarks.dart';
+import 'utils.dart';
 
 /// Runs the DevTools web benchmarks and reports the benchmark data.
 ///
@@ -48,6 +50,24 @@ Future<void> main(List<String> args) async {
     ..writeln(resultsAsJsonString)
     ..writeln('==== End of results ====')
     ..writeln();
+
+  final baselineSource = benchmarkArgs.baselineLocation;
+  if (baselineSource != null) {
+    final baselineFile = checkFileExists(baselineSource);
+    if (baselineFile != null) {
+      final baselineResults = BenchmarkResults.parse(
+        jsonDecode(baselineFile.readAsStringSync()),
+      );
+      final testResults = BenchmarkResults.parse(
+        jsonDecode(resultsAsJsonString),
+      );
+      compareBenchmarks(
+        baselineResults,
+        testResults,
+        baselineSource: baselineSource,
+      );
+    }
+  }
 }
 
 class BenchmarkArgs {
@@ -64,10 +84,18 @@ class BenchmarkArgs {
 
   bool get useWasm => argResults[_wasmFlag];
 
+  String? get saveToFileLocation => argResults[_saveToFileOption];
+
+  String? get baselineLocation => argResults[_baselineOption];
+
   static const _browserFlag = 'browser';
 
   static const _wasmFlag = 'wasm';
 
+  static const _baselineOption = 'baseline';
+
+  static const _saveToFileOption = 'save-to-file';
+
   /// Builds an arg parser for DevTools benchmarks.
   static ArgParser _buildArgParser() {
     return ArgParser()
@@ -78,6 +106,20 @@ class BenchmarkArgs {
       ..addFlag(
         _wasmFlag,
         help: 'Runs the benchmark tests with dart2wasm',
+      )
+      ..addOption(
+        _saveToFileOption,
+        help: 'Saves the benchmark results to a JSON file at the given path.',
+        valueHelp: '/Users/me/Downloads/output.json',
+      )
+      ..addOption(
+        _baselineOption,
+        help: 'The baseline benchmark data to compare this test run to. The '
+            'baseline file should be created by running this script with the '
+            '$_saveToFileOption in a separate test run.',
+        valueHelp: '/Users/me/Downloads/baseline.json',
       );
   }
 }
+// BenchmarkResults _averageBenchmarkResults(List<BenchmarkResults> results) {
+// }
\ No newline at end of file
diff --git a/packages/devtools_app/benchmark/scripts/utils.dart b/packages/devtools_app/benchmark/scripts/utils.dart
new file mode 100644
index 00000000000..a4cbc99bead
--- /dev/null
+++ b/packages/devtools_app/benchmark/scripts/utils.dart
@@ -0,0 +1,14 @@
+// Copyright 2023 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+import 'dart:io';
+
+File? checkFileExists(String path) {
+  final testFile = File.fromUri(Uri.parse(path));
+  if (!testFile.existsSync()) {
+    stdout.writeln('Could not locate file at $path.');
+    return null;
+  }
+  return testFile;
+}

From be5cdc2745aaeba0c7d2ca76c4d70181133852b8 Mon Sep 17 00:00:00 2001
From: Kenzie Schmoll <kenzieschmoll@google.com>
Date: Fri, 8 Dec 2023 10:32:12 -0800
Subject: [PATCH 7/9] fixes

---
 packages/devtools_app/benchmark/devtools_benchmarks_test.dart | 1 -
 .../devtools_app/benchmark/scripts/compare_benchmarks.dart    | 1 -
 packages/devtools_app/benchmark/scripts/run_benchmarks.dart   | 4 ++--
 3 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/packages/devtools_app/benchmark/devtools_benchmarks_test.dart b/packages/devtools_app/benchmark/devtools_benchmarks_test.dart
index 519d2a78f12..35d687a7261 100644
--- a/packages/devtools_app/benchmark/devtools_benchmarks_test.dart
+++ b/packages/devtools_app/benchmark/devtools_benchmarks_test.dart
@@ -37,7 +37,6 @@ void main() {
     timeout: const Timeout(Duration(minutes: 10)),
   );
 
-
   // TODO(kenz): add tests that verify performance meets some expected threshold
 }
 
diff --git a/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart b/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart
index 314640dfe61..64fdbcdb578 100644
--- a/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart
+++ b/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart
@@ -85,7 +85,6 @@ void compareBenchmarks(
 
       // Add the delta to the [testMetric].
       _benchmarkDeltas[score] = (score.value - baselineScore.value).toDouble();
-      // score.deltaFromBaseline = score.value - baselineScore.value;
     }
   }
   stdout.writeln('Baseline comparison finished.');
diff --git a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
index 25a1b131433..eb052867a9f 100644
--- a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
+++ b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
@@ -92,10 +92,10 @@ class BenchmarkArgs {
 
   static const _wasmFlag = 'wasm';
 
-  static const _baselineOption = 'baseline';
-
   static const _saveToFileOption = 'save-to-file';
 
+  static const _baselineOption = 'baseline';
+
   /// Builds an arg parser for DevTools benchmarks.
   static ArgParser _buildArgParser() {
     return ArgParser()

From 2c83386258472c7ccd0fb20e4df501dda6be7ae0 Mon Sep 17 00:00:00 2001
From: Kenzie Schmoll <kenzieschmoll@google.com>
Date: Fri, 8 Dec 2023 10:41:26 -0800
Subject: [PATCH 8/9] add test

---
 .../benchmark/devtools_benchmarks_test.dart   | 147 ++++++++++++++++++
 .../benchmark/scripts/compare_benchmarks.dart |   8 +-
 2 files changed, 151 insertions(+), 4 deletions(-)

diff --git a/packages/devtools_app/benchmark/devtools_benchmarks_test.dart b/packages/devtools_app/benchmark/devtools_benchmarks_test.dart
index 35d687a7261..14cdcf5d6eb 100644
--- a/packages/devtools_app/benchmark/devtools_benchmarks_test.dart
+++ b/packages/devtools_app/benchmark/devtools_benchmarks_test.dart
@@ -11,6 +11,7 @@ import 'dart:io';
 import 'package:test/test.dart';
 import 'package:web_benchmarks/server.dart';
 
+import 'scripts/compare_benchmarks.dart';
 import 'test_infra/common.dart';
 import 'test_infra/project_root_directory.dart';
 
@@ -37,6 +38,21 @@ void main() {
     timeout: const Timeout(Duration(minutes: 10)),
   );
 
+  test(
+    'Can compare web benchmarks',
+    () {
+      final benchmark1 = BenchmarkResults.parse(testBenchmarkResults1);
+      final benchmark2 = BenchmarkResults.parse(testBenchmarkResults2);
+      final comparison = compareBenchmarks(
+        benchmark1,
+        benchmark2,
+        baselineSource: 'path/to/baseline',
+      );
+      expect(comparison, testBenchmarkComparison);
+    },
+    timeout: const Timeout(Duration(minutes: 10)),
+  );
+
   // TODO(kenz): add tests that verify performance meets some expected threshold
 }
 
@@ -86,3 +102,134 @@ Future<void> _runBenchmarks({bool useWasm = false}) async {
     isA<String>(),
   );
 }
+
+final testBenchmarkResults1 = {
+  'foo': [
+    {'metric': 'preroll_frame.average', 'value': 60.5},
+    {'metric': 'preroll_frame.outlierAverage', 'value': 1400},
+    {'metric': 'preroll_frame.outlierRatio', 'value': 20.2},
+    {'metric': 'preroll_frame.noise', 'value': 0.85},
+    {'metric': 'apply_frame.average', 'value': 80.0},
+    {'metric': 'apply_frame.outlierAverage', 'value': 200.6},
+    {'metric': 'apply_frame.outlierRatio', 'value': 2.5},
+    {'metric': 'apply_frame.noise', 'value': 0.4},
+    {'metric': 'drawFrameDuration.average', 'value': 2058.9},
+    {'metric': 'drawFrameDuration.outlierAverage', 'value': 24000},
+    {'metric': 'drawFrameDuration.outlierRatio', 'value': 12.05},
+    {'metric': 'drawFrameDuration.noise', 'value': 0.34},
+    {'metric': 'totalUiFrame.average', 'value': 4166},
+  ],
+  'bar': [
+    {'metric': 'preroll_frame.average', 'value': 60.5},
+    {'metric': 'preroll_frame.outlierAverage', 'value': 1400},
+    {'metric': 'preroll_frame.outlierRatio', 'value': 20.2},
+    {'metric': 'preroll_frame.noise', 'value': 0.85},
+    {'metric': 'apply_frame.average', 'value': 80.0},
+    {'metric': 'apply_frame.outlierAverage', 'value': 200.6},
+    {'metric': 'apply_frame.outlierRatio', 'value': 2.5},
+    {'metric': 'apply_frame.noise', 'value': 0.4},
+    {'metric': 'drawFrameDuration.average', 'value': 2058.9},
+    {'metric': 'drawFrameDuration.outlierAverage', 'value': 24000},
+    {'metric': 'drawFrameDuration.outlierRatio', 'value': 12.05},
+    {'metric': 'drawFrameDuration.noise', 'value': 0.34},
+    {'metric': 'totalUiFrame.average', 'value': 4166},
+  ],
+};
+
+final testBenchmarkResults2 = {
+  'foo': [
+    {'metric': 'preroll_frame.average', 'value': 65.5},
+    {'metric': 'preroll_frame.outlierAverage', 'value': 1410},
+    {'metric': 'preroll_frame.outlierRatio', 'value': 20.0},
+    {'metric': 'preroll_frame.noise', 'value': 1.5},
+    {'metric': 'apply_frame.average', 'value': 50.0},
+    {'metric': 'apply_frame.outlierAverage', 'value': 100.0},
+    {'metric': 'apply_frame.outlierRatio', 'value': 2.55},
+    {'metric': 'apply_frame.noise', 'value': 0.9},
+    {'metric': 'drawFrameDuration.average', 'value': 2000.0},
+    {'metric': 'drawFrameDuration.outlierAverage', 'value': 20000},
+    {'metric': 'drawFrameDuration.outlierRatio', 'value': 11.05},
+    {'metric': 'drawFrameDuration.noise', 'value': 1.34},
+    {'metric': 'totalUiFrame.average', 'value': 4150},
+  ],
+  'bar': [
+    {'metric': 'preroll_frame.average', 'value': 65.5},
+    {'metric': 'preroll_frame.outlierAverage', 'value': 1410},
+    {'metric': 'preroll_frame.outlierRatio', 'value': 20.0},
+    {'metric': 'preroll_frame.noise', 'value': 1.5},
+    {'metric': 'apply_frame.average', 'value': 50.0},
+    {'metric': 'apply_frame.outlierAverage', 'value': 100.0},
+    {'metric': 'apply_frame.outlierRatio', 'value': 2.55},
+    {'metric': 'apply_frame.noise', 'value': 0.9},
+    {'metric': 'drawFrameDuration.average', 'value': 2000.0},
+    {'metric': 'drawFrameDuration.outlierAverage', 'value': 20000},
+    {'metric': 'drawFrameDuration.outlierRatio', 'value': 11.05},
+    {'metric': 'drawFrameDuration.noise', 'value': 1.34},
+    {'metric': 'totalUiFrame.average', 'value': 4150},
+  ],
+};
+
+final testBenchmarkComparison = {
+  'foo': [
+    {'metric': 'preroll_frame.average', 'value': 65.5, 'delta': 5.0},
+    {'metric': 'preroll_frame.outlierAverage', 'value': 1410.0, 'delta': 10.0},
+    {
+      'metric': 'preroll_frame.outlierRatio',
+      'value': 20.0,
+      'delta': -0.1999999999999993,
+    },
+    {'metric': 'preroll_frame.noise', 'value': 1.5, 'delta': 0.65},
+    {'metric': 'apply_frame.average', 'value': 50.0, 'delta': -30.0},
+    {'metric': 'apply_frame.outlierAverage', 'value': 100.0, 'delta': -100.6},
+    {
+      'metric': 'apply_frame.outlierRatio',
+      'value': 2.55,
+      'delta': 0.04999999999999982,
+    },
+    {'metric': 'apply_frame.noise', 'value': 0.9, 'delta': 0.5},
+    {
+      'metric': 'drawFrameDuration.average',
+      'value': 2000.0,
+      'delta': -58.90000000000009,
+    },
+    {
+      'metric': 'drawFrameDuration.outlierAverage',
+      'value': 20000.0,
+      'delta': -4000.0,
+    },
+    {'metric': 'drawFrameDuration.outlierRatio', 'value': 11.05, 'delta': -1.0},
+    {'metric': 'drawFrameDuration.noise', 'value': 1.34, 'delta': 1.0},
+    {'metric': 'totalUiFrame.average', 'value': 4150.0, 'delta': -16.0},
+  ],
+  'bar': [
+    {'metric': 'preroll_frame.average', 'value': 65.5, 'delta': 5.0},
+    {'metric': 'preroll_frame.outlierAverage', 'value': 1410.0, 'delta': 10.0},
+    {
+      'metric': 'preroll_frame.outlierRatio',
+      'value': 20.0,
+      'delta': -0.1999999999999993,
+    },
+    {'metric': 'preroll_frame.noise', 'value': 1.5, 'delta': 0.65},
+    {'metric': 'apply_frame.average', 'value': 50.0, 'delta': -30.0},
+    {'metric': 'apply_frame.outlierAverage', 'value': 100.0, 'delta': -100.6},
+    {
+      'metric': 'apply_frame.outlierRatio',
+      'value': 2.55,
+      'delta': 0.04999999999999982,
+    },
+    {'metric': 'apply_frame.noise', 'value': 0.9, 'delta': 0.5},
+    {
+      'metric': 'drawFrameDuration.average',
+      'value': 2000.0,
+      'delta': -58.90000000000009,
+    },
+    {
+      'metric': 'drawFrameDuration.outlierAverage',
+      'value': 20000.0,
+      'delta': -4000.0,
+    },
+    {'metric': 'drawFrameDuration.outlierRatio', 'value': 11.05, 'delta': -1.0},
+    {'metric': 'drawFrameDuration.noise', 'value': 1.34, 'delta': 1.0},
+    {'metric': 'totalUiFrame.average', 'value': 4150.0, 'delta': -16.0},
+  ],
+};
diff --git a/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart b/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart
index 64fdbcdb578..f6744c4ca22 100644
--- a/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart
+++ b/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart
@@ -50,7 +50,7 @@ void main(List<String> args) {
   );
 }
 
-void compareBenchmarks(
+Map<String, List<Map<String, dynamic>>> compareBenchmarks(
   BenchmarkResults baseline,
   BenchmarkResults test, {
   required String baselineSource,
@@ -89,12 +89,12 @@ void compareBenchmarks(
   }
   stdout.writeln('Baseline comparison finished.');
 
+  final comparisonAsMap = test.toJsonWithDeltas();
   stdout
     ..writeln('==== Comparison with baseline $baselineSource ====')
-    ..writeln(
-      const JsonEncoder.withIndent('  ').convert(test.toJsonWithDeltas()),
-    )
+    ..writeln(const JsonEncoder.withIndent('  ').convert(comparisonAsMap))
     ..writeln('==== End of baseline comparison ====');
+  return comparisonAsMap;
 }
 
 Expando<double> _benchmarkDeltas = Expando<double>();

From 0ccd52da354a7d7458b92832c2fd53457d90042a Mon Sep 17 00:00:00 2001
From: Kenzie Schmoll <kenzieschmoll@google.com>
Date: Fri, 8 Dec 2023 10:44:37 -0800
Subject: [PATCH 9/9] use object and bump version

---
 .../benchmark/scripts/compare_benchmarks.dart        | 12 ++++++------
 packages/devtools_app/pubspec.yaml                   |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart b/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart
index f6744c4ca22..234ef9a0d87 100644
--- a/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart
+++ b/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart
@@ -50,7 +50,7 @@ void main(List<String> args) {
   );
 }
 
-Map<String, List<Map<String, dynamic>>> compareBenchmarks(
+Map<String, List<Map<String, Object?>>> compareBenchmarks(
   BenchmarkResults baseline,
   BenchmarkResults test, {
   required String baselineSource,
@@ -104,15 +104,15 @@ extension ScoreDeltaExtension on BenchmarkScore {
 }
 
 extension ResultDeltaExtension on BenchmarkResults {
-  Map<String, List<Map<String, dynamic>>> toJsonWithDeltas() {
-    return scores.map<String, List<Map<String, dynamic>>>(
+  Map<String, List<Map<String, Object?>>> toJsonWithDeltas() {
+    return scores.map<String, List<Map<String, Object?>>>(
       (String benchmarkName, List<BenchmarkScore> scores) {
-        return MapEntry<String, List<Map<String, dynamic>>>(
+        return MapEntry<String, List<Map<String, Object?>>>(
           benchmarkName,
-          scores.map<Map<String, dynamic>>(
+          scores.map<Map<String, Object?>>(
             (BenchmarkScore score) {
               final delta = _benchmarkDeltas[score];
-              return <String, dynamic>{
+              return <String, Object?>{
                 ...score.toJson(),
                 if (delta != null) 'delta': delta,
               };
diff --git a/packages/devtools_app/pubspec.yaml b/packages/devtools_app/pubspec.yaml
index 1b839ad86a1..0b7b31afbf6 100644
--- a/packages/devtools_app/pubspec.yaml
+++ b/packages/devtools_app/pubspec.yaml
@@ -79,7 +79,7 @@ dev_dependencies:
   mockito: ^5.4.1
   stager: ^1.0.1
   test: ^1.21.1
-  web_benchmarks: ^1.0.0
+  web_benchmarks: ^1.0.1
   webkit_inspection_protocol: ">=0.5.0 <2.0.0"
 
 flutter: