From 3300cbc56512220918c0645dc7dcbdd747132428 Mon Sep 17 00:00:00 2001
From: Kenzie Schmoll <kenzieschmoll@google.com>
Date: Fri, 8 Dec 2023 13:40:13 -0800
Subject: [PATCH 1/2] Bump to web_benchmarks version 1.1.0

---
 .../benchmark/devtools_benchmarks_test.dart   | 143 ------------------
 .../benchmark/scripts/compare_benchmarks.dart |  69 +--------
 .../benchmark/scripts/run_benchmarks.dart     |  33 +---
 .../devtools_app/benchmark/scripts/utils.dart |  50 ------
 packages/devtools_app/pubspec.yaml            |   2 +-
 5 files changed, 7 insertions(+), 290 deletions(-)
diff --git a/packages/devtools_app/benchmark/devtools_benchmarks_test.dart b/packages/devtools_app/benchmark/devtools_benchmarks_test.dart
index ff0cf3f4d01..35d687a7261 100644
--- a/packages/devtools_app/benchmark/devtools_benchmarks_test.dart
+++ b/packages/devtools_app/benchmark/devtools_benchmarks_test.dart
@@ -11,7 +11,6 @@ import 'dart:io';
 import 'package:test/test.dart';
 import 'package:web_benchmarks/server.dart';
 
-import 'scripts/compare_benchmarks.dart';
 import 'test_infra/common.dart';
 import 'test_infra/project_root_directory.dart';
 
@@ -38,17 +37,6 @@ void main() {
     timeout: const Timeout(Duration(minutes: 10)),
   );
 
-  test('Can compare web benchmarks', () {
-    final benchmark1 = BenchmarkResults.parse(testBenchmarkResults1);
-    final benchmark2 = BenchmarkResults.parse(testBenchmarkResults2);
-    final comparison = compareBenchmarks(
-      benchmark1,
-      benchmark2,
-      baselineSource: 'path/to/baseline',
-    );
-    expect(comparison, testBenchmarkComparison);
-  });
-
   // TODO(kenz): add tests that verify performance meets some expected threshold
 }
 
@@ -98,134 +86,3 @@ Future<void> _runBenchmarks({bool useWasm = false}) async {
     isA<String>(),
   );
 }
-
-final testBenchmarkResults1 = {
-  'foo': [
-    {'metric': 'preroll_frame.average', 'value': 60.5},
-    {'metric': 'preroll_frame.outlierAverage', 'value': 1400},
-    {'metric': 'preroll_frame.outlierRatio', 'value': 20.2},
-    {'metric': 'preroll_frame.noise', 'value': 0.85},
-    {'metric': 'apply_frame.average', 'value': 80.0},
-    {'metric': 'apply_frame.outlierAverage', 'value': 200.6},
-    {'metric': 'apply_frame.outlierRatio', 'value': 2.5},
-    {'metric': 'apply_frame.noise', 'value': 0.4},
-    {'metric': 'drawFrameDuration.average', 'value': 2058.9},
-    {'metric': 'drawFrameDuration.outlierAverage', 'value': 24000},
-    {'metric': 'drawFrameDuration.outlierRatio', 'value': 12.05},
-    {'metric': 'drawFrameDuration.noise', 'value': 0.34},
-    {'metric': 'totalUiFrame.average', 'value': 4166},
-  ],
-  'bar': [
-    {'metric': 'preroll_frame.average', 'value': 60.5},
-    {'metric': 'preroll_frame.outlierAverage', 'value': 1400},
-    {'metric': 'preroll_frame.outlierRatio', 'value': 20.2},
-    {'metric': 'preroll_frame.noise', 'value': 0.85},
-    {'metric': 'apply_frame.average', 'value': 80.0},
-    {'metric': 'apply_frame.outlierAverage', 'value': 200.6},
-    {'metric': 'apply_frame.outlierRatio', 'value': 2.5},
-    {'metric': 'apply_frame.noise', 'value': 0.4},
-    {'metric': 'drawFrameDuration.average', 'value': 2058.9},
-    {'metric': 'drawFrameDuration.outlierAverage', 'value': 24000},
-    {'metric': 'drawFrameDuration.outlierRatio', 'value': 12.05},
-    {'metric': 'drawFrameDuration.noise', 'value': 0.34},
-    {'metric': 'totalUiFrame.average', 'value': 4166},
-  ],
-};
-
-final testBenchmarkResults2 = {
-  'foo': [
-    {'metric': 'preroll_frame.average', 'value': 65.5},
-    {'metric': 'preroll_frame.outlierAverage', 'value': 1410},
-    {'metric': 'preroll_frame.outlierRatio', 'value': 20.0},
-    {'metric': 'preroll_frame.noise', 'value': 1.5},
-    {'metric': 'apply_frame.average', 'value': 50.0},
-    {'metric': 'apply_frame.outlierAverage', 'value': 100.0},
-    {'metric': 'apply_frame.outlierRatio', 'value': 2.55},
-    {'metric': 'apply_frame.noise', 'value': 0.9},
-    {'metric': 'drawFrameDuration.average', 'value': 2000.0},
-    {'metric': 'drawFrameDuration.outlierAverage', 'value': 20000},
-    {'metric': 'drawFrameDuration.outlierRatio', 'value': 11.05},
-    {'metric': 'drawFrameDuration.noise', 'value': 1.34},
-    {'metric': 'totalUiFrame.average', 'value': 4150},
-  ],
-  'bar': [
-    {'metric': 'preroll_frame.average', 'value': 65.5},
-    {'metric': 'preroll_frame.outlierAverage', 'value': 1410},
-    {'metric': 'preroll_frame.outlierRatio', 'value': 20.0},
-    {'metric': 'preroll_frame.noise', 'value': 1.5},
-    {'metric': 'apply_frame.average', 'value': 50.0},
-    {'metric': 'apply_frame.outlierAverage', 'value': 100.0},
-    {'metric': 'apply_frame.outlierRatio', 'value': 2.55},
-    {'metric': 'apply_frame.noise', 'value': 0.9},
-    {'metric': 'drawFrameDuration.average', 'value': 2000.0},
-    {'metric': 'drawFrameDuration.outlierAverage', 'value': 20000},
-    {'metric': 'drawFrameDuration.outlierRatio', 'value': 11.05},
-    {'metric': 'drawFrameDuration.noise', 'value': 1.34},
-    {'metric': 'totalUiFrame.average', 'value': 4150},
-  ],
-};
-
-final testBenchmarkComparison = {
-  'foo': [
-    {'metric': 'preroll_frame.average', 'value': 65.5, 'delta': 5.0},
-    {'metric': 'preroll_frame.outlierAverage', 'value': 1410.0, 'delta': 10.0},
-    {
-      'metric': 'preroll_frame.outlierRatio',
-      'value': 20.0,
-      'delta': -0.1999999999999993,
-    },
-    {'metric': 'preroll_frame.noise', 'value': 1.5, 'delta': 0.65},
-    {'metric': 'apply_frame.average', 'value': 50.0, 'delta': -30.0},
-    {'metric': 'apply_frame.outlierAverage', 'value': 100.0, 'delta': -100.6},
-    {
-      'metric': 'apply_frame.outlierRatio',
-      'value': 2.55,
-      'delta': 0.04999999999999982,
-    },
-    {'metric': 'apply_frame.noise', 'value': 0.9, 'delta': 0.5},
-    {
-      'metric': 'drawFrameDuration.average',
-      'value': 2000.0,
-      'delta': -58.90000000000009,
-    },
-    {
-      'metric': 'drawFrameDuration.outlierAverage',
-      'value': 20000.0,
-      'delta': -4000.0,
-    },
-    {'metric': 'drawFrameDuration.outlierRatio', 'value': 11.05, 'delta': -1.0},
-    {'metric': 'drawFrameDuration.noise', 'value': 1.34, 'delta': 1.0},
-    {'metric': 'totalUiFrame.average', 'value': 4150.0, 'delta': -16.0},
-  ],
-  'bar': [
-    {'metric': 'preroll_frame.average', 'value': 65.5, 'delta': 5.0},
-    {'metric': 'preroll_frame.outlierAverage', 'value': 1410.0, 'delta': 10.0},
-    {
-      'metric': 'preroll_frame.outlierRatio',
-      'value': 20.0,
-      'delta': -0.1999999999999993,
-    },
-    {'metric': 'preroll_frame.noise', 'value': 1.5, 'delta': 0.65},
-    {'metric': 'apply_frame.average', 'value': 50.0, 'delta': -30.0},
-    {'metric': 'apply_frame.outlierAverage', 'value': 100.0, 'delta': -100.6},
-    {
-      'metric': 'apply_frame.outlierRatio',
-      'value': 2.55,
-      'delta': 0.04999999999999982,
-    },
-    {'metric': 'apply_frame.noise', 'value': 0.9, 'delta': 0.5},
-    {
-      'metric': 'drawFrameDuration.average',
-      'value': 2000.0,
-      'delta': -58.90000000000009,
-    },
-    {
-      'metric': 'drawFrameDuration.outlierAverage',
-      'value': 20000.0,
-      'delta': -4000.0,
-    },
-    {'metric': 'drawFrameDuration.outlierRatio', 'value': 11.05, 'delta': -1.0},
-    {'metric': 'drawFrameDuration.noise', 'value': 1.34, 'delta': 1.0},
-    {'metric': 'totalUiFrame.average', 'value': 4150.0, 'delta': -16.0},
-  ],
-};
diff --git a/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart b/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart
index 234ef9a0d87..06147c52982 100644
--- a/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart
+++ b/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart
@@ -5,8 +5,7 @@
 import 'dart:convert';
 import 'dart:io';
 
-import 'package:collection/collection.dart';
-import 'package:web_benchmarks/server.dart';
+import 'package:web_benchmarks/analysis.dart';
 
 import 'utils.dart';
 
@@ -50,76 +49,16 @@ void main(List<String> args) {
   );
 }
 
-Map<String, List<Map<String, Object?>>> compareBenchmarks(
+void compareBenchmarks(
   BenchmarkResults baseline,
   BenchmarkResults test, {
   required String baselineSource,
 }) {
   stdout.writeln('Starting baseline comparison...');
-
-  for (final benchmarkName in test.scores.keys) {
-    stdout.writeln('Comparing metrics for benchmark "$benchmarkName".');
-
-    // Lookup this benchmark in the baseline.
-    final baselineScores = baseline.scores[benchmarkName];
-    if (baselineScores == null) {
-      stdout.writeln(
-        'Baseline does not contain results for benchmark "$benchmarkName".',
-      );
-      continue;
-    }
-
-    final testScores = test.scores[benchmarkName]!;
-
-    for (final score in testScores) {
-      // Lookup this metric in the baseline.
-      final baselineScore =
-          baselineScores.firstWhereOrNull((s) => s.metric == score.metric);
-      if (baselineScore == null) {
-        stdout.writeln(
-          'Baseline does not contain metric "${score.metric}" for '
-          'benchmark "$benchmarkName".',
-        );
-        continue;
-      }
-
-      // Add the delta to the [testMetric].
-      _benchmarkDeltas[score] = (score.value - baselineScore.value).toDouble();
-    }
-  }
+  final delta = computeDelta(baseline, test);
   stdout.writeln('Baseline comparison finished.');
-
-  final comparisonAsMap = test.toJsonWithDeltas();
   stdout
     ..writeln('==== Comparison with baseline $baselineSource ====')
-    ..writeln(const JsonEncoder.withIndent('  ').convert(comparisonAsMap))
+    ..writeln(const JsonEncoder.withIndent('  ').convert(delta))
     ..writeln('==== End of baseline comparison ====');
-  return comparisonAsMap;
-}
-
-Expando<double> _benchmarkDeltas = Expando<double>();
-
-extension ScoreDeltaExtension on BenchmarkScore {
-  double? get deltaFromBaseline => _benchmarkDeltas[this];
-}
-
-extension ResultDeltaExtension on BenchmarkResults {
-  Map<String, List<Map<String, Object?>>> toJsonWithDeltas() {
-    return scores.map<String, List<Map<String, Object?>>>(
-      (String benchmarkName, List<BenchmarkScore> scores) {
-        return MapEntry<String, List<Map<String, Object?>>>(
-          benchmarkName,
-          scores.map<Map<String, Object?>>(
-            (BenchmarkScore score) {
-              final delta = _benchmarkDeltas[score];
-              return <String, Object?>{
-                ...score.toJson(),
-                if (delta != null) 'delta': delta,
-              };
-            },
-          ).toList(),
-        );
-      },
-    );
-  }
 }
diff --git a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
index 7799ce16c1a..ba39bdfee68 100644
--- a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
+++ b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart
@@ -6,6 +6,7 @@ import 'dart:convert';
 import 'dart:io';
 
 import 'package:args/args.dart';
+import 'package:web_benchmarks/analysis.dart';
 import 'package:web_benchmarks/server.dart';
 
 import '../test_infra/common.dart';
@@ -46,7 +47,7 @@ Future<void> main(List<String> args) async {
     stdout.writeln(
       'Taking the average of ${benchmarkResults.length} benchmark runs.',
     );
-    taskResult = averageBenchmarkResults(benchmarkResults);
+    taskResult = computeAverage(benchmarkResults);
   }
 
   final resultsAsMap = taskResult.toJson();
@@ -150,33 +151,3 @@ class BenchmarkArgs {
       );
   }
 }
-
-// TODO(kenz): upstream the logic to average benchmarks into the
-// package:web_benchmarks
-
-/// Returns the average of the benchmark results in [results].
-///
-/// Each element in [results] is expected to have identical benchmark names and
-/// metrics; otherwise, an [Exception] will be thrown.
-BenchmarkResults averageBenchmarkResults(List<BenchmarkResults> results) {
-  if (results.isEmpty) {
-    throw Exception('Cannot take average of empty list.');
-  }
-
-  var totalSum = results.first;
-  for (int i = 1; i < results.length; i++) {
-    final current = results[i];
-    totalSum = totalSum.sumWith(current);
-  }
-
-  final average = totalSum.toJson();
-  for (final benchmark in totalSum.scores.keys) {
-    final scoresForBenchmark = totalSum.scores[benchmark]!;
-    for (int i = 0; i < scoresForBenchmark.length; i++) {
-      final score = scoresForBenchmark[i];
-      final averageValue = score.value / results.length;
-      average[benchmark]![i]['value'] = averageValue;
-    }
-  }
-  return BenchmarkResults.parse(average);
-}
diff --git a/packages/devtools_app/benchmark/scripts/utils.dart b/packages/devtools_app/benchmark/scripts/utils.dart
index 01df4c2143c..a4cbc99bead 100644
--- a/packages/devtools_app/benchmark/scripts/utils.dart
+++ b/packages/devtools_app/benchmark/scripts/utils.dart
@@ -4,9 +4,6 @@
 
 import 'dart:io';
 
-import 'package:collection/collection.dart';
-import 'package:web_benchmarks/server.dart';
-
 File? checkFileExists(String path) {
   final testFile = File.fromUri(Uri.parse(path));
   if (!testFile.existsSync()) {
@@ -15,50 +12,3 @@ File? checkFileExists(String path) {
   }
   return testFile;
 }
-
-extension BenchmarkResultsExtension on BenchmarkResults {
-  /// Sums this [BenchmarkResults] instance with [other] by adding the values
-  /// of each matching benchmark score.
-  ///
-  /// Returns a [BenchmarkResults] object with the summed values.
-  BenchmarkResults sumWith(
-    BenchmarkResults other, {
-    bool throwExceptionOnMismatch = true,
-  }) {
-    final sum = toJson();
-    for (final benchmark in scores.keys) {
-      // Look up this benchmark in [other].
-      final matchingBenchmark = other.scores[benchmark];
-      if (matchingBenchmark == null) {
-        if (throwExceptionOnMismatch) {
-          throw Exception(
-            'Cannot sum benchmarks because [other] is missing an entry for '
-            'benchmark "$benchmark".',
-          );
-        }
-        continue;
-      }
-
-      final scoresForBenchmark = scores[benchmark]!;
-      for (int i = 0; i < scoresForBenchmark.length; i++) {
-        final score = scoresForBenchmark[i];
-        // Look up this score in the [matchingBenchmark] from [other].
-        final matchingScore =
-            matchingBenchmark.firstWhereOrNull((s) => s.metric == score.metric);
-        if (matchingScore == null) {
-          if (throwExceptionOnMismatch) {
-            throw Exception(
-              'Cannot sum benchmarks because benchmark "$benchmark" is missing '
-              'a score for metric ${score.metric}.',
-            );
-          }
-          continue;
-        }
-
-        final sumScore = score.value + matchingScore.value;
-        sum[benchmark]![i]['value'] = sumScore;
-      }
-    }
-    return BenchmarkResults.parse(sum);
-  }
-}
diff --git a/packages/devtools_app/pubspec.yaml b/packages/devtools_app/pubspec.yaml
index 0b7b31afbf6..183a0e7dc52 100644
--- a/packages/devtools_app/pubspec.yaml
+++ b/packages/devtools_app/pubspec.yaml
@@ -79,7 +79,7 @@ dev_dependencies:
   mockito: ^5.4.1
   stager: ^1.0.1
   test: ^1.21.1
-  web_benchmarks: ^1.0.1
+  web_benchmarks: ^1.1.0
   webkit_inspection_protocol: ">=0.5.0 <2.0.0"
 
 flutter:

From b5ea980fd8a381d6194074d165795ea87fde7459 Mon Sep 17 00:00:00 2001
From: Kenzie Schmoll <kenzieschmoll@google.com>
Date: Mon, 11 Dec 2023 09:57:23 -0800
Subject: [PATCH 2/2] Call toJson() on the benchmark delta before encoding

---
 packages/devtools_app/benchmark/scripts/compare_benchmarks.dart | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart b/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart
index 06147c52982..515132a1d1d 100644
--- a/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart
+++ b/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart
@@ -59,6 +59,6 @@ void compareBenchmarks(
   stdout.writeln('Baseline comparison finished.');
   stdout
     ..writeln('==== Comparison with baseline $baselineSource ====')
-    ..writeln(const JsonEncoder.withIndent('  ').convert(delta))
+    ..writeln(const JsonEncoder.withIndent('  ').convert(delta.toJson()))
     ..writeln('==== End of baseline comparison ====');
 }