From 3300cbc56512220918c0645dc7dcbdd747132428 Mon Sep 17 00:00:00 2001 From: Kenzie Schmoll Date: Fri, 8 Dec 2023 13:40:13 -0800 Subject: [PATCH 1/2] Bump to web_benchmarks version 1.1.0 --- .../benchmark/devtools_benchmarks_test.dart | 143 ------------------ .../benchmark/scripts/compare_benchmarks.dart | 69 +-------- .../benchmark/scripts/run_benchmarks.dart | 33 +--- .../devtools_app/benchmark/scripts/utils.dart | 50 ------ packages/devtools_app/pubspec.yaml | 2 +- 5 files changed, 7 insertions(+), 290 deletions(-) diff --git a/packages/devtools_app/benchmark/devtools_benchmarks_test.dart b/packages/devtools_app/benchmark/devtools_benchmarks_test.dart index ff0cf3f4d01..35d687a7261 100644 --- a/packages/devtools_app/benchmark/devtools_benchmarks_test.dart +++ b/packages/devtools_app/benchmark/devtools_benchmarks_test.dart @@ -11,7 +11,6 @@ import 'dart:io'; import 'package:test/test.dart'; import 'package:web_benchmarks/server.dart'; -import 'scripts/compare_benchmarks.dart'; import 'test_infra/common.dart'; import 'test_infra/project_root_directory.dart'; @@ -38,17 +37,6 @@ void main() { timeout: const Timeout(Duration(minutes: 10)), ); - test('Can compare web benchmarks', () { - final benchmark1 = BenchmarkResults.parse(testBenchmarkResults1); - final benchmark2 = BenchmarkResults.parse(testBenchmarkResults2); - final comparison = compareBenchmarks( - benchmark1, - benchmark2, - baselineSource: 'path/to/baseline', - ); - expect(comparison, testBenchmarkComparison); - }); - // TODO(kenz): add tests that verify performance meets some expected threshold } @@ -98,134 +86,3 @@ Future _runBenchmarks({bool useWasm = false}) async { isA(), ); } - -final testBenchmarkResults1 = { - 'foo': [ - {'metric': 'preroll_frame.average', 'value': 60.5}, - {'metric': 'preroll_frame.outlierAverage', 'value': 1400}, - {'metric': 'preroll_frame.outlierRatio', 'value': 20.2}, - {'metric': 'preroll_frame.noise', 'value': 0.85}, - {'metric': 'apply_frame.average', 'value': 80.0}, - {'metric': 'apply_frame.outlierAverage', 'value': 200.6}, - {'metric': 'apply_frame.outlierRatio', 'value': 2.5}, - {'metric': 'apply_frame.noise', 'value': 0.4}, - {'metric': 'drawFrameDuration.average', 'value': 2058.9}, - {'metric': 'drawFrameDuration.outlierAverage', 'value': 24000}, - {'metric': 'drawFrameDuration.outlierRatio', 'value': 12.05}, - {'metric': 'drawFrameDuration.noise', 'value': 0.34}, - {'metric': 'totalUiFrame.average', 'value': 4166}, - ], - 'bar': [ - {'metric': 'preroll_frame.average', 'value': 60.5}, - {'metric': 'preroll_frame.outlierAverage', 'value': 1400}, - {'metric': 'preroll_frame.outlierRatio', 'value': 20.2}, - {'metric': 'preroll_frame.noise', 'value': 0.85}, - {'metric': 'apply_frame.average', 'value': 80.0}, - {'metric': 'apply_frame.outlierAverage', 'value': 200.6}, - {'metric': 'apply_frame.outlierRatio', 'value': 2.5}, - {'metric': 'apply_frame.noise', 'value': 0.4}, - {'metric': 'drawFrameDuration.average', 'value': 2058.9}, - {'metric': 'drawFrameDuration.outlierAverage', 'value': 24000}, - {'metric': 'drawFrameDuration.outlierRatio', 'value': 12.05}, - {'metric': 'drawFrameDuration.noise', 'value': 0.34}, - {'metric': 'totalUiFrame.average', 'value': 4166}, - ], -}; - -final testBenchmarkResults2 = { - 'foo': [ - {'metric': 'preroll_frame.average', 'value': 65.5}, - {'metric': 'preroll_frame.outlierAverage', 'value': 1410}, - {'metric': 'preroll_frame.outlierRatio', 'value': 20.0}, - {'metric': 'preroll_frame.noise', 'value': 1.5}, - {'metric': 'apply_frame.average', 'value': 50.0}, - {'metric': 'apply_frame.outlierAverage', 'value': 100.0}, - {'metric': 'apply_frame.outlierRatio', 'value': 2.55}, - {'metric': 'apply_frame.noise', 'value': 0.9}, - {'metric': 'drawFrameDuration.average', 'value': 2000.0}, - {'metric': 'drawFrameDuration.outlierAverage', 'value': 20000}, - {'metric': 'drawFrameDuration.outlierRatio', 'value': 11.05}, - {'metric': 'drawFrameDuration.noise', 'value': 1.34}, - {'metric': 'totalUiFrame.average', 'value': 4150}, - ], - 'bar': [ - {'metric': 'preroll_frame.average', 'value': 65.5}, - {'metric': 'preroll_frame.outlierAverage', 'value': 1410}, - {'metric': 'preroll_frame.outlierRatio', 'value': 20.0}, - {'metric': 'preroll_frame.noise', 'value': 1.5}, - {'metric': 'apply_frame.average', 'value': 50.0}, - {'metric': 'apply_frame.outlierAverage', 'value': 100.0}, - {'metric': 'apply_frame.outlierRatio', 'value': 2.55}, - {'metric': 'apply_frame.noise', 'value': 0.9}, - {'metric': 'drawFrameDuration.average', 'value': 2000.0}, - {'metric': 'drawFrameDuration.outlierAverage', 'value': 20000}, - {'metric': 'drawFrameDuration.outlierRatio', 'value': 11.05}, - {'metric': 'drawFrameDuration.noise', 'value': 1.34}, - {'metric': 'totalUiFrame.average', 'value': 4150}, - ], -}; - -final testBenchmarkComparison = { - 'foo': [ - {'metric': 'preroll_frame.average', 'value': 65.5, 'delta': 5.0}, - {'metric': 'preroll_frame.outlierAverage', 'value': 1410.0, 'delta': 10.0}, - { - 'metric': 'preroll_frame.outlierRatio', - 'value': 20.0, - 'delta': -0.1999999999999993, - }, - {'metric': 'preroll_frame.noise', 'value': 1.5, 'delta': 0.65}, - {'metric': 'apply_frame.average', 'value': 50.0, 'delta': -30.0}, - {'metric': 'apply_frame.outlierAverage', 'value': 100.0, 'delta': -100.6}, - { - 'metric': 'apply_frame.outlierRatio', - 'value': 2.55, - 'delta': 0.04999999999999982, - }, - {'metric': 'apply_frame.noise', 'value': 0.9, 'delta': 0.5}, - { - 'metric': 'drawFrameDuration.average', - 'value': 2000.0, - 'delta': -58.90000000000009, - }, - { - 'metric': 'drawFrameDuration.outlierAverage', - 'value': 20000.0, - 'delta': -4000.0, - }, - {'metric': 'drawFrameDuration.outlierRatio', 'value': 11.05, 'delta': -1.0}, - {'metric': 'drawFrameDuration.noise', 'value': 1.34, 'delta': 1.0}, - {'metric': 'totalUiFrame.average', 'value': 4150.0, 'delta': -16.0}, - ], - 'bar': [ - {'metric': 'preroll_frame.average', 'value': 65.5, 'delta': 5.0}, - {'metric': 'preroll_frame.outlierAverage', 'value': 1410.0, 'delta': 10.0}, - { - 'metric': 'preroll_frame.outlierRatio', - 'value': 20.0, - 'delta': -0.1999999999999993, - }, - {'metric': 'preroll_frame.noise', 'value': 1.5, 'delta': 0.65}, - {'metric': 'apply_frame.average', 'value': 50.0, 'delta': -30.0}, - {'metric': 'apply_frame.outlierAverage', 'value': 100.0, 'delta': -100.6}, - { - 'metric': 'apply_frame.outlierRatio', - 'value': 2.55, - 'delta': 0.04999999999999982, - }, - {'metric': 'apply_frame.noise', 'value': 0.9, 'delta': 0.5}, - { - 'metric': 'drawFrameDuration.average', - 'value': 2000.0, - 'delta': -58.90000000000009, - }, - { - 'metric': 'drawFrameDuration.outlierAverage', - 'value': 20000.0, - 'delta': -4000.0, - }, - {'metric': 'drawFrameDuration.outlierRatio', 'value': 11.05, 'delta': -1.0}, - {'metric': 'drawFrameDuration.noise', 'value': 1.34, 'delta': 1.0}, - {'metric': 'totalUiFrame.average', 'value': 4150.0, 'delta': -16.0}, - ], -}; diff --git a/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart b/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart index 234ef9a0d87..06147c52982 100644 --- a/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart +++ b/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart @@ -5,8 +5,7 @@ import 'dart:convert'; import 'dart:io'; -import 'package:collection/collection.dart'; -import 'package:web_benchmarks/server.dart'; +import 'package:web_benchmarks/analysis.dart'; import 'utils.dart'; @@ -50,76 +49,16 @@ void main(List args) { ); } -Map>> compareBenchmarks( +void compareBenchmarks( BenchmarkResults baseline, BenchmarkResults test, { required String baselineSource, }) { stdout.writeln('Starting baseline comparison...'); - - for (final benchmarkName in test.scores.keys) { - stdout.writeln('Comparing metrics for benchmark "$benchmarkName".'); - - // Lookup this benchmark in the baseline. - final baselineScores = baseline.scores[benchmarkName]; - if (baselineScores == null) { - stdout.writeln( - 'Baseline does not contain results for benchmark "$benchmarkName".', - ); - continue; - } - - final testScores = test.scores[benchmarkName]!; - - for (final score in testScores) { - // Lookup this metric in the baseline. - final baselineScore = - baselineScores.firstWhereOrNull((s) => s.metric == score.metric); - if (baselineScore == null) { - stdout.writeln( - 'Baseline does not contain metric "${score.metric}" for ' - 'benchmark "$benchmarkName".', - ); - continue; - } - - // Add the delta to the [testMetric]. - _benchmarkDeltas[score] = (score.value - baselineScore.value).toDouble(); - } - } + final delta = computeDelta(baseline, test); stdout.writeln('Baseline comparison finished.'); - - final comparisonAsMap = test.toJsonWithDeltas(); stdout ..writeln('==== Comparison with baseline $baselineSource ====') - ..writeln(const JsonEncoder.withIndent(' ').convert(comparisonAsMap)) + ..writeln(const JsonEncoder.withIndent(' ').convert(delta)) ..writeln('==== End of baseline comparison ===='); - return comparisonAsMap; -} - -Expando _benchmarkDeltas = Expando(); - -extension ScoreDeltaExtension on BenchmarkScore { - double? get deltaFromBaseline => _benchmarkDeltas[this]; -} - -extension ResultDeltaExtension on BenchmarkResults { - Map>> toJsonWithDeltas() { - return scores.map>>( - (String benchmarkName, List scores) { - return MapEntry>>( - benchmarkName, - scores.map>( - (BenchmarkScore score) { - final delta = _benchmarkDeltas[score]; - return { - ...score.toJson(), - if (delta != null) 'delta': delta, - }; - }, - ).toList(), - ); - }, - ); - } } diff --git a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart index 7799ce16c1a..ba39bdfee68 100644 --- a/packages/devtools_app/benchmark/scripts/run_benchmarks.dart +++ b/packages/devtools_app/benchmark/scripts/run_benchmarks.dart @@ -6,6 +6,7 @@ import 'dart:convert'; import 'dart:io'; import 'package:args/args.dart'; +import 'package:web_benchmarks/analysis.dart'; import 'package:web_benchmarks/server.dart'; import '../test_infra/common.dart'; @@ -46,7 +47,7 @@ Future main(List args) async { stdout.writeln( 'Taking the average of ${benchmarkResults.length} benchmark runs.', ); - taskResult = averageBenchmarkResults(benchmarkResults); + taskResult = computeAverage(benchmarkResults); } final resultsAsMap = taskResult.toJson(); @@ -150,33 +151,3 @@ class BenchmarkArgs { ); } } - -// TODO(kenz): upstream the logic to average benchmarks into the -// package:web_benchmarks - -/// Returns the average of the benchmark results in [results]. -/// -/// Each element in [results] is expected to have identical benchmark names and -/// metrics; otherwise, an [Exception] will be thrown. -BenchmarkResults averageBenchmarkResults(List results) { - if (results.isEmpty) { - throw Exception('Cannot take average of empty list.'); - } - - var totalSum = results.first; - for (int i = 1; i < results.length; i++) { - final current = results[i]; - totalSum = totalSum.sumWith(current); - } - - final average = totalSum.toJson(); - for (final benchmark in totalSum.scores.keys) { - final scoresForBenchmark = totalSum.scores[benchmark]!; - for (int i = 0; i < scoresForBenchmark.length; i++) { - final score = scoresForBenchmark[i]; - final averageValue = score.value / results.length; - average[benchmark]![i]['value'] = averageValue; - } - } - return BenchmarkResults.parse(average); -} diff --git a/packages/devtools_app/benchmark/scripts/utils.dart b/packages/devtools_app/benchmark/scripts/utils.dart index 01df4c2143c..a4cbc99bead 100644 --- a/packages/devtools_app/benchmark/scripts/utils.dart +++ b/packages/devtools_app/benchmark/scripts/utils.dart @@ -4,9 +4,6 @@ import 'dart:io'; -import 'package:collection/collection.dart'; -import 'package:web_benchmarks/server.dart'; - File? checkFileExists(String path) { final testFile = File.fromUri(Uri.parse(path)); if (!testFile.existsSync()) { @@ -15,50 +12,3 @@ File? checkFileExists(String path) { } return testFile; } - -extension BenchmarkResultsExtension on BenchmarkResults { - /// Sums this [BenchmarkResults] instance with [other] by adding the values - /// of each matching benchmark score. - /// - /// Returns a [BenchmarkResults] object with the summed values. - BenchmarkResults sumWith( - BenchmarkResults other, { - bool throwExceptionOnMismatch = true, - }) { - final sum = toJson(); - for (final benchmark in scores.keys) { - // Look up this benchmark in [other]. - final matchingBenchmark = other.scores[benchmark]; - if (matchingBenchmark == null) { - if (throwExceptionOnMismatch) { - throw Exception( - 'Cannot sum benchmarks because [other] is missing an entry for ' - 'benchmark "$benchmark".', - ); - } - continue; - } - - final scoresForBenchmark = scores[benchmark]!; - for (int i = 0; i < scoresForBenchmark.length; i++) { - final score = scoresForBenchmark[i]; - // Look up this score in the [matchingBenchmark] from [other]. - final matchingScore = - matchingBenchmark.firstWhereOrNull((s) => s.metric == score.metric); - if (matchingScore == null) { - if (throwExceptionOnMismatch) { - throw Exception( - 'Cannot sum benchmarks because benchmark "$benchmark" is missing ' - 'a score for metric ${score.metric}.', - ); - } - continue; - } - - final sumScore = score.value + matchingScore.value; - sum[benchmark]![i]['value'] = sumScore; - } - } - return BenchmarkResults.parse(sum); - } -} diff --git a/packages/devtools_app/pubspec.yaml b/packages/devtools_app/pubspec.yaml index 0b7b31afbf6..183a0e7dc52 100644 --- a/packages/devtools_app/pubspec.yaml +++ b/packages/devtools_app/pubspec.yaml @@ -79,7 +79,7 @@ dev_dependencies: mockito: ^5.4.1 stager: ^1.0.1 test: ^1.21.1 - web_benchmarks: ^1.0.1 + web_benchmarks: ^1.1.0 webkit_inspection_protocol: ">=0.5.0 <2.0.0" flutter: From b5ea980fd8a381d6194074d165795ea87fde7459 Mon Sep 17 00:00:00 2001 From: Kenzie Schmoll Date: Mon, 11 Dec 2023 09:57:23 -0800 Subject: [PATCH 2/2] api change --- packages/devtools_app/benchmark/scripts/compare_benchmarks.dart | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart b/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart index 06147c52982..515132a1d1d 100644 --- a/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart +++ b/packages/devtools_app/benchmark/scripts/compare_benchmarks.dart @@ -59,6 +59,6 @@ void compareBenchmarks( stdout.writeln('Baseline comparison finished.'); stdout ..writeln('==== Comparison with baseline $baselineSource ====') - ..writeln(const JsonEncoder.withIndent(' ').convert(delta)) + ..writeln(const JsonEncoder.withIndent(' ').convert(delta.toJson())) ..writeln('==== End of baseline comparison ===='); }