17 changes: 15 additions & 2 deletions packages/devtools_app/benchmark/README.md
@@ -21,7 +21,6 @@ All of the commands below should be run from the `packages/devtools_app` directory
To run the performance benchmark tests locally, run:
```sh
dart run benchmark/scripts/run_benchmarks.dart
```

To run the test that verifies we can run benchmark tests, run:
@@ -48,4 +47,18 @@ the other running tests are using.

The tests are defined by "automators", which live in the `benchmark/test_infra/automators`
directory. To add a new test or test case, either modify an existing automator or add
a new one for a new screen. Follow existing examples in that directory for guidance.

## Comparing two benchmark test runs

To compare two benchmark runs, first run the benchmark tests twice and save
each set of results to a file:
```sh
dart run benchmark/scripts/run_benchmarks.dart --save-to-file=baseline.json
dart run benchmark/scripts/run_benchmarks.dart --save-to-file=test.json
```

Then, to compare the benchmarks and calculate deltas, run:
```sh
dart run benchmark/scripts/compare_benchmarks.dart baseline.json test.json
```
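
The comparison prints the test run's metrics annotated with a `delta` field
(the test value minus the baseline value). The output looks roughly like this
(abbreviated, with illustrative values):
```json
{
 "foo": [
  {
   "metric": "preroll_frame.average",
   "value": 65.5,
   "delta": 5.0
  }
 ]
}
```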
147 changes: 147 additions & 0 deletions packages/devtools_app/benchmark/devtools_benchmarks_test.dart
@@ -11,6 +11,7 @@ import 'dart:io';
import 'package:test/test.dart';
import 'package:web_benchmarks/server.dart';

import 'scripts/compare_benchmarks.dart';
import 'test_infra/common.dart';
import 'test_infra/project_root_directory.dart';

@@ -37,6 +38,21 @@ void main() {
timeout: const Timeout(Duration(minutes: 10)),
);

test(
'Can compare web benchmarks',
() {
final benchmark1 = BenchmarkResults.parse(testBenchmarkResults1);
final benchmark2 = BenchmarkResults.parse(testBenchmarkResults2);
final comparison = compareBenchmarks(
benchmark1,
benchmark2,
baselineSource: 'path/to/baseline',
);
expect(comparison, testBenchmarkComparison);
},
timeout: const Timeout(Duration(minutes: 10)),
);

// TODO(kenz): add tests that verify performance meets some expected threshold
}

@@ -86,3 +102,134 @@ Future<void> _runBenchmarks({bool useWasm = false}) async {
isA<String>(),
);
}

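// Fixture data in the JSON format accepted by [BenchmarkResults.parse]: one
// list of {metric, value} entries per benchmark name. These let the test
// exercise [compareBenchmarks] without running real benchmarks.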
final testBenchmarkResults1 = {
'foo': [
{'metric': 'preroll_frame.average', 'value': 60.5},
{'metric': 'preroll_frame.outlierAverage', 'value': 1400},
{'metric': 'preroll_frame.outlierRatio', 'value': 20.2},
{'metric': 'preroll_frame.noise', 'value': 0.85},
{'metric': 'apply_frame.average', 'value': 80.0},
{'metric': 'apply_frame.outlierAverage', 'value': 200.6},
{'metric': 'apply_frame.outlierRatio', 'value': 2.5},
{'metric': 'apply_frame.noise', 'value': 0.4},
{'metric': 'drawFrameDuration.average', 'value': 2058.9},
{'metric': 'drawFrameDuration.outlierAverage', 'value': 24000},
{'metric': 'drawFrameDuration.outlierRatio', 'value': 12.05},
{'metric': 'drawFrameDuration.noise', 'value': 0.34},
{'metric': 'totalUiFrame.average', 'value': 4166},
],
'bar': [
{'metric': 'preroll_frame.average', 'value': 60.5},
{'metric': 'preroll_frame.outlierAverage', 'value': 1400},
{'metric': 'preroll_frame.outlierRatio', 'value': 20.2},
{'metric': 'preroll_frame.noise', 'value': 0.85},
{'metric': 'apply_frame.average', 'value': 80.0},
{'metric': 'apply_frame.outlierAverage', 'value': 200.6},
{'metric': 'apply_frame.outlierRatio', 'value': 2.5},
{'metric': 'apply_frame.noise', 'value': 0.4},
{'metric': 'drawFrameDuration.average', 'value': 2058.9},
{'metric': 'drawFrameDuration.outlierAverage', 'value': 24000},
{'metric': 'drawFrameDuration.outlierRatio', 'value': 12.05},
{'metric': 'drawFrameDuration.noise', 'value': 0.34},
{'metric': 'totalUiFrame.average', 'value': 4166},
],
};

final testBenchmarkResults2 = {
'foo': [
{'metric': 'preroll_frame.average', 'value': 65.5},
{'metric': 'preroll_frame.outlierAverage', 'value': 1410},
{'metric': 'preroll_frame.outlierRatio', 'value': 20.0},
{'metric': 'preroll_frame.noise', 'value': 1.5},
{'metric': 'apply_frame.average', 'value': 50.0},
{'metric': 'apply_frame.outlierAverage', 'value': 100.0},
{'metric': 'apply_frame.outlierRatio', 'value': 2.55},
{'metric': 'apply_frame.noise', 'value': 0.9},
{'metric': 'drawFrameDuration.average', 'value': 2000.0},
{'metric': 'drawFrameDuration.outlierAverage', 'value': 20000},
{'metric': 'drawFrameDuration.outlierRatio', 'value': 11.05},
{'metric': 'drawFrameDuration.noise', 'value': 1.34},
{'metric': 'totalUiFrame.average', 'value': 4150},
],
'bar': [
{'metric': 'preroll_frame.average', 'value': 65.5},
{'metric': 'preroll_frame.outlierAverage', 'value': 1410},
{'metric': 'preroll_frame.outlierRatio', 'value': 20.0},
{'metric': 'preroll_frame.noise', 'value': 1.5},
{'metric': 'apply_frame.average', 'value': 50.0},
{'metric': 'apply_frame.outlierAverage', 'value': 100.0},
{'metric': 'apply_frame.outlierRatio', 'value': 2.55},
{'metric': 'apply_frame.noise', 'value': 0.9},
{'metric': 'drawFrameDuration.average', 'value': 2000.0},
{'metric': 'drawFrameDuration.outlierAverage', 'value': 20000},
{'metric': 'drawFrameDuration.outlierRatio', 'value': 11.05},
{'metric': 'drawFrameDuration.noise', 'value': 1.34},
{'metric': 'totalUiFrame.average', 'value': 4150},
],
};

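// The expected result of comparing the two fixtures above: the values from
// [testBenchmarkResults2], each annotated with a 'delta' equal to the test
// value minus the baseline value (e.g. preroll_frame.average: 65.5 - 60.5 = 5.0).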
final testBenchmarkComparison = {
'foo': [
{'metric': 'preroll_frame.average', 'value': 65.5, 'delta': 5.0},
{'metric': 'preroll_frame.outlierAverage', 'value': 1410.0, 'delta': 10.0},
{
'metric': 'preroll_frame.outlierRatio',
'value': 20.0,
'delta': -0.1999999999999993,
},
{'metric': 'preroll_frame.noise', 'value': 1.5, 'delta': 0.65},
{'metric': 'apply_frame.average', 'value': 50.0, 'delta': -30.0},
{'metric': 'apply_frame.outlierAverage', 'value': 100.0, 'delta': -100.6},
{
'metric': 'apply_frame.outlierRatio',
'value': 2.55,
'delta': 0.04999999999999982,
},
{'metric': 'apply_frame.noise', 'value': 0.9, 'delta': 0.5},
{
'metric': 'drawFrameDuration.average',
'value': 2000.0,
'delta': -58.90000000000009,
},
{
'metric': 'drawFrameDuration.outlierAverage',
'value': 20000.0,
'delta': -4000.0,
},
{'metric': 'drawFrameDuration.outlierRatio', 'value': 11.05, 'delta': -1.0},
{'metric': 'drawFrameDuration.noise', 'value': 1.34, 'delta': 1.0},
{'metric': 'totalUiFrame.average', 'value': 4150.0, 'delta': -16.0},
],
'bar': [
{'metric': 'preroll_frame.average', 'value': 65.5, 'delta': 5.0},
{'metric': 'preroll_frame.outlierAverage', 'value': 1410.0, 'delta': 10.0},
{
'metric': 'preroll_frame.outlierRatio',
'value': 20.0,
'delta': -0.1999999999999993,
},
{'metric': 'preroll_frame.noise', 'value': 1.5, 'delta': 0.65},
{'metric': 'apply_frame.average', 'value': 50.0, 'delta': -30.0},
{'metric': 'apply_frame.outlierAverage', 'value': 100.0, 'delta': -100.6},
{
'metric': 'apply_frame.outlierRatio',
'value': 2.55,
'delta': 0.04999999999999982,
},
{'metric': 'apply_frame.noise', 'value': 0.9, 'delta': 0.5},
{
'metric': 'drawFrameDuration.average',
'value': 2000.0,
'delta': -58.90000000000009,
},
{
'metric': 'drawFrameDuration.outlierAverage',
'value': 20000.0,
'delta': -4000.0,
},
{'metric': 'drawFrameDuration.outlierRatio', 'value': 11.05, 'delta': -1.0},
{'metric': 'drawFrameDuration.noise', 'value': 1.34, 'delta': 1.0},
{'metric': 'totalUiFrame.average', 'value': 4150.0, 'delta': -16.0},
],
};
125 changes: 125 additions & 0 deletions packages/devtools_app/benchmark/scripts/compare_benchmarks.dart
@@ -0,0 +1,125 @@
// Copyright 2023 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

import 'dart:convert';
import 'dart:io';

import 'package:collection/collection.dart';
import 'package:web_benchmarks/server.dart';

import 'utils.dart';

/// Compares two sets of web benchmarks and calculates the delta between each
/// matching metric.
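///
/// Usage:
/// `dart run benchmark/scripts/compare_benchmarks.dart <baseline-file> <test-file>`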
void main(List<String> args) {
if (args.length != 2) {
throw Exception(
'Expected 2 arguments (<baseline-file>, <test-file>), but instead there '
'were ${args.length}.',
);
}

final baselineSource = args[0];
final testSource = args[1];

stdout
..writeln('Comparing the following benchmark results:')
..writeln(' "$testSource" (test)')
..writeln(' "$baselineSource" (baseline)');

  final baselineFile = checkFileExists(baselineSource);
  if (baselineFile == null) {
    throw Exception('Cannot find baseline file $baselineSource');
  }
  final testFile = checkFileExists(testSource);
  if (testFile == null) {
    throw Exception('Cannot find test file $testSource');
  }

final baselineResults =
BenchmarkResults.parse(jsonDecode(baselineFile.readAsStringSync()));
final testResults =
BenchmarkResults.parse(jsonDecode(testFile.readAsStringSync()));
compareBenchmarks(
baselineResults,
testResults,
baselineSource: baselineSource,
);
}

Map<String, List<Map<String, Object?>>> compareBenchmarks(
BenchmarkResults baseline,
BenchmarkResults test, {
required String baselineSource,
}) {
stdout.writeln('Starting baseline comparison...');

for (final benchmarkName in test.scores.keys) {
stdout.writeln('Comparing metrics for benchmark "$benchmarkName".');

    // Look up this benchmark in the baseline.
final baselineScores = baseline.scores[benchmarkName];
if (baselineScores == null) {
stdout.writeln(
'Baseline does not contain results for benchmark "$benchmarkName".',
);
continue;
}

final testScores = test.scores[benchmarkName]!;

for (final score in testScores) {
      // Look up this metric in the baseline.
final baselineScore =
baselineScores.firstWhereOrNull((s) => s.metric == score.metric);
if (baselineScore == null) {
stdout.writeln(
'Baseline does not contain metric "${score.metric}" for '
'benchmark "$benchmarkName".',
);
continue;
}

      // Record this score's delta from the baseline, keyed by the score object.
_benchmarkDeltas[score] = (score.value - baselineScore.value).toDouble();
}
}
stdout.writeln('Baseline comparison finished.');

final comparisonAsMap = test.toJsonWithDeltas();
stdout
..writeln('==== Comparison with baseline $baselineSource ====')
..writeln(const JsonEncoder.withIndent(' ').convert(comparisonAsMap))
..writeln('==== End of baseline comparison ====');
return comparisonAsMap;
}

/// Deltas from the baseline, keyed by each test run [BenchmarkScore].
///
/// An [Expando] is used so that deltas can be attached to score objects
/// without modifying the [BenchmarkScore] class from `package:web_benchmarks`.
final Expando<double> _benchmarkDeltas = Expando<double>();

extension ScoreDeltaExtension on BenchmarkScore {
double? get deltaFromBaseline => _benchmarkDeltas[this];
}

extension ResultDeltaExtension on BenchmarkResults {
Map<String, List<Map<String, Object?>>> toJsonWithDeltas() {
return scores.map<String, List<Map<String, Object?>>>(
(String benchmarkName, List<BenchmarkScore> scores) {
return MapEntry<String, List<Map<String, Object?>>>(
benchmarkName,
scores.map<Map<String, Object?>>(
(BenchmarkScore score) {
final delta = _benchmarkDeltas[score];
return <String, Object?>{
...score.toJson(),
if (delta != null) 'delta': delta,
};
},
).toList(),
);
},
);
}
}
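
// A sketch of consuming the returned comparison map (hypothetical usage, not
// part of this script): flag any metric whose delta exceeds 5% of its
// baseline value.
//
//   final comparison = compareBenchmarks(baseline, test, baselineSource: src);
//   for (final entry in comparison.entries) {
//     for (final score in entry.value) {
//       final delta = score['delta'] as double?;
//       if (delta == null) continue;
//       final baselineValue = (score['value'] as num) - delta;
//       if (delta > 0.05 * baselineValue) {
//         stdout.writeln('Possible regression in ${entry.key}: ${score['metric']}');
//       }
//     }
//   }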