From 3017a853aade9dfcce647f671933ee065f4aa8c0 Mon Sep 17 00:00:00 2001 From: Phanindra899 Date: Tue, 31 Mar 2026 05:17:02 +0530 Subject: [PATCH 1/7] Add documentation for JsonImporter --- docs/JSON.md | 116 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100644 docs/JSON.md diff --git a/docs/JSON.md b/docs/JSON.md new file mode 100644 index 0000000..0643564 --- /dev/null +++ b/docs/JSON.md @@ -0,0 +1,116 @@ +# JSON Data Source + +> **Tip** +> See [examples/](../examples/) for sample configuration files. + +## Overview + +`JsonImporter` reads benchmark results from a local JSON file and feeds them into Otava for change-point analysis. It is the simplest data source to set up — no external database or service is required. + +The importer caches parsed file content in memory, so a file is only read once per session even if multiple tests reference the same path. + +--- + +## Expected JSON Format + +The input file must be a JSON array. Each element represents a single benchmark run. +```json +[ + { + "timestamp": 1711929600, + "metrics": [ + { "name": "throughput", "value": 4821.0 }, + { "name": "p99_latency_ms", "value": 142.7 } + ], + "attributes": { + "branch": "main", + "commit": "a3f9c12" + } + }, + { + "timestamp": 1712016000, + "metrics": [ + { "name": "throughput", "value": 5013.0 }, + { "name": "p99_latency_ms", "value": 138.2 } + ], + "attributes": { + "branch": "main", + "commit": "b7d2e45" + } + } +] +``` + +--- + +## Fields + +### `timestamp` + +- **Type:** integer (Unix epoch seconds) +- **Required:** yes +- Identifies when the benchmark run occurred. Used for time-range filtering via `DataSelector`. + +### `metrics` + +- **Type:** array of objects +- **Required:** yes +- Each object must have: + - `name` (string) — unique identifier for the metric within this run + - `value` (number) — the measured value +- Metric names are collected dynamically across all entries in the file. 
Names must be consistent across runs for change-point analysis to be meaningful. + +### `attributes` + +- **Type:** object (string → string) +- **Required:** yes if `branch` filtering is used +- Arbitrary key-value pairs describing the run context (e.g. branch, commit, version). +- The `branch` key is treated specially: if a branch is specified via `DataSelector` or `base_branch` in the config, only runs where `attributes["branch"]` matches that value are included. + +--- + +## Configuration Example + +Add a test with `type: json` to your `otava.yaml`: +```yaml +tests: + my_benchmark: + type: json + file: path/to/results.json + base_branch: main +``` + +| Field | Required | Description | +|---|---|---| +| `type` | yes | Must be `json` | +| `file` | yes | file: Path to the JSON file | +| `base_branch` | no | If set, only runs from this branch are analyzed by default | + +--- + +## Behavior + +- **File loading:** The file is read in full when first accessed. Parsed content is cached in memory for the lifetime of the session — repeated calls with the same file path do not re-read from disk. +- **Metric discovery:** All metric names are collected by scanning every entry in the file. The resulting set is unordered. +- **Attribute discovery:** Attribute keys are collected the same way — by scanning all entries. +- **Branch filtering:** If `selector.branch` is set, only runs where `attributes["branch"]` equals that value are included. If not set but `base_branch` is configured, that value is used instead. If neither is set, all runs are included. +- **Metric filtering:** If `selector.metrics` is set, only metrics whose names appear in that list are included. Others are silently skipped. +- **Time filtering:** Entries outside `selector.since_time` / `selector.until_time` are excluded. An invalid range (since > until) raises an error. +- **Truncation:** After filtering, only the last `selector.last_n_points` entries are kept for time, data, and attributes. 
+ +--- + +## Limitations + +- The entire file is read into memory at once. Very large files may cause high memory usage. +- There is no schema validation. Missing or malformed fields will cause a `KeyError` at runtime. +- The `branch` filter requires the key `"branch"` to exist inside `attributes` on every entry — if it is absent on any entry that would otherwise be included, the importer will raise a `KeyError`. +- Attribute values are expected to be strings. No type coercion is performed. +- The file path is resolved at config load time; a missing file raises a `TestConfigError` immediately. + +--- + +## Example Usage + +Run analysis on a test backed by a JSON file: +otava analyze my_benchmark From 14b45a840a214602fed458027c7dfba090a59952 Mon Sep 17 00:00:00 2001 From: Phanindra899 Date: Tue, 31 Mar 2026 05:22:37 +0530 Subject: [PATCH 2/7] Add Apache license header to JSON docs --- docs/JSON.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/docs/JSON.md b/docs/JSON.md index 0643564..c070fde 100644 --- a/docs/JSON.md +++ b/docs/JSON.md @@ -1,3 +1,21 @@ + # JSON Data Source > **Tip** From 6bb537148b5399b9ff15073587665c6b2cf6df3b Mon Sep 17 00:00:00 2001 From: Phanindra899 Date: Fri, 3 Apr 2026 09:22:22 +0530 Subject: [PATCH 3/7] Add example JSON data and config for documentation validation --- otava.yaml | 5 +++++ test_data/sample.json | 11 +++++++++++ 2 files changed, 16 insertions(+) create mode 100644 otava.yaml create mode 100644 test_data/sample.json diff --git a/otava.yaml b/otava.yaml new file mode 100644 index 0000000..c4d3af9 --- /dev/null +++ b/otava.yaml @@ -0,0 +1,5 @@ +tests: + my_benchmark: + type: json + file: test_data/sample.json + base_branch: main diff --git a/test_data/sample.json b/test_data/sample.json new file mode 100644 index 0000000..c288ac0 --- /dev/null +++ b/test_data/sample.json @@ -0,0 +1,11 @@ +[ + { + "timestamp": 1711929600, + "metrics": [ + { "name": "throughput", "value": 100 } + ], + "attributes": { + 
"branch": "main" + } + } +] From 682a4a403041682dbaec9de740ff49d181bc4d9b Mon Sep 17 00:00:00 2001 From: Phanindra899 Date: Fri, 3 Apr 2026 23:38:53 +0530 Subject: [PATCH 4/7] Fix license header for YAML example --- docs/JSON.md | 10 +++++---- otava.yaml | 5 ----- otava/examples/json/config/otava.yaml | 22 +++++++++++++++++++ .../examples/json/data}/sample.json | 0 4 files changed, 28 insertions(+), 9 deletions(-) delete mode 100644 otava.yaml create mode 100644 otava/examples/json/config/otava.yaml rename {test_data => otava/examples/json/data}/sample.json (100%) diff --git a/docs/JSON.md b/docs/JSON.md index c070fde..7b8b305 100644 --- a/docs/JSON.md +++ b/docs/JSON.md @@ -23,7 +23,7 @@ ## Overview -`JsonImporter` reads benchmark results from a local JSON file and feeds them into Otava for change-point analysis. It is the simplest data source to set up — no external database or service is required. +`JsonImporter` reads benchmark results from a local JSON file and feeds them into Otava for change-point analysis. It is a simple data source to set up — no external database or service is required. The importer caches parsed file content in memory, so a file is only read once per session even if multiple tests reference the same path. 
@@ -94,14 +94,14 @@ Add a test with `type: json` to your `otava.yaml`: tests: my_benchmark: type: json - file: path/to/results.json + file: otava/examples/json/data/sample.json base_branch: main ``` | Field | Required | Description | |---|---|---| | `type` | yes | Must be `json` | -| `file` | yes | file: Path to the JSON file | +| `file` | yes | Path to the JSON file | | `base_branch` | no | If set, only runs from this branch are analyzed by default | --- @@ -131,4 +131,6 @@ tests: ## Example Usage Run analysis on a test backed by a JSON file: -otava analyze my_benchmark +```bash +otava analyze my_benchmark --config otava/examples/json/config/otava.yaml +``` diff --git a/otava.yaml b/otava.yaml deleted file mode 100644 index c4d3af9..0000000 --- a/otava.yaml +++ /dev/null @@ -1,5 +0,0 @@ -tests: - my_benchmark: - type: json - file: test_data/sample.json - base_branch: main diff --git a/otava/examples/json/config/otava.yaml b/otava/examples/json/config/otava.yaml new file mode 100644 index 0000000..d374a5d --- /dev/null +++ b/otava/examples/json/config/otava.yaml @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +tests: + my_benchmark: + type: json + file: test_data/sample.json + base_branch: main diff --git a/test_data/sample.json b/otava/examples/json/data/sample.json similarity index 100% rename from test_data/sample.json rename to otava/examples/json/data/sample.json From d1efe5b34a3f48ceffec69efb346cb7c918cb766 Mon Sep 17 00:00:00 2001 From: Phanindra899 Date: Sat, 4 Apr 2026 09:39:43 +0530 Subject: [PATCH 5/7] Improve JsonImporter docs: clarify timestamp semantics, simplify attributes, fix paths, and update examples --- docs/JSON.md | 27 ++++++++------------------- otava/examples/json/config/otava.yaml | 2 +- otava/examples/json/data/sample.json | 9 +++++++++ 3 files changed, 18 insertions(+), 20 deletions(-) diff --git a/docs/JSON.md b/docs/JSON.md index 7b8b305..8338249 100644 --- a/docs/JSON.md +++ b/docs/JSON.md @@ -67,24 +67,25 @@ The input file must be a JSON array. Each element represents a single benchmark - **Type:** integer (Unix epoch seconds) - **Required:** yes -- Identifies when the benchmark run occurred. Used for time-range filtering via `DataSelector`. +- Identifies when the commit was merged into the tracked branch. This timestamp should remain constant for the same commit, even if benchmarks are rerun multiple times. ### `metrics` - **Type:** array of objects - **Required:** yes - Each object must have: - - `name` (string) — unique identifier for the metric within this run + - `name` (string) — unique identifier for the metric within this result - `value` (number) — the measured value -- Metric names are collected dynamically across all entries in the file. Names must be consistent across runs for change-point analysis to be meaningful. +- Metric names must be consistent across results for change-point analysis to be meaningful. + +> Note: A `unit` field (e.g., "ms") is not currently supported by JsonImporter. 
+ ### `attributes` - **Type:** object (string → string) -- **Required:** yes if `branch` filtering is used +- **Required:** no - Arbitrary key-value pairs describing the run context (e.g. branch, commit, version). -- The `branch` key is treated specially: if a branch is specified via `DataSelector` or `base_branch` in the config, only runs where `attributes["branch"]` matches that value are included. - +- The `branch` key is required only when branch-based filtering is in effect, i.e. when a branch is selected via `DataSelector` or `base_branch` is set in the config; in that case every entry must define `attributes["branch"]`. --- ## Configuration Example @@ -106,18 +107,6 @@ tests: --- -## Behavior - -- **File loading:** The file is read in full when first accessed. Parsed content is cached in memory for the lifetime of the session — repeated calls with the same file path do not re-read from disk. -- **Metric discovery:** All metric names are collected by scanning every entry in the file. The resulting set is unordered. -- **Attribute discovery:** Attribute keys are collected the same way — by scanning all entries. -- **Branch filtering:** If `selector.branch` is set, only runs where `attributes["branch"]` equals that value are included. If not set but `base_branch` is configured, that value is used instead. If neither is set, all runs are included. -- **Metric filtering:** If `selector.metrics` is set, only metrics whose names appear in that list are included. Others are silently skipped. -- **Time filtering:** Entries outside `selector.since_time` / `selector.until_time` are excluded. An invalid range (since > until) raises an error. -- **Truncation:** After filtering, only the last `selector.last_n_points` entries are kept for time, data, and attributes. - ---- - ## Limitations - The entire file is read into memory at once. Very large files may cause high memory usage. 
@@ -130,7 +119,7 @@ tests: ## Example Usage -Run analysis on a test backed by a JSON file: +Analyze test results stored in JSON format: ```bash otava analyze my_benchmark --config otava/examples/json/config/otava.yaml ``` diff --git a/otava/examples/json/config/otava.yaml b/otava/examples/json/config/otava.yaml index d374a5d..9ec882d 100644 --- a/otava/examples/json/config/otava.yaml +++ b/otava/examples/json/config/otava.yaml @@ -18,5 +18,5 @@ tests: my_benchmark: type: json - file: test_data/sample.json + file: otava/examples/json/data/sample.json base_branch: main diff --git a/otava/examples/json/data/sample.json b/otava/examples/json/data/sample.json index c288ac0..f6816e6 100644 --- a/otava/examples/json/data/sample.json +++ b/otava/examples/json/data/sample.json @@ -7,5 +7,14 @@ "attributes": { "branch": "main" } + }, + { + "timestamp": 1712016000, + "metrics": [ + { "name": "throughput", "value": 120 } + ], + "attributes": { + "branch": "main" + } } ] From 8894acb71fdc3842875a24f508d8ff58a7a63e0f Mon Sep 17 00:00:00 2001 From: Phanindra899 Date: Mon, 6 Apr 2026 18:56:35 +0530 Subject: [PATCH 6/7] Use realistic multi-point JSON example for change-point detection --- examples/json/data/sample.json | 82 ++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 examples/json/data/sample.json diff --git a/examples/json/data/sample.json b/examples/json/data/sample.json new file mode 100644 index 0000000..3ed405c --- /dev/null +++ b/examples/json/data/sample.json @@ -0,0 +1,82 @@ +[ + { + "timestamp": 1767222000, + "metrics": [ + { "name": "metric1", "value": 154023 }, + { "name": "metric2", "value": 10.43 } + ], + "attributes": { "branch": "main", "commit": "aaa0" } + }, + { + "timestamp": 1767308400, + "metrics": [ + { "name": "metric1", "value": 138455 }, + { "name": "metric2", "value": 10.23 } + ], + "attributes": { "branch": "main", "commit": "aaa1" } + }, + { + "timestamp": 1767394800, + "metrics": [ + { "name": "metric1", 
"value": 143112 }, + { "name": "metric2", "value": 10.29 } + ], + "attributes": { "branch": "main", "commit": "aaa2" } + }, + { + "timestamp": 1767481200, + "metrics": [ + { "name": "metric1", "value": 149190 }, + { "name": "metric2", "value": 10.91 } + ], + "attributes": { "branch": "main", "commit": "aaa3" } + }, + { + "timestamp": 1767567600, + "metrics": [ + { "name": "metric1", "value": 132098 }, + { "name": "metric2", "value": 10.34 } + ], + "attributes": { "branch": "main", "commit": "aaa4" } + }, + { + "timestamp": 1767654000, + "metrics": [ + { "name": "metric1", "value": 151344 }, + { "name": "metric2", "value": 10.69 } + ], + "attributes": { "branch": "main", "commit": "aaa5" } + }, + { + "timestamp": 1767740400, + "metrics": [ + { "name": "metric1", "value": 155145 }, + { "name": "metric2", "value": 9.23 } + ], + "attributes": { "branch": "main", "commit": "aaa6" } + }, + { + "timestamp": 1767826800, + "metrics": [ + { "name": "metric1", "value": 148889 }, + { "name": "metric2", "value": 9.11 } + ], + "attributes": { "branch": "main", "commit": "aaa7" } + }, + { + "timestamp": 1767913200, + "metrics": [ + { "name": "metric1", "value": 149466 }, + { "name": "metric2", "value": 9.13 } + ], + "attributes": { "branch": "main", "commit": "aaa8" } + }, + { + "timestamp": 1767999600, + "metrics": [ + { "name": "metric1", "value": 148209 }, + { "name": "metric2", "value": 9.03 } + ], + "attributes": { "branch": "main", "commit": "aaa9" } + } +] From f163d4957a79ec70a06bf5a2cc1544bf8194617a Mon Sep 17 00:00:00 2001 From: Phanindra899 Date: Tue, 14 Apr 2026 22:11:55 +0530 Subject: [PATCH 7/7] Remove duplicate sample JSON file --- examples/json/data/sample.json | 82 ---------------------------------- 1 file changed, 82 deletions(-) delete mode 100644 examples/json/data/sample.json diff --git a/examples/json/data/sample.json b/examples/json/data/sample.json deleted file mode 100644 index 3ed405c..0000000 --- a/examples/json/data/sample.json +++ /dev/null @@ -1,82 
+0,0 @@ -[ - { - "timestamp": 1767222000, - "metrics": [ - { "name": "metric1", "value": 154023 }, - { "name": "metric2", "value": 10.43 } - ], - "attributes": { "branch": "main", "commit": "aaa0" } - }, - { - "timestamp": 1767308400, - "metrics": [ - { "name": "metric1", "value": 138455 }, - { "name": "metric2", "value": 10.23 } - ], - "attributes": { "branch": "main", "commit": "aaa1" } - }, - { - "timestamp": 1767394800, - "metrics": [ - { "name": "metric1", "value": 143112 }, - { "name": "metric2", "value": 10.29 } - ], - "attributes": { "branch": "main", "commit": "aaa2" } - }, - { - "timestamp": 1767481200, - "metrics": [ - { "name": "metric1", "value": 149190 }, - { "name": "metric2", "value": 10.91 } - ], - "attributes": { "branch": "main", "commit": "aaa3" } - }, - { - "timestamp": 1767567600, - "metrics": [ - { "name": "metric1", "value": 132098 }, - { "name": "metric2", "value": 10.34 } - ], - "attributes": { "branch": "main", "commit": "aaa4" } - }, - { - "timestamp": 1767654000, - "metrics": [ - { "name": "metric1", "value": 151344 }, - { "name": "metric2", "value": 10.69 } - ], - "attributes": { "branch": "main", "commit": "aaa5" } - }, - { - "timestamp": 1767740400, - "metrics": [ - { "name": "metric1", "value": 155145 }, - { "name": "metric2", "value": 9.23 } - ], - "attributes": { "branch": "main", "commit": "aaa6" } - }, - { - "timestamp": 1767826800, - "metrics": [ - { "name": "metric1", "value": 148889 }, - { "name": "metric2", "value": 9.11 } - ], - "attributes": { "branch": "main", "commit": "aaa7" } - }, - { - "timestamp": 1767913200, - "metrics": [ - { "name": "metric1", "value": 149466 }, - { "name": "metric2", "value": 9.13 } - ], - "attributes": { "branch": "main", "commit": "aaa8" } - }, - { - "timestamp": 1767999600, - "metrics": [ - { "name": "metric1", "value": 148209 }, - { "name": "metric2", "value": 9.03 } - ], - "attributes": { "branch": "main", "commit": "aaa9" } - } -]