Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 16 additions & 16 deletions queries/cdmq/cdm.js
Original file line number Diff line number Diff line change
Expand Up @@ -677,7 +677,7 @@ async function fetchBatchedData(instance, reqs, batchSize = 16) {
try {
var osReqStart = Date.now();
var bodyLen = req.body ? req.body.length : 0;
console.log('[' + new Date().toISOString() + '] [OS-REQ] POST ' + req.url + ' (' + bodyLen + ' bytes)');
//console.log POST ' + req.url + ' (' + bodyLen + ' bytes)');
if (process.env.CDM_LOG_OS_CURL) {
var curlBody = req.body.replace(/'/g, "'\\''");
console.log('[' + new Date().toISOString() + '] [OS-CURL] curl -s -X POST "' + req.url + '" -H "Content-Type: application/json" -d $\'' + curlBody + '\'');
Expand All @@ -689,7 +689,7 @@ async function fetchBatchedData(instance, reqs, batchSize = 16) {
headers: { 'Content-Type': 'application/json' },
});
var osElapsed = Date.now() - osReqStart;
console.log('[' + new Date().toISOString() + '] [OS-RESP] POST ' + req.url + ' status=' + response.status + ' in ' + osElapsed + 'ms');
//console.log POST ' + req.url + ' status=' + response.status + ' in ' + osElapsed + 'ms');
if (response.ok) {
try {
return await response.json();
Expand Down Expand Up @@ -819,7 +819,7 @@ esJsonArrRequest = async function (instance, docType, action, jsonArr, yearDotMo
} else {
batchSize = 16;
}
console.log('[' + new Date().toISOString() + '] [OS-BATCH] ' + reqs.length + ' _msearch request(s), ' + totalSubQueries + ' sub-queries, batchSize=' + batchSize);
//console.log ' + reqs.length + ' _msearch request(s), ' + totalSubQueries + ' sub-queries, batchSize=' + batchSize);
var responses = await fetchBatchedData(instance, reqs, batchSize);
reqs = [];

Expand Down Expand Up @@ -2921,7 +2921,7 @@ getMetricGroupsFromBreakouts = async function (instance, sets, yearDotMonth) {
jsonArr.push(JSON.stringify(q));
// Log the metric_desc query as curl for debugging
var indexName = getIndexName('metric_desc', instance, yearDotMonth);
console.log('[' + new Date().toISOString() + '] [OS-METRIC-DESC] curl -s -X POST "http://' + instance['host'] + '/' + indexName + '/_search" -H "Content-Type: application/json" -d \'' + JSON.stringify(q) + '\'');
//console.log -s -X POST "http://' + instance['host'] + '/' + indexName + '/_search" -H "Content-Type: application/json" -d \'' + JSON.stringify(q) + '\'');
});
var mdStart = Date.now();
var responses = await esJsonArrRequest(instance, 'metric_desc', '/_msearch', jsonArr, yearDotMonth);
Expand Down Expand Up @@ -3105,9 +3105,9 @@ sendMetricReq = async function (
// Trim the pre-allocated array to actual size before sending
var actualLen = jsonArr._writeIdx;
var sendArr = jsonArr.slice(0, actualLen);
console.log('[' + new Date().toISOString() + '] [PERF] sendMetricReq: submitting ' + actualLen + ' jsonArr entries (' + (jsonArrEstimatedBytes/1024/1024).toFixed(1) + 'MB) to esJsonArrRequest');
//console.log: submitting ' + actualLen + ' jsonArr entries (' + (jsonArrEstimatedBytes/1024/1024).toFixed(1) + 'MB) to esJsonArrRequest');
const theseResponses = await esJsonArrRequest(instance, 'metric_data', '/_msearch', sendArr, yearDotMonth);
console.log('[' + new Date().toISOString() + '] [PERF] sendMetricReq: esJsonArrRequest returned ' + theseResponses.length + ' responses in ' + (Date.now()-esStart) + 'ms');
//console.log: esJsonArrRequest returned ' + theseResponses.length + ' responses in ' + (Date.now()-esStart) + 'ms');
responses.push(...theseResponses);
jsonArr._writeIdx = 0;
jsonArrTracker._writeIdx = 0;
Expand Down Expand Up @@ -3145,7 +3145,7 @@ sendMetricReq = async function (
valueSets[setIdx][trackerLabel]
);
}
console.log('[' + new Date().toISOString() + '] [PERF] sendMetricReq: calcAvg processed responses in ' + (Date.now()-calcStart) + 'ms');
//console.log: calcAvg processed responses in ' + (Date.now()-calcStart) + 'ms');
}

if (thisBegin > thisEnd) {
Expand Down Expand Up @@ -3326,7 +3326,7 @@ getMetricDataFromIdsSets = async function (instance, sets, metricGroupIdsByLabel
if (thisEnd > end) thisEnd = end;
if (thisBegin > thisEnd) break;
}
console.log('[' + new Date().toISOString() + '] [PERF] Built ' + timeRangeTemplates.length + ' time-range templates for set ' + idx);
//console.log templates for set ' + idx);

const sortedKeys = Object.keys(metricGroupIdsByLabelSets[idx]).sort();
var jsonArr = [];
Expand Down Expand Up @@ -3364,14 +3364,14 @@ getMetricDataFromIdsSets = async function (instance, sets, metricGroupIdsByLabel

if (shouldFlush && jsonArr.length > 0) {
var esStart = Date.now();
console.log('[' + new Date().toISOString() + '] [PERF] Flushing ' + jsonArr.length + ' entries (' + (k+1) + '/' + sortedKeys.length + ' labels) to OpenSearch');
//console.log ' + jsonArr.length + ' entries (' + (k+1) + '/' + sortedKeys.length + ' labels) to OpenSearch');
var theseResponses = await esJsonArrRequest(instance, 'metric_data', '/_msearch', jsonArr, yearDotMonth);
console.log('[' + new Date().toISOString() + '] [PERF] OpenSearch returned ' + theseResponses.length + ' responses in ' + (Date.now()-esStart) + 'ms');
//console.log ' + theseResponses.length + ' responses in ' + (Date.now()-esStart) + 'ms');
responses.push(...theseResponses);

// Process responses
var calcStart = Date.now();
console.log('[' + new Date().toISOString() + '] [DEBUG] Before calcAvg loop: jsonArrIdx=' + jsonArrIdx + ', responses.length=' + responses.length + ', jsonArrTracker.length=' + jsonArrTracker.length);
//console.log Before calcAvg loop: jsonArrIdx=' + jsonArrIdx + ', responses.length=' + responses.length + ', jsonArrTracker.length=' + jsonArrTracker.length);
while (jsonArrIdx < responses.length * 2) {
var trackerIdx = jsonArrIdx / 2;
var tracker = jsonArrTracker[trackerIdx];
Expand All @@ -3384,20 +3384,20 @@ getMetricDataFromIdsSets = async function (instance, sets, metricGroupIdsByLabel
jsonArrIdx = calcAvg(tracker.begin, tracker.end, responses, jsonArrIdx, jsonArrTracker, tracker.numMetricIds, valueSets[setIdx][trackerLabel]);
console.log('[' + new Date().toISOString() + '] [DEBUG] calcAvg: label="' + trackerLabel + '", set=' + setIdx + ', jsonArrIdx ' + prevIdx + '->' + jsonArrIdx + ', values=' + valueSets[setIdx][trackerLabel].length);
}
console.log('[' + new Date().toISOString() + '] [PERF] calcAvg in ' + (Date.now()-calcStart) + 'ms');
//console.log in ' + (Date.now()-calcStart) + 'ms');

jsonArr = [];
jsonArrTracker = [];
responses = [];
jsonArrIdx = 0;
}

if (k === 0 || lastPass || (Date.now() - labelStart > 500)) {
console.log('[' + new Date().toISOString() + '] [PERF] label ' + (k+1) + '/' + sortedKeys.length + ' "' + label + '" took ' + (Date.now() - labelStart) + 'ms');
}
//if (k === 0 || lastPass || (Date.now() - labelStart > 500)) {
// console.log('[' + new Date().toISOString() + '] [PERF] label ' + (k+1) + '/' + sortedKeys.length + ' "' + label + '" took ' + (Date.now() - labelStart) + 'ms');
//}
}
}
console.log('[' + new Date().toISOString() + '] [PERF] getMetricDataFromIdsSets total: ' + (Date.now()-funcStart) + 'ms, valueSets.length=' + valueSets.length + ', keys=' + valueSets.map(function(vs, i) { return i + ':' + (vs ? Object.keys(vs).join(',') : 'null'); }).join(' | '));
console.log('[' + new Date().toISOString() + '] [PERF] getMetricDataFromIdsSets total: ' + (Date.now()-funcStart) + 'ms');
return valueSets;
};

Expand Down
2 changes: 1 addition & 1 deletion queries/cdmq/server.js
Original file line number Diff line number Diff line change
Expand Up @@ -1561,7 +1561,7 @@ app.post('/api/v1/metric-data', async (req, res) => {
var reqStart = Date.now();
var breakoutStr = Array.isArray(breakout) ? breakout.join(',') : (breakout || 'none');
serverLog('POST /api/v1/metric-data: ' + source + '::' + type + ' resolution=' + resolution + ' breakout=[' + breakoutStr + ']' + (filter ? ' filter=' + filter : '') + ' run=' + (run || 'none').toString().substring(0, 8) + '... period=' + (period || 'none').toString().substring(0, 8) + '...', req.reqId);
serverLog(' curl: curl -s -X POST http://localhost:3000/api/v1/metric-data -H "Content-Type: application/json" -d \'' + JSON.stringify({ run: run, period: period, begin: begin, end: end, source: source, type: type, resolution: resolution, breakout: breakout, filter: filter }) + '\'', req.reqId);
//serverLog(' curl: curl -s -X POST http://localhost:3000/api/v1/metric-data -H "Content-Type: application/json" -d \'' + JSON.stringify({ run: run, period: period, begin: begin, end: end, source: source, type: type, resolution: resolution, breakout: breakout, filter: filter }) + '\'', req.reqId);

// Use instances from request if provided, otherwise use server's configured instances
var instancesToUse = reqInstances && reqInstances.length > 0 ? reqInstances : instances;
Expand Down
14 changes: 6 additions & 8 deletions queries/cdmq/web-ui/ARCHITECTURE.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Crucible Web UI — Architecture & Design

> **Note:** This document covers the initial Phase 1 architecture. For comprehensive
> documentation covering all phases (search, compare, supplemental metrics, breakouts,
> URL sharing, etc.), see **[DESIGN.md](DESIGN.md)**.
> **Note:** This document covers the initial architecture overview. For comprehensive
> documentation covering all phases (search, compare, deep dive, supplemental metrics,
> breakouts, URL sharing, performance optimizations, etc.), see **[DESIGN.md](DESIGN.md)**.

## Overview

Expand Down Expand Up @@ -51,6 +51,7 @@ queries/cdmq/
│ │ ├── IterationTable.jsx # Results table with sorting/filtering
│ │ ├── SelectionBar.jsx # Persistent selection display
│ │ ├── CompareView.jsx # Bar charts with grouping, metrics, breakouts
│ │ ├── DeepDiveView.jsx # Time-series line charts with zoom and legends
│ │ ├── AutocompleteInput.jsx # Reusable dropdown (single/multi-select)
│ │ └── DebugConsole.jsx # Timing/debug console panel
│ └── dist/ # Build output (served by Express in production)
Expand Down Expand Up @@ -347,12 +348,9 @@ See sections above and [DESIGN.md](DESIGN.md) for full details.
### Phase 2: Compare — Implemented
Bar chart comparison with hierarchical group-by headers, supplemental metrics (overlay + panel modes), breakouts with filter/sample selection, click-to-pin with reference lines, URL state sharing. See [DESIGN.md](DESIGN.md) for full details.

### Phase 3: Deep Dive (Time-Series Line Charts) — Planned
### Phase 3: Deep Dive (Time-Series Line Charts) — Implemented

Interactive time-series exploration:
- Line charts with zoom/pan
- Breakout exploration
- Iteration overlay with relative time alignment
Time-series line charts with per-iteration color themes, combined/split/stacked modes, brush-to-zoom with re-query, unified series legend with rowSpan grouping, live value tracking across all charts, and two-pass filter support. See [DESIGN.md](DESIGN.md) for full details.

## Design Decisions & Rationale

Expand Down
89 changes: 86 additions & 3 deletions queries/cdmq/web-ui/DESIGN.md
Original file line number Diff line number Diff line change
Expand Up @@ -702,21 +702,104 @@ Primary metric values are NOT loaded with iteration details (would add ~7 second

### Start-Server Restart Loop

`start-server.sh` runs server.js in a `while true` loop. Killing the Node process (`pkill -f 'node ./server.js'`) triggers an automatic restart with updated code after a 1-second delay. This avoids the need to stop/restart the container during development.
`start-server.sh` runs server.js in a `while true` loop. `npm ci` and the web UI build are gated by stamp files, so they re-run only when `package-lock.json` changes. Killing the Node process (`pkill -f 'node ./server.js'`) triggers an automatic restart. For a full restart, run `sudo crucible stop opensearch && sudo crucible start opensearch`.

### Metric Query Performance (20x improvement)

The `getMetricDataFromIdsSets` function was rewritten to address a 100-second bottleneck when querying 130+ breakout labels at resolution=100:

1. **Time-range templates**: The 4 queries per time window (weighted avg, total weight, 2 boundary doc fetches) share an identical structure except for their timestamps. Templates are built once per set with an `__IDS__` placeholder, then reused for each label via `String.replace()`.

2. **Periodic flushing**: Instead of accumulating all queries (104K+ array entries) before sending to OpenSearch, flush every 10 labels. Keeps array sizes small and lets OpenSearch process while building the next batch.

3. **Native fetch**: Replaced `then-request` (which spawns child processes via `sync-rpc`) with Node.js native `fetch` for all async OpenSearch HTTP requests.

4. **Debug function short-circuit**: `numMBytes()` and `memUsage()` now return immediately when `debugOut == 0`, avoiding `JSON.stringify` on large arrays.

5. **Two-pass filter**: When `filter` is set with `resolution > 1`, the first pass queries at resolution=1 to determine which labels survive the filter; the second pass then re-queries at the requested resolution using only those labels' UUIDs.

---

## Deep Dive Workflow

### Overview

The Deep Dive view provides time-series line charts for selected metrics at high resolution (default 100 data points), with multiple iterations overlaid.

### Entry Flow

1. In Compare view, check "Dive" on metric panels to select metrics for deep dive
2. "Deep Dive (N)" button becomes enabled in the nav bar
3. Clicking it snapshots the supplemental metric configs (breakouts, filters) and switches view
4. DeepDiveView fetches period info, then metric data sequentially per metric

### Data Alignment

CDM metric data is continuous — each sample covers a `[begin, end]` range in epoch-ms with no gaps. All series at the same resolution have exactly N samples. The chart uses **sample index** as the X coordinate (not raw elapsed midpoints) to ensure all series from different iterations align perfectly on the same grid. The X-axis displays elapsed time based on the longest period's duration.

### Chart Modes

Each metric chart has independent controls:
- **Combined**: All iterations overlaid on one chart (300px)
- **Split**: One chart per iteration stacked vertically (200px each), with consistent Y-axis scale across iterations
- **Lines / Stacked**: In split mode, toggle between individual lines and stacked area charts (useful for CPU utilization breakdown)

### Zoom

- **Click + drag** on any chart to select a time range (blue highlight)
- All charts re-query the zoomed time range at the same resolution, so the same number of data points covers a shorter span (more detail)
- Zoom is composable — zoom again within a zoomed view
- "Reset Zoom" button shows current zoom percentage
- Zoom is percentage-based: each iteration's begin/end is adjusted proportionally

### Series Legend

Below each chart, a unified legend table shows all breakout labels once (not duplicated per iteration):

- **Segment columns**: Breakout dimension values with rowSpan grouping and sticky text for tall cells
- **Per-iteration columns**: Color swatch + value pair for each iteration, with iteration chip header matching the context bar style
- **Live tracking**: Values update as pointer moves across any chart, synchronized across all charts via shared elapsed time
- **Click-to-pin**: Click locks all charts; click again to resume live tracking
- **Common prefix/suffix stripping**: Hostnames like `f35-h17-000-r640.rdu2.scalelab.redhat.com` are shown as `f35-h17-000-r640`
- **Empty series**: No color swatch shown when an iteration lacks data for a label

### Per-Iteration Color Themes

Each iteration gets a color family (blues, reds, greens, purples, teals, ambers). Within each family, shade varies per breakout label. This makes it easy to identify which iteration a line belongs to.

### Context Bar

Above the charts, a context section shows:
- **Common**: Params/tags/benchmark shared across all iterations (chip-styled, respects hidden fields)
- **Chip legend**: bench/tag/param color reference
- **Iterations**: Labeled chips with iteration-specific varying params, colored with the iteration's theme

### Server Endpoints

| Method | Path | Purpose |
|--------|------|---------|
| POST | `/api/v1/iterations/period-info` | Period IDs and time ranges per iteration |
| POST | `/api/v1/metric-data` | Time-series metric values with resolution and breakouts |

### Progressive Loading

Metrics are fetched sequentially (one metric at a time). Within each metric, iterations run concurrently. Charts render progressively as data arrives.

---

## Known Limitations and Future Work

### Current Limitations

- **Phase 3 (Deep Dive):** Time-series line charts are not yet implemented
- **Large result sets:** Searching across many months with hundreds of runs can be slow due to sequential OpenSearch queries
- **Bundle size:** Recharts adds ~400KB to the bundle. Code splitting could help.
- **Breakout label parsing:** CDM may omit breakout dimensions with single values from labels, making label-to-dimension mapping imperfect. The sidebar uses segment-based grouping to work around this.
- **Deep dive color differentiation:** With many breakout labels, shades within an iteration's color theme can be hard to distinguish

### Planned Features

- **Deep Dive view:** Time-series line charts with zoom/pan and interactive breakout exploration
- **Deep dive series filtering:** Click-to-hide individual series or groups in the legend
- **Deep dive breakout controls:** Add/remove breakouts directly in deep dive view
- **"Other" aggregate series:** For filtered-out labels, show a single aggregated line
- **Save/load workflows:** Server-side or localStorage persistence of named workflows
- **Drag-to-reorder:** Group-by chips currently use arrow buttons; drag-and-drop would be more intuitive
Loading
Loading