Skip to content

Commit f163085

Browse files
authored
[OGUI-1590] Add support for final ECS operation (error/timeout) in DCS SOR pane (#2697)
* Add support for ECS operation done in SOR * Fix lint issues
1 parent 4f7f920 commit f163085

File tree

4 files changed

+151
-9
lines changed

4 files changed

+151
-9
lines changed

Control/lib/adapters/DcsIntegratedEventAdapter.js

Lines changed: 86 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,15 @@
1111
* or submit itself to any jurisdiction.
1212
*/
1313

14+
const {
15+
EcsOperationAndStepStatus: {
16+
DONE_ERROR,
17+
DONE_TIMEOUT
18+
}
19+
} = require('../common/ecsOperationAndStepStatus.enum.js');
20+
1421
/**
15-
* DcsIntegratedEventAdapter - Given an AliECS Integrated Service Event for DCS.SOR, build a DCS Integrated Event
22+
* @class DcsIntegratedEventAdapter - Given an AliECS Integrated Service Event for DCS.SOR, build a DCS Integrated Event
1623
*
1724
* The DCS SOR event is a special event that comes from either:
1825
* * the DCS service itself (when containing the payload "dcsEvent") and it is for one detector only
@@ -26,7 +33,77 @@ class DcsIntegratedEventAdapter {
2633
}
2734

2835
/**
29-
* Build a DCS Integrated Event from an AliECS Integrated Service Event. If it is a DCSevent, the detector will replace detectors array
36+
* Build a DCS Integrated Event from an AliECS Integrated Service Event - SOR. If it is a DCS event, the detector will replace the detectors array
37+
*
38+
* // IntegratedService event, related to SOR but with a failure on ECS side (such as timeout)
39+
* @example
40+
* {
41+
* "timestamp": 1733497646607,
42+
* "integratedServiceEvent": {
43+
* "name": "readout-dataflow.dcs.sor",
44+
* "error": "DCS SOR timed out after 1s: rpc error: code = DeadlineExceeded desc = Deadline Exceeded",
45+
* "operationName": "dcs.StartOfRun()",
46+
* "operationStatus": "ONGOING",
47+
* "operationStep": "perform DCS call: StartOfRun",
48+
* "operationStepStatus": "DONE_TIMEOUT",
49+
* "environmentId": "2rRm96N9k7E",
50+
* "payload": "{\"detectors\":[\"EMC\"],\"detectorsReadiness\":{\"EMC\":\"SOR_AVAILABLE\"},\"runNumber\":1601}"
51+
* }
52+
* // IntegratedService event with final state DONE_ERROR following the DONE_TIMEOUT from above
53+
* @example
54+
* {
55+
* "timestamp": 1734004912438,
56+
* "integratedServiceEvent": {
57+
* "name": "readout-dataflow.dcs.sor",
58+
* "error": "DCS SOR timed out after 100ms: rpc error: code = DeadlineExceeded desc = context deadline exceeded : SOR failed for EMC, FDD, DCS EOR will run anyway for this run",
59+
* "operationName": "dcs.StartOfRun()",
60+
* "operationStatus": "DONE_ERROR",
61+
* "operationStep": "perform DCS call: StartOfRun",
62+
* "operationStepStatus": "DONE_ERROR",
63+
* "environmentId": "2rYQabnjWy2",
64+
* "payload": "{\"detectors\":[\"EMC\",\"FDD\"],\"detectorsReadiness\":{\"EMC\":\"SOR_AVAILABLE\",\"FDD\":\"SOR_AVAILABLE\"},\"failedDetectors\":[\"EMC\",\"FDD\"],\"runNumber\":1622}"
65+
* }
66+
*
67+
* // IntegratedService event, related to SOR_PROGRESSING with payload from DCS
68+
* @example
69+
* {
70+
* "timestamp": 1734004912360,
71+
* "timestampNano": 1734004912360675322,
72+
* "environmentEvent": null,
73+
* "taskEvent": null,
74+
* "roleEvent": null,
75+
* "callEvent": null,
76+
* "integratedServiceEvent": {
77+
* "name": "readout-dataflow.dcs.sor",
78+
* "error": null,
79+
* "operationName": "dcs.StartOfRun()",
80+
* "operationStatus": "ONGOING",
81+
* "operationStep": "perform DCS call: StartOfRun",
82+
* "operationStepStatus": "ONGOING",
83+
* "environmentId": "2rYQabnjWy2",
84+
* "payload": "{
85+
* \"dcsEvent\": {
86+
* \"eventtype\":20,
87+
* \"detector\":2,
88+
* \"state\":5,
89+
* \"extraParameters\":{
90+
* \"run_no\":\"1622\"
91+
* },
92+
* \"timestamp\":\"2024-12-12 13:01:52.358\",
93+
* \"message\":\"run_type\"
94+
* },
95+
* \"detector\":\"EMC\",
96+
* \"detectors\":[\"EMC\",\"FDD\"],
97+
* \"detectorsReadiness\":{
98+
* \"EMC\":\"SOR_AVAILABLE\",
99+
* \"FDD\":\"SOR_AVAILABLE\"
100+
* },
101+
* \"runNumber\":1622,
102+
* \"state\":\"SOR_PROGRESSING\"
103+
* }"
104+
* }
105+
*
106+
* Final OperationStates: DONE_TIMEOUT/DONE_ERROR/DONE_OK
30107
* @param {object} event - AliECS Integrated Service Event
31108
* @param {number} timestamp - timestamp of the event (int64 as per proto file definition)
32109
* @return {object} DCS Integrated Event
@@ -37,7 +114,13 @@ class DcsIntegratedEventAdapter {
37114

38115
const payloadJSON = JSON.parse(payload);
39116
const { dcsEvent, runNumber, detector = null, state } = payloadJSON;
40-
if (!dcsEvent) {
117+
118+
if (!dcsEvent
119+
&& operationStatus !== DONE_ERROR && operationStatus !== DONE_TIMEOUT
120+
&& operationStepStatus !== DONE_ERROR && operationStepStatus !== DONE_TIMEOUT
121+
) {
122+
// if there is no DCS event and status is not final error or timeout, we ignore the event as we expect to have `RUN_OK` from DCS as final state
123+
// or DONE_TIMEOUT or DONE_ERROR from ECS. We are not interested in DONE_OK from ECS as this means all detectors in RUN_OK which we already look for
41124
return null;
42125
}
43126
let { detectors } = payloadJSON;
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/**
2+
* @license
3+
* Copyright 2019-2020 CERN and copyright holders of ALICE O2.
4+
* See http://alice-o2.web.cern.ch/copyright for details of the copyright holders.
5+
* All rights not expressly granted are reserved.
6+
*
7+
* This software is distributed under the terms of the GNU General Public
8+
* License v3 (GPL Version 3), copied verbatim in the file "COPYING".
9+
*
10+
* In applying this license CERN does not waive the privileges and immunities
11+
* granted to it by virtue of its status as an Intergovernmental Organization
12+
* or submit itself to any jurisdiction.
13+
*/
14+
15+
/**
16+
* Available ECS Statuses of operations for Kafka Events
17+
* These operations can be under the label:
18+
* * operationStatus
19+
* * operationStepStatus
20+
*/
21+
const EcsOperationAndStepStatus = Object.freeze({
22+
DONE_OK: 'DONE_OK',
23+
DONE_ERROR: 'DONE_ERROR',
24+
DONE_TIMEOUT: 'DONE_TIMEOUT',
25+
});
26+
27+
exports.EcsOperationAndStepStatus = EcsOperationAndStepStatus;

Control/public/common/enums/DetectorState.enum.js

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,5 +62,9 @@ export const DetectorStateStyle = Object.freeze({
6262
EOR_AVAILABLE: '',
6363
PFR_AVAILABLE: '',
6464
PFR_UNAVAILABLE: '',
65-
TIMEOUT: '',
65+
TIMEOUT: 'bg-danger white',
66+
// Custom states for the SOR/EOR operations covered by ECS when DCS does not reply
67+
DONE_TIMEOUT: 'bg-danger white',
68+
DONE_ERROR: 'bg-danger white',
69+
DONE_OK: 'bg-primary white',
6670
});

Control/public/pages/Environment/components/dcs/dcsSorPanel.js

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ import { infoLoggerButtonLink } from './../../../../common/buttons/infoLoggerRed
2323
/**
2424
* Panel that will display DCS last states during the SOR activity at the start of run
2525
* @param {string} id - environment id
26-
* @param {array<string>} detectors - list of detectors
26+
* @param {array<string>} detectors - list of detectors as received by the environment currently displayed in variable `includedDetectors`
2727
* @return {vnode}
2828
*/
2929
export const dcsSorPanel = (id, detectors) => {
@@ -67,18 +67,46 @@ export const dcsSorPanel = (id, detectors) => {
6767
}
6868

6969
/**
70-
* Group operations by detector
70+
* Group events by detector and filter out events that are arriving after a final event such as
71+
* some detectors might end the SOR sequence and arrive in a RUN_OK, DONE_TIMEOUT or DONE_ERROR state but still receive from ECS an event stating that it failed.
72+
* This is incorrect behaviour from ECS and such events should be filtered out.
7173
* @param {array<object>} operations - list of operations
7274
* @return {object}
7375
*/
7476
const groupOperationsByDetector = (operations) => {
7577
const groupedOperations = {};
76-
operations.forEach((operation) => {
77-
operation.detectors.forEach((detector) => {
78+
operations.forEach((event) => {
79+
const eventCopy = JSON.parse(JSON.stringify(event));
80+
eventCopy.detectors.forEach((detector) => {
7881
if (!groupedOperations[detector]) {
7982
groupedOperations[detector] = [];
83+
if (!eventCopy?.state) {
84+
// first operation might be an error or timeout which comes without a state
85+
eventCopy.state = eventCopy.operationStepStatus ?? eventCopy.operationStatus;
86+
}
87+
groupedOperations[detector].push(eventCopy);
88+
} else {
89+
const lastOperation = groupedOperations[detector][groupedOperations[detector].length - 1];
90+
if (eventCopy.state) {
91+
// If there is a state, it means it is still an event from DCS
92+
groupedOperations[detector].push(eventCopy);
93+
} else if (
94+
lastOperation?.state !== 'RUN_OK'
95+
&& lastOperation?.state !== 'DONE_TIMEOUT'
96+
&& lastOperation?.state !== 'DONE_ERROR'
97+
) {
98+
// we only add an event or step with status DONE_TIMEOUT or DONE_ERROR if the last event state of that detector is not already final (RUN_OK, DONE_TIMEOUT or DONE_ERROR), e.g. it is still SOR_PROGRESSING
99+
const operationStatus = eventCopy.operationStatus;
100+
const operationStepStatus = eventCopy.operationStepStatus;
101+
// priority is given to operationStep as it offers more granularity
102+
if (operationStepStatus === 'DONE_TIMEOUT' || operationStepStatus === 'DONE_ERROR') {
103+
eventCopy.state = operationStepStatus;
104+
} else if (operationStatus === 'DONE_TIMEOUT' || operationStatus === 'DONE_ERROR') {
105+
eventCopy.state = operationStatus;
106+
}
107+
groupedOperations[detector].push(eventCopy);
108+
}
80109
}
81-
groupedOperations[detector].push(operation);
82110
});
83111
});
84112
return groupedOperations;

0 commit comments

Comments
 (0)