From 4ca22057adbfd5ebb6b6462f5ca09c5583b9c6a8 Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Thu, 12 Jun 2025 15:59:40 +0530 Subject: [PATCH 01/17] HDDS-12890. Add acceptance robot tests for ozone debug replicas verify --- .../src/main/compose/common/replicas-test.sh | 65 +++++++----- .../dist/src/main/compose/ozone/test.sh | 1 - .../main/compose/ozonesecure-ha/docker-config | 3 + ...node.robot => block-existence-check.robot} | 17 +-- ...ock.robot => corrupt-block-checksum.robot} | 30 ++---- .../debug/ozone-debug-keywords.robot | 63 +++++++++++ .../debug/ozone-debug-stale-datanode.robot | 46 -------- .../debug/ozone-debug-tests-ec3-2.robot | 49 +-------- .../debug/ozone-debug-tests-ec6-3.robot | 100 ------------------ .../smoketest/debug/ozone-debug-tests.robot | 8 +- .../main/smoketest/debug/ozone-debug.robot | 98 ----------------- .../debug/stale-datanode-checksum.robot | 35 ++++++ 12 files changed, 166 insertions(+), 349 deletions(-) rename hadoop-ozone/dist/src/main/smoketest/debug/{ozone-debug-dead-datanode.robot => block-existence-check.robot} (69%) rename hadoop-ozone/dist/src/main/smoketest/debug/{ozone-debug-corrupt-block.robot => corrupt-block-checksum.robot} (52%) create mode 100644 hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-keywords.robot delete mode 100644 hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-stale-datanode.robot delete mode 100644 hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec6-3.robot delete mode 100644 hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug.robot create mode 100644 hadoop-ozone/dist/src/main/smoketest/debug/stale-datanode-checksum.robot diff --git a/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh b/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh index b875bfcafc8e..2dde1f3a7a49 100755 --- a/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh +++ b/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh @@ -21,33 +21,46 @@ volume="cli-debug-volume${prefix}" bucket="cli-debug-bucket" key="testfile" +dn_container="ozonesecure-ha-datanode1-1" +container_db_path="/data/hdds/hdds/" +local_db_backup_path="${COMPOSE_DIR}/container_db_backup" +mkdir -p "${local_db_backup_path}" + +echo "Taking a backup of container.db" +docker exec "${dn_container}" find "${container_db_path}" -name "container.db" | while read -r db; do + docker cp "${dn_container}:${db}" "${local_db_backup_path}/container.db" +done + execute_robot_test ${SCM} -v "PREFIX:${prefix}" debug/ozone-debug-tests.robot -#TODO HDDS-12890: Add acceptance robot tests for ozone debug replicas verify # get block locations for key -#chunkinfo="${key}-blocks-${prefix}" -#docker-compose exec -T ${SCM} bash -c "ozone debug replicas chunk-info ${volume}/${bucket}/${key}" > "$chunkinfo" -#host="$(jq -r '.KeyLocations[0][0]["Datanode-HostName"]' ${chunkinfo})" -#container="${host%%.*}" -# -## corrupt the first block of key on one of the datanodes -#datafile="$(jq -r '.KeyLocations[0][0].Locations.files[0]' ${chunkinfo})" -#docker exec "${container}" sed -i -e '1s/^/a/' "${datafile}" -# -#execute_robot_test ${SCM} -v "PREFIX:${prefix}" -v "CORRUPT_DATANODE:${host}" debug/ozone-debug-corrupt-block.robot -# -#docker stop "${container}" -# -#wait_for_datanode "${container}" STALE 60 -#execute_robot_test ${SCM} -v "PREFIX:${prefix}" -v "STALE_DATANODE:${host}" debug/ozone-debug-stale-datanode.robot -# -#wait_for_datanode "${container}" DEAD 60 -#execute_robot_test ${SCM} -v "PREFIX:${prefix}" debug/ozone-debug-dead-datanode.robot -# -#docker start "${container}" -# -#wait_for_datanode "${container}" HEALTHY 60 +chunkinfo="${key}-blocks-${prefix}" +docker-compose exec -T ${SCM} bash -c "ozone debug replicas chunk-info ${volume}/${bucket}/${key}" > "$chunkinfo" +host="$(jq -r '.keyLocations[0][0].datanode["hostname"]' ${chunkinfo})" +container="${host%%.*}" + +# corrupt the first block of key on one of the datanodes +datafile="$(jq -r '.keyLocations[0][0].file' ${chunkinfo})" +docker exec "${container}" sed -i -e '1s/^/a/' "${datafile}" + +execute_robot_test ${SCM} -v "PREFIX:${prefix}" -v "CORRUPT_DATANODE:${host}" debug/corrupt-block-checksum.robot + +echo "Overwriting container.db with the backup db" +target_container_dir=$(docker exec "${container}" find "${container_db_path}" -name "container.db" | xargs dirname) +docker cp "${local_db_backup_path}/container.db" "${container}:${target_container_dir}/" + +docker stop "${container}" + +wait_for_datanode "${container}" STALE 60 +execute_robot_test ${SCM} -v "PREFIX:${prefix}" -v "STALE_DATANODE:${host}" debug/stale-datanode-checksum.robot + +wait_for_datanode "${container}" DEAD 60 + +docker start "${container}" + +wait_for_datanode "${container}" HEALTHY 60 + +execute_robot_test ${SCM} -v "PREFIX:${prefix}" -v "DATANODE:${host}" debug/block-existence-check.robot -#start_docker_env 9 -#execute_robot_test ${SCM} -v "PREFIX:${prefix}" debug/ozone-debug-tests-ec3-2.robot -#execute_robot_test ${SCM} -v "PREFIX:${prefix}" debug/ozone-debug-tests-ec6-3.robot +execute_robot_test ${OM} kinit.robot +execute_robot_test ${OM} -v "PREFIX:${prefix}" debug/ozone-debug-tests-ec3-2.robot diff --git a/hadoop-ozone/dist/src/main/compose/ozone/test.sh b/hadoop-ozone/dist/src/main/compose/ozone/test.sh index 929632a07d01..653a0aaf766e 100755 --- a/hadoop-ozone/dist/src/main/compose/ozone/test.sh +++ b/hadoop-ozone/dist/src/main/compose/ozone/test.sh @@ -49,7 +49,6 @@ execute_robot_test scm cli execute_robot_test scm admincli execute_robot_test scm -v USERNAME:httpfs httpfs -source "$COMPOSE_DIR/../common/replicas-test.sh" execute_robot_test scm -v SCHEME:o3fs -v BUCKET_TYPE:bucket -N ozonefs-o3fs-bucket ozonefs/ozonefs.robot execute_robot_test scm -v SCHEME:ofs -N ozonefs-obs ozonefs/ozonefs-obs.robot diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/docker-config b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/docker-config index 9e2e7cbf0a04..d759a78d1c15 100644 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/docker-config +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/docker-config @@ -47,6 +47,9 @@ OZONE-SITE.XML_ozone.scm.pipeline.creation.interval=30s OZONE-SITE.XML_ozone.scm.pipeline.owner.container.count=1 OZONE-SITE.XML_ozone.scm.datanode.id.dir=/data/metadata OZONE-SITE.XML_ozone.scm.block.client.address=scm +OZONE-SITE.XML_ozone.scm.stale.node.interval=30s +OZONE-SITE.XML_ozone.scm.dead.node.interval=45s +OZONE-SITE.XML_hdds.heartbeat.interval=5s OZONE-SITE.XML_ozone.metadata.dirs=/data/metadata OZONE-SITE.XML_ozone.handler.type=distributed OZONE-SITE.XML_ozone.scm.client.address=scm diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-dead-datanode.robot b/hadoop-ozone/dist/src/main/smoketest/debug/block-existence-check.robot similarity index 69% rename from hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-dead-datanode.robot rename to hadoop-ozone/dist/src/main/smoketest/debug/block-existence-check.robot index 33a2b78c3df0..087ce7fa1545 100644 --- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-dead-datanode.robot +++ b/hadoop-ozone/dist/src/main/smoketest/debug/block-existence-check.robot @@ -14,21 +14,22 @@ # limitations under the License. *** Settings *** -Documentation Test checksums in case of one datanode is dead +Documentation Test block existence in case a block does not exist Library OperatingSystem Resource ../lib/os.robot -Resource ozone-debug.robot +Resource ozone-debug-keywords.robot Test Timeout 5 minute + *** Variables *** ${PREFIX} ${EMPTY} +${DATANODE} ${EMPTY} ${VOLUME} cli-debug-volume${PREFIX} ${BUCKET} cli-debug-bucket ${TESTFILE} testfile +${CHECK_TYPE} blockExistence *** Test Cases *** -Test ozone debug checksums with one datanode DEAD - ${directory} = Execute replicas verify checksums CLI tool - Set Test Variable ${DIR} ${directory} - - ${count_files} = Count Files In Directory ${directory} - Should Be Equal As Integers ${count_files} 1 +Test block existence with a block missing on a replica + ${output} = Execute replicas verify block existence debug tool + ${json} = Parse replicas verify JSON output ${output} + Check to Verify Replicas ${json} ${CHECK_TYPE} ${DATANODE} Unable to find the block diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-corrupt-block.robot b/hadoop-ozone/dist/src/main/smoketest/debug/corrupt-block-checksum.robot similarity index 52% rename from hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-corrupt-block.robot rename to hadoop-ozone/dist/src/main/smoketest/debug/corrupt-block-checksum.robot index ba17ca9f085a..4bd06e1e5f94 100644 --- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-corrupt-block.robot +++ b/hadoop-ozone/dist/src/main/smoketest/debug/corrupt-block-checksum.robot @@ -14,33 +14,23 @@ # limitations under the License. *** Settings *** -Documentation Test read-replicas in case of a corrupt replica +Documentation Test checksums on a corrupt block replica Library OperatingSystem Resource ../lib/os.robot -Resource ozone-debug.robot +Resource ozone-debug-keywords.robot Test Timeout 5 minute + *** Variables *** ${PREFIX} ${EMPTY} +${CORRUPT_DATANODE} ${EMPTY} ${VOLUME} cli-debug-volume${PREFIX} ${BUCKET} cli-debug-bucket ${TESTFILE} testfile -${CORRUPT_DATANODE} ozone_datanode_1.ozone_default - -*** Test Cases *** -Test ozone debug checksums with corrupt block replica - ${directory} = Execute replicas verify checksums CLI tool - Set Test Variable ${DIR} ${directory} +${CHECK_TYPE} checksum - ${count_files} = Count Files In Directory ${directory} - Should Be Equal As Integers ${count_files} 1 - ${json} = Read Replicas Manifest - ${md5sum} = Execute md5sum ${TEMP_DIR}/${TESTFILE} | awk '{print $1}' - - FOR ${replica} IN RANGE 3 - ${datanode} = Set Variable ${json}[blocks][0][replicas][${replica}][hostname] - - IF '${datanode}' == '${CORRUPT_DATANODE}' - Should Contain ${json}[blocks][0][replicas][${replica}][exception] Checksum mismatch - END - END +*** Test Cases *** +Test checksums with a corrupt block replica + ${output} = Execute replicas verify checksums debug tool + ${json} = Parse replicas verify JSON output ${output} + Check to Verify Replicas ${json} ${CHECK_TYPE} ${CORRUPT_DATANODE} Checksum mismatched diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-keywords.robot b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-keywords.robot new file mode 100644 index 000000000000..532b8b3c4a22 --- /dev/null +++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-keywords.robot @@ -0,0 +1,63 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +*** Settings *** +Documentation Keyword definitions for Ozone Debug CLI tests +Library Collections +Resource ../lib/os.robot + +*** Variables *** +${OM_SERVICE_ID} %{OM_SERVICE_ID} + +*** Keywords *** +Execute replicas verify checksums debug tool + ${output} Execute ozone debug replicas verify --checksums o3://${OM_SERVICE_ID}/${VOLUME}/${BUCKET}/${TESTFILE} --output-dir ${TEMP_DIR} --all-results + [Return] ${output} + +Execute replicas verify block existence debug tool + ${output} Execute ozone debug replicas verify --block-existence o3://${OM_SERVICE_ID}/${VOLUME}/${BUCKET}/${TESTFILE} --output-dir ${TEMP_DIR} --all-results + [Return] ${output} + +Parse replicas verify JSON output + [Arguments] ${output} + ${json} = Evaluate json.loads('''${output}''') json + [Return] ${json} + +Check to Verify Replicas + [Arguments] ${json} ${check_type} ${faulty_datanode} ${expected_message} + ${replicas} = Get From Dictionary ${json['keys'][0]['blocks'][0]} replicas + FOR ${replica} IN @{replicas} + ${datanode} = Get From Dictionary ${replica} datanode + ${hostname} = Get From Dictionary ${datanode} hostname + Run Keyword If '${hostname}' == '${faulty_datanode}' Check Replica Failed ${replica} ${check_type} ${expected_message} + Run Keyword If '${hostname}' != '${faulty_datanode}' Check Replica Passed ${replica} ${check_type} + END + +Check Replica Failed + [Arguments] ${replica} ${check_type} ${expected_message} + ${checks} = Get From Dictionary ${replica} checks + ${check} = Get From List ${checks} 0 + Should Be Equal ${check['type']} ${check_type} + Should Be Equal ${check['pass']} ${False} + Should Contain ${check['failures'][0]['message']} ${expected_message} + +Check Replica Passed + [Arguments] ${replica} ${check_type} + ${checks} = Get From Dictionary ${replica} checks + ${check} = Get From List ${checks} 0 + Should Be Equal ${check['type']} ${check_type} + Should Be True ${check['completed']} + Should Be True ${check['pass']} + Should Be Empty ${check['failures']} diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-stale-datanode.robot b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-stale-datanode.robot deleted file mode 100644 index 69448f21e6b9..000000000000 --- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-stale-datanode.robot +++ /dev/null @@ -1,46 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -*** Settings *** -Documentation Test checksums in case of one datanode is stale -Library OperatingSystem -Resource ../lib/os.robot -Resource ozone-debug.robot -Test Timeout 5 minute -*** Variables *** -${PREFIX} ${EMPTY} -${VOLUME} cli-debug-volume${PREFIX} -${BUCKET} cli-debug-bucket -${TESTFILE} testfile -${STALE_DATANODE} ozone_datanode_1.ozone_default - -*** Test Cases *** -Test ozone debug checksums with one datanode STALE - ${directory} = Execute replicas verify checksums CLI tool - Set Test Variable ${DIR} ${directory} - - ${count_files} = Count Files In Directory ${directory} - Should Be Equal As Integers ${count_files} 1 - - ${json} = Read Replicas Manifest - ${md5sum} = Execute md5sum ${TEMP_DIR}/${TESTFILE} | awk '{print $1}' - - FOR ${replica} IN RANGE 3 - ${datanode} = Set Variable ${json}[blocks][0][replicas][${replica}][hostname] - - IF '${datanode}' == '${STALE_DATANODE}' - Verify Stale Replica ${json} ${replica} - END - END diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec3-2.robot b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec3-2.robot index e10e03443d60..7b88f97254c9 100644 --- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec3-2.robot +++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec3-2.robot @@ -18,14 +18,14 @@ Documentation Test ozone Debug CLI for EC(3,2) replicated keys Library OperatingSystem Library Process Resource ../lib/os.robot -Resource ozone-debug.robot +Resource ozone-debug-keywords.robot Test Timeout 5 minute Suite Setup Create Volume Bucket *** Variables *** ${PREFIX} ${EMPTY} -${VOLUME} cli-debug-volume${PREFIX} -${BUCKET} cli-debug-bucket +${VOLUME} cli-debug-ec-volume${PREFIX} +${BUCKET} cli-debug-ec-bucket ${TESTFILE} testfile ${EC_DATA} 3 ${EC_PARITY} 2 @@ -43,49 +43,6 @@ Create EC key Execute ozone sh key put o3://${OM_SERVICE_ID}/${VOLUME}/${BUCKET}/testfile ${TEMP_DIR}/testfile -r rs-${EC_DATA}-${EC_PARITY}-1024k -t EC *** Test Cases *** -0 data block - Create EC key 1000 0 - ${directory} = Execute replicas verify checksums CLI tool - ${count_files} = Count Files In Directory ${directory} - Should Be Equal As Integers ${count_files} 1 - -1 data block - Create EC key 1048576 1 - ${directory} = Execute replicas verify checksums CLI tool - ${count_files} = Count Files In Directory ${directory} - Should Be Equal As Integers ${count_files} 1 - -2 data blocks - Create EC key 1048576 2 - ${directory} = Execute replicas verify checksums CLI tool - ${count_files} = Count Files In Directory ${directory} - Should Be Equal As Integers ${count_files} 1 - -3 data blocks - Create EC key 1048576 3 - ${directory} = Execute replicas verify checksums CLI tool - ${count_files} = Count Files In Directory ${directory} - Should Be Equal As Integers ${count_files} 1 - -3 data blocks and partial stripe - Create EC key 1000000 4 - ${directory} = Execute replicas verify checksums CLI tool - ${count_files} = Count Files In Directory ${directory} - ${sum_size_last_stripe} = Evaluate ((1000000 * 4) % 1048576) * 3 - Should Be Equal As Integers ${count_files} 1 - -4 data blocks and partial stripe - Create EC key 1000000 5 - ${directory} = Execute replicas verify checksums CLI tool - ${count_files} = Count Files In Directory ${directory} - Should Be Equal As Integers ${count_files} 1 - -6 data blocks - Create EC key 1048576 6 - ${directory} = Execute replicas verify checksums CLI tool - ${count_files} = Count Files In Directory ${directory} - Should Be Equal As Integers ${count_files} 1 - Test ozone debug replicas chunk-info Create EC key 1048576 3 ${count} = Execute ozone debug replicas chunk-info o3://${OM_SERVICE_ID}/${VOLUME}/${BUCKET}/testfile | jq '[.keyLocations[0][] | select(.file | test("\\\\.block$")) | .file] | length' diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec6-3.robot b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec6-3.robot deleted file mode 100644 index c3b8c7ad1a3a..000000000000 --- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests-ec6-3.robot +++ /dev/null @@ -1,100 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -*** Settings *** -Documentation Test ozone Debug CLI for EC(6,3) replicated key -Library OperatingSystem -Library Process -Resource ../lib/os.robot -Resource ozone-debug.robot -Test Timeout 5 minute - -*** Variables *** -${PREFIX} ${EMPTY} -${VOLUME} cli-debug-volume${PREFIX} -${BUCKET} cli-debug-bucket -${TESTFILE} testfile -${EC_DATA} 6 -${EC_PARITY} 3 -${OM_SERVICE_ID} %{OM_SERVICE_ID} - -*** Keywords *** -Create EC key - [arguments] ${bs} ${count} - - Execute dd if=/dev/urandom of=${TEMP_DIR}/testfile bs=${bs} count=${count} - Execute ozone sh key put o3://${OM_SERVICE_ID}/${VOLUME}/${BUCKET}/testfile ${TEMP_DIR}/testfile -r rs-${EC_DATA}-${EC_PARITY}-1024k -t EC - -*** Test Cases *** -0 data block - Create EC key 1048576 0 - ${directory} = Execute replicas verify checksums CLI tool - ${count_files} = Count Files In Directory ${directory} - Should Be Equal As Integers ${count_files} 1 - -1 data block - Create EC key 1048576 1 - ${directory} = Execute replicas verify checksums CLI tool - ${count_files} = Count Files In Directory ${directory} - Should Be Equal As Integers ${count_files} 1 - -2 data blocks - Create EC key 1048576 2 - ${directory} = Execute replicas verify checksums CLI tool - ${count_files} = Count Files In Directory ${directory} - Should Be Equal As Integers ${count_files} 1 - -3 data blocks - Create EC key 1048576 3 - ${directory} = Execute replicas verify checksums CLI tool - ${count_files} = Count Files In Directory ${directory} - Should Be Equal As Integers ${count_files} 1 - -4 data blocks - Create EC key 1048576 4 - ${directory} = Execute replicas verify checksums CLI tool - ${count_files} = Count Files In Directory ${directory} - Should Be Equal As Integers ${count_files} 1 - -5 data blocks - Create EC key 1048576 5 - ${directory} = Execute replicas verify checksums CLI tool - ${count_files} = Count Files In Directory ${directory} - Should Be Equal As Integers ${count_files} 1 - -6 data blocks - Create EC key 1048576 6 - ${directory} = Execute replicas verify checksums CLI tool - ${count_files} = Count Files In Directory ${directory} - Should Be Equal As Integers ${count_files} 1 - -6 data blocks and partial stripe - Create EC key 1000000 7 - ${directory} = Execute replicas verify checksums CLI tool - ${count_files} = Count Files In Directory ${directory} - ${sum_size_last_stripe} = Evaluate ((1000000 * 7) % 1048576) * 4 - Should Be Equal As Integers ${count_files} 1 - -7 data blocks and partial stripe - Create EC key 1000000 8 - ${directory} = Execute replicas verify checksums CLI tool - ${count_files} = Count Files In Directory ${directory} - ${sum_size_last_stripe} = Evaluate 1048576 * 4 + ((1000000 * 8) % 1048576) - Should Be Equal As Integers ${count_files} 1 - -Test ozone debug replicas chunk-info - Create EC key 1048576 6 - ${count} = Execute ozone debug replicas chunk-info o3://${OM_SERVICE_ID}/${VOLUME}/${BUCKET}/testfile | jq '[.keyLocations[0][] | select(.file | test("\\\\.block$")) | .file] | length' - Should Be Equal As Integers ${count} 9 diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests.robot b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests.robot index f402e29d78af..53a75366b590 100644 --- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests.robot +++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-tests.robot @@ -14,10 +14,10 @@ # limitations under the License. *** Settings *** -Documentation Test ozone Debug CLI +Documentation Test ozone debug CLI Library OperatingSystem Resource ../lib/os.robot -Resource ozone-debug.robot +Resource ozone-debug-keywords.robot Test Timeout 5 minute Suite Setup Write keys @@ -38,9 +38,9 @@ Write keys *** Test Cases *** Test ozone debug replicas verify checksums ${output} = Execute ozone debug replicas verify --checksums --block-existence --container-state o3://${OM_SERVICE_ID}/${VOLUME}/${BUCKET}/${TESTFILE} --output-dir ${TEMP_DIR} - ${json} = Evaluate json.loads('''${output}''') json + ${json} = Parse replicas verify JSON output ${output} - # 'keys' array should be empty if all keys and their replicas passed checksum verification + # 'keys' array should be empty if all keys and their replicas passed Should Be Empty ${json}[keys] Should Be True ${json}[pass] ${True} diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug.robot b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug.robot deleted file mode 100644 index c8eb08924756..000000000000 --- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug.robot +++ /dev/null @@ -1,98 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -*** Settings *** -Documentation Keyword definitions for Ozone Debug CLI tests -Library Collections -Resource ../lib/os.robot - -*** Variables *** -${OM_SERVICE_ID} %{OM_SERVICE_ID} - -*** Keywords *** -Execute replicas verify checksums CLI tool - Execute ozone debug -Dozone.network.topology.aware.read=true replicas verify --checksums --output-dir ${TEMP_DIR} o3://${OM_SERVICE_ID}/${VOLUME}/${BUCKET}/${TESTFILE} - ${directory} = Execute ls -d ${TEMP_DIR}/${VOLUME}_${BUCKET}_${TESTFILE}_*/ | tail -n 1 - Directory Should Exist ${directory} - File Should Exist ${directory}/${TESTFILE}_manifest - [Return] ${directory} - -Read Replicas Manifest - ${manifest} = Get File ${DIR}/${TESTFILE}_manifest - ${json} = Evaluate json.loads('''${manifest}''') json - Validate JSON ${json} - [return] ${json} - -Validate JSON - [arguments] ${json} - Should Be Equal ${json}[filename] ${VOLUME}/${BUCKET}/${TESTFILE} - ${file_size} = Get File Size ${TEMP_DIR}/${TESTFILE} - Should Be Equal ${json}[datasize] ${file_size} - Should Be Equal As Integers ${json}[blocks][0][blockIndex] 1 - Should Not Be Empty Convert To String ${json}[blocks][0][containerId] - Should Not Be Empty Convert To String ${json}[blocks][0][localId] - Should Be Equal As Integers ${json}[blocks][0][length] 1048576 - Should Not Be Empty Convert To String ${json}[blocks][0][offset] - Should Be Equal As Integers ${json}[blocks][1][blockIndex] 2 - Should Not Be Empty Convert To String ${json}[blocks][1][containerId] - Should Not Be Empty Convert To String ${json}[blocks][1][localId] - Should Be Equal As Integers ${json}[blocks][1][length] 451424 - Should Not Be Empty Convert To String ${json}[blocks][1][offset] - -Get Replica Filenames - [arguments] ${json} ${replica} - - ${list} = Create List - - FOR ${block} IN RANGE 2 - ${datanode} = Set Variable ${json}[blocks][${block}][replicas][${replica}][hostname] - ${n} = Evaluate ${block} + 1 - Append To List ${list} ${DIR}/${TESTFILE}_block${n}_${datanode} - END - - ${filenames} = Catenate @{list} - - [return] ${filenames} - -Verify Healthy Replica - [arguments] ${json} ${replica} ${expected_md5sum} - - ${block_filenames} = Get Replica Filenames ${json} ${replica} - ${md5sum} = Execute cat ${block_filenames} | md5sum | awk '{print $1}' - Should Be Equal ${md5sum} ${expected_md5sum} - -Verify Healthy EC Replica - [arguments] ${directory} ${block} ${expected_block_size} - - ${block_size} = Execute ls -l ${directory} | grep "testfile_block${block}_ozone-datanode-.*\.ozone_default" | awk '{sum += $5} END {print sum}' - Should Be Equal As Integers ${block_size} ${expected_block_size} - -Verify Corrupt Replica - [arguments] ${json} ${replica} ${valid_md5sum} - - ${block_filenames} = Get Replica Filenames ${json} ${replica} - ${md5sum} = Execute cat ${block_filenames} | md5sum | awk '{print $1}' - Should Not Be Equal ${md5sum} ${valid_md5sum} - -Verify Stale Replica - [arguments] ${json} ${replica} - - FOR ${block} IN RANGE 2 - ${datanode} = Set Variable ${json}[blocks][${block}][replicas][${replica}][hostname] - - IF '${datanode}' == '${STALE_DATANODE}' - Should Contain ${json}[blocks][${block}][replicas][${replica}][exception] UNAVAILABLE - END - END diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/stale-datanode-checksum.robot b/hadoop-ozone/dist/src/main/smoketest/debug/stale-datanode-checksum.robot new file mode 100644 index 000000000000..56f2133cd0fc --- /dev/null +++ b/hadoop-ozone/dist/src/main/smoketest/debug/stale-datanode-checksum.robot @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +*** Settings *** +Documentation Test checksums in case of a stale datanode +Library OperatingSystem +Resource ../lib/os.robot +Resource ozone-debug-keywords.robot +Test Timeout 5 minute + +*** Variables *** +${PREFIX} ${EMPTY} +${STALE_DATANODE} ${EMPTY} +${VOLUME} cli-debug-volume${PREFIX} +${BUCKET} cli-debug-bucket +${TESTFILE} testfile +${CHECK_TYPE} checksum + +*** Test Cases *** +Test checksums with a stale datanode + ${output} = Execute replicas verify checksums debug tool + ${json} = Parse replicas verify JSON output ${output} + Check to Verify Replicas ${json} ${CHECK_TYPE} ${STALE_DATANODE} UNAVAILABLE From 4719c1171cc39e95e361928dd40d7a78f3055718 Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Thu, 12 Jun 2025 17:26:35 +0530 Subject: [PATCH 02/17] Wait time for datanode to be healthy increased to 90s --- hadoop-ozone/dist/src/main/compose/common/replicas-test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh b/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh index 2dde1f3a7a49..40341b20a8c3 100755 --- a/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh +++ b/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh @@ -58,7 +58,7 @@ wait_for_datanode "${container}" DEAD 60 docker start "${container}" -wait_for_datanode "${container}" HEALTHY 60 +wait_for_datanode "${container}" HEALTHY 90 execute_robot_test ${SCM} -v "PREFIX:${prefix}" -v "DATANODE:${host}" debug/block-existence-check.robot From da67def57e513ba007d750e3393596621b8ab793 Mon Sep 17 00:00:00 2001 From: Zita Dombi Date: Fri, 13 Jun 2025 11:41:46 +0200 Subject: [PATCH 03/17] Add more logging Change-Id: I4e45e522a69c8ead36fa53df503f5c149a5fe555 --- hadoop-ozone/dist/src/main/compose/common/replicas-test.sh | 4 ++-- hadoop-ozone/dist/src/main/compose/testlib.sh | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh b/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh index 40341b20a8c3..6ff417dd19ce 100755 --- a/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh +++ b/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh @@ -49,14 +49,14 @@ echo "Overwriting container.db with the backup db" target_container_dir=$(docker exec "${container}" find "${container_db_path}" -name "container.db" | xargs dirname) docker cp "${local_db_backup_path}/container.db" "${container}:${target_container_dir}/" -docker stop "${container}" +docker-compose stop "${container}" wait_for_datanode "${container}" STALE 60 execute_robot_test ${SCM} -v "PREFIX:${prefix}" -v "STALE_DATANODE:${host}" debug/stale-datanode-checksum.robot wait_for_datanode "${container}" DEAD 60 -docker start "${container}" +docker-compose start "${container}" wait_for_datanode "${container}" HEALTHY 90 diff --git a/hadoop-ozone/dist/src/main/compose/testlib.sh b/hadoop-ozone/dist/src/main/compose/testlib.sh index 8888127ee951..f3d64d88c663 100755 --- a/hadoop-ozone/dist/src/main/compose/testlib.sh +++ b/hadoop-ozone/dist/src/main/compose/testlib.sh @@ -575,6 +575,7 @@ wait_for_datanode() { local command="ozone admin datanode list" docker-compose exec -T ${SCM} bash -c "$command" | grep -A2 "$datanode" > /tmp/dn_check local health=$(grep -c "State: $state" /tmp/dn_check) + echo "$datanode is $state" if [[ "$health" -eq 1 ]]; then echo "$datanode is $state" From 3b7049b1bfa6e6b0e3ecff250fd6619325f3472a Mon Sep 17 00:00:00 2001 From: Zita Dombi Date: Fri, 13 Jun 2025 13:11:57 +0200 Subject: [PATCH 04/17] Use docker commands and remove dead state Change-Id: I1f44cae5a7c6a093c5f568a96fde4d1ba743e33f --- hadoop-ozone/dist/src/main/compose/common/replicas-test.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh b/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh index 6ff417dd19ce..abc14483bc54 100755 --- a/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh +++ b/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh @@ -49,14 +49,12 @@ echo "Overwriting container.db with the backup db" target_container_dir=$(docker exec "${container}" find "${container_db_path}" -name "container.db" | xargs dirname) docker cp "${local_db_backup_path}/container.db" "${container}:${target_container_dir}/" -docker-compose stop "${container}" +docker stop "${container}" wait_for_datanode "${container}" STALE 60 execute_robot_test ${SCM} -v "PREFIX:${prefix}" -v "STALE_DATANODE:${host}" debug/stale-datanode-checksum.robot -wait_for_datanode "${container}" DEAD 60 - -docker-compose start "${container}" +docker start "${container}" wait_for_datanode "${container}" HEALTHY 90 From 72d8c44aea978496071c2b1002e843d6439111f2 Mon Sep 17 00:00:00 2001 From: Zita Dombi Date: Mon, 16 Jun 2025 00:43:32 +0200 Subject: [PATCH 05/17] Remove all my changes Change-Id: I3b5c93d325ca1fcbf81eaab2c49ecbcbfbb5662f --- hadoop-ozone/dist/src/main/compose/common/replicas-test.sh | 2 ++ hadoop-ozone/dist/src/main/compose/testlib.sh | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh b/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh index abc14483bc54..40341b20a8c3 100755 --- a/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh +++ b/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh @@ -54,6 +54,8 @@ docker stop "${container}" wait_for_datanode "${container}" STALE 60 execute_robot_test ${SCM} -v "PREFIX:${prefix}" -v "STALE_DATANODE:${host}" debug/stale-datanode-checksum.robot +wait_for_datanode "${container}" DEAD 60 + docker start "${container}" wait_for_datanode "${container}" HEALTHY 90 diff --git a/hadoop-ozone/dist/src/main/compose/testlib.sh b/hadoop-ozone/dist/src/main/compose/testlib.sh index f3d64d88c663..8888127ee951 100755 --- a/hadoop-ozone/dist/src/main/compose/testlib.sh +++ b/hadoop-ozone/dist/src/main/compose/testlib.sh @@ -575,7 +575,6 @@ wait_for_datanode() { local command="ozone admin datanode list" docker-compose exec -T ${SCM} bash -c "$command" | grep -A2 "$datanode" > /tmp/dn_check local health=$(grep -c "State: $state" /tmp/dn_check) - echo "$datanode is $state" if [[ "$health" -eq 1 ]]; then echo "$datanode is $state" From fa9d446e04e188496b12743676af9464ab7ff0a6 Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" Date: Mon, 16 Jun 2025 05:53:40 +0200 Subject: [PATCH 06/17] TEMP fewer tests --- .github/workflows/ci.yml | 16 +++++++--------- .../compose/ozonesecure-ha/test-debug-tools.sh | 2 +- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f1bc6bb5288a..c9ed56a03ee5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,7 +27,7 @@ on: default: '' required: false env: - BUILD_ARGS: "-Pdist -Psrc -Dmaven.javadoc.skip=true -Drocks_tools_native" + BUILD_ARGS: "-Pdist -Dmaven.javadoc.skip=true -DskipShade" # Minimum required Java version for running Ozone is defined in pom.xml (javac.version). TEST_JAVA_VERSION: 21 # JDK version used by CI build and tests; should match the JDK version in apache/ozone-runner image # MAVEN_ARGS and MAVEN_OPTS are duplicated in check.yml, please keep in sync @@ -43,13 +43,13 @@ jobs: outputs: acceptance-suites: ${{ steps.acceptance-suites.outputs.suites }} integration-suites: ${{ steps.integration-suites.outputs.suites }} - needs-basic-check: ${{ steps.categorize-basic-checks.outputs.needs-basic-check }} + needs-basic-check: false basic-checks: ${{ steps.categorize-basic-checks.outputs.basic-checks }} needs-build: ${{ steps.selective-checks.outputs.needs-build }} - needs-compile: ${{ steps.selective-checks.outputs.needs-compile }} + needs-compile: false needs-compose-tests: ${{ steps.selective-checks.outputs.needs-compose-tests }} - needs-integration-tests: ${{ steps.selective-checks.outputs.needs-integration-tests }} - needs-kubernetes-tests: ${{ steps.selective-checks.outputs.needs-kubernetes-tests }} + needs-integration-tests: false + needs-kubernetes-tests: false sha: ${{ steps.get-sha.outputs.sha }} # `env` context cannot be used when calling reusable workflow, so we need to convert these to `outputs` build-args: ${{ env.BUILD_ARGS }} @@ -223,9 +223,6 @@ jobs: needs: - build-info - build - - basic - - dependency - - license if: needs.build-info.outputs.needs-compose-tests == 'true' uses: ./.github/workflows/check.yml secrets: inherit @@ -241,7 +238,8 @@ jobs: with-coverage: ${{ fromJSON(needs.build-info.outputs.with-coverage) }} strategy: matrix: - suite: ${{ fromJson(needs.build-info.outputs.acceptance-suites) }} + suite: + - tools fail-fast: false kubernetes: diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-debug-tools.sh b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-debug-tools.sh index caa929bc6812..91c0b98ad3cd 100644 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-debug-tools.sh +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-debug-tools.sh @@ -15,7 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -#suite:HA-secure +#suite:tools set -u -o pipefail From c2e98a2f2a991078d0570f5c8b2f1c5a958961d1 Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" Date: Mon, 16 Jun 2025 05:53:59 +0200 Subject: [PATCH 07/17] TEMP list container dir --- hadoop-ozone/dist/src/main/compose/common/replicas-test.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh b/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh index 40341b20a8c3..dbea7587409b 100755 --- a/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh +++ b/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh @@ -47,7 +47,9 @@ execute_robot_test ${SCM} -v "PREFIX:${prefix}" -v "CORRUPT_DATANODE:${host}" de echo "Overwriting container.db with the backup db" target_container_dir=$(docker exec "${container}" find "${container_db_path}" -name "container.db" | xargs dirname) +docker exec "${container}" find "${target_container_dir}" -ls docker cp "${local_db_backup_path}/container.db" "${container}:${target_container_dir}/" +docker exec "${container}" find "${target_container_dir}" -ls docker stop "${container}" From e6c033c75a334983ef68a648165019ba115c141a Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" Date: Mon, 16 Jun 2025 06:31:19 +0200 Subject: [PATCH 08/17] chown container dir --- hadoop-ozone/dist/src/main/compose/common/replicas-test.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh b/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh index dbea7587409b..201bcd1b179f 100755 --- a/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh +++ b/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh @@ -49,6 +49,7 @@ echo "Overwriting container.db with the backup db" target_container_dir=$(docker exec "${container}" find "${container_db_path}" -name "container.db" | xargs dirname) docker exec "${container}" find "${target_container_dir}" -ls docker cp "${local_db_backup_path}/container.db" "${container}:${target_container_dir}/" +docker exec "${container}" sudo chown -R hadoop:hadoop "${target_container_dir}" docker exec "${container}" find "${target_container_dir}" -ls docker stop "${container}" From 449a9cdc1b5703fcf605ecff43100a3acb649f25 Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" Date: Mon, 16 Jun 2025 06:32:08 +0200 Subject: [PATCH 09/17] fixup for fa9d446e04 TEMP fewer tests --- .github/workflows/ci.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c9ed56a03ee5..adcfe0918b69 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -177,6 +177,7 @@ jobs: needs: - build-info - build + - basic uses: ./.github/workflows/check.yml secrets: inherit with: @@ -190,6 +191,7 @@ jobs: needs: - build-info - build + - basic uses: ./.github/workflows/check.yml secrets: inherit with: From fc75dd4bdedbae09608cbeab023e6af1d400476b Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Mon, 16 Jun 2025 10:19:36 +0530 Subject: [PATCH 10/17] Revert Wait time for datanode to be healthy increased to 90s --- hadoop-ozone/dist/src/main/compose/common/replicas-test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh b/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh index 201bcd1b179f..2a25c288e3a1 100755 --- a/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh +++ b/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh @@ -61,7 +61,7 @@ wait_for_datanode "${container}" DEAD 60 docker start "${container}" -wait_for_datanode "${container}" HEALTHY 90 +wait_for_datanode "${container}" HEALTHY 60 execute_robot_test ${SCM} -v "PREFIX:${prefix}" -v "DATANODE:${host}" debug/block-existence-check.robot From a4934ae07e20267c5db6ec7d0b17c839e0341081 Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" Date: Mon, 16 Jun 2025 06:55:55 +0200 Subject: [PATCH 11/17] Revert "fixup for fa9d446e04 TEMP fewer tests" This reverts commit 449a9cdc1b5703fcf605ecff43100a3acb649f25. --- .github/workflows/ci.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f65660335bae..2a91afb54587 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -177,7 +177,6 @@ jobs: needs: - build-info - build - - basic uses: ./.github/workflows/check.yml secrets: inherit with: @@ -191,7 +190,6 @@ jobs: needs: - build-info - build - - basic uses: ./.github/workflows/check.yml secrets: inherit with: From 795002dcdbf3ca723c16b0e94acc912708cb4b02 Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" Date: Mon, 16 Jun 2025 06:55:55 +0200 Subject: [PATCH 12/17] Revert "chown container dir" This reverts commit e6c033c75a334983ef68a648165019ba115c141a. --- hadoop-ozone/dist/src/main/compose/common/replicas-test.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh b/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh index 2a25c288e3a1..8628b5a84600 100755 --- a/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh +++ b/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh @@ -49,7 +49,6 @@ echo "Overwriting container.db with the backup db" target_container_dir=$(docker exec "${container}" find "${container_db_path}" -name "container.db" | xargs dirname) docker exec "${container}" find "${target_container_dir}" -ls docker cp "${local_db_backup_path}/container.db" "${container}:${target_container_dir}/" -docker exec "${container}" sudo chown -R hadoop:hadoop "${target_container_dir}" docker exec "${container}" find "${target_container_dir}" -ls docker stop "${container}" From 036e06ca06b6476549148fbcdb8de03eb5eed21c Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" Date: Mon, 16 Jun 2025 06:55:55 +0200 Subject: [PATCH 13/17] Revert "TEMP list container dir" This reverts commit c2e98a2f2a991078d0570f5c8b2f1c5a958961d1. --- hadoop-ozone/dist/src/main/compose/common/replicas-test.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh b/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh index 8628b5a84600..2dde1f3a7a49 100755 --- a/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh +++ b/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh @@ -47,9 +47,7 @@ execute_robot_test ${SCM} -v "PREFIX:${prefix}" -v "CORRUPT_DATANODE:${host}" de echo "Overwriting container.db with the backup db" target_container_dir=$(docker exec "${container}" find "${container_db_path}" -name "container.db" | xargs dirname) -docker exec "${container}" find "${target_container_dir}" -ls docker cp "${local_db_backup_path}/container.db" "${container}:${target_container_dir}/" -docker exec "${container}" find "${target_container_dir}" -ls docker stop "${container}" From a58706db565d915b2ba83bbcd62a70c5f6b57b7d Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" Date: Mon, 16 Jun 2025 06:55:55 +0200 Subject: [PATCH 14/17] Revert "TEMP fewer tests" This reverts commit fa9d446e04e188496b12743676af9464ab7ff0a6. --- .github/workflows/ci.yml | 16 +++++++++------- .../compose/ozonesecure-ha/test-debug-tools.sh | 2 +- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2a91afb54587..04c944c9b4a5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -27,7 +27,7 @@ on: default: '' required: false env: - BUILD_ARGS: "-Pdist -Dmaven.javadoc.skip=true -DskipShade" + BUILD_ARGS: "-Pdist -Psrc -Dmaven.javadoc.skip=true -Drocks_tools_native" # Minimum required Java version for running Ozone is defined in pom.xml (javac.version). TEST_JAVA_VERSION: 21 # JDK version used by CI build and tests; should match the JDK version in apache/ozone-runner image # MAVEN_ARGS and MAVEN_OPTS are duplicated in check.yml, please keep in sync @@ -43,13 +43,13 @@ jobs: outputs: acceptance-suites: ${{ steps.acceptance-suites.outputs.suites }} integration-suites: ${{ steps.integration-suites.outputs.suites }} - needs-basic-check: false + needs-basic-check: ${{ steps.categorize-basic-checks.outputs.needs-basic-check }} basic-checks: ${{ steps.categorize-basic-checks.outputs.basic-checks }} needs-build: ${{ steps.selective-checks.outputs.needs-build }} - needs-compile: false + needs-compile: ${{ steps.selective-checks.outputs.needs-compile }} needs-compose-tests: ${{ steps.selective-checks.outputs.needs-compose-tests }} - needs-integration-tests: false - needs-kubernetes-tests: false + needs-integration-tests: ${{ steps.selective-checks.outputs.needs-integration-tests }} + needs-kubernetes-tests: ${{ steps.selective-checks.outputs.needs-kubernetes-tests }} sha: ${{ steps.get-sha.outputs.sha }} # `env` context cannot be used when calling reusable workflow, so we need to convert these to `outputs` build-args: ${{ env.BUILD_ARGS }} @@ -223,6 +223,9 @@ jobs: needs: - build-info - build + - basic + - dependency + - license if: needs.build-info.outputs.needs-compose-tests == 'true' uses: ./.github/workflows/check.yml secrets: inherit @@ -238,8 +241,7 @@ jobs: with-coverage: ${{ fromJSON(needs.build-info.outputs.with-coverage) }} strategy: matrix: - suite: - - tools + suite: ${{ fromJson(needs.build-info.outputs.acceptance-suites) }} fail-fast: false kubernetes: diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-debug-tools.sh b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-debug-tools.sh index 91c0b98ad3cd..caa929bc6812 100644 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-debug-tools.sh +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/test-debug-tools.sh @@ -15,7 +15,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -#suite:tools +#suite:HA-secure set -u -o pipefail From a9e27307d2aaa56db2d01e8a7443904c5b8ce0ed Mon Sep 17 00:00:00 2001 From: "Doroszlai, Attila" Date: Mon, 16 Jun 2025 06:31:19 +0200 Subject: [PATCH 15/17] fix permissions in dn1 --- hadoop-ozone/dist/src/main/compose/common/replicas-test.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh b/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh index 2dde1f3a7a49..2fc033eaff6a 100755 --- a/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh +++ b/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh @@ -48,6 +48,7 @@ execute_robot_test ${SCM} -v "PREFIX:${prefix}" -v "CORRUPT_DATANODE:${host}" de echo "Overwriting container.db with the backup db" target_container_dir=$(docker exec "${container}" find "${container_db_path}" -name "container.db" | xargs dirname) docker cp "${local_db_backup_path}/container.db" "${container}:${target_container_dir}/" +docker exec "${container}" sudo chown -R hadoop:hadoop "${target_container_dir}" docker stop "${container}" From b79e95c0fca2749d543184acfa362e36137f0bb3 Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Mon, 16 Jun 2025 10:44:44 +0530 Subject: [PATCH 16/17] Remove --output-dir option --- .../dist/src/main/smoketest/debug/ozone-debug-keywords.robot | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-keywords.robot b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-keywords.robot index 532b8b3c4a22..19257a62c72d 100644 --- a/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-keywords.robot +++ b/hadoop-ozone/dist/src/main/smoketest/debug/ozone-debug-keywords.robot @@ -23,11 +23,11 @@ ${OM_SERVICE_ID} %{OM_SERVICE_ID} *** Keywords *** Execute replicas verify checksums debug tool - ${output} Execute ozone debug replicas verify --checksums o3://${OM_SERVICE_ID}/${VOLUME}/${BUCKET}/${TESTFILE} --output-dir ${TEMP_DIR} --all-results + ${output} Execute ozone debug replicas verify --checksums o3://${OM_SERVICE_ID}/${VOLUME}/${BUCKET}/${TESTFILE} --all-results [Return] ${output} Execute replicas verify block existence debug tool - ${output} Execute ozone debug replicas verify --block-existence o3://${OM_SERVICE_ID}/${VOLUME}/${BUCKET}/${TESTFILE} --output-dir ${TEMP_DIR} --all-results + ${output} Execute ozone debug replicas verify --block-existence o3://${OM_SERVICE_ID}/${VOLUME}/${BUCKET}/${TESTFILE} --all-results [Return] ${output} Parse replicas verify JSON output From 4cfbd11072dbb7e2ac43c42032e36547d8a94480 Mon Sep 17 00:00:00 2001 From: sarvekshayr Date: Wed, 18 Jun 2025 10:22:20 +0530 Subject: [PATCH 17/17] Remove wait for dead datanode and change doc title --- hadoop-ozone/dist/src/main/compose/common/replicas-test.sh | 2 -- hadoop-ozone/dist/src/main/compose/ozonesecure-ha/docker-config | 1 - .../dist/src/main/smoketest/debug/block-existence-check.robot | 2 +- 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh b/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh index 2fc033eaff6a..520ab8c47909 100755 --- a/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh +++ b/hadoop-ozone/dist/src/main/compose/common/replicas-test.sh @@ -55,8 +55,6 @@ docker stop "${container}" wait_for_datanode "${container}" STALE 60 execute_robot_test ${SCM} -v "PREFIX:${prefix}" -v "STALE_DATANODE:${host}" debug/stale-datanode-checksum.robot -wait_for_datanode "${container}" DEAD 60 - docker start "${container}" wait_for_datanode "${container}" HEALTHY 60 diff --git a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/docker-config b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/docker-config index d759a78d1c15..c5ab0bf9443f 100644 --- a/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/docker-config +++ b/hadoop-ozone/dist/src/main/compose/ozonesecure-ha/docker-config @@ -48,7 +48,6 @@ OZONE-SITE.XML_ozone.scm.pipeline.owner.container.count=1 OZONE-SITE.XML_ozone.scm.datanode.id.dir=/data/metadata OZONE-SITE.XML_ozone.scm.block.client.address=scm OZONE-SITE.XML_ozone.scm.stale.node.interval=30s -OZONE-SITE.XML_ozone.scm.dead.node.interval=45s OZONE-SITE.XML_hdds.heartbeat.interval=5s OZONE-SITE.XML_ozone.metadata.dirs=/data/metadata OZONE-SITE.XML_ozone.handler.type=distributed diff --git a/hadoop-ozone/dist/src/main/smoketest/debug/block-existence-check.robot b/hadoop-ozone/dist/src/main/smoketest/debug/block-existence-check.robot index 087ce7fa1545..4c319f169d3e 100644 --- a/hadoop-ozone/dist/src/main/smoketest/debug/block-existence-check.robot +++ b/hadoop-ozone/dist/src/main/smoketest/debug/block-existence-check.robot @@ -14,7 +14,7 @@ # limitations under the License. *** Settings *** -Documentation Test block existence in case a block does not exist +Documentation Test existence of a block on a datanode Library OperatingSystem Resource ../lib/os.robot Resource ozone-debug-keywords.robot