From 259a9b87fa23cc07a3a0dd2637dc81bf2c18e797 Mon Sep 17 00:00:00 2001 From: "anastasia.filippova" Date: Thu, 7 Mar 2024 11:00:45 +0300 Subject: [PATCH 1/3] HDDS-10612. Add Robot test to verify Container Balancer for RATIS containers --- .../dist/src/main/compose/ozone-balancer/.env | 20 ++ .../ozone-balancer/docker-compose.yaml | 183 ++++++++++++++++++ .../main/compose/ozone-balancer/docker-config | 62 ++++++ .../src/main/compose/ozone-balancer/test.sh | 37 ++++ .../smoketest/balancer/testBalancer.robot | 141 ++++++++++++++ 5 files changed, 443 insertions(+) create mode 100644 hadoop-ozone/dist/src/main/compose/ozone-balancer/.env create mode 100644 hadoop-ozone/dist/src/main/compose/ozone-balancer/docker-compose.yaml create mode 100644 hadoop-ozone/dist/src/main/compose/ozone-balancer/docker-config create mode 100644 hadoop-ozone/dist/src/main/compose/ozone-balancer/test.sh create mode 100644 hadoop-ozone/dist/src/main/smoketest/balancer/testBalancer.robot diff --git a/hadoop-ozone/dist/src/main/compose/ozone-balancer/.env b/hadoop-ozone/dist/src/main/compose/ozone-balancer/.env new file mode 100644 index 000000000000..0e99fab82fd0 --- /dev/null +++ b/hadoop-ozone/dist/src/main/compose/ozone-balancer/.env @@ -0,0 +1,20 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +HDDS_VERSION=${hdds.version} +OZONE_RUNNER_VERSION=${docker.ozone-runner.version} +OZONE_RUNNER_IMAGE=apache/ozone-runner +OZONE_OPTS= \ No newline at end of file diff --git a/hadoop-ozone/dist/src/main/compose/ozone-balancer/docker-compose.yaml b/hadoop-ozone/dist/src/main/compose/ozone-balancer/docker-compose.yaml new file mode 100644 index 000000000000..08b5fe4d2021 --- /dev/null +++ b/hadoop-ozone/dist/src/main/compose/ozone-balancer/docker-compose.yaml @@ -0,0 +1,183 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +version: "3.8" + +# reusable fragments (see https://docs.docker.com/compose/compose-file/#extension-fields) +x-common-config: + &common-config + image: ${OZONE_RUNNER_IMAGE}:${OZONE_RUNNER_VERSION} + volumes: + - ../..:/opt/hadoop + env_file: + - docker-config + +x-replication: + &replication + OZONE-SITE.XML_ozone.server.default.replication: ${OZONE_REPLICATION_FACTOR:-3} + +services: + datanode1: + <<: *common-config + ports: + - 19864 + - 9882 + environment: + <<: *replication + OZONE_OPTS: -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005 + command: ["/opt/hadoop/bin/ozone","datanode"] + volumes: + - tmpfs1:/data + - ../..:/opt/hadoop + datanode2: + <<: *common-config + ports: + - 19864 + - 9882 + environment: + <<: *replication + OZONE_OPTS: -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005 + command: [ "/opt/hadoop/bin/ozone","datanode" ] + volumes: + - tmpfs2:/data + - ../..:/opt/hadoop + datanode3: + <<: *common-config + ports: + - 19864 + - 9882 + environment: + <<: *replication + OZONE_OPTS: -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005 + command: [ "/opt/hadoop/bin/ozone","datanode" ] + volumes: + - tmpfs3:/data + - ../..:/opt/hadoop + datanode4: + <<: *common-config + ports: + - 19864 + - 9882 + environment: + <<: *replication + OZONE_OPTS: -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005 + command: [ "/opt/hadoop/bin/ozone","datanode" ] + volumes: + - tmpfs4:/data + - ../..:/opt/hadoop + om1: + <<: *common-config + environment: + WAITFOR: scm3:9894 + ENSURE_OM_INITIALIZED: /data/metadata/om/current/VERSION + <<: *replication + ports: + - 9874:9874 + - 9862 + hostname: om1 + command: ["/opt/hadoop/bin/ozone","om"] + om2: + <<: *common-config + environment: + WAITFOR: scm3:9894 + ENSURE_OM_INITIALIZED: /data/metadata/om/current/VERSION + <<: *replication + ports: + - 9874 + - 9862 + hostname: om2 + command: ["/opt/hadoop/bin/ozone","om"] + om3: + <<: *common-config + 
environment: + WAITFOR: scm3:9894 + ENSURE_OM_INITIALIZED: /data/metadata/om/current/VERSION + <<: *replication + ports: + - 9874 + - 9862 + hostname: om3 + command: ["/opt/hadoop/bin/ozone","om"] + scm1: + <<: *common-config + ports: + - 9876:9876 + environment: + ENSURE_SCM_INITIALIZED: /data/metadata/scm/current/VERSION + OZONE-SITE.XML_hdds.scm.safemode.min.datanode: ${OZONE_SAFEMODE_MIN_DATANODES:-1} + <<: *replication + command: ["/opt/hadoop/bin/ozone","scm"] + scm2: + <<: *common-config + ports: + - 9876 + environment: + WAITFOR: scm1:9894 + ENSURE_SCM_BOOTSTRAPPED: /data/metadata/scm/current/VERSION + OZONE-SITE.XML_hdds.scm.safemode.min.datanode: ${OZONE_SAFEMODE_MIN_DATANODES:-1} + <<: *replication + command: ["/opt/hadoop/bin/ozone","scm"] + scm3: + <<: *common-config + ports: + - 9876 + environment: + WAITFOR: scm2:9894 + ENSURE_SCM_BOOTSTRAPPED: /data/metadata/scm/current/VERSION + OZONE-SITE.XML_hdds.scm.safemode.min.datanode: ${OZONE_SAFEMODE_MIN_DATANODES:-1} + <<: *replication + command: ["/opt/hadoop/bin/ozone","scm"] + httpfs: + <<: *common-config + environment: + OZONE-SITE.XML_hdds.scm.safemode.min.datanode: ${OZONE_SAFEMODE_MIN_DATANODES:-1} + <<: *replication + ports: + - 14000:14000 + command: [ "/opt/hadoop/bin/ozone","httpfs" ] + s3g: + <<: *common-config + environment: + OZONE_OPTS: + <<: *replication + ports: + - 9878:9878 + command: ["/opt/hadoop/bin/ozone","s3g"] +volumes: + tmpfs1: + driver: local + driver_opts: + o: "size=1g,uid=1000" + device: tmpfs + type: tmpfs + tmpfs2: + driver: local + driver_opts: + o: "size=1g,uid=2000" + device: tmpfs + type: tmpfs + tmpfs3: + driver: local + driver_opts: + o: "size=1g,uid=3000" + device: tmpfs + type: tmpfs + tmpfs4: + driver: local + driver_opts: + o: "size=1g,uid=4000" + device: tmpfs + type: tmpfs diff --git a/hadoop-ozone/dist/src/main/compose/ozone-balancer/docker-config b/hadoop-ozone/dist/src/main/compose/ozone-balancer/docker-config new file mode 100644 index 
000000000000..60e8afe6e1f6 --- /dev/null +++ b/hadoop-ozone/dist/src/main/compose/ozone-balancer/docker-config @@ -0,0 +1,62 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# For HttpFS service it is required to enable proxying users. +CORE-SITE.XML_hadoop.proxyuser.hadoop.hosts=* +CORE-SITE.XML_hadoop.proxyuser.hadoop.groups=* + +CORE-SITE.XML_fs.defaultFS=ofs://om/ +CORE-SITE.XML_fs.trash.interval=1 + +OZONE-SITE.XML_ozone.om.service.ids=om +OZONE-SITE.XML_ozone.om.nodes.om=om1,om2,om3 +OZONE-SITE.XML_ozone.om.address.om.om1=om1 +OZONE-SITE.XML_ozone.om.address.om.om2=om2 +OZONE-SITE.XML_ozone.om.address.om.om3=om3 +OZONE-SITE.XML_ozone.om.ratis.enable=true + +OZONE-SITE.XML_ozone.scm.service.ids=scmservice +OZONE-SITE.XML_ozone.scm.nodes.scmservice=scm1,scm2,scm3 +OZONE-SITE.XML_ozone.scm.address.scmservice.scm1=scm1 +OZONE-SITE.XML_ozone.scm.address.scmservice.scm2=scm2 +OZONE-SITE.XML_ozone.scm.address.scmservice.scm3=scm3 +OZONE-SITE.XML_ozone.scm.ratis.enable=true +OZONE-SITE.XML_ozone.scm.datanode.id.dir=/data +OZONE-SITE.XML_ozone.scm.container.size=100MB +OZONE-SITE.XML_ozone.scm.block.size=20MB +OZONE-SITE.XML_ozone.scm.datanode.ratis.volume.free-space.min=10MB +OZONE-SITE.XML_ozone.metadata.dirs=/data/metadata 
+OZONE-SITE.XML_hdds.node.report.interval=20s +OZONE-SITE.XML_hdds.heartbeat.interval=20s +OZONE-SITE.XML_hdds.datanode.du.refresh.period=20s +OZONE-SITE.XML_hdds.datanode.dir=/data/hdds +OZONE-SITE.XML_hdds.datanode.volume.min.free.space=100MB +OZONE-SITE.XML_ozone.scm.pipeline.creation.auto.factor.one=false +OZONE-SITE.XML_ozone.datanode.pipeline.limit=1 +OZONE-SITE.XML_hdds.scmclient.max.retry.timeout=30s +OZONE-SITE.XML_ozone.scm.primordial.node.id=scm1 +OZONE-SITE.XML_hdds.container.report.interval=30s +OZONE-SITE.XML_ozone.om.s3.grpc.server_enabled=true +OZONE-SITE.XML_ozone.recon.db.dir=/data/metadata/recon +OZONE-SITE.XML_ozone.recon.address=recon:9891 +OZONE-SITE.XML_ozone.recon.http-address=0.0.0.0:9888 +OZONE-SITE.XML_ozone.recon.https-address=0.0.0.0:9889 +OZONE-SITE.XML_dfs.container.ratis.datastream.enabled=true + +OZONE_CONF_DIR=/etc/hadoop +OZONE_LOG_DIR=/var/log/hadoop + +no_proxy=om1,om2,om3,scm,s3g,recon,kdc,localhost,127.0.0.1 diff --git a/hadoop-ozone/dist/src/main/compose/ozone-balancer/test.sh b/hadoop-ozone/dist/src/main/compose/ozone-balancer/test.sh new file mode 100644 index 000000000000..bb9800b3169e --- /dev/null +++ b/hadoop-ozone/dist/src/main/compose/ozone-balancer/test.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +#suite:balancer + +COMPOSE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +export COMPOSE_DIR +export OM_SERVICE_ID="om" +export OM=om1 +export SCM=scm1 +export OZONE_REPLICATION_FACTOR=3 + +# shellcheck source=/dev/null +source "$COMPOSE_DIR/../testlib.sh" + +# We need 4 dataNodes in this tests +start_docker_env 4 + +# Start OMs separately. In this test, the OMs will be stopped and restarted multiple times. +# So we do not want the container to be tied to the OM process. +#startOMs + +execute_robot_test ${OM} balancer/testBalancer.robot diff --git a/hadoop-ozone/dist/src/main/smoketest/balancer/testBalancer.robot b/hadoop-ozone/dist/src/main/smoketest/balancer/testBalancer.robot new file mode 100644 index 000000000000..89a8c68f0604 --- /dev/null +++ b/hadoop-ozone/dist/src/main/smoketest/balancer/testBalancer.robot @@ -0,0 +1,141 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +*** Settings *** +Documentation Smoketest for Container Balancer on RATIS containers +Library OperatingSystem +Library Collections +Resource ../commonlib.robot +Resource ../ozone-lib/shell.robot + +Test Timeout 20 minutes + +*** Variables *** +${SECURITY_ENABLED} false +${HOST} datanode1 +${VOLUME} volume1 +${BUCKET} bucket1 +${SIZE} 104857600 + + +** Keywords *** +Prepare For Tests + Execute dd if=/dev/urandom of=/tmp/100mb bs=1048576 count=100 + Run Keyword if '${SECURITY_ENABLED}' == 'true' Kinit test user testuser testuser.keytab + Execute ozone sh volume create /${VOLUME} + Execute ozone sh bucket create /${VOLUME}/${BUCKET} + + +Datanode In Maintenance Mode + ${result} = Execute /opt/hadoop/bin/ozone admin datanode maintenance ${HOST} + Should Contain ${result} Entering maintenance mode on datanode + ${result} = Execute /opt/hadoop/bin/ozone admin datanode list | grep "Operational State:*" + Wait Until Keyword Succeeds 30sec 5sec Should contain ${result} ENTERING_MAINTENANCE + Wait Until Keyword Succeeds 1min 10sec Related pipelines are closed + Sleep 60000ms + +Related pipelines are closed + ${result} = Execute /opt/hadoop/bin/ozone admin datanode list | awk -v RS= '{$1=$1}1'|grep MAINT | sed -e 's/^.*pipelines: \\(.*\\)$/\\1/' -e 's/ /\\n/' + Should Contain Any ${result} CLOSED No\\nrelated pipelines or the node is not in Healthy state. + +Datanode Recommission + ${result} = Execute /opt/hadoop/bin/ozone admin datanode recommission ${HOST} + Should Contain ${result} Started recommissioning datanode + Wait Until Keyword Succeeds 1min 10sec Datanode Recommission is Finished + Sleep 300000ms + +Datanode Recommission is Finished + ${result} = Execute /opt/hadoop/bin/ozone admin datanode list | grep "Operational State:*" + Should Not Contain ${result} ENTERING_MAINTENANCE + +Container Balancer + ${result} = Execute /opt/hadoop/bin/ozone admin containerbalancer start -t 1 -d 100 -i 1 + Should Contain ${result} Container Balancer started successfully. 
+ ${result} = Execute /opt/hadoop/bin/ozone admin containerbalancer status + Should Contain ${result} ContainerBalancer is Running. + Wait Until Keyword Succeeds 3min 10sec ContainerBalancer is Not Running + Sleep 60000ms + +ContainerBalancer is Not Running + ${result} = Execute /opt/hadoop/bin/ozone admin containerbalancer status + Should contain ${result} ContainerBalancer is Not Running. + +Create Multiple Keys + [arguments] ${NUM_KEYS} + ${file} = Set Variable /tmp/100mb + FOR ${INDEX} IN RANGE ${NUM_KEYS} + ${fileName} = Set Variable file-${INDEX}.txt + ${key} = Set Variable /${VOLUME}/${BUCKET}/${fileName} + LOG ${fileName} + Create Key ${key} ${file} + Key Should Match Local File ${key} ${file} + END + +Datanode Usageinfo + [arguments] ${uuid} + ${result} = Execute ozone admin datanode usageinfo --uuid=${uuid} + Should Contain ${result} Ozone Used + +Get Uuid + ${result} = Execute /opt/hadoop/bin/ozone admin datanode list | awk -v RS= '{$1=$1}1'| grep ${HOST} | sed -e 's/Datanode: //'|sed -e 's/ .*$//' + [return] ${result} + +Close All Containers + FOR ${INDEX} IN RANGE 15 + ${container} = Execute ozone admin container list --state OPEN | jq -r 'select(.replicationConfig.replicationFactor == "THREE") | .containerID' | head -1 + EXIT FOR LOOP IF "${container}" == "" + Execute ozone admin container close "${container}" + ${output} = Execute ozone admin container info "${container}" + Should contain ${output} CLOS + END + Wait until keyword succeeds 3min 10sec All container is closed + +All container is closed + ${output} = Execute ozone admin container list + Should Not Contain ${output} OPEN + +** Test Cases *** +Verify Container Balancer for RATIS containers + Prepare For Tests + + Datanode In Maintenance Mode + + ${uuid} = Get Uuid + Datanode Usageinfo ${uuid} + + Create Multiple Keys 3 + + Close All Containers + + ${output1} = Execute export DATANODES=$(ozone admin datanode list --json) && for datanode in $(echo "$\{DATANODES\}" | jq -r 
'.[].datanodeDetails.uuid'); do ozone admin datanode usageinfo --uuid=$\{datanode\} --json | jq '{(.[0].datanodeDetails.uuid) : .[0].ozoneUsed}'; done | jq -s add + + ${datanodePreviousUsedBytes} Execute echo '${output1}' | jq '. | to_entries | .[] | select(.key == "${uuid}") | .value' + Should Be True ${datanodePreviousUsedBytes} < ${SIZE} + + Datanode Recommission + + Container Balancer + + ${output2} = Execute export DATANODES=$(ozone admin datanode list --json) && for datanode in $(echo "$\{DATANODES\}" | jq -r '.[].datanodeDetails.uuid'); do ozone admin datanode usageinfo --uuid=$\{datanode\} --json | jq '{(.[0].datanodeDetails.uuid) : .[0].ozoneUsed}'; done | jq -s add + + ${datanodeCurrentUsedBytes} Execute echo '${output2}' | jq '. | to_entries | .[] | select(.key == "${uuid}") | .value' + Should Not Be Equal As Integers ${datanodePreviousUsedBytes} ${datanodeCurrentUsedBytes} + Should Be True ${datanodeCurrentUsedBytes} < ${SIZE} * 3.5 + Should Be True ${datanodeCurrentUsedBytes} > ${SIZE} * 3 + + + + + From 71213a9e8a25001939f0ac1921afa8e65179abed Mon Sep 17 00:00:00 2001 From: "anastasia.filippova" Date: Fri, 29 Mar 2024 19:01:59 +0300 Subject: [PATCH 2/3] HDDS-10612. 
Fix to improve test check stability --- .../dist/src/main/smoketest/balancer/testBalancer.robot | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hadoop-ozone/dist/src/main/smoketest/balancer/testBalancer.robot b/hadoop-ozone/dist/src/main/smoketest/balancer/testBalancer.robot index 89a8c68f0604..1e8a96ebc4f3 100644 --- a/hadoop-ozone/dist/src/main/smoketest/balancer/testBalancer.robot +++ b/hadoop-ozone/dist/src/main/smoketest/balancer/testBalancer.robot @@ -43,12 +43,12 @@ Datanode In Maintenance Mode Should Contain ${result} Entering maintenance mode on datanode ${result} = Execute /opt/hadoop/bin/ozone admin datanode list | grep "Operational State:*" Wait Until Keyword Succeeds 30sec 5sec Should contain ${result} ENTERING_MAINTENANCE - Wait Until Keyword Succeeds 1min 10sec Related pipelines are closed + Wait Until Keyword Succeeds 3min 10sec Related pipelines are closed Sleep 60000ms Related pipelines are closed - ${result} = Execute /opt/hadoop/bin/ozone admin datanode list | awk -v RS= '{$1=$1}1'|grep MAINT | sed -e 's/^.*pipelines: \\(.*\\)$/\\1/' -e 's/ /\\n/' - Should Contain Any ${result} CLOSED No\\nrelated pipelines or the node is not in Healthy state. + ${result} = Execute /opt/hadoop/bin/ozone admin datanode list | awk -v RS= '{$1=$1}1'|grep MAINT | sed -e 's/^.*pipelines: \\(.*\\)$/\\1/' + Should Contain Any ${result} CLOSED No related pipelines or the node is not in Healthy state. Datanode Recommission ${result} = Execute /opt/hadoop/bin/ozone admin datanode recommission ${HOST} From 021198b3012e26caa327c9c71ed6da1766b96848 Mon Sep 17 00:00:00 2001 From: "anastasia.filippova" Date: Mon, 1 Apr 2024 14:00:01 +0300 Subject: [PATCH 3/3] HDDS-10612. 
Fix comments --- .../ozone-balancer/docker-compose.yaml | 28 +++++------ .../src/main/compose/ozone-balancer/test.sh | 4 -- .../smoketest/balancer/testBalancer.robot | 48 ++++++++++--------- 3 files changed, 37 insertions(+), 43 deletions(-) diff --git a/hadoop-ozone/dist/src/main/compose/ozone-balancer/docker-compose.yaml b/hadoop-ozone/dist/src/main/compose/ozone-balancer/docker-compose.yaml index 08b5fe4d2021..dc6bae7822e5 100644 --- a/hadoop-ozone/dist/src/main/compose/ozone-balancer/docker-compose.yaml +++ b/hadoop-ozone/dist/src/main/compose/ozone-balancer/docker-compose.yaml @@ -37,8 +37,7 @@ services: - 9882 environment: <<: *replication - OZONE_OPTS: -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005 - command: ["/opt/hadoop/bin/ozone","datanode"] + command: ["ozone","datanode"] volumes: - tmpfs1:/data - ../..:/opt/hadoop @@ -49,8 +48,7 @@ services: - 9882 environment: <<: *replication - OZONE_OPTS: -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005 - command: [ "/opt/hadoop/bin/ozone","datanode" ] + command: [ "ozone","datanode" ] volumes: - tmpfs2:/data - ../..:/opt/hadoop @@ -61,8 +59,7 @@ services: - 9882 environment: <<: *replication - OZONE_OPTS: -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005 - command: [ "/opt/hadoop/bin/ozone","datanode" ] + command: [ "ozone","datanode" ] volumes: - tmpfs3:/data - ../..:/opt/hadoop @@ -73,8 +70,7 @@ services: - 9882 environment: <<: *replication - OZONE_OPTS: -agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5005 - command: [ "/opt/hadoop/bin/ozone","datanode" ] + command: [ "ozone","datanode" ] volumes: - tmpfs4:/data - ../..:/opt/hadoop @@ -88,7 +84,7 @@ services: - 9874:9874 - 9862 hostname: om1 - command: ["/opt/hadoop/bin/ozone","om"] + command: ["ozone","om"] om2: <<: *common-config environment: @@ -99,7 +95,7 @@ services: - 9874 - 9862 hostname: om2 - command: ["/opt/hadoop/bin/ozone","om"] + command: ["ozone","om"] om3: <<: 
*common-config environment: @@ -110,7 +106,7 @@ services: - 9874 - 9862 hostname: om3 - command: ["/opt/hadoop/bin/ozone","om"] + command: ["ozone","om"] scm1: <<: *common-config ports: @@ -119,7 +115,7 @@ services: ENSURE_SCM_INITIALIZED: /data/metadata/scm/current/VERSION OZONE-SITE.XML_hdds.scm.safemode.min.datanode: ${OZONE_SAFEMODE_MIN_DATANODES:-1} <<: *replication - command: ["/opt/hadoop/bin/ozone","scm"] + command: ["ozone","scm"] scm2: <<: *common-config ports: @@ -129,7 +125,7 @@ services: ENSURE_SCM_BOOTSTRAPPED: /data/metadata/scm/current/VERSION OZONE-SITE.XML_hdds.scm.safemode.min.datanode: ${OZONE_SAFEMODE_MIN_DATANODES:-1} <<: *replication - command: ["/opt/hadoop/bin/ozone","scm"] + command: ["ozone","scm"] scm3: <<: *common-config ports: @@ -139,7 +135,7 @@ services: ENSURE_SCM_BOOTSTRAPPED: /data/metadata/scm/current/VERSION OZONE-SITE.XML_hdds.scm.safemode.min.datanode: ${OZONE_SAFEMODE_MIN_DATANODES:-1} <<: *replication - command: ["/opt/hadoop/bin/ozone","scm"] + command: ["ozone","scm"] httpfs: <<: *common-config environment: @@ -147,7 +143,7 @@ services: <<: *replication ports: - 14000:14000 - command: [ "/opt/hadoop/bin/ozone","httpfs" ] + command: [ "ozone","httpfs" ] s3g: <<: *common-config environment: @@ -155,7 +151,7 @@ services: <<: *replication ports: - 9878:9878 - command: ["/opt/hadoop/bin/ozone","s3g"] + command: ["ozone","s3g"] volumes: tmpfs1: driver: local diff --git a/hadoop-ozone/dist/src/main/compose/ozone-balancer/test.sh b/hadoop-ozone/dist/src/main/compose/ozone-balancer/test.sh index bb9800b3169e..e79979877ba3 100644 --- a/hadoop-ozone/dist/src/main/compose/ozone-balancer/test.sh +++ b/hadoop-ozone/dist/src/main/compose/ozone-balancer/test.sh @@ -30,8 +30,4 @@ source "$COMPOSE_DIR/../testlib.sh" # We need 4 dataNodes in this tests start_docker_env 4 -# Start OMs separately. In this test, the OMs will be stopped and restarted multiple times. -# So we do not want the container to be tied to the OM process. 
-#startOMs - execute_robot_test ${OM} balancer/testBalancer.robot diff --git a/hadoop-ozone/dist/src/main/smoketest/balancer/testBalancer.robot b/hadoop-ozone/dist/src/main/smoketest/balancer/testBalancer.robot index 1e8a96ebc4f3..6e2fb9d85a56 100644 --- a/hadoop-ozone/dist/src/main/smoketest/balancer/testBalancer.robot +++ b/hadoop-ozone/dist/src/main/smoketest/balancer/testBalancer.robot @@ -39,37 +39,37 @@ Prepare For Tests Datanode In Maintenance Mode - ${result} = Execute /opt/hadoop/bin/ozone admin datanode maintenance ${HOST} + ${result} = Execute ozone admin datanode maintenance ${HOST} Should Contain ${result} Entering maintenance mode on datanode - ${result} = Execute /opt/hadoop/bin/ozone admin datanode list | grep "Operational State:*" + ${result} = Execute ozone admin datanode list | grep "Operational State:*" Wait Until Keyword Succeeds 30sec 5sec Should contain ${result} ENTERING_MAINTENANCE Wait Until Keyword Succeeds 3min 10sec Related pipelines are closed Sleep 60000ms Related pipelines are closed - ${result} = Execute /opt/hadoop/bin/ozone admin datanode list | awk -v RS= '{$1=$1}1'|grep MAINT | sed -e 's/^.*pipelines: \\(.*\\)$/\\1/' + ${result} = Execute ozone admin datanode list | awk -v RS= '{$1=$1}1'|grep MAINT | sed -e 's/^.*pipelines: \\(.*\\)$/\\1/' Should Contain Any ${result} CLOSED No related pipelines or the node is not in Healthy state. 
Datanode Recommission - ${result} = Execute /opt/hadoop/bin/ozone admin datanode recommission ${HOST} + ${result} = Execute ozone admin datanode recommission ${HOST} Should Contain ${result} Started recommissioning datanode Wait Until Keyword Succeeds 1min 10sec Datanode Recommission is Finished Sleep 300000ms Datanode Recommission is Finished - ${result} = Execute /opt/hadoop/bin/ozone admin datanode list | grep "Operational State:*" + ${result} = Execute ozone admin datanode list | grep "Operational State:*" Should Not Contain ${result} ENTERING_MAINTENANCE -Container Balancer - ${result} = Execute /opt/hadoop/bin/ozone admin containerbalancer start -t 1 -d 100 -i 1 +Run Container Balancer + ${result} = Execute ozone admin containerbalancer start -t 1 -d 100 -i 1 Should Contain ${result} Container Balancer started successfully. - ${result} = Execute /opt/hadoop/bin/ozone admin containerbalancer status + ${result} = Execute ozone admin containerbalancer status Should Contain ${result} ContainerBalancer is Running. Wait Until Keyword Succeeds 3min 10sec ContainerBalancer is Not Running Sleep 60000ms ContainerBalancer is Not Running - ${result} = Execute /opt/hadoop/bin/ozone admin containerbalancer status + ${result} = Execute ozone admin containerbalancer status Should contain ${result} ContainerBalancer is Not Running. 
Create Multiple Keys @@ -89,7 +89,7 @@ Datanode Usageinfo Should Contain ${result} Ozone Used Get Uuid - ${result} = Execute /opt/hadoop/bin/ozone admin datanode list | awk -v RS= '{$1=$1}1'| grep ${HOST} | sed -e 's/Datanode: //'|sed -e 's/ .*$//' + ${result} = Execute ozone admin datanode list | awk -v RS= '{$1=$1}1'| grep ${HOST} | sed -e 's/Datanode: //'|sed -e 's/ .*$//' [return] ${result} Close All Containers @@ -103,8 +103,14 @@ Close All Containers Wait until keyword succeeds 3min 10sec All container is closed All container is closed - ${output} = Execute ozone admin container list - Should Not Contain ${output} OPEN + ${output} = Execute ozone admin container list --state OPEN + Should Be Empty ${output} + +Get Datanode Ozone Used Bytes Info + [arguments] ${uuid} + ${output} = Execute export DATANODES=$(ozone admin datanode list --json) && for datanode in $(echo "$\{DATANODES\}" | jq -r '.[].datanodeDetails.uuid'); do ozone admin datanode usageinfo --uuid=$\{datanode\} --json | jq '{(.[0].datanodeDetails.uuid) : .[0].ozoneUsed}'; done | jq -s add + ${result} = Execute echo '${output}' | jq '. | to_entries | .[] | select(.key == "${uuid}") | .value' + [return] ${result} ** Test Cases *** Verify Container Balancer for RATIS containers @@ -119,21 +125,17 @@ Verify Container Balancer for RATIS containers Close All Containers - ${output1} = Execute export DATANODES=$(ozone admin datanode list --json) && for datanode in $(echo "$\{DATANODES\}" | jq -r '.[].datanodeDetails.uuid'); do ozone admin datanode usageinfo --uuid=$\{datanode\} --json | jq '{(.[0].datanodeDetails.uuid) : .[0].ozoneUsed}'; done | jq -s add - - ${datanodePreviousUsedBytes} Execute echo '${output1}' | jq '. 
| to_entries | .[] | select(.key == "${uuid}") | .value' - Should Be True ${datanodePreviousUsedBytes} < ${SIZE} + ${datanodeOzoneUsedBytesInfo} = Get Datanode Ozone Used Bytes Info ${uuid} + Should Be True ${datanodeOzoneUsedBytesInfo} < ${SIZE} Datanode Recommission - Container Balancer - - ${output2} = Execute export DATANODES=$(ozone admin datanode list --json) && for datanode in $(echo "$\{DATANODES\}" | jq -r '.[].datanodeDetails.uuid'); do ozone admin datanode usageinfo --uuid=$\{datanode\} --json | jq '{(.[0].datanodeDetails.uuid) : .[0].ozoneUsed}'; done | jq -s add + Run Container Balancer - ${datanodeCurrentUsedBytes} Execute echo '${output2}' | jq '. | to_entries | .[] | select(.key == "${uuid}") | .value' - Should Not Be Equal As Integers ${datanodePreviousUsedBytes} ${datanodeCurrentUsedBytes} - Should Be True ${datanodeCurrentUsedBytes} < ${SIZE} * 3.5 - Should Be True ${datanodeCurrentUsedBytes} > ${SIZE} * 3 + ${datanodeOzoneUsedBytesInfoAfterContainerBalancing} = Get Datanode Ozone Used Bytes Info ${uuid} + Should Not Be Equal As Integers ${datanodeOzoneUsedBytesInfo} ${datanodeOzoneUsedBytesInfoAfterContainerBalancing} + Should Be True ${datanodeOzoneUsedBytesInfoAfterContainerBalancing} < ${SIZE} * 3.5 + Should Be True ${datanodeOzoneUsedBytesInfoAfterContainerBalancing} > ${SIZE} * 3