From fc1d8b450ab7410509a14fd633d0ac2188ec8a26 Mon Sep 17 00:00:00 2001 From: Hanisha Koneru Date: Mon, 16 Mar 2020 16:09:37 -0700 Subject: [PATCH 1/6] HDDS-2621. Enable OM HA acceptance tests --- .../src/main/compose/ozone-om-ha/.ssh/id_rsa | 15 ------- .../main/compose/ozone-om-ha/.ssh/id_rsa.pub | 15 ------- .../compose/ozone-om-ha/{run.sh => test.sh} | 0 .../src/main/smoketest/omha/testOMHA.robot | 44 ++++++++++--------- hadoop-ozone/pom.xml | 1 + 5 files changed, 24 insertions(+), 51 deletions(-) rename hadoop-ozone/dist/src/main/compose/ozone-om-ha/{run.sh => test.sh} (100%) diff --git a/hadoop-ozone/dist/src/main/compose/ozone-om-ha/.ssh/id_rsa b/hadoop-ozone/dist/src/main/compose/ozone-om-ha/.ssh/id_rsa index 6632ce51c54a..b4b1604f806e 100644 --- a/hadoop-ozone/dist/src/main/compose/ozone-om-ha/.ssh/id_rsa +++ b/hadoop-ozone/dist/src/main/compose/ozone-om-ha/.ssh/id_rsa @@ -1,18 +1,3 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -----BEGIN RSA PRIVATE KEY----- MIIEowIBAAKCAQEA4BJi6WJuAa1ratShvYYWVwmYBqxE57btHjU6NtVN1SnPZx/f 6LezOpQGsLBXE/bl7uG+fD05Z378B/0wE5QhYwvJ9Ge0jsfhVOi90p/FEYfR2l+C diff --git a/hadoop-ozone/dist/src/main/compose/ozone-om-ha/.ssh/id_rsa.pub b/hadoop-ozone/dist/src/main/compose/ozone-om-ha/.ssh/id_rsa.pub index ae390529c7eb..0e5cb14756d7 100644 --- a/hadoop-ozone/dist/src/main/compose/ozone-om-ha/.ssh/id_rsa.pub +++ b/hadoop-ozone/dist/src/main/compose/ozone-om-ha/.ssh/id_rsa.pub @@ -1,16 +1 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDgEmLpYm4BrWtq1KG9hhZXCZgGrETntu0eNTo21U3VKc9nH9/ot7M6lAawsFcT9uXu4b58PTlnfvwH/TATlCFjC8n0Z7SOx+FU6L3Sn8URh9HaX4L0tF8u87oCAD4dBrUGhhB36eiuH9dBBWly6RKffYJvrjatbc7GxBO/e5OSUMtqk/DSVKksmBhZxutrKivCNjDish9ViGIf8b5yS/MlEGmaVKApik1fJ5iOlloM/GgpB60YV/hbqfCecbWgeiM1gK92gdOcA/Wx1C7fj8BSI5iDSE6eZeF80gM3421lvyPDWyVhFaGbka4rXBX/fb9QSRBA9RTqhRKAEmAIf49H hadoop@cdae967fa87a diff --git a/hadoop-ozone/dist/src/main/compose/ozone-om-ha/run.sh b/hadoop-ozone/dist/src/main/compose/ozone-om-ha/test.sh similarity index 100% rename from hadoop-ozone/dist/src/main/compose/ozone-om-ha/run.sh rename to hadoop-ozone/dist/src/main/compose/ozone-om-ha/test.sh diff --git a/hadoop-ozone/dist/src/main/smoketest/omha/testOMHA.robot b/hadoop-ozone/dist/src/main/smoketest/omha/testOMHA.robot index 8c5a706961fe..2ca618cc3267 100644 --- a/hadoop-ozone/dist/src/main/smoketest/omha/testOMHA.robot +++ b/hadoop-ozone/dist/src/main/smoketest/omha/testOMHA.robot @@ -33,6 +33,7 @@ ${VOLUME} volume1 ${BUCKET} bucket1 ${TEST_FILE} NOTICE.txt ${WRITE_FILE_COUNT} 0 +${TEMPDIR} /tmp ** Keywords *** Open Connection And Log In @@ -68,12 +69,13 @@ Create volume and bucket Write Test File ${writeFileCount} = Evaluate ${WRITE_FILE_COUNT}+1 Set Global Variable ${WRITE_FILE_COUNT} ${writeFileCount} - ${fileName} = Catenate SEPARATOR= ${WRITE_FILE_COUNT} .txt - Copy File ${TEST_FILE} ${fileName} - Execute ozone fs -copyFromLocal ${fileName} o3fs://${BUCKET}.${VOLUME}.${OM_SERVICE_ID}/ + ${fileName} = Set Variable omha-${WRITE_FILE_COUNT}.txt + ${testFilePath} = Set Variable ${TEMPDIR}/${fileName} + Copy File ${TEST_FILE} ${testFilePath} + Execute ozone fs -copyFromLocal ${testFilePath} o3fs://${BUCKET}.${VOLUME}.${OM_SERVICE_ID}/ ${result} = Execute ozone sh key list o3://${OM_SERVICE_ID}/${VOLUME}/${BUCKET} | jq -r '.name' Should contain ${result} ${fileName} - Remove File ${fileName} + Remove File ${testFilePath} Put Key [arguments] ${FILE} ${KEY} @@ -133,6 +135,23 @@ Stop Leader OM and Verify Failover # Restart stopped OM Start OM ${leaderOM} +Test Multiple Failovers + FOR ${INDEX} IN RANGE 5 + # Find Leader OM and stop it + ${leaderOM} = Get OM Leader Node + ${stopOMResult} = Stop OM ${leaderOM} + + # Verify that new Leader OM is elected + ${newLeaderOM} = Get OM Leader Node + Should Not be Equal ${leaderOM} ${newLeaderOM} OMs did not failover + + # Verify write succeeds after failover + Write Test File + + # Restart OM + Start OM ${leaderOM} + END + Restart OM and Verify Ratis Logs Set Test Variable ${OM_HOST} om2 Set Test Variable ${keyBase} testOMRestart_ @@ -163,21 +182,4 @@ Restart OM and Verify Ratis Logs # Verify that the logs match with the Leader OMs logs List Should Contain Sub List ${logsAfter} ${logsLeader} -Test Multiple Failovers - FOR ${INDEX} IN RANGE 5 - # Find Leader OM and stop it - ${leaderOM} = Get OM Leader Node - ${stopOMResult} = Stop OM ${leaderOM} - - # Verify that new Leader OM is elected - ${newLeaderOM} = Get OM Leader Node - Should Not be Equal ${leaderOM} ${newLeaderOM} OMs did not failover - - # Verify write succeeds after failover - Write Test File - - # Restart OM - Start OM ${leaderOM} - END - diff --git a/hadoop-ozone/pom.xml b/hadoop-ozone/pom.xml index af28185e07e5..3517b236615b 100644 --- a/hadoop-ozone/pom.xml +++ b/hadoop-ozone/pom.xml @@ -245,6 +245,7 @@ **/target/** .gitattributes .idea/** + **/.ssh/id_rsa* dev-support/*tests dev-support/checkstyle* dev-support/jdiff/** From f4483ddc2adcd71013c7dd40641a024fd90449f8 Mon Sep 17 00:00:00 2001 From: Hanisha Koneru Date: Wed, 18 Mar 2020 10:51:57 -0700 Subject: [PATCH 2/6] Config changes --- hadoop-ozone/dist/src/main/compose/ozone-om-ha/docker-config | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hadoop-ozone/dist/src/main/compose/ozone-om-ha/docker-config b/hadoop-ozone/dist/src/main/compose/ozone-om-ha/docker-config index c5e1680ddbef..c35c30d9691a 100644 --- a/hadoop-ozone/dist/src/main/compose/ozone-om-ha/docker-config +++ b/hadoop-ozone/dist/src/main/compose/ozone-om-ha/docker-config @@ -25,10 +25,11 @@ OZONE-SITE.XML_ozone.om.ratis.enable=true OZONE-SITE.XML_ozone.scm.names=scm OZONE-SITE.XML_ozone.scm.datanode.id.dir=/data OZONE-SITE.XML_ozone.scm.block.client.address=scm +OZONE-SITE.XML_ozone.scm.container.size=1GB OZONE-SITE.XML_ozone.metadata.dirs=/data/metadata OZONE-SITE.XML_ozone.handler.type=distributed OZONE-SITE.XML_ozone.scm.client.address=scm -OZONE-SITE.XML_ozone.replication=1 +OZONE-SITE.XML_ozone.replication=3 OZONE-SITE.XML_ozone.client.failover.max.attempts=6 OZONE-SITE.XML_hdds.datanode.dir=/data/hdds OZONE-SITE.XML_hdds.profiler.endpoint.enabled=true From f33386bae7a54d67d6c650fadc69594564e94fc3 Mon Sep 17 00:00:00 2001 From: Hanisha Koneru Date: Thu, 19 Mar 2020 15:47:20 -0700 Subject: [PATCH 3/6] Changing Ratis log size to 1KB --- hadoop-ozone/dist/src/main/compose/ozone-om-ha/docker-config | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hadoop-ozone/dist/src/main/compose/ozone-om-ha/docker-config b/hadoop-ozone/dist/src/main/compose/ozone-om-ha/docker-config index c35c30d9691a..d39abc28e763 100644 --- a/hadoop-ozone/dist/src/main/compose/ozone-om-ha/docker-config +++ b/hadoop-ozone/dist/src/main/compose/ozone-om-ha/docker-config @@ -22,6 +22,7 @@ OZONE-SITE.XML_ozone.om.address.omservice.om1=om1 OZONE-SITE.XML_ozone.om.address.omservice.om2=om2 OZONE-SITE.XML_ozone.om.address.omservice.om3=om3 OZONE-SITE.XML_ozone.om.ratis.enable=true +OZONE-SITE.XML_ozone.om.ratis.segment.size=1KB OZONE-SITE.XML_ozone.scm.names=scm OZONE-SITE.XML_ozone.scm.datanode.id.dir=/data OZONE-SITE.XML_ozone.scm.block.client.address=scm @@ -29,7 +30,7 @@ OZONE-SITE.XML_ozone.scm.container.size=1GB OZONE-SITE.XML_ozone.metadata.dirs=/data/metadata OZONE-SITE.XML_ozone.handler.type=distributed OZONE-SITE.XML_ozone.scm.client.address=scm -OZONE-SITE.XML_ozone.replication=3 +OZONE-SITE.XML_ozone.replication=1 OZONE-SITE.XML_ozone.client.failover.max.attempts=6 OZONE-SITE.XML_hdds.datanode.dir=/data/hdds OZONE-SITE.XML_hdds.profiler.endpoint.enabled=true From e3d7cfd54e03951510429e6f6956752f0e69f431 Mon Sep 17 00:00:00 2001 From: Hanisha Koneru Date: Mon, 23 Mar 2020 12:44:29 -0700 Subject: [PATCH 4/6] Disabling inconsistent test --- .../src/main/smoketest/omha/testOMHA.robot | 59 ++++++++++--------- 1 file changed, 30 insertions(+), 29 deletions(-) diff --git a/hadoop-ozone/dist/src/main/smoketest/omha/testOMHA.robot b/hadoop-ozone/dist/src/main/smoketest/omha/testOMHA.robot index 2ca618cc3267..7896aefac571 100644 --- a/hadoop-ozone/dist/src/main/smoketest/omha/testOMHA.robot +++ b/hadoop-ozone/dist/src/main/smoketest/omha/testOMHA.robot @@ -152,34 +152,35 @@ Test Multiple Failovers Start OM ${leaderOM} END -Restart OM and Verify Ratis Logs - Set Test Variable ${OM_HOST} om2 - Set Test Variable ${keyBase} testOMRestart_ - - # Stop 1 OM and get the Logs present in its Ratis Dir - Stop OM ${OM_HOST} - ${numLogsBefore} @{logsBefore} = Get Ratis Logs ${OM_HOST} - ${leaderOM} = Get OM Leader Node - - # Perform write operations to advance the Ratis log index till a new Log segment is created - FOR ${INDEX} IN RANGE 20 - Set Test Variable ${keyPrefix} ${keyBase}${INDEX} - Put Multiple Keys 5 ${keyPrefix} ${TEST_FILE} - ${numLogsLeader} @{logsLeader} = Get Ratis Logs ${leaderOM} - EXIT FOR LOOP IF ${numLogsLeader} > ${numLogsBefore} - END - Should Be True ${numLogsLeader} > ${numLogsBefore} Cannot test OM Restart as Ratis did not start new log segment. - - # Restart the stopped OM and wait for Ratis to catch up with Leader OM - Start OM ${OM_HOST} - FOR ${INDEX} IN RANGE 300 - ${numLogsAfter} @{logsAfter} = Get Ratis Logs ${OM_HOST} - EXIT FOR LOOP IF ${numLogsAfter} >= ${numLogsLeader} - Sleep 1s - END - Should Be True ${numLogsAfter} >= ${numLogsLeader} Restarted OM did not catch up with Leader OM - - # Verify that the logs match with the Leader OMs logs - List Should Contain Sub List ${logsAfter} ${logsLeader} +##Disabling inconsistent test +#Restart OM and Verify Ratis Logs +# Set Test Variable ${OM_HOST} om2 +# Set Test Variable ${keyBase} testOMRestart_ +# +# # Stop 1 OM and get the Logs present in its Ratis Dir +# Stop OM ${OM_HOST} +# ${numLogsBefore} @{logsBefore} = Get Ratis Logs ${OM_HOST} +# ${leaderOM} = Get OM Leader Node +# +# # Perform write operations to advance the Ratis log index till a new Log segment is created +# FOR ${INDEX} IN RANGE 20 +# Set Test Variable ${keyPrefix} ${keyBase}${INDEX} +# Put Multiple Keys 5 ${keyPrefix} ${TEST_FILE} +# ${numLogsLeader} @{logsLeader} = Get Ratis Logs ${leaderOM} +# EXIT FOR LOOP IF ${numLogsLeader} > ${numLogsBefore} +# END +# Should Be True ${numLogsLeader} > ${numLogsBefore} Cannot test OM Restart as Ratis did not start new log segment. +# +# # Restart the stopped OM and wait for Ratis to catch up with Leader OM +# Start OM ${OM_HOST} +# FOR ${INDEX} IN RANGE 300 +# ${numLogsAfter} @{logsAfter} = Get Ratis Logs ${OM_HOST} +# EXIT FOR LOOP IF ${numLogsAfter} >= ${numLogsLeader} +# Sleep 1s +# END +# Should Be True ${numLogsAfter} >= ${numLogsLeader} Restarted OM did not catch up with Leader OM +# +# # Verify that the logs match with the Leader OMs logs +# List Should Contain Sub List ${logsAfter} ${logsLeader} From 703946b9b6dcb9b652f8d267e074ad91e8e2bf44 Mon Sep 17 00:00:00 2001 From: Hanisha Koneru Date: Mon, 23 Mar 2020 13:53:07 -0700 Subject: [PATCH 5/6] revert ratis log segment size config change --- hadoop-ozone/dist/src/main/compose/ozone-om-ha/docker-config | 1 - 1 file changed, 1 deletion(-) diff --git a/hadoop-ozone/dist/src/main/compose/ozone-om-ha/docker-config b/hadoop-ozone/dist/src/main/compose/ozone-om-ha/docker-config index d39abc28e763..feafda931dac 100644 --- a/hadoop-ozone/dist/src/main/compose/ozone-om-ha/docker-config +++ b/hadoop-ozone/dist/src/main/compose/ozone-om-ha/docker-config @@ -22,7 +22,6 @@ OZONE-SITE.XML_ozone.om.address.omservice.om1=om1 OZONE-SITE.XML_ozone.om.address.omservice.om2=om2 OZONE-SITE.XML_ozone.om.address.omservice.om3=om3 OZONE-SITE.XML_ozone.om.ratis.enable=true -OZONE-SITE.XML_ozone.om.ratis.segment.size=1KB OZONE-SITE.XML_ozone.scm.names=scm OZONE-SITE.XML_ozone.scm.datanode.id.dir=/data OZONE-SITE.XML_ozone.scm.block.client.address=scm From ca4b385ccb9784f98df580bcb60a1b2440b10589 Mon Sep 17 00:00:00 2001 From: Hanisha Koneru Date: Wed, 25 Mar 2020 10:23:44 -0700 Subject: [PATCH 6/6] Enabling Multiple Failover test --- .../src/main/smoketest/omha/testOMHA.robot | 59 +++++++++---------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/hadoop-ozone/dist/src/main/smoketest/omha/testOMHA.robot b/hadoop-ozone/dist/src/main/smoketest/omha/testOMHA.robot index 7896aefac571..2ca618cc3267 100644 --- a/hadoop-ozone/dist/src/main/smoketest/omha/testOMHA.robot +++ b/hadoop-ozone/dist/src/main/smoketest/omha/testOMHA.robot @@ -152,35 +152,34 @@ Test Multiple Failovers Start OM ${leaderOM} END -##Disabling inconsistent test -#Restart OM and Verify Ratis Logs -# Set Test Variable ${OM_HOST} om2 -# Set Test Variable ${keyBase} testOMRestart_ -# -# # Stop 1 OM and get the Logs present in its Ratis Dir -# Stop OM ${OM_HOST} -# ${numLogsBefore} @{logsBefore} = Get Ratis Logs ${OM_HOST} -# ${leaderOM} = Get OM Leader Node -# -# # Perform write operations to advance the Ratis log index till a new Log segment is created -# FOR ${INDEX} IN RANGE 20 -# Set Test Variable ${keyPrefix} ${keyBase}${INDEX} -# Put Multiple Keys 5 ${keyPrefix} ${TEST_FILE} -# ${numLogsLeader} @{logsLeader} = Get Ratis Logs ${leaderOM} -# EXIT FOR LOOP IF ${numLogsLeader} > ${numLogsBefore} -# END -# Should Be True ${numLogsLeader} > ${numLogsBefore} Cannot test OM Restart as Ratis did not start new log segment. -# -# # Restart the stopped OM and wait for Ratis to catch up with Leader OM -# Start OM ${OM_HOST} -# FOR ${INDEX} IN RANGE 300 -# ${numLogsAfter} @{logsAfter} = Get Ratis Logs ${OM_HOST} -# EXIT FOR LOOP IF ${numLogsAfter} >= ${numLogsLeader} -# Sleep 1s -# END -# Should Be True ${numLogsAfter} >= ${numLogsLeader} Restarted OM did not catch up with Leader OM -# -# # Verify that the logs match with the Leader OMs logs -# List Should Contain Sub List ${logsAfter} ${logsLeader} +Restart OM and Verify Ratis Logs + Set Test Variable ${OM_HOST} om2 + Set Test Variable ${keyBase} testOMRestart_ + + # Stop 1 OM and get the Logs present in its Ratis Dir + Stop OM ${OM_HOST} + ${numLogsBefore} @{logsBefore} = Get Ratis Logs ${OM_HOST} + ${leaderOM} = Get OM Leader Node + + # Perform write operations to advance the Ratis log index till a new Log segment is created + FOR ${INDEX} IN RANGE 20 + Set Test Variable ${keyPrefix} ${keyBase}${INDEX} + Put Multiple Keys 5 ${keyPrefix} ${TEST_FILE} + ${numLogsLeader} @{logsLeader} = Get Ratis Logs ${leaderOM} + EXIT FOR LOOP IF ${numLogsLeader} > ${numLogsBefore} + END + Should Be True ${numLogsLeader} > ${numLogsBefore} Cannot test OM Restart as Ratis did not start new log segment. + + # Restart the stopped OM and wait for Ratis to catch up with Leader OM + Start OM ${OM_HOST} + FOR ${INDEX} IN RANGE 300 + ${numLogsAfter} @{logsAfter} = Get Ratis Logs ${OM_HOST} + EXIT FOR LOOP IF ${numLogsAfter} >= ${numLogsLeader} + Sleep 1s + END + Should Be True ${numLogsAfter} >= ${numLogsLeader} Restarted OM did not catch up with Leader OM + + # Verify that the logs match with the Leader OMs logs + List Should Contain Sub List ${logsAfter} ${logsLeader}