diff --git a/hadoop-ozone/dist/src/main/compose/upgrade/compose/ha/docker-config b/hadoop-ozone/dist/src/main/compose/upgrade/compose/ha/docker-config index 09ed5ab73cca..4b53107b6dff 100644 --- a/hadoop-ozone/dist/src/main/compose/upgrade/compose/ha/docker-config +++ b/hadoop-ozone/dist/src/main/compose/upgrade/compose/ha/docker-config @@ -34,6 +34,21 @@ OZONE-SITE.XML_ozone.scm.client.address=scm OZONE-SITE.XML_hdds.datanode.dir=/data/hdds +# If SCM sends container close commands as part of upgrade finalization while +# datanodes are doing a leader election, all 3 replicas may end up in the +# CLOSING state. The replication manager must be running to later move them to +# a CLOSED state so the datanodes can progress with finalization. +# +# This config sets the amount of time SCM will wait after safemode exit to +# start the replication manager and pipeline scrubber. The default of 5 minutes +# is fine in real clusters to prevent unnecessary over-replication, +# but it is too long for this test. +OZONE-SITE.XML_hdds.scm.wait.time.after.safemode.exit=5s +# If datanodes take too long to close pipelines during finalization, let the +# scrubber force close them to move the test forward. +OZONE-SITE.XML_ozone.scm.pipeline.scrub.interval=1m +OZONE-SITE.XML_ozone.scm.pipeline.allocated.timeout=2m + OZONE-SITE.XML_ozone.recon.db.dir=/data/metadata/recon OZONE-SITE.XML_ozone.recon.om.snapshot.task.interval.delay=1m diff --git a/hadoop-ozone/dist/src/main/smoketest/upgrade/finalize.robot b/hadoop-ozone/dist/src/main/smoketest/upgrade/finalize.robot index b70f3ca14781..521147ff6a62 100644 --- a/hadoop-ozone/dist/src/main/smoketest/upgrade/finalize.robot +++ b/hadoop-ozone/dist/src/main/smoketest/upgrade/finalize.robot @@ -16,7 +16,7 @@ *** Settings *** Documentation Finalize Upgrade of OMs and SCM Resource ../commonlib.robot -Test Timeout 5 minutes +Test Timeout 10 minutes Test Setup Run Keyword if '${SECURITY_ENABLED}' == 'true' Kinit test user testuser testuser.keytab *** Test Cases ***