diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 28dd907c0788..331da2385e64 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -94,10 +94,11 @@ jobs: - name: Cache for maven dependencies uses: actions/cache@v3 with: - path: ~/.m2/repository - key: maven-repo-${{ hashFiles('**/pom.xml') }}-${{ matrix.java }} + path: | + ~/.m2/repository + !~/.m2/repository/org/apache/ozone + key: maven-repo-${{ hashFiles('**/pom.xml') }} restore-keys: | - maven-repo-${{ hashFiles('**/pom.xml') }} maven-repo- - name: Setup java uses: actions/setup-java@v3 @@ -129,12 +130,6 @@ jobs: path: | ~/.m2/repository/org/apache/ozone retention-days: 1 - - name: Delete temporary build artifacts before caching - run: | - #Never cache local artifacts - rm -rf ~/.m2/repository/org/apache/ozone/hdds* - rm -rf ~/.m2/repository/org/apache/ozone/ozone* - if: always() compile: needs: - build-info @@ -162,12 +157,13 @@ jobs: git config user.email 'noreply@github.com' git commit --allow-empty -a -m 'workaround for HADOOP-19011' - name: Cache for maven dependencies - uses: actions/cache@v3 + uses: actions/cache/restore@v3 with: - path: ~/.m2/repository - key: maven-repo-${{ hashFiles('**/pom.xml') }}-${{ matrix.java }} + path: | + ~/.m2/repository + !~/.m2/repository/org/apache/ozone + key: maven-repo-${{ hashFiles('**/pom.xml') }} restore-keys: | - maven-repo-${{ hashFiles('**/pom.xml') }} maven-repo- - name: Setup java uses: actions/setup-java@v3 @@ -179,12 +175,6 @@ jobs: env: OZONE_WITH_COVERAGE: false GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }} - - name: Delete temporary build artifacts before caching - run: | - #Never cache local artifacts - rm -rf ~/.m2/repository/org/apache/ozone/hdds* - rm -rf ~/.m2/repository/org/apache/ozone/ozone* - if: always() basic: needs: - build-info @@ -205,13 +195,13 @@ jobs: fetch-depth: 0 if: matrix.check == 'bats' - name: Cache for maven dependencies - uses: actions/cache@v3 + uses: actions/cache/restore@v3 with: - path: ~/.m2/repository - key: maven-repo-${{ hashFiles('**/pom.xml') }}-8-${{ matrix.check }} + path: | + ~/.m2/repository + !~/.m2/repository/org/apache/ozone + key: maven-repo-${{ hashFiles('**/pom.xml') }} restore-keys: | - maven-repo-${{ hashFiles('**/pom.xml') }}-8 - maven-repo-${{ hashFiles('**/pom.xml') }} maven-repo- if: ${{ !contains('author,bats,docs', matrix.check) }} - name: Setup java @@ -234,12 +224,6 @@ jobs: name: ${{ matrix.check }} path: target/${{ matrix.check }} continue-on-error: true - - name: Delete temporary build artifacts before caching - run: | - #Never cache local artifacts - rm -rf ~/.m2/repository/org/apache/ozone/hdds* - rm -rf ~/.m2/repository/org/apache/ozone/ozone* - if: always() unit: needs: - build-info @@ -255,13 +239,13 @@ jobs: - name: Checkout project uses: actions/checkout@v3 - name: Cache for maven dependencies - uses: actions/cache@v3 + uses: actions/cache/restore@v3 with: - path: ~/.m2/repository - key: maven-repo-${{ hashFiles('**/pom.xml') }}-8-${{ matrix.profile }} + path: | + ~/.m2/repository + !~/.m2/repository/org/apache/ozone + key: maven-repo-${{ hashFiles('**/pom.xml') }} restore-keys: | - maven-repo-${{ hashFiles('**/pom.xml') }}-8 - maven-repo-${{ hashFiles('**/pom.xml') }} maven-repo- - name: Setup java uses: actions/setup-java@v3 @@ -283,12 +267,6 @@ jobs: name: ${{ matrix.check }} path: target/${{ matrix.check }} continue-on-error: true - - name: Delete temporary build artifacts before caching - run: | - #Never cache local artifacts - rm -rf 
~/.m2/repository/org/apache/ozone/hdds* - rm -rf ~/.m2/repository/org/apache/ozone/ozone* - if: always() dependency: needs: - build-info @@ -328,6 +306,15 @@ jobs: steps: - name: Checkout project uses: actions/checkout@v3 + - name: Cache for maven dependencies + uses: actions/cache/restore@v3 + with: + path: | + ~/.m2/repository + !~/.m2/repository/org/apache/ozone + key: maven-repo-${{ hashFiles('**/pom.xml') }} + restore-keys: | + maven-repo- - name: Download Ozone repo id: download-ozone-repo uses: actions/download-artifact@v3 @@ -456,13 +443,13 @@ jobs: - name: Checkout project uses: actions/checkout@v3 - name: Cache for maven dependencies - uses: actions/cache@v3 + uses: actions/cache/restore@v3 with: - path: ~/.m2/repository - key: maven-repo-${{ hashFiles('**/pom.xml') }}-8-${{ matrix.profile }} + path: | + ~/.m2/repository + !~/.m2/repository/org/apache/ozone + key: maven-repo-${{ hashFiles('**/pom.xml') }} restore-keys: | - maven-repo-${{ hashFiles('**/pom.xml') }}-8 - maven-repo-${{ hashFiles('**/pom.xml') }} maven-repo- - name: Download Ozone repo id: download-ozone-repo @@ -502,12 +489,6 @@ jobs: name: it-${{ matrix.profile }} path: target/integration continue-on-error: true - - name: Delete temporary build artifacts before caching - run: | - #Never cache local artifacts - rm -rf ~/.m2/repository/org/apache/ozone/hdds* - rm -rf ~/.m2/repository/org/apache/ozone/ozone* - if: always() coverage: runs-on: ubuntu-20.04 timeout-minutes: 30 @@ -522,13 +503,13 @@ jobs: with: fetch-depth: 0 - name: Cache for maven dependencies - uses: actions/cache@v3 + uses: actions/cache/restore@v3 with: - path: ~/.m2/repository - key: maven-repo-${{ hashFiles('**/pom.xml') }}-8-${{ github.job }} + path: | + ~/.m2/repository + !~/.m2/repository/org/apache/ozone + key: maven-repo-${{ hashFiles('**/pom.xml') }} restore-keys: | - maven-repo-${{ hashFiles('**/pom.xml') }}-8 - maven-repo-${{ hashFiles('**/pom.xml') }} maven-repo- - name: Download artifacts uses: actions/download-artifact@v3 @@ -557,9 +538,3 @@ jobs: name: coverage path: target/coverage continue-on-error: true - - name: Delete temporary build artifacts before caching - run: | - #Never cache local artifacts - rm -rf ~/.m2/repository/org/apache/ozone/hdds* - rm -rf ~/.m2/repository/org/apache/ozone/ozone* - if: always() diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/OzoneClientConfig.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/OzoneClientConfig.java index 80f86a677a7c..44af34cb919c 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/OzoneClientConfig.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/OzoneClientConfig.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hdds.scm; +import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.hdds.conf.Config; import org.apache.hadoop.hdds.conf.ConfigGroup; import org.apache.hadoop.hdds.conf.ConfigTag; @@ -254,6 +255,7 @@ public long getStreamBufferFlushSize() { return streamBufferFlushSize; } + @VisibleForTesting public void setStreamBufferFlushSize(long streamBufferFlushSize) { this.streamBufferFlushSize = streamBufferFlushSize; } @@ -262,6 +264,7 @@ public int getStreamBufferSize() { return streamBufferSize; } + @VisibleForTesting public void setStreamBufferSize(int streamBufferSize) { this.streamBufferSize = streamBufferSize; } @@ -270,6 +273,7 @@ public boolean isStreamBufferFlushDelay() { return streamBufferFlushDelay; } + @VisibleForTesting public void 
setStreamBufferFlushDelay(boolean streamBufferFlushDelay) { this.streamBufferFlushDelay = streamBufferFlushDelay; } @@ -278,6 +282,7 @@ public long getStreamBufferMaxSize() { return streamBufferMaxSize; } + @VisibleForTesting public void setStreamBufferMaxSize(long streamBufferMaxSize) { this.streamBufferMaxSize = streamBufferMaxSize; } diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/StreamBufferArgs.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/StreamBufferArgs.java new file mode 100644 index 000000000000..4772cb90fb37 --- /dev/null +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/StreamBufferArgs.java @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdds.scm; + +import org.apache.hadoop.hdds.client.ECReplicationConfig; +import org.apache.hadoop.hdds.client.ReplicationConfig; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; + +/** + * This class encapsulates the arguments that are + * required for Ozone client StreamBuffer. + */ +public class StreamBufferArgs { + + private int streamBufferSize; + private long streamBufferFlushSize; + private long streamBufferMaxSize; + private boolean streamBufferFlushDelay; + + protected StreamBufferArgs(Builder builder) { + this.streamBufferSize = builder.bufferSize; + this.streamBufferFlushSize = builder.bufferFlushSize; + this.streamBufferMaxSize = builder.bufferMaxSize; + this.streamBufferFlushDelay = builder.streamBufferFlushDelay; + } + + public int getStreamBufferSize() { + return streamBufferSize; + } + + public long getStreamBufferFlushSize() { + return streamBufferFlushSize; + } + + public long getStreamBufferMaxSize() { + return streamBufferMaxSize; + } + + public boolean isStreamBufferFlushDelay() { + return streamBufferFlushDelay; + } + + public void setStreamBufferFlushDelay(boolean streamBufferFlushDelay) { + this.streamBufferFlushDelay = streamBufferFlushDelay; + } + + protected void setStreamBufferSize(int streamBufferSize) { + this.streamBufferSize = streamBufferSize; + } + + protected void setStreamBufferFlushSize(long streamBufferFlushSize) { + this.streamBufferFlushSize = streamBufferFlushSize; + } + + protected void setStreamBufferMaxSize(long streamBufferMaxSize) { + this.streamBufferMaxSize = streamBufferMaxSize; + } + + /** + * Builder class for StreamBufferArgs. 
+ */ + public static class Builder { + private int bufferSize; + private long bufferFlushSize; + private long bufferMaxSize; + private boolean streamBufferFlushDelay; + + public Builder setBufferSize(int bufferSize) { + this.bufferSize = bufferSize; + return this; + } + + public Builder setBufferFlushSize(long bufferFlushSize) { + this.bufferFlushSize = bufferFlushSize; + return this; + } + + public Builder setBufferMaxSize(long bufferMaxSize) { + this.bufferMaxSize = bufferMaxSize; + return this; + } + + public Builder setStreamBufferFlushDelay(boolean streamBufferFlushDelay) { + this.streamBufferFlushDelay = streamBufferFlushDelay; + return this; + } + + public StreamBufferArgs build() { + return new StreamBufferArgs(this); + } + + public static Builder getNewBuilder() { + return new Builder(); + } + } + + public static StreamBufferArgs getDefaultStreamBufferArgs( + ReplicationConfig replicationConfig, OzoneClientConfig clientConfig) { + int bufferSize; + long flushSize; + long bufferMaxSize; + boolean streamBufferFlushDelay = clientConfig.isStreamBufferFlushDelay(); + if (replicationConfig.getReplicationType() == HddsProtos.ReplicationType.EC) { + bufferSize = ((ECReplicationConfig) replicationConfig).getEcChunkSize(); + flushSize = ((ECReplicationConfig) replicationConfig).getEcChunkSize(); + bufferMaxSize = ((ECReplicationConfig) replicationConfig).getEcChunkSize(); + } else { + bufferSize = clientConfig.getStreamBufferSize(); + flushSize = clientConfig.getStreamBufferFlushSize(); + bufferMaxSize = clientConfig.getStreamBufferMaxSize(); + } + + return Builder.getNewBuilder() + .setBufferSize(bufferSize) + .setBufferFlushSize(flushSize) + .setBufferMaxSize(bufferMaxSize) + .setStreamBufferFlushDelay(streamBufferFlushDelay) + .build(); + } +} diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockOutputStream.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockOutputStream.java index 9988fb258ebb..b97165084f6e 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockOutputStream.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/BlockOutputStream.java @@ -38,6 +38,7 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.KeyValue; import org.apache.hadoop.hdds.scm.ContainerClientMetrics; import org.apache.hadoop.hdds.scm.OzoneClientConfig; +import org.apache.hadoop.hdds.scm.StreamBufferArgs; import org.apache.hadoop.hdds.scm.XceiverClientFactory; import org.apache.hadoop.hdds.scm.XceiverClientReply; import org.apache.hadoop.hdds.scm.XceiverClientSpi; @@ -88,6 +89,7 @@ public class BlockOutputStream extends OutputStream { private XceiverClientFactory xceiverClientFactory; private XceiverClientSpi xceiverClient; private OzoneClientConfig config; + private StreamBufferArgs streamBufferArgs; private int chunkIndex; private final AtomicLong chunkOffset = new AtomicLong(); @@ -134,6 +136,7 @@ public class BlockOutputStream extends OutputStream { * @param pipeline pipeline where block will be written * @param bufferPool pool of buffers */ + @SuppressWarnings("checkstyle:ParameterNumber") public BlockOutputStream( BlockID blockID, XceiverClientFactory xceiverClientManager, @@ -141,7 +144,7 @@ public BlockOutputStream( BufferPool bufferPool, OzoneClientConfig config, Token token, - ContainerClientMetrics clientMetrics + ContainerClientMetrics clientMetrics, StreamBufferArgs streamBufferArgs ) throws IOException { this.xceiverClientFactory = xceiverClientManager; 
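For orientation, a minimal usage sketch (not part of the patch) of the StreamBufferArgs factory introduced above: for EC keys the buffer, flush and max sizes all collapse to the EC chunk size, while Ratis keys keep the values from OzoneClientConfig. The OzoneConfiguration setup and the particular replication configs are illustrative assumptions; only getDefaultStreamBufferArgs itself comes from this change.

```java
import org.apache.hadoop.hdds.client.ECReplicationConfig;
import org.apache.hadoop.hdds.client.RatisReplicationConfig;
import org.apache.hadoop.hdds.conf.OzoneConfiguration;
import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationFactor;
import org.apache.hadoop.hdds.scm.OzoneClientConfig;
import org.apache.hadoop.hdds.scm.StreamBufferArgs;

// Illustrative sketch only: compares the defaults produced for EC and Ratis keys.
public final class StreamBufferArgsSketch {
  public static void main(String[] args) {
    OzoneClientConfig clientConfig =
        new OzoneConfiguration().getObject(OzoneClientConfig.class);

    // EC: all three sizes are derived from the EC chunk size.
    StreamBufferArgs ecArgs = StreamBufferArgs.getDefaultStreamBufferArgs(
        new ECReplicationConfig(3, 2), clientConfig);

    // Ratis: values come straight from the client configuration.
    StreamBufferArgs ratisArgs = StreamBufferArgs.getDefaultStreamBufferArgs(
        RatisReplicationConfig.getInstance(ReplicationFactor.THREE), clientConfig);

    System.out.println("EC max buffer size:    " + ecArgs.getStreamBufferMaxSize());
    System.out.println("Ratis max buffer size: " + ratisArgs.getStreamBufferMaxSize());
  }
}
```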
this.config = config; @@ -166,12 +169,12 @@ public BlockOutputStream( //number of buffers used before doing a flush refreshCurrentBuffer(); - flushPeriod = (int) (config.getStreamBufferFlushSize() / config + flushPeriod = (int) (streamBufferArgs.getStreamBufferFlushSize() / streamBufferArgs .getStreamBufferSize()); Preconditions .checkArgument( - (long) flushPeriod * config.getStreamBufferSize() == config + (long) flushPeriod * streamBufferArgs.getStreamBufferSize() == streamBufferArgs .getStreamBufferFlushSize()); // A single thread executor handle the responses of async requests @@ -185,6 +188,7 @@ public BlockOutputStream( config.getBytesPerChecksum()); this.clientMetrics = clientMetrics; this.pipeline = pipeline; + this.streamBufferArgs = streamBufferArgs; } void refreshCurrentBuffer() { @@ -321,7 +325,7 @@ private void updateFlushLength() { } private boolean isBufferPoolFull() { - return bufferPool.computeBufferData() == config.getStreamBufferMaxSize(); + return bufferPool.computeBufferData() == streamBufferArgs.getStreamBufferMaxSize(); } /** @@ -339,7 +343,7 @@ public void writeOnRetry(long len) throws IOException { if (LOG.isDebugEnabled()) { LOG.debug("Retrying write length {} for blockID {}", len, blockID); } - Preconditions.checkArgument(len <= config.getStreamBufferMaxSize()); + Preconditions.checkArgument(len <= streamBufferArgs.getStreamBufferMaxSize()); int count = 0; while (len > 0) { ChunkBuffer buffer = bufferPool.getBuffer(count); @@ -355,13 +359,13 @@ public void writeOnRetry(long len) throws IOException { // the buffer. We should just validate // if we wrote data of size streamBufferMaxSize/streamBufferFlushSize to // call for handling full buffer/flush buffer condition. - if (writtenDataLength % config.getStreamBufferFlushSize() == 0) { + if (writtenDataLength % streamBufferArgs.getStreamBufferFlushSize() == 0) { // reset the position to zero as now we will be reading the // next buffer in the list updateFlushLength(); executePutBlock(false, false); } - if (writtenDataLength == config.getStreamBufferMaxSize()) { + if (writtenDataLength == streamBufferArgs.getStreamBufferMaxSize()) { handleFullBuffer(); } } @@ -518,9 +522,9 @@ void putFlushFuture(long flushPos, public void flush() throws IOException { if (xceiverClientFactory != null && xceiverClient != null && bufferPool != null && bufferPool.getSize() > 0 - && (!config.isStreamBufferFlushDelay() || + && (!streamBufferArgs.isStreamBufferFlushDelay() || writtenDataLength - totalDataFlushedLength - >= config.getStreamBufferSize())) { + >= streamBufferArgs.getStreamBufferSize())) { handleFlush(false); } } diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ECBlockOutputStream.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ECBlockOutputStream.java index 90cf4743f8dd..1d7fdc1df60b 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ECBlockOutputStream.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/ECBlockOutputStream.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ChunkInfo; import org.apache.hadoop.hdds.scm.ContainerClientMetrics; import org.apache.hadoop.hdds.scm.OzoneClientConfig; +import org.apache.hadoop.hdds.scm.StreamBufferArgs; import org.apache.hadoop.hdds.scm.XceiverClientFactory; import org.apache.hadoop.hdds.scm.XceiverClientReply; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; @@ -66,6 +67,7 @@ public class ECBlockOutputStream extends 
BlockOutputStream { * @param pipeline pipeline where block will be written * @param bufferPool pool of buffers */ + @SuppressWarnings("checkstyle:ParameterNumber") public ECBlockOutputStream( BlockID blockID, XceiverClientFactory xceiverClientManager, @@ -73,10 +75,10 @@ public ECBlockOutputStream( BufferPool bufferPool, OzoneClientConfig config, Token token, - ContainerClientMetrics clientMetrics + ContainerClientMetrics clientMetrics, StreamBufferArgs streamBufferArgs ) throws IOException { super(blockID, xceiverClientManager, - pipeline, bufferPool, config, token, clientMetrics); + pipeline, bufferPool, config, token, clientMetrics, streamBufferArgs); // In EC stream, there will be only one node in pipeline. this.datanodeDetails = pipeline.getClosestNode(); } diff --git a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/RatisBlockOutputStream.java b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/RatisBlockOutputStream.java index ede70574967a..ee708bf0de15 100644 --- a/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/RatisBlockOutputStream.java +++ b/hadoop-hdds/client/src/main/java/org/apache/hadoop/hdds/scm/storage/RatisBlockOutputStream.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos.ContainerCommandResponseProto; import org.apache.hadoop.hdds.scm.ContainerClientMetrics; import org.apache.hadoop.hdds.scm.OzoneClientConfig; +import org.apache.hadoop.hdds.scm.StreamBufferArgs; import org.apache.hadoop.hdds.scm.XceiverClientFactory; import org.apache.hadoop.hdds.scm.XceiverClientReply; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; @@ -71,6 +72,7 @@ public class RatisBlockOutputStream extends BlockOutputStream * @param blockID block ID * @param bufferPool pool of buffers */ + @SuppressWarnings("checkstyle:ParameterNumber") public RatisBlockOutputStream( BlockID blockID, XceiverClientFactory xceiverClientManager, @@ -78,10 +80,10 @@ public RatisBlockOutputStream( BufferPool bufferPool, OzoneClientConfig config, Token token, - ContainerClientMetrics clientMetrics + ContainerClientMetrics clientMetrics, StreamBufferArgs streamBufferArgs ) throws IOException { super(blockID, xceiverClientManager, pipeline, - bufferPool, config, token, clientMetrics); + bufferPool, config, token, clientMetrics, streamBufferArgs); this.commitWatcher = new CommitWatcher(bufferPool, getXceiverClient()); } diff --git a/hadoop-hdds/client/src/test/java/org/apache/hadoop/hdds/scm/storage/TestBlockOutputStreamCorrectness.java b/hadoop-hdds/client/src/test/java/org/apache/hadoop/hdds/scm/storage/TestBlockOutputStreamCorrectness.java index a2a0832bdb4f..29c0798df77d 100644 --- a/hadoop-hdds/client/src/test/java/org/apache/hadoop/hdds/scm/storage/TestBlockOutputStreamCorrectness.java +++ b/hadoop-hdds/client/src/test/java/org/apache/hadoop/hdds/scm/storage/TestBlockOutputStreamCorrectness.java @@ -36,6 +36,7 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; import org.apache.hadoop.hdds.scm.ContainerClientMetrics; import org.apache.hadoop.hdds.scm.OzoneClientConfig; +import org.apache.hadoop.hdds.scm.StreamBufferArgs; import org.apache.hadoop.hdds.scm.XceiverClientManager; import org.apache.hadoop.hdds.scm.XceiverClientReply; import org.apache.hadoop.hdds.scm.XceiverClientSpi; @@ -103,6 +104,8 @@ private BlockOutputStream createBlockOutputStream(BufferPool bufferPool) config.setStreamBufferFlushSize(16 * 1024 * 1024); config.setChecksumType(ChecksumType.NONE); 
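Stepping back to the flush-period computation that now reads from StreamBufferArgs in BlockOutputStream: a small self-contained sketch of the arithmetic and the divisibility precondition. The literal sizes below are chosen purely for illustration and are not taken from the patch.

```java
import com.google.common.base.Preconditions;

// Sketch of the flushPeriod derivation: a putBlock is issued after flushPeriod full
// buffers, so the flush size must be an exact multiple of the buffer size.
public final class FlushPeriodSketch {
  public static void main(String[] args) {
    long streamBufferFlushSize = 16L * 1024 * 1024; // 16 MiB (illustrative)
    int streamBufferSize = 4 * 1024 * 1024;         // 4 MiB (illustrative)

    int flushPeriod = (int) (streamBufferFlushSize / streamBufferSize);
    Preconditions.checkArgument(
        (long) flushPeriod * streamBufferSize == streamBufferFlushSize,
        "flush size must be a multiple of the buffer size");

    System.out.println("flushPeriod = " + flushPeriod + " buffers"); // prints 4
  }
}
```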
config.setBytesPerChecksum(256 * 1024); + StreamBufferArgs streamBufferArgs = + StreamBufferArgs.getDefaultStreamBufferArgs(pipeline.getReplicationConfig(), config); return new RatisBlockOutputStream( new BlockID(1L, 1L), @@ -111,7 +114,7 @@ private BlockOutputStream createBlockOutputStream(BufferPool bufferPool) bufferPool, config, null, - ContainerClientMetrics.acquire()); + ContainerClientMetrics.acquire(), streamBufferArgs); } /** diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/LeakDetector.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/LeakDetector.java new file mode 100644 index 000000000000..67f5c2f2bbc5 --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/LeakDetector.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.hadoop.hdds.utils; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.lang.ref.ReferenceQueue; +import java.util.Collections; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; + +/** + * Simple general resource leak detector using {@link ReferenceQueue} and {@link java.lang.ref.WeakReference} to + * observe resource object life-cycle and assert proper resource closure before they are GCed. + * + *

+ * Example usage: + * + *

 {@code
+ * class MyResource implements AutoCloseable {
+ *   static final LeakDetector LEAK_DETECTOR = new LeakDetector("MyResource");
+ *
+ *   private final LeakTracker leakTracker = LEAK_DETECTOR.track(this, () -> {
+ *      // report leaks, don't refer to the original object (MyResource) here.
+ *      System.out.println("MyResource is not closed before being discarded.");
+ *   });
+ *
+ *   @Override
+ *   public void close() {
+ *     // proper resource cleanup...
+ *     // inform tracker that this object is closed properly.
+ *     leakTracker.close();
+ *   }
+ * }
+ *
+ * }
+ */ +public class LeakDetector { + public static final Logger LOG = LoggerFactory.getLogger(LeakDetector.class); + private final ReferenceQueue queue = new ReferenceQueue<>(); + private final Set allLeaks = Collections.newSetFromMap(new ConcurrentHashMap<>()); + private final String name; + + public LeakDetector(String name) { + this.name = name; + start(); + } + + private void start() { + Thread t = new Thread(this::run); + t.setName(LeakDetector.class.getSimpleName() + "-" + name); + t.setDaemon(true); + LOG.info("Starting leak detector thread {}.", name); + t.start(); + } + + private void run() { + while (true) { + try { + LeakTracker tracker = (LeakTracker) queue.remove(); + // Original resource already been GCed, if tracker is not closed yet, + // report a leak. + if (allLeaks.remove(tracker)) { + tracker.reportLeak(); + } + } catch (InterruptedException e) { + LOG.warn("Thread interrupted, exiting.", e); + break; + } + } + + LOG.warn("Exiting leak detector {}.", name); + } + + public LeakTracker track(Object leakable, Runnable reportLeak) { + // A rate filter can be put here to only track a subset of all objects, e.g. 5%, 10%, + // if we have proofs that leak tracking impacts performance, or a single LeakDetector + // thread can't keep up with the pace of object allocation. + // For now, it looks effective enough and let keep it simple. + LeakTracker tracker = new LeakTracker(leakable, queue, allLeaks, reportLeak); + allLeaks.add(tracker); + return tracker; + } +} diff --git a/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/LeakTracker.java b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/LeakTracker.java new file mode 100644 index 000000000000..6103d520ca8a --- /dev/null +++ b/hadoop-hdds/common/src/main/java/org/apache/hadoop/hdds/utils/LeakTracker.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +package org.apache.hadoop.hdds.utils; + +import java.lang.ref.ReferenceQueue; +import java.lang.ref.WeakReference; +import java.util.Set; + +/** + * A token to track resource closure. + * + * @see LeakDetector + */ +public class LeakTracker extends WeakReference { + private final Set allLeaks; + private final Runnable leakReporter; + LeakTracker(Object referent, ReferenceQueue referenceQueue, + Set allLeaks, Runnable leakReporter) { + super(referent, referenceQueue); + this.allLeaks = allLeaks; + this.leakReporter = leakReporter; + } + + /** + * Called by the tracked resource when closing. 
+ */ + public void close() { + allLeaks.remove(this); + } + + void reportLeak() { + leakReporter.run(); + } +} diff --git a/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/resource/TestLeakDetector.java b/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/resource/TestLeakDetector.java new file mode 100644 index 000000000000..4a60fcc8a4d5 --- /dev/null +++ b/hadoop-hdds/common/src/test/java/org/apache/hadoop/hdds/resource/TestLeakDetector.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdds.resource; + +import org.apache.hadoop.hdds.utils.LeakDetector; +import org.apache.hadoop.hdds.utils.LeakTracker; +import org.junit.jupiter.api.Test; + +import java.util.concurrent.atomic.AtomicInteger; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +/** + * Test LeakDetector. + */ +public class TestLeakDetector { + private static final LeakDetector LEAK_DETECTOR = new LeakDetector("test"); + private AtomicInteger leaks = new AtomicInteger(0); + + @Test + public void testLeakDetector() throws Exception { + // create and close resource => no leaks. + createResource(true); + System.gc(); + Thread.sleep(100); + assertEquals(0, leaks.get()); + + // create and not close => leaks. + createResource(false); + System.gc(); + Thread.sleep(100); + assertEquals(1, leaks.get()); + } + + private void createResource(boolean close) throws Exception { + MyResource resource = new MyResource(leaks); + if (close) { + resource.close(); + } + } + + private static final class MyResource implements AutoCloseable { + private final LeakTracker leakTracker; + + private MyResource(final AtomicInteger leaks) { + leakTracker = LEAK_DETECTOR.track(this, () -> leaks.incrementAndGet()); + } + + @Override + public void close() throws Exception { + leakTracker.close(); + } + } +} diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java index 89ba03b08e34..e702b1e6e158 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/common/states/endpoint/VersionEndpointTask.java @@ -76,10 +76,6 @@ public EndpointStateMachine.EndPointStates call() throws Exception { // If end point is passive, datanode does not need to check volumes. 
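As an aside on the LeakDetector/LeakTracker classes added earlier in this patch: a self-contained sketch of the underlying WeakReference/ReferenceQueue technique, independent of the Ozone classes. All names and the timeout are invented for the example.

```java
import java.lang.ref.Reference;
import java.lang.ref.ReferenceQueue;
import java.lang.ref.WeakReference;

// Minimal illustration of the mechanism LeakDetector builds on: once the referent is
// garbage collected, the JVM enqueues its weak reference, and a detector thread can
// treat any still-tracked reference on the queue as a resource discarded without close().
public final class ReferenceQueueSketch {
  public static void main(String[] args) throws InterruptedException {
    ReferenceQueue<Object> queue = new ReferenceQueue<>();
    Object resource = new Object();
    WeakReference<Object> tracker = new WeakReference<>(resource, queue);

    resource = null;  // drop the only strong reference
    System.gc();      // request collection (best effort)

    Reference<?> enqueued = queue.remove(5000);
    System.out.println(enqueued == tracker
        ? "referent collected -> a real detector would report a leak"
        : "referent not collected within the timeout");
  }
}
```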
String scmId = response.getValue(OzoneConsts.SCM_ID); String clusterId = response.getValue(OzoneConsts.CLUSTER_ID); - DatanodeLayoutStorage layoutStorage - = new DatanodeLayoutStorage(configuration); - layoutStorage.setClusterId(clusterId); - layoutStorage.persistCurrentState(); Preconditions.checkNotNull(scmId, "Reply from SCM: scmId cannot be null"); @@ -92,6 +88,11 @@ public EndpointStateMachine.EndPointStates call() throws Exception { // Check HddsVolumes checkVolumeSet(ozoneContainer.getVolumeSet(), scmId, clusterId); + DatanodeLayoutStorage layoutStorage + = new DatanodeLayoutStorage(configuration); + layoutStorage.setClusterId(clusterId); + layoutStorage.persistCurrentState(); + // Start the container services after getting the version information ozoneContainer.start(clusterId); } diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinator.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinator.java index 80348dbe45ef..24e76821f9c5 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinator.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/ec/reconstruction/ECReconstructionCoordinator.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.scm.ContainerClientMetrics; import org.apache.hadoop.hdds.scm.OzoneClientConfig; +import org.apache.hadoop.hdds.scm.StreamBufferArgs; import org.apache.hadoop.hdds.scm.container.ContainerID; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.pipeline.PipelineID; @@ -223,13 +224,15 @@ private ECBlockOutputStream getECBlockOutputStream( BlockLocationInfo blockLocationInfo, DatanodeDetails datanodeDetails, ECReplicationConfig repConfig, int replicaIndex, OzoneClientConfig configuration) throws IOException { + StreamBufferArgs streamBufferArgs = + StreamBufferArgs.getDefaultStreamBufferArgs(repConfig, configuration); return new ECBlockOutputStream( blockLocationInfo.getBlockID(), containerOperationClient.getXceiverClientManager(), containerOperationClient.singleNodePipeline(datanodeDetails, repConfig, replicaIndex), BufferPool.empty(), configuration, - blockLocationInfo.getToken(), clientMetrics); + blockLocationInfo.getToken(), clientMetrics, streamBufferArgs); } @VisibleForTesting diff --git a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/AbstractDatanodeStore.java b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/AbstractDatanodeStore.java index 50303bd99ba6..b451071d7030 100644 --- a/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/AbstractDatanodeStore.java +++ b/hadoop-hdds/container-service/src/main/java/org/apache/hadoop/ozone/container/metadata/AbstractDatanodeStore.java @@ -32,14 +32,12 @@ import org.apache.hadoop.hdds.utils.db.TableIterator; import org.apache.hadoop.hdds.utils.db.managed.ManagedColumnFamilyOptions; import org.apache.hadoop.hdds.utils.db.managed.ManagedDBOptions; -import org.apache.hadoop.hdds.utils.db.managed.ManagedStatistics; import org.apache.hadoop.ozone.container.common.helpers.BlockData; import org.apache.hadoop.ozone.container.common.helpers.ChunkInfoList; import org.apache.hadoop.ozone.container.common.interfaces.BlockIterator; import 
org.apache.hadoop.ozone.container.common.statemachine.DatanodeConfiguration; import org.apache.hadoop.ozone.container.common.utils.db.DatanodeDBProfile; import org.rocksdb.InfoLogLevel; -import org.rocksdb.StatsLevel; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -49,9 +47,6 @@ import static org.apache.hadoop.hdds.HddsConfigKeys.HDDS_DB_PROFILE; import static org.apache.hadoop.hdds.utils.db.DBStoreBuilder.HDDS_DEFAULT_DB_PROFILE; -import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_METADATA_STORE_ROCKSDB_STATISTICS; -import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_METADATA_STORE_ROCKSDB_STATISTICS_DEFAULT; -import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_METADATA_STORE_ROCKSDB_STATISTICS_OFF; /** * Implementation of the {@link DatanodeStore} interface that contains @@ -113,16 +108,6 @@ public void start(ConfigurationSource config) options.setMaxTotalWalSize(maxWalSize); } - String rocksDbStat = config.getTrimmed( - OZONE_METADATA_STORE_ROCKSDB_STATISTICS, - OZONE_METADATA_STORE_ROCKSDB_STATISTICS_DEFAULT); - - if (!rocksDbStat.equals(OZONE_METADATA_STORE_ROCKSDB_STATISTICS_OFF)) { - ManagedStatistics statistics = new ManagedStatistics(); - statistics.setStatsLevel(StatsLevel.valueOf(rocksDbStat)); - options.setStatistics(statistics); - } - DatanodeConfiguration dc = config.getObject(DatanodeConfiguration.class); // Config user log files diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/DBStoreBuilder.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/DBStoreBuilder.java index fe495e7b0618..32fcbfec6e44 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/DBStoreBuilder.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/DBStoreBuilder.java @@ -83,6 +83,9 @@ public final class DBStoreBuilder { // The column family options that will be used for any column families // added by name only (without specifying options). private ManagedColumnFamilyOptions defaultCfOptions; + // Initialize the Statistics instance if ROCKSDB_STATISTICS enabled + private ManagedStatistics statistics; + private String dbname; private Path dbPath; private String dbJmxBeanNameName; @@ -188,6 +191,11 @@ private void setDBOptionsProps(ManagedDBOptions dbOptions) { if (maxNumberOfOpenFiles != null) { dbOptions.setMaxOpenFiles(maxNumberOfOpenFiles); } + if (!rocksDbStat.equals(OZONE_METADATA_STORE_ROCKSDB_STATISTICS_OFF)) { + statistics = new ManagedStatistics(); + statistics.setStatsLevel(StatsLevel.valueOf(rocksDbStat)); + dbOptions.setStatistics(statistics); + } } /** @@ -217,7 +225,7 @@ public DBStore build() throws IOException { throw new IOException("The DB destination directory should exist."); } - return new RDBStore(dbFile, rocksDBOption, writeOptions, tableConfigs, + return new RDBStore(dbFile, rocksDBOption, statistics, writeOptions, tableConfigs, registry.build(), openReadOnly, maxFSSnapshots, dbJmxBeanNameName, enableCompactionDag, maxDbUpdatesSizeThreshold, createCheckpointDirs, configuration, threadNamePrefix); @@ -413,13 +421,6 @@ protected void log(InfoLogLevel infoLogLevel, String s) { dbOptions.setWalTtlSeconds(rocksDBConfiguration.getWalTTL()); dbOptions.setWalSizeLimitMB(rocksDBConfiguration.getWalSizeLimit()); - // Create statistics. 
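Since statistics creation moves out of AbstractDatanodeStore and DBStoreBuilder now keeps the ManagedStatistics handle so RDBStore can close it, here is a sketch of the equivalent ownership pattern in plain RocksJava rather than the Ozone managed wrappers; the database path and the ticker queried are illustrative assumptions.

```java
import org.rocksdb.Options;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
import org.rocksdb.Statistics;
import org.rocksdb.StatsLevel;
import org.rocksdb.TickerType;

// Sketch: the caller creates the Statistics object, wires it into the options, and is
// responsible for closing it together with the DB, mirroring what RDBStore now does.
public final class StatisticsOwnershipSketch {
  public static void main(String[] args) throws RocksDBException {
    RocksDB.loadLibrary();
    try (Statistics statistics = new Statistics();
         Options options = new Options().setCreateIfMissing(true)) {
      statistics.setStatsLevel(StatsLevel.ALL);
      options.setStatistics(statistics);

      try (RocksDB db = RocksDB.open(options, "/tmp/stats-demo")) {
        db.put("k".getBytes(), "v".getBytes());
        db.get("k".getBytes());
      }

      // The Statistics handle outlives the DB and is closed by its creator.
      System.out.println("keys written: "
          + statistics.getTickerCount(TickerType.NUMBER_KEYS_WRITTEN));
    }
  }
}
```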
- if (!rocksDbStat.equals(OZONE_METADATA_STORE_ROCKSDB_STATISTICS_OFF)) { - ManagedStatistics statistics = new ManagedStatistics(); - statistics.setStatsLevel(StatsLevel.valueOf(rocksDbStat)); - dbOptions.setStatistics(statistics); - } - return dbOptions; } diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBStore.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBStore.java index 65e891f9820e..3feff9314b7c 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBStore.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RDBStore.java @@ -37,6 +37,7 @@ import org.apache.hadoop.hdds.utils.db.RocksDatabase.ColumnFamily; import org.apache.hadoop.hdds.utils.db.managed.ManagedCompactRangeOptions; import org.apache.hadoop.hdds.utils.db.managed.ManagedDBOptions; +import org.apache.hadoop.hdds.utils.db.managed.ManagedStatistics; import org.apache.hadoop.hdds.utils.db.managed.ManagedTransactionLogIterator; import org.apache.hadoop.hdds.utils.db.managed.ManagedWriteOptions; import org.apache.ozone.rocksdiff.RocksDBCheckpointDiffer; @@ -78,10 +79,11 @@ public class RDBStore implements DBStore { // number in request to avoid increase in heap memory. private final long maxDbUpdatesSizeThreshold; private final ManagedDBOptions dbOptions; + private final ManagedStatistics statistics; private final String threadNamePrefix; @SuppressWarnings("parameternumber") - public RDBStore(File dbFile, ManagedDBOptions dbOptions, + public RDBStore(File dbFile, ManagedDBOptions dbOptions, ManagedStatistics statistics, ManagedWriteOptions writeOptions, Set families, CodecRegistry registry, boolean readOnly, int maxFSSnapshots, String dbJmxBeanName, boolean enableCompactionDag, @@ -98,6 +100,7 @@ public RDBStore(File dbFile, ManagedDBOptions dbOptions, codecRegistry = registry; dbLocation = dbFile; this.dbOptions = dbOptions; + this.statistics = statistics; try { if (enableCompactionDag) { @@ -120,8 +123,8 @@ public RDBStore(File dbFile, ManagedDBOptions dbOptions, if (dbJmxBeanName == null) { dbJmxBeanName = dbFile.getName(); } - metrics = RocksDBStoreMetrics.create(dbOptions.statistics(), db, - dbJmxBeanName); + // Use statistics instead of dbOptions.statistics() to avoid repeated init. 
+ metrics = RocksDBStoreMetrics.create(statistics, db, dbJmxBeanName); if (metrics == null) { LOG.warn("Metrics registration failed during RocksDB init, " + "db path :{}", dbJmxBeanName); @@ -198,6 +201,7 @@ public String getSnapshotsParentDir() { return snapshotsParentDir; } + @Override public RocksDBCheckpointDiffer getRocksDBCheckpointDiffer() { return rocksDBCheckpointDiffer; } @@ -226,12 +230,15 @@ public void close() throws IOException { } RDBMetrics.unRegister(); - IOUtils.closeQuietly(checkPointManager); + IOUtils.close(LOG, checkPointManager); if (rocksDBCheckpointDiffer != null) { RocksDBCheckpointDifferHolder .invalidateCacheEntry(rocksDBCheckpointDiffer.getMetadataDir()); } - IOUtils.closeQuietly(db); + if (statistics != null) { + IOUtils.close(LOG, statistics); + } + IOUtils.close(LOG, db); } @Override diff --git a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java index 5c0aeae5ed5f..4dd1042fde2b 100644 --- a/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java +++ b/hadoop-hdds/framework/src/main/java/org/apache/hadoop/hdds/utils/db/RocksDatabase.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hdds.utils.db.managed.ManagedWriteBatch; import org.apache.hadoop.hdds.utils.db.managed.ManagedWriteOptions; import org.apache.ozone.rocksdiff.RocksDiffUtils; +import org.apache.ratis.util.UncheckedAutoCloseable; import org.rocksdb.ColumnFamilyDescriptor; import org.rocksdb.ColumnFamilyHandle; import org.rocksdb.Holder; @@ -236,15 +237,11 @@ private RocksCheckpoint() { } public void createCheckpoint(Path path) throws IOException { - assertClose(); - try { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire()) { checkpoint.get().createCheckpoint(path.toString()); } catch (RocksDBException e) { - closeOnError(e, true); + closeOnError(e); throw toIOException(this, "createCheckpoint " + path, e); - } finally { - counter.decrementAndGet(); } } @@ -298,14 +295,10 @@ public int getID() { public void batchDelete(ManagedWriteBatch writeBatch, byte[] key) throws IOException { - assertClosed(); - try { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire()) { writeBatch.delete(getHandle(), key); } catch (RocksDBException e) { throw toIOException(this, "batchDelete key " + bytes2String(key), e); - } finally { - counter.decrementAndGet(); } } @@ -316,14 +309,10 @@ public void batchPut(ManagedWriteBatch writeBatch, byte[] key, byte[] value) LOG.debug("batchPut array value {}", bytes2String(value)); } - assertClosed(); - try { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire()) { writeBatch.put(getHandle(), key, value); } catch (RocksDBException e) { throw toIOException(this, "batchPut key " + bytes2String(key), e); - } finally { - counter.decrementAndGet(); } } @@ -334,15 +323,11 @@ public void batchPut(ManagedWriteBatch writeBatch, ByteBuffer key, LOG.debug("batchPut buffer value {}", bytes2String(value.duplicate())); } - assertClosed(); - try { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire()) { writeBatch.put(getHandle(), key.duplicate(), value); } catch (RocksDBException e) { throw toIOException(this, "batchPut ByteBuffer key " + bytes2String(key), e); - } finally { - counter.decrementAndGet(); } } @@ -350,10 +335,15 @@ public void markClosed() { isClosed.set(true); } - private void assertClosed() throws IOException { + private 
UncheckedAutoCloseable acquire() throws IOException { if (isClosed.get()) { throw new IOException("Rocks Database is closed"); } + if (counter.getAndIncrement() < 0) { + counter.getAndDecrement(); + throw new IOException("Rocks Database is closed"); + } + return counter::getAndDecrement; } @Override @@ -370,7 +360,6 @@ public String toString() { private final Map columnFamilies; private final AtomicBoolean isClosed = new AtomicBoolean(); - private final AtomicLong counter; private RocksDatabase(File dbFile, ManagedRocksDB db, @@ -389,6 +378,10 @@ private RocksDatabase(File dbFile, ManagedRocksDB db, @Override public void close() { + close(true); + } + + private void close(boolean isSync) { if (isClosed.compareAndSet(false, true)) { // Wait for all background work to be cancelled first. e.g. RDB compaction db.get().cancelAllBackgroundWork(true); @@ -399,38 +392,36 @@ public void close() { if (columnFamilies != null) { columnFamilies.values().stream().forEach(f -> f.markClosed()); } - // wait till all access to rocks db is process to avoid crash while close - while (true) { - if (counter.get() == 0) { - break; - } - try { - Thread.currentThread().sleep(1); - } catch (InterruptedException e) { - close(columnFamilies, db, descriptors, writeOptions, dbOptions); - Thread.currentThread().interrupt(); - return; - } + + if (isSync) { + waitAndClose(); + return; } - - // close when counter is 0, no more operation - close(columnFamilies, db, descriptors, writeOptions, dbOptions); + // async trigger the close event + new Thread(() -> waitAndClose(), "DBCloser-" + name).start(); } } - private void closeOnError(RocksDBException e, boolean isCounted) { - if (shouldClose(e)) { + private void waitAndClose() { + // wait till all access to rocks db is process to avoid crash while close + while (!counter.compareAndSet(0, Long.MIN_VALUE)) { try { - if (isCounted) { - counter.decrementAndGet(); - } - close(); - } finally { - if (isCounted) { - counter.incrementAndGet(); - } + Thread.currentThread().sleep(1); + } catch (InterruptedException e) { + close(columnFamilies, db, descriptors, writeOptions, dbOptions); + Thread.currentThread().interrupt(); + return; } } + + // close when counter is 0, no more operation + close(columnFamilies, db, descriptors, writeOptions, dbOptions); + } + + private void closeOnError(RocksDBException e) { + if (shouldClose(e)) { + close(false); + } } private boolean shouldClose(RocksDBException e) { @@ -442,72 +433,62 @@ private boolean shouldClose(RocksDBException e) { return false; } } - - private void assertClose() throws IOException { + + private UncheckedAutoCloseable acquire() throws IOException { if (isClosed()) { throw new IOException("Rocks Database is closed"); } + if (counter.getAndIncrement() < 0) { + counter.getAndDecrement(); + throw new IOException("Rocks Database is closed"); + } + return counter::getAndDecrement; } public void ingestExternalFile(ColumnFamily family, List files, ManagedIngestExternalFileOptions ingestOptions) throws IOException { - assertClose(); - try { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire()) { db.get().ingestExternalFile(family.getHandle(), files, ingestOptions); } catch (RocksDBException e) { - closeOnError(e, true); + closeOnError(e); String msg = "Failed to ingest external files " + files.stream().collect(Collectors.joining(", ")) + " of " + family.getName(); throw toIOException(this, msg, e); - } finally { - counter.decrementAndGet(); } } public void put(ColumnFamily family, byte[] key, byte[] value) throws 
IOException { - assertClose(); - try { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire()) { db.get().put(family.getHandle(), writeOptions, key, value); } catch (RocksDBException e) { - closeOnError(e, true); + closeOnError(e); throw toIOException(this, "put " + bytes2String(key), e); - } finally { - counter.decrementAndGet(); } } public void put(ColumnFamily family, ByteBuffer key, ByteBuffer value) throws IOException { - assertClose(); - try { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire()) { db.get().put(family.getHandle(), writeOptions, key, value); } catch (RocksDBException e) { - closeOnError(e, true); + closeOnError(e); throw toIOException(this, "put " + bytes2String(key), e); - } finally { - counter.decrementAndGet(); } } public void flush() throws IOException { - assertClose(); - try (ManagedFlushOptions options = new ManagedFlushOptions()) { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire(); + ManagedFlushOptions options = new ManagedFlushOptions()) { options.setWaitForFlush(true); db.get().flush(options); for (RocksDatabase.ColumnFamily columnFamily : getExtraColumnFamilies()) { db.get().flush(options, columnFamily.handle); } } catch (RocksDBException e) { - closeOnError(e, true); + closeOnError(e); throw toIOException(this, "flush", e); - } finally { - counter.decrementAndGet(); } } @@ -515,78 +496,65 @@ public void flush() throws IOException { * @param cfName columnFamily on which flush will run. */ public void flush(String cfName) throws IOException { - assertClose(); - ColumnFamilyHandle handle = getColumnFamilyHandle(cfName); - try (ManagedFlushOptions options = new ManagedFlushOptions()) { - options.setWaitForFlush(true); - if (handle != null) { - db.get().flush(options, handle); - } else { - LOG.error("Provided column family doesn't exist." - + " Calling flush on null columnFamily"); - flush(); + try (UncheckedAutoCloseable ignored = acquire()) { + ColumnFamilyHandle handle = getColumnFamilyHandle(cfName); + try (ManagedFlushOptions options = new ManagedFlushOptions()) { + options.setWaitForFlush(true); + if (handle != null) { + db.get().flush(options, handle); + } else { + LOG.error("Provided column family doesn't exist." 
+ + " Calling flush on null columnFamily"); + flush(); + } + } catch (RocksDBException e) { + closeOnError(e); + throw toIOException(this, "flush", e); } - } catch (RocksDBException e) { - closeOnError(e, true); - throw toIOException(this, "flush", e); } } public void flushWal(boolean sync) throws IOException { - assertClose(); - try { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire()) { db.get().flushWal(sync); } catch (RocksDBException e) { - closeOnError(e, true); + closeOnError(e); throw toIOException(this, "flushWal with sync=" + sync, e); - } finally { - counter.decrementAndGet(); } } public void compactRange() throws IOException { - assertClose(); - try { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire()) { db.get().compactRange(); } catch (RocksDBException e) { - closeOnError(e, true); + closeOnError(e); throw toIOException(this, "compactRange", e); - } finally { - counter.decrementAndGet(); } } public void compactRangeDefault(final ManagedCompactRangeOptions options) throws IOException { - assertClose(); - try { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire()) { db.get().compactRange(null, null, null, options); } catch (RocksDBException e) { - closeOnError(e, true); + closeOnError(e); throw toIOException(this, "compactRange", e); - } finally { - counter.decrementAndGet(); } } public void compactDB(ManagedCompactRangeOptions options) throws IOException { - assertClose(); - compactRangeDefault(options); - for (RocksDatabase.ColumnFamily columnFamily : getExtraColumnFamilies()) { - compactRange(columnFamily, null, null, options); + try (UncheckedAutoCloseable ignored = acquire()) { + compactRangeDefault(options); + for (RocksDatabase.ColumnFamily columnFamily + : getExtraColumnFamilies()) { + compactRange(columnFamily, null, null, options); + } } } public int getLiveFilesMetaDataSize() throws IOException { - assertClose(); - try { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire()) { return db.get().getLiveFilesMetaData().size(); - } finally { - counter.decrementAndGet(); } } @@ -594,25 +562,25 @@ public int getLiveFilesMetaDataSize() throws IOException { * @param cfName columnFamily on which compaction will run. */ public void compactRange(String cfName) throws IOException { - assertClose(); - ColumnFamilyHandle handle = getColumnFamilyHandle(cfName); - try { - if (handle != null) { - db.get().compactRange(handle); - } else { - LOG.error("Provided column family doesn't exist." - + " Calling compactRange on null columnFamily"); - db.get().compactRange(); + try (UncheckedAutoCloseable ignored = acquire()) { + ColumnFamilyHandle handle = getColumnFamilyHandle(cfName); + try { + if (handle != null) { + db.get().compactRange(handle); + } else { + LOG.error("Provided column family doesn't exist." 
+ + " Calling compactRange on null columnFamily"); + db.get().compactRange(); + } + } catch (RocksDBException e) { + closeOnError(e); + throw toIOException(this, "compactRange", e); } - } catch (RocksDBException e) { - closeOnError(e, true); - throw toIOException(this, "compactRange", e); } } private ColumnFamilyHandle getColumnFamilyHandle(String cfName) throws IOException { - assertClose(); for (ColumnFamilyHandle cf : getCfHandleMap().get(db.get().getName())) { try { String table = new String(cf.getName(), UTF_8); @@ -620,7 +588,7 @@ private ColumnFamilyHandle getColumnFamilyHandle(String cfName) return cf; } } catch (RocksDBException e) { - closeOnError(e, true); + closeOnError(e); throw toIOException(this, "columnFamilyHandle.getName", e); } } @@ -630,25 +598,17 @@ private ColumnFamilyHandle getColumnFamilyHandle(String cfName) public void compactRange(ColumnFamily family, final byte[] begin, final byte[] end, final ManagedCompactRangeOptions options) throws IOException { - assertClose(); - try { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire()) { db.get().compactRange(family.getHandle(), begin, end, options); } catch (RocksDBException e) { - closeOnError(e, true); + closeOnError(e); throw toIOException(this, "compactRange", e); - } finally { - counter.decrementAndGet(); } } public List getLiveFilesMetaData() throws IOException { - assertClose(); - try { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire()) { return db.get().getLiveFilesMetaData(); - } finally { - counter.decrementAndGet(); } } @@ -672,22 +632,16 @@ RocksCheckpoint createCheckpoint() { */ Supplier keyMayExist(ColumnFamily family, byte[] key) throws IOException { - assertClose(); - try { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire()) { final Holder out = new Holder<>(); return db.get().keyMayExist(family.getHandle(), key, out) ? out::getValue : null; - } finally { - counter.decrementAndGet(); } } Supplier keyMayExist(ColumnFamily family, ByteBuffer key, ByteBuffer out) throws IOException { - assertClose(); - try { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire()) { final KeyMayExist result = db.get().keyMayExist( family.getHandle(), key, out); switch (result.exists) { @@ -698,8 +652,6 @@ Supplier keyMayExist(ColumnFamily family, throw new IllegalStateException( "Unexpected KeyMayExistEnum case " + result.exists); } - } finally { - counter.decrementAndGet(); } } @@ -712,16 +664,12 @@ public Collection getExtraColumnFamilies() { } byte[] get(ColumnFamily family, byte[] key) throws IOException { - assertClose(); - try { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire()) { return db.get().get(family.getHandle(), key); } catch (RocksDBException e) { - closeOnError(e, true); + closeOnError(e); final String message = "get " + bytes2String(key) + " from " + family; throw toIOException(this, message, e); - } finally { - counter.decrementAndGet(); } } @@ -741,20 +689,16 @@ byte[] get(ColumnFamily family, byte[] key) throws IOException { */ Integer get(ColumnFamily family, ByteBuffer key, ByteBuffer outValue) throws IOException { - assertClose(); - try { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire()) { final int size = db.get().get(family.getHandle(), DEFAULT_READ_OPTION, key, outValue); LOG.debug("get: size={}, remaining={}", size, outValue.asReadOnlyBuffer().remaining()); return size == ManagedRocksDB.NOT_FOUND ? 
null : size; } catch (RocksDBException e) { - closeOnError(e, true); + closeOnError(e); final String message = "get " + bytes2String(key) + " from " + family; throw toIOException(this, message, e); - } finally { - counter.decrementAndGet(); } } @@ -767,119 +711,84 @@ public long estimateNumKeys(ColumnFamily family) throws IOException { } private long getLongProperty(String key) throws IOException { - assertClose(); - try { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire()) { return db.get().getLongProperty(key); } catch (RocksDBException e) { - closeOnError(e, true); + closeOnError(e); throw toIOException(this, "getLongProperty " + key, e); - } finally { - counter.decrementAndGet(); } } private long getLongProperty(ColumnFamily family, String key) throws IOException { - assertClose(); - try { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire()) { return db.get().getLongProperty(family.getHandle(), key); } catch (RocksDBException e) { - closeOnError(e, true); + closeOnError(e); final String message = "getLongProperty " + key + " from " + family; throw toIOException(this, message, e); - } finally { - counter.decrementAndGet(); } } public String getProperty(String key) throws IOException { - assertClose(); - try { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire()) { return db.get().getProperty(key); } catch (RocksDBException e) { - closeOnError(e, true); + closeOnError(e); throw toIOException(this, "getProperty " + key, e); - } finally { - counter.decrementAndGet(); } } public String getProperty(ColumnFamily family, String key) throws IOException { - assertClose(); - try { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire()) { return db.get().getProperty(family.getHandle(), key); } catch (RocksDBException e) { - closeOnError(e, true); + closeOnError(e); throw toIOException(this, "getProperty " + key + " from " + family, e); - } finally { - counter.decrementAndGet(); } } public ManagedTransactionLogIterator getUpdatesSince(long sequenceNumber) throws IOException { - assertClose(); - try { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire()) { return managed(db.get().getUpdatesSince(sequenceNumber)); } catch (RocksDBException e) { - closeOnError(e, true); + closeOnError(e); throw toIOException(this, "getUpdatesSince " + sequenceNumber, e); - } finally { - counter.decrementAndGet(); } } public long getLatestSequenceNumber() throws IOException { - assertClose(); - try { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire()) { return db.get().getLatestSequenceNumber(); - } finally { - counter.decrementAndGet(); } } public ManagedRocksIterator newIterator(ColumnFamily family) throws IOException { - assertClose(); - try { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire()) { return managed(db.get().newIterator(family.getHandle())); - } finally { - counter.decrementAndGet(); } } public ManagedRocksIterator newIterator(ColumnFamily family, boolean fillCache) throws IOException { - assertClose(); - try (ManagedReadOptions readOptions = new ManagedReadOptions()) { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire(); + ManagedReadOptions readOptions = new ManagedReadOptions()) { readOptions.setFillCache(fillCache); return managed(db.get().newIterator(family.getHandle(), readOptions)); - } finally { - counter.decrementAndGet(); } } public void batchWrite(ManagedWriteBatch writeBatch, 
ManagedWriteOptions options) throws IOException { - assertClose(); - try { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire()) { db.get().write(options, writeBatch); } catch (RocksDBException e) { - closeOnError(e, true); + closeOnError(e); throw toIOException(this, "batchWrite", e); - } finally { - counter.decrementAndGet(); } } @@ -888,46 +797,34 @@ public void batchWrite(ManagedWriteBatch writeBatch) throws IOException { } public void delete(ColumnFamily family, byte[] key) throws IOException { - assertClose(); - try { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire()) { db.get().delete(family.getHandle(), key); } catch (RocksDBException e) { - closeOnError(e, true); + closeOnError(e); final String message = "delete " + bytes2String(key) + " from " + family; throw toIOException(this, message, e); - } finally { - counter.decrementAndGet(); } } public void delete(ColumnFamily family, ByteBuffer key) throws IOException { - assertClose(); - try { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire()) { db.get().delete(family.getHandle(), writeOptions, key); } catch (RocksDBException e) { - closeOnError(e, true); + closeOnError(e); final String message = "delete " + bytes2String(key) + " from " + family; throw toIOException(this, message, e); - } finally { - counter.decrementAndGet(); } } public void deleteRange(ColumnFamily family, byte[] beginKey, byte[] endKey) throws IOException { - assertClose(); - try { - counter.incrementAndGet(); + try (UncheckedAutoCloseable ignored = acquire()) { db.get().deleteRange(family.getHandle(), beginKey, endKey); } catch (RocksDBException e) { - closeOnError(e, true); + closeOnError(e); final String message = "delete range " + bytes2String(beginKey) + " to " + bytes2String(endKey) + " from " + family; throw toIOException(this, message, e); - } finally { - counter.decrementAndGet(); } } @@ -938,8 +835,9 @@ public String toString() { @VisibleForTesting public List getSstFileList() throws IOException { - assertClose(); - return db.get().getLiveFilesMetaData(); + try (UncheckedAutoCloseable ignored = acquire()) { + return db.get().getLiveFilesMetaData(); + } } /** @@ -958,41 +856,42 @@ private int getLastLevel() throws IOException { */ public void deleteFilesNotMatchingPrefix(Map prefixPairs) throws IOException, RocksDBException { - assertClose(); - for (LiveFileMetaData liveFileMetaData : getSstFileList()) { - String sstFileColumnFamily = - new String(liveFileMetaData.columnFamilyName(), UTF_8); - int lastLevel = getLastLevel(); - - if (!prefixPairs.containsKey(sstFileColumnFamily)) { - continue; - } + try (UncheckedAutoCloseable ignored = acquire()) { + for (LiveFileMetaData liveFileMetaData : getSstFileList()) { + String sstFileColumnFamily = + new String(liveFileMetaData.columnFamilyName(), UTF_8); + int lastLevel = getLastLevel(); + + if (!prefixPairs.containsKey(sstFileColumnFamily)) { + continue; + } - // RocksDB #deleteFile API allows only to delete the last level of - // SST Files. Any level < last level won't get deleted and - // only last file of level 0 can be deleted - // and will throw warning in the rocksdb manifest. - // Instead, perform the level check here - // itself to avoid failed delete attempts for lower level files. - if (liveFileMetaData.level() != lastLevel || lastLevel == 0) { - continue; - } + // RocksDB #deleteFile API allows only to delete the last level of + // SST Files. 
Any level < last level won't get deleted and + // only last file of level 0 can be deleted + // and will throw warning in the rocksdb manifest. + // Instead, perform the level check here + // itself to avoid failed delete attempts for lower level files. + if (liveFileMetaData.level() != lastLevel || lastLevel == 0) { + continue; + } - String prefixForColumnFamily = prefixPairs.get(sstFileColumnFamily); - String firstDbKey = new String(liveFileMetaData.smallestKey(), UTF_8); - String lastDbKey = new String(liveFileMetaData.largestKey(), UTF_8); - boolean isKeyWithPrefixPresent = RocksDiffUtils.isKeyWithPrefixPresent( - prefixForColumnFamily, firstDbKey, lastDbKey); - if (!isKeyWithPrefixPresent) { - LOG.info("Deleting sst file: {} with start key: {} and end key: {} " + - "corresponding to column family {} from db: {}. " + - "Prefix for the column family: {}.", - liveFileMetaData.fileName(), - firstDbKey, lastDbKey, - StringUtils.bytes2String(liveFileMetaData.columnFamilyName()), - db.get().getName(), - prefixForColumnFamily); - db.deleteFile(liveFileMetaData); + String prefixForColumnFamily = prefixPairs.get(sstFileColumnFamily); + String firstDbKey = new String(liveFileMetaData.smallestKey(), UTF_8); + String lastDbKey = new String(liveFileMetaData.largestKey(), UTF_8); + boolean isKeyWithPrefixPresent = RocksDiffUtils.isKeyWithPrefixPresent( + prefixForColumnFamily, firstDbKey, lastDbKey); + if (!isKeyWithPrefixPresent) { + LOG.info("Deleting sst file: {} with start key: {} and end key: {} " + + "corresponding to column family {} from db: {}. " + + "Prefix for the column family: {}.", + liveFileMetaData.fileName(), + firstDbKey, lastDbKey, + StringUtils.bytes2String(liveFileMetaData.columnFamilyName()), + db.get().getName(), + prefixForColumnFamily); + db.deleteFile(liveFileMetaData); + } } } } diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBStore.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBStore.java index 4c9b29a995b2..58f1906abd3c 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBStore.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBStore.java @@ -58,7 +58,7 @@ public static RDBStore newRDBStore(File dbFile, ManagedDBOptions options, Set families, long maxDbUpdatesSizeThreshold) throws IOException { - return new RDBStore(dbFile, options, new ManagedWriteOptions(), families, + return new RDBStore(dbFile, options, null, new ManagedWriteOptions(), families, CodecRegistry.newBuilder().build(), false, 1000, null, false, maxDbUpdatesSizeThreshold, true, null, ""); } diff --git a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBStoreByteArrayIterator.java b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBStoreByteArrayIterator.java index 4efeb2c59093..e56f4ec03a01 100644 --- a/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBStoreByteArrayIterator.java +++ b/hadoop-hdds/framework/src/test/java/org/apache/hadoop/hdds/utils/db/TestRDBStoreByteArrayIterator.java @@ -304,28 +304,4 @@ public void testNormalPrefixedIterator() throws IOException { iter.close(); } - - @Test - public void testGetStackTrace() { - ManagedRocksIterator iterator = mock(ManagedRocksIterator.class); - RocksIterator mock = mock(RocksIterator.class); - when(iterator.get()).thenReturn(mock); - when(mock.isOwningHandle()).thenReturn(true); - ManagedRocksObjectUtils.assertClosed(iterator); - 
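The RocksDatabase hunks above replace the per-method assertClose() check and the manual counter.incrementAndGet()/decrementAndGet() bookkeeping with a single scoped guard obtained from acquire(). The implementation of acquire() and UncheckedAutoCloseable is not part of these hunks; the following is a minimal, hypothetical sketch of how such a guard can be structured, assuming a closed flag plus a reference count:

// Hypothetical sketch only; the real acquire()/UncheckedAutoCloseable in
// RocksDatabase is not shown in this patch.
import java.io.IOException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

final class ResourceGuard {
  interface UncheckedAutoCloseable extends AutoCloseable {
    @Override
    void close();   // no checked exception, so it composes cleanly in try-with-resources
  }

  private final AtomicBoolean closed = new AtomicBoolean();
  private final AtomicInteger counter = new AtomicInteger();

  /** Rejects new operations once closed, otherwise counts the caller in. */
  UncheckedAutoCloseable acquire() throws IOException {
    if (closed.get()) {
      throw new IOException("Rocks database is already closed");
    }
    counter.incrementAndGet();
    return counter::decrementAndGet;   // leaving the scope undoes the increment
  }

  /** Marks the guard closed; real code would also wait for the count to drain. */
  void close() {
    closed.set(true);
  }
}

With this shape, every RocksDB call is wrapped in try (UncheckedAutoCloseable ignored = acquire()) { ... }, so the count is released even when the call throws, without repeating a finally block in each method.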
verify(iterator, times(1)).getStackTrace(); - - iterator = new ManagedRocksIterator(rocksDBIteratorMock); - - // construct the expected trace. - StackTraceElement[] traceElements = Thread.currentThread().getStackTrace(); - StringBuilder sb = new StringBuilder(); - // first 2 lines will differ. - for (int i = 2; i < traceElements.length; i++) { - sb.append(traceElements[i]); - sb.append("\n"); - } - String expectedTrace = sb.toString(); - String fromObjectInit = iterator.getStackTrace(); - assertTrue(fromObjectInit.contains(expectedTrace)); - } } diff --git a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedBloomFilter.java b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedBloomFilter.java index 9e7bad651f77..ffee7c1f5519 100644 --- a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedBloomFilter.java +++ b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedBloomFilter.java @@ -18,25 +18,20 @@ */ package org.apache.hadoop.hdds.utils.db.managed; +import org.apache.hadoop.hdds.utils.LeakTracker; import org.rocksdb.BloomFilter; -import javax.annotation.Nullable; - -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.assertClosed; -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.formatStackTrace; -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.getStackTrace; +import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.track; /** * Managed BloomFilter. */ public class ManagedBloomFilter extends BloomFilter { - - @Nullable - private final StackTraceElement[] elements = getStackTrace(); + private final LeakTracker leakTracker = track(this); @Override - protected void finalize() throws Throwable { - assertClosed(this, formatStackTrace(elements)); - super.finalize(); + public void close() { + super.close(); + leakTracker.close(); } } diff --git a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedColumnFamilyOptions.java b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedColumnFamilyOptions.java index 39ee7b0ce7ac..7b1da6a16923 100644 --- a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedColumnFamilyOptions.java +++ b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedColumnFamilyOptions.java @@ -18,31 +18,24 @@ */ package org.apache.hadoop.hdds.utils.db.managed; +import org.apache.hadoop.hdds.utils.LeakTracker; import org.rocksdb.ColumnFamilyOptions; import org.rocksdb.TableFormatConfig; -import javax.annotation.Nullable; - -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.assertClosed; -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.formatStackTrace; -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.getStackTrace; +import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.track; /** * Managed ColumnFamilyOptions. */ public class ManagedColumnFamilyOptions extends ColumnFamilyOptions { - - @Nullable - private final StackTraceElement[] elements = getStackTrace(); - /** * Indicate if this ColumnFamilyOptions is intentionally used across RockDB * instances. 
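Each Managed* subclass changed above follows the same close-decoration: register a LeakTracker when the object is constructed and close the tracker from close() right after the native handle is released, instead of asserting from a finalizer. A condensed, self-contained sketch of that pattern; NativeResource and SimpleTracker are stand-ins for the wrapped RocksDB type and the real LeakTracker returned by ManagedRocksObjectUtils.track(this):

class NativeResource implements AutoCloseable {
  @Override
  public void close() {
    // the real classes release a native handle here
  }
}

class SimpleTracker implements AutoCloseable {
  private final String owner;

  SimpleTracker(String owner) {
    this.owner = owner;
  }

  @Override
  public void close() {
    // the real tracker de-registers itself from the leak detector instead
    System.out.println(owner + " closed cleanly");
  }
}

class ManagedNativeResource extends NativeResource {
  private final SimpleTracker tracker = new SimpleTracker(getClass().getSimpleName());

  @Override
  public void close() {
    super.close();     // release the native handle first
    tracker.close();   // then record the clean release so no leak is reported
  }
}

Used with try-with-resources, the resource and its tracker are released together; if close() is never called, only the leak detector notices once the instance becomes unreachable.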
*/ private boolean reused = false; + private final LeakTracker leakTracker = track(this); public ManagedColumnFamilyOptions() { - super(); } public ManagedColumnFamilyOptions(ColumnFamilyOptions columnFamilyOptions) { @@ -85,9 +78,9 @@ public boolean isReused() { } @Override - protected void finalize() throws Throwable { - assertClosed(this, formatStackTrace(elements)); - super.finalize(); + public void close() { + super.close(); + leakTracker.close(); } /** diff --git a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedCompactRangeOptions.java b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedCompactRangeOptions.java index 6f7da30cdba7..0e397ed0e9b6 100644 --- a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedCompactRangeOptions.java +++ b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedCompactRangeOptions.java @@ -18,25 +18,20 @@ */ package org.apache.hadoop.hdds.utils.db.managed; +import org.apache.hadoop.hdds.utils.LeakTracker; import org.rocksdb.CompactRangeOptions; -import javax.annotation.Nullable; - -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.assertClosed; -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.formatStackTrace; -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.getStackTrace; +import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.track; /** * Managed CompactRangeOptions. */ public class ManagedCompactRangeOptions extends CompactRangeOptions { - - @Nullable - private final StackTraceElement[] elements = getStackTrace(); + private final LeakTracker leakTracker = track(this); @Override - protected void finalize() throws Throwable { - assertClosed(this, formatStackTrace(elements)); - super.finalize(); + public void close() { + super.close(); + leakTracker.close(); } } diff --git a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedDBOptions.java b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedDBOptions.java index a66a04eae90a..fa01e2e1018b 100644 --- a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedDBOptions.java +++ b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedDBOptions.java @@ -18,25 +18,20 @@ */ package org.apache.hadoop.hdds.utils.db.managed; +import org.apache.hadoop.hdds.utils.LeakTracker; import org.rocksdb.DBOptions; -import javax.annotation.Nullable; - -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.assertClosed; -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.formatStackTrace; -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.getStackTrace; +import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.track; /** * Managed DBOptions. 
*/ public class ManagedDBOptions extends DBOptions { - - @Nullable - private final StackTraceElement[] elements = getStackTrace(); + private final LeakTracker leakTracker = track(this); @Override - protected void finalize() throws Throwable { - assertClosed(this, formatStackTrace(elements)); - super.finalize(); + public void close() { + super.close(); + leakTracker.close(); } } diff --git a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedEnvOptions.java b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedEnvOptions.java index 38d6f95ce708..baad1ad7f4ca 100644 --- a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedEnvOptions.java +++ b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedEnvOptions.java @@ -18,25 +18,20 @@ */ package org.apache.hadoop.hdds.utils.db.managed; +import org.apache.hadoop.hdds.utils.LeakTracker; import org.rocksdb.EnvOptions; -import javax.annotation.Nullable; - -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.assertClosed; -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.formatStackTrace; -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.getStackTrace; +import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.track; /** * Managed EnvOptions. */ public class ManagedEnvOptions extends EnvOptions { - - @Nullable - private final StackTraceElement[] elements = getStackTrace(); + private final LeakTracker leakTracker = track(this); @Override - protected void finalize() throws Throwable { - assertClosed(this, formatStackTrace(elements)); - super.finalize(); + public void close() { + super.close(); + leakTracker.close(); } } diff --git a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedFlushOptions.java b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedFlushOptions.java index 8801f7d241d0..126f5336ba90 100644 --- a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedFlushOptions.java +++ b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedFlushOptions.java @@ -18,25 +18,20 @@ */ package org.apache.hadoop.hdds.utils.db.managed; +import org.apache.hadoop.hdds.utils.LeakTracker; import org.rocksdb.FlushOptions; -import javax.annotation.Nullable; - -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.assertClosed; -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.formatStackTrace; -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.getStackTrace; +import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.track; /** * Managed FlushOptions. 
*/ public class ManagedFlushOptions extends FlushOptions { - - @Nullable - private final StackTraceElement[] elements = getStackTrace(); + private final LeakTracker leakTracker = track(this); @Override - protected void finalize() throws Throwable { - assertClosed(this, formatStackTrace(elements)); - super.finalize(); + public void close() { + super.close(); + leakTracker.close(); } } diff --git a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedIngestExternalFileOptions.java b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedIngestExternalFileOptions.java index 94b34e20d2ca..1783a34587cf 100644 --- a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedIngestExternalFileOptions.java +++ b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedIngestExternalFileOptions.java @@ -18,26 +18,20 @@ */ package org.apache.hadoop.hdds.utils.db.managed; +import org.apache.hadoop.hdds.utils.LeakTracker; import org.rocksdb.IngestExternalFileOptions; -import javax.annotation.Nullable; - -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.assertClosed; -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.formatStackTrace; -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.getStackTrace; +import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.track; /** * Managed IngestExternalFileOptions. */ -public class ManagedIngestExternalFileOptions extends - IngestExternalFileOptions { - - @Nullable - private final StackTraceElement[] elements = getStackTrace(); +public class ManagedIngestExternalFileOptions extends IngestExternalFileOptions { + private final LeakTracker leakTracker = track(this); @Override - protected void finalize() throws Throwable { - assertClosed(this, formatStackTrace(elements)); - super.finalize(); + public void close() { + super.close(); + leakTracker.close(); } } diff --git a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedLRUCache.java b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedLRUCache.java index 8bf9147c1571..5244863a5a17 100644 --- a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedLRUCache.java +++ b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedLRUCache.java @@ -18,29 +18,24 @@ */ package org.apache.hadoop.hdds.utils.db.managed; +import org.apache.hadoop.hdds.utils.LeakTracker; import org.rocksdb.LRUCache; -import javax.annotation.Nullable; - -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.assertClosed; -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.formatStackTrace; -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.getStackTrace; +import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.track; /** * Managed LRUCache. 
*/ public class ManagedLRUCache extends LRUCache { - - @Nullable - private final StackTraceElement[] elements = getStackTrace(); + private final LeakTracker leakTracker = track(this); public ManagedLRUCache(long capacity) { super(capacity); } @Override - protected void finalize() throws Throwable { - assertClosed(this, formatStackTrace(elements)); - super.finalize(); + public void close() { + super.close(); + leakTracker.close(); } } diff --git a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedObject.java b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedObject.java index 0ab87f156ec9..1e4068a7a800 100644 --- a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedObject.java +++ b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedObject.java @@ -18,11 +18,10 @@ */ package org.apache.hadoop.hdds.utils.db.managed; +import org.apache.hadoop.hdds.utils.LeakTracker; import org.rocksdb.RocksObject; -import javax.annotation.Nullable; - -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.formatStackTrace; +import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.track; /** * General template for a managed RocksObject. @@ -30,13 +29,10 @@ */ class ManagedObject implements AutoCloseable { private final T original; - - @Nullable - private final StackTraceElement[] elements; + private final LeakTracker leakTracker = track(this); ManagedObject(T original) { this.original = original; - this.elements = ManagedRocksObjectUtils.getStackTrace(); } public T get() { @@ -46,16 +42,6 @@ public T get() { @Override public void close() { original.close(); + leakTracker.close(); } - - @Override - protected void finalize() throws Throwable { - ManagedRocksObjectUtils.assertClosed(this); - super.finalize(); - } - - public String getStackTrace() { - return formatStackTrace(elements); - } - } diff --git a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedOptions.java b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedOptions.java index 4ae96e1b7682..e438068e3a70 100644 --- a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedOptions.java +++ b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedOptions.java @@ -18,25 +18,20 @@ */ package org.apache.hadoop.hdds.utils.db.managed; +import org.apache.hadoop.hdds.utils.LeakTracker; import org.rocksdb.Options; -import javax.annotation.Nullable; - -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.assertClosed; -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.formatStackTrace; -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.getStackTrace; +import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.track; /** * Managed Options. 
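For RocksDB objects obtained from factory methods rather than subclassed, the ManagedObject template above wraps the original instead of extending it; callers unwrap it with get() and close the wrapper when done. A short usage sketch, assuming an already opened RocksDB instance and column family handle, and assuming the ManagedRocksIterator(RocksIterator) constructor seen in the removed test earlier is public:

// Usage sketch: db and handle are assumed to be opened and closed by the caller.
import org.apache.hadoop.hdds.utils.db.managed.ManagedRocksIterator;
import org.rocksdb.ColumnFamilyHandle;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksIterator;

final class IterationSketch {
  static void visitAllKeys(RocksDB db, ColumnFamilyHandle handle) {
    try (ManagedRocksIterator managed = new ManagedRocksIterator(db.newIterator(handle))) {
      RocksIterator it = managed.get();              // unwrap the underlying RocksObject
      for (it.seekToFirst(); it.isValid(); it.next()) {
        // consume it.key() / it.value() here
      }
    } // close() releases the native iterator and marks its leak tracker closed
  }
}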
*/ public class ManagedOptions extends Options { - - @Nullable - private final StackTraceElement[] elements = getStackTrace(); + private final LeakTracker leakTracker = track(this); @Override - protected void finalize() throws Throwable { - assertClosed(this, formatStackTrace(elements)); - super.finalize(); + public void close() { + super.close(); + leakTracker.close(); } } diff --git a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedReadOptions.java b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedReadOptions.java index a281722d2480..48c2238ec4a0 100644 --- a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedReadOptions.java +++ b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedReadOptions.java @@ -18,26 +18,20 @@ */ package org.apache.hadoop.hdds.utils.db.managed; +import org.apache.hadoop.hdds.utils.LeakTracker; import org.rocksdb.ReadOptions; -import javax.annotation.Nullable; - -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.assertClosed; -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.formatStackTrace; -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.getStackTrace; +import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.track; /** * Managed {@link ReadOptions}. */ public class ManagedReadOptions extends ReadOptions { - - @Nullable - private final StackTraceElement[] elements = getStackTrace(); + private final LeakTracker leakTracker = track(this); @Override - protected void finalize() throws Throwable { - assertClosed(this, formatStackTrace(elements)); - super.finalize(); + public void close() { + super.close(); + leakTracker.close(); } - } diff --git a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksObjectMetrics.java b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksObjectMetrics.java index 8d9d7fe78d7e..706aa9bb345f 100644 --- a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksObjectMetrics.java +++ b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksObjectMetrics.java @@ -18,6 +18,7 @@ */ package org.apache.hadoop.hdds.utils.db.managed; +import com.google.common.annotations.VisibleForTesting; import org.apache.hadoop.hdds.annotation.InterfaceAudience; import org.apache.hadoop.metrics2.annotation.Metric; import org.apache.hadoop.metrics2.annotation.Metrics; @@ -64,4 +65,14 @@ public void assertNoLeaks() { void increaseManagedObject() { totalManagedObjects.incr(); } + + @VisibleForTesting + long totalLeakObjects() { + return totalLeakObjects.value(); + } + + @VisibleForTesting + long totalManagedObjects() { + return totalManagedObjects.value(); + } } diff --git a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksObjectUtils.java b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksObjectUtils.java index b32f1111cbcb..7ae7001ccd39 100644 --- a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksObjectUtils.java +++ b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedRocksObjectUtils.java @@ -19,10 +19,11 @@ package org.apache.hadoop.hdds.utils.db.managed; import 
org.apache.hadoop.hdds.HddsUtils; +import org.apache.hadoop.hdds.utils.LeakDetector; +import org.apache.hadoop.hdds.utils.LeakTracker; import org.awaitility.Awaitility; import org.awaitility.core.ConditionTimeoutException; import org.rocksdb.RocksDB; -import org.rocksdb.RocksObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -41,38 +42,36 @@ private ManagedRocksObjectUtils() { public static final Logger LOG = LoggerFactory.getLogger(ManagedRocksObjectUtils.class); + private static final Duration POLL_DELAY_DURATION = Duration.ZERO; private static final Duration POLL_INTERVAL_DURATION = Duration.ofMillis(100); - public static void assertClosed(ManagedObject object) { - assertClosed(object.get(), object.getStackTrace()); - } - static void assertClosed(RocksObject rocksObject, String stackTrace) { + private static final LeakDetector LEAK_DETECTOR = new LeakDetector("ManagedRocksObject"); + + static LeakTracker track(AutoCloseable object) { ManagedRocksObjectMetrics.INSTANCE.increaseManagedObject(); - if (rocksObject.isOwningHandle()) { - reportLeak(rocksObject, stackTrace); - } + final Class clazz = object.getClass(); + final StackTraceElement[] stackTrace = getStackTrace(); + return LEAK_DETECTOR.track(object, () -> reportLeak(clazz, formatStackTrace(stackTrace))); } - static void reportLeak(Object object, String stackTrace) { + static void reportLeak(Class clazz, String stackTrace) { ManagedRocksObjectMetrics.INSTANCE.increaseLeakObject(); - String warning = String.format("%s is not closed properly", - object.getClass().getSimpleName()); + String warning = String.format("%s is not closed properly", clazz.getSimpleName()); if (stackTrace != null && LOG.isDebugEnabled()) { - String debugMessage = String - .format("%nStackTrace for unclosed instance: %s", stackTrace); + String debugMessage = String.format("%nStackTrace for unclosed instance: %s", stackTrace); warning = warning.concat(debugMessage); } LOG.warn(warning); } - static @Nullable StackTraceElement[] getStackTrace() { + private static @Nullable StackTraceElement[] getStackTrace() { return HddsUtils.getStackTrace(LOG); } static String formatStackTrace(@Nullable StackTraceElement[] elements) { - return HddsUtils.formatStackTrace(elements, 3); + return HddsUtils.formatStackTrace(elements, 4); } /** diff --git a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedSlice.java b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedSlice.java index 2de2031fb9c0..8c366bdaa423 100644 --- a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedSlice.java +++ b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedSlice.java @@ -18,23 +18,19 @@ */ package org.apache.hadoop.hdds.utils.db.managed; +import org.apache.hadoop.hdds.utils.LeakTracker; import org.rocksdb.Slice; -import javax.annotation.Nullable; - -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.formatStackTrace; +import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.track; /** * Managed Slice. 
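The track() helper above hands each managed object to a shared LeakDetector together with a callback that reports the leak. The LeakDetector itself is not shown in this patch; a reference-queue based detector along the following lines is one plausible shape of that track(object, leakReporter) API (illustrative only, not the actual org.apache.hadoop.hdds.utils.LeakDetector):

import java.lang.ref.Reference;
import java.lang.ref.ReferenceQueue;
import java.lang.ref.WeakReference;
import java.util.Collections;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

final class SketchLeakDetector {
  static final class Tracker extends WeakReference<Object> implements AutoCloseable {
    private final Set<Tracker> live;
    private final Runnable onLeak;

    Tracker(Object referent, ReferenceQueue<Object> queue, Set<Tracker> live, Runnable onLeak) {
      super(referent, queue);
      this.live = live;
      this.onLeak = onLeak;
    }

    @Override
    public void close() {   // called from the Managed* close() methods
      live.remove(this);    // closed properly, so it can never be reported as a leak
    }

    void reportLeak() {
      if (live.remove(this)) {
        onLeak.run();
      }
    }
  }

  private final ReferenceQueue<Object> queue = new ReferenceQueue<>();
  private final Set<Tracker> live = Collections.newSetFromMap(new ConcurrentHashMap<>());

  Tracker track(Object object, Runnable onLeak) {
    Tracker tracker = new Tracker(object, queue, live, onLeak);
    live.add(tracker);
    return tracker;
  }

  /** GC enqueues trackers whose referents died without close(); a daemon thread would poll this. */
  void pollOnce() {
    for (Reference<?> ref = queue.poll(); ref != null; ref = queue.poll()) {
      ((Tracker) ref).reportLeak();
    }
  }
}

Objects that are closed remove their tracker from the live set; objects that are garbage collected while still registered surface on the queue and trigger the callback, which is what feeds the reportLeak() metrics and log warning above.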
*/ public class ManagedSlice extends Slice { + private final LeakTracker leakTracker = track(this); - @Nullable - private final StackTraceElement[] elements; - - public ManagedSlice(byte[] var1) { - super(var1); - this.elements = ManagedRocksObjectUtils.getStackTrace(); + public ManagedSlice(byte[] data) { + super(data); } @Override @@ -43,12 +39,10 @@ public synchronized long getNativeHandle() { } @Override - protected void finalize() throws Throwable { - ManagedRocksObjectMetrics.INSTANCE.increaseManagedObject(); - if (isOwningHandle()) { - ManagedRocksObjectUtils.reportLeak(this, formatStackTrace(elements)); - } - super.finalize(); + protected void disposeInternal() { + super.disposeInternal(); + // RocksMutableObject.close is final thus can't be decorated. + // So, we decorate disposeInternal instead to track closure. + leakTracker.close(); } - } diff --git a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedSstFileReader.java b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedSstFileReader.java index 7ba1001a432e..b49c6e7a9e49 100644 --- a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedSstFileReader.java +++ b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedSstFileReader.java @@ -29,15 +29,7 @@ public class ManagedSstFileReader extends ManagedObject { super(original); } - public static ManagedSstFileReader managed( - SstFileReader reader) { + public static ManagedSstFileReader managed(SstFileReader reader) { return new ManagedSstFileReader(reader); } - - @Override - protected void finalize() throws Throwable { - ManagedRocksObjectUtils.assertClosed(this); - super.finalize(); - } - } diff --git a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedSstFileReaderIterator.java b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedSstFileReaderIterator.java index 0916e89571b1..660a7c347aee 100644 --- a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedSstFileReaderIterator.java +++ b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedSstFileReaderIterator.java @@ -23,19 +23,12 @@ /** * Managed SstFileReaderIterator. 
*/ -public class ManagedSstFileReaderIterator - extends ManagedObject { +public class ManagedSstFileReaderIterator extends ManagedObject { ManagedSstFileReaderIterator(SstFileReaderIterator original) { super(original); } - @Override - protected void finalize() throws Throwable { - ManagedRocksObjectUtils.assertClosed(this); - super.finalize(); - } - public static ManagedSstFileReaderIterator managed( SstFileReaderIterator iterator) { return new ManagedSstFileReaderIterator(iterator); diff --git a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedSstFileWriter.java b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedSstFileWriter.java index e6fdcbc2ed0b..de7e9d526634 100644 --- a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedSstFileWriter.java +++ b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedSstFileWriter.java @@ -18,23 +18,18 @@ */ package org.apache.hadoop.hdds.utils.db.managed; +import org.apache.hadoop.hdds.utils.LeakTracker; import org.rocksdb.EnvOptions; import org.rocksdb.Options; import org.rocksdb.SstFileWriter; -import javax.annotation.Nullable; - -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.assertClosed; -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.formatStackTrace; -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.getStackTrace; +import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.track; /** * Managed SstFileWriter. */ public class ManagedSstFileWriter extends SstFileWriter { - - @Nullable - private final StackTraceElement[] elements = getStackTrace(); + private final LeakTracker leakTracker = track(this); public ManagedSstFileWriter(EnvOptions envOptions, Options options) { @@ -42,8 +37,8 @@ public ManagedSstFileWriter(EnvOptions envOptions, } @Override - protected void finalize() throws Throwable { - assertClosed(this, formatStackTrace(elements)); - super.finalize(); + public void close() { + super.close(); + leakTracker.close(); } } diff --git a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedStatistics.java b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedStatistics.java index 4cbd6f98287b..75af8b881355 100644 --- a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedStatistics.java +++ b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedStatistics.java @@ -18,25 +18,20 @@ */ package org.apache.hadoop.hdds.utils.db.managed; +import org.apache.hadoop.hdds.utils.LeakTracker; import org.rocksdb.Statistics; -import javax.annotation.Nullable; - -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.assertClosed; -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.formatStackTrace; -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.getStackTrace; +import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.track; /** * Managed Statistics. 
*/ public class ManagedStatistics extends Statistics { - - @Nullable - private final StackTraceElement[] elements = getStackTrace(); + private final LeakTracker leakTracker = track(this); @Override - protected void finalize() throws Throwable { - assertClosed(this, formatStackTrace(elements)); - super.finalize(); + public void close() { + super.close(); + leakTracker.close(); } } diff --git a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedWriteBatch.java b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedWriteBatch.java index 51c9bcc0df02..b1411b09a49a 100644 --- a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedWriteBatch.java +++ b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedWriteBatch.java @@ -18,24 +18,18 @@ */ package org.apache.hadoop.hdds.utils.db.managed; +import org.apache.hadoop.hdds.utils.LeakTracker; import org.rocksdb.WriteBatch; -import javax.annotation.Nullable; - -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.assertClosed; -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.formatStackTrace; -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.getStackTrace; +import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.track; /** * Managed WriteBatch. */ public class ManagedWriteBatch extends WriteBatch { - - @Nullable - private final StackTraceElement[] elements = getStackTrace(); + private final LeakTracker leakTracker = track(this); public ManagedWriteBatch() { - super(); } public ManagedWriteBatch(byte[] data) { @@ -43,8 +37,8 @@ public ManagedWriteBatch(byte[] data) { } @Override - protected void finalize() throws Throwable { - assertClosed(this, formatStackTrace(elements)); - super.finalize(); + public void close() { + super.close(); + leakTracker.close(); } } diff --git a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedWriteOptions.java b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedWriteOptions.java index 7ba05a0ee6d9..5d32a290b5e2 100644 --- a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedWriteOptions.java +++ b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/ManagedWriteOptions.java @@ -18,25 +18,20 @@ */ package org.apache.hadoop.hdds.utils.db.managed; +import org.apache.hadoop.hdds.utils.LeakTracker; import org.rocksdb.WriteOptions; -import javax.annotation.Nullable; - -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.assertClosed; -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.formatStackTrace; -import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.getStackTrace; +import static org.apache.hadoop.hdds.utils.db.managed.ManagedRocksObjectUtils.track; /** * Managed {@link WriteOptions}. 
*/ public class ManagedWriteOptions extends WriteOptions { - - @Nullable - private final StackTraceElement[] elements = getStackTrace(); + private final LeakTracker leakTracker = track(this); @Override - protected void finalize() throws Throwable { - assertClosed(this, formatStackTrace(elements)); - super.finalize(); + public void close() { + super.close(); + leakTracker.close(); } } diff --git a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/package-info.java b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/package-info.java index c60dd4c82ec9..ee214f8150ed 100644 --- a/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/package-info.java +++ b/hadoop-hdds/managed-rocksdb/src/main/java/org/apache/hadoop/hdds/utils/db/managed/package-info.java @@ -18,18 +18,16 @@ */ /** - * RocksDB is deprecating the RocksObject's finalizer that cleans up native - * resources. In fact, the finalizer is removed in the new version of RocksDB - * as per https://github.com/facebook/rocksdb/commit/99d86252b. That poses a + * RocksDB has deprecated the RocksObject's finalizer that cleans up native + * resources, see https://github.com/facebook/rocksdb/commit/99d86252b. That poses a * requirement for RocksDb's applications to explicitly close RocksObject * instances themselves to avoid leaking native resources. The general approach * is to close RocksObjects with try-with-resource statement. * Yet, this is not always an easy option in Ozone we need a mechanism to * manage and detect leaks. * - * This package contains wrappers and utilities to catch RocksObject - * instantiates in Ozone, intercept their finalizers and assert if the created - * instances are closed properly before being GCed. + * This package contains RocksObject decorators and utilities to track the RocksObject + * lifecycle and ensure instances are properly closed before being GCed. 
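Given the metrics getters added to ManagedRocksObjectMetrics earlier in this patch, the leak accounting described here can be asserted from a test. A hedged sketch, assuming the test sits in the org.apache.hadoop.hdds.utils.db.managed package (the new getters are package-private) and that the RocksDB native library is loadable at test time:

import static org.junit.jupiter.api.Assertions.assertEquals;

import org.junit.jupiter.api.Test;

class TestManagedObjectLeakAccounting {
  @Test
  void closingAManagedObjectDoesNotCountAsALeak() {
    long leaksBefore = ManagedRocksObjectMetrics.INSTANCE.totalLeakObjects();
    try (ManagedWriteOptions options = new ManagedWriteOptions()) {
      options.setSync(false);   // ordinary use of the wrapped WriteOptions API
    }
    // A properly closed instance never increments the leak counter; leaked instances
    // are only counted after GC, which is why production code calls assertNoLeaks().
    assertEquals(leaksBefore, ManagedRocksObjectMetrics.INSTANCE.totalLeakObjects());
  }
}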
*/ package org.apache.hadoop.hdds.utils.db.managed; diff --git a/hadoop-hdds/rocks-native/pom.xml b/hadoop-hdds/rocks-native/pom.xml index 99f7a032e259..a7b1c367ac52 100644 --- a/hadoop-hdds/rocks-native/pom.xml +++ b/hadoop-hdds/rocks-native/pom.xml @@ -63,6 +63,7 @@ 8 8 + https://zlib.net/fossils/zlib-${zlib.version}.tar.gz @@ -141,7 +142,7 @@ wget - https://zlib.net/fossils/zlib-${zlib.version}.tar.gz + ${zlib.url} zlib-${zlib.version}.tar.gz ${project.build.directory}/zlib diff --git a/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdb/util/ManagedSstFileReader.java b/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdb/util/ManagedSstFileReader.java index 120b199c880e..7d25413072b6 100644 --- a/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdb/util/ManagedSstFileReader.java +++ b/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdb/util/ManagedSstFileReader.java @@ -78,9 +78,10 @@ public long getEstimatedTotalKeys() throws RocksDBException { try (ManagedOptions options = new ManagedOptions()) { for (String sstFile : sstFiles) { - SstFileReader fileReader = new SstFileReader(options); - fileReader.open(sstFile); - estimatedSize += fileReader.getTableProperties().getNumEntries(); + try (SstFileReader fileReader = new SstFileReader(options)) { + fileReader.open(sstFile); + estimatedSize += fileReader.getTableProperties().getNumEntries(); + } } } estimatedTotalKeys = estimatedSize; diff --git a/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdiff/RocksDBCheckpointDiffer.java b/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdiff/RocksDBCheckpointDiffer.java index b6d6e773dfa1..e830106e5700 100644 --- a/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdiff/RocksDBCheckpointDiffer.java +++ b/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdiff/RocksDBCheckpointDiffer.java @@ -629,16 +629,18 @@ private long getSSTFileSummary(String filename) filename += SST_FILE_EXTENSION; } - Options option = new Options(); - SstFileReader reader = new SstFileReader(option); + try ( + ManagedOptions option = new ManagedOptions(); + SstFileReader reader = new SstFileReader(option)) { - reader.open(getAbsoluteSstFilePath(filename)); + reader.open(getAbsoluteSstFilePath(filename)); - TableProperties properties = reader.getTableProperties(); - if (LOG.isDebugEnabled()) { - LOG.debug("{} has {} keys", filename, properties.getNumEntries()); + TableProperties properties = reader.getTableProperties(); + if (LOG.isDebugEnabled()) { + LOG.debug("{} has {} keys", filename, properties.getNumEntries()); + } + return properties.getNumEntries(); } - return properties.getNumEntries(); } private String getAbsoluteSstFilePath(String filename) diff --git a/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdiff/RocksDiffUtils.java b/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdiff/RocksDiffUtils.java index 4b7da351f1da..5ddcf8b7e6af 100644 --- a/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdiff/RocksDiffUtils.java +++ b/hadoop-hdds/rocksdb-checkpoint-differ/src/main/java/org/apache/ozone/rocksdiff/RocksDiffUtils.java @@ -18,12 +18,12 @@ package org.apache.ozone.rocksdiff; import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.hdds.utils.db.managed.ManagedOptions; +import org.apache.hadoop.hdds.utils.db.managed.ManagedReadOptions; 
import org.apache.hadoop.hdds.utils.db.managed.ManagedSstFileReader; import org.apache.hadoop.hdds.utils.db.managed.ManagedSstFileReaderIterator; import org.rocksdb.SstFileReader; import org.rocksdb.TableProperties; -import org.rocksdb.Options; -import org.rocksdb.ReadOptions; import org.rocksdb.RocksDBException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -87,16 +87,20 @@ public static void filterRelevantSstFiles(Set inputFiles, public static boolean doesSstFileContainKeyRange(String filepath, Map tableToPrefixMap) throws IOException { - try (ManagedSstFileReader sstFileReader = ManagedSstFileReader.managed( - new SstFileReader(new Options()))) { + + try ( + ManagedOptions options = new ManagedOptions(); + ManagedSstFileReader sstFileReader = ManagedSstFileReader.managed(new SstFileReader(options))) { sstFileReader.get().open(filepath); TableProperties properties = sstFileReader.get().getTableProperties(); String tableName = new String(properties.getColumnFamilyName(), UTF_8); if (tableToPrefixMap.containsKey(tableName)) { String prefix = tableToPrefixMap.get(tableName); - try (ManagedSstFileReaderIterator iterator = - ManagedSstFileReaderIterator.managed(sstFileReader.get() - .newIterator(new ReadOptions()))) { + + try ( + ManagedReadOptions readOptions = new ManagedReadOptions(); + ManagedSstFileReaderIterator iterator = ManagedSstFileReaderIterator.managed( + sstFileReader.get().newIterator(readOptions))) { iterator.get().seek(prefix.getBytes(UTF_8)); String seekResultKey = new String(iterator.get().key(), UTF_8); return seekResultKey.startsWith(prefix); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java index 4c96175b6c08..0942b27898d4 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/SCMCommonPlacementPolicy.java @@ -460,6 +460,16 @@ public ContainerPlacementStatus validateContainerPlacement( } int maxReplicasPerRack = getMaxReplicasPerRack(replicas, Math.min(requiredRacks, numRacks)); + + // There are scenarios where there could be excessive replicas on a rack due to nodes + // in decommission or maintenance or over-replication in general. + // In these cases, ReplicationManager shouldn't report mis-replication. + // The original intention of mis-replication was to indicate that there isn't enough rack tolerance. + // In case of over-replication, this isn't an issue. + // Mis-replication should be an issue once over-replication is fixed, not before. + // Adjust the maximum number of replicas per rack to allow containers + // with excessive replicas to not be reported as mis-replicated. 
+ maxReplicasPerRack += Math.max(0, dns.size() - replicas); return new ContainerPlacementStatusDefault( currentRackCount.size(), requiredRacks, numRacks, maxReplicasPerRack, currentRackCount); diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java index d23934184eb5..fe771fac6a4a 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/RatisContainerReplicaCount.java @@ -470,10 +470,10 @@ && getReplicas().stream() /** * QUASI_CLOSED containers that have a mix of healthy and UNHEALTHY - * replicas require special treatment. If the healthy replicas don't have - * the same BCSID as the container, but the UNHEALTHY ones do, then we need - * to save at least one copy of each such UNHEALTHY replica. This method - * finds such UNHEALTHY replicas. + * replicas require special treatment. If the UNHEALTHY replicas have the + * correct sequence ID and have unique origins, then we need to save at least + * one copy of each such UNHEALTHY replicas. This method finds such UNHEALTHY + * replicas. * * @param nodeStatusFn a function used to check the {@link NodeStatus} of a node, * accepting a {@link DatanodeDetails} and returning {@link NodeStatus} @@ -498,11 +498,6 @@ public List getVulnerableUnhealthyReplicas(Function getVulnerableUnhealthyReplicas(Function { + NodeStatus status = nodeStatusFn.apply(replica.getDatanodeDetails()); + return status == null || !status.isHealthy(); + }); /* At this point, the list of unhealthyReplicas contains all UNHEALTHY non-empty replicas with the greatest Sequence ID that are on healthy Datanodes. @@ -531,9 +531,9 @@ public List getVulnerableUnhealthyReplicas(Function originsOfInServiceReplicas = new HashSet<>(); - for (ContainerReplica replica : unhealthyReplicas) { + for (ContainerReplica replica : replicas) { if (replica.getDatanodeDetails().getPersistedOpState() - .equals(IN_SERVICE)) { + .equals(IN_SERVICE) && replica.getSequenceId().equals(container.getSequenceId())) { originsOfInServiceReplicas.add(replica.getOriginDatanodeId()); } } diff --git a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/VulnerableUnhealthyReplicasHandler.java b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/VulnerableUnhealthyReplicasHandler.java index 21b2d8151d20..d8a4edf31e93 100644 --- a/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/VulnerableUnhealthyReplicasHandler.java +++ b/hadoop-hdds/server-scm/src/main/java/org/apache/hadoop/hdds/scm/container/replication/health/VulnerableUnhealthyReplicasHandler.java @@ -37,9 +37,8 @@ /** * A QUASI_CLOSED container may have some UNHEALTHY replicas with the - * same Sequence ID as the container. RM should try to maintain one - * copy of such replicas when there are no healthy replicas that - * match the container's Sequence ID. + * same Sequence ID as the container and on unique origins. RM should try to maintain one + * copy of such replicas. 
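Returning to the SCMCommonPlacementPolicy change above, the maxReplicasPerRack adjustment is easiest to follow with concrete numbers; the base allowance of 2 below is an assumed result of getMaxReplicasPerRack for a 3-replica container spread over 2 required racks:

// Illustrative arithmetic only; values other than the formula itself are assumed.
int required = 3;              // the "replicas" parameter (replication factor)
int replicaNodes = 5;          // dns.size(): e.g. 4 replicas on rack0, 1 on rack1
int maxReplicasPerRack = 2;    // assumed base allowance before the adjustment
maxReplicasPerRack += Math.max(0, replicaNodes - required);   // 2 + 2 = 4
// rack0 now holds 4 <= 4 replicas, so the over-replicated container is not flagged as
// mis-replicated; once the two excess replicas are removed the allowance drops back to 2,
// and a genuine lack of rack tolerance would again be reported.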
*/ public class VulnerableUnhealthyReplicasHandler extends AbstractCheck { public static final Logger LOG = LoggerFactory.getLogger(VulnerableUnhealthyReplicasHandler.class); @@ -52,8 +51,8 @@ public VulnerableUnhealthyReplicasHandler(ReplicationManager replicationManager) /** * Checks if the container is QUASI_CLOSED has some vulnerable UNHEALTHY replicas that need to replicated to * other Datanodes. These replicas have the same sequence ID as the container while other healthy replicas don't. - * If the node hosting such a replica is being taken offline, then the replica may have to be replicated to another - * node. + * Or, these replicas have unique origin Datanodes. If the node hosting such a replica is being taken offline, then + * the replica may have to be replicated to another node. * @param request ContainerCheckRequest object representing the container * @return true if some vulnerable UNHEALTHY replicas were found, else false */ diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackAware.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackAware.java index 6016b2f14d31..575460f3053f 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackAware.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackAware.java @@ -26,6 +26,7 @@ import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.MetadataStorageReportProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.StorageReportProto; import org.apache.hadoop.hdds.scm.ContainerPlacementStatus; @@ -52,6 +53,7 @@ import org.apache.commons.lang3.StringUtils; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.DECOMMISSIONED; +import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeOperationalState.IN_SERVICE; import static org.apache.hadoop.hdds.protocol.proto.HddsProtos.NodeState.HEALTHY; import static org.apache.hadoop.hdds.scm.ScmConfigKeys.OZONE_DATANODE_RATIS_VOLUME_FREE_SPACE_MIN; import static org.apache.hadoop.hdds.scm.net.NetConstants.LEAF_SCHEMA; @@ -60,6 +62,7 @@ import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; import static org.junit.jupiter.api.Assumptions.assumeTrue; @@ -563,6 +566,50 @@ public void testvalidateContainerPlacementSingleRackCluster() { assertEquals(0, stat.misReplicationCount()); } + @ParameterizedTest + @MethodSource("org.apache.hadoop.hdds.scm.node.NodeStatus#outOfServiceStates") + public void testOverReplicationAndOutOfServiceNodes(HddsProtos.NodeOperationalState state) { + setup(7); + // 7 datanodes, all nodes are used. 
+ // /rack0/node0 -> IN_SERVICE + // /rack0/node1 -> IN_SERVICE + // /rack0/node2 -> OFFLINE + // /rack0/node3 -> OFFLINE + // /rack0/node4 -> OFFLINE + // /rack1/node5 -> IN_SERVICE + // /rack1/node6 -> OFFLINE + datanodes.get(2).setPersistedOpState(state); + datanodes.get(3).setPersistedOpState(state); + datanodes.get(4).setPersistedOpState(state); + datanodes.get(6).setPersistedOpState(state); + List dns = new ArrayList<>(datanodes); + + ContainerPlacementStatus status = policy.validateContainerPlacement(dns, 3); + assertTrue(status.isPolicySatisfied()); + assertEquals(2, status.actualPlacementCount()); + assertEquals(2, status.expectedPlacementCount()); + assertEquals(0, status.misReplicationCount()); + assertNull(status.misReplicatedReason()); + + // /rack0/node0 -> IN_SERVICE + // /rack0/node1 -> IN_SERVICE + // /rack0/node2 -> OFFLINE > IN_SERVICE + // /rack0/node3 -> OFFLINE + // /rack0/node4 -> OFFLINE + // /rack1/node5 -> IN_SERVICE + // /rack1/node6 -> OFFLINE > IN_SERVICE + datanodes.get(2).setPersistedOpState(IN_SERVICE); + datanodes.get(6).setPersistedOpState(IN_SERVICE); + dns = new ArrayList<>(datanodes); + + status = policy.validateContainerPlacement(dns, 3); + assertTrue(status.isPolicySatisfied()); + assertEquals(2, status.actualPlacementCount()); + assertEquals(2, status.expectedPlacementCount()); + assertEquals(0, status.misReplicationCount()); + assertNull(status.misReplicatedReason()); + } + @ParameterizedTest @MethodSource("numDatanodes") public void testOutOfServiceNodesNotSelected(int datanodeCount) { diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java index 8e267f498859..b5453a5be8db 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/placement/algorithms/TestSCMContainerPlacementRackScatter.java @@ -21,6 +21,7 @@ import org.apache.hadoop.hdds.conf.StorageUnit; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.MockDatanodeDetails; +import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.MetadataStorageReportProto; import org.apache.hadoop.hdds.protocol.proto.StorageContainerDatanodeProtocolProtos.StorageReportProto; import org.apache.hadoop.hdds.scm.ContainerPlacementStatus; @@ -68,6 +69,7 @@ import static org.hamcrest.Matchers.matchesPattern; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.junit.jupiter.api.Assertions.fail; @@ -569,6 +571,72 @@ public void testValidateContainerPlacement(int datanodeCount) { assertEquals(0, stat.misReplicationCount()); } + @ParameterizedTest + @MethodSource("org.apache.hadoop.hdds.scm.node.NodeStatus#outOfServiceStates") + public void testOverReplicationAndOutOfServiceNodes(HddsProtos.NodeOperationalState state) { + setup(9, 3); + // 9 datanodes, 3 per rack. 
+ // /rack0/node0 -> IN_SERVICE - used + // /rack0/node1 + // /rack0/node2 + // /rack1/node3 -> IN_SERVICE - used + // /rack1/node4 + // /rack1/node5 + // /rack2/node6 -> IN_SERVICE - used + // /rack2/node7 + // /rack2/node8 + List dns = new ArrayList<>(); + dns.add(datanodes.get(0)); + dns.add(datanodes.get(3)); + dns.add(datanodes.get(6)); + + ContainerPlacementStatus status = policy.validateContainerPlacement(dns, 3); + assertTrue(status.isPolicySatisfied()); + assertEquals(3, status.actualPlacementCount()); + assertEquals(3, status.expectedPlacementCount()); + assertEquals(0, status.misReplicationCount()); + assertNull(status.misReplicatedReason()); + + // /rack0/node0 -> IN_SERVICE - used + // /rack0/node1 -> OFFLINE - used + // /rack0/node2 + // /rack1/node3 -> IN_SERVICE - used + // /rack1/node4 -> OFFLINE - used + // /rack1/node5 + // /rack2/node6 -> IN_SERVICE - used + // /rack2/node7 + // /rack2/node8 + datanodes.get(1).setPersistedOpState(state); + datanodes.get(4).setPersistedOpState(state); + dns.add(datanodes.get(1)); + dns.add(datanodes.get(4)); + + status = policy.validateContainerPlacement(dns, 3); + assertTrue(status.isPolicySatisfied()); + assertEquals(3, status.actualPlacementCount()); + assertEquals(3, status.expectedPlacementCount()); + assertEquals(0, status.misReplicationCount()); + assertNull(status.misReplicatedReason()); + + // /rack0/node0 -> IN_SERVICE - used + // /rack0/node1 -> OFFLINE - used + // /rack0/node2 -> IN_SERVICE - used + // /rack1/node3 -> IN_SERVICE - used + // /rack1/node4 -> OFFLINE - used + // /rack1/node5 + // /rack2/node6 -> IN_SERVICE - used + // /rack2/node7 + // /rack2/node8 + dns.add(datanodes.get(2)); + + status = policy.validateContainerPlacement(dns, 3); + assertTrue(status.isPolicySatisfied()); + assertEquals(3, status.actualPlacementCount()); + assertEquals(3, status.expectedPlacementCount()); + assertEquals(0, status.misReplicationCount()); + assertNull(status.misReplicatedReason()); + } + public List getDatanodes(List dnIndexes) { return dnIndexes.stream().map(datanodes::get).collect(Collectors.toList()); } diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisUnderReplicationHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisUnderReplicationHandler.java index dd7747e12718..ce9dfb3f3dbe 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisUnderReplicationHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestRatisUnderReplicationHandler.java @@ -39,6 +39,7 @@ import org.apache.hadoop.ozone.container.common.SCMTestUtils; import org.apache.hadoop.ozone.protocol.commands.SCMCommand; import org.apache.ratis.protocol.exceptions.NotLeaderException; +import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.mockito.ArgumentCaptor; @@ -579,6 +580,35 @@ public void testUnderReplicationWithVulnerableReplicas() throws IOException { assertEquals(unhealthyReplica.getDatanodeDetails(), commands.iterator().next().getKey()); } + /** + * A QUASI_CLOSED container may have UNHEALTHY replicas with the correct sequence ID which have unique + * origin Datanodes. If any of these UNHEALTHY replicas is being taken offline, then it needs to be replicated to + * another DN for decommission to progress. 
This test asserts that a replicate command is sent for one such replica. + */ + @Test + public void testUnderReplicationWithVulnerableReplicasOnUniqueOrigins() throws IOException { + final long sequenceID = 20; + container = ReplicationTestUtil.createContainerInfo(RATIS_REPLICATION_CONFIG, 1, + HddsProtos.LifeCycleState.QUASI_CLOSED, sequenceID); + + final Set replicas = new HashSet<>(4); + for (int i = 0; i < 3; i++) { + replicas.add(createContainerReplica(container.containerID(), 0, IN_SERVICE, State.QUASI_CLOSED, + sequenceID)); + } + + // create an UNHEALTHY replica with a unique origin + final ContainerReplica unhealthyReplica = createContainerReplica(container.containerID(), 0, + DECOMMISSIONING, State.UNHEALTHY, sequenceID); + replicas.add(unhealthyReplica); + UnderReplicatedHealthResult result = getUnderReplicatedHealthResult(); + Mockito.when(result.hasVulnerableUnhealthy()).thenReturn(true); + + final Set>> commands = testProcessing(replicas, Collections.emptyList(), + result, 2, 1); + Assertions.assertEquals(unhealthyReplica.getDatanodeDetails(), commands.iterator().next().getKey()); + } + /** * In the push replication model, a replicate command is sent to the DN hosting the replica, and that DN is * expected to "push" the replica to another DN. If the DN hosting the replica has too many commands already, an diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java index f3943617291f..ff65da6bfd97 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/TestReplicationManager.java @@ -502,6 +502,67 @@ public void testQuasiClosedContainerWithVulnerableUnhealthyReplica() assertEquals(decommissioning.getDatanodeDetails().getUuid(), command.getKey()); } + + /** + * There is a QUASI_CLOSED container with some UNHEALTHY replicas on unique origin nodes. If the datanode hosting + * one such replica is being taken offline, then the UNHEALTHY replica needs to be replicated to another node. 
+ */ + @Test + public void testQuasiClosedContainerWithUnhealthyReplicaOnDecommissioningNodeWithUniqueOrigin() + throws IOException, NodeNotFoundException { + RatisReplicationConfig ratisRepConfig = + RatisReplicationConfig.getInstance(THREE); + // create a QUASI_CLOSED container with 3 QUASI_CLOSED replicas on same origin, and 1 UNHEALTHY on unique origin + ContainerInfo container = createContainerInfo(ratisRepConfig, 1, + HddsProtos.LifeCycleState.QUASI_CLOSED); + Set replicas = + createReplicasWithSameOrigin(container.containerID(), + ContainerReplicaProto.State.QUASI_CLOSED, 0, 0, 0); + ContainerReplica unhealthy = + createContainerReplica(container.containerID(), 0, DECOMMISSIONING, + ContainerReplicaProto.State.UNHEALTHY); + replicas.add(unhealthy); + storeContainerAndReplicas(container, replicas); + Mockito.when(replicationManager.getNodeStatus(any(DatanodeDetails.class))) + .thenAnswer(invocation -> { + DatanodeDetails dn = invocation.getArgument(0); + if (dn.equals(unhealthy.getDatanodeDetails())) { + return new NodeStatus(DECOMMISSIONING, HddsProtos.NodeState.HEALTHY); + } + + return NodeStatus.inServiceHealthy(); + }); + + // the container should be under replicated and queued to under replication queue + replicationManager.processContainer(container, repQueue, repReport); + assertEquals(1, repReport.getStat( + ReplicationManagerReport.HealthState.UNDER_REPLICATED)); + assertEquals(0, repReport.getStat( + ReplicationManagerReport.HealthState.OVER_REPLICATED)); + assertEquals(1, repQueue.underReplicatedQueueSize()); + assertEquals(0, repQueue.overReplicatedQueueSize()); + + // next, this test sets up some mocks to test if RatisUnderReplicationHandler will handle this container correctly + Mockito.when(ratisPlacementPolicy.chooseDatanodes(anyList(), anyList(), eq(null), eq(1), anyLong(), + anyLong())).thenAnswer(invocation -> ImmutableList.of(MockDatanodeDetails.randomDatanodeDetails())); + Mockito.when(nodeManager.getTotalDatanodeCommandCounts(any(DatanodeDetails.class), any(), any())) + .thenAnswer(invocation -> { + Map map = new HashMap<>(); + map.put(SCMCommandProto.Type.replicateContainerCommand, 0); + map.put(SCMCommandProto.Type.reconstructECContainersCommand, 0); + return map; + }); + RatisUnderReplicationHandler handler = + new RatisUnderReplicationHandler(ratisPlacementPolicy, configuration, replicationManager); + + handler.processAndSendCommands(replicas, Collections.emptyList(), repQueue.dequeueUnderReplicatedContainer(), 2); + assertEquals(1, commandsSent.size()); + Pair> command = commandsSent.iterator().next(); + // a replicate command should have been sent for the UNHEALTHY replica + assertEquals(SCMCommandProto.Type.replicateContainerCommand, command.getValue().getType()); + assertEquals(unhealthy.getDatanodeDetails().getUuid(), command.getKey()); + } + /** * When there is Quasi Closed Replica with incorrect sequence id * for a Closed container, it's treated as unhealthy and deleted. 
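Both of the test hunks above exercise the same underlying rule: an UNHEALTHY replica of a QUASI_CLOSED container is worth protecting when it carries the container's sequence ID, its origin datanode appears on no other replica that will stay online, and its host datanode is being taken out of service. The following is a minimal sketch of that rule using plain Java collections; ReplicaSketch and isVulnerable are illustrative stand-ins, not the actual Ozone SCM classes.

import java.util.List;
import java.util.Objects;

// Simplified stand-in for ContainerReplica; not the Ozone API.
final class ReplicaSketch {
  final String originId;          // UUID of the origin datanode
  final long sequenceId;          // sequence id recorded by the replica
  final String state;             // e.g. "QUASI_CLOSED" or "UNHEALTHY"
  final boolean hostGoingOffline; // host DN is DECOMMISSIONING / ENTERING_MAINTENANCE

  ReplicaSketch(String originId, long sequenceId, String state, boolean hostGoingOffline) {
    this.originId = originId;
    this.sequenceId = sequenceId;
    this.state = state;
    this.hostGoingOffline = hostGoingOffline;
  }

  // An UNHEALTHY replica is vulnerable when it matches the container's sequence id,
  // its host is leaving the cluster, and no other surviving replica shares its origin.
  static boolean isVulnerable(ReplicaSketch r, long containerSequenceId, List<ReplicaSketch> all) {
    if (!"UNHEALTHY".equals(r.state) || r.sequenceId != containerSequenceId || !r.hostGoingOffline) {
      return false;
    }
    return all.stream().noneMatch(other -> other != r
        && Objects.equals(other.originId, r.originId)
        && !other.hostGoingOffline);
  }
}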
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestVulnerableUnhealthyReplicasHandler.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestVulnerableUnhealthyReplicasHandler.java index 72a89f028625..cb5e5842a8a7 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestVulnerableUnhealthyReplicasHandler.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/container/replication/health/TestVulnerableUnhealthyReplicasHandler.java @@ -126,6 +126,11 @@ public void testReturnsFalseForQuasiClosedContainerWithNoVulnerableReplicas() { assertEquals(0, repQueue.overReplicatedQueueSize()); } + /** + * A QUASI_CLOSED container with 3 QUASI_CLOSED replicas with incorrect sequence id. They're on unique origin nodes. + * There's an UNHEALTHY replica on a Decommissioning node, which has the correct sequence ID and unique origin. + * It's expected that the UNHEALTHY replica is queued for under replication. + */ @Test public void testReturnsTrueForQuasiClosedContainerWithVulnerableReplica() throws NodeNotFoundException { long sequenceId = 10; @@ -154,6 +159,49 @@ public void testReturnsTrueForQuasiClosedContainerWithVulnerableReplica() throws assertEquals(0, repQueue.overReplicatedQueueSize()); } + /** + * A QUASI_CLOSED container with 3 QUASI_CLOSED replicas with correct sequence id. They're on unique origin nodes. + * There's an UNHEALTHY replica on a Decommissioning node, which also has the correct sequence ID and unique origin. + * It's expected that the UNHEALTHY replica is queued for under replication. This is a variation of the situation + * where the healthy replicas have incorrect sequence id, and the unhealthy ones have the correct sequence id. + * Here, all the replicas have the correct sequence id but the unhealthy still need to be saved because they're on + * unique origin nodes. + *
<p>
+ * Why do we need to save the UNHEALTHY replicas if we already have enough unique QUASI_CLOSED replicas to form a quorum? + * Because doing so preserves redundancy of replicas with unique origin node IDs. Once HDDS has the ability to + * restore UNHEALTHY replicas to a healthy state, they can also be used to form a quorum. In any case, when the + * container transitions to CLOSED, any UNHEALTHY replicas will be deleted. + *
<p>
+ */ + @Test + public void testReturnsTrueForQuasiClosedContainerWithVulnerableReplicaWhenAllReplicasHaveCorrectSequence() + throws NodeNotFoundException { + long sequenceId = 10; + ContainerInfo container = createContainerInfo(repConfig, 1, LifeCycleState.QUASI_CLOSED, sequenceId); + Set replicas = new HashSet<>(4); + for (int i = 0; i < 3; i++) { + replicas.add(createContainerReplica(container.containerID(), 0, IN_SERVICE, State.QUASI_CLOSED, + container.getSequenceId())); + } + // create UNHEALTHY replica with unique origin id on a DECOMMISSIONING node + ContainerReplica unhealthy = + createContainerReplica(container.containerID(), 0, DECOMMISSIONING, State.UNHEALTHY, sequenceId); + replicas.add(unhealthy); + Mockito.when(replicationManager.getNodeStatus(Mockito.any(DatanodeDetails.class))) + .thenAnswer(invocation -> { + DatanodeDetails dn = invocation.getArgument(0); + if (dn.equals(unhealthy.getDatanodeDetails())) { + return new NodeStatus(DECOMMISSIONING, HEALTHY); + } + return NodeStatus.inServiceHealthy(); + }); + requestBuilder.setContainerReplicas(replicas).setContainerInfo(container); + + assertTrue(handler.handle(requestBuilder.build())); + assertEquals(1, repQueue.underReplicatedQueueSize()); + assertEquals(0, repQueue.overReplicatedQueueSize()); + } + @Test public void testReturnsFalseForVulnerableReplicaWithAnotherCopy() throws NodeNotFoundException { long sequenceId = 10; diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java index 17107cfa9582..6307544578c2 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/hdds/scm/node/TestDatanodeAdminMonitor.java @@ -42,6 +42,7 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.mockito.Mockito; + import java.io.IOException; import java.util.Collections; import java.util.HashSet; @@ -373,6 +374,70 @@ public void testDecommissionWaitsForUnhealthyReplicaToReplicateNewRM() nodeManager.getNodeStatus(dn1).getOperationalState()); } + /** + * Situation: A QUASI_CLOSED container has an UNHEALTHY replica with a unique origin and three QUASI_CLOSED replicas. + * All the replicas have the correct Sequence ID. UNHEALTHY container is on a decommissioning node, and there are + * no other copies of this replica, that is, replicas with the same Origin ID as this replica. + * + * Expectation: Decommissioning should not complete until the UNHEALTHY replica has been replicated to another node. 
+ */ + @Test + public void testDecommissionWaitsForUnhealthyReplicaWithUniqueOriginToReplicateNewRM() + throws NodeNotFoundException, ContainerNotFoundException { + DatanodeDetails dn1 = MockDatanodeDetails.randomDatanodeDetails(); + nodeManager.register(dn1, + new NodeStatus(HddsProtos.NodeOperationalState.DECOMMISSIONING, HddsProtos.NodeState.HEALTHY)); + + // create a container and 3 QUASI_CLOSED replicas with containerID 1 and same origin ID + ContainerID containerID = ContainerID.valueOf(1); + ContainerInfo container = ReplicationTestUtil.createContainerInfo(RatisReplicationConfig.getInstance( + HddsProtos.ReplicationFactor.THREE), containerID.getId(), HddsProtos.LifeCycleState.QUASI_CLOSED); + Set replicas = + ReplicationTestUtil.createReplicasWithSameOrigin(containerID, State.QUASI_CLOSED, 0, 0, 0); + + // UNHEALTHY replica is on a unique origin and has same sequence id as the container + ContainerReplica unhealthy = + ReplicationTestUtil.createContainerReplica(containerID, 0, + dn1.getPersistedOpState(), State.UNHEALTHY, + container.getNumberOfKeys(), container.getUsedBytes(), dn1, + dn1.getUuid(), container.getSequenceId()); + replicas.add(unhealthy); + nodeManager.setContainers(dn1, ImmutableSet.of(containerID)); + + Mockito.when(repManager.getContainerReplicaCount(Mockito.eq(containerID))) + .thenReturn(new RatisContainerReplicaCount(container, replicas, + Collections.emptyList(), 2, false)); + DatanodeAdminMonitorTestUtil.mockCheckContainerState(repManager, true); + + // start monitoring dn1 + monitor.startMonitoring(dn1); + monitor.run(); + assertEquals(1, monitor.getTrackedNodeCount()); + assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONING, + nodeManager.getNodeStatus(dn1).getOperationalState()); + + // Running the monitor again causes it to remain DECOMMISSIONING + // as nothing has changed. + monitor.run(); + assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONING, + nodeManager.getNodeStatus(dn1).getOperationalState()); + + // add a copy of the UNHEALTHY replica on a new node, dn1 should get + // decommissioned now + ContainerReplica copyOfUnhealthyOnNewNode = unhealthy.toBuilder() + .setDatanodeDetails(MockDatanodeDetails.randomDatanodeDetails()) + .build(); + replicas.add(copyOfUnhealthyOnNewNode); + Mockito.when(repManager.getContainerReplicaCount(Mockito.eq(containerID))) + .thenReturn(new RatisContainerReplicaCount(container, replicas, + Collections.emptyList(), 2, false)); + DatanodeAdminMonitorTestUtil.mockCheckContainerState(repManager, false); + monitor.run(); + assertEquals(0, monitor.getTrackedNodeCount()); + assertEquals(HddsProtos.NodeOperationalState.DECOMMISSIONED, + nodeManager.getNodeStatus(dn1).getOperationalState()); + } + /** * Consider a QUASI_CLOSED container with only UNHEALTHY replicas. If one * of its nodes is decommissioned, the decommissioning should succeed. 
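The monitor behaviour asserted here reduces to a simple gate: a node stays DECOMMISSIONING until every container it hosts, including any with a vulnerable UNHEALTHY replica, reports that it is sufficiently replicated, and only then flips to DECOMMISSIONED. Below is a rough model of one monitor pass, with made-up names rather than the real DatanodeAdminMonitor API.

import java.util.Map;

// Illustrative model of the decommission gate; not Ozone's DatanodeAdminMonitor.
final class DecommissionGateSketch {
  enum OpState { DECOMMISSIONING, DECOMMISSIONED }

  // One monitor pass: release the node only when every tracked container is sufficiently replicated.
  static OpState runPass(Map<Long, Boolean> sufficientlyReplicatedByContainer) {
    boolean allReplicated = sufficientlyReplicatedByContainer.values().stream().allMatch(Boolean::booleanValue);
    return allReplicated ? OpState.DECOMMISSIONED : OpState.DECOMMISSIONING;
  }

  public static void main(String[] args) {
    // Container 1 still holds a vulnerable UNHEALTHY replica with no copy elsewhere.
    System.out.println(runPass(Map.of(1L, false))); // DECOMMISSIONING
    // After the replica is copied to another datanode, the node can finish decommissioning.
    System.out.println(runPass(Map.of(1L, true)));  // DECOMMISSIONED
  }
}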
diff --git a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java index 572016946ed7..95ec7914129a 100644 --- a/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java +++ b/hadoop-hdds/server-scm/src/test/java/org/apache/hadoop/ozone/container/common/TestEndPoint.java @@ -25,6 +25,7 @@ import java.util.Map; import java.util.UUID; +import org.apache.commons.io.FileUtils; import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.protocol.proto.HddsProtos; @@ -79,6 +80,7 @@ import org.junit.jupiter.api.io.TempDir; import org.mockito.Mockito; +import static org.junit.jupiter.api.Assertions.assertNotEquals; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -285,6 +287,32 @@ public void testDnLayoutVersionFile() throws Exception { = new DatanodeLayoutStorage(ozoneConf, "na_expect_storage_initialized"); assertEquals(scmServerImpl.getClusterId(), layout.getClusterID()); + + // Delete storage volume info + File storageDir = ozoneContainer.getVolumeSet() + .getVolumesList().get(0).getStorageDir(); + FileUtils.forceDelete(storageDir); + + // Format volume VERSION file with + // different clusterId than SCM clusterId. + ozoneContainer.getVolumeSet().getVolumesList() + .get(0).format("different_cluster_id"); + // Update layout clusterId and persist it. + layout.setClusterId("different_cluster_id"); + layout.persistCurrentState(); + + // As the volume level clusterId didn't match with SCM clusterId + // Even after the version call, the datanode layout file should + // not update its clusterID field. 
+ rpcEndPoint.setState(EndpointStateMachine.EndPointStates.GETVERSION); + versionTask.call(); + DatanodeLayoutStorage layout1 + = new DatanodeLayoutStorage(ozoneConf, + "na_expect_storage_initialized"); + + assertEquals("different_cluster_id", layout1.getClusterID()); + assertNotEquals(scmServerImpl.getClusterId(), layout1.getClusterID()); + FileUtils.forceDelete(storageDir); } } diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/BlockOutputStreamEntry.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/BlockOutputStreamEntry.java index 2501803fc070..9bdec27f534f 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/BlockOutputStreamEntry.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/BlockOutputStreamEntry.java @@ -27,6 +27,7 @@ import org.apache.hadoop.hdds.protocol.DatanodeDetails; import org.apache.hadoop.hdds.scm.ContainerClientMetrics; import org.apache.hadoop.hdds.scm.OzoneClientConfig; +import org.apache.hadoop.hdds.scm.StreamBufferArgs; import org.apache.hadoop.hdds.scm.XceiverClientFactory; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.storage.BlockOutputStream; @@ -61,6 +62,7 @@ public class BlockOutputStreamEntry extends OutputStream { private BufferPool bufferPool; private ContainerClientMetrics clientMetrics; + private StreamBufferArgs streamBufferArgs; @SuppressWarnings({"parameternumber", "squid:S00107"}) BlockOutputStreamEntry( @@ -71,7 +73,7 @@ public class BlockOutputStreamEntry extends OutputStream { BufferPool bufferPool, Token token, OzoneClientConfig config, - ContainerClientMetrics clientMetrics + ContainerClientMetrics clientMetrics, StreamBufferArgs streamBufferArgs ) { this.config = config; this.outputStream = null; @@ -84,6 +86,7 @@ public class BlockOutputStreamEntry extends OutputStream { this.currentPosition = 0; this.bufferPool = bufferPool; this.clientMetrics = clientMetrics; + this.streamBufferArgs = streamBufferArgs; } /** @@ -105,13 +108,17 @@ void checkStream() throws IOException { */ void createOutputStream() throws IOException { outputStream = new RatisBlockOutputStream(blockID, xceiverClientManager, - pipeline, bufferPool, config, token, clientMetrics); + pipeline, bufferPool, config, token, clientMetrics, streamBufferArgs); } ContainerClientMetrics getClientMetrics() { return clientMetrics; } + StreamBufferArgs getStreamBufferArgs() { + return streamBufferArgs; + } + @Override public void write(int b) throws IOException { checkStream(); @@ -353,6 +360,7 @@ public static class Builder { private Token token; private OzoneClientConfig config; private ContainerClientMetrics clientMetrics; + private StreamBufferArgs streamBufferArgs; public Builder setBlockID(BlockID bID) { this.blockID = bID; @@ -398,6 +406,10 @@ public Builder setClientMetrics(ContainerClientMetrics clientMetrics) { this.clientMetrics = clientMetrics; return this; } + public Builder setStreamBufferArgs(StreamBufferArgs streamBufferArgs) { + this.streamBufferArgs = streamBufferArgs; + return this; + } public BlockOutputStreamEntry build() { return new BlockOutputStreamEntry(blockID, @@ -406,7 +418,7 @@ public BlockOutputStreamEntry build() { pipeline, length, bufferPool, - token, config, clientMetrics); + token, config, clientMetrics, streamBufferArgs); } } } diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/BlockOutputStreamEntryPool.java 
b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/BlockOutputStreamEntryPool.java index 573e4a8dd3ca..d0f3b5728a8b 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/BlockOutputStreamEntryPool.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/BlockOutputStreamEntryPool.java @@ -27,10 +27,10 @@ import java.util.Map; import org.apache.hadoop.hdds.client.ReplicationConfig; -import org.apache.hadoop.hdds.conf.OzoneConfiguration; import org.apache.hadoop.hdds.scm.ByteStringConversion; import org.apache.hadoop.hdds.scm.ContainerClientMetrics; import org.apache.hadoop.hdds.scm.OzoneClientConfig; +import org.apache.hadoop.hdds.scm.StreamBufferArgs; import org.apache.hadoop.hdds.scm.XceiverClientFactory; import org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList; import org.apache.hadoop.hdds.scm.pipeline.PipelineID; @@ -84,6 +84,7 @@ public class BlockOutputStreamEntryPool implements KeyMetadataAware { private final long openID; private final ExcludeList excludeList; private final ContainerClientMetrics clientMetrics; + private final StreamBufferArgs streamBufferArgs; @SuppressWarnings({"parameternumber", "squid:S00107"}) public BlockOutputStreamEntryPool( @@ -94,7 +95,7 @@ public BlockOutputStreamEntryPool( boolean isMultipart, OmKeyInfo info, boolean unsafeByteBufferConversion, XceiverClientFactory xceiverClientFactory, long openID, - ContainerClientMetrics clientMetrics + ContainerClientMetrics clientMetrics, StreamBufferArgs streamBufferArgs ) { this.config = config; this.xceiverClientFactory = xceiverClientFactory; @@ -111,12 +112,13 @@ public BlockOutputStreamEntryPool( this.excludeList = createExcludeList(); this.bufferPool = - new BufferPool(config.getStreamBufferSize(), - (int) (config.getStreamBufferMaxSize() / config + new BufferPool(streamBufferArgs.getStreamBufferSize(), + (int) (streamBufferArgs.getStreamBufferMaxSize() / streamBufferArgs .getStreamBufferSize()), ByteStringConversion .createByteBufferConversion(unsafeByteBufferConversion)); this.clientMetrics = clientMetrics; + this.streamBufferArgs = streamBufferArgs; } ExcludeList createExcludeList() { @@ -124,17 +126,14 @@ ExcludeList createExcludeList() { Clock.system(ZoneOffset.UTC)); } - BlockOutputStreamEntryPool(ContainerClientMetrics clientMetrics) { + BlockOutputStreamEntryPool(ContainerClientMetrics clientMetrics, + OzoneClientConfig clientConfig, StreamBufferArgs streamBufferArgs) { streamEntries = new ArrayList<>(); omClient = null; keyArgs = null; xceiverClientFactory = null; - config = - new OzoneConfiguration().getObject(OzoneClientConfig.class); - config.setStreamBufferSize(0); - config.setStreamBufferMaxSize(0); - config.setStreamBufferFlushSize(0); - config.setStreamBufferFlushDelay(false); + config = clientConfig; + streamBufferArgs.setStreamBufferFlushDelay(false); requestID = null; int chunkSize = 0; bufferPool = new BufferPool(chunkSize, 1); @@ -143,6 +142,7 @@ ExcludeList createExcludeList() { openID = -1; excludeList = createExcludeList(); this.clientMetrics = clientMetrics; + this.streamBufferArgs = null; } /** @@ -189,6 +189,7 @@ BlockOutputStreamEntry createStreamEntry(OmKeyLocationInfo subKeyInfo) { .setBufferPool(bufferPool) .setToken(subKeyInfo.getToken()) .setClientMetrics(clientMetrics) + .setStreamBufferArgs(streamBufferArgs) .build(); } @@ -255,6 +256,10 @@ ContainerClientMetrics getClientMetrics() { return clientMetrics; } + StreamBufferArgs getStreamBufferArgs() { + return streamBufferArgs; + } + /** * 
Discards the subsequent pre allocated blocks and removes the streamEntries * from the streamEntries list for the container which is closed. diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockOutputStreamEntry.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockOutputStreamEntry.java index 26d11f3d642b..07d0f46069ca 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockOutputStreamEntry.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockOutputStreamEntry.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hdds.protocol.datanode.proto.ContainerProtos; import org.apache.hadoop.hdds.scm.ContainerClientMetrics; import org.apache.hadoop.hdds.scm.OzoneClientConfig; +import org.apache.hadoop.hdds.scm.StreamBufferArgs; import org.apache.hadoop.hdds.scm.XceiverClientFactory; import org.apache.hadoop.hdds.scm.pipeline.Pipeline; import org.apache.hadoop.hdds.scm.storage.BlockOutputStream; @@ -78,9 +79,10 @@ public class ECBlockOutputStreamEntry extends BlockOutputStreamEntry { ECBlockOutputStreamEntry(BlockID blockID, String key, XceiverClientFactory xceiverClientManager, Pipeline pipeline, long length, BufferPool bufferPool, Token token, - OzoneClientConfig config, ContainerClientMetrics clientMetrics) { + OzoneClientConfig config, ContainerClientMetrics clientMetrics, + StreamBufferArgs streamBufferArgs) { super(blockID, key, xceiverClientManager, pipeline, length, bufferPool, - token, config, clientMetrics); + token, config, clientMetrics, streamBufferArgs); assertInstanceOf( pipeline.getReplicationConfig(), ECReplicationConfig.class); this.replicationConfig = @@ -99,7 +101,7 @@ void checkStream() throws IOException { streams[i] = new ECBlockOutputStream(getBlockID(), getXceiverClientManager(), createSingleECBlockPipeline(getPipeline(), nodes.get(i), i + 1), - getBufferPool(), getConf(), getToken(), getClientMetrics()); + getBufferPool(), getConf(), getToken(), getClientMetrics(), getStreamBufferArgs()); } blockOutputStreams = streams; } @@ -441,6 +443,7 @@ public static class Builder { private Token token; private OzoneClientConfig config; private ContainerClientMetrics clientMetrics; + private StreamBufferArgs streamBufferArgs; public ECBlockOutputStreamEntry.Builder setBlockID(BlockID bID) { this.blockID = bID; @@ -492,6 +495,12 @@ public ECBlockOutputStreamEntry.Builder setClientMetrics( return this; } + public ECBlockOutputStreamEntry.Builder setStreamBufferArgs( + StreamBufferArgs args) { + this.streamBufferArgs = args; + return this; + } + public ECBlockOutputStreamEntry build() { return new ECBlockOutputStreamEntry(blockID, key, @@ -499,7 +508,7 @@ public ECBlockOutputStreamEntry build() { pipeline, length, bufferPool, - token, config, clientMetrics); + token, config, clientMetrics, streamBufferArgs); } } } diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockOutputStreamEntryPool.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockOutputStreamEntryPool.java index acc70d0dda61..e551605d842d 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockOutputStreamEntryPool.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ECBlockOutputStreamEntryPool.java @@ -21,6 +21,7 @@ import org.apache.hadoop.hdds.client.ReplicationConfig; import org.apache.hadoop.hdds.scm.ContainerClientMetrics; import org.apache.hadoop.hdds.scm.OzoneClientConfig; 
+import org.apache.hadoop.hdds.scm.StreamBufferArgs; import org.apache.hadoop.hdds.scm.XceiverClientFactory; import org.apache.hadoop.hdds.scm.container.common.helpers.ExcludeList; import org.apache.hadoop.ozone.om.helpers.OmKeyInfo; @@ -56,10 +57,10 @@ public ECBlockOutputStreamEntryPool(OzoneClientConfig config, boolean unsafeByteBufferConversion, XceiverClientFactory xceiverClientFactory, long openID, - ContainerClientMetrics clientMetrics) { + ContainerClientMetrics clientMetrics, StreamBufferArgs streamBufferArgs) { super(config, omClient, requestId, replicationConfig, uploadID, partNumber, isMultipart, info, unsafeByteBufferConversion, xceiverClientFactory, - openID, clientMetrics); + openID, clientMetrics, streamBufferArgs); assert replicationConfig instanceof ECReplicationConfig; } @@ -82,6 +83,7 @@ BlockOutputStreamEntry createStreamEntry(OmKeyLocationInfo subKeyInfo) { .setBufferPool(getBufferPool()) .setToken(subKeyInfo.getToken()) .setClientMetrics(getClientMetrics()) + .setStreamBufferArgs(getStreamBufferArgs()) .build(); } diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ECKeyOutputStream.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ECKeyOutputStream.java index 15ebccda2886..b5c36474ff9e 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ECKeyOutputStream.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/ECKeyOutputStream.java @@ -128,14 +128,12 @@ public long getFlushCheckpoint() { } private ECKeyOutputStream(Builder builder) { - super(builder.getReplicationConfig(), builder.getClientMetrics()); + super(builder.getReplicationConfig(), builder.getClientMetrics(), + builder.getClientConfig(), builder.getStreamBufferArgs()); this.config = builder.getClientConfig(); this.bufferPool = builder.getByteBufferPool(); // For EC, cell/chunk size and buffer size can be same for now. 
ecChunkSize = builder.getReplicationConfig().getEcChunkSize(); - this.config.setStreamBufferMaxSize(ecChunkSize); - this.config.setStreamBufferFlushSize(ecChunkSize); - this.config.setStreamBufferSize(ecChunkSize); this.numDataBlks = builder.getReplicationConfig().getData(); this.numParityBlks = builder.getReplicationConfig().getParity(); ecChunkBufferCache = new ECChunkBuffers( @@ -151,7 +149,7 @@ private ECKeyOutputStream(Builder builder) { builder.isMultipartKey(), info, builder.isUnsafeByteBufferConversionEnabled(), builder.getXceiverManager(), builder.getOpenHandler().getId(), - builder.getClientMetrics()); + builder.getClientMetrics(), builder.getStreamBufferArgs()); this.writeOffset = 0; this.encoder = CodecUtil.createRawEncoderWithFallback( diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/KeyOutputStream.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/KeyOutputStream.java index 4e0c4c91faed..8b128e9cd945 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/KeyOutputStream.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/io/KeyOutputStream.java @@ -34,6 +34,7 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos.ReplicationType; import org.apache.hadoop.hdds.scm.ContainerClientMetrics; import org.apache.hadoop.hdds.scm.OzoneClientConfig; +import org.apache.hadoop.hdds.scm.StreamBufferArgs; import org.apache.hadoop.hdds.scm.XceiverClientFactory; import org.apache.hadoop.hdds.scm.client.HddsClientUtils; import org.apache.hadoop.hdds.scm.container.ContainerID; @@ -94,6 +95,7 @@ enum StreamAction { private final BlockOutputStreamEntryPool blockOutputStreamEntryPool; private long clientID; + private StreamBufferArgs streamBufferArgs; /** * Indicates if an atomic write is required. 
When set to true, @@ -104,8 +106,10 @@ enum StreamAction { private boolean atomicKeyCreation; public KeyOutputStream(ReplicationConfig replicationConfig, - ContainerClientMetrics clientMetrics) { + ContainerClientMetrics clientMetrics, OzoneClientConfig clientConfig, + StreamBufferArgs streamBufferArgs) { this.replication = replicationConfig; + this.config = clientConfig; closed = false; this.retryPolicyMap = HddsClientUtils.getExceptionList() .stream() @@ -113,7 +117,8 @@ public KeyOutputStream(ReplicationConfig replicationConfig, e -> RetryPolicies.TRY_ONCE_THEN_FAIL)); retryCount = 0; offset = 0; - blockOutputStreamEntryPool = new BlockOutputStreamEntryPool(clientMetrics); + blockOutputStreamEntryPool = + new BlockOutputStreamEntryPool(clientMetrics, clientConfig, streamBufferArgs); } @VisibleForTesting @@ -151,7 +156,7 @@ public KeyOutputStream( String uploadID, int partNumber, boolean isMultipart, boolean unsafeByteBufferConversion, ContainerClientMetrics clientMetrics, - boolean atomicKeyCreation + boolean atomicKeyCreation, StreamBufferArgs streamBufferArgs ) { this.config = config; this.replication = replicationConfig; @@ -165,7 +170,7 @@ public KeyOutputStream( unsafeByteBufferConversion, xceiverClientManager, handler.getId(), - clientMetrics); + clientMetrics, streamBufferArgs); this.retryPolicyMap = HddsClientUtils.getRetryPolicyByException( config.getMaxRetryCount(), config.getRetryInterval()); this.retryCount = 0; @@ -173,6 +178,7 @@ public KeyOutputStream( this.writeOffset = 0; this.clientID = handler.getId(); this.atomicKeyCreation = atomicKeyCreation; + this.streamBufferArgs = streamBufferArgs; } /** @@ -279,7 +285,7 @@ private int writeToOutputStream(BlockOutputStreamEntry current, // to or less than the max length of the buffer allocated. // The len specified here is the combined sum of the data length of // the buffers - Preconditions.checkState(!retry || len <= config + Preconditions.checkState(!retry || len <= streamBufferArgs .getStreamBufferMaxSize()); int dataWritten = (int) (current.getWrittenDataLength() - currentPos); writeLen = retry ? 
(int) len : dataWritten; @@ -330,7 +336,7 @@ private void handleException(BlockOutputStreamEntry streamEntry, pipeline, totalSuccessfulFlushedData, bufferedDataLen, retryCount); } Preconditions.checkArgument( - bufferedDataLen <= config.getStreamBufferMaxSize()); + bufferedDataLen <= streamBufferArgs.getStreamBufferMaxSize()); Preconditions.checkArgument( offset - blockOutputStreamEntryPool.getKeyLength() == bufferedDataLen); long containerId = streamEntry.getBlockID().getContainerID(); @@ -608,6 +614,7 @@ public static class Builder { private ReplicationConfig replicationConfig; private ContainerClientMetrics clientMetrics; private boolean atomicKeyCreation = false; + private StreamBufferArgs streamBufferArgs; public String getMultipartUploadID() { return multipartUploadID; @@ -676,6 +683,15 @@ public Builder setConfig(OzoneClientConfig config) { return this; } + public StreamBufferArgs getStreamBufferArgs() { + return streamBufferArgs; + } + + public Builder setStreamBufferArgs(StreamBufferArgs streamBufferArgs) { + this.streamBufferArgs = streamBufferArgs; + return this; + } + public boolean isUnsafeByteBufferConversionEnabled() { return unsafeByteBufferConversion; } @@ -725,7 +741,8 @@ public KeyOutputStream build() { isMultipartKey, unsafeByteBufferConversion, clientMetrics, - atomicKeyCreation); + atomicKeyCreation, + streamBufferArgs); } } diff --git a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java index 5302c88520d1..850ae0d19376 100644 --- a/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java +++ b/hadoop-ozone/client/src/main/java/org/apache/hadoop/ozone/client/rpc/RpcClient.java @@ -46,6 +46,7 @@ import org.apache.hadoop.hdds.protocol.proto.HddsProtos; import org.apache.hadoop.hdds.scm.ContainerClientMetrics; import org.apache.hadoop.hdds.scm.OzoneClientConfig; +import org.apache.hadoop.hdds.scm.StreamBufferArgs; import org.apache.hadoop.hdds.scm.XceiverClientFactory; import org.apache.hadoop.hdds.scm.XceiverClientManager; import org.apache.hadoop.hdds.scm.client.ClientTrustManager; @@ -207,6 +208,7 @@ public class RpcClient implements ClientProtocol { private final boolean topologyAwareReadEnabled; private final boolean checkKeyNameEnabled; private final OzoneClientConfig clientConfig; + private final ReplicationConfigValidator replicationConfigValidator; private final Cache keyProviderCache; private final boolean getLatestVersionLocation; private final ByteBufferPool byteBufferPool; @@ -230,6 +232,8 @@ public RpcClient(ConfigurationSource conf, String omServiceId) this.ugi = UserGroupInformation.getCurrentUser(); // Get default acl rights for user and group. 
OzoneAclConfig aclConfig = this.conf.getObject(OzoneAclConfig.class); + replicationConfigValidator = + this.conf.getObject(ReplicationConfigValidator.class); this.userRights = aclConfig.getUserDefaultRights(); this.groupRights = aclConfig.getGroupDefaultRights(); @@ -1343,9 +1347,7 @@ public OzoneOutputStream createKey( } if (replicationConfig != null) { - ReplicationConfigValidator validator = - this.conf.getObject(ReplicationConfigValidator.class); - validator.validate(replicationConfig); + replicationConfigValidator.validate(replicationConfig); } OmKeyArgs.Builder builder = new OmKeyArgs.Builder() @@ -1854,7 +1856,7 @@ public OzoneDataStreamOutput createMultipartStreamKey( .setMultipartUploadID(uploadID) .setIsMultipartKey(true) .enableUnsafeByteBufferConversion(unsafeByteBufferConversion) - .setConfig(conf.getObject(OzoneClientConfig.class)) + .setConfig(clientConfig) .setAtomicKeyCreation(isS3GRequest.get()) .build(); keyOutputStream @@ -2269,7 +2271,7 @@ private OzoneDataStreamOutput createDataStreamOutput(OpenKeySession openKey) .setOmClient(ozoneManagerClient) .setReplicationConfig(replicationConfig) .enableUnsafeByteBufferConversion(unsafeByteBufferConversion) - .setConfig(conf.getObject(OzoneClientConfig.class)) + .setConfig(clientConfig) .setAtomicKeyCreation(isS3GRequest.get()) .build(); keyOutputStream @@ -2279,6 +2281,7 @@ private OzoneDataStreamOutput createDataStreamOutput(OpenKeySession openKey) openKey, keyOutputStream, null); return new OzoneDataStreamOutput(out != null ? out : keyOutputStream); } + private OzoneOutputStream createOutputStream(OpenKeySession openKey) throws IOException { KeyOutputStream keyOutputStream = createKeyOutputStream(openKey) @@ -2336,6 +2339,8 @@ private KeyOutputStream.Builder createKeyOutputStream( ReplicationConfig replicationConfig = openKey.getKeyInfo().getReplicationConfig(); + StreamBufferArgs streamBufferArgs = StreamBufferArgs.getDefaultStreamBufferArgs( + replicationConfig, clientConfig); if (replicationConfig.getReplicationType() == HddsProtos.ReplicationType.EC) { builder = new ECKeyOutputStream.Builder() @@ -2351,9 +2356,10 @@ private KeyOutputStream.Builder createKeyOutputStream( .setXceiverClientManager(xceiverClientManager) .setOmClient(ozoneManagerClient) .enableUnsafeByteBufferConversion(unsafeByteBufferConversion) - .setConfig(conf.getObject(OzoneClientConfig.class)) + .setConfig(clientConfig) .setAtomicKeyCreation(isS3GRequest.get()) - .setClientMetrics(clientMetrics); + .setClientMetrics(clientMetrics) + .setStreamBufferArgs(streamBufferArgs); } @Override diff --git a/hadoop-ozone/dev-support/checks/native.sh b/hadoop-ozone/dev-support/checks/native.sh index a90dd30c2078..4d22e695e76f 100755 --- a/hadoop-ozone/dev-support/checks/native.sh +++ b/hadoop-ozone/dev-support/checks/native.sh @@ -19,6 +19,13 @@ DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" CHECK=native +zlib_version=$(mvn -N help:evaluate -Dexpression=zlib.version -q -DforceStdout) +if [[ -z "${zlib_version}" ]]; then + echo "ERROR zlib.version not defined in pom.xml" + exit 1 +fi + source "${DIR}/junit.sh" -Pnative -Drocks_tools_native \ + -Dzlib.url="https://github.com/madler/zlib/releases/download/v${zlib_version}/zlib-${zlib_version}.tar.gz" \ -DexcludedGroups="unhealthy | org.apache.ozone.test.UnhealthyTest" \ "$@" diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestRootedOzoneFileSystem.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestRootedOzoneFileSystem.java 
index 272a12a492b0..c7d19678c913 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestRootedOzoneFileSystem.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/fs/ozone/TestRootedOzoneFileSystem.java @@ -1012,6 +1012,39 @@ private void teardownVolumeBucketWithDir(Path bucketPath1) objectStore.deleteVolume(ofsPath.getVolumeName()); } + /** + * Create a bucket with a different owner than the volume owner + * and test the owner on listStatus. + */ + @Test + public void testListStatusWithDifferentBucketOwner() throws IOException { + String volName = getRandomNonExistVolumeName(); + objectStore.createVolume(volName); + OzoneVolume ozoneVolume = objectStore.getVolume(volName); + + String buckName = "bucket-" + RandomStringUtils.randomNumeric(5); + UserGroupInformation currUgi = UserGroupInformation.getCurrentUser(); + String bucketOwner = currUgi.getUserName() + RandomStringUtils.randomNumeric(5); + BucketArgs bucketArgs = BucketArgs.newBuilder() + .setOwner(bucketOwner) + .build(); + ozoneVolume.createBucket(buckName, bucketArgs); + + Path volPath = new Path(OZONE_URI_DELIMITER + volName); + + OzoneBucket ozoneBucket = ozoneVolume.getBucket(buckName); + + FileStatus[] fileStatusVolume = ofs.listStatus(volPath); + assertEquals(1, fileStatusVolume.length); + // FileStatus owner is different from the volume owner. + // Owner is the same as the bucket owner returned by the ObjectStore. + assertNotEquals(ozoneVolume.getOwner(), fileStatusVolume[0].getOwner()); + assertEquals(ozoneBucket.getOwner(), fileStatusVolume[0].getOwner()); + + ozoneVolume.deleteBucket(buckName); + objectStore.deleteVolume(volName); + } + /** * OFS: Test non-recursive listStatus on root and volume. */ diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/utils/db/managed/TestRocksObjectLeakDetector.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/utils/db/managed/TestRocksObjectLeakDetector.java new file mode 100644 index 000000000000..62912ad4aab8 --- /dev/null +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/hdds/utils/db/managed/TestRocksObjectLeakDetector.java @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package org.apache.hadoop.hdds.utils.db.managed; + +import org.apache.commons.lang3.RandomUtils; +import org.apache.hadoop.hdds.conf.OzoneConfiguration; +import org.apache.hadoop.ozone.MiniOzoneCluster; +import org.apache.ozone.test.tag.Unhealthy; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.io.IOException; +import java.util.UUID; +import java.util.concurrent.TimeoutException; +import java.util.function.Supplier; + +import static org.apache.hadoop.ozone.OzoneConfigKeys.OZONE_METADATA_STORE_ROCKSDB_STATISTICS; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +/** + * Test managed rocks object leak detection. + * This test creates garbage that will fail other tests and is intended for manual run only. + * It is also flaky because of depending on background processes and environment (other background tasks + * can create extra managed rocks objects and thus fails the counter assertions). + */ +@Unhealthy +public class TestRocksObjectLeakDetector { + + private static MiniOzoneCluster cluster; + + @BeforeAll + static void setUp() throws IOException, InterruptedException, + TimeoutException { + OzoneConfiguration conf = new OzoneConfiguration(); + conf.set(OZONE_METADATA_STORE_ROCKSDB_STATISTICS, "ALL"); + String clusterId = UUID.randomUUID().toString(); + String scmId = UUID.randomUUID().toString(); + String omServiceId = "omServiceId1"; + cluster = MiniOzoneCluster.newBuilder(conf) + .setClusterId(clusterId) + .setScmId(scmId) + .setOMServiceId(omServiceId) + .setNumOfOzoneManagers(1) + .build(); + cluster.waitForClusterToBeReady(); + } + + @AfterAll + static void cleanUp() { + if (cluster != null) { + assertThrows(AssertionError.class, () -> cluster.shutdown()); + } + } + + @Test + public void testLeakDetector() throws Exception { + testLeakDetector(ManagedBloomFilter::new); + testLeakDetector(ManagedColumnFamilyOptions::new); + testLeakDetector(ManagedEnvOptions::new); + testLeakDetector(ManagedFlushOptions::new); + testLeakDetector(ManagedIngestExternalFileOptions::new); + testLeakDetector(() -> new ManagedLRUCache(1L)); + testLeakDetector(ManagedOptions::new); + testLeakDetector(ManagedReadOptions::new); + testLeakDetector(() -> new ManagedSlice(RandomUtils.nextBytes(10))); + testLeakDetector(ManagedStatistics::new); + testLeakDetector(ManagedWriteBatch::new); + testLeakDetector(ManagedWriteOptions::new); + } + + private void testLeakDetector(Supplier supplier) throws Exception { + // base metrics + long managedObjects = ManagedRocksObjectMetrics.INSTANCE.totalManagedObjects(); + long leakObjects = ManagedRocksObjectMetrics.INSTANCE.totalLeakObjects(); + + // Allocate and close. + allocate(supplier, true); + System.gc(); + // it could take a while for leak detection to run in the background. + Thread.sleep(500); + assertEquals(managedObjects + 1, ManagedRocksObjectMetrics.INSTANCE.totalManagedObjects()); + assertEquals(leakObjects, ManagedRocksObjectMetrics.INSTANCE.totalLeakObjects()); + + // Allocate and not close. 
+ allocate(supplier, false); + System.gc(); + Thread.sleep(500); + assertEquals(managedObjects + 2, ManagedRocksObjectMetrics.INSTANCE.totalManagedObjects()); + assertEquals(leakObjects + 1, ManagedRocksObjectMetrics.INSTANCE.totalLeakObjects()); + } + + private void allocate(Supplier supplier, boolean close) throws Exception { + T object = supplier.get(); + if (close) { + object.close(); + } + } +} diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java index e42e9081aae8..a19c8bef96e4 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/MiniOzoneClusterImpl.java @@ -447,15 +447,14 @@ public String getClusterId() { public void shutdown() { try { LOG.info("Shutting down the Mini Ozone Cluster"); + CodecTestUtil.gc(); IOUtils.closeQuietly(clients); final File baseDir = new File(getBaseDir()); stop(); FileUtils.deleteDirectory(baseDir); ContainerCache.getInstance(conf).shutdownCache(); DefaultMetricsSystem.shutdown(); - ManagedRocksObjectMetrics.INSTANCE.assertNoLeaks(); - CodecTestUtil.gc(); } catch (Exception e) { LOG.error("Exception while shutting down the cluster.", e); } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java index 7e4cfa00867b..da3d1a17ddda 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/KeyManagerImpl.java @@ -671,10 +671,12 @@ public BackgroundService getMultipartUploadCleanupService() { return multipartUploadCleanupService; } + @Override public SstFilteringService getSnapshotSstFilteringService() { return snapshotSstFilteringService; } + @Override public SnapshotDeletingService getSnapshotDeletingService() { return snapshotDeletingService; } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java index 2a7771fe60a3..3e6d70626728 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OMDBCheckpointServlet.java @@ -36,6 +36,7 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.ozone.rocksdiff.RocksDBCheckpointDiffer; +import com.google.common.base.Preconditions; import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -644,29 +645,35 @@ public BootstrapStateHandler.Lock getBootstrapStateLock() { } static class Lock extends BootstrapStateHandler.Lock { - private final BootstrapStateHandler keyDeletingService; - private final BootstrapStateHandler sstFilteringService; - private final BootstrapStateHandler rocksDbCheckpointDiffer; - private final BootstrapStateHandler snapshotDeletingService; + private final List locks; private final OzoneManager om; Lock(OzoneManager om) { + Preconditions.checkNotNull(om); + Preconditions.checkNotNull(om.getKeyManager()); + Preconditions.checkNotNull(om.getMetadataManager()); + Preconditions.checkNotNull(om.getMetadataManager().getStore()); + this.om = om; - 
keyDeletingService = om.getKeyManager().getDeletingService(); - sstFilteringService = om.getKeyManager().getSnapshotSstFilteringService(); - rocksDbCheckpointDiffer = om.getMetadataManager().getStore() - .getRocksDBCheckpointDiffer(); - snapshotDeletingService = om.getKeyManager().getSnapshotDeletingService(); + + locks = Stream.of( + om.getKeyManager().getDeletingService(), + om.getKeyManager().getSnapshotSstFilteringService(), + om.getMetadataManager().getStore().getRocksDBCheckpointDiffer(), + om.getKeyManager().getSnapshotDeletingService() + ) + .filter(Objects::nonNull) + .map(BootstrapStateHandler::getBootstrapStateLock) + .collect(Collectors.toList()); } @Override public BootstrapStateHandler.Lock lock() throws InterruptedException { // First lock all the handlers. - keyDeletingService.getBootstrapStateLock().lock(); - sstFilteringService.getBootstrapStateLock().lock(); - rocksDbCheckpointDiffer.getBootstrapStateLock().lock(); - snapshotDeletingService.getBootstrapStateLock().lock(); + for (BootstrapStateHandler.Lock lock : locks) { + lock.lock(); + } // Then wait for the double buffer to be flushed. om.awaitDoubleBufferFlush(); @@ -675,10 +682,7 @@ public BootstrapStateHandler.Lock lock() @Override public void unlock() { - snapshotDeletingService.getBootstrapStateLock().unlock(); - rocksDbCheckpointDiffer.getBootstrapStateLock().unlock(); - sstFilteringService.getBootstrapStateLock().unlock(); - keyDeletingService.getBootstrapStateLock().unlock(); + locks.forEach(BootstrapStateHandler.Lock::unlock); } } } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java index 97a7a0608d42..d8932c0e7e0a 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/snapshot/SnapshotCache.java @@ -169,8 +169,8 @@ public ReferenceCounted get(String key, // does not exist, and increment the reference count on the instance. ReferenceCounted rcOmSnapshot = dbMap.compute(key, (k, v) -> { - LOG.info("Loading snapshot. Table key: {}", k); if (v == null) { + LOG.info("Loading snapshot. Table key: {}", k); try { v = new ReferenceCounted<>(cacheLoader.load(k), false, this); } catch (OMException omEx) { @@ -317,7 +317,7 @@ private void cleanupInternal() { Preconditions.checkState(rcOmSnapshot == result, "Cache map entry removal failure. The cache is in an inconsistent " + "state. 
Expected OmSnapshot instance: " + rcOmSnapshot - + ", actual: " + result); + + ", actual: " + result + " for key: " + key); pendingEvictionList.remove(result); diff --git a/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicOzoneClientAdapterImpl.java b/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicOzoneClientAdapterImpl.java index a60134d4f649..e6892d9784db 100644 --- a/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicOzoneClientAdapterImpl.java +++ b/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicOzoneClientAdapterImpl.java @@ -29,7 +29,6 @@ import java.util.List; import com.google.common.annotations.VisibleForTesting; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.key.KeyProvider; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FileAlreadyExistsException; @@ -37,7 +36,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathIsNotEmptyDirectoryException; import org.apache.hadoop.fs.SafeModeAction; -import org.apache.hadoop.hdds.annotation.InterfaceAudience; import org.apache.hadoop.hdds.client.ReplicationConfig; import org.apache.hadoop.hdds.client.ReplicationFactor; import org.apache.hadoop.hdds.conf.ConfigurationSource; @@ -69,7 +67,6 @@ import org.apache.hadoop.ozone.snapshot.SnapshotDiffReportOzone; import org.apache.hadoop.ozone.snapshot.SnapshotDiffResponse; import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.security.token.TokenRenewer; import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.StringUtils; @@ -470,56 +467,6 @@ void setClock(Clock monotonicClock) { this.clock = monotonicClock; } - /** - * Ozone Delegation Token Renewer. - */ - @InterfaceAudience.Private - public static class Renewer extends TokenRenewer { - - //Ensure that OzoneConfiguration files are loaded before trying to use - // the renewer. - static { - OzoneConfiguration.activate(); - } - - public Text getKind() { - return OzoneTokenIdentifier.KIND_NAME; - } - - @Override - public boolean handleKind(Text kind) { - return getKind().equals(kind); - } - - @Override - public boolean isManaged(Token token) throws IOException { - return true; - } - - @Override - public long renew(Token token, Configuration conf) - throws IOException, InterruptedException { - Token ozoneDt = - (Token) token; - - OzoneClient ozoneClient = - OzoneClientFactory.getOzoneClient(OzoneConfiguration.of(conf), - ozoneDt); - return ozoneClient.getObjectStore().renewDelegationToken(ozoneDt); - } - - @Override - public void cancel(Token token, Configuration conf) - throws IOException, InterruptedException { - Token ozoneDt = - (Token) token; - OzoneClient ozoneClient = - OzoneClientFactory.getOzoneClient(OzoneConfiguration.of(conf), - ozoneDt); - ozoneClient.getObjectStore().cancelDelegationToken(ozoneDt); - } - } - /** * Adapter to convert OzoneKey to a safe and simple Key implementation. 
*/ diff --git a/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicRootedOzoneClientAdapterImpl.java b/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicRootedOzoneClientAdapterImpl.java index 085a2300fd41..2fa5f70444e7 100644 --- a/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicRootedOzoneClientAdapterImpl.java +++ b/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/BasicRootedOzoneClientAdapterImpl.java @@ -32,7 +32,6 @@ import com.google.common.base.Preconditions; import org.apache.commons.collections.CollectionUtils; -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.crypto.key.KeyProvider; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.FileAlreadyExistsException; @@ -43,7 +42,6 @@ import org.apache.hadoop.fs.PathIsNotEmptyDirectoryException; import org.apache.hadoop.fs.SafeModeAction; import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.hdds.annotation.InterfaceAudience; import org.apache.hadoop.hdds.client.ReplicationConfig; import org.apache.hadoop.hdds.client.ReplicationFactor; import org.apache.hadoop.hdds.conf.ConfigurationSource; @@ -80,7 +78,6 @@ import org.apache.hadoop.ozone.snapshot.SnapshotDiffResponse; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; -import org.apache.hadoop.security.token.TokenRenewer; import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; @@ -790,16 +787,12 @@ private List listStatusVolume(String volumeStr, OFSPath ofsStartPath = new OFSPath(startPath, config); // list buckets in the volume OzoneVolume volume = objectStore.getVolume(volumeStr); - UserGroupInformation ugi = - UserGroupInformation.createRemoteUser(volume.getOwner()); - String owner = ugi.getShortUserName(); - String group = getGroupName(ugi); Iterator iter = volume.listBuckets(null, ofsStartPath.getBucketName()); List res = new ArrayList<>(); while (iter.hasNext() && res.size() < numEntries) { OzoneBucket bucket = iter.next(); - res.add(getFileStatusAdapterForBucket(bucket, uri, owner, group)); + res.add(getFileStatusAdapterForBucket(bucket, uri)); if (recursive) { String pathStrNext = volumeStr + OZONE_URI_DELIMITER + bucket.getName(); res.addAll(listStatus(pathStrNext, recursive, startPath, @@ -958,55 +951,6 @@ public String getCanonicalServiceName() { return objectStore.getCanonicalServiceName(); } - /** - * Ozone Delegation Token Renewer. - */ - @InterfaceAudience.Private - public static class Renewer extends TokenRenewer { - - //Ensure that OzoneConfiguration files are loaded before trying to use - // the renewer. 
-    static {
-      OzoneConfiguration.activate();
-    }
-
-    public Text getKind() {
-      return OzoneTokenIdentifier.KIND_NAME;
-    }
-
-    @Override
-    public boolean handleKind(Text kind) {
-      return getKind().equals(kind);
-    }
-
-    @Override
-    public boolean isManaged(Token<?> token) throws IOException {
-      return true;
-    }
-
-    @Override
-    public long renew(Token<?> token, Configuration conf)
-        throws IOException, InterruptedException {
-      Token<OzoneTokenIdentifier> ozoneDt =
-          (Token<OzoneTokenIdentifier>) token;
-      OzoneClient ozoneClient =
-          OzoneClientFactory.getOzoneClient(OzoneConfiguration.of(conf),
-          ozoneDt);
-      return ozoneClient.getObjectStore().renewDelegationToken(ozoneDt);
-    }
-
-    @Override
-    public void cancel(Token<?> token, Configuration conf)
-        throws IOException, InterruptedException {
-      Token<OzoneTokenIdentifier> ozoneDt =
-          (Token<OzoneTokenIdentifier>) token;
-      OzoneClient ozoneClient =
-          OzoneClientFactory.getOzoneClient(OzoneConfiguration.of(conf),
-          ozoneDt);
-      ozoneClient.getObjectStore().cancelDelegationToken(ozoneDt);
-    }
-  }
-
   /**
    * Adapter to convert OzoneKey to a safe and simple Key implementation.
    */
@@ -1161,12 +1105,9 @@ private static FileStatusAdapter getFileStatusAdapterForVolume(
    * Generate a FileStatusAdapter for a bucket.
    * @param ozoneBucket OzoneBucket object.
    * @param uri Full URI to OFS root.
-   * @param owner Owner of the parent volume of the bucket.
-   * @param group Group of the parent volume of the bucket.
    * @return FileStatusAdapter for a bucket.
    */
-  private static FileStatusAdapter getFileStatusAdapterForBucket(
-      OzoneBucket ozoneBucket, URI uri, String owner, String group) {
+  private static FileStatusAdapter getFileStatusAdapterForBucket(OzoneBucket ozoneBucket, URI uri) {
     String pathStr = uri.toString() +
         OZONE_URI_DELIMITER + ozoneBucket.getVolumeName() +
         OZONE_URI_DELIMITER + ozoneBucket.getName();
@@ -1176,6 +1117,9 @@ private static FileStatusAdapter getFileStatusAdapterForBucket(
         ozoneBucket.getName(), pathStr);
     }
     Path path = new Path(pathStr);
+    UserGroupInformation ugi = UserGroupInformation.createRemoteUser(ozoneBucket.getOwner());
+    String owner = ugi.getShortUserName();
+    String group = getGroupName(ugi);
     return new FileStatusAdapter(0L, 0L, path, true, (short)0, 0L,
         ozoneBucket.getCreationTime().getEpochSecond() * 1000, 0L,
         FsPermission.getDirDefault().toShort(),
diff --git a/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/OzoneDelegationTokenRenewer.java b/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/OzoneDelegationTokenRenewer.java
new file mode 100644
index 000000000000..b64c0a2dbbdb
--- /dev/null
+++ b/hadoop-ozone/ozonefs-common/src/main/java/org/apache/hadoop/fs/ozone/OzoneDelegationTokenRenewer.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.fs.ozone;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hdds.annotation.InterfaceAudience;
+import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.ozone.client.OzoneClient;
+import org.apache.hadoop.ozone.security.OzoneTokenIdentifier;
+import org.apache.hadoop.security.token.Token;
+import org.apache.hadoop.security.token.TokenRenewer;
+
+import java.io.IOException;
+
+import static org.apache.hadoop.ozone.client.OzoneClientFactory.getOzoneClient;
+
+/**
+ * Ozone Delegation Token Renewer.
+ */
+@InterfaceAudience.Private
+public class OzoneDelegationTokenRenewer extends TokenRenewer {
+
+  //Ensure that OzoneConfiguration files are loaded before trying to use
+  // the renewer.
+  static {
+    OzoneConfiguration.activate();
+  }
+
+  public Text getKind() {
+    return OzoneTokenIdentifier.KIND_NAME;
+  }
+
+  @Override
+  public boolean handleKind(Text kind) {
+    return getKind().equals(kind);
+  }
+
+  @Override
+  public boolean isManaged(Token<?> token) {
+    return true;
+  }
+
+  @Override
+  public long renew(Token<?> token, Configuration conf) throws IOException {
+    Token<OzoneTokenIdentifier> ozoneDt =
+        (Token<OzoneTokenIdentifier>) token;
+    OzoneConfiguration ozoneConf = OzoneConfiguration.of(conf);
+    try (OzoneClient ozoneClient = getOzoneClient(ozoneConf, ozoneDt)) {
+      return ozoneClient.getObjectStore().renewDelegationToken(ozoneDt);
+    }
+  }
+
+  @Override
+  public void cancel(Token<?> token, Configuration conf)
+      throws IOException, InterruptedException {
+    Token<OzoneTokenIdentifier> ozoneDt =
+        (Token<OzoneTokenIdentifier>) token;
+    OzoneConfiguration ozoneConf = OzoneConfiguration.of(conf);
+    try (OzoneClient ozoneClient = getOzoneClient(ozoneConf, ozoneDt)) {
+      ozoneClient.getObjectStore().cancelDelegationToken(ozoneDt);
+    }
+  }
+}
diff --git a/hadoop-ozone/ozonefs-hadoop3/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenRenewer b/hadoop-ozone/ozonefs-hadoop3/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenRenewer
index ac1b3cf677e9..a5de85b69cf3 100644
--- a/hadoop-ozone/ozonefs-hadoop3/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenRenewer
+++ b/hadoop-ozone/ozonefs-hadoop3/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenRenewer
@@ -16,5 +16,4 @@
 # limitations under the License.
 #
-org.apache.hadoop.fs.ozone.BasicOzoneClientAdapterImpl$Renewer
-org.apache.hadoop.fs.ozone.BasicRootedOzoneClientAdapterImpl$Renewer
+org.apache.hadoop.fs.ozone.OzoneDelegationTokenRenewer
diff --git a/hadoop-ozone/ozonefs/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenRenewer b/hadoop-ozone/ozonefs/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenRenewer
index ac1b3cf677e9..a5de85b69cf3 100644
--- a/hadoop-ozone/ozonefs/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenRenewer
+++ b/hadoop-ozone/ozonefs/src/main/resources/META-INF/services/org.apache.hadoop.security.token.TokenRenewer
@@ -16,5 +16,4 @@
 # limitations under the License.
 #
-org.apache.hadoop.fs.ozone.BasicOzoneClientAdapterImpl$Renewer
-org.apache.hadoop.fs.ozone.BasicRootedOzoneClientAdapterImpl$Renewer
+org.apache.hadoop.fs.ozone.OzoneDelegationTokenRenewer
diff --git a/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/client/OzoneOutputStreamStub.java b/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/client/OzoneOutputStreamStub.java
index 00a7ba557490..983516002909 100644
--- a/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/client/OzoneOutputStreamStub.java
+++ b/hadoop-ozone/s3gateway/src/test/java/org/apache/hadoop/ozone/client/OzoneOutputStreamStub.java
@@ -22,6 +22,8 @@
 import org.apache.hadoop.hdds.client.ReplicationConfig;
 import org.apache.hadoop.hdds.conf.OzoneConfiguration;
+import org.apache.hadoop.hdds.scm.OzoneClientConfig;
+import org.apache.hadoop.hdds.scm.StreamBufferArgs;
 import org.apache.hadoop.ozone.client.io.KeyOutputStream;
 import org.apache.hadoop.ozone.client.io.OzoneOutputStream;
 import org.apache.hadoop.ozone.om.helpers.OmMultipartCommitUploadPartInfo;
@@ -74,8 +76,13 @@ public synchronized void close() throws IOException {
 
   @Override
   public KeyOutputStream getKeyOutputStream() {
-    return new KeyOutputStream(
-        ReplicationConfig.getDefault(new OzoneConfiguration()), null) {
+    OzoneConfiguration conf = new OzoneConfiguration();
+    ReplicationConfig replicationConfig =
+        ReplicationConfig.getDefault(conf);
+    OzoneClientConfig ozoneClientConfig = conf.getObject(OzoneClientConfig.class);
+    StreamBufferArgs streamBufferArgs =
+        StreamBufferArgs.getDefaultStreamBufferArgs(replicationConfig, ozoneClientConfig);
+    return new KeyOutputStream(replicationConfig, null, ozoneClientConfig, streamBufferArgs) {
       @Override
       public synchronized
           OmMultipartCommitUploadPartInfo getCommitUploadPartInfo() {
diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/OmMetadataGenerator.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/OmMetadataGenerator.java
index b214b0968a25..c94048e00d85 100644
--- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/OmMetadataGenerator.java
+++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/freon/OmMetadataGenerator.java
@@ -326,8 +326,8 @@ private void applyOperation(long counter) throws Exception {
     case CREATE_KEY:
       keyName = getPath(counter);
       getMetrics().timer(operation.name()).time(() -> {
-        try (OutputStream stream = bucket.createKey(keyName,
-            dataSize.toBytes())) {
+        try (OutputStream stream = bucket.createStreamKey(keyName,
+            dataSize.toBytes(), replicationConfig, new HashMap<>())) {
           contentGenerator.write(stream);
         }
         return null;
diff --git a/pom.xml b/pom.xml
index f1b6b3d38eb1..d2497a434948 100644
--- a/pom.xml
+++ b/pom.xml
@@ -257,7 +257,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs
    -Xmx4096m -XX:+HeapDumpOnOutOfMemoryError
    flaky | org.apache.ozone.test.FlakyTest | slow | org.apache.ozone.test.SlowTest | unhealthy | org.apache.ozone.test.UnhealthyTest
-   3.2.2
+   3.0.0-M5
    ${maven-surefire-plugin.version}
    ${maven-surefire-plugin.version}
@@ -1951,6 +1951,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xs
            false
            ${surefire.fork.timeout}
+           all
            ${maven-surefire-plugin.argLine} @{argLine}