diff --git a/flink-table-store-connector/pom.xml b/flink-table-store-connector/pom.xml index 0b3e178048ca..e4e8f3839c0e 100644 --- a/flink-table-store-connector/pom.xml +++ b/flink-table-store-connector/pom.xml @@ -211,6 +211,20 @@ under the License. + + + org.apache.flink + flink-connector-kafka + ${flink.version} + test + + + + org.apache.flink + flink-json + ${flink.version} + test + diff --git a/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/Committable.java b/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/Committable.java index b4ba9927ea0e..cea1b35914f3 100644 --- a/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/Committable.java +++ b/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/Committable.java @@ -23,28 +23,21 @@ public class Committable { private final Kind kind; - private final byte[] wrappedCommittable; + private final Object wrappedCommittable; - private final int serializerVersion; - - public Committable(Kind kind, byte[] wrappedCommittable, int serializerVersion) { + public Committable(Kind kind, Object wrappedCommittable) { this.kind = kind; this.wrappedCommittable = wrappedCommittable; - this.serializerVersion = serializerVersion; } public Kind kind() { return kind; } - public byte[] wrappedCommittable() { + public Object wrappedCommittable() { return wrappedCommittable; } - public int serializerVersion() { - return serializerVersion; - } - enum Kind { FILE((byte) 0), diff --git a/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/CommittableSerializer.java b/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/CommittableSerializer.java index 82627ea70a16..95938625022a 100644 --- a/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/CommittableSerializer.java +++ b/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/CommittableSerializer.java @@ -20,12 +20,22 @@ import org.apache.flink.core.io.SimpleVersionedSerializer; +import java.io.IOException; import java.nio.ByteBuffer; /** {@link SimpleVersionedSerializer} for {@link Committable}. 
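 *
 * <p>Wire layout (as produced by {@code serialize}/{@code deserialize} below): a one-byte
 * {@link Committable.Kind} tag, then the wrapped committable's bytes, then a trailing four-byte
 * version of the serializer that produced them. FILE and LOG payloads are delegated to the file and
 * log serializers passed to the constructor; LOG_OFFSET payloads use the fixed
 * {@link LogOffsetCommittable} encoding with version 1.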
*/ public class CommittableSerializer implements SimpleVersionedSerializer { - public static final CommittableSerializer INSTANCE = new CommittableSerializer(); + private final FileCommittableSerializer fileCommittableSerializer; + + private final SimpleVersionedSerializer logCommittableSerializer; + + public CommittableSerializer( + FileCommittableSerializer fileCommittableSerializer, + SimpleVersionedSerializer logCommittableSerializer) { + this.fileCommittableSerializer = fileCommittableSerializer; + this.logCommittableSerializer = logCommittableSerializer; + } @Override public int getVersion() { @@ -33,22 +43,57 @@ public int getVersion() { } @Override - public byte[] serialize(Committable committable) { - byte[] wrapped = committable.wrappedCommittable(); + public byte[] serialize(Committable committable) throws IOException { + byte[] wrapped; + int version; + switch (committable.kind()) { + case FILE: + version = fileCommittableSerializer.getVersion(); + wrapped = + fileCommittableSerializer.serialize( + (FileCommittable) committable.wrappedCommittable()); + break; + case LOG: + version = logCommittableSerializer.getVersion(); + wrapped = logCommittableSerializer.serialize(committable.wrappedCommittable()); + break; + case LOG_OFFSET: + version = 1; + wrapped = ((LogOffsetCommittable) committable.wrappedCommittable()).toBytes(); + break; + default: + throw new UnsupportedOperationException("Unsupported kind: " + committable.kind()); + } + return ByteBuffer.allocate(1 + wrapped.length + 4) .put(committable.kind().toByteValue()) .put(wrapped) - .putInt(committable.serializerVersion()) + .putInt(version) .array(); } @Override - public Committable deserialize(int i, byte[] bytes) { + public Committable deserialize(int i, byte[] bytes) throws IOException { ByteBuffer buffer = ByteBuffer.wrap(bytes); Committable.Kind kind = Committable.Kind.fromByteValue(buffer.get()); byte[] wrapped = new byte[bytes.length - 5]; buffer.get(wrapped); int version = buffer.getInt(); - return new Committable(kind, wrapped, version); + + Object wrappedCommittable; + switch (kind) { + case FILE: + wrappedCommittable = fileCommittableSerializer.deserialize(version, wrapped); + break; + case LOG: + wrappedCommittable = logCommittableSerializer.deserialize(version, wrapped); + break; + case LOG_OFFSET: + wrappedCommittable = LogOffsetCommittable.fromBytes(wrapped); + break; + default: + throw new UnsupportedOperationException("Unsupported kind: " + kind); + } + return new Committable(kind, wrappedCommittable); } } diff --git a/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/GlobalCommittable.java b/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/GlobalCommittable.java deleted file mode 100644 index ac49d154c8c9..000000000000 --- a/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/GlobalCommittable.java +++ /dev/null @@ -1,44 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.table.store.connector.sink; - -import org.apache.flink.table.store.file.manifest.ManifestCommittable; - -import java.util.List; - -/** Global aggregated committable for {@link StoreGlobalCommitter}. */ -public class GlobalCommittable { - - private final List logCommittables; - - private final ManifestCommittable fileCommittable; - - public GlobalCommittable(List logCommittables, ManifestCommittable fileCommittable) { - this.logCommittables = logCommittables; - this.fileCommittable = fileCommittable; - } - - public List logCommittables() { - return logCommittables; - } - - public ManifestCommittable fileCommittable() { - return fileCommittable; - } -} diff --git a/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/GlobalCommittableSerializer.java b/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/GlobalCommittableSerializer.java deleted file mode 100644 index ea180d2f9c3a..000000000000 --- a/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/GlobalCommittableSerializer.java +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.table.store.connector.sink; - -import org.apache.flink.core.io.SimpleVersionedSerializer; -import org.apache.flink.core.memory.DataInputDeserializer; -import org.apache.flink.core.memory.DataOutputViewStreamWrapper; -import org.apache.flink.table.store.file.manifest.ManifestCommittable; -import org.apache.flink.table.store.file.manifest.ManifestCommittableSerializer; - -import java.io.ByteArrayOutputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -/** {@link SimpleVersionedSerializer} for {@link GlobalCommittable}. 
*/ -public class GlobalCommittableSerializer - implements SimpleVersionedSerializer> { - - private final SimpleVersionedSerializer logSerializer; - - private final ManifestCommittableSerializer fileSerializer; - - public GlobalCommittableSerializer( - SimpleVersionedSerializer logSerializer, - ManifestCommittableSerializer fileSerializer) { - this.logSerializer = logSerializer; - this.fileSerializer = fileSerializer; - } - - @Override - public int getVersion() { - return 1; - } - - @Override - public byte[] serialize(GlobalCommittable committable) throws IOException { - ByteArrayOutputStream out = new ByteArrayOutputStream(); - DataOutputViewStreamWrapper view = new DataOutputViewStreamWrapper(out); - - view.writeInt(logSerializer.getVersion()); - view.writeInt(committable.logCommittables().size()); - for (LogCommT commT : committable.logCommittables()) { - byte[] bytes = logSerializer.serialize(commT); - view.writeInt(bytes.length); - view.write(bytes); - } - - view.writeInt(fileSerializer.getVersion()); - byte[] bytes = fileSerializer.serialize(committable.fileCommittable()); - view.writeInt(bytes.length); - view.write(bytes); - - return out.toByteArray(); - } - - @Override - public GlobalCommittable deserialize(int version, byte[] serialized) - throws IOException { - DataInputDeserializer view = new DataInputDeserializer(serialized); - - int logVersion = view.readInt(); - int logSize = view.readInt(); - List logCommTList = new ArrayList<>(); - for (int i = 0; i < logSize; i++) { - byte[] bytes = new byte[view.readInt()]; - view.read(bytes); - logCommTList.add(logSerializer.deserialize(logVersion, bytes)); - } - - int fileVersion = view.readInt(); - byte[] bytes = new byte[view.readInt()]; - view.read(bytes); - ManifestCommittable file = fileSerializer.deserialize(fileVersion, bytes); - - return new GlobalCommittable<>(logCommTList, file); - } -} diff --git a/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/LogOffsetCommittable.java b/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/LogOffsetCommittable.java index 68b62d80f74b..8e474807f592 100644 --- a/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/LogOffsetCommittable.java +++ b/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/LogOffsetCommittable.java @@ -19,6 +19,7 @@ package org.apache.flink.table.store.connector.sink; import java.nio.ByteBuffer; +import java.util.Objects; /** Log offset committable for a bucket. 
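 *
 * <p>Encoded as twelve bytes: a four-byte bucket id followed by an eight-byte offset (see
 * {@code toBytes} and {@link #fromBytes}). The {@code equals}/{@code hashCode} added below allow
 * round-trip assertions in the serializer tests.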
*/ public class LogOffsetCommittable { @@ -48,4 +49,21 @@ public static LogOffsetCommittable fromBytes(byte[] bytes) { ByteBuffer buffer = ByteBuffer.wrap(bytes); return new LogOffsetCommittable(buffer.getInt(), buffer.getLong()); } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + LogOffsetCommittable that = (LogOffsetCommittable) o; + return bucket == that.bucket && offset == that.offset; + } + + @Override + public int hashCode() { + return Objects.hash(bucket, offset); + } } diff --git a/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/StoreGlobalCommitter.java b/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/StoreGlobalCommitter.java index a70b3b1deab9..bf9fec203604 100644 --- a/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/StoreGlobalCommitter.java +++ b/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/StoreGlobalCommitter.java @@ -27,22 +27,17 @@ import javax.annotation.Nullable; import java.io.IOException; -import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.stream.Collectors; /** {@link GlobalCommitter} for dynamic store. */ -public class StoreGlobalCommitter - implements GlobalCommitter> { +public class StoreGlobalCommitter implements GlobalCommitter { private final FileStoreCommit fileStoreCommit; private final FileStoreExpire fileStoreExpire; - private final FileCommittableSerializer fileCommitSerializer; - @Nullable private final CatalogLock lock; @Nullable private final Map overwritePartition; @@ -50,12 +45,10 @@ public class StoreGlobalCommitter public StoreGlobalCommitter( FileStoreCommit fileStoreCommit, FileStoreExpire fileStoreExpire, - FileCommittableSerializer fileCommitSerializer, @Nullable CatalogLock lock, @Nullable Map overwritePartition) { this.fileStoreCommit = fileStoreCommit; this.fileStoreExpire = fileStoreExpire; - this.fileCommitSerializer = fileCommitSerializer; this.lock = lock; this.overwritePartition = overwritePartition; } @@ -68,55 +61,45 @@ public void close() throws Exception { } @Override - public List> filterRecoveredCommittables( - List> globalCommittables) { - List filtered = - fileStoreCommit.filterCommitted( - globalCommittables.stream() - .map(GlobalCommittable::fileCommittable) - .collect(Collectors.toList())); - return globalCommittables.stream() - .filter(c -> filtered.contains(c.fileCommittable())) - .collect(Collectors.toList()); + public List filterRecoveredCommittables( + List globalCommittables) { + return fileStoreCommit.filterCommitted(globalCommittables); } @Override - public GlobalCommittable combine(long checkpointId, List committables) + public ManifestCommittable combine(long checkpointId, List committables) throws IOException { - List logCommittables = new ArrayList<>(); ManifestCommittable fileCommittable = new ManifestCommittable(String.valueOf(checkpointId)); for (Committable committable : committables) { switch (committable.kind()) { case FILE: - FileCommittable file = - fileCommitSerializer.deserialize( - committable.serializerVersion(), - committable.wrappedCommittable()); + FileCommittable file = (FileCommittable) committable.wrappedCommittable(); fileCommittable.addFileCommittable( file.partition(), file.bucket(), file.increment()); break; case LOG_OFFSET: LogOffsetCommittable offset = - 
LogOffsetCommittable.fromBytes(committable.wrappedCommittable()); + (LogOffsetCommittable) committable.wrappedCommittable(); fileCommittable.addLogOffset(offset.bucket(), offset.offset()); break; case LOG: - throw new UnsupportedOperationException(); + // log should be committed in local committer + break; } } - return new GlobalCommittable<>(logCommittables, fileCommittable); + return fileCommittable; } @Override - public void commit(List> committables) { + public void commit(List committables) + throws IOException, InterruptedException { if (overwritePartition == null) { - for (GlobalCommittable committable : committables) { - fileStoreCommit.commit(committable.fileCommittable(), new HashMap<>()); + for (ManifestCommittable committable : committables) { + fileStoreCommit.commit(committable, new HashMap<>()); } } else { - for (GlobalCommittable committable : committables) { - fileStoreCommit.overwrite( - overwritePartition, committable.fileCommittable(), new HashMap<>()); + for (ManifestCommittable committable : committables) { + fileStoreCommit.overwrite(overwritePartition, committable, new HashMap<>()); } } diff --git a/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/StoreLocalCommitter.java b/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/StoreLocalCommitter.java new file mode 100644 index 000000000000..d3bda6ab6050 --- /dev/null +++ b/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/StoreLocalCommitter.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.table.store.connector.sink; + +import org.apache.flink.api.connector.sink2.Committer; + +import javax.annotation.Nullable; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Objects; + +import static org.apache.flink.table.store.connector.sink.global.LocalCommitterOperator.convertCommitRequest; + +/** Store local {@link Committer} to commit log sink. 
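 *
 * <p>Only {@link Committable.Kind#LOG} requests are handled here: each one is converted so the log
 * sink's own committable is exposed to the wrapped log committer (and re-wrapped on retry). FILE and
 * LOG_OFFSET committables are ignored by this committer and handled by the global committer
 * downstream.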
*/ +public class StoreLocalCommitter implements Committer { + + @Nullable private final Committer logCommitter; + + public StoreLocalCommitter(@Nullable Committer logCommitter) { + this.logCommitter = logCommitter; + } + + @Override + public void commit(Collection> requests) + throws IOException, InterruptedException { + List> logRequests = new ArrayList<>(); + for (CommitRequest request : requests) { + if (request.getCommittable().kind() == Committable.Kind.LOG) { + //noinspection unchecked + logRequests.add( + convertCommitRequest( + request, + committable -> (LogCommT) committable.wrappedCommittable(), + committable -> new Committable(Committable.Kind.LOG, committable))); + } + } + + if (logRequests.size() > 0) { + Objects.requireNonNull(logCommitter, "logCommitter should not be null."); + logCommitter.commit(logRequests); + } + } + + @Override + public void close() throws Exception { + if (logCommitter != null) { + logCommitter.close(); + } + } +} diff --git a/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/StoreSink.java b/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/StoreSink.java index 9ae59fd5b73f..a848ca61a801 100644 --- a/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/StoreSink.java +++ b/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/StoreSink.java @@ -18,30 +18,40 @@ package org.apache.flink.table.store.connector.sink; +import org.apache.flink.api.connector.sink2.Committer; import org.apache.flink.api.connector.sink2.Sink; +import org.apache.flink.api.connector.sink2.SinkWriter; import org.apache.flink.api.connector.sink2.StatefulSink; +import org.apache.flink.api.connector.sink2.TwoPhaseCommittingSink; import org.apache.flink.core.io.SimpleVersionedSerializer; import org.apache.flink.table.catalog.CatalogLock; import org.apache.flink.table.catalog.ObjectIdentifier; import org.apache.flink.table.data.RowData; import org.apache.flink.table.store.connector.sink.global.GlobalCommittingSink; import org.apache.flink.table.store.file.FileStore; +import org.apache.flink.table.store.file.manifest.ManifestCommittable; import org.apache.flink.table.store.file.manifest.ManifestCommittableSerializer; import org.apache.flink.table.store.file.operation.FileStoreCommit; import org.apache.flink.table.store.file.operation.Lock; +import org.apache.flink.table.store.log.LogInitContext; +import org.apache.flink.table.store.log.LogSinkProvider; +import org.apache.flink.table.store.log.LogWriteCallback; +import org.apache.flink.table.store.sink.SinkRecord; import org.apache.flink.table.store.sink.SinkRecordConverter; import javax.annotation.Nullable; import java.io.IOException; +import java.io.UncheckedIOException; import java.util.Collection; import java.util.Map; import java.util.concurrent.Callable; +import java.util.function.Consumer; /** {@link Sink} of dynamic store. 
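 *
 * <p>When a {@link LogSinkProvider} is configured, this sink also creates the log sink's writer
 * (wrapped in a {@link LogInitContext} together with a {@link LogWriteCallback}), the log committer
 * when the log sink is a {@link TwoPhaseCommittingSink}, and the log committable serializer. Log
 * committables then travel as {@link Committable.Kind#LOG} and the captured offsets as
 * {@link Committable.Kind#LOG_OFFSET}.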
*/ public class StoreSink implements StatefulSink, - GlobalCommittingSink> { + GlobalCommittingSink { private static final long serialVersionUID = 1L; @@ -59,6 +69,8 @@ public class StoreSink @Nullable private final Map overwritePartition; + @Nullable private final LogSinkProvider logSinkProvider; + public StoreSink( ObjectIdentifier tableIdentifier, FileStore fileStore, @@ -66,7 +78,8 @@ public StoreSink( int[] primaryKeys, int numBucket, @Nullable CatalogLock.Factory lockFactory, - @Nullable Map overwritePartition) { + @Nullable Map overwritePartition, + @Nullable LogSinkProvider logSinkProvider) { this.tableIdentifier = tableIdentifier; this.fileStore = fileStore; this.partitions = partitions; @@ -74,6 +87,7 @@ public StoreSink( this.numBucket = numBucket; this.lockFactory = lockFactory; this.overwritePartition = overwritePartition; + this.logSinkProvider = logSinkProvider; } @Override @@ -84,6 +98,19 @@ public StoreSinkWriter createWriter(InitContext initContext) throw @Override public StoreSinkWriter restoreWriter( InitContext initContext, Collection states) throws IOException { + SinkWriter logWriter = null; + LogWriteCallback logCallback = null; + if (logSinkProvider != null) { + logCallback = new LogWriteCallback(); + Consumer metadataConsumer = logSinkProvider.createMetadataConsumer(logCallback); + LogInitContext logInitContext = new LogInitContext(initContext, metadataConsumer); + Sink logSink = logSinkProvider.createSink(); + logWriter = + states == null + ? logSink.createWriter(logInitContext) + : ((StatefulSink) logSink) + .restoreWriter(logInitContext, states); + } return new StoreSinkWriter<>( fileStore.newWrite(), new SinkRecordConverter( @@ -91,17 +118,55 @@ public StoreSinkWriter restoreWriter( primaryKeys.length > 0 ? fileStore.valueType() : fileStore.keyType(), partitions, primaryKeys), - fileCommitSerializer(), - overwritePartition != null); + overwritePartition != null, + logWriter, + logCallback); } @Override public SimpleVersionedSerializer getWriterStateSerializer() { - return new NoOutputSerializer<>(); + return logSinkProvider == null + ? 
new NoOutputSerializer<>() + : ((StatefulSink) logSinkProvider.createSink()) + .getWriterStateSerializer(); + } + + @Nullable + private Committer logCommitter() { + if (logSinkProvider != null) { + Sink sink = logSinkProvider.createSink(); + if (sink instanceof TwoPhaseCommittingSink) { + try { + return ((TwoPhaseCommittingSink) sink).createCommitter(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + } + + return null; + } + + @Nullable + private SimpleVersionedSerializer logCommitSerializer() { + if (logSinkProvider != null) { + Sink sink = logSinkProvider.createSink(); + if (sink instanceof TwoPhaseCommittingSink) { + return ((TwoPhaseCommittingSink) sink) + .getCommittableSerializer(); + } + } + + return null; } @Override - public StoreGlobalCommitter createGlobalCommitter() { + public Committer createCommitter() { + return new StoreLocalCommitter<>(logCommitter()); + } + + @Override + public StoreGlobalCommitter createGlobalCommitter() { FileStoreCommit commit = fileStore.newCommit(); CatalogLock lock; if (lockFactory == null) { @@ -120,22 +185,20 @@ public T runWithLock(Callable callable) throws Exception { }); } - return new StoreGlobalCommitter<>( - commit, fileStore.newExpire(), fileCommitSerializer(), lock, overwritePartition); + return new StoreGlobalCommitter(commit, fileStore.newExpire(), lock, overwritePartition); } + @SuppressWarnings("unchecked") @Override public SimpleVersionedSerializer getCommittableSerializer() { - return CommittableSerializer.INSTANCE; + return new CommittableSerializer( + fileCommitSerializer(), (SimpleVersionedSerializer) logCommitSerializer()); } @Override - public GlobalCommittableSerializer getGlobalCommittableSerializer() { - ManifestCommittableSerializer fileCommSerializer = - new ManifestCommittableSerializer( - fileStore.partitionType(), fileStore.keyType(), fileStore.valueType()); - SimpleVersionedSerializer logCommitSerializer = new NoOutputSerializer<>(); - return new GlobalCommittableSerializer<>(logCommitSerializer, fileCommSerializer); + public ManifestCommittableSerializer getGlobalCommittableSerializer() { + return new ManifestCommittableSerializer( + fileStore.partitionType(), fileStore.keyType(), fileStore.valueType()); } private FileCommittableSerializer fileCommitSerializer() { diff --git a/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/StoreSinkWriter.java b/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/StoreSinkWriter.java index ccc24821f9e1..6fea71d82111 100644 --- a/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/StoreSinkWriter.java +++ b/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/StoreSinkWriter.java @@ -28,16 +28,21 @@ import org.apache.flink.table.store.file.ValueKind; import org.apache.flink.table.store.file.operation.FileStoreWrite; import org.apache.flink.table.store.file.utils.RecordWriter; +import org.apache.flink.table.store.log.LogWriteCallback; import org.apache.flink.table.store.sink.SinkRecord; import org.apache.flink.table.store.sink.SinkRecordConverter; +import javax.annotation.Nullable; + import java.io.IOException; import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -50,10 +55,12 @@ 
public class StoreSinkWriter private final SinkRecordConverter recordConverter; - private final FileCommittableSerializer fileCommitSerializer; - private final boolean overwrite; + @Nullable private final SinkWriter logWriter; + + @Nullable private final LogWriteCallback logCallback; + private final ExecutorService compactExecutor; private final Map> writers; @@ -61,12 +68,14 @@ public class StoreSinkWriter public StoreSinkWriter( FileStoreWrite fileStoreWrite, SinkRecordConverter recordConverter, - FileCommittableSerializer fileCommitSerializer, - boolean overwrite) { + boolean overwrite, + @Nullable SinkWriter logWriter, + @Nullable LogWriteCallback logCallback) { this.fileStoreWrite = fileStoreWrite; this.recordConverter = recordConverter; - this.fileCommitSerializer = fileCommitSerializer; this.overwrite = overwrite; + this.logWriter = logWriter; + this.logCallback = logCallback; this.compactExecutor = Executors.newSingleThreadScheduledExecutor(); this.writers = new HashMap<>(); } @@ -95,6 +104,11 @@ public void write(RowData rowData, Context context) throws IOException, Interrup } catch (Exception e) { throw new IOException(e); } + + // write to log store + if (logWriter != null) { + logWriter.write(record, context); + } } private void writeToFileStore(RecordWriter writer, SinkRecord record) throws Exception { @@ -119,15 +133,22 @@ private void writeToFileStore(RecordWriter writer, SinkRecord record) throws Exc } @Override - public void flush(boolean endOfInput) {} + public void flush(boolean endOfInput) throws IOException, InterruptedException { + if (logWriter != null) { + logWriter.flush(endOfInput); + } + } @Override public List snapshotState(long checkpointId) throws IOException { + if (logWriter != null && logWriter instanceof StatefulSinkWriter) { + return ((StatefulSinkWriter) logWriter).snapshotState(checkpointId); + } return Collections.emptyList(); } @Override - public List prepareCommit() throws IOException { + public List prepareCommit() throws IOException, InterruptedException { List committables = new ArrayList<>(); Iterator>> partIter = writers.entrySet().iterator(); @@ -146,11 +167,7 @@ public List prepareCommit() throws IOException { } catch (Exception e) { throw new IOException(e); } - committables.add( - new Committable( - Committable.Kind.FILE, - fileCommitSerializer.serialize(committable), - fileCommitSerializer.getVersion())); + committables.add(new Committable(Committable.Kind.FILE, committable)); // clear if no update // we need a mechanism to clear writers, otherwise there will be more and more @@ -166,6 +183,25 @@ public List prepareCommit() throws IOException { } } + if (logWriter != null) { + if (logWriter instanceof PrecommittingSinkWriter) { + Collection logCommittables = + ((PrecommittingSinkWriter) logWriter).prepareCommit(); + for (Object logCommittable : logCommittables) { + committables.add(new Committable(Committable.Kind.LOG, logCommittable)); + } + } + + Objects.requireNonNull(logCallback, "logCallback should not be null."); + logCallback + .offsets() + .forEach( + (k, v) -> + committables.add( + new Committable( + Committable.Kind.LOG_OFFSET, + new LogOffsetCommittable(k, v)))); + } return committables; } @@ -187,6 +223,10 @@ public void close() throws Exception { } } writers.clear(); + + if (logWriter != null) { + logWriter.close(); + } } @VisibleForTesting diff --git a/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/global/AbstractCommitterOperator.java 
b/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/global/AbstractCommitterOperator.java new file mode 100644 index 000000000000..7b807bbd2c86 --- /dev/null +++ b/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/global/AbstractCommitterOperator.java @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.table.store.connector.sink.global; + +import org.apache.flink.api.common.state.ListState; +import org.apache.flink.api.common.state.ListStateDescriptor; +import org.apache.flink.api.common.typeutils.base.array.BytePrimitiveArraySerializer; +import org.apache.flink.api.connector.sink2.Sink; +import org.apache.flink.core.io.SimpleVersionedSerializer; +import org.apache.flink.runtime.state.StateInitializationContext; +import org.apache.flink.runtime.state.StateSnapshotContext; +import org.apache.flink.streaming.api.connector.sink2.CommittableMessage; +import org.apache.flink.streaming.api.connector.sink2.CommittableWithLineage; +import org.apache.flink.streaming.api.operators.AbstractStreamOperator; +import org.apache.flink.streaming.api.operators.BoundedOneInput; +import org.apache.flink.streaming.api.operators.ChainingStrategy; +import org.apache.flink.streaming.api.operators.OneInputStreamOperator; +import org.apache.flink.streaming.api.operators.util.SimpleVersionedListState; +import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import org.apache.flink.util.function.SerializableSupplier; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayDeque; +import java.util.ArrayList; +import java.util.Deque; +import java.util.List; +import java.util.NavigableMap; +import java.util.TreeMap; + +/** An operator that processes committables of a {@link Sink}. */ +public abstract class AbstractCommitterOperator + extends AbstractStreamOperator> + implements OneInputStreamOperator, CommittableMessage>, + BoundedOneInput { + + private static final long serialVersionUID = 1L; + + private static final Logger LOG = LoggerFactory.getLogger(AbstractCommitterOperator.class); + + /** Record all the inputs until commit. */ + private final Deque inputs = new ArrayDeque<>(); + + /** The operator's state descriptor. */ + private static final ListStateDescriptor STREAMING_COMMITTER_RAW_STATES_DESC = + new ListStateDescriptor<>( + "streaming_committer_raw_states", BytePrimitiveArraySerializer.INSTANCE); + + /** Group the committable by the checkpoint id. */ + private final NavigableMap> committablesPerCheckpoint; + + /** The committable's serializer. */ + private final SerializableSupplier> committableSerializer; + + /** The operator's state. 
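 * Holds every committable that has not yet been committed; {@link #initializeState} replays the
 * restored entries through {@link #commit} with {@code isRecover = true} so that subclasses can
 * filter out entries that were already committed before the failure.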
*/ + private ListState streamingCommitterState; + + public AbstractCommitterOperator( + SerializableSupplier> committableSerializer) { + this.committableSerializer = committableSerializer; + this.committablesPerCheckpoint = new TreeMap<>(); + setChainingStrategy(ChainingStrategy.ALWAYS); + } + + @Override + public void initializeState(StateInitializationContext context) throws Exception { + super.initializeState(context); + streamingCommitterState = + new SimpleVersionedListState<>( + context.getOperatorStateStore() + .getListState(STREAMING_COMMITTER_RAW_STATES_DESC), + committableSerializer.get()); + List restored = new ArrayList<>(); + streamingCommitterState.get().forEach(restored::add); + streamingCommitterState.clear(); + commit(true, restored); + } + + public abstract void commit(boolean isRecover, List committables) throws Exception; + + public abstract List toCommittables(long checkpoint, List inputs) throws Exception; + + @Override + public void snapshotState(StateSnapshotContext context) throws Exception { + super.snapshotState(context); + List poll = pollInputs(); + if (poll.size() > 0) { + committablesPerCheckpoint.put( + context.getCheckpointId(), toCommittables(context.getCheckpointId(), poll)); + } + streamingCommitterState.update(committables(committablesPerCheckpoint)); + } + + private List committables(NavigableMap> map) { + List committables = new ArrayList<>(); + map.values().forEach(committables::addAll); + return committables; + } + + @Override + public void endInput() throws Exception { + List poll = pollInputs(); + if (!poll.isEmpty()) { + commit(false, toCommittables(Long.MAX_VALUE, poll)); + } + } + + @Override + public void notifyCheckpointComplete(long checkpointId) throws Exception { + super.notifyCheckpointComplete(checkpointId); + LOG.info("Committing the state for checkpoint {}", checkpointId); + NavigableMap> headMap = + committablesPerCheckpoint.headMap(checkpointId, true); + commit(false, committables(headMap)); + headMap.clear(); + } + + @Override + public void processElement(StreamRecord> element) { + output.collect(element); + CommittableMessage message = element.getValue(); + if (message instanceof CommittableWithLineage) { + this.inputs.add(((CommittableWithLineage) message).getCommittable()); + } + } + + @Override + public void close() throws Exception { + committablesPerCheckpoint.clear(); + inputs.clear(); + super.close(); + } + + private List pollInputs() { + List poll = new ArrayList<>(this.inputs); + this.inputs.clear(); + return poll; + } +} diff --git a/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/global/GlobalCommitterOperator.java b/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/global/GlobalCommitterOperator.java index 36b168047b5a..a606bf974092 100644 --- a/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/global/GlobalCommitterOperator.java +++ b/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/global/GlobalCommitterOperator.java @@ -17,142 +17,60 @@ package org.apache.flink.table.store.connector.sink.global; -import org.apache.flink.api.common.state.ListState; -import org.apache.flink.api.common.state.ListStateDescriptor; -import org.apache.flink.api.common.typeutils.base.array.BytePrimitiveArraySerializer; -import org.apache.flink.api.connector.sink2.Sink; import org.apache.flink.core.io.SimpleVersionedSerializer; import org.apache.flink.runtime.state.StateInitializationContext; 
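// Checkpoint-to-commit lifecycle of AbstractCommitterOperator (summarized from the class above):
//   processElement            forwards each CommittableMessage downstream and buffers the payload;
//   snapshotState(ckp)        turns the buffered inputs into committables via toCommittables(ckp, ...)
//                             and stores every pending committable in operator state;
//   notifyCheckpointComplete  commits everything registered for checkpoints <= the completed id;
//   endInput                  commits the remainder under checkpoint id Long.MAX_VALUE;
//   initializeState           replays restored committables through commit(true, restored).
//
// A minimal hypothetical subclass sketch, assuming the generic signature
// AbstractCommitterOperator<InputT, CommT> and the imports already used above plus java.util.List:
//
//   class PrintingCommitterOperator extends AbstractCommitterOperator<String, String> {
//       PrintingCommitterOperator(SerializableSupplier<SimpleVersionedSerializer<String>> ser) {
//           super(ser);
//       }
//
//       @Override
//       public void commit(boolean isRecover, List<String> committables) {
//           committables.forEach(c -> System.out.println((isRecover ? "re-" : "") + "commit " + c));
//       }
//
//       @Override
//       public List<String> toCommittables(long checkpoint, List<String> inputs) {
//           return inputs; // pass-through, as LocalCommitterOperator below does
//       }
//   }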
-import org.apache.flink.runtime.state.StateSnapshotContext; -import org.apache.flink.streaming.api.connector.sink2.CommittableMessage; -import org.apache.flink.streaming.api.connector.sink2.CommittableWithLineage; -import org.apache.flink.streaming.api.operators.AbstractStreamOperator; -import org.apache.flink.streaming.api.operators.BoundedOneInput; -import org.apache.flink.streaming.api.operators.ChainingStrategy; -import org.apache.flink.streaming.api.operators.OneInputStreamOperator; -import org.apache.flink.streaming.api.operators.util.SimpleVersionedListState; -import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; import org.apache.flink.util.function.SerializableSupplier; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.ArrayDeque; -import java.util.ArrayList; +import java.io.IOException; import java.util.Collections; -import java.util.Deque; import java.util.List; -import java.util.NavigableMap; -import java.util.TreeMap; import static org.apache.flink.util.Preconditions.checkNotNull; -/** An operator that processes committables of a {@link Sink}. */ -public class GlobalCommitterOperator extends AbstractStreamOperator - implements OneInputStreamOperator, Void>, BoundedOneInput { +/** An {@link AbstractCommitterOperator} to process global committer. */ +public class GlobalCommitterOperator + extends AbstractCommitterOperator { - private static final Logger LOG = LoggerFactory.getLogger(GlobalCommitterOperator.class); + private static final long serialVersionUID = 1L; - /** Record all the committables until commit. */ - private final Deque committables = new ArrayDeque<>(); + private final SerializableSupplier> committerFactory; /** * Aggregate committables to global committables and commit the global committables to the * external system. */ - private final SerializableSupplier> committerFactory; - - /** The operator's state descriptor. */ - private static final ListStateDescriptor STREAMING_COMMITTER_RAW_STATES_DESC = - new ListStateDescriptor<>( - "streaming_committer_raw_states", BytePrimitiveArraySerializer.INSTANCE); - - /** Group the committable by the checkpoint id. */ - private final NavigableMap committablesPerCheckpoint; - - /** The committable's serializer. */ - private final SerializableSupplier> - committableSerializer; - - /** The operator's state. 
*/ - private ListState streamingCommitterState; - private GlobalCommitter committer; public GlobalCommitterOperator( SerializableSupplier> committerFactory, SerializableSupplier> committableSerializer) { + super(committableSerializer); this.committerFactory = checkNotNull(committerFactory); - this.committableSerializer = committableSerializer; - this.committablesPerCheckpoint = new TreeMap<>(); - setChainingStrategy(ChainingStrategy.ALWAYS); } @Override public void initializeState(StateInitializationContext context) throws Exception { - super.initializeState(context); committer = committerFactory.get(); - streamingCommitterState = - new SimpleVersionedListState<>( - context.getOperatorStateStore() - .getListState(STREAMING_COMMITTER_RAW_STATES_DESC), - committableSerializer.get()); - List restored = new ArrayList<>(); - streamingCommitterState.get().forEach(restored::add); - streamingCommitterState.clear(); - committer.commit(committer.filterRecoveredCommittables(restored)); - } - - @Override - public void snapshotState(StateSnapshotContext context) throws Exception { - super.snapshotState(context); - List committables = pollCommittables(); - if (committables.size() > 0) { - committablesPerCheckpoint.put( - context.getCheckpointId(), - committer.combine(context.getCheckpointId(), committables)); - } - streamingCommitterState.update(new ArrayList<>(committablesPerCheckpoint.values())); + super.initializeState(context); } @Override - public void endInput() throws Exception { - List allCommittables = pollCommittables(); - if (!allCommittables.isEmpty()) { - committer.commit( - Collections.singletonList(committer.combine(Long.MAX_VALUE, allCommittables))); + public void commit(boolean isRecover, List committables) + throws IOException, InterruptedException { + if (isRecover) { + committables = committer.filterRecoveredCommittables(committables); } + committer.commit(committables); } @Override - public void notifyCheckpointComplete(long checkpointId) throws Exception { - super.notifyCheckpointComplete(checkpointId); - LOG.info("Committing the state for checkpoint {}", checkpointId); - NavigableMap headMap = - committablesPerCheckpoint.headMap(checkpointId, true); - committer.commit(new ArrayList<>(headMap.values())); - headMap.clear(); - } - - @Override - public void processElement(StreamRecord> element) { - CommittableMessage message = element.getValue(); - if (message instanceof CommittableWithLineage) { - this.committables.add(((CommittableWithLineage) message).getCommittable()); - } + public List toCommittables(long checkpoint, List inputs) throws Exception { + return Collections.singletonList(committer.combine(checkpoint, inputs)); } @Override public void close() throws Exception { committer.close(); - committablesPerCheckpoint.clear(); - committables.clear(); super.close(); } - - private List pollCommittables() { - List committables = new ArrayList<>(this.committables); - this.committables.clear(); - return committables; - } } diff --git a/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/global/GlobalCommittingSink.java b/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/global/GlobalCommittingSink.java index 4b2b512fa3f9..90d97b9f3d46 100644 --- a/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/global/GlobalCommittingSink.java +++ b/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/global/GlobalCommittingSink.java @@ -18,7 +18,6 @@ package 
org.apache.flink.table.store.connector.sink.global; -import org.apache.flink.api.connector.sink2.Committer; import org.apache.flink.api.connector.sink2.Sink; import org.apache.flink.api.connector.sink2.SinkWriter; import org.apache.flink.api.connector.sink2.TwoPhaseCommittingSink; @@ -36,11 +35,6 @@ public interface GlobalCommittingSink extends TwoPhaseCommittingSink { - @Override - default Committer createCommitter() { - throw new UnsupportedOperationException("Please create global committer."); - } - /** * Creates a {@link GlobalCommitter} that permanently makes the previously written data visible * through {@link GlobalCommitter#commit}. diff --git a/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/global/GlobalCommittingSinkTranslator.java b/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/global/GlobalCommittingSinkTranslator.java index e5476428f98e..c034276f37fc 100644 --- a/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/global/GlobalCommittingSinkTranslator.java +++ b/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/global/GlobalCommittingSinkTranslator.java @@ -19,7 +19,6 @@ package org.apache.flink.table.store.connector.sink.global; import org.apache.flink.api.common.typeinfo.TypeInformation; -import org.apache.flink.api.common.typeinfo.Types; import org.apache.flink.streaming.api.connector.sink2.CommittableMessage; import org.apache.flink.streaming.api.connector.sink2.CommittableMessageTypeInfo; import org.apache.flink.streaming.api.datastream.DataStream; @@ -28,38 +27,47 @@ import org.apache.flink.streaming.api.functions.sink.DiscardingSink; import org.apache.flink.streaming.runtime.operators.sink.SinkWriterOperatorFactory; +import java.io.IOException; +import java.io.UncheckedIOException; + /** A translator for the {@link GlobalCommittingSink}. */ public class GlobalCommittingSinkTranslator { - private static final String GLOBAL_COMMITTER_NAME = "Global Committer"; - private static final String WRITER_NAME = "Writer"; + private static final String LOCAL_COMMITTER_NAME = "Local Committer"; + + private static final String GLOBAL_COMMITTER_NAME = "Global Committer"; + public static DataStreamSink translate( DataStream input, GlobalCommittingSink sink) { TypeInformation> commitType = CommittableMessageTypeInfo.of(sink::getCommittableSerializer); - boolean checkpointingEnabled = - input.getExecutionEnvironment().getCheckpointConfig().isCheckpointingEnabled(); - - // We cannot determine the mode, when the execution mode is auto. - // We set isBatch to false and only use checkpointingEnabled to determine if we want to do - // the final commit. - // When isBatch is true, only the checkpointID is different, which has no effect on the - // commit operator. 
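// The transforms added below wire the following topology ("Writer" and "Local Committer" keep the
// input parallelism; the tail of translate(), where the global stream is presumably forced to a
// single parallel instance and terminated, lies outside this hunk):
//   input
//     -> "Writer"           SinkWriterOperatorFactory(sink)                 emits CommittableMessages
//     -> "Local Committer"  LocalCommitterOperator(sink::createCommitter)   commits Kind.LOG committables
//     -> global()
//     -> "Global Committer" GlobalCommitterOperator(sink::createGlobalCommitter)
//                           combines a checkpoint's Committables into one ManifestCommittable and commits it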
- SingleOutputStreamOperator> written = - input.transform( - WRITER_NAME, - commitType, - new SinkWriterOperatorFactory<>(sink, false, checkpointingEnabled)); + input.transform(WRITER_NAME, commitType, new SinkWriterOperatorFactory<>(sink)) + .setParallelism(input.getParallelism()); + + SingleOutputStreamOperator> local = + written.transform( + LOCAL_COMMITTER_NAME, + commitType, + new LocalCommitterOperator<>( + () -> { + try { + return sink.createCommitter(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + }, + sink::getCommittableSerializer)) + .setParallelism(written.getParallelism()); - SingleOutputStreamOperator committed = - written.global() + SingleOutputStreamOperator committed = + local.global() .transform( GLOBAL_COMMITTER_NAME, - Types.VOID, + commitType, new GlobalCommitterOperator<>( sink::createGlobalCommitter, sink::getGlobalCommittableSerializer)) diff --git a/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/global/LocalCommitterOperator.java b/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/global/LocalCommitterOperator.java new file mode 100644 index 000000000000..9e7974ced73b --- /dev/null +++ b/flink-table-store-connector/src/main/java/org/apache/flink/table/store/connector/sink/global/LocalCommitterOperator.java @@ -0,0 +1,205 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.table.store.connector.sink.global; + +import org.apache.flink.api.connector.sink2.Committer; +import org.apache.flink.api.connector.sink2.Committer.CommitRequest; +import org.apache.flink.core.io.SimpleVersionedSerializer; +import org.apache.flink.runtime.state.StateInitializationContext; +import org.apache.flink.streaming.runtime.operators.sink.committables.CommitRequestImpl; +import org.apache.flink.streaming.runtime.operators.sink.committables.CommitRequestState; +import org.apache.flink.util.function.SerializableSupplier; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.function.Function; + +import static org.apache.flink.util.Preconditions.checkNotNull; + +/** A {@link AbstractCommitterOperator} to process local committer. 
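 *
 * <p>{@code commit} wraps each committable in a {@code CommitRequestImpl}, hands the whole batch to
 * the wrapped {@link Committer}, drops requests that reached a final state, and retries the rest
 * with an exponentially growing pause (starting at one second and doubling each round).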
*/ +public class LocalCommitterOperator extends AbstractCommitterOperator { + + private static final long serialVersionUID = 1L; + + private final SerializableSupplier> committerFactory; + + private Committer committer; + + public LocalCommitterOperator( + SerializableSupplier> committerFactory, + SerializableSupplier> committableSerializer) { + super(committableSerializer); + this.committerFactory = checkNotNull(committerFactory); + } + + @Override + public void initializeState(StateInitializationContext context) throws Exception { + committer = committerFactory.get(); + super.initializeState(context); + } + + @Override + public void commit(boolean isRecover, List committables) + throws IOException, InterruptedException { + if (committables.isEmpty()) { + return; + } + + List requests = new ArrayList<>(committables.size()); + for (CommT comm : committables) { + requests.add(new CommitRequestImpl(comm)); + } + + long sleep = 1000; + while (true) { + // commit + requests.forEach(CommitRequestImpl::setSelected); + committer.commit(new ArrayList<>(requests)); + requests.forEach(CommitRequestImpl::setCommittedIfNoError); + + // drain finished + requests.removeIf(CommitRequestImpl::isFinished); + if (requests.isEmpty()) { + return; + } + + //noinspection BusyWait + Thread.sleep(sleep); + sleep *= 2; + } + } + + @Override + public List toCommittables(long checkpoint, List inputs) { + return inputs; + } + + @Override + public void close() throws Exception { + committer.close(); + super.close(); + } + + /** {@link CommitRequest} implementation. */ + public class CommitRequestImpl implements CommitRequest { + + private CommT committable; + private int numRetries; + private CommitRequestState state; + + private CommitRequestImpl(CommT committable) { + this.committable = committable; + this.state = CommitRequestState.RECEIVED; + } + + private boolean isFinished() { + return state.isFinalState(); + } + + @Override + public CommT getCommittable() { + return this.committable; + } + + @Override + public int getNumberOfRetries() { + return this.numRetries; + } + + @Override + public void signalFailedWithKnownReason(Throwable t) { + this.state = CommitRequestState.FAILED; + } + + @Override + public void signalFailedWithUnknownReason(Throwable t) { + this.state = CommitRequestState.FAILED; + throw new IllegalStateException("Failed to commit " + this.committable, t); + } + + @Override + public void retryLater() { + this.state = CommitRequestState.RETRY; + ++this.numRetries; + } + + @Override + public void updateAndRetryLater(CommT committable) { + this.committable = committable; + this.retryLater(); + } + + @Override + public void signalAlreadyCommitted() { + this.state = CommitRequestState.COMMITTED; + } + + void setSelected() { + state = CommitRequestState.RECEIVED; + } + + void setCommittedIfNoError() { + if (state == CommitRequestState.RECEIVED) { + state = CommitRequestState.COMMITTED; + } + } + } + + /** Convert a {@link CommitRequest} to another type. 
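 *
 * <p>{@code to} maps the outer committable to the type the delegate committer expects; {@code from}
 * maps it back when the delegate calls {@code updateAndRetryLater}, so retries keep flowing through
 * the original request.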
*/ + public static CommitRequest convertCommitRequest( + CommitRequest request, Function to, Function from) { + return new CommitRequest() { + + @Override + public NewT getCommittable() { + return to.apply(request.getCommittable()); + } + + @Override + public int getNumberOfRetries() { + return request.getNumberOfRetries(); + } + + @Override + public void signalFailedWithKnownReason(Throwable throwable) { + request.signalFailedWithKnownReason(throwable); + } + + @Override + public void signalFailedWithUnknownReason(Throwable throwable) { + request.signalFailedWithUnknownReason(throwable); + } + + @Override + public void retryLater() { + request.retryLater(); + } + + @Override + public void updateAndRetryLater(NewT committable) { + request.updateAndRetryLater(from.apply(committable)); + } + + @Override + public void signalAlreadyCommitted() { + request.signalAlreadyCommitted(); + } + }; + } +} diff --git a/flink-table-store-connector/src/test/java/org/apache/flink/table/store/connector/FileStoreITCase.java b/flink-table-store-connector/src/test/java/org/apache/flink/table/store/connector/FileStoreITCase.java index 28f2f8f71822..f671476b742b 100644 --- a/flink-table-store-connector/src/test/java/org/apache/flink/table/store/connector/FileStoreITCase.java +++ b/flink-table-store-connector/src/test/java/org/apache/flink/table/store/connector/FileStoreITCase.java @@ -24,7 +24,6 @@ import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.datastream.DataStreamSource; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; -import org.apache.flink.streaming.util.FiniteTestSource; import org.apache.flink.table.data.GenericRowData; import org.apache.flink.table.data.RowData; import org.apache.flink.table.data.StringData; @@ -39,6 +38,7 @@ import org.apache.flink.table.store.file.FileStoreImpl; import org.apache.flink.table.store.file.mergetree.compact.DeduplicateAccumulator; import org.apache.flink.table.store.file.utils.FailingAtomicRenameFileSystem; +import org.apache.flink.table.store.log.LogSinkProvider; import org.apache.flink.table.types.logical.IntType; import org.apache.flink.table.types.logical.RowType; import org.apache.flink.table.types.logical.VarCharType; @@ -77,7 +77,7 @@ public class FileStoreITCase extends AbstractTestBase { private static final RowType KEY_TYPE = new RowType(Collections.singletonList(new RowType.RowField("k", new IntType()))); - private static final RowType VALUE_TYPE = + public static final RowType VALUE_TYPE = new RowType( Arrays.asList( new RowType.RowField("v", new IntType()), @@ -86,7 +86,7 @@ public class FileStoreITCase extends AbstractTestBase { new RowType.RowField("_k", new IntType()))); @SuppressWarnings({"unchecked", "rawtypes"}) - private static final DataStructureConverter CONVERTER = + public static final DataStructureConverter CONVERTER = (DataStructureConverter) DataStructureConverters.getConverter( TypeConversions.fromLogicalToDataType(VALUE_TYPE)); @@ -209,10 +209,10 @@ public static StreamExecutionEnvironment buildBatchEnv() { return env; } - public static Configuration buildConfiguration(boolean isBatch, File folder) { + public static Configuration buildConfiguration(boolean noFail, File folder) { Configuration options = new Configuration(); options.set(BUCKET, NUM_BUCKET); - if (isBatch) { + if (noFail) { options.set(FILE_PATH, folder.toURI().toString()); } else { FailingAtomicRenameFileSystem.get().reset(3, 100); @@ -237,7 +237,7 @@ public static DataStreamSource buildTestSource( 
return isBatch ? env.fromCollection(SOURCE_DATA, InternalTypeInfo.of(VALUE_TYPE)) : env.addSource( - new FiniteTestSource<>(null, SOURCE_DATA), InternalTypeInfo.of(VALUE_TYPE)); + new FiniteTestSource<>(SOURCE_DATA), InternalTypeInfo.of(VALUE_TYPE)); } public static void write(DataStream input, FileStore fileStore, boolean partitioned) @@ -251,11 +251,28 @@ public static void write( boolean partitioned, @Nullable Map overwritePartition) throws Exception { + write(input, fileStore, partitioned, overwritePartition, null); + } + + public static void write( + DataStream input, + FileStore fileStore, + boolean partitioned, + @Nullable Map overwritePartition, + @Nullable LogSinkProvider logSinkProvider) + throws Exception { int[] partitions = partitioned ? new int[] {1} : new int[0]; int[] keys = new int[] {2}; StoreSink sink = new StoreSink<>( - null, fileStore, partitions, keys, NUM_BUCKET, null, overwritePartition); + null, + fileStore, + partitions, + keys, + NUM_BUCKET, + null, + overwritePartition, + logSinkProvider); input = input.keyBy(row -> row.getInt(2)); // key by GlobalCommittingSinkTranslator.translate(input, sink); input.getExecutionEnvironment().execute(); diff --git a/flink-table-store-connector/src/test/java/org/apache/flink/table/store/connector/FiniteTestSource.java b/flink-table-store-connector/src/test/java/org/apache/flink/table/store/connector/FiniteTestSource.java new file mode 100644 index 000000000000..0e60ceacb073 --- /dev/null +++ b/flink-table-store-connector/src/test/java/org/apache/flink/table/store/connector/FiniteTestSource.java @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.table.store.connector; + +import org.apache.flink.api.common.state.CheckpointListener; +import org.apache.flink.api.common.state.ListState; +import org.apache.flink.api.common.state.ListStateDescriptor; +import org.apache.flink.api.common.typeutils.base.IntSerializer; +import org.apache.flink.runtime.state.FunctionInitializationContext; +import org.apache.flink.runtime.state.FunctionSnapshotContext; +import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction; +import org.apache.flink.streaming.api.functions.source.SourceFunction; +import org.apache.flink.util.Preconditions; + +import java.util.ArrayList; +import java.util.List; + +/** + * A stream source that: 1) emits a list of elements without allowing checkpoints, 2) then waits for + * two more checkpoints to complete, 3) then re-emits the same elements before 4) waiting for + * another two checkpoints and 5) exiting. + * + *
<p>
The reason this class is rewritten is to support {@link CheckpointedFunction}. + */ +@SuppressWarnings("SynchronizationOnLocalVariableOrMethodParameter") +public class FiniteTestSource + implements SourceFunction, CheckpointedFunction, CheckpointListener { + + private static final long serialVersionUID = 1L; + + @SuppressWarnings("NonSerializableFieldInSerializableClass") + private final Iterable elements; + + private volatile boolean running = true; + + private transient int numCheckpointsComplete; + + private transient ListState checkpointedState; + + private volatile int numTimesEmitted; + + public FiniteTestSource(Iterable elements) { + this.elements = elements; + } + + @Override + public void initializeState(FunctionInitializationContext context) throws Exception { + this.checkpointedState = + context.getOperatorStateStore() + .getListState( + new ListStateDescriptor<>("emit-times", IntSerializer.INSTANCE)); + + if (context.isRestored()) { + List retrievedStates = new ArrayList<>(); + for (Integer entry : this.checkpointedState.get()) { + retrievedStates.add(entry); + } + + // given that the parallelism of the function is 1, we can only have 1 state + Preconditions.checkArgument( + retrievedStates.size() == 1, + getClass().getSimpleName() + " retrieved invalid state."); + + this.numTimesEmitted = retrievedStates.get(0); + Preconditions.checkArgument( + numTimesEmitted <= 2, + getClass().getSimpleName() + + " retrieved invalid numTimesEmitted: " + + numTimesEmitted); + } else { + this.numTimesEmitted = 0; + } + } + + @Override + public void run(SourceContext ctx) throws Exception { + switch (numTimesEmitted) { + case 0: + emitElementsAndWaitForCheckpoints(ctx); + emitElementsAndWaitForCheckpoints(ctx); + break; + case 1: + emitElementsAndWaitForCheckpoints(ctx); + break; + case 2: + // Maybe missed notifyCheckpointComplete, wait next notifyCheckpointComplete + final Object lock = ctx.getCheckpointLock(); + synchronized (lock) { + int checkpointToAwait = numCheckpointsComplete + 2; + while (running && numCheckpointsComplete < checkpointToAwait) { + lock.wait(1); + } + } + break; + } + } + + private void emitElementsAndWaitForCheckpoints(SourceContext ctx) + throws InterruptedException { + final Object lock = ctx.getCheckpointLock(); + + final int checkpointToAwait; + synchronized (lock) { + checkpointToAwait = numCheckpointsComplete + 2; + for (T t : elements) { + ctx.collect(t); + } + numTimesEmitted++; + } + + synchronized (lock) { + while (running && numCheckpointsComplete < checkpointToAwait) { + lock.wait(1); + } + } + } + + @Override + public void cancel() { + running = false; + } + + @Override + public void notifyCheckpointComplete(long checkpointId) { + numCheckpointsComplete++; + } + + @Override + public void notifyCheckpointAborted(long checkpointId) {} + + @Override + public void snapshotState(FunctionSnapshotContext context) throws Exception { + Preconditions.checkState( + this.checkpointedState != null, + "The " + getClass().getSimpleName() + " has not been properly initialized."); + + this.checkpointedState.clear(); + this.checkpointedState.add(this.numTimesEmitted); + } +} diff --git a/flink-table-store-connector/src/test/java/org/apache/flink/table/store/connector/sink/CommittableSerializerTest.java b/flink-table-store-connector/src/test/java/org/apache/flink/table/store/connector/sink/CommittableSerializerTest.java index 2d59440df27e..17733f7f6c7d 100644 --- 
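Before the serializer test below, a word on driving the FiniteTestSource above: it only makes progress when checkpoints complete, so any job using it must enable checkpointing. A minimal sketch of the expected wiring (the driver class itself is invented for illustration):

import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.table.store.connector.FiniteTestSource;

import java.util.Arrays;

public class FiniteTestSourceExample {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);        // initializeState expects a single restored state entry
        env.enableCheckpointing(100); // without completed checkpoints the source never finishes
        env.addSource(new FiniteTestSource<>(Arrays.asList(1, 2, 3)), Types.INT)
                .print();             // every element appears twice, once per emission round
        env.execute();
    }
}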
a/flink-table-store-connector/src/test/java/org/apache/flink/table/store/connector/sink/CommittableSerializerTest.java +++ b/flink-table-store-connector/src/test/java/org/apache/flink/table/store/connector/sink/CommittableSerializerTest.java @@ -18,21 +18,76 @@ package org.apache.flink.table.store.connector.sink; +import org.apache.flink.core.io.SimpleVersionedSerializer; +import org.apache.flink.streaming.runtime.operators.sink.TestSink; +import org.apache.flink.table.store.file.mergetree.Increment; +import org.apache.flink.table.types.logical.IntType; +import org.apache.flink.table.types.logical.RowType; + import org.junit.jupiter.api.Test; +import java.io.IOException; + +import static org.apache.flink.table.store.file.manifest.ManifestCommittableSerializerTest.randomIncrement; +import static org.apache.flink.table.store.file.mergetree.compact.CompactManagerTest.row; import static org.assertj.core.api.Assertions.assertThat; /** Test for {@link CommittableSerializer}. */ public class CommittableSerializerTest { + private final FileCommittableSerializer fileSerializer = + new FileCommittableSerializer( + RowType.of(new IntType()), + RowType.of(new IntType()), + RowType.of(new IntType())); + + private final CommittableSerializer serializer = + new CommittableSerializer( + fileSerializer, + (SimpleVersionedSerializer) TestSink.StringCommittableSerializer.INSTANCE); + + @Test + public void testFile() throws IOException { + Increment increment = randomIncrement(); + FileCommittable committable = new FileCommittable(row(0), 1, increment); + FileCommittable newCommittable = + (FileCommittable) + serializer + .deserialize( + 1, + serializer.serialize( + new Committable( + Committable.Kind.FILE, committable))) + .wrappedCommittable(); + assertThat(newCommittable).isEqualTo(committable); + } + + @Test + public void testLogOffset() throws IOException { + LogOffsetCommittable committable = new LogOffsetCommittable(2, 3); + LogOffsetCommittable newCommittable = + (LogOffsetCommittable) + serializer + .deserialize( + 1, + serializer.serialize( + new Committable( + Committable.Kind.LOG_OFFSET, committable))) + .wrappedCommittable(); + assertThat(newCommittable).isEqualTo(committable); + } + @Test - public void test() { - byte[] bytes = new byte[] {4, 5, 1}; - Committable committable = new Committable(Committable.Kind.LOG, bytes, 9); - byte[] serialize = CommittableSerializer.INSTANCE.serialize(committable); - Committable deser = CommittableSerializer.INSTANCE.deserialize(1, serialize); - assertThat(deser.kind()).isEqualTo(Committable.Kind.LOG); - assertThat(deser.serializerVersion()).isEqualTo(9); - assertThat(deser.wrappedCommittable()).isEqualTo(bytes); + public void testLog() throws IOException { + String log = "random_string"; + String newCommittable = + (String) + serializer + .deserialize( + 1, + serializer.serialize( + new Committable(Committable.Kind.LOG, log))) + .wrappedCommittable(); + assertThat(newCommittable).isEqualTo(log); } } diff --git a/flink-table-store-connector/src/test/java/org/apache/flink/table/store/connector/sink/GlobalCommittableSerializerTest.java b/flink-table-store-connector/src/test/java/org/apache/flink/table/store/connector/sink/GlobalCommittableSerializerTest.java deleted file mode 100644 index 31753c3761ca..000000000000 --- a/flink-table-store-connector/src/test/java/org/apache/flink/table/store/connector/sink/GlobalCommittableSerializerTest.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor 
license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.flink.table.store.connector.sink; - -import org.apache.flink.core.io.SimpleVersionedSerializer; -import org.apache.flink.table.store.file.manifest.ManifestCommittableSerializerTest; - -import org.junit.jupiter.api.Test; - -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.List; - -import static org.assertj.core.api.Assertions.assertThat; - -/** Test for {@link GlobalCommittableSerializer}. */ -public class GlobalCommittableSerializerTest { - - @Test - public void test() throws IOException { - List logs = Arrays.asList("1", "2"); - GlobalCommittable committable = - new GlobalCommittable<>(logs, ManifestCommittableSerializerTest.create()); - GlobalCommittableSerializer serializer = - new GlobalCommittableSerializer<>( - new TestStringSerializer(), ManifestCommittableSerializerTest.serializer()); - byte[] serialized = serializer.serialize(committable); - GlobalCommittable deser = serializer.deserialize(1, serialized); - assertThat(deser.logCommittables()).isEqualTo(committable.logCommittables()); - assertThat(deser.fileCommittable()).isEqualTo(committable.fileCommittable()); - } - - private static final class TestStringSerializer implements SimpleVersionedSerializer { - - private static final int VERSION = 1073741823; - - private TestStringSerializer() {} - - public int getVersion() { - return VERSION; - } - - public byte[] serialize(String str) { - return str.getBytes(StandardCharsets.UTF_8); - } - - public String deserialize(int version, byte[] serialized) { - assertThat(version).isEqualTo(VERSION); - return new String(serialized, StandardCharsets.UTF_8); - } - } -} diff --git a/flink-table-store-connector/src/test/java/org/apache/flink/table/store/connector/sink/LogStoreSinkITCase.java b/flink-table-store-connector/src/test/java/org/apache/flink/table/store/connector/sink/LogStoreSinkITCase.java new file mode 100644 index 000000000000..023b4b626e8d --- /dev/null +++ b/flink-table-store-connector/src/test/java/org/apache/flink/table/store/connector/sink/LogStoreSinkITCase.java @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.table.store.connector.sink; + +import org.apache.flink.api.common.eventtime.WatermarkStrategy; +import org.apache.flink.streaming.api.datastream.DataStreamSource; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.table.data.RowData; +import org.apache.flink.table.factories.DynamicTableFactory; +import org.apache.flink.table.store.connector.FileStoreITCase; +import org.apache.flink.table.store.file.FileStore; +import org.apache.flink.table.store.kafka.KafkaLogSinkProvider; +import org.apache.flink.table.store.kafka.KafkaLogSourceProvider; +import org.apache.flink.table.store.kafka.KafkaLogStoreFactory; +import org.apache.flink.table.store.kafka.KafkaLogTestUtils; +import org.apache.flink.table.store.kafka.KafkaTableTestBase; +import org.apache.flink.table.store.log.LogOptions; +import org.apache.flink.types.Row; + +import org.junit.Test; + +import java.util.List; +import java.util.stream.Collectors; + +import static org.apache.flink.table.store.connector.FileStoreITCase.buildBatchEnv; +import static org.apache.flink.table.store.connector.FileStoreITCase.buildConfiguration; +import static org.apache.flink.table.store.connector.FileStoreITCase.buildFileStore; +import static org.apache.flink.table.store.connector.FileStoreITCase.buildStreamEnv; +import static org.apache.flink.table.store.connector.FileStoreITCase.buildTestSource; +import static org.apache.flink.table.store.kafka.KafkaLogTestUtils.SINK_CONTEXT; +import static org.apache.flink.table.store.kafka.KafkaLogTestUtils.SOURCE_CONTEXT; +import static org.apache.flink.table.store.kafka.KafkaLogTestUtils.discoverKafkaLogFactory; +import static org.assertj.core.api.Assertions.assertThat; + +/** Test for table store with kafka. */ +public class LogStoreSinkITCase extends KafkaTableTestBase { + + @Test + public void testStreamingPartitioned() throws Exception { + innerTest("testStreamingPartitioned", false, true, true); + } + + @Test + public void testStreamingNonPartitioned() throws Exception { + innerTest("testStreamingNonPartitioned", false, false, true); + } + + @Test + public void testBatchPartitioned() throws Exception { + innerTest("testBatchPartitioned", true, true, true); + } + + @Test + public void testStreamingEventual() throws Exception { + innerTest("testStreamingEventual", false, true, false); + } + + private void innerTest(String name, boolean isBatch, boolean partitioned, boolean transaction) + throws Exception { + StreamExecutionEnvironment env = isBatch ? buildBatchEnv() : buildStreamEnv(); + + // in eventual mode, failure will result in duplicate data + FileStore fileStore = + buildFileStore( + buildConfiguration(isBatch || !transaction, TEMPORARY_FOLDER.newFolder()), + partitioned); + + // prepare log + DynamicTableFactory.Context context = + KafkaLogTestUtils.testContext( + name, + getBootstrapServers(), + LogOptions.LogChangelogMode.AUTO, + transaction + ? 
LogOptions.LogConsistency.TRANSACTIONAL + : LogOptions.LogConsistency.EVENTUAL, + FileStoreITCase.VALUE_TYPE, + new int[] {2}); + + KafkaLogStoreFactory factory = discoverKafkaLogFactory(); + KafkaLogSinkProvider sinkProvider = factory.createSinkProvider(context, SINK_CONTEXT); + KafkaLogSourceProvider sourceProvider = + factory.createSourceProvider(context, SOURCE_CONTEXT); + + factory.onCreateTable(context, 3, true); + + try { + // write + DataStreamSource finiteSource = buildTestSource(env, isBatch); + FileStoreITCase.write(finiteSource, fileStore, partitioned, null, sinkProvider); + + // read + List results = FileStoreITCase.read(env, fileStore); + + Row[] expected = + partitioned + ? new Row[] { + Row.of(5, "p2", 1), + Row.of(3, "p2", 5), + Row.of(5, "p1", 1), + Row.of(0, "p1", 2) + } + : new Row[] { + Row.of(5, "p2", 1), Row.of(0, "p1", 2), Row.of(3, "p2", 5) + }; + assertThat(results).containsExactlyInAnyOrder(expected); + + results = + buildStreamEnv() + .fromSource( + sourceProvider.createSource(null), + WatermarkStrategy.noWatermarks(), + "source") + .executeAndCollect(isBatch ? 6 : 12).stream() + .map(FileStoreITCase.CONVERTER::toExternal) + .collect(Collectors.toList()); + + if (isBatch) { + expected = + new Row[] { + Row.of(0, "p1", 1), + Row.of(0, "p1", 2), + Row.of(5, "p1", 1), + Row.of(6, "p2", 1), + Row.of(3, "p2", 5), + Row.of(5, "p2", 1) + }; + } else { + // read log + // expect origin data X 2 (FiniteTestSource) + expected = + new Row[] { + Row.of(0, "p1", 1), + Row.of(0, "p1", 2), + Row.of(5, "p1", 1), + Row.of(6, "p2", 1), + Row.of(3, "p2", 5), + Row.of(5, "p2", 1), + Row.of(0, "p1", 1), + Row.of(0, "p1", 2), + Row.of(5, "p1", 1), + Row.of(6, "p2", 1), + Row.of(3, "p2", 5), + Row.of(5, "p2", 1) + }; + } + assertThat(results).containsExactlyInAnyOrder(expected); + } finally { + factory.onDropTable(context, true); + } + } +} diff --git a/flink-table-store-connector/src/test/java/org/apache/flink/table/store/connector/sink/StoreSinkTest.java b/flink-table-store-connector/src/test/java/org/apache/flink/table/store/connector/sink/StoreSinkTest.java index fd3d97fa9a22..e2ef3e76b4c1 100644 --- a/flink-table-store-connector/src/test/java/org/apache/flink/table/store/connector/sink/StoreSinkTest.java +++ b/flink-table-store-connector/src/test/java/org/apache/flink/table/store/connector/sink/StoreSinkTest.java @@ -24,6 +24,7 @@ import org.apache.flink.table.data.RowData; import org.apache.flink.table.data.binary.BinaryRowData; import org.apache.flink.table.store.connector.sink.TestFileStore.TestRecordWriter; +import org.apache.flink.table.store.file.manifest.ManifestCommittable; import org.apache.flink.table.store.file.utils.RecordWriter; import org.apache.flink.table.types.logical.BigIntType; import org.apache.flink.table.types.logical.IntType; @@ -123,7 +124,8 @@ public void testNoKeyChangelogs() throws Exception { primaryKeys, 2, () -> lock, - new HashMap<>()); + new HashMap<>(), + null); writeAndCommit( sink, GenericRowData.ofKind(RowKind.INSERT, 0, 0, 1), @@ -225,7 +227,7 @@ private List write(StoreSink sink, RowData... 
rows) throws Ex private void commit(StoreSink sink, List fileCommittables) throws Exception { StoreGlobalCommitter committer = sink.createGlobalCommitter(); - GlobalCommittable committable = committer.combine(0, fileCommittables); + ManifestCommittable committable = committer.combine(0, fileCommittables); fileStore.expired = false; lock.locked = false; @@ -246,7 +248,14 @@ private void commit(StoreSink sink, List fileCommittables) th private StoreSink newSink(Map overwritePartition) { return new StoreSink<>( - identifier, fileStore, partitions, primaryKeys, 2, () -> lock, overwritePartition); + identifier, + fileStore, + partitions, + primaryKeys, + 2, + () -> lock, + overwritePartition, + null); } private class TestLock implements CatalogLock { diff --git a/flink-table-store-connector/src/test/java/org/apache/flink/table/store/connector/sink/global/GlobalCommitterOperatorTest.java b/flink-table-store-connector/src/test/java/org/apache/flink/table/store/connector/sink/global/GlobalCommitterOperatorTest.java index 3bbead5c65c5..17cbba1fe67d 100644 --- a/flink-table-store-connector/src/test/java/org/apache/flink/table/store/connector/sink/global/GlobalCommitterOperatorTest.java +++ b/flink-table-store-connector/src/test/java/org/apache/flink/table/store/connector/sink/global/GlobalCommitterOperatorTest.java @@ -49,7 +49,7 @@ public class GlobalCommitterOperatorTest { @Test public void closeCommitter() throws Exception { final DefaultGlobalCommitter globalCommitter = new DefaultGlobalCommitter(); - final OneInputStreamOperatorTestHarness, Void> testHarness = + final OneInputStreamOperatorTestHarness, ?> testHarness = createTestHarness(globalCommitter); testHarness.initializeEmptyState(); testHarness.open(); @@ -69,7 +69,7 @@ public void restoredFromMergedState() throws Exception { buildSubtaskState(createTestHarness(), input2); final DefaultGlobalCommitter globalCommitter = new DefaultGlobalCommitter(); - final OneInputStreamOperatorTestHarness, Void> testHarness = + final OneInputStreamOperatorTestHarness, ?> testHarness = createTestHarness(globalCommitter); final OperatorSubtaskState mergedOperatorSubtaskState = @@ -108,7 +108,7 @@ public void commitMultipleStagesTogether() throws Exception { expectedOutput.add(DefaultGlobalCommitter.COMBINER.apply(input2)); expectedOutput.add(DefaultGlobalCommitter.COMBINER.apply(input3)); - final OneInputStreamOperatorTestHarness, Void> testHarness = + final OneInputStreamOperatorTestHarness, ?> testHarness = createTestHarness(globalCommitter); testHarness.initializeEmptyState(); testHarness.open(); @@ -138,7 +138,7 @@ public void filterRecoveredCommittables() throws Exception { final DefaultGlobalCommitter globalCommitter = new DefaultGlobalCommitter(successCommittedCommittable); - final OneInputStreamOperatorTestHarness, Void> testHarness = + final OneInputStreamOperatorTestHarness, ?> testHarness = createTestHarness(globalCommitter); // all data from previous checkpoint are expected to be committed, @@ -155,7 +155,7 @@ public void filterRecoveredCommittables() throws Exception { public void endOfInput() throws Exception { final DefaultGlobalCommitter globalCommitter = new DefaultGlobalCommitter(); - final OneInputStreamOperatorTestHarness, Void> testHarness = + final OneInputStreamOperatorTestHarness, ?> testHarness = createTestHarness(globalCommitter); testHarness.initializeEmptyState(); testHarness.open(); @@ -166,12 +166,12 @@ public void endOfInput() throws Exception { 
assertThat(globalCommitter.getCommittedData()).contains("elder+patience+silent"); } - private OneInputStreamOperatorTestHarness, Void> createTestHarness() + private OneInputStreamOperatorTestHarness, ?> createTestHarness() throws Exception { return createTestHarness(new DefaultGlobalCommitter()); } - private OneInputStreamOperatorTestHarness, Void> createTestHarness( + private OneInputStreamOperatorTestHarness, ?> createTestHarness( GlobalCommitter globalCommitter) throws Exception { return new OneInputStreamOperatorTestHarness<>( new GlobalCommitterOperator<>( @@ -183,7 +183,7 @@ private OneInputStreamOperatorTestHarness, Void> crea } public static OperatorSubtaskState buildSubtaskState( - OneInputStreamOperatorTestHarness, Void> testHarness, + OneInputStreamOperatorTestHarness, ?> testHarness, List input) throws Exception { testHarness.initializeEmptyState(); @@ -199,7 +199,7 @@ public static OperatorSubtaskState buildSubtaskState( return operatorSubtaskState; } - private static List>> committableRecords( + static List>> committableRecords( Collection elements) { return elements.stream() .map(GlobalCommitterOperatorTest::toCommittableMessage) diff --git a/flink-table-store-connector/src/test/java/org/apache/flink/table/store/connector/sink/global/LocalCommitterOperatorTest.java b/flink-table-store-connector/src/test/java/org/apache/flink/table/store/connector/sink/global/LocalCommitterOperatorTest.java new file mode 100644 index 000000000000..528f5dc57eee --- /dev/null +++ b/flink-table-store-connector/src/test/java/org/apache/flink/table/store/connector/sink/global/LocalCommitterOperatorTest.java @@ -0,0 +1,255 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
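For readers following the committableRecords(...) helper made package-private above: it maps each test element through GlobalCommitterOperatorTest::toCommittableMessage, whose body is not part of this diff. One plausible shape of that mapping, reconstructed purely for illustration, wraps the value in a CommittableWithLineage and then in a StreamRecord so the operator test harness can push it through processElements(...):

import org.apache.flink.streaming.api.connector.sink2.CommittableMessage;
import org.apache.flink.streaming.api.connector.sink2.CommittableWithLineage;
import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;

// Hypothetical reconstruction, not the patched code: checkpoint id is unknown at
// this point (null) and subtask id 0 is sufficient for a single-subtask harness.
static StreamRecord<CommittableMessage<String>> toCommittableMessage(String element) {
    return new StreamRecord<>(new CommittableWithLineage<>(element, null, 0));
}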
+ */ + +package org.apache.flink.table.store.connector.sink.global; + +import org.apache.flink.api.connector.sink2.Committer; +import org.apache.flink.runtime.checkpoint.OperatorSubtaskState; +import org.apache.flink.streaming.api.connector.sink2.CommittableMessage; +import org.apache.flink.streaming.api.connector.sink2.CommittableWithLineage; +import org.apache.flink.streaming.runtime.operators.sink.TestSink.StringCommittableSerializer; +import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import org.apache.flink.streaming.util.OneInputStreamOperatorTestHarness; + +import org.junit.Test; + +import javax.annotation.Nullable; + +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Queue; +import java.util.Set; +import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.function.Supplier; +import java.util.stream.Collectors; + +import static org.apache.flink.table.store.connector.sink.global.GlobalCommitterOperatorTest.buildSubtaskState; +import static org.apache.flink.table.store.connector.sink.global.GlobalCommitterOperatorTest.committableRecords; +import static org.apache.flink.util.Preconditions.checkNotNull; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.Assert.assertNotNull; + +/** Test the {@link LocalCommitterOperator}. */ +public class LocalCommitterOperatorTest { + + @Test + public void supportRetry() throws Exception { + final List input = Arrays.asList("lazy", "leaf"); + final RetryOnceCommitter committer = new RetryOnceCommitter(); + final OneInputStreamOperatorTestHarness< + CommittableMessage, CommittableMessage> + testHarness = createTestHarness(committer); + + testHarness.initializeEmptyState(); + testHarness.open(); + testHarness.processElements(committableRecords(input)); + testHarness.prepareSnapshotPreBarrier(1); + testHarness.snapshot(1L, 1L); + testHarness.notifyOfCompletedCheckpoint(1L); + testHarness.snapshot(2L, 2L); + testHarness.notifyOfCompletedCheckpoint(2L); + + testHarness.close(); + + assertThat(committer.getCommittedData()).contains("lazy", "leaf"); + } + + @Test + public void closeCommitter() throws Exception { + final DefaultCommitter committer = new DefaultCommitter(); + final OneInputStreamOperatorTestHarness< + CommittableMessage, CommittableMessage> + testHarness = createTestHarness(committer); + testHarness.initializeEmptyState(); + testHarness.open(); + testHarness.close(); + assertThat(committer.isClosed()).isTrue(); + } + + @Test + public void restoredFromMergedState() throws Exception { + final List input1 = Arrays.asList("today", "whom"); + final OperatorSubtaskState operatorSubtaskState1 = + buildSubtaskState(createTestHarness(), input1); + + final List input2 = Arrays.asList("future", "evil", "how"); + final OperatorSubtaskState operatorSubtaskState2 = + buildSubtaskState(createTestHarness(), input2); + + final DefaultCommitter committer = new DefaultCommitter(); + final OneInputStreamOperatorTestHarness< + CommittableMessage, CommittableMessage> + testHarness = createTestHarness(committer); + + final OperatorSubtaskState mergedOperatorSubtaskState = + OneInputStreamOperatorTestHarness.repackageState( + operatorSubtaskState1, operatorSubtaskState2); + + testHarness.initializeState( + OneInputStreamOperatorTestHarness.repartitionOperatorState( + mergedOperatorSubtaskState, 2, 2, 1, 0)); + testHarness.open(); + + final 
List expectedOutput = new ArrayList<>(); + expectedOutput.addAll(input1); + expectedOutput.addAll(input2); + + testHarness.prepareSnapshotPreBarrier(1L); + testHarness.snapshot(1L, 1L); + testHarness.notifyOfCompletedCheckpoint(1); + + testHarness.close(); + + assertThat(committer.getCommittedData()) + .containsExactlyInAnyOrder(expectedOutput.toArray(new String[0])); + } + + @Test + public void commitMultipleStagesTogether() throws Exception { + + final DefaultCommitter committer = new DefaultCommitter(); + + final List input1 = Arrays.asList("cautious", "nature"); + final List input2 = Arrays.asList("count", "over"); + final List input3 = Arrays.asList("lawyer", "grammar"); + + final List expectedOutput = new ArrayList<>(); + + expectedOutput.addAll(input1); + expectedOutput.addAll(input2); + expectedOutput.addAll(input3); + + final OneInputStreamOperatorTestHarness< + CommittableMessage, CommittableMessage> + testHarness = createTestHarness(committer); + testHarness.initializeEmptyState(); + testHarness.open(); + + testHarness.processElements(committableRecords(input1)); + testHarness.prepareSnapshotPreBarrier(1L); + testHarness.snapshot(1L, 1L); + testHarness.processElements(committableRecords(input2)); + testHarness.prepareSnapshotPreBarrier(2L); + testHarness.snapshot(2L, 2L); + testHarness.processElements(committableRecords(input3)); + testHarness.prepareSnapshotPreBarrier(3L); + testHarness.snapshot(3L, 3L); + + testHarness.notifyOfCompletedCheckpoint(1); + testHarness.notifyOfCompletedCheckpoint(3); + + testHarness.close(); + + assertThat(fromRecords(testHarness.getRecordOutput())).isEqualTo(expectedOutput); + + assertThat(committer.getCommittedData()).isEqualTo(expectedOutput); + } + + private static List fromRecords( + Collection>> elements) { + return elements.stream() + .map(StreamRecord::getValue) + .filter(message -> message instanceof CommittableWithLineage) + .map(message -> ((CommittableWithLineage) message).getCommittable()) + .collect(Collectors.toList()); + } + + private OneInputStreamOperatorTestHarness< + CommittableMessage, CommittableMessage> + createTestHarness() throws Exception { + return createTestHarness(new DefaultCommitter()); + } + + private OneInputStreamOperatorTestHarness< + CommittableMessage, CommittableMessage> + createTestHarness(Committer committer) throws Exception { + return new OneInputStreamOperatorTestHarness<>( + new LocalCommitterOperator<>( + () -> committer, () -> StringCommittableSerializer.INSTANCE)); + } + + /** Base class for testing {@link Committer}. 
*/ + private static class DefaultCommitter implements Committer, Serializable { + + @Nullable protected Queue committedData; + + private boolean isClosed; + + @Nullable private final Supplier> queueSupplier; + + public DefaultCommitter() { + this.committedData = new ConcurrentLinkedQueue<>(); + this.isClosed = false; + this.queueSupplier = null; + } + + public List getCommittedData() { + if (committedData != null) { + return new ArrayList<>(committedData); + } else { + return Collections.emptyList(); + } + } + + @Override + public void close() { + isClosed = true; + } + + public boolean isClosed() { + return isClosed; + } + + @Override + public void commit(Collection> requests) { + if (committedData == null) { + assertNotNull(queueSupplier); + committedData = queueSupplier.get(); + } + committedData.addAll( + requests.stream() + .map(CommitRequest::getCommittable) + .collect(Collectors.toList())); + } + } + + /** A {@link Committer} that always re-commits the committables data it received. */ + private static class RetryOnceCommitter extends DefaultCommitter implements Committer { + + private final Set seen = new LinkedHashSet<>(); + + @Override + public void commit(Collection> requests) { + requests.forEach( + c -> { + if (seen.remove(c.getCommittable())) { + checkNotNull(committedData); + committedData.add(c.getCommittable()); + } else { + seen.add(c.getCommittable()); + c.retryLater(); + } + }); + } + } +} diff --git a/flink-table-store-core/src/main/java/org/apache/flink/table/store/log/LogWriteCallback.java b/flink-table-store-core/src/main/java/org/apache/flink/table/store/log/LogWriteCallback.java new file mode 100644 index 000000000000..03c62d6713ec --- /dev/null +++ b/flink-table-store-core/src/main/java/org/apache/flink/table/store/log/LogWriteCallback.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.table.store.log; + +import org.apache.flink.table.store.log.LogSinkProvider.WriteCallback; + +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.LongAccumulator; + +/** A {@link WriteCallback} implementation. 
*/ +public class LogWriteCallback implements WriteCallback { + + private final ConcurrentHashMap offsetMap = new ConcurrentHashMap<>(); + + @Override + public void onCompletion(int bucket, long offset) { + LongAccumulator acc = offsetMap.get(bucket); + if (acc == null) { + // computeIfAbsent will lock on the key + acc = offsetMap.computeIfAbsent(bucket, k -> new LongAccumulator(Long::max, 0)); + } // else lock free + acc.accumulate(offset); + } + + public Map offsets() { + Map offsets = new HashMap<>(); + offsetMap.forEach((k, v) -> offsets.put(k, v.longValue())); + return offsets; + } +} diff --git a/flink-table-store-kafka/src/test/java/org/apache/flink/table/store/kafka/KafkaLogTestUtils.java b/flink-table-store-kafka/src/test/java/org/apache/flink/table/store/kafka/KafkaLogTestUtils.java index 0fad212afbe9..b4b2efc3f1c1 100644 --- a/flink-table-store-kafka/src/test/java/org/apache/flink/table/store/kafka/KafkaLogTestUtils.java +++ b/flink-table-store-kafka/src/test/java/org/apache/flink/table/store/kafka/KafkaLogTestUtils.java @@ -63,7 +63,7 @@ /** Utils for the test of {@link KafkaLogStoreFactory}. */ public class KafkaLogTestUtils { - static final LogStoreTableFactory.SourceContext SOURCE_CONTEXT = + public static final LogStoreTableFactory.SourceContext SOURCE_CONTEXT = new LogStoreTableFactory.SourceContext() { @Override public TypeInformation createTypeInformation(DataType producedDataType) { @@ -85,7 +85,7 @@ public DynamicTableSource.DataStructureConverter createDataStructureConverter( } }; - static final LogStoreTableFactory.SinkContext SINK_CONTEXT = + public static final LogStoreTableFactory.SinkContext SINK_CONTEXT = new LogStoreTableFactory.SinkContext() { @Override @@ -113,7 +113,7 @@ public DynamicTableSink.DataStructureConverter createDataStructureConverter( } }; - static KafkaLogStoreFactory discoverKafkaLogFactory() { + public static KafkaLogStoreFactory discoverKafkaLogFactory() { return (KafkaLogStoreFactory) LogStoreTableFactory.discoverLogStoreFactory( Thread.currentThread().getContextClassLoader(), @@ -169,15 +169,27 @@ static DynamicTableFactory.Context testContext( LogChangelogMode changelogMode, LogConsistency consistency, boolean keyed) { + return testContext( + name, + servers, + changelogMode, + consistency, + RowType.of(new IntType(), new IntType()), + keyed ? new int[] {0} : new int[0]); + } + + public static DynamicTableFactory.Context testContext( + String name, + String servers, + LogChangelogMode changelogMode, + LogConsistency consistency, + RowType type, + int[] keys) { Map options = new HashMap<>(); options.put(CHANGELOG_MODE.key(), changelogMode.toString()); options.put(CONSISTENCY.key(), consistency.toString()); options.put(BOOTSTRAP_SERVERS.key(), servers); - return createContext( - name, - RowType.of(new IntType(), new IntType()), - keyed ? new int[] {0} : new int[0], - options); + return createContext(name, type, keys, options); } static SinkRecord testRecord(boolean keyed, int bucket, int key, int value, RowKind rowKind) {
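A usage sketch for the LogWriteCallback shown above (the example class is invented; the patch itself does not contain it). The producer ack path may report offsets out of order and from several threads, which is why the callback keeps a lock-free per-bucket maximum via LongAccumulator(Long::max); the resulting map is what a writer would later wrap as LogOffsetCommittable entries of kind LOG_OFFSET:

import org.apache.flink.table.store.log.LogWriteCallback;

import java.util.Map;

public class LogWriteCallbackExample {
    public static void main(String[] args) {
        LogWriteCallback callback = new LogWriteCallback();
        // Acks arrive asynchronously; only the highest offset per bucket is retained.
        callback.onCompletion(0, 17L);
        callback.onCompletion(0, 42L);
        callback.onCompletion(1, 5L);

        Map<Integer, Long> offsets = callback.offsets(); // {0=42, 1=5}
        // Each entry would then become a LogOffsetCommittable(bucket, offset)
        // emitted as a Committable of kind LOG_OFFSET at checkpoint time.
        System.out.println(offsets);
    }
}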