File: IcebergInputSource.java
@@ -22,19 +22,25 @@
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.base.Preconditions;
import org.apache.druid.data.input.AbstractInputSourceBuilder;
import org.apache.druid.data.input.InputFormat;
import org.apache.druid.data.input.InputRow;
import org.apache.druid.data.input.InputRowListPlusRawValues;
import org.apache.druid.data.input.InputRowSchema;
import org.apache.druid.data.input.InputSource;
import org.apache.druid.data.input.InputSourceFactory;
import org.apache.druid.data.input.InputSourceReader;
import org.apache.druid.data.input.InputSplit;
import org.apache.druid.data.input.InputStats;
import org.apache.druid.data.input.SplitHintSpec;
import org.apache.druid.data.input.impl.SplittableInputSource;
import org.apache.druid.iceberg.filter.IcebergFilter;
import org.apache.druid.java.util.common.CloseableIterators;
import org.apache.druid.java.util.common.parsers.CloseableIterator;

import javax.annotation.Nullable;
import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.stream.Stream;

@@ -60,7 +66,7 @@ public class IcebergInputSource implements SplittableInputSource<List<String>>
private IcebergFilter icebergFilter;

@JsonProperty
private AbstractInputSourceBuilder warehouseSource;
private InputSourceFactory warehouseSource;

private boolean isLoaded = false;

@@ -72,7 +78,7 @@ public IcebergInputSource(
@JsonProperty("namespace") String namespace,
@JsonProperty("icebergFilter") @Nullable IcebergFilter icebergFilter,
@JsonProperty("icebergCatalog") IcebergCatalog icebergCatalog,
@JsonProperty("warehouseSource") AbstractInputSourceBuilder warehouseSource
@JsonProperty("warehouseSource") InputSourceFactory warehouseSource
)
{
this.tableName = Preconditions.checkNotNull(tableName, "tableName cannot be null");
@@ -170,7 +176,77 @@ protected void retrieveIcebergDatafiles()
getTableName(),
getIcebergFilter()
);
delegateInputSource = warehouseSource.setupInputSource(snapshotDataFiles);
if (snapshotDataFiles.isEmpty()) {
delegateInputSource = new EmptyInputSource();
} else {
delegateInputSource = warehouseSource.create(snapshotDataFiles);
}
isLoaded = true;
}

/**
* This input source is used in place of a delegate input source if there are no input file paths.
* Certain input sources cannot be instantiated with an empty input file list, so composing input
* sources such as IcebergInputSource may use this input source as their delegate in such cases.
*/
private static class EmptyInputSource implements SplittableInputSource
{
@Override
public boolean needsFormat()
{
return false;
}

@Override
public boolean isSplittable()
{
return false;
}

@Override
public InputSourceReader reader(
InputRowSchema inputRowSchema,
@Nullable InputFormat inputFormat,
File temporaryDirectory
)
{
return new InputSourceReader()
{
@Override
public CloseableIterator<InputRow> read(InputStats inputStats)
{
return CloseableIterators.wrap(Collections.emptyIterator(), () -> {
});
}

@Override
public CloseableIterator<InputRowListPlusRawValues> sample()
{
return CloseableIterators.wrap(Collections.emptyIterator(), () -> {
});
}
};
}

@Override
public Stream<InputSplit> createSplits(
InputFormat inputFormat,
@Nullable SplitHintSpec splitHintSpec
)
{
return Stream.empty();
}

@Override
public int estimateNumSplits(InputFormat inputFormat, @Nullable SplitHintSpec splitHintSpec)
{
return 0;
}

@Override
public InputSource withSplit(InputSplit split)
{
return null;
}
}
}
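The `create(...)` call in `retrieveIcebergDatafiles()` above is the entire surface of the new contract. As a rough sketch, inferred from the implementations later in this diff (the real declaration lives in `org.apache.druid.data.input` and may carry Jackson annotations not shown here):

```java
package org.apache.druid.data.input;

import org.apache.druid.data.input.impl.SplittableInputSource;

import java.util.List;

// Sketch of the factory contract implied by this diff: given a list of
// concrete file paths (here, the data files of an Iceberg snapshot),
// build a delegate input source capable of reading them.
public interface InputSourceFactory
{
  SplittableInputSource create(List<String> inputFilePaths);
}
```

`EmptyInputSource` is a null-object delegate: rather than requiring every factory to tolerate an empty path list, `IcebergInputSource` swaps in a no-op source that yields no rows, no samples, and no splits.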
File: IcebergInputSourceTest.java
@@ -24,7 +24,7 @@
import org.apache.druid.data.input.InputSplit;
import org.apache.druid.data.input.MaxSizeSplitHintSpec;
import org.apache.druid.data.input.impl.LocalInputSource;
import org.apache.druid.data.input.impl.LocalInputSourceBuilder;
import org.apache.druid.data.input.impl.LocalInputSourceFactory;
import org.apache.druid.iceberg.filter.IcebergEqualsFilter;
import org.apache.druid.java.util.common.FileUtils;
import org.apache.iceberg.DataFile;
@@ -87,7 +87,7 @@ public void testInputSource() throws IOException
NAMESPACE,
null,
testCatalog,
new LocalInputSourceBuilder()
new LocalInputSourceFactory()
);
Stream<InputSplit<List<String>>> splits = inputSource.createSplits(null, new MaxSizeSplitHintSpec(null, null));
List<File> localInputSourceList = splits.map(inputSource::withSplit)
@@ -115,7 +115,7 @@ public void testInputSource() throws IOException
}

@Test
public void testInputSourceWithFilter() throws IOException
public void testInputSourceWithEmptySource() throws IOException
{
final File warehouseDir = FileUtils.createTempDir();
testCatalog = new LocalCatalog(warehouseDir.getPath(), new HashMap<>());
@@ -128,7 +128,28 @@ public void testInputSourceWithFilter() throws IOException
NAMESPACE,
new IcebergEqualsFilter("id", "0000"),
testCatalog,
new LocalInputSourceBuilder()
new LocalInputSourceFactory()
);
Stream<InputSplit<List<String>>> splits = inputSource.createSplits(null, new MaxSizeSplitHintSpec(null, null));
Assert.assertEquals(0, splits.count());
dropTableFromCatalog(tableIdentifier);
}

@Test
public void testInputSourceWithFilter() throws IOException
{
final File warehouseDir = FileUtils.createTempDir();
testCatalog = new LocalCatalog(warehouseDir.getPath(), new HashMap<>());
TableIdentifier tableIdentifier = TableIdentifier.of(Namespace.of(NAMESPACE), TABLENAME);

createAndLoadTable(tableIdentifier);

IcebergInputSource inputSource = new IcebergInputSource(
TABLENAME,
NAMESPACE,
new IcebergEqualsFilter("id", "123988"),
testCatalog,
new LocalInputSourceFactory()
);
Stream<InputSplit<List<String>>> splits = inputSource.createSplits(null, new MaxSizeSplitHintSpec(null, null));
List<File> localInputSourceList = splits.map(inputSource::withSplit)
@@ -137,7 +158,21 @@ public void testInputSourceWithFilter() throws IOException
.flatMap(List::stream)
.collect(Collectors.toList());

Assert.assertEquals(0, localInputSourceList.size());
Assert.assertEquals(1, inputSource.estimateNumSplits(null, new MaxSizeSplitHintSpec(1L, null)));
Assert.assertEquals(1, localInputSourceList.size());
CloseableIterable<Record> datafileReader = Parquet.read(Files.localInput(localInputSourceList.get(0)))
.project(tableSchema)
.createReaderFunc(fileSchema -> GenericParquetReaders.buildReader(
tableSchema,
fileSchema
))
.build();


for (Record record : datafileReader) {
Assert.assertEquals(tableData.get("id"), record.get(0));
Assert.assertEquals(tableData.get("name"), record.get(1));
}
dropTableFromCatalog(tableIdentifier);
}

File: HdfsInputSourceFactory.java (renamed from HdfsInputSourceBuilder.java)
@@ -21,20 +21,20 @@

import com.fasterxml.jackson.annotation.JacksonInject;
import com.fasterxml.jackson.annotation.JsonCreator;
import org.apache.druid.data.input.AbstractInputSourceBuilder;
import org.apache.druid.data.input.InputSourceFactory;
import org.apache.druid.data.input.impl.SplittableInputSource;
import org.apache.druid.guice.Hdfs;
import org.apache.hadoop.conf.Configuration;

import java.util.List;

public class HdfsInputSourceBuilder extends AbstractInputSourceBuilder
public class HdfsInputSourceFactory implements InputSourceFactory
{
private final Configuration configuration;
private final HdfsInputSourceConfig inputSourceConfig;

@JsonCreator
public HdfsInputSourceBuilder(
public HdfsInputSourceFactory(
@JacksonInject @Hdfs Configuration configuration,
@JacksonInject HdfsInputSourceConfig inputSourceConfig
)
@@ -44,7 +44,7 @@ public HdfsInputSourceBuilder(
}

@Override
public SplittableInputSource generateInputSource(List<String> inputFilePaths)
public SplittableInputSource create(List<String> inputFilePaths)
{
return new HdfsInputSource(inputFilePaths, configuration, inputSourceConfig);
}
File: HdfsStorageDruidModule.java
@@ -34,8 +34,8 @@
import org.apache.druid.guice.ManageLifecycle;
import org.apache.druid.initialization.DruidModule;
import org.apache.druid.inputsource.hdfs.HdfsInputSource;
import org.apache.druid.inputsource.hdfs.HdfsInputSourceBuilder;
import org.apache.druid.inputsource.hdfs.HdfsInputSourceConfig;
import org.apache.druid.inputsource.hdfs.HdfsInputSourceFactory;
import org.apache.druid.storage.hdfs.tasklog.HdfsTaskLogs;
import org.apache.druid.storage.hdfs.tasklog.HdfsTaskLogsConfig;
import org.apache.hadoop.conf.Configuration;
@@ -67,7 +67,7 @@ public List<? extends Module> getJacksonModules()
new SimpleModule().registerSubtypes(
new NamedType(HdfsLoadSpec.class, HdfsStorageDruidModule.SCHEME),
new NamedType(HdfsInputSource.class, HdfsStorageDruidModule.SCHEME),
new NamedType(HdfsInputSourceBuilder.class, HdfsStorageDruidModule.SCHEME)
new NamedType(HdfsInputSourceFactory.class, HdfsStorageDruidModule.SCHEME)
)
);
}
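Registering the renamed class under the same `NamedType` name is what keeps existing ingestion specs working: Jackson resolves the `"type"` discriminator against the registered JSON name, not the Java class name. A self-contained sketch of that mechanism, using illustrative stand-in classes rather than Druid's own:

```java
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.jsontype.NamedType;

public class NamedTypeDemo
{
  // Base type carrying a "type" discriminator, as Druid's polymorphic
  // input-source types do.
  @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type")
  interface Factory {}

  static class HdfsFactory implements Factory {}

  public static void main(String[] args) throws Exception
  {
    ObjectMapper mapper = new ObjectMapper();
    // Analogous to new NamedType(HdfsInputSourceFactory.class, SCHEME) above:
    // the JSON name is the stable public contract, so renaming the Java class
    // is invisible to specs as long as the registered name stays the same.
    mapper.registerSubtypes(new NamedType(HdfsFactory.class, "hdfs"));
    Factory f = mapper.readValue("{\"type\":\"hdfs\"}", Factory.class);
    System.out.println(f.getClass().getSimpleName()); // prints HdfsFactory
  }
}
```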
File: HdfsInputSourceFactoryTest.java
@@ -32,7 +32,7 @@ public void testAdapterGet()
{
Configuration conf = new Configuration();
HdfsInputSourceConfig inputSourceConfig = new HdfsInputSourceConfig(null);
HdfsInputSourceBuilder hdfsInputSourceAdapter = new HdfsInputSourceBuilder(conf, inputSourceConfig);
Assert.assertTrue(hdfsInputSourceAdapter.generateInputSource(Arrays.asList("hdfs://localhost:7020/bar/def.parquet", "hdfs://localhost:7020/bar/abc.parquet")) instanceof HdfsInputSource);
HdfsInputSourceFactory hdfsInputSourceAdapter = new HdfsInputSourceFactory(conf, inputSourceConfig);
Assert.assertTrue(hdfsInputSourceAdapter.create(Arrays.asList("hdfs://localhost:7020/bar/def.parquet", "hdfs://localhost:7020/bar/abc.parquet")) instanceof HdfsInputSource);
}
}
File: S3InputSourceDruidModule.java
@@ -40,7 +40,7 @@ public List<? extends Module> getJacksonModules()
return ImmutableList.of(
new SimpleModule().registerSubtypes(
new NamedType(S3InputSource.class, S3StorageDruidModule.SCHEME),
new NamedType(S3InputSourceBuilder.class, S3StorageDruidModule.SCHEME)
new NamedType(S3InputSourceFactory.class, S3StorageDruidModule.SCHEME)
)
);
}
File: S3InputSourceFactory.java (renamed from S3InputSourceBuilder.java)
@@ -27,7 +27,7 @@
import org.apache.druid.common.aws.AWSClientConfig;
import org.apache.druid.common.aws.AWSEndpointConfig;
import org.apache.druid.common.aws.AWSProxyConfig;
import org.apache.druid.data.input.AbstractInputSourceBuilder;
import org.apache.druid.data.input.InputSourceFactory;
import org.apache.druid.data.input.impl.SplittableInputSource;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.storage.s3.S3InputDataConfig;
@@ -39,7 +39,7 @@
import java.util.List;
import java.util.stream.Collectors;

public class S3InputSourceBuilder extends AbstractInputSourceBuilder
public class S3InputSourceFactory implements InputSourceFactory
{
private final ServerSideEncryptingAmazonS3 s3Client;
private final ServerSideEncryptingAmazonS3.Builder s3ClientBuilder;
@@ -51,7 +51,7 @@ public class S3InputSourceBuilder extends AbstractInputSourceBuilder
private final AWSEndpointConfig awsEndpointConfig;

@JsonCreator
public S3InputSourceBuilder(
public S3InputSourceFactory(
@JacksonInject ServerSideEncryptingAmazonS3 s3Client,
@JacksonInject ServerSideEncryptingAmazonS3.Builder s3ClientBuilder,
@JacksonInject S3InputDataConfig inputDataConfig,
@@ -73,7 +73,7 @@ public S3InputSourceBuilder(
}

@Override
public SplittableInputSource generateInputSource(List<String> inputFilePaths)
public SplittableInputSource create(List<String> inputFilePaths)
{
return new S3InputSource(
s3Client,
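`S3InputSourceFactory` takes all of its collaborators through `@JacksonInject`, so a spec only has to name the type and the surrounding system supplies the rest (in Druid this is wired through Guice). A minimal sketch of how injectable values reach a `@JsonCreator` with plain Jackson; the `Config` class below is an illustrative stand-in, not a Druid type:

```java
import com.fasterxml.jackson.annotation.JacksonInject;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.InjectableValues;
import com.fasterxml.jackson.databind.ObjectMapper;

public class JacksonInjectDemo
{
  // Stand-in for an injected dependency such as S3InputDataConfig.
  static class Config
  {
    final String bucketRoot;

    Config(String bucketRoot)
    {
      this.bucketRoot = bucketRoot;
    }
  }

  static class Factory
  {
    final Config config;
    final String name;

    @JsonCreator
    public Factory(
        @JacksonInject Config config,
        @JsonProperty("name") String name
    )
    {
      this.config = config;
      this.name = name;
    }
  }

  public static void main(String[] args) throws Exception
  {
    ObjectMapper mapper = new ObjectMapper();
    // Values registered here are handed to matching @JacksonInject
    // parameters during deserialization, keyed by declared type.
    mapper.setInjectableValues(
        new InjectableValues.Std().addValue(Config.class, new Config("s3://bar")));
    Factory f = mapper.readValue("{\"name\":\"demo\"}", Factory.class);
    System.out.println(f.name + " -> " + f.config.bucketRoot); // prints demo -> s3://bar
  }
}
```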
File: S3InputSourceFactoryTest.java (renamed from S3InputSourceBuilderTest.java)
@@ -28,7 +28,7 @@
import java.util.Arrays;
import java.util.List;

public class S3InputSourceBuilderTest
public class S3InputSourceFactoryTest
{
@Test
public void testAdapterGet()
@@ -43,7 +43,7 @@ public void testAdapterGet()
"s3://bar/foo/file3.txt"
);

S3InputSourceBuilder s3Builder = new S3InputSourceBuilder(
S3InputSourceFactory s3Builder = new S3InputSourceFactory(
service,
serverSides3Builder,
dataConfig,
@@ -53,6 +53,6 @@ public void testAdapterGet()
null,
null
);
Assert.assertTrue(s3Builder.generateInputSource(fileUris) instanceof S3InputSource);
Assert.assertTrue(s3Builder.create(fileUris) instanceof S3InputSource);
}
}