-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Adding s3a schema and s3a implem to hdfs storage module. #3940
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,6 +27,7 @@ | |
| import com.netflix.astyanax.recipes.storage.ChunkedStorage; | ||
|
|
||
| import io.druid.java.util.common.CompressionUtils; | ||
| import io.druid.java.util.common.IAE; | ||
| import io.druid.java.util.common.logger.Logger; | ||
| import io.druid.segment.SegmentUtils; | ||
| import io.druid.segment.loading.DataSegmentPusher; | ||
|
|
@@ -36,6 +37,8 @@ | |
| import java.io.File; | ||
| import java.io.FileInputStream; | ||
| import java.io.IOException; | ||
| import java.net.URI; | ||
| import java.util.Map; | ||
|
|
||
| /** | ||
| * Cassandra Segment Pusher | ||
|
|
@@ -46,7 +49,7 @@ public class CassandraDataSegmentPusher extends CassandraStorage implements Data | |
| { | ||
| private static final Logger log = new Logger(CassandraDataSegmentPusher.class); | ||
| private static final int CONCURRENCY = 10; | ||
| private static final Joiner JOINER = Joiner.on("/").skipNulls(); | ||
| private static final Joiner JOINER = Joiner.on("/").skipNulls(); | ||
| private final ObjectMapper jsonMapper; | ||
|
|
||
| @Inject | ||
|
|
@@ -96,7 +99,7 @@ public DataSegment push(final File indexFilesDir, DataSegment segment) throws IO | |
| MutationBatch mutation = this.keyspace.prepareMutationBatch(); | ||
| mutation.withRow(descriptorStorage, key) | ||
| .putColumn("lastmodified", System.currentTimeMillis(), null) | ||
| .putColumn("descriptor", json, null); | ||
| .putColumn("descriptor", json, null); | ||
| mutation.execute(); | ||
| log.info("Wrote index to C* in [%s] ms", System.currentTimeMillis() - start); | ||
| } catch (Exception e) | ||
|
|
@@ -114,4 +117,10 @@ ImmutableMap.<String, Object> of("type", "c*", "key", key) | |
| compressedIndexFile.delete(); | ||
| return segment; | ||
| } | ||
|
|
||
| @Override | ||
| public Map<String, Object> makeLoadSpec(URI uri) | ||
| { | ||
| throw new IAE("not supported"); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please use spaces instead of tabs for indenting. Also, this should technically be an `UnsupportedOperationException` rather than an `IAE` (assumed completion — the original comment was truncated here). |
||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -34,6 +34,8 @@ | |
| import java.io.File; | ||
| import java.io.FileOutputStream; | ||
| import java.io.IOException; | ||
| import java.net.URI; | ||
| import java.util.Map; | ||
| import java.util.concurrent.Callable; | ||
|
|
||
| public class CloudFilesDataSegmentPusher implements DataSegmentPusher | ||
|
|
@@ -146,4 +148,19 @@ public DataSegment call() throws Exception | |
| } | ||
| } | ||
| } | ||
|
|
||
| @Override | ||
| public Map<String, Object> makeLoadSpec(URI uri) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Minor nit: for each pusher, the makeLoadSpec logic seems duplicated with the push method; this could be extracted to a common method.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agree with @nishantmonu51, to avoid duplicated code please use this in |
||
| { | ||
| return ImmutableMap.<String, Object>of( | ||
| "type", | ||
| CloudFilesStorageDruidModule.SCHEME, | ||
| "region", | ||
| objectApi.getRegion(), | ||
| "container", | ||
| objectApi.getContainer(), | ||
| "path", | ||
| uri.toString() | ||
| ); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -35,6 +35,8 @@ | |
| import java.io.FileInputStream; | ||
| import java.io.FileOutputStream; | ||
| import java.io.IOException; | ||
| import java.net.URI; | ||
| import java.util.Map; | ||
|
|
||
| public class GoogleDataSegmentPusher implements DataSegmentPusher | ||
| { | ||
|
|
@@ -82,7 +84,8 @@ public File createDescriptorFile(final ObjectMapper jsonMapper, final DataSegmen | |
| return descriptorFile; | ||
| } | ||
|
|
||
| public void insert(final File file, final String contentType, final String path) throws IOException { | ||
| public void insert(final File file, final String contentType, final String path) throws IOException | ||
| { | ||
| LOG.info("Inserting [%s] to [%s]", file, path); | ||
|
|
||
| FileInputStream fileSteam = new FileInputStream(file); | ||
|
|
@@ -117,7 +120,7 @@ public DataSegment push(final File indexFilesDir, final DataSegment segment) thr | |
| "bucket", config.getBucket(), | ||
| "path", indexPath | ||
| ) | ||
| ) | ||
| ) | ||
| .withBinaryVersion(version); | ||
|
|
||
| descriptorFile = createDescriptorFile(jsonMapper, outSegment); | ||
|
|
@@ -129,7 +132,8 @@ public DataSegment push(final File indexFilesDir, final DataSegment segment) thr | |
| } | ||
| catch (Exception e) { | ||
| throw Throwables.propagate(e); | ||
| } finally { | ||
| } | ||
| finally { | ||
| if (indexFile != null) { | ||
| LOG.info("Deleting file [%s]", indexFile); | ||
| indexFile.delete(); | ||
|
|
@@ -142,6 +146,19 @@ public DataSegment push(final File indexFilesDir, final DataSegment segment) thr | |
| } | ||
| } | ||
|
|
||
| @Override | ||
| public Map<String, Object> makeLoadSpec(URI finalIndexZipFilePath) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please use this in |
||
| { | ||
| return ImmutableMap.<String, Object>of( | ||
| "type", | ||
| GoogleStorageDruidModule.SCHEME, | ||
| "bucket", | ||
| config.getBucket(), | ||
| "path", | ||
| finalIndexZipFilePath.getPath().substring(1) // remove the leading "/" | ||
| ); | ||
| } | ||
|
|
||
| public String buildPath(final String path) | ||
| { | ||
| if (config.getPrefix() != "") { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -38,6 +38,8 @@ | |
|
|
||
| import java.io.File; | ||
| import java.io.IOException; | ||
| import java.net.URI; | ||
| import java.util.Map; | ||
| import java.util.concurrent.Callable; | ||
|
|
||
| public class S3DataSegmentPusher implements DataSegmentPusher | ||
|
|
@@ -149,4 +151,14 @@ public DataSegment call() throws Exception | |
| throw Throwables.propagate(e); | ||
| } | ||
| } | ||
|
|
||
| @Override | ||
| public Map<String, Object> makeLoadSpec(URI finalIndexZipFilePath) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similar comment to other pushers about code duplication. |
||
| { | ||
| return ImmutableMap.<String, Object>of( | ||
| "type", "s3_zip", | ||
| "bucket", finalIndexZipFilePath.getHost(), | ||
| "key", finalIndexZipFilePath.getPath().substring(1) // remove the leading "/" | ||
| ); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -741,7 +741,8 @@ indexes, aggregators, new File(baseFlushFile, "merged"), progressIndicator | |
| new Path(config.getSchema().getIOConfig().getSegmentOutputPath()), | ||
| outputFS, | ||
| segmentTemplate | ||
| ) | ||
| ), | ||
| config.DATA_SEGMENT_PUSHER | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's a static, so this would be more clear as
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Oops, this comment was in the wrong spot. Moved to https://github.com/druid-io/druid/pull/3940/files#r107766956 |
||
| ); | ||
|
|
||
| Path descriptorPath = config.makeDescriptorInfoPath(segment); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,7 +23,6 @@ | |
| import com.google.common.base.Predicate; | ||
| import com.google.common.base.Strings; | ||
| import com.google.common.base.Throwables; | ||
| import com.google.common.collect.ImmutableMap; | ||
| import com.google.common.io.ByteStreams; | ||
| import com.google.common.io.Files; | ||
| import com.google.common.io.OutputSupplier; | ||
|
|
@@ -36,6 +35,7 @@ | |
| import io.druid.java.util.common.logger.Logger; | ||
| import io.druid.segment.ProgressIndicator; | ||
| import io.druid.segment.SegmentUtils; | ||
| import io.druid.segment.loading.DataSegmentPusher; | ||
| import io.druid.segment.loading.DataSegmentPusherUtil; | ||
| import io.druid.timeline.DataSegment; | ||
| import org.apache.hadoop.conf.Configuration; | ||
|
|
@@ -379,7 +379,8 @@ public static DataSegment serializeOutIndex( | |
| final Progressable progressable, | ||
| final TaskAttemptID taskAttemptID, | ||
| final File mergedBase, | ||
| final Path segmentBasePath | ||
| final Path segmentBasePath, | ||
| DataSegmentPusher dataSegmentPusher | ||
| ) | ||
| throws IOException | ||
| { | ||
|
|
@@ -415,43 +416,8 @@ public long push() throws IOException | |
|
|
||
| final Path finalIndexZipFilePath = new Path(segmentBasePath, "index.zip"); | ||
| final URI indexOutURI = finalIndexZipFilePath.toUri(); | ||
| final ImmutableMap<String, Object> loadSpec; | ||
| // TODO: Make this a part of Pushers or Pullers | ||
| switch (outputFS.getScheme()) { | ||
| case "hdfs": | ||
| case "viewfs": | ||
| case "maprfs": | ||
| loadSpec = ImmutableMap.<String, Object>of( | ||
| "type", "hdfs", | ||
| "path", indexOutURI.toString() | ||
| ); | ||
| break; | ||
| case "gs": | ||
| loadSpec = ImmutableMap.<String, Object>of( | ||
| "type", "google", | ||
| "bucket", indexOutURI.getHost(), | ||
| "path", indexOutURI.getPath().substring(1) // remove the leading "/" | ||
| ); | ||
| break; | ||
| case "s3": | ||
| case "s3n": | ||
| loadSpec = ImmutableMap.<String, Object>of( | ||
| "type", "s3_zip", | ||
| "bucket", indexOutURI.getHost(), | ||
| "key", indexOutURI.getPath().substring(1) // remove the leading "/" | ||
| ); | ||
| break; | ||
| case "file": | ||
| loadSpec = ImmutableMap.<String, Object>of( | ||
| "type", "local", | ||
| "path", indexOutURI.getPath() | ||
| ); | ||
| break; | ||
| default: | ||
| throw new IAE("Unknown file system scheme [%s]", outputFS.getScheme()); | ||
| } | ||
| final DataSegment finalSegment = segmentTemplate | ||
| .withLoadSpec(loadSpec) | ||
| .withLoadSpec(dataSegmentPusher.makeLoadSpec(indexOutURI)) | ||
| .withSize(size.get()) | ||
| .withBinaryVersion(SegmentUtils.getVersionFromDir(mergedBase)); | ||
|
|
||
|
|
@@ -583,7 +549,9 @@ public static Path makeSegmentOutputPath( | |
| DataSegment segment | ||
| ) | ||
| { | ||
| String segmentDir = "hdfs".equals(fileSystem.getScheme()) || "viewfs".equals(fileSystem.getScheme()) | ||
| String segmentDir = "hdfs".equals(fileSystem.getScheme()) | ||
| || "viewfs".equals(fileSystem.getScheme()) | ||
| || "s3a".equals(fileSystem.getScheme()) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similar comment here.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @gianm how about the issue with
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. s3a shouldn't really be here… probably just like determining the loadSpec, the directory should be based on the kind of deep storage configured and not on the scheme. I think adding a getStorageDir to DataSegmentPusher, and getting rid of DataSegmentPusherUtil, would solve that. It could use java 8 default methods to prevent any pusher other than HDFS from having to override it.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hum this method is used everywhere... anyway will change that. |
||
| ? DataSegmentPusherUtil.getHdfsStorageDir(segment) | ||
| : DataSegmentPusherUtil.getStorageDir(segment); | ||
| return new Path(prependFSIfNullScheme(fileSystem, basePath), String.format("./%s", segmentDir)); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please either use this in
`uploadDataSegment`, or else have them use a common helper method.