-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Adding s3a schema and s3a implem to hdfs storage module. #3940
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,6 +27,7 @@ | |
| import com.netflix.astyanax.recipes.storage.ChunkedStorage; | ||
|
|
||
| import io.druid.java.util.common.CompressionUtils; | ||
| import io.druid.java.util.common.IAE; | ||
| import io.druid.java.util.common.logger.Logger; | ||
| import io.druid.segment.SegmentUtils; | ||
| import io.druid.segment.loading.DataSegmentPusher; | ||
|
|
@@ -36,6 +37,8 @@ | |
| import java.io.File; | ||
| import java.io.FileInputStream; | ||
| import java.io.IOException; | ||
| import java.net.URI; | ||
| import java.util.Map; | ||
|
|
||
| /** | ||
| * Cassandra Segment Pusher | ||
|
|
@@ -46,7 +49,7 @@ public class CassandraDataSegmentPusher extends CassandraStorage implements Data | |
| { | ||
| private static final Logger log = new Logger(CassandraDataSegmentPusher.class); | ||
| private static final int CONCURRENCY = 10; | ||
| private static final Joiner JOINER = Joiner.on("/").skipNulls(); | ||
| private static final Joiner JOINER = Joiner.on("/").skipNulls(); | ||
| private final ObjectMapper jsonMapper; | ||
|
|
||
| @Inject | ||
|
|
@@ -96,7 +99,7 @@ public DataSegment push(final File indexFilesDir, DataSegment segment) throws IO | |
| MutationBatch mutation = this.keyspace.prepareMutationBatch(); | ||
| mutation.withRow(descriptorStorage, key) | ||
| .putColumn("lastmodified", System.currentTimeMillis(), null) | ||
| .putColumn("descriptor", json, null); | ||
| .putColumn("descriptor", json, null); | ||
| mutation.execute(); | ||
| log.info("Wrote index to C* in [%s] ms", System.currentTimeMillis() - start); | ||
| } catch (Exception e) | ||
|
|
@@ -114,4 +117,10 @@ ImmutableMap.<String, Object> of("type", "c*", "key", key) | |
| compressedIndexFile.delete(); | ||
| return segment; | ||
| } | ||
|
|
||
| @Override | ||
| public Map<String, Object> makeLoadSpec(URI uri) | ||
| { | ||
| throw new IAE("not supported"); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please use spaces instead of tabs for indenting. Also, this should technically be an `UnsupportedOperationException` rather than an `IAE` (assumed completion — the original comment was truncated here). |
||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -34,6 +34,8 @@ | |
| import java.io.File; | ||
| import java.io.FileOutputStream; | ||
| import java.io.IOException; | ||
| import java.net.URI; | ||
| import java.util.Map; | ||
| import java.util.concurrent.Callable; | ||
|
|
||
| public class CloudFilesDataSegmentPusher implements DataSegmentPusher | ||
|
|
@@ -146,4 +148,19 @@ public DataSegment call() throws Exception | |
| } | ||
| } | ||
| } | ||
|
|
||
| @Override | ||
| public Map<String, Object> makeLoadSpec(URI uri) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Minor nit: for each pusher, the makeLoadSpec logic seems duplicated with the push method; this could be extracted to a common method.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agree with @nishantmonu51, to avoid duplicated code please use this in |
||
| { | ||
| return ImmutableMap.<String, Object>of( | ||
| "type", | ||
| CloudFilesStorageDruidModule.SCHEME, | ||
| "region", | ||
| objectApi.getRegion(), | ||
| "container", | ||
| objectApi.getContainer(), | ||
| "path", | ||
| uri.toString() | ||
| ); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -35,6 +35,8 @@ | |
| import java.io.FileInputStream; | ||
| import java.io.FileOutputStream; | ||
| import java.io.IOException; | ||
| import java.net.URI; | ||
| import java.util.Map; | ||
|
|
||
| public class GoogleDataSegmentPusher implements DataSegmentPusher | ||
| { | ||
|
|
@@ -82,7 +84,8 @@ public File createDescriptorFile(final ObjectMapper jsonMapper, final DataSegmen | |
| return descriptorFile; | ||
| } | ||
|
|
||
| public void insert(final File file, final String contentType, final String path) throws IOException { | ||
| public void insert(final File file, final String contentType, final String path) throws IOException | ||
| { | ||
| LOG.info("Inserting [%s] to [%s]", file, path); | ||
|
|
||
| FileInputStream fileSteam = new FileInputStream(file); | ||
|
|
@@ -117,7 +120,7 @@ public DataSegment push(final File indexFilesDir, final DataSegment segment) thr | |
| "bucket", config.getBucket(), | ||
| "path", indexPath | ||
| ) | ||
| ) | ||
| ) | ||
| .withBinaryVersion(version); | ||
|
|
||
| descriptorFile = createDescriptorFile(jsonMapper, outSegment); | ||
|
|
@@ -129,7 +132,8 @@ public DataSegment push(final File indexFilesDir, final DataSegment segment) thr | |
| } | ||
| catch (Exception e) { | ||
| throw Throwables.propagate(e); | ||
| } finally { | ||
| } | ||
| finally { | ||
| if (indexFile != null) { | ||
| LOG.info("Deleting file [%s]", indexFile); | ||
| indexFile.delete(); | ||
|
|
@@ -142,6 +146,19 @@ public DataSegment push(final File indexFilesDir, final DataSegment segment) thr | |
| } | ||
| } | ||
|
|
||
| @Override | ||
| public Map<String, Object> makeLoadSpec(URI finalIndexZipFilePath) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please use this in |
||
| { | ||
| return ImmutableMap.<String, Object>of( | ||
| "type", | ||
| GoogleStorageDruidModule.SCHEME, | ||
| "bucket", | ||
| config.getBucket(), | ||
| "path", | ||
| finalIndexZipFilePath.getPath().substring(1) // remove the leading "/" | ||
| ); | ||
| } | ||
|
|
||
| public String buildPath(final String path) | ||
| { | ||
| if (config.getPrefix() != "") { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -38,6 +38,8 @@ | |
|
|
||
| import java.io.File; | ||
| import java.io.IOException; | ||
| import java.net.URI; | ||
| import java.util.Map; | ||
| import java.util.concurrent.Callable; | ||
|
|
||
| public class S3DataSegmentPusher implements DataSegmentPusher | ||
|
|
@@ -149,4 +151,14 @@ public DataSegment call() throws Exception | |
| throw Throwables.propagate(e); | ||
| } | ||
| } | ||
|
|
||
| @Override | ||
| public Map<String, Object> makeLoadSpec(URI finalIndexZipFilePath) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similar comment to other pushers about code duplication. |
||
| { | ||
| return ImmutableMap.<String, Object>of( | ||
| "type", "s3_zip", | ||
| "bucket", finalIndexZipFilePath.getHost(), | ||
| "key", finalIndexZipFilePath.getPath().substring(1) // remove the leading "/" | ||
| ); | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -741,7 +741,8 @@ indexes, aggregators, new File(baseFlushFile, "merged"), progressIndicator | |
| new Path(config.getSchema().getIOConfig().getSegmentOutputPath()), | ||
| outputFS, | ||
| segmentTemplate | ||
| ) | ||
| ), | ||
| config.DATA_SEGMENT_PUSHER | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's a static, so this would be more clear as
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Oops, this comment was in the wrong spot. Moved to https://github.com/druid-io/druid/pull/3940/files#r107766956 |
||
| ); | ||
|
|
||
| Path descriptorPath = config.makeDescriptorInfoPath(segment); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -23,7 +23,6 @@ | |
| import com.google.common.base.Predicate; | ||
| import com.google.common.base.Strings; | ||
| import com.google.common.base.Throwables; | ||
| import com.google.common.collect.ImmutableMap; | ||
| import com.google.common.io.ByteStreams; | ||
| import com.google.common.io.Files; | ||
| import com.google.common.io.OutputSupplier; | ||
|
|
@@ -36,6 +35,7 @@ | |
| import io.druid.java.util.common.logger.Logger; | ||
| import io.druid.segment.ProgressIndicator; | ||
| import io.druid.segment.SegmentUtils; | ||
| import io.druid.segment.loading.DataSegmentPusher; | ||
| import io.druid.segment.loading.DataSegmentPusherUtil; | ||
| import io.druid.timeline.DataSegment; | ||
| import org.apache.hadoop.conf.Configuration; | ||
|
|
@@ -379,7 +379,8 @@ public static DataSegment serializeOutIndex( | |
| final Progressable progressable, | ||
| final TaskAttemptID taskAttemptID, | ||
| final File mergedBase, | ||
| final Path segmentBasePath | ||
| final Path segmentBasePath, | ||
| DataSegmentPusher dataSegmentPusher | ||
| ) | ||
| throws IOException | ||
| { | ||
|
|
@@ -415,43 +416,8 @@ public long push() throws IOException | |
|
|
||
| final Path finalIndexZipFilePath = new Path(segmentBasePath, "index.zip"); | ||
| final URI indexOutURI = finalIndexZipFilePath.toUri(); | ||
| final ImmutableMap<String, Object> loadSpec; | ||
| // TODO: Make this a part of Pushers or Pullers | ||
| switch (outputFS.getScheme()) { | ||
| case "hdfs": | ||
| case "viewfs": | ||
| case "maprfs": | ||
| loadSpec = ImmutableMap.<String, Object>of( | ||
| "type", "hdfs", | ||
| "path", indexOutURI.toString() | ||
| ); | ||
| break; | ||
| case "gs": | ||
| loadSpec = ImmutableMap.<String, Object>of( | ||
| "type", "google", | ||
| "bucket", indexOutURI.getHost(), | ||
| "path", indexOutURI.getPath().substring(1) // remove the leading "/" | ||
| ); | ||
| break; | ||
| case "s3": | ||
| case "s3n": | ||
| loadSpec = ImmutableMap.<String, Object>of( | ||
| "type", "s3_zip", | ||
| "bucket", indexOutURI.getHost(), | ||
| "key", indexOutURI.getPath().substring(1) // remove the leading "/" | ||
| ); | ||
| break; | ||
| case "file": | ||
| loadSpec = ImmutableMap.<String, Object>of( | ||
| "type", "local", | ||
| "path", indexOutURI.getPath() | ||
| ); | ||
| break; | ||
| default: | ||
| throw new IAE("Unknown file system scheme [%s]", outputFS.getScheme()); | ||
| } | ||
| final DataSegment finalSegment = segmentTemplate | ||
| .withLoadSpec(loadSpec) | ||
| .withLoadSpec(dataSegmentPusher.makeLoadSpec(indexOutURI)) | ||
| .withSize(size.get()) | ||
| .withBinaryVersion(SegmentUtils.getVersionFromDir(mergedBase)); | ||
|
|
||
|
|
@@ -583,7 +549,9 @@ public static Path makeSegmentOutputPath( | |
| DataSegment segment | ||
| ) | ||
| { | ||
| String segmentDir = "hdfs".equals(fileSystem.getScheme()) || "viewfs".equals(fileSystem.getScheme()) | ||
| String segmentDir = "hdfs".equals(fileSystem.getScheme()) | ||
| || "viewfs".equals(fileSystem.getScheme()) | ||
| || "s3a".equals(fileSystem.getScheme()) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Similar comment here.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @gianm how about the issue with
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. s3a shouldn't really be here… probably just like determining the loadSpec, the directory should be based on the kind of deep storage configured and not on the scheme. I think adding a getStorageDir to DataSegmentPusher, and getting rid of DataSegmentPusherUtil, would solve that. It could use java 8 default methods to prevent any pusher other than HDFS from having to override it.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hum this method is used everywhere... anyway will change that. |
||
| ? DataSegmentPusherUtil.getHdfsStorageDir(segment) | ||
| : DataSegmentPusherUtil.getStorageDir(segment); | ||
| return new Path(prependFSIfNullScheme(fileSystem, basePath), String.format("./%s", segmentDir)); | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please either use this in
`uploadDataSegment`, or else have them use a common helper method.