diff --git a/indexing-hadoop/pom.xml b/indexing-hadoop/pom.xml
index 5be81e97a73c..45db63a31af8 100644
--- a/indexing-hadoop/pom.xml
+++ b/indexing-hadoop/pom.xml
@@ -104,6 +104,52 @@
             <artifactId>hamcrest-all</artifactId>
             <scope>test</scope>
         </dependency>
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-hdfs</artifactId>
+            <version>${hadoop.compile.version}</version>
+            <type>test-jar</type>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-mapreduce-client-jobclient</artifactId>
+            <version>${hadoop.compile.version}</version>
+            <type>test-jar</type>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-minicluster</artifactId>
+            <version>${hadoop.compile.version}</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.hadoop</groupId>
+            <artifactId>hadoop-yarn-server-tests</artifactId>
+            <version>${hadoop.compile.version}</version>
+            <type>test-jar</type>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>io.druid</groupId>
+            <artifactId>druid-server</artifactId>
+            <version>${project.parent.version}</version>
+            <type>test-jar</type>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>io.druid</groupId>
+            <artifactId>druid-processing</artifactId>
+            <version>${project.parent.version}</version>
+            <type>test-jar</type>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.derby</groupId>
+            <artifactId>derbyclient</artifactId>
+            <scope>test</scope>
+        </dependency>
diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/DetermineHashedPartitionsJob.java b/indexing-hadoop/src/main/java/io/druid/indexer/DetermineHashedPartitionsJob.java
index 3233a1c44a1a..f4e96b5cc059 100644
--- a/indexing-hadoop/src/main/java/io/druid/indexer/DetermineHashedPartitionsJob.java
+++ b/indexing-hadoop/src/main/java/io/druid/indexer/DetermineHashedPartitionsJob.java
@@ -102,7 +102,7 @@ public boolean run()
       } else {
         groupByJob.setNumReduceTasks(config.getSegmentGranularIntervals().get().size());
       }
-      JobHelper.setupClasspath(config, groupByJob);
+      JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()), groupByJob);
 
       config.addInputPaths(groupByJob);
       config.addJobProperties(groupByJob);
diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/DeterminePartitionsJob.java b/indexing-hadoop/src/main/java/io/druid/indexer/DeterminePartitionsJob.java
index 7868edf88aa8..62dd90470ee1 100644
--- a/indexing-hadoop/src/main/java/io/druid/indexer/DeterminePartitionsJob.java
+++ b/indexing-hadoop/src/main/java/io/druid/indexer/DeterminePartitionsJob.java
@@ -135,7 +135,7 @@ public boolean run()
       groupByJob.setOutputKeyClass(BytesWritable.class);
       groupByJob.setOutputValueClass(NullWritable.class);
       groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
-      JobHelper.setupClasspath(config, groupByJob);
+      JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()), groupByJob);
 
       config.addInputPaths(groupByJob);
       config.addJobProperties(groupByJob);
@@ -186,7 +186,7 @@ public boolean run()
       dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
       dimSelectionJob.setPartitionerClass(DeterminePartitionsDimSelectionPartitioner.class);
       dimSelectionJob.setNumReduceTasks(config.getGranularitySpec().bucketIntervals().get().size());
-      JobHelper.setupClasspath(config, dimSelectionJob);
+      JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()), dimSelectionJob);
 
       config.addJobProperties(dimSelectionJob);
       config.intoConfiguration(dimSelectionJob);
diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/IndexGeneratorJob.java b/indexing-hadoop/src/main/java/io/druid/indexer/IndexGeneratorJob.java
index 09ab8db062d3..1202ab5dabcb 100644
--- a/indexing-hadoop/src/main/java/io/druid/indexer/IndexGeneratorJob.java
+++ b/indexing-hadoop/src/main/java/io/druid/indexer/IndexGeneratorJob.java
@@ -176,7 +176,7 @@ public boolean run()
 
       config.intoConfiguration(job);
 
-      JobHelper.setupClasspath(config, job);
+      JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()), job);
 
       job.submit();
       log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
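The call-site changes above all follow the same pattern: instead of handing JobHelper the whole HadoopDruidIndexerConfig, callers now pass an explicit path under which the job's jars should be staged. A minimal sketch of the new usage, with a hypothetical driver and working path (not values from this patch):

    // Illustrative only: "workingPath" is an assumed example value.
    Job job = Job.getInstance(new Configuration(), "example-druid-job");
    Path workingPath = new Path("/tmp/druid/working");
    // Jars found on java.class.path are uploaded under <workingPath>/classpath
    // and registered on the job's distributed classpath.
    JobHelper.setupClasspath(JobHelper.distributedClassPath(workingPath), job);

Decoupling classpath staging from the indexer config is what lets the new converter job below reuse setupClasspath without constructing a HadoopDruidIndexerConfig.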
diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/JobHelper.java b/indexing-hadoop/src/main/java/io/druid/indexer/JobHelper.java
index 36ee1ee80a9f..688bd8028898 100644
--- a/indexing-hadoop/src/main/java/io/druid/indexer/JobHelper.java
+++ b/indexing-hadoop/src/main/java/io/druid/indexer/JobHelper.java
@@ -26,6 +26,7 @@
 import com.metamx.common.IAE;
 import com.metamx.common.ISE;
 import com.metamx.common.logger.Logger;
+import io.druid.segment.ProgressIndicator;
 import io.druid.segment.SegmentUtils;
 import io.druid.timeline.DataSegment;
 import org.apache.hadoop.conf.Configuration;
@@ -39,6 +40,7 @@
 import org.apache.hadoop.io.retry.RetryPolicies;
 import org.apache.hadoop.io.retry.RetryProxy;
 import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.mapreduce.TaskAttemptID;
 import org.apache.hadoop.mapreduce.lib.input.CombineTextInputFormat;
 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
@@ -46,8 +48,10 @@
 import org.joda.time.Interval;
 import org.joda.time.format.ISODateTimeFormat;
 
+import java.io.BufferedOutputStream;
 import java.io.File;
 import java.io.FileInputStream;
+import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.OutputStream;
 import java.net.URI;
@@ -58,6 +62,7 @@
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.zip.ZipEntry;
+import java.util.zip.ZipInputStream;
 import java.util.zip.ZipOutputStream;
 
 /**
@@ -68,12 +73,21 @@ public class JobHelper
 
   private static final Set<Path> existing = Sets.newHashSet();
 
-  private static final int NUM_RETRIES = 6;
-  private static final int SECONDS_BETWEEN_RETRIES = 10;
+  private static final int NUM_RETRIES = 8;
+  private static final int SECONDS_BETWEEN_RETRIES = 2;
 
+  public static Path distributedClassPath(String path)
+  {
+    return distributedClassPath(new Path(path));
+  }
+
+  public static Path distributedClassPath(Path base)
+  {
+    return new Path(base, "classpath");
+  }
 
   public static void setupClasspath(
-      HadoopDruidIndexerConfig config,
+      Path distributedClassPath,
       Job job
   )
       throws IOException
@@ -86,7 +100,6 @@ public static void setupClasspath(
     String[] jarFiles = classpathProperty.split(File.pathSeparator);
 
     final Configuration conf = job.getConfiguration();
-    final Path distributedClassPath = new Path(config.getWorkingPath(), "classpath");
     final FileSystem fs = distributedClassPath.getFileSystem(conf);
 
     if (fs instanceof LocalFileSystem) {
@@ -216,7 +229,7 @@ public static DataSegment serializeOutIndex(
         DataPusher.class, new DataPusher()
         {
           @Override
-          public void push() throws IOException
+          public long push() throws IOException
           {
             try (OutputStream outputStream = fileContext.create(
                 tmpPath,
@@ -231,9 +244,10 @@ public void push() throws IOException
               log.error(exception, "Exception in retry loop");
               throw exception;
             }
+            return -1;
           }
         },
-        RetryPolicies.retryUpToMaximumCountWithFixedSleep(NUM_RETRIES, SECONDS_BETWEEN_RETRIES, TimeUnit.SECONDS)
+        RetryPolicies.exponentialBackoffRetry(NUM_RETRIES, SECONDS_BETWEEN_RETRIES, TimeUnit.SECONDS)
     );
     zipPusher.push();
     log.info("Zipped %,d bytes to [%s]", size.get(), tmpPath.toUri());
@@ -294,7 +308,7 @@ public static void writeSegmentDescriptor(
         DataPusher.class, new DataPusher()
         {
           @Override
-          public void push() throws IOException
+          public long push() throws IOException
           {
             try {
               progressable.progress();
@@ -317,9 +331,10 @@ public void push() throws IOException
               log.info(ex, "Error in retry loop");
               throw ex;
             }
+            return -1;
           }
         },
-        RetryPolicies.retryUpToMaximumCountWithFixedSleep(NUM_RETRIES, SECONDS_BETWEEN_RETRIES, TimeUnit.SECONDS)
+        RetryPolicies.exponentialBackoffRetry(NUM_RETRIES, SECONDS_BETWEEN_RETRIES, TimeUnit.SECONDS)
     );
     descriptorPusher.push();
   }
@@ -329,7 +344,7 @@ public void push() throws IOException
    */
   public interface DataPusher
   {
-    void push() throws IOException;
+    long push() throws IOException;
   }
 
   public static long zipAndCopyDir(
@@ -425,4 +440,108 @@ public static Path prependFSIfNullScheme(FileSystem fs, Path path)
     }
     return path;
   }
+
+  // TODO: Replace this whenever hadoop gets their act together and stops breaking with more recent versions of Guava
+  public static long unzipNoGuava(
+      final Path zip,
+      final Configuration configuration,
+      final File outDir,
+      final Progressable progressable
+  ) throws IOException
+  {
+    final DataPusher zipPusher = (DataPusher) RetryProxy.create(
+        DataPusher.class, new DataPusher()
+        {
+          @Override
+          public long push() throws IOException
+          {
+            final FileContext context = FileContext.getFileContext(zip.toUri(), configuration);
+            long size = 0L;
+            final byte[] buffer = new byte[1 << 13];
+            progressable.progress();
+            try (ZipInputStream in = new ZipInputStream(context.open(zip, 1 << 13))) {
+              for (ZipEntry entry = in.getNextEntry(); entry != null; entry = in.getNextEntry()) {
+                final String fileName = entry.getName();
+                try (final OutputStream out = new BufferedOutputStream(
+                    new FileOutputStream(
+                        outDir.getAbsolutePath()
+                        + File.separator
+                        + fileName
+                    ), 1 << 13
+                )) {
+                  for (int len = in.read(buffer); len >= 0; len = in.read(buffer)) {
+                    progressable.progress();
+                    if (len == 0) {
+                      continue;
+                    }
+                    size += len;
+                    out.write(buffer, 0, len);
+                  }
+                  out.flush();
+                }
+              }
+            }
+            catch (IOException | RuntimeException exception) {
+              log.error(exception, "Exception in retry loop");
+              throw exception;
+            }
+            progressable.progress();
+            return size;
+          }
+        },
+        RetryPolicies.exponentialBackoffRetry(NUM_RETRIES, SECONDS_BETWEEN_RETRIES, TimeUnit.SECONDS)
+    );
+    return zipPusher.push();
+  }
+
+  public static ProgressIndicator progressIndicatorForContext(
+      final TaskAttemptContext context
+  )
+  {
+    return new ProgressIndicator()
+    {
+      @Override
+      public void progress()
+      {
+        context.progress();
+      }
+
+      @Override
+      public void start()
+      {
+        context.progress();
+        context.setStatus("STARTED");
+      }
+
+      @Override
+      public void stop()
+      {
+        context.progress();
+        context.setStatus("STOPPED");
+      }
+
+      @Override
+      public void startSection(String section)
+      {
+        context.progress();
+        context.setStatus(String.format("STARTED [%s]", section));
+      }
+
+      @Override
+      public void progressSection(String section, String message)
+      {
+        log.info("Progress message for section [%s] : [%s]", section, message);
+        context.progress();
+        context.setStatus(String.format("PROGRESS [%s]", section));
+      }
+
+      @Override
+      public void stopSection(String section)
+      {
+        context.progress();
+        context.setStatus(String.format("STOPPED [%s]", section));
+      }
+    };
+  }
 }
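unzipNoGuava and progressIndicatorForContext exist so that long-running work inside a task attempt keeps reporting progress; an attempt that goes silent for too long can be killed by the framework. A minimal sketch of how mapper code might combine the two (the variable names here are illustrative, not from this patch; a Mapper's Context is a TaskAttemptContext and also a Progressable):

    // Pull a zipped segment down, ticking progress on every buffer read:
    long fetched = JobHelper.unzipNoGuava(zipPath, context.getConfiguration(), tmpDir, context);
    // Bridge the same context into code that expects Druid's ProgressIndicator:
    ProgressIndicator progress = JobHelper.progressIndicatorForContext(context);
    progress.startSection("convert");
    // ... long-running conversion work ...
    progress.stopSection("convert");

Note that unzipNoGuava writes each entry directly under outDir; it assumes the flat layout of Druid segment zips and does not create nested directories for entries with path separators in their names.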
diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/updater/HadoopConverterJob.java b/indexing-hadoop/src/main/java/io/druid/indexer/updater/HadoopConverterJob.java
new file mode 100644
index 000000000000..c961031a685d
--- /dev/null
+++ b/indexing-hadoop/src/main/java/io/druid/indexer/updater/HadoopConverterJob.java
@@ -0,0 +1,723 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package io.druid.indexer.updater;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.google.common.base.Function;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Throwables;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Sets;
+import com.google.common.io.Files;
+import com.metamx.common.IAE;
+import com.metamx.common.ISE;
+import com.metamx.common.logger.Logger;
+import io.druid.indexer.JobHelper;
+import io.druid.segment.IndexIO;
+import io.druid.segment.IndexMaker;
+import io.druid.timeline.DataSegment;
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.JobPriority;
+import org.apache.hadoop.mapreduce.InputFormat;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.JobID;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.hadoop.mapreduce.RecordReader;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
+import org.apache.hadoop.mapreduce.TaskReport;
+import org.apache.hadoop.mapreduce.TaskType;
+import org.apache.hadoop.util.Progressable;
+
+import javax.annotation.Nullable;
+import javax.validation.constraints.NotNull;
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+public class HadoopConverterJob
+{
+  private static final Logger log = new Logger(HadoopConverterJob.class);
+  private static final String COUNTER_GROUP = "Hadoop Druid Converter";
+  private static final String COUNTER_LOADED = "Loaded Bytes";
+  private static final String COUNTER_WRITTEN = "Written Bytes";
+
+  private static void setJobName(JobConf jobConf, List<DataSegment> segments)
+  {
+    if (segments.size() == 1) {
+      final DataSegment segment = segments.get(0);
+      jobConf.setJobName(
+          String.format(
+              "druid-convert-%s-%s-%s",
+              segment.getDataSource(),
+              segment.getInterval(),
+              segment.getVersion()
+          )
+      );
+    } else {
+      final Set<String> dataSources = Sets.newHashSet(
+          Iterables.transform(
+              segments,
+              new Function<DataSegment, String>()
+              {
+                @Override
+                public String apply(DataSegment input)
+                {
+                  return input.getDataSource();
+                }
+              }
+          )
+      );
+      final Set<String> versions = Sets.newHashSet(
+          Iterables.transform(
+              segments,
+              new Function<DataSegment, String>()
+              {
+                @Override
+                public String apply(DataSegment input)
+                {
+                  return input.getVersion();
+                }
+              }
+          )
+      );
+      jobConf.setJobName(
+          String.format(
+              "druid-convert-%s-%s",
+              Arrays.toString(dataSources.toArray()),
+              Arrays.toString(versions.toArray())
+          )
+      );
+    }
+  }
+
+  public static Path getJobPath(JobID jobID, Path workingDirectory)
+  {
+    return new Path(workingDirectory, jobID.toString());
+  }
+
+  public static Path getTaskPath(JobID jobID, TaskAttemptID taskAttemptID, Path workingDirectory)
+  {
+    return new Path(getJobPath(jobID, workingDirectory), taskAttemptID.toString());
+  }
+
+  public static void cleanup(Job job) throws IOException
+  {
+    final Path jobDir = getJobPath(job.getJobID(), job.getWorkingDirectory());
+    final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
+    fs.delete(jobDir, true);
+  }
+
+  public static HadoopDruidConverterConfig converterConfigFromConfiguration(Configuration configuration)
+      throws IOException
+  {
+    final String property = Preconditions.checkNotNull(
+        configuration.get(HadoopDruidConverterConfig.CONFIG_PROPERTY),
+        HadoopDruidConverterConfig.CONFIG_PROPERTY
+    );
+    return HadoopDruidConverterConfig.fromString(property);
+  }
+
+  public static void converterConfigIntoConfiguration(
+      HadoopDruidConverterConfig priorConfig,
+      List<DataSegment> segments,
+      Configuration configuration
+  )
+  {
+    final HadoopDruidConverterConfig config = new HadoopDruidConverterConfig(
+        priorConfig.getDataSource(),
+        priorConfig.getInterval(),
+        priorConfig.getIndexSpec(),
+        segments,
+        priorConfig.isValidate(),
+        priorConfig.getDistributedSuccessCache(),
+        priorConfig.getHadoopProperties(),
+        priorConfig.getJobPriority(),
+        priorConfig.getSegmentOutputPath()
+    );
+    try {
+      configuration.set(
+          HadoopDruidConverterConfig.CONFIG_PROPERTY,
+          HadoopDruidConverterConfig.jsonMapper.writeValueAsString(config)
+      );
+    }
+    catch (JsonProcessingException e) {
+      throw Throwables.propagate(e);
+    }
+  }
+
+  private final HadoopDruidConverterConfig converterConfig;
+  private long loadedBytes = 0L;
+  private long writtenBytes = 0L;
+
+  public HadoopConverterJob(
+      HadoopDruidConverterConfig converterConfig
+  )
+  {
+    this.converterConfig = converterConfig;
+  }
+
+  public List<DataSegment> run() throws IOException
+  {
+    final JobConf jobConf = new JobConf();
+    jobConf.setKeepFailedTaskFiles(false);
+    for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
+      jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
+    }
+    final List<DataSegment> segments = converterConfig.getSegments();
+    if (segments.isEmpty()) {
+      throw new IAE(
+          "No segments found for datasource [%s]",
+          converterConfig.getDataSource()
+      );
+    }
+    converterConfigIntoConfiguration(converterConfig, segments, jobConf);
+
+    jobConf.setNumReduceTasks(0); // Map only. Number of map tasks determined by input format
+    jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));
+
+    setJobName(jobConf, segments);
+
+    if (converterConfig.getJobPriority() != null) {
+      jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
+    }
+
+    final Job job = Job.getInstance(jobConf);
+
+    job.setInputFormatClass(ConfigInputFormat.class);
+    job.setMapperClass(ConvertingMapper.class);
+    job.setMapOutputKeyClass(Text.class);
+    job.setMapOutputValueClass(Text.class);
+    job.setMapSpeculativeExecution(false);
+    job.setOutputFormatClass(ConvertingOutputFormat.class);
+
+    JobHelper.setupClasspath(JobHelper.distributedClassPath(jobConf.getWorkingDirectory()), job);
+
+    Throwable throwable = null;
+    try {
+      job.submit();
+      log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
+      final boolean success = job.waitForCompletion(true);
+      if (!success) {
+        final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
+        if (reports != null) {
+          for (final TaskReport report : reports) {
+            log.error("Error in task [%s] : %s", report.getTaskId(), Arrays.toString(report.getDiagnostics()));
+          }
+        }
+        return null;
+      }
+      try {
+        loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
+        writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
+      }
+      catch (IOException ex) {
+        log.error(ex, "Could not fetch counters");
+      }
+      final JobID jobID = job.getJobID();
+
+      final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
+      final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
+      final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
+      final List<Path> goodPaths = new ArrayList<>();
+      while (it.hasNext()) {
+        final LocatedFileStatus locatedFileStatus = it.next();
+        if (locatedFileStatus.isFile()) {
+          final Path myPath = locatedFileStatus.getPath();
+          if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
+            goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
+          }
+        }
+      }
+      if (goodPaths.isEmpty()) {
+        log.warn("No good data found at [%s]", jobDir);
+        return null;
+      }
+      final List<DataSegment> returnList = ImmutableList.copyOf(
+          Lists.transform(
+              goodPaths, new Function<Path, DataSegment>()
+              {
+                @Nullable
+                @Override
+                public DataSegment apply(final Path input)
+                {
+                  try {
+                    if (!fs.exists(input)) {
+                      throw new ISE(
+                          "Somehow [%s] was found but [%s] is missing at [%s]",
+                          ConvertingOutputFormat.DATA_SUCCESS_KEY,
+                          ConvertingOutputFormat.DATA_FILE_KEY,
+                          jobDir
+                      );
+                    }
+                  }
+                  catch (final IOException e) {
+                    throw Throwables.propagate(e);
+                  }
+                  try (final InputStream stream = fs.open(input)) {
+                    return HadoopDruidConverterConfig.jsonMapper.readValue(stream, DataSegment.class);
+                  }
+                  catch (final IOException e) {
+                    throw Throwables.propagate(e);
+                  }
+                }
+              }
+          )
+      );
+      if (returnList.size() == segments.size()) {
+        return returnList;
+      } else {
+        throw new ISE(
+            "Tasks reported success but result length did not match! Expected %d found %d at path [%s]",
+            segments.size(),
+            returnList.size(),
+            jobDir
+        );
+      }
+    }
+    catch (InterruptedException | ClassNotFoundException e) {
+      RuntimeException exception = Throwables.propagate(e);
+      throwable = exception;
+      throw exception;
+    }
+    catch (Throwable t) {
+      throwable = t;
+      throw t;
+    }
+    finally {
+      try {
+        cleanup(job);
+      }
+      catch (IOException e) {
+        if (throwable != null) {
+          throwable.addSuppressed(e);
+        } else {
+          log.error(e, "Could not clean up job [%s]", job.getJobID());
+        }
+      }
+    }
+  }
+
+  public long getLoadedBytes()
+  {
+    return loadedBytes;
+  }
+
+  public long getWrittenBytes()
+  {
+    return writtenBytes;
+  }
+
+  public static class ConvertingOutputFormat extends OutputFormat<Text, Text>
+  {
+    protected static final String DATA_FILE_KEY = "result";
+    protected static final String DATA_SUCCESS_KEY = "_SUCCESS";
+    protected static final String PUBLISHED_SEGMENT_KEY = "io.druid.indexer.updater.converter.publishedSegment";
+    private static final Logger log = new Logger(ConvertingOutputFormat.class);
+
+    @Override
+    public RecordWriter<Text, Text> getRecordWriter(TaskAttemptContext context) throws IOException, InterruptedException
+    {
+      return new RecordWriter<Text, Text>()
+      {
+        @Override
+        public void write(Text key, Text value) throws IOException, InterruptedException
+        {
+          // NOOP
+        }
+
+        @Override
+        public void close(TaskAttemptContext context) throws IOException, InterruptedException
+        {
+          // NOOP
+        }
+      };
+    }
+
+    @Override
+    public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException
+    {
+      // NOOP
+    }
+
+    @Override
+    public OutputCommitter getOutputCommitter(final TaskAttemptContext context)
+        throws IOException, InterruptedException
+    {
+      return new OutputCommitter()
+      {
+        @Override
+        public void setupJob(JobContext jobContext) throws IOException
+        {
+          // NOOP
+        }
+
+        @Override
+        public void setupTask(TaskAttemptContext taskContext) throws IOException
+        {
+          // NOOP
+        }
+
+        @Override
+        public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException
+        {
+          return taskContext.getConfiguration().get(PUBLISHED_SEGMENT_KEY) != null;
+        }
+
+        @Override
+        public void commitTask(final TaskAttemptContext taskContext) throws IOException
+        {
+          final Progressable commitProgressable = new Progressable()
+          {
+            @Override
+            public void progress()
+            {
+              taskContext.progress();
+            }
+          };
+          final String finalSegmentString = taskContext.getConfiguration().get(PUBLISHED_SEGMENT_KEY);
+          if (finalSegmentString == null) {
+            throw new IOException("Could not read final segment");
+          }
+          final DataSegment newSegment = HadoopDruidConverterConfig.jsonMapper.readValue(
+              finalSegmentString,
+              DataSegment.class
+          );
+          log.info("Committing new segment [%s]", newSegment);
+          taskContext.progress();
+
+          final FileSystem fs = taskContext.getWorkingDirectory().getFileSystem(taskContext.getConfiguration());
+          final Path taskAttemptDir = getTaskPath(
+              context.getJobID(),
+              context.getTaskAttemptID(),
+              taskContext.getWorkingDirectory()
+          );
+          final Path taskAttemptFile = new Path(taskAttemptDir, DATA_FILE_KEY);
+          final Path taskAttemptSuccess = new Path(taskAttemptDir, DATA_SUCCESS_KEY);
+          try (final OutputStream outputStream = fs.create(taskAttemptFile, false, 1 << 10, commitProgressable)) {
+            outputStream.write(HadoopDruidConverterConfig.jsonMapper.writeValueAsBytes(newSegment));
+          }
+
+          fs.create(taskAttemptSuccess, false).close();
+
+          taskContext.progress();
+          taskContext.setStatus("Committed");
+        }
+
+        @Override
+        public void abortTask(TaskAttemptContext taskContext) throws IOException
+        {
+          log.warn("Aborting task. Nothing to clean up.");
+        }
+      };
+    }
+  }
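ConvertingOutputFormat and the driver's run() together implement a simple marker-file commit protocol: the mapper stashes the serialized DataSegment in the task configuration under PUBLISHED_SEGMENT_KEY, the committer writes it to a "result" file plus a "_SUCCESS" marker in the task-attempt directory, and the driver only trusts a "result" whose sibling "_SUCCESS" exists. A sketch of the driver-side half of the contract (fs, taskDir, and the surrounding loop are assumed, matching the names used in run()):

    // Only read a task's output if it finished its commit:
    Path success = new Path(taskDir, "_SUCCESS");
    if (fs.exists(success)) {
      try (InputStream in = fs.open(new Path(taskDir, "result"))) {
        DataSegment segment = HadoopDruidConverterConfig.jsonMapper
            .readValue(in, DataSegment.class);
        // ... collect the published segment ...
      }
    }

Writing "_SUCCESS" last means a task that dies mid-commit leaves at most an orphan "result" file, which the driver ignores.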
+
+  public static class ConvertingMapper extends Mapper<String, String, Text, Text>
+  {
+    private static final Logger log = new Logger(ConvertingMapper.class);
+    private static final String TMP_FILE_LOC_KEY = "io.druid.indexer.updater.converter.reducer.tmpDir";
+
+    @Override
+    protected void map(
+        String key, String value,
+        final Context context
+    ) throws IOException, InterruptedException
+    {
+      final InputSplit split = context.getInputSplit();
+      if (!(split instanceof DataSegmentSplit)) {
+        throw new IAE(
+            "Unexpected split type. Expected [%s] was [%s]",
+            DataSegmentSplit.class.getCanonicalName(),
+            split.getClass().getCanonicalName()
+        );
+      }
+
+      final String tmpDirLoc = context.getConfiguration().get(TMP_FILE_LOC_KEY);
+      final File tmpDir = Paths.get(tmpDirLoc).toFile();
+
+      final DataSegment segment = ((DataSegmentSplit) split).getDataSegment();
+
+      final HadoopDruidConverterConfig config = converterConfigFromConfiguration(context.getConfiguration());
+
+      context.setStatus("DOWNLOADING");
+      context.progress();
+      final Path inPath = new Path(getURIFromSegment(segment));
+      final File inDir = new File(tmpDir, "in");
+
+      if (inDir.exists() && !inDir.delete()) {
+        log.warn("Could not delete [%s]", inDir);
+      }
+
+      if (!inDir.mkdir() && (!inDir.exists() || !inDir.isDirectory())) {
+        log.warn("Unable to make directory");
+      }
+
+      final long inSize = JobHelper.unzipNoGuava(inPath, context.getConfiguration(), inDir, context);
+      log.debug("Loaded %d bytes into [%s] for converting", inSize, inDir.getAbsolutePath());
+      context.getCounter(COUNTER_GROUP, COUNTER_LOADED).increment(inSize);
+
+      context.setStatus("CONVERTING");
+      context.progress();
+      final File outDir = new File(tmpDir, "out");
+      if (!outDir.mkdir() && (!outDir.exists() || !outDir.isDirectory())) {
+        throw new IOException(String.format("Could not create output directory [%s]", outDir));
+      }
+      IndexMaker.convert(
+          inDir,
+          outDir,
+          config.getIndexSpec(),
+          JobHelper.progressIndicatorForContext(context)
+      );
+      if (config.isValidate()) {
+        context.setStatus("Validating");
+        IndexIO.DefaultIndexIOHandler.validateTwoSegments(inDir, outDir);
+      }
+      context.progress();
+      context.setStatus("Starting PUSH");
+      final Path baseOutputPath = new Path(config.getSegmentOutputPath());
+      final FileSystem outputFS = baseOutputPath.getFileSystem(context.getConfiguration());
+      final DataSegment finalSegmentTemplate = segment.withVersion(
+          segment.getVersion()
+          + "_converted"
+      );
+      final DataSegment finalSegment = JobHelper.serializeOutIndex(
+          finalSegmentTemplate,
+          context.getConfiguration(),
+          context,
+          context.getTaskAttemptID(),
+          outDir,
+          JobHelper.makeSegmentOutputPath(
+              baseOutputPath,
+              outputFS,
+              finalSegmentTemplate.getDataSource(),
+              finalSegmentTemplate.getVersion(),
+              finalSegmentTemplate.getInterval(),
+              finalSegmentTemplate.getShardSpec().getPartitionNum()
+          )
+      );
+      context.progress();
+      context.setStatus("Finished PUSH");
+      final String finalSegmentString = HadoopDruidConverterConfig.jsonMapper.writeValueAsString(finalSegment);
+      context.getConfiguration().set(ConvertingOutputFormat.PUBLISHED_SEGMENT_KEY, finalSegmentString);
+      context.write(new Text("dataSegment"), new Text(finalSegmentString));
+
+      context.getCounter(COUNTER_GROUP, COUNTER_WRITTEN).increment(finalSegment.getSize());
+      context.progress();
+      context.setStatus("Ready To Commit");
+    }
+
+    @Override
+    protected void setup(Context context) throws IOException, InterruptedException
+    {
+      final File tmpDir = Files.createTempDir();
+      context.getConfiguration().set(TMP_FILE_LOC_KEY, tmpDir.getAbsolutePath());
+    }
+
+    private static URI getURIFromSegment(DataSegment dataSegment)
+    {
+      // There is no good way around this...
+      // TODO: add getURI() to URIDataPuller
+      final Map<String, Object> loadSpec = dataSegment.getLoadSpec();
+      final String type = loadSpec.get("type").toString();
+      final URI segmentLocURI;
+      if ("s3_zip".equals(type)) {
+        segmentLocURI = URI.create(String.format("s3n://%s/%s", loadSpec.get("bucket"), loadSpec.get("key")));
+      } else if ("hdfs".equals(type)) {
+        segmentLocURI = URI.create(loadSpec.get("path").toString());
+      } else if ("local".equals(type)) {
+        try {
+          segmentLocURI = new URI("file", null, loadSpec.get("path").toString(), null, null);
+        }
+        catch (URISyntaxException e) {
+          throw new ISE(e, "Unable to form simple file uri");
+        }
+      } else {
+        try {
+          throw new IAE(
+              "Cannot figure out loadSpec %s",
+              HadoopDruidConverterConfig.jsonMapper.writeValueAsString(loadSpec)
+          );
+        }
+        catch (JsonProcessingException e) {
+          throw new ISE("Cannot write Map with json mapper");
+        }
+      }
+      return segmentLocURI;
+    }
+
+    @Override
+    protected void cleanup(
+        Context context
+    ) throws IOException, InterruptedException
+    {
+      final String tmpDirLoc = context.getConfiguration().get(TMP_FILE_LOC_KEY);
+      final File tmpDir = Paths.get(tmpDirLoc).toFile();
+      FileUtils.deleteDirectory(tmpDir);
+      context.progress();
+      context.setStatus("Clean");
+    }
+  }
+
+  public static class DataSegmentSplit extends InputSplit implements Writable
+  {
+    private DataSegment dataSegment = null;
+
+    public DataSegmentSplit()
+    {
+      // For serialization purposes
+    }
+
+    public DataSegmentSplit(@NotNull DataSegment dataSegment)
+    {
+      this.dataSegment = Preconditions.checkNotNull(dataSegment, "dataSegment");
+    }
+
+    @Override
+    public long getLength() throws IOException, InterruptedException
+    {
+      return dataSegment.getSize();
+    }
+
+    @Override
+    public String[] getLocations() throws IOException, InterruptedException
+    {
+      return new String[]{};
+    }
+
+    protected DataSegment getDataSegment()
+    {
+      return dataSegment;
+    }
+
+    @Override
+    public void write(DataOutput out) throws IOException
+    {
+      out.write(HadoopDruidConverterConfig.jsonMapper.writeValueAsString(dataSegment).getBytes());
+    }
+
+    @Override
+    public void readFields(DataInput in) throws IOException
+    {
+      dataSegment = HadoopDruidConverterConfig.jsonMapper.readValue(in.readLine(), DataSegment.class);
+    }
+  }
+
+  public static class ConfigInputFormat extends InputFormat<String, String>
+  {
+    @Override
+    public List<InputSplit> getSplits(final JobContext jobContext) throws IOException, InterruptedException
+    {
+      final HadoopDruidConverterConfig config = converterConfigFromConfiguration(jobContext.getConfiguration());
+      final List<DataSegment> segments = config.getSegments();
+      if (segments == null) {
+        throw new IOException("Bad config, missing segments");
+      }
+      return Lists.transform(
+          segments, new Function<DataSegment, InputSplit>()
+          {
+            @Nullable
+            @Override
+            public InputSplit apply(DataSegment input)
+            {
+              return new DataSegmentSplit(input);
+            }
+          }
+      );
+    }
+
+    @Override
+    public RecordReader<String, String> createRecordReader(
+        final InputSplit inputSplit, final TaskAttemptContext taskAttemptContext
+    ) throws IOException, InterruptedException
+    {
+      return new RecordReader<String, String>()
+      {
+        boolean readAnything = false;
+
+        @Override
+        public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
+            throws IOException, InterruptedException
+        {
+          // NOOP
+        }
+
+        @Override
+        public boolean nextKeyValue() throws IOException, InterruptedException
+        {
+          return !readAnything;
+        }
+
+        @Override
+        public String getCurrentKey() throws IOException, InterruptedException
+        {
+          return "key";
+        }
+
+        @Override
+        public String getCurrentValue() throws IOException, InterruptedException
+        {
+          readAnything = true;
+          return "fakeValue";
+        }
+
+        @Override
+        public float getProgress() throws IOException, InterruptedException
+        {
+          return readAnything ? 1.0F : 0.0F;
+        }
+
+        @Override
+        public void close() throws IOException
+        {
+          // NOOP
+        }
+      };
+    }
+  }
+}
diff --git a/indexing-hadoop/src/main/java/io/druid/indexer/updater/HadoopDruidConverterConfig.java b/indexing-hadoop/src/main/java/io/druid/indexer/updater/HadoopDruidConverterConfig.java
new file mode 100644
index 000000000000..3c822ca5ecea
--- /dev/null
+++ b/indexing-hadoop/src/main/java/io/druid/indexer/updater/HadoopDruidConverterConfig.java
@@ -0,0 +1,190 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package io.druid.indexer.updater;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import com.google.inject.Binder;
+import com.google.inject.Injector;
+import com.google.inject.Key;
+import com.google.inject.Module;
+import io.druid.guice.GuiceInjectors;
+import io.druid.guice.JsonConfigProvider;
+import io.druid.guice.annotations.Self;
+import io.druid.initialization.Initialization;
+import io.druid.segment.IndexSpec;
+import io.druid.server.DruidNode;
+import io.druid.timeline.DataSegment;
+import org.joda.time.Interval;
+
+import java.io.File;
+import java.io.IOException;
+import java.net.URI;
+import java.util.List;
+import java.util.Map;
+
+public class HadoopDruidConverterConfig
+{
+  public static final String CONFIG_PROPERTY = "io.druid.indexer.updater.converter";
+  public static final ObjectMapper jsonMapper;
+  private static final Injector injector = Initialization.makeInjectorWithModules(
+      GuiceInjectors.makeStartupInjector(),
+      ImmutableList.of(
+          new Module()
+          {
+            @Override
+            public void configure(Binder binder)
+            {
+              JsonConfigProvider.bindInstance(
+                  binder, Key.get(DruidNode.class, Self.class), new DruidNode("hadoop-converter", null, null)
+              );
+            }
+          }
+      )
+  );
+
+  static {
+    jsonMapper = injector.getInstance(ObjectMapper.class);
+    jsonMapper.registerSubtypes(HadoopDruidConverterConfig.class);
+  }
+
+  private static final TypeReference<Map<String, Object>> mapTypeReference = new TypeReference<Map<String, Object>>()
+  {
+  };
+
+  public static HadoopDruidConverterConfig fromString(final String string)
throws IOException + { + return fromMap(jsonMapper.>readValue(string, mapTypeReference)); + } + + public static HadoopDruidConverterConfig fromFile(final File file) throws IOException + { + return fromMap(jsonMapper.>readValue(file, mapTypeReference)); + } + + public static HadoopDruidConverterConfig fromMap(final Map map) + { + return jsonMapper.convertValue(map, HadoopDruidConverterConfig.class); + } + + @JsonProperty + private final String dataSource; + @JsonProperty + private final Interval interval; + @JsonProperty + private final IndexSpec indexSpec; + @JsonProperty + private final List segments; + @JsonProperty + private final boolean validate; + @JsonProperty + private final URI distributedSuccessCache; + @JsonProperty + private final Map hadoopProperties; + @JsonProperty + private final String jobPriority; + @JsonProperty + private final String segmentOutputPath; + + @JsonCreator + public HadoopDruidConverterConfig( + @JsonProperty("dataSource") final String dataSource, + @JsonProperty("interval") final Interval interval, + @JsonProperty("indexSpec") final IndexSpec indexSpec, + @JsonProperty("segments") final List segments, + @JsonProperty("validate") final Boolean validate, + @JsonProperty("distributedSuccessCache") URI distributedSuccessCache, + @JsonProperty("hadoopProperties") Map hadoopProperties, + @JsonProperty("jobPriority") String jobPriority, + @JsonProperty("segmentOutputPath") String segmentOutputPath + ) + { + this.dataSource = Preconditions.checkNotNull(dataSource, "dataSource"); + this.interval = Preconditions.checkNotNull(interval, "interval"); + this.indexSpec = Preconditions.checkNotNull(indexSpec, "indexSpec"); + this.distributedSuccessCache = Preconditions.checkNotNull(distributedSuccessCache, "distributedSuccessCache"); + this.segments = segments; + this.validate = validate == null ? false : validate; + this.hadoopProperties = hadoopProperties == null + ? ImmutableMap.of() + : ImmutableMap.copyOf(hadoopProperties); + this.jobPriority = jobPriority; + this.segmentOutputPath = Preconditions.checkNotNull(segmentOutputPath, "segmentOutputPath"); + } + + @JsonProperty + public boolean isValidate() + { + return validate; + } + + @JsonProperty + public String getDataSource() + { + return dataSource; + } + + @JsonProperty + public Interval getInterval() + { + return interval; + } + + @JsonProperty + public IndexSpec getIndexSpec() + { + return indexSpec; + } + + @JsonProperty + public List getSegments() + { + return segments; + } + + @JsonProperty + public URI getDistributedSuccessCache() + { + return distributedSuccessCache; + } + + @JsonProperty + public Map getHadoopProperties() + { + return hadoopProperties; + } + + @JsonProperty + public String getJobPriority() + { + return jobPriority; + } + + @JsonProperty + public String getSegmentOutputPath() + { + return segmentOutputPath; + } +} diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/updater/HadoopConverterJobTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/updater/HadoopConverterJobTest.java new file mode 100644 index 000000000000..884ce1ad13f2 --- /dev/null +++ b/indexing-hadoop/src/test/java/io/druid/indexer/updater/HadoopConverterJobTest.java @@ -0,0 +1,497 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.indexer.updater; + +import com.fasterxml.jackson.annotation.JsonProperty; +import com.google.common.base.Preconditions; +import com.google.common.base.Supplier; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; +import com.google.common.io.ByteSource; +import com.google.common.io.Files; +import com.metamx.common.FileUtils; +import com.metamx.common.Granularity; +import io.druid.client.DruidDataSource; +import io.druid.data.input.impl.DelimitedParseSpec; +import io.druid.data.input.impl.DimensionsSpec; +import io.druid.data.input.impl.StringInputRowParser; +import io.druid.data.input.impl.TimestampSpec; +import io.druid.granularity.QueryGranularity; +import io.druid.indexer.HadoopDruidDetermineConfigurationJob; +import io.druid.indexer.HadoopDruidIndexerConfig; +import io.druid.indexer.HadoopDruidIndexerJob; +import io.druid.indexer.HadoopIOConfig; +import io.druid.indexer.HadoopIngestionSpec; +import io.druid.indexer.HadoopTuningConfig; +import io.druid.indexer.JobHelper; +import io.druid.indexer.Jobby; +import io.druid.indexer.SQLMetadataStorageUpdaterJobHandler; +import io.druid.metadata.MetadataSegmentManagerConfig; +import io.druid.metadata.MetadataStorageConnectorConfig; +import io.druid.metadata.MetadataStorageTablesConfig; +import io.druid.metadata.SQLMetadataSegmentManager; +import io.druid.metadata.TestDerbyConnector; +import io.druid.metadata.storage.derby.DerbyConnector; +import io.druid.query.Query; +import io.druid.query.aggregation.AggregatorFactory; +import io.druid.query.aggregation.DoubleSumAggregatorFactory; +import io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory; +import io.druid.segment.IndexSpec; +import io.druid.segment.TestIndex; +import io.druid.segment.data.RoaringBitmapSerdeFactory; +import io.druid.segment.indexing.DataSchema; +import io.druid.segment.indexing.granularity.UniformGranularitySpec; +import io.druid.timeline.DataSegment; +import org.joda.time.Interval; +import org.junit.After; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.skife.jdbi.v2.Handle; +import org.skife.jdbi.v2.exceptions.CallbackFailedException; +import org.skife.jdbi.v2.tweak.HandleCallback; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.sql.SQLException; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.Map; + +public class HadoopConverterJobTest +{ + @Rule + public final TemporaryFolder temporaryFolder = new TemporaryFolder(); + private String 
storageLocProperty = null;
  private File tmpSegmentDir = null;

  private static final String DATASOURCE = "testDatasource";
  private static final String STORAGE_PROPERTY_KEY = "druid.storage.storageDirectory";

  private final MetadataStorageUpdaterJobSpec metadataStorageUpdaterJobSpec = new MetadataStorageUpdaterJobSpec()
  {
    @Override
    @JsonProperty
    public String getSegmentTable()
    {
      return "druid_segments";
    }

    @Override
    @JsonProperty
    public String getType()
    {
      return "derby";
    }

    @JsonProperty
    public String getConnectURI()
    {
      return "jdbc:derby:memory:druidTest;create=true";
    }

    @JsonProperty
    public String getUser()
    {
      return "sb";
    }

    @JsonProperty
    public String getPassword()
    {
      return "sb";
    }

    @Override
    public MetadataStorageConnectorConfig get()
    {
      return new MetadataStorageConnectorConfig()
      {
        public boolean isCreateTables()
        {
          return true;
        }

        public String getHost()
        {
          return "localhost";
        }

        public int getPort()
        {
          return -1;
        }

        public String getConnectURI()
        {
          return "jdbc:derby:memory:druidTest;create=true";
        }

        public String getUser()
        {
          return "sb";
        }

        public String getPassword()
        {
          return "sb";
        }

        @Override
        public String toString()
        {
          return "DbConnectorConfig{" +
                 "createTables=" + isCreateTables() +
                 ", connectURI='" + getConnectURI() + '\'' +
                 ", user='" + getUser() + '\'' +
                 ", passwordProvider=" + getPassword() +
                 '}';
        }
      };
    }
  };

  private Supplier<MetadataStorageTablesConfig> metadataStorageTablesConfigSupplier;
  private DerbyConnector connector;

  private final Interval interval = Interval.parse("2011-01-01T00:00:00.000Z/2011-05-01T00:00:00.000Z");

  @After
  public void tearDown()
  {
    if (storageLocProperty == null) {
      System.clearProperty(STORAGE_PROPERTY_KEY);
    } else {
      System.setProperty(STORAGE_PROPERTY_KEY, storageLocProperty);
    }
    tmpSegmentDir = null;
  }

  @Before
  public void setUp() throws Exception
  {
    final File scratchFileDir = temporaryFolder.newFolder();
    storageLocProperty = System.getProperty(STORAGE_PROPERTY_KEY);
    tmpSegmentDir = temporaryFolder.newFolder();
    System.setProperty(STORAGE_PROPERTY_KEY, tmpSegmentDir.getAbsolutePath());

    final URL url = Preconditions.checkNotNull(Query.class.getClassLoader().getResource("druid.sample.tsv"));
    final File tmpInputFile = temporaryFolder.newFile();
    FileUtils.retryCopy(
        new ByteSource()
        {
          @Override
          public InputStream openStream() throws IOException
          {
            return url.openStream();
          }
        },
        tmpInputFile,
        FileUtils.IS_EXCEPTION,
        3
    );
    final HadoopDruidIndexerConfig hadoopDruidIndexerConfig = new HadoopDruidIndexerConfig(
        new HadoopIngestionSpec(
            new DataSchema(
                DATASOURCE,
                new StringInputRowParser(
                    new DelimitedParseSpec(
                        new TimestampSpec("ts", "iso", null),
                        new DimensionsSpec(Arrays.asList(TestIndex.DIMENSIONS), null, null),
                        "\t",
                        "\u0001",
                        Arrays.asList(TestIndex.COLUMNS)
                    )
                ),
                new AggregatorFactory[]{
                    new DoubleSumAggregatorFactory(TestIndex.METRICS[0], TestIndex.METRICS[0]),
                    new HyperUniquesAggregatorFactory("quality_uniques", "quality")
                },
                new UniformGranularitySpec(
                    Granularity.MONTH,
                    QueryGranularity.DAY,
                    ImmutableList.of(interval)
                )
            ),
            new HadoopIOConfig(
                ImmutableMap.of(
                    "type", "static",
                    "paths", tmpInputFile.getAbsolutePath()
                ),
                metadataStorageUpdaterJobSpec,
                tmpSegmentDir.getAbsolutePath()
            ),
            new HadoopTuningConfig(
                scratchFileDir.getAbsolutePath(),
                null,
                null,
                null,
                null,
                null,
                false,
                false,
                false,
                false,
                null,
                false,
                false,
                false,
                null,
                null
            )
        )
    );
    metadataStorageTablesConfigSupplier =
        new Supplier<MetadataStorageTablesConfig>()
        {
          @Override
          public MetadataStorageTablesConfig get()
          {
            return MetadataStorageTablesConfig.fromBase("druid");
          }
        };
    connector = new TestDerbyConnector(
        new Supplier<MetadataStorageConnectorConfig>()
        {
          @Override
          public MetadataStorageConnectorConfig get()
          {
            return metadataStorageUpdaterJobSpec.get();
          }
        },
        new Supplier<MetadataStorageTablesConfig>()
        {
          @Override
          public MetadataStorageTablesConfig get()
          {
            return new MetadataStorageTablesConfig(null, null, null, null, null, null, null, null);
          }
        }
    );
    try {
      connector.getDBI().withHandle(
          new HandleCallback<Void>()
          {
            @Override
            public Void withHandle(Handle handle) throws Exception
            {
              handle.execute("DROP TABLE druid_segments");
              return null;
            }
          }
      );
    }
    catch (CallbackFailedException e) {
      // Who cares
    }
    List<Jobby> jobs = ImmutableList.of(
        new Jobby()
        {
          @Override
          public boolean run()
          {
            connector.createSegmentTable(connector.getDBI(), "druid_segments");
            return true;
          }
        },
        new HadoopDruidDetermineConfigurationJob(hadoopDruidIndexerConfig),
        new HadoopDruidIndexerJob(
            hadoopDruidIndexerConfig,
            new SQLMetadataStorageUpdaterJobHandler(connector)
        )
    );
    JobHelper.runJobs(jobs, hadoopDruidIndexerConfig);
  }

  private List<DataSegment> getDataSegments(
      SQLMetadataSegmentManager manager
  ) throws InterruptedException
  {
    manager.start();
    while (!manager.isStarted()) {
      Thread.sleep(10);
    }
    manager.poll();
    final DruidDataSource druidDataSource = manager.getInventoryValue(DATASOURCE);
    manager.stop();
    return Lists.newArrayList(druidDataSource.getSegments());
  }

  @Test
  public void testSimpleJob() throws IOException, InterruptedException
  {
    final SQLMetadataSegmentManager manager = new SQLMetadataSegmentManager(
        HadoopDruidConverterConfig.jsonMapper,
        new Supplier<MetadataSegmentManagerConfig>()
        {
          @Override
          public MetadataSegmentManagerConfig get()
          {
            return new MetadataSegmentManagerConfig();
          }
        },
        metadataStorageTablesConfigSupplier,
        connector
    );

    final List<DataSegment> oldSegments = getDataSegments(manager);
    final File tmpDir = temporaryFolder.newFolder();
    final HadoopConverterJob converterJob = new HadoopConverterJob(
        new HadoopDruidConverterConfig(
            DATASOURCE,
            interval,
            new IndexSpec(new RoaringBitmapSerdeFactory(), "uncompressed", "uncompressed"),
            oldSegments,
            true,
            tmpDir.toURI(),
            ImmutableMap.<String, String>of(),
            null,
            tmpSegmentDir.toURI().toString()
        )
    );

    final List<DataSegment> segments = Lists.newArrayList(converterJob.run());
    Assert.assertNotNull("bad result", segments);
    Assert.assertEquals("wrong segment count", 4, segments.size());
    Assert.assertTrue(converterJob.getLoadedBytes() > 0);
    Assert.assertTrue(converterJob.getWrittenBytes() > 0);
    Assert.assertTrue(converterJob.getWrittenBytes() > converterJob.getLoadedBytes());

    Assert.assertEquals(oldSegments.size(), segments.size());

    final DataSegment segment = segments.get(0);
    Assert.assertTrue(interval.contains(segment.getInterval()));
    Assert.assertTrue(segment.getVersion().endsWith("_converted"));
    Assert.assertTrue(segment.getLoadSpec().get("path").toString().contains("_converted"));

    for (File file : tmpDir.listFiles()) {
      Assert.assertFalse(file.isDirectory());
      Assert.assertTrue(file.isFile());
    }

    final Comparator<DataSegment> segmentComparator = new Comparator<DataSegment>()
    {
      @Override
      public int compare(DataSegment o1, DataSegment o2)
      {
        return o1.getIdentifier().compareTo(o2.getIdentifier());
      }
    };
    Collections.sort(
        oldSegments,
        segmentComparator
    );
    Collections.sort(
        segments,
        segmentComparator
    );

    for (int i = 0; i < oldSegments.size(); ++i) {
      final DataSegment oldSegment = oldSegments.get(i);
      final DataSegment newSegment = segments.get(i);
      Assert.assertEquals(oldSegment.getDataSource(), newSegment.getDataSource());
      Assert.assertEquals(oldSegment.getInterval(), newSegment.getInterval());
      Assert.assertEquals(
          Sets.newHashSet(oldSegment.getMetrics()),
          Sets.newHashSet(newSegment.getMetrics())
      );
      Assert.assertEquals(
          Sets.newHashSet(oldSegment.getDimensions()),
          Sets.newHashSet(newSegment.getDimensions())
      );
      Assert.assertEquals(oldSegment.getVersion() + "_converted", newSegment.getVersion());
      Assert.assertTrue(oldSegment.getSize() < newSegment.getSize());
      Assert.assertEquals(oldSegment.getBinaryVersion(), newSegment.getBinaryVersion());
    }
  }

  private static void corrupt(
      DataSegment segment
  ) throws IOException
  {
    final Map<String, Object> localLoadSpec = segment.getLoadSpec();
    final Path segmentPath = Paths.get(localLoadSpec.get("path").toString());

    final MappedByteBuffer buffer = Files.map(segmentPath.toFile(), FileChannel.MapMode.READ_WRITE);
    while (buffer.hasRemaining()) {
      buffer.put((byte) 0xFF);
    }
  }

  @Test
  public void testHadoopFailure() throws IOException, InterruptedException
  {
    final SQLMetadataSegmentManager manager = new SQLMetadataSegmentManager(
        HadoopDruidConverterConfig.jsonMapper,
        new Supplier<MetadataSegmentManagerConfig>()
        {
          @Override
          public MetadataSegmentManagerConfig get()
          {
            return new MetadataSegmentManagerConfig();
          }
        },
        metadataStorageTablesConfigSupplier,
        connector
    );

    final List<DataSegment> oldSegments = getDataSegments(manager);
    final File tmpDir = temporaryFolder.newFolder();
    final HadoopConverterJob converterJob = new HadoopConverterJob(
        new HadoopDruidConverterConfig(
            DATASOURCE,
            interval,
            new IndexSpec(new RoaringBitmapSerdeFactory(), "uncompressed", "uncompressed"),
            oldSegments,
            true,
            tmpDir.toURI(),
            ImmutableMap.<String, String>of(),
            null,
            tmpSegmentDir.toURI().toString()
        )
    );

    corrupt(oldSegments.get(0));

    final List<DataSegment> result = converterJob.run();
    Assert.assertNull("result should be null", result);

    final List<DataSegment> segments = getDataSegments(manager);

    Assert.assertEquals(oldSegments.size(), segments.size());

    Assert.assertEquals(oldSegments, segments);
  }
}
diff --git a/indexing-hadoop/src/test/java/io/druid/indexer/updater/HadoopDruidConverterConfigTest.java b/indexing-hadoop/src/test/java/io/druid/indexer/updater/HadoopDruidConverterConfigTest.java
new file mode 100644
index 000000000000..d0fcf9a933e9
--- /dev/null
+++ b/indexing-hadoop/src/test/java/io/druid/indexer/updater/HadoopDruidConverterConfigTest.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.indexer.updater; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableMap; +import io.druid.jackson.DefaultObjectMapper; +import io.druid.segment.IndexSpec; +import io.druid.timeline.DataSegment; +import org.joda.time.Interval; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; + +import java.io.IOException; +import java.net.URI; + +public class HadoopDruidConverterConfigTest +{ + @Rule + public TemporaryFolder temporaryFolder = new TemporaryFolder(); + + @Test + public void simpleSerDe() throws IOException + { + final HadoopDruidConverterConfig config = new HadoopDruidConverterConfig( + "datasource", + Interval.parse("2000/2010"), + new IndexSpec(), + ImmutableList.of(), + true, + URI.create("file:/dev/null"), + ImmutableMap.of(), + "HIGH", + temporaryFolder.newFolder().getAbsolutePath() + ); + final ObjectMapper mapper = new DefaultObjectMapper(); + mapper.registerSubtypes(HadoopDruidConverterConfig.class); + final byte[] value = mapper.writeValueAsBytes(config); + final HadoopDruidConverterConfig config2 = mapper.readValue( + value, + HadoopDruidConverterConfig.class + ); + Assert.assertEquals(mapper.writeValueAsString(config), mapper.writeValueAsString(config2)); + } +} diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/ConvertSegmentTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/ConvertSegmentTask.java index 2d3b03c06800..b04f9c9f558d 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/ConvertSegmentTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/ConvertSegmentTask.java @@ -22,6 +22,8 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.google.common.base.Function; import com.google.common.base.Preconditions; +import com.google.common.base.Predicate; +import com.google.common.collect.Iterables; import com.google.common.collect.Sets; import com.metamx.common.guava.FunctionalIterable; import com.metamx.common.logger.Logger; @@ -37,6 +39,7 @@ import org.joda.time.DateTime; import org.joda.time.Interval; +import javax.annotation.Nullable; import java.io.File; import java.io.IOException; import java.util.Collections; @@ -45,7 +48,7 @@ /** * This task takes a segment and attempts to reindex it in the latest version with the specified indexSpec. - * + *

* Only datasource must be specified. `indexSpec` and `force` are highly suggested but optional. The rest get * auto-configured and should only be modified with great care */ @@ -82,7 +85,7 @@ public static ConvertSegmentTask create( ) { final String id = makeId(dataSource, interval); - return new ConvertSegmentTask(id, id, dataSource, interval, null, indexSpec, force, validate); + return new ConvertSegmentTask(id, dataSource, interval, null, indexSpec, force, validate); } /** @@ -100,7 +103,7 @@ public static ConvertSegmentTask create(DataSegment segment, IndexSpec indexSpec final Interval interval = segment.getInterval(); final String dataSource = segment.getDataSource(); final String id = makeId(dataSource, interval); - return new ConvertSegmentTask(id, id, dataSource, interval, segment, indexSpec, force, validate); + return new ConvertSegmentTask(id, dataSource, interval, segment, indexSpec, force, validate); } private static String makeId(String dataSource, Interval interval) @@ -113,7 +116,6 @@ private static String makeId(String dataSource, Interval interval) @JsonCreator private static ConvertSegmentTask createFromJson( @JsonProperty("id") String id, - @JsonProperty("groupId") String groupId, @JsonProperty("dataSource") String dataSource, @JsonProperty("interval") Interval interval, @JsonProperty("segment") DataSegment segment, @@ -131,12 +133,11 @@ private static ConvertSegmentTask createFromJson( return create(segment, indexSpec, isForce, isValidate); } } - return new ConvertSegmentTask(id, groupId, dataSource, interval, segment, indexSpec, isForce, isValidate); + return new ConvertSegmentTask(id, dataSource, interval, segment, indexSpec, isForce, isValidate); } - private ConvertSegmentTask( + protected ConvertSegmentTask( String id, - String groupId, String dataSource, Interval interval, DataSegment segment, @@ -145,7 +146,7 @@ private ConvertSegmentTask( boolean validate ) { - super(id, groupId, dataSource, interval); + super(id, dataSource, interval); this.segment = segment; this.indexSpec = indexSpec == null ? new IndexSpec() : indexSpec; this.force = force; @@ -185,6 +186,7 @@ public DataSegment getSegment() @Override public TaskStatus run(TaskToolbox toolbox) throws Exception { + final Iterable segmentsToUpdate; if (segment == null) { final List segments = toolbox.getTaskActionClient().submit( new SegmentListUsedAction( @@ -192,46 +194,66 @@ public TaskStatus run(TaskToolbox toolbox) throws Exception getInterval() ) ); - final FunctionalIterable tasks = FunctionalIterable + segmentsToUpdate = FunctionalIterable .create(segments) - .keep( - new Function() + .filter( + new Predicate() { @Override - public Task apply(DataSegment segment) + public boolean apply(DataSegment segment) { final Integer segmentVersion = segment.getBinaryVersion(); if (!CURR_VERSION_INTEGER.equals(segmentVersion)) { - return new SubTask(getGroupId(), segment, indexSpec, force, validate); + return true; } else if (force) { log.info( "Segment[%s] already at version[%s], forcing conversion", segment.getIdentifier(), segmentVersion ); - return new SubTask(getGroupId(), segment, indexSpec, force, validate); + return true; } else { log.info("Skipping[%s], already version[%s]", segment.getIdentifier(), segmentVersion); - return null; + return false; } } } ); - - // Vestigial from a past time when this task spawned subtasks. 
-      for (final Task subTask : tasks) {
-        final TaskStatus status = subTask.run(toolbox);
-        if (!status.isSuccess()) {
-          return status;
-        }
-      }
     } else {
       log.info("I'm in a subless mood.");
-      convertSegment(toolbox, segment, indexSpec, force, validate);
+      segmentsToUpdate = Collections.singleton(segment);
+    }
+    // Vestigial from a past time when this task spawned subtasks.
+    for (final Task subTask : generateSubTasks(getGroupId(), segmentsToUpdate, indexSpec, force, validate)) {
+      final TaskStatus status = subTask.run(toolbox);
+      if (!status.isSuccess()) {
+        return TaskStatus.fromCode(getId(), status.getStatusCode());
+      }
     }
     return success();
   }
 
+  protected Iterable<Task> generateSubTasks(
+      final String groupId,
+      final Iterable<DataSegment> segments,
+      final IndexSpec indexSpec,
+      final boolean force,
+      final boolean validate
+  )
+  {
+    return Iterables.transform(
+        segments,
+        new Function<DataSegment, Task>()
+        {
+          @Override
+          public Task apply(DataSegment input)
+          {
+            return new SubTask(groupId, input, indexSpec, force, validate);
+          }
+        }
+    );
+  }
+
   @Override
   public boolean equals(Object o)
   {
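generateSubTasks maps each discovered segment to a SubTask lazily with Guava's Iterables.transform; nothing is constructed until the run() loop iterates. A standalone sketch of the same shape (names are illustrative); note that the Function body must use its apply argument rather than state captured from the enclosing scope:

    import com.google.common.base.Function;
    import com.google.common.collect.ImmutableList;
    import com.google.common.collect.Iterables;

    final class TransformSketch
    {
      public static void main(String[] args)
      {
        // Lazily view a list of names as their lengths; evaluation happens per iteration.
        final Iterable<Integer> lengths = Iterables.transform(
            ImmutableList.of("a", "bb", "ccc"),
            new Function<String, Integer>()
            {
              @Override
              public Integer apply(String input)
              {
                return input.length(); // operate on the element passed in, not captured state
              }
            }
        );
        for (final Integer length : lengths) {
          System.out.println(length); // 1, 2, 3
        }
      }
    }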
diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopConverterTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopConverterTask.java
new file mode 100644
index 000000000000..ec5433617031
--- /dev/null
+++ b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopConverterTask.java
@@ -0,0 +1,277 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package io.druid.indexing.common.task;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonIgnore;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.google.common.base.Preconditions;
+import com.google.common.base.Throwables;
+import com.google.common.collect.ImmutableList;
+import com.metamx.common.UOE;
+import com.metamx.common.logger.Logger;
+import io.druid.indexer.updater.HadoopConverterJob;
+import io.druid.indexer.updater.HadoopDruidConverterConfig;
+import io.druid.indexing.common.TaskStatus;
+import io.druid.indexing.common.TaskToolbox;
+import io.druid.indexing.common.actions.TaskActionClient;
+import io.druid.segment.IndexSpec;
+import io.druid.timeline.DataSegment;
+import org.joda.time.Interval;
+
+import java.io.IOException;
+import java.net.URI;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+public class HadoopConverterTask extends ConvertSegmentTask
+{
+  private static final String TYPE = "hadoop_convert_segment";
+  private static final Logger log = new Logger(HadoopConverterTask.class);
+
+  @JsonCreator
+  public HadoopConverterTask(
+      @JsonProperty("id") String id,
+      @JsonProperty("dataSource") String dataSource,
+      @JsonProperty("interval") Interval interval,
+      @JsonProperty("indexSpec") IndexSpec indexSpec,
+      @JsonProperty("force") boolean force,
+      @JsonProperty("validate") Boolean validate,
+      @JsonProperty("hadoopDependencyCoordinates") List<String> hadoopDependencyCoordinates,
+      @JsonProperty("distributedSuccessCache") URI distributedSuccessCache,
+      @JsonProperty("jobPriority") String jobPriority,
+      @JsonProperty("segmentOutputPath") String segmentOutputPath,
+      @JsonProperty("classpathPrefix") String classpathPrefix
+  )
+  {
+    super(
+        makeId(
+            id,
+            TYPE,
+            Preconditions.checkNotNull(dataSource, "dataSource"),
+            Preconditions.checkNotNull(interval, "interval")
+        ),
+        dataSource,
+        interval,
+        null, // Always call subtask codepath
+        indexSpec,
+        force,
+        validate == null ? true : validate
+    );
+    this.hadoopDependencyCoordinates = hadoopDependencyCoordinates;
+    this.distributedSuccessCache = Preconditions.checkNotNull(distributedSuccessCache, "distributedSuccessCache");
+    this.segmentOutputPath = Preconditions.checkNotNull(segmentOutputPath, "segmentOutputPath");
+    this.jobPriority = jobPriority;
+    this.classpathPrefix = classpathPrefix;
+  }
+
+  private final List<String> hadoopDependencyCoordinates;
+  private final URI distributedSuccessCache;
+  private final String jobPriority;
+  private final String segmentOutputPath;
+  private final String classpathPrefix;
+
+  @JsonProperty
+  public List<String> getHadoopDependencyCoordinates()
+  {
+    return hadoopDependencyCoordinates;
+  }
+
+  @JsonProperty
+  public URI getDistributedSuccessCache()
+  {
+    return distributedSuccessCache;
+  }
+
+  @JsonProperty
+  public String getJobPriority()
+  {
+    return jobPriority;
+  }
+
+  @JsonProperty
+  public String getSegmentOutputPath()
+  {
+    return segmentOutputPath;
+  }
+
+  @Override
+  @JsonProperty
+  public String getClasspathPrefix()
+  {
+    return classpathPrefix;
+  }
+
+  @Override
+  protected Iterable<Task> generateSubTasks(
+      final String groupId,
+      final Iterable<DataSegment> segments,
+      final IndexSpec indexSpec,
+      final boolean force,
+      final boolean validate
+  )
+  {
+    return Collections.<Task>singleton(
+        new ConverterSubTask(
+            ImmutableList.copyOf(segments),
+            this
+        )
+    );
+  }
+
+  @Override
+  @JsonIgnore
+  public DataSegment getSegment()
+  {
+    throw new UOE(
+        "Sub-less data segment not supported for hadoop converter task. Specify interval and datasource instead"
+    );
+  }
+
+  @Override
+  public String getType()
+  {
+    return TYPE;
+  }
+
+  public static class ConverterSubTask extends HadoopTask
+  {
+    private final List<DataSegment> segments;
+    private final HadoopConverterTask parent;
+
+    @JsonCreator
+    public ConverterSubTask(
+        @JsonProperty("segments") List<DataSegment> segments,
+        @JsonProperty("parent") HadoopConverterTask parent
+    )
+    {
+      super(
+          joinId(
+              Preconditions.checkNotNull(parent, "parent").getGroupId(),
+              "sub",
+              parent.getInterval().getStart(),
+              parent.getInterval().getEnd()
+          ),
+          parent.getDataSource(),
+          parent.getHadoopDependencyCoordinates()
+      );
+      this.segments = segments;
+      this.parent = parent;
+    }
+
+    @JsonProperty
+    public List<DataSegment> getSegments()
+    {
+      return segments;
+    }
+
+    @JsonProperty
+    public HadoopConverterTask getParent()
+    {
+      return parent;
+    }
+
+    @Override
+    public String getType()
+    {
+      return TYPE + "_sub";
+    }
+
+    @Override
+    public boolean isReady(TaskActionClient taskActionClient) throws Exception
+    {
+      return true;
+    }
+
+    @Override
+    public TaskStatus run(TaskToolbox toolbox) throws Exception
+    {
+      final Map<String, String> hadoopProperties = new HashMap<>();
+      final Properties properties = injector.getInstance(Properties.class);
+      for (String name : properties.stringPropertyNames()) {
+        if (name.startsWith("hadoop.")) {
+          hadoopProperties.put(name.substring("hadoop.".length()), properties.getProperty(name));
+        }
+      }
+      final ClassLoader loader = buildClassLoader(toolbox);
+      final HadoopDruidConverterConfig config = new HadoopDruidConverterConfig(
+          getDataSource(),
+          parent.getInterval(),
+          parent.getIndexSpec(),
+          segments,
+          parent.isValidate(),
+          parent.getDistributedSuccessCache(),
+          hadoopProperties,
+          parent.getJobPriority(),
+          parent.getSegmentOutputPath()
+      );
+
+      final String finishedSegmentString = invokeForeignLoader(
+          "io.druid.indexing.common.task.HadoopConverterTask$JobInvoker",
+          new String[]{HadoopDruidConverterConfig.jsonMapper.writeValueAsString(config)},
+          loader
+      );
+      if (finishedSegmentString == null) {
+        return TaskStatus.failure(getId());
+      }
+      final List<DataSegment> finishedSegments = HadoopDruidConverterConfig.jsonMapper.readValue(
+          finishedSegmentString,
+          new TypeReference<List<DataSegment>>()
+          {
+          }
+      );
+      log.debug("Found new segments %s", Arrays.toString(finishedSegments.toArray()));
+      toolbox.pushSegments(finishedSegments);
+      return success();
+    }
+  }
+
+  public static class JobInvoker
+  {
+    public static String runTask(String[] input)
+    {
+      final HadoopDruidConverterConfig config;
+      try {
+        config = HadoopDruidConverterConfig.jsonMapper.readValue(
+            input[0],
+            HadoopDruidConverterConfig.class
+        );
+      }
+      catch (IOException e) {
+        throw Throwables.propagate(e);
+      }
+      final HadoopConverterJob hadoopConverterJob = new HadoopConverterJob(config);
+      try {
+        final List<DataSegment> result = hadoopConverterJob.run();
+        return result == null
+               ? null
+               : HadoopDruidConverterConfig.jsonMapper.writeValueAsString(result);
+      }
+      catch (IOException e) {
+        throw Throwables.propagate(e);
+      }
+    }
+  }
+}
diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/Task.java b/indexing-service/src/main/java/io/druid/indexing/common/task/Task.java
index c8151e5b6792..31d5e4716541 100644
--- a/indexing-service/src/main/java/io/druid/indexing/common/task/Task.java
+++ b/indexing-service/src/main/java/io/druid/indexing/common/task/Task.java
@@ -47,6 +47,8 @@
     @JsonSubTypes.Type(name = "restore", value = RestoreTask.class),
     @JsonSubTypes.Type(name = "index", value = IndexTask.class),
     @JsonSubTypes.Type(name = "index_hadoop", value = HadoopIndexTask.class),
+    @JsonSubTypes.Type(name = "hadoop_convert_segment", value = HadoopConverterTask.class),
+    @JsonSubTypes.Type(name = "hadoop_convert_segment_sub", value = HadoopConverterTask.ConverterSubTask.class),
     @JsonSubTypes.Type(name = "index_realtime", value = RealtimeIndexTask.class),
     @JsonSubTypes.Type(name = "noop", value = NoopTask.class),
     @JsonSubTypes.Type(name = "version_converter", value = ConvertSegmentTask.class), // Backwards compat - Deprecated
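The two new entries in Task's @JsonSubTypes list are what let a submitted JSON task spec deserialize into the right concrete class: the "type" field selects the registered subtype. A minimal sketch of that dispatch mechanism (SketchTask and Noop are illustrative stand-ins, not Druid classes):

    import com.fasterxml.jackson.annotation.JsonSubTypes;
    import com.fasterxml.jackson.annotation.JsonTypeInfo;
    import com.fasterxml.jackson.databind.ObjectMapper;

    final class SubtypeSketch
    {
      @JsonTypeInfo(use = JsonTypeInfo.Id.NAME, property = "type")
      @JsonSubTypes({@JsonSubTypes.Type(name = "noop", value = Noop.class)})
      interface SketchTask
      {
      }

      static final class Noop implements SketchTask
      {
      }

      public static void main(String[] args) throws Exception
      {
        // The "type" discriminator picks the registered subtype at read time.
        final SketchTask task = new ObjectMapper().readValue("{\"type\": \"noop\"}", SketchTask.class);
        System.out.println(task.getClass().getSimpleName()); // Noop
      }
    }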
diff --git a/indexing-service/src/test/java/io/druid/indexing/common/task/HadoopConverterTaskSerDeTest.java b/indexing-service/src/test/java/io/druid/indexing/common/task/HadoopConverterTaskSerDeTest.java
new file mode 100644
index 000000000000..e2236f03ce61
--- /dev/null
+++ b/indexing-service/src/test/java/io/druid/indexing/common/task/HadoopConverterTaskSerDeTest.java
@@ -0,0 +1,315 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package io.druid.indexing.common.task;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableMap;
+import io.druid.jackson.DefaultObjectMapper;
+import io.druid.segment.IndexSpec;
+import io.druid.segment.data.ConciseBitmapSerdeFactory;
+import io.druid.timeline.DataSegment;
+import io.druid.timeline.partition.NoneShardSpec;
+import io.druid.timeline.partition.ShardSpec;
+import org.joda.time.Interval;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.net.URI;
+import java.util.List;
+import java.util.Map;
+
+public class HadoopConverterTaskSerDeTest
+{
+  private static ObjectMapper objectMapper;
+  private static final String TASK_ID = "task id";
+  private static final String DATA_SOURCE = "datasource";
+  private static final Interval INTERVAL = Interval.parse("2010/2011");
+  private static final String SEGMENT_VERSION = "some version";
+  private static final Map<String, Object> LOAD_SPEC = ImmutableMap.<String, Object>of("someKey", "someVal");
+  private static final List<String> DIMENSIONS = ImmutableList.of("dim1", "dim2");
+  private static final List<String> METRICS = ImmutableList.of("metric1", "metric2");
+  private static final ShardSpec SHARD_SPEC = new NoneShardSpec();
+  private static final int BINARY_VERSION = 34718;
+  private static final long SEGMENT_SIZE = 7483901348790L;
+  private static final IndexSpec INDEX_SPEC = new IndexSpec(new ConciseBitmapSerdeFactory(), "lz4", "lzf");
+  private static final DataSegment DATA_SEGMENT = new DataSegment(
+      DATA_SOURCE,
+      INTERVAL,
+      SEGMENT_VERSION,
+      LOAD_SPEC,
+      DIMENSIONS,
+      METRICS,
+      SHARD_SPEC,
+      BINARY_VERSION,
+      SEGMENT_SIZE
+  );
+  private static final List<String> HADOOP_DEPENDENCY = ImmutableList.of("dependency1");
+  private static final URI DISTRIBUTED_CACHE = URI.create("http://your.momma"); // Should have plenty of space
+  private static final String PRIORITY = "0";
+  private static final String OUTPUT_PATH = "/dev/null";
+  private static final String CLASSPATH_PREFIX = "something:where:I:need:stuff";
+
+  @BeforeClass
+  public static void setUpStatic()
+  {
+    objectMapper = new DefaultObjectMapper();
+    objectMapper.registerSubtypes(
+        HadoopConverterTask.class,
+        HadoopConverterTask.ConverterSubTask.class,
+        ShardSpec.class
+    );
+  }
+
+  @Test
+  public void testSimpleConverterTaskSerDe() throws IOException
+  {
+    HadoopConverterTask orig = new HadoopConverterTask(
+        TASK_ID,
+        DATA_SOURCE,
+        INTERVAL,
+        INDEX_SPEC,
+        true,
+        true,
+        HADOOP_DEPENDENCY,
+        DISTRIBUTED_CACHE,
+        PRIORITY,
+        OUTPUT_PATH,
+        CLASSPATH_PREFIX
+    );
+    final String strOrig = objectMapper.writeValueAsString(orig);
+    HadoopConverterTask other = objectMapper.readValue(strOrig, HadoopConverterTask.class);
+    Assert.assertEquals(strOrig, objectMapper.writeValueAsString(other));
+    Assert.assertFalse(orig == other);
+    Assert.assertEquals(orig, other);
+    assertExpectedTask(other);
+  }
+
+  @Test
+  public void testSimpleSubTaskSerDe() throws IOException
+  {
+    HadoopConverterTask parent = new HadoopConverterTask(
+        TASK_ID,
+        DATA_SOURCE,
+        INTERVAL,
+        INDEX_SPEC,
+        true,
+        true,
+        HADOOP_DEPENDENCY,
+        DISTRIBUTED_CACHE,
+        PRIORITY,
+        OUTPUT_PATH,
+        CLASSPATH_PREFIX
+    );
+    HadoopConverterTask.ConverterSubTask subTask = new HadoopConverterTask.ConverterSubTask(
+        ImmutableList.of(
+            DATA_SEGMENT
+        ), parent
+    );
+    final String origString = objectMapper.writeValueAsString(subTask);
+    final HadoopConverterTask.ConverterSubTask otherSub = objectMapper.readValue(
+        origString,
+        HadoopConverterTask.ConverterSubTask.class
+    );
+    Assert.assertEquals(subTask, otherSub);
+    Assert.assertEquals(origString, objectMapper.writeValueAsString(otherSub));
+    Assert.assertEquals(ImmutableList.of(DATA_SEGMENT), otherSub.getSegments());
+    Assert.assertFalse(parent == otherSub.getParent());
+    Assert.assertEquals(parent, otherSub.getParent());
+
+    assertExpectedTask(otherSub.getParent());
+  }
+
+  private static void assertExpectedTask(HadoopConverterTask other)
+  {
+    Assert.assertEquals(TASK_ID, other.getId());
+    Assert.assertEquals(DATA_SOURCE, other.getDataSource());
+    Assert.assertEquals(INTERVAL, other.getInterval());
+    Assert.assertEquals(INDEX_SPEC, other.getIndexSpec());
+    Assert.assertTrue(other.isForce());
+    Assert.assertTrue(other.isValidate());
+    Assert.assertEquals(HADOOP_DEPENDENCY, other.getHadoopDependencyCoordinates());
+    Assert.assertEquals(DISTRIBUTED_CACHE, other.getDistributedSuccessCache());
+    Assert.assertEquals(PRIORITY, other.getJobPriority());
+    Assert.assertEquals(OUTPUT_PATH, other.getSegmentOutputPath());
+    Assert.assertEquals(CLASSPATH_PREFIX, other.getClasspathPrefix());
+  }
+
+  @Test
+  public void testSubTask()
+  {
+    HadoopConverterTask parent = new HadoopConverterTask(
+        TASK_ID,
+        DATA_SOURCE,
+        INTERVAL,
+        INDEX_SPEC,
+        true,
+        true,
+        HADOOP_DEPENDENCY,
+        DISTRIBUTED_CACHE,
+        PRIORITY,
+        OUTPUT_PATH,
+        CLASSPATH_PREFIX
+    );
+    HadoopConverterTask.ConverterSubTask subTask = new HadoopConverterTask.ConverterSubTask(
+        ImmutableList.of(
+            DATA_SEGMENT
+        ), parent
+    );
+    Assert.assertEquals(parent.getType(), "hadoop_convert_segment");
+    Assert.assertEquals(parent.getType() + "_sub", subTask.getType());
+  }
+
+  @Test
+  public void testNullValidate()
+  {
+    HadoopConverterTask orig = new HadoopConverterTask(
+        TASK_ID,
+        DATA_SOURCE,
+        INTERVAL,
+        INDEX_SPEC,
+        true,
+        null,
+        HADOOP_DEPENDENCY,
+        DISTRIBUTED_CACHE,
+        PRIORITY,
+        OUTPUT_PATH,
+        CLASSPATH_PREFIX
+    );
+    Assert.assertTrue(orig.isValidate());
+  }
+
+  @Test
+  public void testMinimal()
+  {
+    HadoopConverterTask parent = new HadoopConverterTask(
+        null,
+        DATA_SOURCE,
+        INTERVAL,
+        null,
+        true,
+        null,
+        null,
+        DISTRIBUTED_CACHE,
+        null,
+        OUTPUT_PATH,
+        null
+    );
+    Assert.assertEquals(DATA_SOURCE, parent.getDataSource());
+    Assert.assertEquals(INTERVAL, parent.getInterval());
+    Assert.assertEquals(DISTRIBUTED_CACHE, parent.getDistributedSuccessCache());
+    Assert.assertEquals(OUTPUT_PATH, parent.getSegmentOutputPath());
+    Assert.assertNotNull(parent.getId());
+    Assert.assertFalse(parent.getId().isEmpty());
+  }
+
+  @Test(expected = UnsupportedOperationException.class)
+  public void testGetDataSegment()
+  {
+    HadoopConverterTask orig = new HadoopConverterTask(
+        TASK_ID,
+        DATA_SOURCE,
+        INTERVAL,
+        INDEX_SPEC,
+        true,
+        null,
+        HADOOP_DEPENDENCY,
+        DISTRIBUTED_CACHE,
+        PRIORITY,
+        OUTPUT_PATH,
+        CLASSPATH_PREFIX
+    );
+    orig.getSegment();
+  }
+
+  @Test(expected = NullPointerException.class)
+  public void testNull1()
+  {
+    HadoopConverterTask parent = new HadoopConverterTask(
+        null,
+        null,
+        INTERVAL,
+        null,
+        true,
+        null,
+        null,
+        DISTRIBUTED_CACHE,
+        null,
+        OUTPUT_PATH,
+        null
+    );
+  }
+
+  @Test(expected = NullPointerException.class)
+  public void testNull2()
+  {
+    HadoopConverterTask parent = new HadoopConverterTask(
+        null,
+        DATA_SOURCE,
+        null,
+        null,
+        true,
+        null,
+        null,
+        DISTRIBUTED_CACHE,
+        null,
+        OUTPUT_PATH,
+        null
+    );
+  }
+
+  @Test(expected = NullPointerException.class)
+  public void testNull3()
+  {
+    HadoopConverterTask parent = new HadoopConverterTask(
+        null,
+        DATA_SOURCE,
+        INTERVAL,
+        null,
+        true,
+        null,
+        null,
+        null,
+        null,
+        OUTPUT_PATH,
+        null
+    );
+  }
+
+  @Test(expected = NullPointerException.class)
+  public void testNull4()
+  {
+    HadoopConverterTask parent = new HadoopConverterTask(
+        null,
+        DATA_SOURCE,
+        INTERVAL,
+        null,
+        true,
+        null,
+        null,
+        DISTRIBUTED_CACHE,
+        null,
+        null,
+        null
+    );
+  }
+}
diff --git a/pom.xml b/pom.xml
index 65e09189a983..20a5aab78f96 100644
--- a/pom.xml
+++ b/pom.xml
@@ -72,6 +72,7 @@
         2.4.4
         2.2
         1.7.10
+        2.3.0
@@ -497,7 +498,7 @@
             org.apache.hadoop
             hadoop-client
-            2.3.0
+            ${hadoop.compile.version}
             provided
diff --git a/services/src/main/java/io/druid/cli/Main.java b/services/src/main/java/io/druid/cli/Main.java
index bb69177e6a88..a5d89f24218d 100644
--- a/services/src/main/java/io/druid/cli/Main.java
+++ b/services/src/main/java/io/druid/cli/Main.java
@@ -62,9 +62,9 @@ public static void main(String[] args)
           .withCommands(ConvertProperties.class, DruidJsonValidator.class, PullDependencies.class, CreateTables.class);
 
     builder.withGroup("index")
-        .withDescription("Run indexing for druid")
-        .withDefaultCommand(Help.class)
-        .withCommands(CliHadoopIndexer.class);
+           .withDescription("Run indexing for druid")
+           .withDefaultCommand(Help.class)
+           .withCommands(CliHadoopIndexer.class);
 
     builder.withGroup("internal")
            .withDescription("Processes that Druid runs \"internally\", you should rarely use these directly")
@@ -73,7 +73,10 @@ public static void main(String[] args)
     final Injector injector = GuiceInjectors.makeStartupInjector();
     final ExtensionsConfig config = injector.getInstance(ExtensionsConfig.class);
-    final Collection<CliCommandCreator> extensionCommands = Initialization.getFromExtensions(config, CliCommandCreator.class);
+    final Collection<CliCommandCreator> extensionCommands = Initialization.getFromExtensions(
+        config,
+        CliCommandCreator.class
+    );
 
     for (CliCommandCreator creator : extensionCommands) {
       creator.addCommands(builder);
@@ -82,7 +85,7 @@ public static void main(String[] args)
     final Cli<Runnable> cli = builder.build();
     try {
       final Runnable command = cli.parse(args);
-      if (! (command instanceof Help)) { // Hack to work around Help not liking being injected
+      if (!(command instanceof Help)) { // Hack to work around Help not liking being injected
         injector.injectMembers(command);
       }
       command.run();
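ConverterSubTask.run() never links against the Hadoop job directly: it serializes the config to JSON, resolves JobInvoker inside the classloader built from the Hadoop dependency coordinates, and receives the finished segments back as JSON. A sketch of what an invokeForeignLoader-style bridge has to do; the reflection plumbing and names here are illustrative assumptions, the real implementation lives in HadoopTask:

    import java.lang.reflect.Method;

    final class ForeignLoaderSketch
    {
      // Call a static String runTask(String[]) on a class from another loader,
      // keeping that loader's classes out of the caller's class space.
      static String invoke(final String className, final String[] args, final ClassLoader loader)
          throws Exception
      {
        final ClassLoader previous = Thread.currentThread().getContextClassLoader();
        try {
          Thread.currentThread().setContextClassLoader(loader); // Hadoop resolves classes via the TCCL
          final Class<?> clazz = Class.forName(className, true, loader);
          final Method runTask = clazz.getMethod("runTask", String[].class);
          return (String) runTask.invoke(null, new Object[]{args});
        }
        finally {
          Thread.currentThread().setContextClassLoader(previous);
        }
      }
    }

Only strings cross the boundary, which is why JobInvoker.runTask takes the config as input[0] and returns the segment list re-serialized to JSON.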