82 commits
fac9d34
Fix lookup logging on node start (#5206) (#5207)
jon-wei Jan 4, 2018
e64092a
Fix broken KafkaEmitterConfig parsing (#5201) (#5208)
jon-wei Jan 4, 2018
14da754
Support replaceExisting parameter for segments pushers (#5187) (#5209)
gianm Jan 4, 2018
f3dec31
Upgrade to Calcite 1.15.0 (#5210) (#5214)
jon-wei Jan 4, 2018
a15becb
Add missing auth doc links (#5224) (#5225)
jon-wei Jan 5, 2018
5555809
Exclude sketches-core from druid-sql (#5223) (#5226)
jon-wei Jan 6, 2018
ee96a6a
Throw away rows with timestamps beyond long bounds in kafka indexing …
jon-wei Jan 9, 2018
d14b261
Support for encryption of MySQL connections (#5122) (#5247)
jon-wei Jan 10, 2018
61bef7d
Fix state check bug in Kafka Index Task (#5204) (#5248)
pjain1 Jan 10, 2018
87a1170
[Backport] fix timewarp query results when using timezones and crossi…
jon-wei Jan 12, 2018
08bd954
Fix APPROX_QUANTILE on outer groupBys. (#5253) (#5254)
gianm Jan 12, 2018
d7fe792
[maven-release-plugin] prepare release druid-0.12.0-rc1
jon-wei Jan 18, 2018
b287f8f
[maven-release-plugin] prepare for next development iteration
jon-wei Jan 18, 2018
5a8b106
Fix bugs in ImmutableRTree; Merge bytebuffer-collections module into …
leventov Jan 23, 2018
3f53820
Fix rewrite of queryPath for encoded joda intervals as query param on…
nishantmonu51 Jan 23, 2018
cdcd7e3
clean up intermediate_pushes directory for LocalDataSegmentPusher (#5…
dclim Jan 30, 2018
00f78ee
[Backport] fix RemoteTaskRunner terminating lazy workers below autosc…
gianm Feb 2, 2018
b5e1108
Remove Escalator jetty http client escalation method (#5322) (#5348)
jon-wei Feb 5, 2018
9a2a182
Fix CompactionTask doc (#5354)
jihoonson Feb 6, 2018
1d251d4
More ParseException handling for numeric dimensions (#5312) (#5356)
gianm Feb 7, 2018
7f28090
Fix two improper casts in HavingSpecMetricComparator. (#5352) (#5357)
gianm Feb 7, 2018
460ed49
Fix races in LookupSnapshotTaker, CoordinatorPollingBasicAuthenticato…
jon-wei Feb 7, 2018
4ba1bb8
Fix race in CoordinatorPollingBasicAuthorizerCacheManager. (#5359) (#…
jon-wei Feb 7, 2018
49e27d6
[Backport] Use a separate snapshot file per lookup tier. (#5358) (#5367)
jon-wei Feb 7, 2018
ab765b9
[Backport] Add metamx emitter, http clients, and metrics packages to …
jon-wei Feb 15, 2018
53eb4c7
fix segment info in Kafka indexing service docs (#5390) (#5393)
gianm Feb 15, 2018
93b79ed
Debug logging in HttpPostEmitter and Batch (#5365) (#5394)
gianm Feb 15, 2018
359d93e
Make HttpPostEmitter more robust in the face of serious errors (like …
gianm Feb 16, 2018
13b6ffd
Fix early publishing to early pushing in batch indexing & refactor ap…
jihoonson Feb 16, 2018
8c46900
[maven-release-plugin] prepare release druid-0.12.0-rc2
jon-wei Feb 16, 2018
ebb0592
[maven-release-plugin] prepare for next development iteration
jon-wei Feb 16, 2018
db31e2a
[Backport] Fix DefaultLimitSpec to respect sortByDimsFirst (#5398)
jihoonson Feb 17, 2018
3e84979
[maven-release-plugin] prepare release druid-0.12.0-rc3
jon-wei Feb 17, 2018
ce8220c
[maven-release-plugin] prepare for next development iteration
jon-wei Feb 17, 2018
5c73cba
Automatically adjust com.metamx.metrics Monitor class references (#54…
jon-wei Feb 23, 2018
0be7c55
Skip normal authentication for JDBC requests in Router (#5435) (#5442)
jon-wei Feb 28, 2018
d094f56
add task priority for kafka indexing (#5446)
jihoonson Mar 1, 2018
5a11b09
[Backport] Fix GroupBy limit push down descending sorting on numeric …
jon-wei Mar 2, 2018
c9e5f1b
Fix authorization check in supervisor history API (#5460) (#5463)
jon-wei Mar 3, 2018
8265f44
[Backport] Fix JSON serde for taskStatusPlus (#5469) (#5472)
jon-wei Mar 6, 2018
125552a
[maven-release-plugin] prepare release druid-0.12.0
jon-wei Mar 6, 2018
f650a1a
[maven-release-plugin] prepare for next development iteration
jon-wei Mar 6, 2018
f2534f0
SQL: Throttle metadata refreshes when they fail. (#5328) (#5599)
gianm Apr 10, 2018
4246779
Respect forceHashAggregation in queryContext (#5533) (#5609)
gianm Apr 10, 2018
ca43968
Add overlord unsecured paths to coordinator when using combined servi…
gianm Apr 10, 2018
d10c9ca
pass configuration from context into JobConf for determining Datasour…
gianm Apr 10, 2018
561b1b9
Lookups: Inherit "injective" from registered lookups, improve docs. (…
gianm Apr 10, 2018
d39a73e
More memory limiting for HttpPostEmitter (#5300) (#5602)
gianm Apr 10, 2018
8f43374
Fix round robining in router. (#5500) (#5612)
gianm Apr 10, 2018
a35fe63
SegmentMetadataQuery: Fix default interval handling. (#5489) (#5603)
gianm Apr 10, 2018
1a0c6d5
Authorize supervisor history instead of current active supervisors fo…
gianm Apr 10, 2018
62042c8
Fix SQLMetadataSegmentManager to allow succesive start and stop (#555…
gianm Apr 10, 2018
ae29895
Fix indexTask to respect forceExtendableShardSpecs (#5509) (#5607)
gianm Apr 10, 2018
f4e9e6b
ArrayAggregation: Use long to avoid overflow (#5544) (#5610)
gianm Apr 10, 2018
05fc1a3
DoublesSketchModule: Fix serde for DoublesSketchMergeAggregatorFactor…
gianm Apr 10, 2018
3a51416
Fix Kerberos Authentication failing requests without cookies and excl…
nishantmonu51 Apr 10, 2018
0b4632c
[Backport] Add missing type for MapVirtualColumn (#5616)
jihoonson Apr 10, 2018
aa37a78
Fix supervisor tombstone auth handling (#5504) (#5617)
gianm Apr 10, 2018
ce89de5
ParallelCombiner: Fix buffer leak on exception in "combine". (#5630) …
gianm Apr 12, 2018
5936b40
Fix coordinator loadStatus performance (#5632) (#5636)
jon-wei Apr 12, 2018
66ea105
Replace EmittedBatchCounter and UpdateCounter with ConcurrentAwaitabl…
gianm Apr 13, 2018
012a5be
[Backport] Fix NPE in compactionTask (#5645)
jihoonson Apr 17, 2018
5740ce9
[Backport] Fix HTTP OPTIONS request auth handling (#5638) (#5654)
jon-wei Apr 17, 2018
b551c9c
[Backport] Fix loadstatus?full double counting expected segments (#5680)
jihoonson Apr 24, 2018
7489fc9
[Backport] Add missing doc for automatic pendingSegments (#5682)
jihoonson Apr 24, 2018
0738e5c
[Backport] Fix coordinator's dataSource api with full parameter (#5679)
jihoonson Apr 24, 2018
a23cd5c
Use unique segment paths for Kafka indexing (#5692) (#5718)
dclim Apr 30, 2018
e2932fe
[maven-release-plugin] prepare release druid-0.12.1-rc1
jihoonson May 4, 2018
fc716fa
[maven-release-plugin] prepare for next development iteration
jihoonson May 4, 2018
7dc4f4b
[Backport] Fix Appenderator.push() to commit the metadata of all segm…
jihoonson May 8, 2018
8b7a2da
Kerberos Spnego Authentication Router Issue (#5706) (#5757)
b-slim May 8, 2018
af06da4
SegmentLoadDropHandler: Fix deadlock when segments have errors loadin…
gianm May 9, 2018
e0e55ee
[Backport] Fix metrics for inserting segments (#5759)
jihoonson May 10, 2018
a8d97a4
Fix KerberosAuthenticator serverPrincipal host replacement (#5766) (#…
jon-wei May 11, 2018
0a41d37
[maven-release-plugin] prepare release druid-0.12.1-rc2
jihoonson May 15, 2018
dffc819
[maven-release-plugin] prepare for next development iteration
jihoonson May 15, 2018
1b61559
[maven-release-plugin] prepare release druid-0.12.1
jihoonson Jun 5, 2018
b07b3af
[maven-release-plugin] prepare for next development iteration
jihoonson Jun 5, 2018
f974bb0
Fix defaultQueryTimeout (#5807) (#5944)
jihoonson Jul 5, 2018
15f99ec
Fix for when Hadoop dataSource inputSpec is specified multiple times.…
jihoonson Jul 5, 2018
cf03067
[Backport] Allow reordered segment allocation in kafka indexing servi…
jihoonson Jul 5, 2018
df7e822
Coordinator primary segment assignment fix (#5532)
clintropolis Apr 2, 2018
2 changes: 1 addition & 1 deletion api/pom.xml
@@ -28,7 +28,7 @@
<parent>
<groupId>io.druid</groupId>
<artifactId>druid</artifactId>
<version>0.12.0-SNAPSHOT</version>
<version>0.12.2-SNAPSHOT</version>
</parent>

<dependencies>
6 changes: 4 additions & 2 deletions api/src/main/java/io/druid/guice/JsonConfigurator.java
@@ -93,7 +93,6 @@ public <T> T configurate(Properties props, String propertyPrefix, Class<T> clazz
log.info(e, "Unable to parse [%s]=[%s] as a json object, using as is.", prop, propValue);
value = propValue;
}

hieraricalPutValue(propertyPrefix, prop, prop.substring(propertyBase.length()), value, jsonMap);
}
}
@@ -175,8 +174,11 @@ private static void hieraricalPutValue(
)
{
int dotIndex = property.indexOf('.');
// Always put the property under its full name, even if it is of the form a.b. This makes sure the property is
// available for classes whose JsonProperty names are themselves of the form a.b.
// Note: this will cause more properties than strictly required to be present in the jsonMap.
targetMap.put(property, value);
if (dotIndex < 0) {
targetMap.put(property, value);
return;
}
if (dotIndex == 0) {
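A minimal, self-contained sketch of the flattening behavior the new comment describes (an illustration, not the PR's exact code): the full dotted key is always kept, and the nested maps are built alongside it, so @JsonProperty names that themselves contain dots can still be bound.

import java.util.HashMap;
import java.util.Map;

class PropertyFlatteningSketch
{
  @SuppressWarnings("unchecked")
  static void putHierarchically(Map<String, Object> map, String property, Object value)
  {
    map.put(property, value); // always keep the flat dotted key, e.g. "prop2.prop.2"
    int dotIndex = property.indexOf('.');
    if (dotIndex <= 0) {
      return; // no dot (a leading dot is rejected by the real code; ignored here)
    }
    // also build the nested structure, so "a.b" is reachable as map.get("a") -> {"b": value}
    Map<String, Object> child = (Map<String, Object>)
        map.computeIfAbsent(property.substring(0, dotIndex), k -> new HashMap<String, Object>());
    putHierarchically(child, property.substring(dotIndex + 1), value);
  }

  public static void main(String[] args)
  {
    Map<String, Object> map = new HashMap<>();
    putHierarchically(map, "prop2.prop.2", "testing");
    // map now holds both the flat entry {"prop2.prop.2": "testing"} and the
    // nested entries {"prop2": {"prop.2": "testing", "prop": {"2": "testing"}}}
    System.out.println(map);
  }
}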
42 changes: 40 additions & 2 deletions api/src/main/java/io/druid/indexer/TaskStatusPlus.java
@@ -25,6 +25,7 @@
import org.joda.time.DateTime;

import javax.annotation.Nullable;
import java.util.Objects;

public class TaskStatusPlus
{
@@ -40,7 +41,7 @@ public TaskStatusPlus(
@JsonProperty("id") String id,
@JsonProperty("createdTime") DateTime createdTime,
@JsonProperty("queueInsertionTime") DateTime queueInsertionTime,
@JsonProperty("state") @Nullable TaskState state,
@JsonProperty("statusCode") @Nullable TaskState state,
@JsonProperty("duration") @Nullable Long duration,
@JsonProperty("location") TaskLocation location
)
@@ -74,7 +75,8 @@ public DateTime getQueueInsertionTime()
return queueInsertionTime;
}

@JsonProperty
@Nullable
@JsonProperty("statusCode")
public TaskState getState()
{
return state;
@@ -91,4 +93,40 @@ public TaskLocation getLocation()
{
return location;
}

@Override
public boolean equals(Object o)
{
if (this == o) {
return true;
}

if (o == null || getClass() != o.getClass()) {
return false;
}

final TaskStatusPlus that = (TaskStatusPlus) o;
if (!id.equals(that.id)) {
return false;
}
if (!createdTime.equals(that.createdTime)) {
return false;
}
if (!queueInsertionTime.equals(that.queueInsertionTime)) {
return false;
}
if (!Objects.equals(state, that.state)) {
return false;
}
if (!Objects.equals(duration, that.duration)) {
return false;
}
return location.equals(that.location);
}

@Override
public int hashCode()
{
return Objects.hash(id, createdTime, queueInsertionTime, state, duration, location);
}
}
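Since the wire name changed from "state" to "statusCode", a round-trip sketch helps show the effect. This assumes DefaultObjectMapper, DateTimes, TaskState, and TaskLocation.unknown() as they exist in the 0.12.x tree; the task id and duration are made up.

import com.fasterxml.jackson.databind.ObjectMapper;
import io.druid.indexer.TaskLocation;
import io.druid.indexer.TaskState;
import io.druid.indexer.TaskStatusPlus;
import io.druid.jackson.DefaultObjectMapper;
import io.druid.java.util.common.DateTimes;

class TaskStatusPlusSerdeSketch
{
  public static void main(String[] args) throws Exception
  {
    ObjectMapper mapper = new DefaultObjectMapper();
    TaskStatusPlus status = new TaskStatusPlus(
        "index_kafka_wikipedia_0", // illustrative task id
        DateTimes.nowUtc(),
        DateTimes.nowUtc(),
        TaskState.RUNNING,
        100L,
        TaskLocation.unknown()
    );
    String json = mapper.writeValueAsString(status);
    // json now carries "statusCode":"RUNNING"; no field named "state" is emitted
    TaskStatusPlus roundTripped = mapper.readValue(json, TaskStatusPlus.class);
    System.out.println(status.equals(roundTripped)); // true, via the equals() added above
  }
}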
27 changes: 27 additions & 0 deletions api/src/main/java/io/druid/segment/loading/DataSegmentFinder.java
@@ -20,8 +20,11 @@
package io.druid.segment.loading;

import io.druid.guice.annotations.ExtensionPoint;
import io.druid.java.util.common.Pair;
import io.druid.java.util.common.logger.Logger;
import io.druid.timeline.DataSegment;

import java.util.Map;
import java.util.Set;

/**
@@ -31,6 +34,8 @@
@ExtensionPoint
public interface DataSegmentFinder
{
Logger log = new Logger(DataSegmentFinder.class);

/**
* This method should first recursively look for descriptor.json (partitionNum_descriptor.json for HDFS data storage) underneath
* workingDirPath and then verify that index.zip (partitionNum_index.zip for HDFS data storage) exists in the same folder.
@@ -46,4 +51,26 @@ public interface DataSegmentFinder
* @return a set of segments that were found underneath workingDirPath
*/
Set<DataSegment> findSegments(String workingDirPath, boolean updateDescriptor) throws SegmentLoadingException;

/**
* Adds dataSegment to timestampedSegments if it is not already present. If an entry with the same segment ID
* exists, it is replaced only when segmentModifiedAt is newer than the stored timestamp.
*
* @param timestampedSegments map of <segmentID, Pair<segment, modifiedAt>> containing segments with modified time
* @param dataSegment segment to add
* @param segmentModifiedAt segment modified timestamp
*/
static void putInMapRetainingNewest(
Map<String, Pair<DataSegment, Long>> timestampedSegments, DataSegment dataSegment, long segmentModifiedAt
)
{
timestampedSegments.merge(
dataSegment.getIdentifier(),
Pair.of(dataSegment, segmentModifiedAt),
(previous, current) -> {
log.warn("Multiple copies of segmentId [%s] found, using newest version", current.lhs.getIdentifier());
return previous.rhs > current.rhs ? previous : current;
}
);
}
}
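A usage sketch for putInMapRetainingNewest: pushing two copies of the same segment ID keeps the one with the newer modified time and logs a warning on the collision. The segment here is illustrative, and the builder defaults are assumed to be enough for a standalone example.

import io.druid.java.util.common.Intervals;
import io.druid.java.util.common.Pair;
import io.druid.segment.loading.DataSegmentFinder;
import io.druid.timeline.DataSegment;

import java.util.HashMap;
import java.util.Map;

class RetainNewestSketch
{
  public static void main(String[] args)
  {
    DataSegment segment = DataSegment.builder()
        .dataSource("wikipedia") // illustrative values throughout
        .interval(Intervals.of("2018-01-01/2018-01-02"))
        .version("v1")
        .size(0)
        .build();

    Map<String, Pair<DataSegment, Long>> timestamped = new HashMap<>();
    DataSegmentFinder.putInMapRetainingNewest(timestamped, segment, 1000L);
    DataSegmentFinder.putInMapRetainingNewest(timestamped, segment, 2000L); // newer copy wins
    System.out.println(timestamped.get(segment.getIdentifier()).rhs);      // 2000
  }
}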
33 changes: 29 additions & 4 deletions api/src/main/java/io/druid/segment/loading/DataSegmentKiller.java
@@ -20,16 +20,41 @@
package io.druid.segment.loading;

import io.druid.guice.annotations.ExtensionPoint;
import io.druid.java.util.common.logger.Logger;
import io.druid.timeline.DataSegment;

import java.io.IOException;

/**
*/
@ExtensionPoint
public interface DataSegmentKiller
{
void kill(DataSegment segments) throws SegmentLoadingException;
void killAll() throws IOException;
Logger log = new Logger(DataSegmentKiller.class);

/**
* Removes segment files (index and metadata) from deep storage.
* @param segment the segment to kill
* @throws SegmentLoadingException if the segment could not be completely removed
*/
void kill(DataSegment segment) throws SegmentLoadingException;

/**
* A more stoic killer who doesn't throw a tantrum if things get messy. Use when killing segments for best-effort
* cleanup.
* @param segment the segment to kill
*/
default void killQuietly(DataSegment segment)
{
try {
kill(segment);
}
catch (Exception e) {
log.debug(e, "Failed to kill segment %s", segment);
}
}

/**
* Like a nuke. Use wisely. Used by the 'reset-cluster' command, and of the built-in deep storage implementations, it
* is only implemented by local and HDFS.
*/
void killAll() throws IOException;
}
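A short sketch of where killQuietly fits, e.g. best-effort cleanup of segments that were pushed speculatively but never committed to the metadata store. The method and variable names here are illustrative, not part of this PR.

// Illustrative helper; assumes java.util.List and the types above are in scope.
void cleanUpAfterFailedPublish(DataSegmentKiller killer, List<DataSegment> pushedButUnpublished)
{
  for (DataSegment segment : pushedButUnpublished) {
    // unlike kill(), failures here are logged at debug level and not propagated
    killer.killQuietly(segment);
  }
}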
52 changes: 47 additions & 5 deletions api/src/main/java/io/druid/segment/loading/DataSegmentPusher.java
@@ -30,6 +30,7 @@
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.UUID;

@ExtensionPoint
public interface DataSegmentPusher
@@ -39,18 +40,53 @@ public interface DataSegmentPusher
@Deprecated
String getPathForHadoop(String dataSource);
String getPathForHadoop();
DataSegment push(File file, DataSegment segment) throws IOException;

/**
* Pushes index files and segment descriptor to deep storage.
* @param file directory containing index files
* @param segment segment descriptor
* @param useUniquePath if true, pushes to a unique file path. This prevents situations where task failures or replica
* tasks can either overwrite or fail to overwrite existing segments leading to the possibility
* of different versions of the same segment ID containing different data. As an example, a Kafka
* indexing task starting at offset A and ending at offset B may push a segment to deep storage
* and then fail before writing the loadSpec to the metadata table, resulting in a replacement
* task being spawned. This replacement will also start at offset A but will read to offset C and
* will then push a segment to deep storage and write the loadSpec metadata. Without unique file
* paths, this can only work correctly if new segments overwrite existing segments. Suppose that
* at this point the task then fails so that the supervisor retries again from offset A. This 3rd
* attempt will overwrite the segments in deep storage before failing to write the loadSpec
* metadata, resulting in inconsistencies in the segment data now in deep storage and copies of
* the segment already loaded by historicals.
*
* If unique paths are used, the caller is responsible for cleaning up segments that were pushed but
* were not written to the metadata table (for example when using replica tasks).
* @return segment descriptor
* @throws IOException
*/
DataSegment push(File file, DataSegment segment, boolean useUniquePath) throws IOException;

//use map instead of LoadSpec class to avoid dependency pollution.
Map<String, Object> makeLoadSpec(URI finalIndexZipFilePath);

/**
* @deprecated backward-compatibility shim that should be removed in the next major release;
* use {@link #getStorageDir(DataSegment, boolean)} instead.
*/
@Deprecated
default String getStorageDir(DataSegment dataSegment)
{
return getDefaultStorageDir(dataSegment);
return getStorageDir(dataSegment, false);
}

default String getStorageDir(DataSegment dataSegment, boolean useUniquePath)
{
return getDefaultStorageDir(dataSegment, useUniquePath);
}

default String makeIndexPathName(DataSegment dataSegment, String indexName)
{
return StringUtils.format("./%s/%s", getStorageDir(dataSegment), indexName);
// This is only called from Hadoop batch, which doesn't require unique segment paths, so set useUniquePath=false
return StringUtils.format("./%s/%s", getStorageDir(dataSegment, false), indexName);
}

/**
@@ -66,13 +102,19 @@ default List<String> getAllowedPropertyPrefixesForHadoop()
// If above format is ever changed, make sure to change it appropriately in other places
// e.g. HDFSDataSegmentKiller uses this information to clean the version, interval and dataSource directories
// on segment deletion if segment being deleted was the only segment
static String getDefaultStorageDir(DataSegment segment)
static String getDefaultStorageDir(DataSegment segment, boolean useUniquePath)
{
return JOINER.join(
segment.getDataSource(),
StringUtils.format("%s_%s", segment.getInterval().getStart(), segment.getInterval().getEnd()),
segment.getVersion(),
segment.getShardSpec().getPartitionNum()
segment.getShardSpec().getPartitionNum(),
useUniquePath ? generateUniquePath() : null
);
}

static String generateUniquePath()
{
return UUID.randomUUID().toString();
}
}
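To make the resulting layout concrete, here is a sketch of the paths getDefaultStorageDir produces in each mode. It assumes JOINER is a skipNulls '/' joiner, which is what makes the null suffix vanish in the non-unique case; the segment values and UUID are made up.

// Illustrative only; `segment` is some DataSegment for dataSource "wikipedia",
// interval 2018-01-01/2018-01-02, version "v1", partition 0.
String plain = DataSegmentPusher.getDefaultStorageDir(segment, false);
// -> "wikipedia/2018-01-01T00:00:00.000Z_2018-01-02T00:00:00.000Z/v1/0"
String unique = DataSegmentPusher.getDefaultStorageDir(segment, true);
// -> "wikipedia/2018-01-01T00:00:00.000Z_2018-01-02T00:00:00.000Z/v1/0/8e30c3f0-..."
// The trailing UUID gives every push its own directory, so a retried or replica task
// can never partially overwrite files that a historical may already have loaded.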
12 changes: 6 additions & 6 deletions api/src/main/java/io/druid/utils/CompressionUtils.java
@@ -36,35 +36,35 @@ public class CompressionUtils
private static final Logger log = new Logger(CompressionUtils.class);


@Deprecated // Use com.metamx.common.CompressionUtils.zip
@Deprecated // Use io.druid.java.util.common.CompressionUtils.zip
public static long zip(File directory, File outputZipFile) throws IOException
{
return io.druid.java.util.common.CompressionUtils.zip(directory, outputZipFile);
}


@Deprecated // Use com.metamx.common.CompressionUtils.zip
@Deprecated // Use io.druid.java.util.common.CompressionUtils.zip
public static long zip(File directory, OutputStream out) throws IOException
{
return io.druid.java.util.common.CompressionUtils.zip(directory, out);
}

@Deprecated // Use com.metamx.common.CompressionUtils.unzip
@Deprecated // Use io.druid.java.util.common.CompressionUtils.unzip
public static void unzip(File pulledFile, File outDir) throws IOException
{
io.druid.java.util.common.CompressionUtils.unzip(pulledFile, outDir);
}

@Deprecated // Use com.metamx.common.CompressionUtils.unzip
@Deprecated // Use io.druid.java.util.common.CompressionUtils.unzip
public static void unzip(InputStream in, File outDir) throws IOException
{
io.druid.java.util.common.CompressionUtils.unzip(in, outDir);
}

/**
* Uncompresses the gzip-compressed `pulledFile` into the `outDir`.
* Unlike `com.metamx.common.CompressionUtils.gunzip`, this function takes an output *DIRECTORY* and tries to guess the file name.
* It is recommended that the caller use `com.metamx.common.CompressionUtils.gunzip` and specify the output file themselves to ensure names are as expected
* Unlike `io.druid.java.util.common.CompressionUtils.gunzip`, this function takes an output *DIRECTORY* and tries to guess the file name.
* It is recommended that callers use `io.druid.java.util.common.CompressionUtils.gunzip` and specify the output file themselves to ensure names are as expected.
*
* @param pulledFile The source file
* @param outDir The destination directory to put the resulting file
34 changes: 30 additions & 4 deletions api/src/test/java/io/druid/guice/JsonConfiguratorTest.java
@@ -94,10 +94,13 @@ public ExecutableValidator forExecutables()
public void testTest()
{
Assert.assertEquals(
new MappableObject("p1", ImmutableList.<String>of("p2")),
new MappableObject("p1", ImmutableList.<String>of("p2"))
new MappableObject("p1", ImmutableList.<String>of("p2"), "p2"),
new MappableObject("p1", ImmutableList.<String>of("p2"), "p2")
);
Assert.assertEquals(
new MappableObject("p1", null, null),
new MappableObject("p1", ImmutableList.<String>of(), null)
);
Assert.assertEquals(new MappableObject("p1", null), new MappableObject("p1", ImmutableList.<String>of()));
}

@Test
@@ -140,6 +143,19 @@ public void testQuotedConfig()
Assert.assertEquals("testing \"prop1\"", obj.prop1);
Assert.assertEquals(ImmutableList.of(), obj.prop1List);
}

@Test
public void testPropertyWithDot()
{
final JsonConfigurator configurator = new JsonConfigurator(mapper, validator);
properties.setProperty(PROP_PREFIX + "prop2.prop.2", "testing");
properties.setProperty(PROP_PREFIX + "prop1", "prop1");
final MappableObject obj = configurator.configurate(properties, PROP_PREFIX, MappableObject.class);
Assert.assertEquals("testing", obj.prop2);
Assert.assertEquals(ImmutableList.of(), obj.prop1List);
Assert.assertEquals("prop1", obj.prop1);

}
}

class MappableObject
@@ -148,15 +164,19 @@ class MappableObject
final String prop1;
@JsonProperty("prop1List")
final List<String> prop1List;
@JsonProperty("prop2.prop.2")
final String prop2;

@JsonCreator
protected MappableObject(
@JsonProperty("prop1") final String prop1,
@JsonProperty("prop1List") final List<String> prop1List
@JsonProperty("prop1List") final List<String> prop1List,
@JsonProperty("prop2.prop.2") final String prop2
)
{
this.prop1 = prop1;
this.prop1List = prop1List == null ? ImmutableList.<String>of() : prop1List;
this.prop2 = prop2;
}


@@ -172,6 +192,12 @@ public String getProp1()
return prop1;
}

@JsonProperty
public String getProp2()
{
return prop2;
}

@Override
public boolean equals(Object o)
{