apache · fjy · Dec 29, 2015 · Oct 26, 2015 · himanshug · Dec 17, 2015
diff --git a/docs/content/development/libraries.md b/docs/content/development/libraries.md
@@ -57,3 +57,8 @@ UIs
 * [mistercrunch/panoramix](https://github.com/mistercrunch/panoramix) - A web application to slice, dice and visualize data out of Druid
 * [grafana](https://github.com/Quantiply/grafana-plugins/tree/master/features/druid) - A plugin for [Grafana](http://grafana.org/)
 * [Pivot](https://github.com/implydata/pivot) - An exploratory analytics UI for Druid
+
+Tools
+---
+
+* [Insert Segments](../../operations/insert-segment-to-db.html) - A tool that can insert segments' metadata into Druid metadata storage.
diff --git a/docs/content/operations/insert-segment-to-db.md b/docs/content/operations/insert-segment-to-db.md
@@ -0,0 +1,95 @@
+---
+layout: doc_page
+---
+# insert-segment-to-db Tool
+
+`insert-segment-to-db` is a tool that can insert segments into Druid metadata storage. It is intended to be used
+to update the segment table in metadata storage after people manually migrate segments from one place to another.
+It can also be used to insert missing segments into Druid, or even recover metadata storage by telling it where the
+segments are stored.
+
+Note: This tool expects users to have their Druid cluster running in a "safe" mode, where there are no active tasks to interfere
+with the segments being inserted. Users can optionally bring down the cluster to make 100% sure nothing is interfering.
+
+In order to make it work, users will have to provide metadata storage credentials and deep storage type through JVM arguments
+or a runtime.properties file. Specifically, this tool needs to know
+
+`druid.metadata.storage.type`
+
+`druid.metadata.storage.connector.connectURI`
+
+`druid.metadata.storage.connector.user`
+
+`druid.metadata.storage.connector.password`
+
+`druid.storage.type`
+
+Besides the properties above, you also need to specify the location where the segments are stored and whether you want to
+update descriptor.json. These two can be provided through command line arguments.
+
+`--workingDir` (Required)
+
+    The directory URI where segments are stored. This tool will recursively look for segments underneath this directory
+    and insert/update these segments in metadata storage.
+    Attention: workingDir must be a complete URI, which means it must be prefixed with scheme type. For example,
+    hdfs://hostname:port/segment_directory
+
+`--updateDescriptor` (Optional)
+
+    If set to true, this tool will update the `loadSpec` field in `descriptor.json` if the path in `loadSpec` is different from
+    where `descriptor.json` was found. Default value is `true`.
+
+Note: you will also need to load different Druid extensions per the metadata and deep storage you use. For example, if you
+use `mysql` as metadata storage and `HDFS` as deep storage, you should load `mysql-metadata-storage` and `druid-hdfs-storage`
+extensions.
+
+
+Example:
+
+Suppose your metadata storage is `mysql` and you've migrated some segments to a directory in HDFS, and that directory looks
+like this,
+
+```
+Directory path: /druid/storage/wikipedia
+
+├── 2013-08-31T000000.000Z_2013-09-01T000000.000Z
+│   └── 2015-10-21T22_07_57.074Z
+│       └── 0
+│           ├── descriptor.json
+│           └── index.zip
+├── 2013-09-01T000000.000Z_2013-09-02T000000.000Z
+│   └── 2015-10-21T22_07_57.074Z
+│       └── 0
+│           ├── descriptor.json
+│           └── index.zip
+├── 2013-09-02T000000.000Z_2013-09-03T000000.000Z
+│   └── 2015-10-21T22_07_57.074Z
+│       └── 0
+│           ├── descriptor.json
+│           └── index.zip
+└── 2013-09-03T000000.000Z_2013-09-04T000000.000Z
+    └── 2015-10-21T22_07_57.074Z
+        └── 0
+            ├── descriptor.json
+            └── index.zip
+```
+
+To load all these segments into `mysql`, you can fire the command below,
+
+```
+java 
+-Ddruid.metadata.storage.type=mysql 
+-Ddruid.metadata.storage.connector.connectURI=jdbc\:mysql\://localhost\:3306/druid 
+-Ddruid.metadata.storage.connector.user=druid 
+-Ddruid.metadata.storage.connector.password=diurd 
+-Ddruid.extensions.loadList=[\"mysql-metadata-storage\",\"druid-hdfs-storage\"] 
+-Ddruid.storage.type=hdfs
+-cp $DRUID_CLASSPATH 
+io.druid.cli.Main tools insert-segment --workingDir hdfs://host:port/druid/storage/wikipedia --updateDescriptor true
+```
+
+In this example, the `mysql` metadata storage settings and the deep storage type are provided through JVM arguments; you can
+optionally put all of them in a runtime.properties file and include it in the Druid classpath. Note that we also include `mysql-metadata-storage`
+and `druid-hdfs-storage` in the extension list.
+
+After running this command, the segments table in `mysql` should store the new location for each segment we just inserted.
diff --git a/extensions/hdfs-storage/src/main/java/io/druid/storage/hdfs/HdfsDataSegmentFinder.java b/extensions/hdfs-storage/src/main/java/io/druid/storage/hdfs/HdfsDataSegmentFinder.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to Metamarkets Group Inc. (Metamarkets) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  Metamarkets licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package io.druid.storage.hdfs;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.google.common.collect.Sets;
+import com.google.inject.Inject;
+import com.metamx.common.logger.Logger;
+import io.druid.segment.loading.DataSegmentFinder;
+import io.druid.segment.loading.SegmentLoadingException;
+import io.druid.timeline.DataSegment;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.LocatedFileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.RemoteIterator;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Finds Druid segments under an HDFS directory by recursively searching it for descriptor.json files.
+ */
+public class HdfsDataSegmentFinder implements DataSegmentFinder
+{
+  private static final Logger log = new Logger(HdfsDataSegmentFinder.class);
+
+  private final Configuration config;
+  private final ObjectMapper mapper;
+
+  @Inject
+  public HdfsDataSegmentFinder(Configuration config, ObjectMapper mapper)
+  {
+    this.config = config;
+    this.mapper = mapper;
+  }
+
+  /**
+   * Returns every segment under workingDirPathStr whose descriptor.json has an index.zip beside it. The loadSpec of
+   * each returned segment is set to type HdfsStorageDruidModule.SCHEME and the scheme-less path of that index.zip.
+   *
+   * @param workingDirPathStr directory to search recursively; its FileSystem is resolved with the injected Configuration
+   * @param updateDescriptor  if true, a descriptor.json whose loadSpec had to be corrected is overwritten in place
+   * @return the segments found (an empty set when no descriptor.json exists under the directory)
+   * @throws SegmentLoadingException if the directory is missing or is not a directory, if a descriptor.json has no
+   *                                 index.zip beside it, or if reading or writing raises an IOException
+   */
+  @Override
+  public Set<DataSegment> findSegments(String workingDirPathStr, boolean updateDescriptor)
+      throws SegmentLoadingException
+  {
+    final Set<DataSegment> segments = Sets.newHashSet();
+    final Path workingDirPath = new Path(workingDirPathStr);
+    try {
+      final FileSystem fs = workingDirPath.getFileSystem(config);
+      // fs.getScheme() is not logged: the base FileSystem throws UnsupportedOperationException; the URI has the scheme.
+      log.info("FileSystem URI:" + fs.getUri().toString());
+      if (!fs.exists(workingDirPath)) {
+        throw new SegmentLoadingException("Working directory [%s] doesn't exist.", workingDirPath);
+      }
+      if (!fs.isDirectory(workingDirPath)) {
+        throw new SegmentLoadingException("Working directory [%s] is not a directory!?", workingDirPath);
+      }
+
+      final RemoteIterator<LocatedFileStatus> it = fs.listFiles(workingDirPath, true);
+      while (it.hasNext()) {
+        final Path path = it.next().getPath();
+        if (!path.getName().equals("descriptor.json")) {
+          continue;
+        }
+        final Path indexZip = new Path(path.getParent(), "index.zip");
+        if (!fs.exists(indexZip)) {
+          throw new SegmentLoadingException("index.zip didn't exist at [%s] while descriptor.json exists!?", indexZip);
+        }
+        final DataSegment dataSegment = mapper.readValue(fs.open(path), DataSegment.class);
+        log.info("Found segment [%s] located at [%s]", dataSegment.getIdentifier(), indexZip);
+
+        final Map<String, Object> loadSpec = dataSegment.getLoadSpec();
+        final String pathWithoutScheme = indexZip.toUri().getPath();
+        // Constant-first equals: a loadSpec lacking "type" or "path" is treated as stale instead of causing an NPE.
+        if (!HdfsStorageDruidModule.SCHEME.equals(loadSpec.get("type"))
+            || !pathWithoutScheme.equals(loadSpec.get("path"))) {
+          loadSpec.put("type", HdfsStorageDruidModule.SCHEME);
+          loadSpec.put("path", pathWithoutScheme);
+          if (updateDescriptor) {
+            log.info("Updating loadSpec in descriptor.json at [%s] with new path [%s]", path, pathWithoutScheme);
+            mapper.writeValue(fs.create(path, true), dataSegment);
+          }
+        }
+        segments.add(dataSegment);
+      }
+    }
+    catch (IOException e) {
+      throw new SegmentLoadingException(e, "Problems interacting with filesystem[%s].", workingDirPath);
+    }
+    return segments;
+  }
+}
diff --git a/extensions/hdfs-storage/src/main/java/io/druid/storage/hdfs/HdfsLoadSpec.java b/extensions/hdfs-storage/src/main/java/io/druid/storage/hdfs/HdfsLoadSpec.java
@@ -38,17 +38,21 @@ public class HdfsLoadSpec implements LoadSpec
 {
   private final Path path;
   final HdfsDataSegmentPuller puller;
+
   @JsonCreator
   public HdfsLoadSpec(
       @JacksonInject HdfsDataSegmentPuller puller,
       @JsonProperty(value = "path", required = true) String path
-  ){
+  )
+  {
     Preconditions.checkNotNull(path);
     this.path = new Path(path);
     this.puller = puller;
   }
+
   @JsonProperty("path")
-  public final String getPathString(){
+  public final String getPathString()
+  {
     return path.toString();
   }
 

diff --git a/extensions/hdfs-storage/src/main/java/io/druid/storage/hdfs/HdfsStorageDruidModule.java b/extensions/hdfs-storage/src/main/java/io/druid/storage/hdfs/HdfsStorageDruidModule.java
@@ -91,6 +91,7 @@ public void configure(Binder binder)
     Binders.dataSegmentPullerBinder(binder).addBinding(SCHEME).to(HdfsDataSegmentPuller.class).in(LazySingleton.class);
     Binders.dataSegmentPusherBinder(binder).addBinding(SCHEME).to(HdfsDataSegmentPusher.class).in(LazySingleton.class);
     Binders.dataSegmentKillerBinder(binder).addBinding(SCHEME).to(HdfsDataSegmentKiller.class).in(LazySingleton.class);
+    Binders.dataSegmentFinderBinder(binder).addBinding(SCHEME).to(HdfsDataSegmentFinder.class).in(LazySingleton.class);
 
     final Configuration conf = new Configuration();
 
@@ -103,9 +104,11 @@ public void configure(Binder binder)
     try {
       Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
       FileSystem.get(conf);
-    } catch(IOException ex) {
+    }
+    catch (IOException ex) {
       throw Throwables.propagate(ex);
-    } finally {
+    }
+    finally {
       Thread.currentThread().setContextClassLoader(currCtxCl);
     }