apache · drcrallen · May 2, 2016 · Mar 18, 2016 · Apr 21, 2016 · Apr 21, 2016
diff --git a/common/src/main/java/io/druid/data/SearchableVersionedDataFinder.java b/common/src/main/java/io/druid/data/SearchableVersionedDataFinder.java
@@ -19,6 +19,7 @@
 
 package io.druid.data;
 
+import javax.annotation.Nullable;
 import java.util.regex.Pattern;
 
 /**
@@ -41,7 +42,7 @@ public interface SearchableVersionedDataFinder<DataDescriptor>
    *
    * @return A DataDescriptor which matches pattern, is a child of descriptorBase, and is of the most recent "version" at some point during the method execution.
    */
-  DataDescriptor getLatestVersion(DataDescriptor descriptorBase, final Pattern pattern);
+  DataDescriptor getLatestVersion(DataDescriptor descriptorBase, @Nullable final Pattern pattern);
 
   /**
    * @return The class of the descriptor for the data

diff --git a/docs/content/development/extensions-core/namespaced-lookup.md b/docs/content/development/extensions-core/namespaced-lookup.md
@@ -75,35 +75,51 @@ For additional lookups, please see our [extensions list](../development/extensio
 
 ## URI namespace update
 
-The remapping values for each namespaced lookup can be specified by json as per
+The remapping values for each namespaced lookup can be specified by a JSON object, as in the following examples:
 
 ```json
 {
   "type":"uri",
   "namespace":"some_lookup",
-  "uri": "s3://bucket/some/key/prefix/",
+  "uri": "s3://bucket/some/key/prefix/renames-0003.gz",
   "namespaceParseSpec":{
     "format":"csv",
     "columns":["key","value"]
   },
   "pollPeriod":"PT5M",
-  "versionRegex": "renames-[0-9]*\\.gz"
 }
 ```
 
+```json
+{
+  "type":"uri",
+  "namespace":"some_lookup",
+  "uriPrefix": "s3://bucket/some/key/prefix/",
+  "fileRegex":"renames-[0-9]*\\.gz",
+  "namespaceParseSpec":{
+    "format":"csv",
+    "columns":["key","value"]
+  },
+  "pollPeriod":"PT5M"
+}
+```
 |Property|Description|Required|Default|
 |--------|-----------|--------|-------|
 |`namespace`|The namespace to define|Yes||
 |`pollPeriod`|Period between polling for updates|No|0 (only once)|
-|`versionRegex`|Regex to help find newer versions of the namespace data|Yes||
+|`uri`|URI for the file of interest|No|Use `uriPrefix`|
+|`uriPrefix`|A URI which specifies a directory (or other searchable resource) in which to search for files|No|Use `uri`|
+|`fileRegex`|Optional regex for matching the file name under `uriPrefix`. Only used if `uriPrefix` is used|No|`".*"`|
 |`namespaceParseSpec`|How to interpret the data at the URI|Yes||
 
-The `pollPeriod` value specifies the period in ISO 8601 format between checks for updates. If the source of the lookup is capable of providing a timestamp, the lookup will only be updated if it has changed since the prior tick of `pollPeriod`. A value of 0, an absent parameter, or `null` all mean populate once and do not attempt to update. Whenever an update occurs, the updating system will look for a file with the most recent timestamp and assume that one with the most recent data.
+Exactly one of `uri` or `uriPrefix` must be specified.
 
-The `versionRegex` value specifies a regex to use to determine if a filename in the parent path of the uri should be considered when trying to find the latest version. Omitting this setting or setting it equal to `null` will match to all files it can find (equivalent to using `".*"`). The search occurs in the most significant "directory" of the uri.
+The `pollPeriod` value specifies the period in ISO 8601 format between checks for replacement data for the lookup. If the source of the lookup is capable of providing a timestamp, the lookup will only be updated if it has changed since the prior tick of `pollPeriod`. A value of 0, an absent parameter, or `null` all mean populate once and do not attempt to look for new data later. Whenever a poll occurs, the updating system will look for the file with the most recent timestamp and assume that it holds the most recent data set, replacing the local cache of the lookup data.
 
 The `namespaceParseSpec` can be one of a number of values. Each of the examples below would rename foo to bar, baz to bat, and buck to truck. All parseSpec types assumes each input is delimited by a new line. See below for the types of parseSpec supported.
 
+Only ONE file which matches the search will be used. For most implementations, the URI chosen is whichever one reports the most recent modification timestamp.
+
 ### csv lookupParseSpec
 
 |Parameter|Description|Required|Default|

diff --git a/...core/hdfs-storage/src/main/java/io/druid/storage/hdfs/HdfsFileTimestampVersionFinder.java b/...core/hdfs-storage/src/main/java/io/druid/storage/hdfs/HdfsFileTimestampVersionFinder.java
@@ -29,6 +29,7 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
 
+import javax.annotation.Nullable;
 import java.io.IOException;
 import java.net.URI;
 import java.util.concurrent.Callable;
@@ -81,7 +82,7 @@ public boolean accept(Path path)
    * @return The URI of the file with the most recent modified timestamp.
    */
   @Override
-  public URI getLatestVersion(final URI uri, final Pattern pattern)
+  public URI getLatestVersion(final URI uri, final @Nullable Pattern pattern)
   {
     final Path path = new Path(uri);
     try {

diff --git a/...pace-lookup/src/main/java/io/druid/query/extraction/namespace/URIExtractionNamespace.java b/...pace-lookup/src/main/java/io/druid/query/extraction/namespace/URIExtractionNamespace.java
@@ -64,40 +64,58 @@ public class URIExtractionNamespace implements ExtractionNamespace
   @JsonProperty
   private final URI uri;
   @JsonProperty
+  private final URI uriPrefix;
+  @JsonProperty
   private final FlatDataParser namespaceParseSpec;
   @JsonProperty
-  private final Period pollPeriod;
+  private final String fileRegex;
   @JsonProperty
-  private final String versionRegex;
+  private final Period pollPeriod;
 
   @JsonCreator
   public URIExtractionNamespace(
       @NotNull @JsonProperty(value = "namespace", required = true)
-      String namespace,
-      @NotNull @JsonProperty(value = "uri", required = true)
-      URI uri,
+          String namespace,
+      @JsonProperty(value = "uri", required = false)
+          URI uri,
+      @JsonProperty(value = "uriPrefix", required = false)
+          URI uriPrefix,
+      @JsonProperty(value = "fileRegex", required = false)
+          String fileRegex,
       @JsonProperty(value = "namespaceParseSpec", required = true)
-      FlatDataParser namespaceParseSpec,
+          FlatDataParser namespaceParseSpec,
       @Min(0) @Nullable @JsonProperty(value = "pollPeriod", required = false)
-      Period pollPeriod,
+          Period pollPeriod,
+      @Deprecated
       @JsonProperty(value = "versionRegex", required = false)
-      String versionRegex
+          String versionRegex
   )
   {
-    if (versionRegex != null) {
+    this.namespace = Preconditions.checkNotNull(namespace, "namespace");
+    this.uri = uri;
+    this.uriPrefix = uriPrefix;
+    if ((uri != null) == (uriPrefix != null)) {
+      throw new IAE("Either uri xor uriPrefix required");
+    }
+    this.namespaceParseSpec = Preconditions.checkNotNull(namespaceParseSpec, "namespaceParseSpec");
+    this.pollPeriod = pollPeriod == null ? Period.ZERO : pollPeriod;
+    this.fileRegex = fileRegex == null ? versionRegex : fileRegex;
+    if (fileRegex != null && versionRegex != null) {
+      throw new IAE("Cannot specify both versionRegex and fileRegex. versionRegex is deprecated");
+    }
+
+    if (uri != null && this.fileRegex != null) {
+      throw new IAE("Cannot define both uri and fileRegex");
+    }
+
+    if (this.fileRegex != null) {
       try {
-        Pattern.compile(versionRegex);
+        Pattern.compile(this.fileRegex);
       }
       catch (PatternSyntaxException ex) {
-        throw new IAE(ex, "Could not parse `versionRegex` [%s]", versionRegex);
+        throw new IAE(ex, "Could not parse `fileRegex` [%s]", this.fileRegex);
       }
     }
-    this.namespace = Preconditions.checkNotNull(namespace, "namespace");
-    this.uri = Preconditions.checkNotNull(uri, "uri");
-    this.namespaceParseSpec = Preconditions.checkNotNull(namespaceParseSpec, "namespaceParseSpec");
-    this.pollPeriod = pollPeriod == null ? Period.ZERO : pollPeriod;
-
-    this.versionRegex = versionRegex;
   }
 
   @Override
@@ -106,9 +124,9 @@ public String getNamespace()
     return namespace;
   }
 
-  public String getVersionRegex()
+  public String getFileRegex()
   {
-    return versionRegex;
+    return fileRegex;
   }
 
   public FlatDataParser getNamespaceParseSpec()
@@ -121,6 +139,11 @@ public URI getUri()
     return uri;
   }
 
+  public URI getUriPrefix()
+  {
+    return uriPrefix;
+  }
+
   @Override
   public long getPollMs()
   {
@@ -130,17 +153,16 @@ public long getPollMs()
   @Override
   public String toString()
   {
-    return String.format(
-        "URIExtractionNamespace = { namespace = %s, uri = %s, namespaceParseSpec = %s, pollPeriod = %s, versionRegex = %s }",
-        namespace,
-        uri.toString(),
-        namespaceParseSpec.toString(),
-        pollPeriod.toString(),
-        versionRegex
-    );
+    return "URIExtractionNamespace{" +
+           "namespace='" + namespace + '\'' +
+           ", uri=" + uri +
+           ", uriPrefix=" + uriPrefix +
+           ", namespaceParseSpec=" + namespaceParseSpec +
+           ", fileRegex='" + fileRegex + '\'' +
+           ", pollPeriod=" + pollPeriod +
+           '}';
   }
 
-
   @Override
   public boolean equals(Object o)
   {
@@ -151,22 +173,39 @@ public boolean equals(Object o)
       return false;
     }
 
-    URIExtractionNamespace namespace1 = (URIExtractionNamespace) o;
-    return toString().equals(namespace1.toString());
+    URIExtractionNamespace that = (URIExtractionNamespace) o;
+
+    if (!getNamespace().equals(that.getNamespace())) {
+      return false;
+    }
+    if (getUri() != null ? !getUri().equals(that.getUri()) : that.getUri() != null) {
+      return false;
+    }
+    if (getUriPrefix() != null ? !getUriPrefix().equals(that.getUriPrefix()) : that.getUriPrefix() != null) {
+      return false;
+    }
+    if (!getNamespaceParseSpec().equals(that.getNamespaceParseSpec())) {
+      return false;
+    }
+    if (getFileRegex() != null ? !getFileRegex().equals(that.getFileRegex()) : that.getFileRegex() != null) {
+      return false;
+    }
+    return pollPeriod.equals(that.pollPeriod);
+
   }
 
   @Override
   public int hashCode()
   {
-    int result = namespace.hashCode();
-    result = 31 * result + uri.hashCode();
-    result = 31 * result + namespaceParseSpec.hashCode();
+    int result = getNamespace().hashCode();
+    result = 31 * result + (getUri() != null ? getUri().hashCode() : 0);
+    result = 31 * result + (getUriPrefix() != null ? getUriPrefix().hashCode() : 0);
+    result = 31 * result + getNamespaceParseSpec().hashCode();
+    result = 31 * result + (getFileRegex() != null ? getFileRegex().hashCode() : 0);
     result = 31 * result + pollPeriod.hashCode();
-    result = 31 * result + (versionRegex != null ? versionRegex.hashCode() : 0);
     return result;
   }
 
-
   private static class DelegateParser implements Parser<String, String>
   {
     private final Parser<String, Object> delegate;
@@ -268,7 +307,11 @@ public CSVFlatDataParser(
           Arrays.toString(columns.toArray())
       );
 
-      this.parser = new DelegateParser(new CSVParser(Optional.<String>absent(), columns), this.keyColumn, this.valueColumn);
+      this.parser = new DelegateParser(
+          new CSVParser(Optional.<String>absent(), columns),
+          this.keyColumn,
+          this.valueColumn
+      );
     }
 
     @JsonProperty

diff --git a/...lookup/src/main/java/io/druid/server/namespace/URIExtractionNamespaceFunctionFactory.java b/...lookup/src/main/java/io/druid/server/namespace/URIExtractionNamespaceFunctionFactory.java
@@ -45,6 +45,8 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.net.URI;
+import java.nio.file.Path;
+import java.nio.file.Paths;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.Callable;
@@ -120,7 +122,8 @@ public Callable<String> getCachePopulator(
       @Override
       public String call()
       {
-        final URI originalUri = extractionNamespace.getUri();
+        final boolean doSearch = extractionNamespace.getUriPrefix() != null;
+        final URI originalUri = doSearch ? extractionNamespace.getUriPrefix() : extractionNamespace.getUri();
         final SearchableVersionedDataFinder<URI> pullerRaw = pullers.get(originalUri.getScheme());
         if (pullerRaw == null) {
           throw new IAE(
@@ -132,15 +135,29 @@ public String call()
         if (!(pullerRaw instanceof URIDataPuller)) {
           throw new IAE(
               "Cannot load data from location [%s]. Data pulling from [%s] not supported",
-              originalUri.toString(),
+              originalUri,
               originalUri.getScheme()
           );
         }
         final URIDataPuller puller = (URIDataPuller) pullerRaw;
-        final String versionRegex = extractionNamespace.getVersionRegex();
+        final Pattern versionRegex;
+        final URI uriBase;
+        if (doSearch) {
+          uriBase = extractionNamespace.getUriPrefix();
+
+          if (extractionNamespace.getFileRegex() != null) {
+            versionRegex = Pattern.compile(extractionNamespace.getFileRegex());
+          } else {
+            versionRegex = null;
+          }
+        } else {
+          final Path filePath = Paths.get(extractionNamespace.getUri());
+          versionRegex = Pattern.compile(Pattern.quote(filePath.getFileName().toString()));
+          uriBase = filePath.getParent().toUri();
+        }
         final URI uri = pullerRaw.getLatestVersion(
-            originalUri,
-            versionRegex == null ? null : Pattern.compile(versionRegex)
+            uriBase,
+            versionRegex
         );
         if (uri == null) {
           throw new RuntimeException(

diff --git a/...src/main/java/io/druid/server/namespace/cache/OffHeapNamespaceExtractionCacheManager.java b/...src/main/java/io/druid/server/namespace/cache/OffHeapNamespaceExtractionCacheManager.java
@@ -119,6 +119,7 @@ protected boolean swapAndClearCache(String namespaceKey, String cacheKey)
 
       final String priorCache = currentNamespaceCache.put(namespaceKey, swapCacheKey);
       if (priorCache != null) {
+        // TODO: resolve what happens here if query is actively going on
         mmapDB.delete(priorCache);
         dataSize.set(tmpFile.length());
         return true;

diff --git a/.../src/main/java/io/druid/server/namespace/cache/OnHeapNamespaceExtractionCacheManager.java b/.../src/main/java/io/druid/server/namespace/cache/OnHeapNamespaceExtractionCacheManager.java
@@ -23,6 +23,7 @@
 import com.google.common.util.concurrent.Striped;
 import com.google.inject.Inject;
 import com.google.inject.name.Named;
+import com.metamx.common.IAE;
 import com.metamx.common.lifecycle.Lifecycle;
 import com.metamx.emitter.service.ServiceEmitter;
 import io.druid.query.extraction.namespace.ExtractionNamespace;
@@ -53,7 +54,7 @@ public OnHeapNamespaceExtractionCacheManager(
       final Map<Class<? extends ExtractionNamespace>, ExtractionNamespaceFunctionFactory<?>> namespaceFunctionFactoryMap
   )
   {
-    super(lifecycle, fnCache, reverseFnCache,emitter, namespaceFunctionFactoryMap);
+    super(lifecycle, fnCache, reverseFnCache, emitter, namespaceFunctionFactoryMap);
   }
 
   @Override
@@ -64,14 +65,14 @@ protected boolean swapAndClearCache(String namespaceKey, String cacheKey)
     try {
       ConcurrentMap<String, String> cacheMap = mapMap.get(cacheKey);
       if (cacheMap == null) {
-        // Sometimes cache will not be populated (for example: if it doesn't contain new data)
-        return false;
+        throw new IAE("Extraction Cache [%s] does not exist", cacheKey);
       }
       dataSize.addAndGet(cacheMap.size());
       ConcurrentMap<String, String> prior = mapMap.put(namespaceKey, cacheMap);
       mapMap.remove(cacheKey);
       if (prior != null) {
         dataSize.addAndGet(-prior.size());
+        // Old map will get GC'd when it is not used anymore
         return true;
       } else {
         return false;
@@ -87,7 +88,7 @@ public ConcurrentMap<String, String> getCacheMap(String namespaceOrCacheKey)
   {
     ConcurrentMap<String, String> map = mapMap.get(namespaceOrCacheKey);
     if (map == null) {
-      mapMap.putIfAbsent(namespaceOrCacheKey, new ConcurrentHashMap<String, String>(32));
+      mapMap.putIfAbsent(namespaceOrCacheKey, new ConcurrentHashMap<String, String>());
       map = mapMap.get(namespaceOrCacheKey);
     }
     return map;