2 changes: 1 addition & 1 deletion NOTICE
@@ -26,7 +26,7 @@ Copyright 2014-2019 The Apache Software Foundation



================= Apache Hadoop 2.8.3 =================
================= Apache Hadoop 2.8.5 =================
Apache Hadoop
Copyright 2009-2017 The Apache Software Foundation

@@ -34,17 +34,27 @@ public class SegmentWithOvershadowedStatus implements Comparable<SegmentWithOver
private final boolean overshadowed;
/**
* dataSegment is serialized "unwrapped", i.e. it's properties are included as properties of
* enclosing class. If in future, if {@Code SegmentWithOvershadowedStatus} were to extend {@link DataSegment},
* enclosing class. If in future, if {@code SegmentWithOvershadowedStatus} were to extend {@link DataSegment},
* there will be no change in the serialized format.
*/
@JsonUnwrapped
private final DataSegment dataSegment;

@JsonCreator
public SegmentWithOvershadowedStatus(
@JsonProperty("dataSegment") DataSegment dataSegment,
@JsonProperty("overshadowed") boolean overshadowed
)
{
// Using @JsonUnwrapped with @JsonCreator is not currently possible:
// https://github.com/FasterXML/jackson-databind/issues/1467. However, if the JSON payload contains an unwrapped
// DataSegment, Jackson will overwrite dataSegment (even though the field is final).
this(null, overshadowed);
}

public SegmentWithOvershadowedStatus(
DataSegment dataSegment,
boolean overshadowed
)
{
this.dataSegment = dataSegment;
this.overshadowed = overshadowed;
@@ -94,4 +104,13 @@ public int compareTo(SegmentWithOvershadowedStatus o)
{
return dataSegment.getId().compareTo(o.dataSegment.getId());
}

@Override
public String toString()
{
return "SegmentWithOvershadowedStatus{" +
"overshadowed=" + overshadowed +
", dataSegment=" + dataSegment +
'}';
}
}
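
The split into a delegating `@JsonCreator` constructor and a plain two-argument constructor is the standard workaround for the Jackson limitation referenced in the comment above. Below is a minimal, self-contained sketch of the same pattern; the class and field names are illustrative, not from the Druid codebase:

```java
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonUnwrapped;
import com.fasterxml.jackson.databind.ObjectMapper;

public class UnwrappedCreatorDemo
{
  static class Inner
  {
    @JsonProperty
    String name;
  }

  static class Outer
  {
    // Serialized/deserialized "unwrapped": Inner's properties appear at the top level.
    @JsonUnwrapped
    private final Inner inner;

    @JsonProperty
    private final boolean flag;

    // @JsonUnwrapped cannot be a @JsonCreator argument
    // (https://github.com/FasterXML/jackson-databind/issues/1467), so the creator
    // takes only the wrapper's own properties; Jackson then populates 'inner' by
    // field injection afterwards, even though the field is final.
    @JsonCreator
    Outer(@JsonProperty("flag") boolean flag)
    {
      this.inner = null;
      this.flag = flag;
    }
  }

  public static void main(String[] args) throws Exception
  {
    ObjectMapper mapper = new ObjectMapper();
    Outer o = mapper.readValue("{\"name\":\"x\",\"flag\":true}", Outer.class);
    System.out.println(o.inner.name + " " + o.flag); // prints: x true
  }
}
```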
@@ -206,7 +206,7 @@ public void testBadTypeSerde() throws IOException

expectedException.expect(IllegalArgumentException.class);
expectedException.expectCause(CoreMatchers.instanceOf(JsonMappingException.class));
expectedException.expectMessage("Could not resolve type id 'foo' into a subtype");
expectedException.expectMessage("Could not resolve type id 'foo' as a subtype");
mapper.convertValue(mapValue, ParseSpec.class);
}
}
@@ -33,7 +33,6 @@
import org.apache.druid.timeline.partition.ShardSpec;
import org.joda.time.Interval;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import javax.annotation.Nullable;
@@ -43,71 +42,88 @@

public class SegmentWithOvershadowedStatusTest
{
private static final ObjectMapper MAPPER = new TestObjectMapper();
private static final ObjectMapper MAPPER = createObjectMapper();
private static final Interval INTERVAL = Intervals.of("2011-10-01/2011-10-02");
private static final ImmutableMap<String, Object> LOAD_SPEC = ImmutableMap.of("something", "or_other");
private static final boolean OVERSHADOWED = true;
private static final int TEST_VERSION = 0x9;
private static final SegmentWithOvershadowedStatus SEGMENT = createSegmentWithOvershadowedStatus();

@Before
public void setUp()
private static ObjectMapper createObjectMapper()
{
ObjectMapper objectMapper = new TestObjectMapper();
InjectableValues.Std injectableValues = new InjectableValues.Std();
injectableValues.addValue(DataSegment.PruneLoadSpecHolder.class, DataSegment.PruneLoadSpecHolder.DEFAULT);
MAPPER.setInjectableValues(injectableValues);
objectMapper.setInjectableValues(injectableValues);
return objectMapper;
}

@Test
public void testUnwrappedSegmentWithOvershadowedStatusDeserialization() throws Exception
private static SegmentWithOvershadowedStatus createSegmentWithOvershadowedStatus()
{
final Interval interval = Intervals.of("2011-10-01/2011-10-02");
final ImmutableMap<String, Object> loadSpec = ImmutableMap.of("something", "or_other");

final DataSegment dataSegment = new DataSegment(
DataSegment dataSegment = new DataSegment(
"something",
interval,
INTERVAL,
"1",
loadSpec,
LOAD_SPEC,
Arrays.asList("dim1", "dim2"),
Arrays.asList("met1", "met2"),
NoneShardSpec.instance(),
TEST_VERSION,
1
);

final SegmentWithOvershadowedStatus segment = new SegmentWithOvershadowedStatus(dataSegment, false);
return new SegmentWithOvershadowedStatus(dataSegment, OVERSHADOWED);
}

@Test
public void testUnwrappedSegmentWithOvershadowedStatusDeserialization() throws Exception
{
final Map<String, Object> objectMap = MAPPER.readValue(
MAPPER.writeValueAsString(segment),
MAPPER.writeValueAsString(SEGMENT),
JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT
);

Assert.assertEquals(11, objectMap.size());
Assert.assertEquals("something", objectMap.get("dataSource"));
Assert.assertEquals(interval.toString(), objectMap.get("interval"));
Assert.assertEquals(INTERVAL.toString(), objectMap.get("interval"));
Assert.assertEquals("1", objectMap.get("version"));
Assert.assertEquals(loadSpec, objectMap.get("loadSpec"));
Assert.assertEquals(LOAD_SPEC, objectMap.get("loadSpec"));
Assert.assertEquals("dim1,dim2", objectMap.get("dimensions"));
Assert.assertEquals("met1,met2", objectMap.get("metrics"));
Assert.assertEquals(ImmutableMap.of("type", "none"), objectMap.get("shardSpec"));
Assert.assertEquals(TEST_VERSION, objectMap.get("binaryVersion"));
Assert.assertEquals(1, objectMap.get("size"));
Assert.assertEquals(false, objectMap.get("overshadowed"));
Assert.assertEquals(OVERSHADOWED, objectMap.get("overshadowed"));

final String json = MAPPER.writeValueAsString(SEGMENT);

final String json = MAPPER.writeValueAsString(segment);

final TestSegmentWithOvershadowedStatus deserializedSegment = MAPPER.readValue(
json,
TestSegmentWithOvershadowedStatus.class
);

Assert.assertEquals(segment.getDataSegment().getDataSource(), deserializedSegment.getDataSource());
Assert.assertEquals(segment.getDataSegment().getInterval(), deserializedSegment.getInterval());
Assert.assertEquals(segment.getDataSegment().getVersion(), deserializedSegment.getVersion());
Assert.assertEquals(segment.getDataSegment().getLoadSpec(), deserializedSegment.getLoadSpec());
Assert.assertEquals(segment.getDataSegment().getDimensions(), deserializedSegment.getDimensions());
Assert.assertEquals(segment.getDataSegment().getMetrics(), deserializedSegment.getMetrics());
Assert.assertEquals(segment.getDataSegment().getShardSpec(), deserializedSegment.getShardSpec());
Assert.assertEquals(segment.getDataSegment().getSize(), deserializedSegment.getSize());
Assert.assertEquals(segment.getDataSegment().getId(), deserializedSegment.getId());
DataSegment dataSegment = SEGMENT.getDataSegment();
Assert.assertEquals(dataSegment.getDataSource(), deserializedSegment.getDataSource());
Assert.assertEquals(dataSegment.getInterval(), deserializedSegment.getInterval());
Assert.assertEquals(dataSegment.getVersion(), deserializedSegment.getVersion());
Assert.assertEquals(dataSegment.getLoadSpec(), deserializedSegment.getLoadSpec());
Assert.assertEquals(dataSegment.getDimensions(), deserializedSegment.getDimensions());
Assert.assertEquals(dataSegment.getMetrics(), deserializedSegment.getMetrics());
Assert.assertEquals(dataSegment.getShardSpec(), deserializedSegment.getShardSpec());
Assert.assertEquals(dataSegment.getSize(), deserializedSegment.getSize());
Assert.assertEquals(dataSegment.getId(), deserializedSegment.getId());
}

// Previously, the implementation of SegmentWithOvershadowedStatus had @JsonCreator/@JsonProperty and @JsonUnwrapped
// on the same field (dataSegment), which used to work in Jackson 2.6, but does not work with Jackson 2.9:
// https://github.com/FasterXML/jackson-databind/issues/265#issuecomment-264344051
@Test
public void testJsonCreatorAndJsonUnwrappedAnnotationsAreCompatible() throws Exception
{
String json = MAPPER.writeValueAsString(SEGMENT);
SegmentWithOvershadowedStatus segment = MAPPER.readValue(json, SegmentWithOvershadowedStatus.class);
Assert.assertEquals(SEGMENT, segment);
}
}
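
For reference, the unwrapped JSON that the first test asserts on looks roughly like the following. The values are taken from the fixtures above; the exact key order depends on Jackson's serialization order, and `identifier` is derived from the dataSource, interval, and version:

```json
{
  "overshadowed": true,
  "dataSource": "something",
  "interval": "2011-10-01T00:00:00.000Z/2011-10-02T00:00:00.000Z",
  "version": "1",
  "loadSpec": {"something": "or_other"},
  "dimensions": "dim1,dim2",
  "metrics": "met1,met2",
  "shardSpec": {"type": "none"},
  "binaryVersion": 9,
  "size": 1,
  "identifier": "something_2011-10-01T00:00:00.000Z_2011-10-02T00:00:00.000Z_1"
}
```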

2 changes: 1 addition & 1 deletion distribution/bin/check-licenses.py
@@ -408,4 +408,4 @@ def check_licenses(license_yaml, dependency_reports_root):
check_licenses(license_yaml, dependency_reports_root)

except KeyboardInterrupt:
print('Interrupted, closing.')
print('Interrupted, closing.')
6 changes: 3 additions & 3 deletions docs/configuration/index.md
@@ -1135,7 +1135,7 @@ Additional peon configs include:
|`druid.peon.mode`|Choices are "local" and "remote". Setting this to local means you intend to run the peon as a standalone process (Not recommended).|remote|
|`druid.indexer.task.baseDir`|Base temporary working directory.|`System.getProperty("java.io.tmpdir")`|
|`druid.indexer.task.baseTaskDir`|Base temporary working directory for tasks.|`${druid.indexer.task.baseDir}/persistent/tasks`|
|`druid.indexer.task.defaultHadoopCoordinates`|Hadoop version to use with HadoopIndexTasks that do not request a particular version.|org.apache.hadoop:hadoop-client:2.8.3|
|`druid.indexer.task.defaultHadoopCoordinates`|Hadoop version to use with HadoopIndexTasks that do not request a particular version.|org.apache.hadoop:hadoop-client:2.8.5|
|`druid.indexer.task.defaultRowFlushBoundary`|Highest row count before persisting to disk. Used for indexing generating tasks.|75000|
|`druid.indexer.task.directoryLockTimeout`|Wait this long for zombie peons to exit before giving up on their replacements.|PT10M|
|`druid.indexer.task.gracefulShutdownTimeout`|Wait this long on middleManager restart for restorable tasks to gracefully exit.|PT5M|
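
If a cluster needs to stay on the older client after this default changes, the coordinate can be pinned explicitly in the runtime properties. A sketch, assuming the list-valued property is written as a JSON array as for other Druid list properties:

```
druid.indexer.task.defaultHadoopCoordinates=["org.apache.hadoop:hadoop-client:2.8.3"]
```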
@@ -1157,7 +1157,7 @@ When new segments are created, Druid temporarily stores some preprocessed data i
*medium* exist for those buffers: *temporary files* and *off-heap memory*.

*Temporary files* (`tmpFile`) are stored under the task working directory (see `druid.indexer.task.baseTaskDir`
configuration above) and thus share it's mounting properties, e. g. they could be backed by HDD, SSD or memory (tmpfs).
configuration above) and thus share it's mounting properties, e.g., they could be backed by HDD, SSD or memory (tmpfs).
This type of medium may do unnecessary disk I/O and requires some disk space to be available.

*Off-heap memory medium* (`offHeapMemory`) creates buffers in off-heap memory of a JVM process that is running a task.
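
As a concrete illustration, the medium can be selected cluster-wide via the peon configuration; a minimal sketch, assuming the `druid.peon.defaultSegmentWriteOutMediumFactory.type` property documented elsewhere in this file:

```
druid.peon.defaultSegmentWriteOutMediumFactory.type=offHeapMemory
```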
@@ -1196,7 +1196,7 @@ then the value from the configuration below is used:
|`druid.worker.numConcurrentMerges`|Maximum number of segment persist or merge operations that can run concurrently across all tasks.|`druid.worker.capacity` / 2, rounded down|
|`druid.indexer.task.baseDir`|Base temporary working directory.|`System.getProperty("java.io.tmpdir")`|
|`druid.indexer.task.baseTaskDir`|Base temporary working directory for tasks.|`${druid.indexer.task.baseDir}/persistent/tasks`|
|`druid.indexer.task.defaultHadoopCoordinates`|Hadoop version to use with HadoopIndexTasks that do not request a particular version.|org.apache.hadoop:hadoop-client:2.8.3|
|`druid.indexer.task.defaultHadoopCoordinates`|Hadoop version to use with HadoopIndexTasks that do not request a particular version.|org.apache.hadoop:hadoop-client:2.8.5|
|`druid.indexer.task.gracefulShutdownTimeout`|Wait this long on Indexer restart for restorable tasks to gracefully exit.|PT5M|
|`druid.indexer.task.hadoopWorkingPath`|Temporary working directory for Hadoop tasks.|`/tmp/druid-indexing`|
|`druid.indexer.task.restoreTasksOnRestart`|If true, the Indexer will attempt to stop tasks gracefully on shutdown and restore them on restart.|false|
2 changes: 1 addition & 1 deletion docs/operations/other-hadoop.md
@@ -89,7 +89,7 @@ classloader.
2. Batch ingestion uses jars from `hadoop-dependencies/` to submit Map/Reduce jobs (location customizable via the
`druid.extensions.hadoopDependenciesDir` runtime property; see [Configuration](../configuration/index.html#extensions)).

`hadoop-client:2.8.3` is the default version of the Hadoop client bundled with Druid for both purposes. This works with
`hadoop-client:2.8.5` is the default version of the Hadoop client bundled with Druid for both purposes. This works with
many Hadoop distributions (the version does not necessarily need to match), but if you run into issues, you can instead
have Druid load libraries that exactly match your distribution. To do this, either copy the jars from your Hadoop
cluster, or use the `pull-deps` tool to download the jars from a Maven repository.
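
For example, a sketch of fetching matching client jars with `pull-deps` from the package root (flags as documented for the pull-deps tool; adjust the coordinate to your distribution's version):

```bash
java -classpath "lib/*" org.apache.druid.cli.Main tools pull-deps --no-default-hadoop -h "org.apache.hadoop:hadoop-client:2.8.5"
```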
10 changes: 5 additions & 5 deletions docs/tutorials/tutorial-batch-hadoop.md
@@ -38,18 +38,18 @@ Once the Docker install is complete, please proceed to the next steps in the tut

## Build the Hadoop docker image

For this tutorial, we've provided a Dockerfile for a Hadoop 2.8.3 cluster, which we'll use to run the batch indexing task.
For this tutorial, we've provided a Dockerfile for a Hadoop 2.8.5 cluster, which we'll use to run the batch indexing task.

This Dockerfile and related files are located at `quickstart/tutorial/hadoop/docker`.

From the apache-druid-{{DRUIDVERSION}} package root, run the following commands to build a Docker image named "druid-hadoop-demo" with version tag "2.8.3":
From the apache-druid-{{DRUIDVERSION}} package root, run the following commands to build a Docker image named "druid-hadoop-demo" with version tag "2.8.5":

```bash
cd quickstart/tutorial/hadoop/docker
docker build -t druid-hadoop-demo:2.8.3 .
docker build -t druid-hadoop-demo:2.8.5 .
```

This will start building the Hadoop image. Once the image build is done, you should see the message `Successfully tagged druid-hadoop-demo:2.8.3` printed to the console.
This will start building the Hadoop image. Once the image build is done, you should see the message `Successfully tagged druid-hadoop-demo:2.8.5` printed to the console.

## Setup the Hadoop docker cluster

@@ -77,7 +77,7 @@ On the host machine, add the following entry to `/etc/hosts`:
Once the `/tmp/shared` folder has been created and the `etc/hosts` entry has been added, run the following command to start the Hadoop container.

```bash
docker run -it -h druid-hadoop-demo --name druid-hadoop-demo -p 2049:2049 -p 2122:2122 -p 8020:8020 -p 8021:8021 -p 8030:8030 -p 8031:8031 -p 8032:8032 -p 8033:8033 -p 8040:8040 -p 8042:8042 -p 8088:8088 -p 8443:8443 -p 9000:9000 -p 10020:10020 -p 19888:19888 -p 34455:34455 -p 49707:49707 -p 50010:50010 -p 50020:50020 -p 50030:50030 -p 50060:50060 -p 50070:50070 -p 50075:50075 -p 50090:50090 -p 51111:51111 -v /tmp/shared:/shared druid-hadoop-demo:2.8.3 /etc/bootstrap.sh -bash
docker run -it -h druid-hadoop-demo --name druid-hadoop-demo -p 2049:2049 -p 2122:2122 -p 8020:8020 -p 8021:8021 -p 8030:8030 -p 8031:8031 -p 8032:8032 -p 8033:8033 -p 8040:8040 -p 8042:8042 -p 8088:8088 -p 8443:8443 -p 9000:9000 -p 10020:10020 -p 19888:19888 -p 34455:34455 -p 49707:49707 -p 50010:50010 -p 50020:50020 -p 50030:50030 -p 50060:50060 -p 50070:50070 -p 50075:50075 -p 50090:50090 -p 51111:51111 -v /tmp/shared:/shared druid-hadoop-demo:2.8.5 /etc/bootstrap.sh -bash
```

Once the container is started, your terminal will attach to a bash shell running inside the container:
10 changes: 6 additions & 4 deletions examples/quickstart/tutorial/hadoop/docker/Dockerfile
@@ -14,11 +14,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.

# Creates pseudo distributed hadoop 2.8.3 with java 8
# Creates pseudo distributed hadoop 2.8.5 with java 8
#
# Modified from the SequenceIQ Dockerfiles at https://github.com/sequenceiq/hadoop-docker
#
# docker build -t druid-hadoop-demo:2.8.3 .
# docker build -t druid-hadoop-demo:2.8.5 .

FROM sequenceiq/pam:centos-6.5
MAINTAINER SequenceIQ
@@ -31,8 +31,10 @@ RUN yum clean all \
&& yum install -y curl which tar sudo openssh-server openssh-clients rsync yum-plugin-ovl\
&& yum clean all \
&& yum update -y libselinux \
&& yum update -y nss \
&& yum clean all
# update libselinux. see https://github.com/sequenceiq/hadoop-docker/issues/14
# update nss. see https://unix.stackexchange.com/questions/280548/curl-doesnt-connect-to-https-while-wget-does-nss-error-12286

# passwordless ssh
RUN ssh-keygen -q -N "" -t dsa -f /etc/ssh/ssh_host_dsa_key
@@ -49,8 +51,8 @@ ENV JAVA_HOME /usr/lib/jvm/zulu-8
ENV PATH $PATH:$JAVA_HOME/bin

# hadoop
RUN curl -s https://archive.apache.org/dist/hadoop/core/hadoop-2.8.3/hadoop-2.8.3.tar.gz | tar -xz -C /usr/local/
RUN cd /usr/local && ln -s ./hadoop-2.8.3 hadoop
RUN curl -s https://archive.apache.org/dist/hadoop/core/hadoop-2.8.5/hadoop-2.8.5.tar.gz | tar -xz -C /usr/local/
RUN cd /usr/local && ln -s ./hadoop-2.8.5 hadoop

ENV HADOOP_PREFIX /usr/local/hadoop
ENV HADOOP_COMMON_HOME /usr/local/hadoop
2 changes: 1 addition & 1 deletion examples/quickstart/tutorial/wikipedia-index-hadoop.json
@@ -75,5 +75,5 @@
}
}
},
"hadoopDependencyCoordinates": ["org.apache.hadoop:hadoop-client:2.8.3"]
"hadoopDependencyCoordinates": ["org.apache.hadoop:hadoop-client:2.8.5"]
}
2 changes: 1 addition & 1 deletion extensions-core/postgresql-metadata-storage/pom.xml
@@ -50,7 +50,7 @@
<dependency>
<groupId>org.postgresql</groupId>
<artifactId>postgresql</artifactId>
<version>9.4.1208.jre7</version>
<version>42.2.8.jre7</version>
</dependency>
<dependency>
<groupId>org.jdbi</groupId>
@@ -77,6 +77,7 @@
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Iterator;
@@ -117,7 +118,7 @@ public static List<DataSegment> getPublishedSegments(HadoopDruidIndexerConfig co
FileSystem fs = descriptorInfoDir.getFileSystem(conf);

for (FileStatus status : fs.listStatus(descriptorInfoDir)) {
final DataSegment segment = jsonMapper.readValue(fs.open(status.getPath()), DataSegment.class);
final DataSegment segment = jsonMapper.readValue((InputStream) fs.open(status.getPath()), DataSegment.class);
publishedSegmentsBuilder.add(segment);
log.info("Adding segment %s to the list of published segments", segment.getId());
}
@@ -34,7 +34,7 @@
public class TaskConfig
{
public static final List<String> DEFAULT_DEFAULT_HADOOP_COORDINATES = ImmutableList.of(
"org.apache.hadoop:hadoop-client:2.8.3"
"org.apache.hadoop:hadoop-client:2.8.5"
);

private static final Period DEFAULT_DIRECTORY_LOCK_TIMEOUT = new Period("PT10M");
@@ -13,6 +13,6 @@
"is_available": 1,
"is_realtime": 0,
"is_overshadowed": 0,
"payload": "{\"dataSource\":\"auth_test\",\"interval\":\"2012-12-29T00:00:00.000Z/2013-01-10T08:00:00.000Z\",\"version\":\"2013-01-10T08:13:47.830Z_v9\",\"loadSpec\":{\"load spec is pruned, because it's not needed on Brokers, but eats a lot of heap space\":\"\"},\"dimensions\":\"anonymous,area_code,city,continent_code,country_name,dma_code,geo,language,namespace,network,newpage,page,postal_code,region_lookup,robot,unpatrolled,user\",\"metrics\":\"added,count,deleted,delta,delta_hist,unique_users,variation\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":446027801,\"identifier\":\"auth_test_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9\",\"overshadowed\":false}"
"payload": "{\"overshadowed\":false,\"dataSource\":\"auth_test\",\"interval\":\"2012-12-29T00:00:00.000Z/2013-01-10T08:00:00.000Z\",\"version\":\"2013-01-10T08:13:47.830Z_v9\",\"loadSpec\":{\"load spec is pruned, because it's not needed on Brokers, but eats a lot of heap space\":\"\"},\"dimensions\":\"anonymous,area_code,city,continent_code,country_name,dma_code,geo,language,namespace,network,newpage,page,postal_code,region_lookup,robot,unpatrolled,user\",\"metrics\":\"added,count,deleted,delta,delta_hist,unique_users,variation\",\"shardSpec\":{\"type\":\"none\"},\"binaryVersion\":9,\"size\":446027801,\"identifier\":\"auth_test_2012-12-29T00:00:00.000Z_2013-01-10T08:00:00.000Z_2013-01-10T08:13:47.830Z_v9\"}"
}
]