Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions dev/intellij-setup.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ You can configure application definitions in XML for import into IntelliJ. Below
<configuration default="false" name="Historical" type="Application" factoryName="Application">
<extension name="coverage" enabled="false" merge="false" sample_coverage="true" runner="idea" />
<option name="MAIN_CLASS_NAME" value="org.apache.druid.cli.Main" />
<option name="VM_PARAMETERS" value="-server -Duser.timezone=UTC -Dfile.encoding=UTF-8 -Xmx2G -XX:MaxJavaStackTraceDepth=9999 -XX:+UseG1GC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintAdaptiveSizePolicy -XX:+PrintReferenceGC -verbose:gc -XX:+PrintFlagsFinal -Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager -Dorg.jboss.logging.provider=slf4j -Dlog4j.configurationFile=$PROJECT_DIR$/core/src/main/resources/log4j2.debug.xml -Ddruid.host=localhost -Ddruid.service=historical -Ddruid.server.maxSize=10000000000 -Ddruid.processing.buffer.sizeBytes=100000000 -Ddruid.extensions.hadoopDependenciesDir=$PROJECT_DIR$/distribution/target/hadoop-dependencies/ -Ddruid.extensions.directory=$PROJECT_DIR$/distribution/target/extensions/ -Ddruid.extensions.loadList=[\&quot;druid-s3-extensions\&quot;,\&quot;druid-histogram\&quot;,\&quot;mysql-metadata-storage\&quot;] -Ddruid.historical.cache.useCache=false -Ddruid.historical.cache.populateCache=false -Ddruid.segmentCache.locations=&quot;[{\&quot;path\&quot;:\&quot;/tmp/druid/indexCache\&quot;,\&quot;maxSize\&quot;:10000000000}]&quot; -Ddruid.zk.service.host=localhost -Ddruid.processing.numThreads=1 -Ddruid.server.http.numThreads=50 -Ddruid.serverview.type=batch -Ddruid.emitter=logging" />
<option name="VM_PARAMETERS" value="-server -Duser.timezone=UTC -Dfile.encoding=UTF-8 -Xmx2G -XX:MaxJavaStackTraceDepth=9999 -XX:+UseG1GC -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintAdaptiveSizePolicy -XX:+PrintReferenceGC -verbose:gc -XX:+PrintFlagsFinal -Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager -Dorg.jboss.logging.provider=slf4j -Dlog4j.configurationFile=$PROJECT_DIR$/core/src/main/resources/log4j2.debug.xml -Ddruid.host=localhost -Ddruid.service=historical -Ddruid.processing.buffer.sizeBytes=100000000 -Ddruid.extensions.hadoopDependenciesDir=$PROJECT_DIR$/distribution/target/hadoop-dependencies/ -Ddruid.extensions.directory=$PROJECT_DIR$/distribution/target/extensions/ -Ddruid.extensions.loadList=[\&quot;druid-s3-extensions\&quot;,\&quot;druid-histogram\&quot;,\&quot;mysql-metadata-storage\&quot;] -Ddruid.historical.cache.useCache=false -Ddruid.historical.cache.populateCache=false -Ddruid.segmentCache.locations=&quot;[{\&quot;path\&quot;:\&quot;/tmp/druid/indexCache\&quot;,\&quot;maxSize\&quot;:10000000000}]&quot; -Ddruid.zk.service.host=localhost -Ddruid.processing.numThreads=1 -Ddruid.server.http.numThreads=50 -Ddruid.serverview.type=batch -Ddruid.emitter=logging" />
<option name="PROGRAM_PARAMETERS" value="server historical" />
<option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$" />
<option name="ALTERNATIVE_JRE_PATH_ENABLED" value="false" />
Expand Down Expand Up @@ -117,4 +117,4 @@ You can configure application definitions in XML for import into IntelliJ. Below

You can also provide a property file for running or debugging the application through intellij.

For example, put a file named as `common.properties` under `.idea/conf` directory, then add `-Ddruid.properties.file=$PROJECT_DIR$/.idea/conf/common.properties` to `VM_PARAMETERS` in the App Def file.
For example, put a file named `common.properties` in the `.idea/conf` directory, then add `-Ddruid.properties.file=$PROJECT_DIR$/.idea/conf/common.properties` to `VM_PARAMETERS` in the App Def file.
2 changes: 1 addition & 1 deletion docs/configuration/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -1370,7 +1370,7 @@ These Historical configurations can be defined in the `historical/runtime.proper

|Property|Description|Default|
|--------|-----------|-------|
|`druid.server.maxSize`|The maximum number of bytes-worth of segments that the process wants assigned to it. The Coordinator process will attempt to assign segments to a Historical process only if this property is greater than the total size of segments served by it. Since this property defines the upper limit on the total segment size that can be assigned to a Historical, it can be set to the sum of all `maxSize` values specified within `druid.segmentCache.locations` property. Human-readable format is supported, see [here](human-readable-byte.md). |0|
|`druid.server.maxSize`|The maximum number of bytes-worth of segments that the process wants assigned to it. The Coordinator process will attempt to assign segments to a Historical process only if this property is greater than the total size of segments served by it. Since this property defines the upper limit on the total segment size that can be assigned to a Historical, it defaults to the sum of all `maxSize` values specified within the `druid.segmentCache.locations` property. Human-readable format is supported, see [here](human-readable-byte.md). |Sum of `maxSize` values defined within `druid.segmentCache.locations`|
|`druid.server.tier`| A string to name the distribution tier that the storage process belongs to. Many of the [rules Coordinator processes use](../operations/rule-configuration.md) to manage segments can be keyed on tiers. | `_default_tier` |
|`druid.server.priority`|In a tiered architecture, the priority of the tier, thus allowing control over which processes are queried. Higher numbers mean higher priority. The default (no priority) works for architecture with no cross replication (tiers that have no data-storage overlap). Data centers typically have equal priority. | 0 |

Expand Down
1 change: 0 additions & 1 deletion docs/ingestion/faq.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ You can check the Coordinator console located at `<COORDINATOR_IP>:<PORT>`. Make

```
-Ddruid.segmentCache.locations=[{"path":"/tmp/druid/storageLocation","maxSize":"500000000000"}]
-Ddruid.server.maxSize=500000000000
```

## My queries are returning empty results
Expand Down
10 changes: 4 additions & 6 deletions docs/operations/basic-cluster-tuning.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,17 +90,15 @@ Tuning the cluster so that each Historical can accept 50 queries and 10 non-quer

#### Segment Cache Size

`druid.server.maxSize` controls the total size of segment data that can be assigned by the Coordinator to a Historical.

`druid.segmentCache.locations` specifies locations where segment data can be stored on the Historical. The sum of available disk space across these locations should equal `druid.server.maxSize`.
`druid.segmentCache.locations` specifies locations where segment data can be stored on the Historical. The sum of available disk space across these locations is used as the default value of `druid.server.maxSize`, which controls the total size of segment data that can be assigned by the Coordinator to a Historical.

Segments are memory-mapped by Historical processes using any available free system memory (i.e., memory not used by the Historical JVM and heap/direct memory buffers or other processes on the system). Segments that are not currently in memory will be paged from disk when queried.

Therefore, `druid.server.maxSize` should be set such that a Historical is not allocated an excessive amount of segment data. As the value of (`free system memory` / `druid.server.maxSize`) increases, a greater proportion of segments can be kept in memory, allowing for better query performance.
Therefore, the size of the cache locations set within `druid.segmentCache.locations` should be such that a Historical is not allocated an excessive amount of segment data. As the value of (`free system memory` / total size of all `druid.segmentCache.locations`) increases, a greater proportion of segments can be kept in memory, allowing for better query performance. The total segment data size assigned to a Historical can be overridden with `druid.server.maxSize`, but this is not required for most use cases.

#### Number of Historicals

The number of Historicals needed in a cluster depends on how much data the cluster has. For good performance, you will want enough Historicals such that each Historical has a good (`free system memory` / `druid.server.maxSize`) ratio, as described in the segment cache size section above.
The number of Historicals needed in a cluster depends on how much data the cluster has. For good performance, you will want enough Historicals such that each Historical has a good (`free system memory` / total size of all `druid.segmentCache.locations`) ratio, as described in the segment cache size section above.

Having a smaller number of big servers is generally better than having a large number of small servers, as long as you have enough fault tolerance for your use case.

Expand All @@ -115,7 +113,7 @@ To estimate total memory usage of the Historical under these guidelines:
- Heap: `(0.5GB * number of CPU cores) + (2 * total size of lookup maps) + druid.cache.sizeInBytes`
- Direct Memory: `(druid.processing.numThreads + druid.processing.numMergeBuffers + 1) * druid.processing.buffer.sizeBytes`

The Historical will use any available free system memory (i.e., memory not used by the Historical JVM and heap/direct memory buffers or other processes on the system) for memory-mapping of segments on disk. For better query performance, you will want to ensure a good (`free system memory` / `druid.server.maxSize`) ratio so that a greater proportion of segments can be kept in memory.
The Historical will use any available free system memory (i.e., memory not used by the Historical JVM and heap/direct memory buffers or other processes on the system) for memory-mapping of segments on disk. For better query performance, you will want to ensure a good (`free system memory` / total size of all `druid.segmentCache.locations`) ratio so that a greater proportion of segments can be kept in memory.

#### Segment sizes matter

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ druid.processing.tmpDir=var/druid/processing

# Segment storage
druid.segmentCache.locations=[{"path":"var/druid/segment-cache","maxSize":"300g"}]
druid.server.maxSize=300g

# Query cache
druid.historical.cache.useCache=true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ druid.processing.tmpDir=var/druid/processing

# Segment storage
druid.segmentCache.locations=[{"path":"var/druid/segment-cache","maxSize":"300g"}]
druid.server.maxSize=300g

# Query cache
druid.historical.cache.useCache=true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ druid.processing.tmpDir=var/druid/processing

# Segment storage
druid.segmentCache.locations=[{"path":"var/druid/segment-cache","maxSize":"300g"}]
druid.server.maxSize=300g

# Query cache
druid.historical.cache.useCache=true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ druid.processing.tmpDir=var/druid/processing

# Segment storage
druid.segmentCache.locations=[{"path":"var/druid/segment-cache","maxSize":"300g"}]
druid.server.maxSize=300g

# Query cache
druid.historical.cache.useCache=true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ druid.processing.tmpDir=var/druid/processing

# Segment storage
druid.segmentCache.locations=[{"path":"var/druid/segment-cache","maxSize":"300g"}]
druid.server.maxSize=300g

# Query cache
druid.historical.cache.useCache=true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ druid.processing.tmpDir=var/druid/processing

# Segment storage
druid.segmentCache.locations=[{"path":"var/druid/segment-cache","maxSize":"300g"}]
druid.server.maxSize=300g

# Query cache
druid.historical.cache.useCache=true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ druid.processing.tmpDir=var/druid/processing

# Segment storage
druid.segmentCache.locations=[{"path":"var/druid/segment-cache","maxSize":"300g"}]
druid.server.maxSize=300g

# Query cache
druid.historical.cache.useCache=true
Expand Down
1 change: 0 additions & 1 deletion integration-tests/docker/environment-configs/historical
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,5 @@ druid_processing_buffer_sizeBytes=25000000
druid_processing_numThreads=2
druid_query_groupBy_maxOnDiskStorage=300000000
druid_segmentCache_locations=[{"path":"/shared/druid/indexCache","maxSize":5000000000}]
druid_server_maxSize=5000000000
druid_auth_basic_common_cacheDirectory=/tmp/authCache/historical
druid_server_https_crlPath=/tls/revocations.crl
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,5 @@ druid_processing_buffer_sizeBytes=25000000
druid_processing_numThreads=2
druid_query_groupBy_maxOnDiskStorage=300000000
druid_segmentCache_locations=[{"path":"/shared/druid/indexCache-query-retry-test","maxSize":5000000000}]
druid_server_maxSize=5000000000
druid_auth_basic_common_cacheDirectory=/tmp/authCache/historical-query-retry-test
druid_server_https_crlPath=/tls/revocations.crl
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,14 @@

package org.apache.druid.client;

import com.fasterxml.jackson.annotation.JacksonInject;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.collect.Sets;
import com.google.inject.Inject;
import org.apache.druid.java.util.common.HumanReadableBytes;
import org.apache.druid.java.util.common.HumanReadableBytesRange;
import org.apache.druid.segment.loading.SegmentLoaderConfig;

import javax.validation.constraints.NotNull;
import java.util.Set;
Expand All @@ -45,8 +49,23 @@ public class DruidServerConfig
@NotNull
private Set<String> hiddenProperties = Sets.newHashSet("druid.s3.accessKey", "druid.s3.secretKey", "druid.metadata.storage.connector.password");

private SegmentLoaderConfig segmentLoaderConfig;

/**
 * Creates the config with the {@link SegmentLoaderConfig} used to derive a default
 * for {@code maxSize} when it is not explicitly configured (see {@code getMaxSize()}).
 *
 * {@code @Inject} lets Guice bind this dependency into dependents such as StatusResource;
 * {@code @JsonCreator}/{@code @JacksonInject} cover Jackson deserialization of runtime properties.
 *
 * @param segmentLoaderConfig segment cache configuration whose combined location size
 *                            serves as the default server max size
 */
@Inject
@JsonCreator
public DruidServerConfig(
@JacksonInject SegmentLoaderConfig segmentLoaderConfig
)
{
this.segmentLoaderConfig = segmentLoaderConfig;
}

/**
 * Returns the maximum total size (in bytes) of segments this server wants assigned.
 * When {@code maxSize} was left at its default of zero, falls back to the combined
 * {@code maxSize} of all segment cache locations.
 */
public long getMaxSize()
{
  return maxSize.equals(HumanReadableBytes.ZERO)
         ? segmentLoaderConfig.getCombinedMaxSize()
         : maxSize.getBytes();
}

Expand All @@ -64,4 +83,5 @@ public Set<String> getHiddenProperties()
{
return hiddenProperties;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import java.util.concurrent.TimeUnit;

/**
*
*/
public class SegmentLoaderConfig
{
Expand Down Expand Up @@ -63,6 +64,8 @@ public class SegmentLoaderConfig
@JsonProperty
private int statusQueueMaxSize = 100;

private long combinedMaxSize = 0;

public List<StorageLocationConfig> getLocations()
{
return locations;
Expand Down Expand Up @@ -120,6 +123,14 @@ public int getStatusQueueMaxSize()
return statusQueueMaxSize;
}

/**
 * Returns the sum of {@code maxSize} across all configured segment cache locations.
 * The sum is computed lazily on first call and cached in {@code combinedMaxSize}.
 *
 * NOTE(review): the lazy initialization is not synchronized — concurrent first calls
 * may each compute the sum. The result is deterministic so the race looks benign,
 * but confirm this is only reached from single-threaded config/startup paths.
 */
public long getCombinedMaxSize()
{
if (combinedMaxSize == 0) {
combinedMaxSize = getLocations().stream().mapToLong(StorageLocationConfig::getMaxSize).sum();
}
return combinedMaxSize;
}

public SegmentLoaderConfig withLocations(List<StorageLocationConfig> locations)
{
SegmentLoaderConfig retVal = new SegmentLoaderConfig();
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.druid.client;

import com.fasterxml.jackson.databind.InjectableValues;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.collect.ImmutableList;
import com.google.inject.Injector;
import com.google.inject.Module;
import com.google.inject.name.Names;
import org.apache.druid.guice.GuiceInjectors;
import org.apache.druid.initialization.Initialization;
import org.apache.druid.jackson.DefaultObjectMapper;
import org.apache.druid.segment.loading.SegmentLoaderConfig;
import org.apache.druid.segment.loading.StorageLocationConfig;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

/**
 * Tests for {@code DruidServerConfig}, in particular the behavior of
 * {@code getMaxSize()} defaulting to the combined segment-cache size when
 * {@code maxSize} is left unset.
 */
public class DruidServerConfigTest
{
  private File testSegmentCacheDir1;
  private File testSegmentCacheDir2;

  @Rule
  public final TemporaryFolder tmpFolder = new TemporaryFolder();

  public ObjectMapper mapper = new DefaultObjectMapper();

  // Minimal Guice bindings required by the server modules pulled in during injector creation.
  private static final Module SERVER_CONFIG_MODULE = (binder) -> {
    binder.bindConstant().annotatedWith(Names.named("serviceName")).to("druid/test");
    binder.bindConstant().annotatedWith(Names.named("servicePort")).to(0);
    binder.bindConstant().annotatedWith(Names.named("tlsServicePort")).to(-1);
  };

  @Before
  public void setUp() throws Exception
  {
    testSegmentCacheDir1 = tmpFolder.newFolder("segment_cache_folder1");
    testSegmentCacheDir2 = tmpFolder.newFolder("segment_cache_folder2");
  }

  /**
   * DruidServerConfig must be constructible through Guice (its constructor is
   * annotated with @Inject so dependents such as StatusResource can receive it).
   */
  @Test
  public void testBasicInjection()
  {
    final Injector injector = Initialization.makeInjectorWithModules(
        GuiceInjectors.makeStartupInjector(), ImmutableList.of(SERVER_CONFIG_MODULE)
    );
    final DruidServerConfig druidServerConfig = injector.getInstance(DruidServerConfig.class);

    Assert.assertNotNull(druidServerConfig);
    Assert.assertEquals(DruidServerConfig.class, druidServerConfig.getClass());
  }

  /**
   * When maxSize is unset (0), getMaxSize() should return the sum of the maxSize
   * values of all segment cache locations.
   */
  @Test
  public void testCombinedSize()
  {
    final List<StorageLocationConfig> locations = new ArrayList<>();
    final StorageLocationConfig locationConfig1 = new StorageLocationConfig(testSegmentCacheDir1, 10000000000L, null);
    final StorageLocationConfig locationConfig2 = new StorageLocationConfig(testSegmentCacheDir2, 20000000000L, null);
    locations.add(locationConfig1);
    locations.add(locationConfig2);
    DruidServerConfig druidServerConfig = new DruidServerConfig(new SegmentLoaderConfig().withLocations(locations));
    Assert.assertEquals(30000000000L, druidServerConfig.getMaxSize());
  }

  /**
   * An explicitly configured non-zero maxSize must take precedence over the
   * segment-cache-derived default, and both behaviors must survive a Jackson
   * serialize/deserialize round trip.
   */
  @Test
  public void testServerMaxSizePrecedence() throws Exception
  {
    String serverConfigWithDefaultSizeStr = "{\"maxSize\":0,\"tier\":\"_default_tier\",\"priority\":0,"
                                            + "\"hiddenProperties\":[\"druid.metadata.storage.connector.password\","
                                            + "\"druid.s3.accessKey\",\"druid.s3.secretKey\"]}\n";

    String serverConfigWithNonDefaultSizeStr = "{\"maxSize\":123456,\"tier\":\"_default_tier\",\"priority\":0,"
                                               + "\"hiddenProperties\":[\"druid.metadata.storage.connector.password\","
                                               + "\"druid.s3.accessKey\",\"druid.s3.secretKey\"]}\n";

    final List<StorageLocationConfig> locations = new ArrayList<>();
    final StorageLocationConfig locationConfig1 = new StorageLocationConfig(testSegmentCacheDir1, 10000000000L, null);
    locations.add(locationConfig1);
    // SegmentLoaderConfig is @JacksonInject-ed into DruidServerConfig's constructor,
    // so it must be registered as an injectable value before deserializing.
    mapper.setInjectableValues(new InjectableValues.Std().addValue(ObjectMapper.class, new DefaultObjectMapper())
                                                         .addValue(
                                                             SegmentLoaderConfig.class,
                                                             new SegmentLoaderConfig().withLocations(locations)
                                                         ));

    DruidServerConfig serverConfigWithDefaultSize = mapper.readValue(
        mapper.writeValueAsString(
            mapper.readValue(serverConfigWithDefaultSizeStr, DruidServerConfig.class)
        ),
        DruidServerConfig.class
    );

    DruidServerConfig serverConfigWithNonDefaultSize = mapper.readValue(
        mapper.writeValueAsString(
            mapper.readValue(serverConfigWithNonDefaultSizeStr, DruidServerConfig.class)
        ),
        DruidServerConfig.class
    );

    // JUnit's assertEquals contract is (expected, actual); the original had the
    // arguments reversed here, producing misleading failure messages.
    Assert.assertEquals(10000000000L, serverConfigWithDefaultSize.getMaxSize());
    Assert.assertEquals(123456L, serverConfigWithNonDefaultSize.getMaxSize());
  }
}

Loading