diff --git a/.travis.yml b/.travis.yml index d93f928ad0aa..d92c00ba5a93 100644 --- a/.travis.yml +++ b/.travis.yml @@ -232,7 +232,20 @@ jobs: - name: "docs" install: (cd website && npm install) - script: (cd website && npm run lint) + script: (cd website && npm run lint && npm run spellcheck) + after_failure: |- + echo "FAILURE EXPLANATION: + + If there are spell check errors: + + 1) Suppressing False Positives: Edit website/.spelling to add suppressions. Instructions + are at the top of the file and explain how to suppress false positives either globally or + within a particular file. + + 2) Running Spell Check Locally: cd website && npm install && npm run spellcheck + + For more information, refer to: https://www.npmjs.com/package/markdown-spellcheck + " - &integration_batch_index name: "batch index integration test" diff --git a/docs/comparisons/druid-vs-kudu.md b/docs/comparisons/druid-vs-kudu.md index b0ff9324a2cf..aca743dda813 100644 --- a/docs/comparisons/druid-vs-kudu.md +++ b/docs/comparisons/druid-vs-kudu.md @@ -35,5 +35,5 @@ Druid's segment architecture is heavily geared towards fast aggregates and filte fast in Druid, whereas updates of older data is higher latency. This is by design as the data Druid is good for is typically event data, and does not need to be updated too frequently. Kudu supports arbitrary primary keys with uniqueness constraints, and efficient lookup by ranges of those keys. Kudu chooses not to include the execution engine, but supports sufficient -operations so as to allow node-local processing from the execution engines. This means that Kudu can support multiple frameworks on the same data (eg MR, Spark, and SQL). +operations so as to allow node-local processing from the execution engines. This means that Kudu can support multiple frameworks on the same data (e.g., MR, Spark, and SQL). Druid includes its own query layer that allows it to push down aggregations and computations directly to data processes for faster query processing. diff --git a/docs/comparisons/druid-vs-sql-on-hadoop.md b/docs/comparisons/druid-vs-sql-on-hadoop.md index 76665075e943..19eb438c5bd2 100644 --- a/docs/comparisons/druid-vs-sql-on-hadoop.md +++ b/docs/comparisons/druid-vs-sql-on-hadoop.md @@ -37,7 +37,7 @@ Druid was designed to 1. handle slice-n-dice style ad-hoc queries SQL-on-Hadoop engines generally sidestep Map/Reduce, instead querying data directly from HDFS or, in some cases, other storage systems. -Some of these engines (including Impala and Presto) can be colocated with HDFS data nodes and coordinate with them to achieve data locality for queries. +Some of these engines (including Impala and Presto) can be co-located with HDFS data nodes and coordinate with them to achieve data locality for queries. What does this mean? We can talk about it in terms of three general areas 1. Queries @@ -53,7 +53,7 @@ are queries and results, and all computation is done internally as part of the D Most SQL-on-Hadoop engines are responsible for query planning and execution for underlying storage layers and storage formats. They are processes that stay on even if there is no query running (eliminating the JVM startup costs from Hadoop MapReduce). Some (Impala/Presto) SQL-on-Hadoop engines have daemon processes that can be run where the data is stored, virtually eliminating network transfer costs. There is still -some latency overhead (e.g. serde time) associated with pulling data from the underlying storage layer into the computation layer. 
We are unaware of exactly +some latency overhead (e.g. serialization/deserialization time) associated with pulling data from the underlying storage layer into the computation layer. We are unaware of exactly how much of a performance impact this makes. ### Data Ingestion @@ -79,4 +79,4 @@ Parquet is a column storage format that is designed to work with SQL-on-Hadoop e relies on external sources to pull data out of it. Druid's storage format is highly optimized for linear scans. Although Druid has support for nested data, Parquet's storage format is much -more hierachical, and is more designed for binary chunking. In theory, this should lead to faster scans in Druid. +more hierarchical, and is more designed for binary chunking. In theory, this should lead to faster scans in Druid. diff --git a/docs/configuration/index.md b/docs/configuration/index.md index 43d1a14903e7..5a25b2917b9d 100644 --- a/docs/configuration/index.md +++ b/docs/configuration/index.md @@ -58,7 +58,7 @@ conf/tranquility: kafka.json server.json ``` -Each directory has a `runtime.properties` file containing configuration properties for the specific Druid process correponding to the directory (e.g., `historical`). +Each directory has a `runtime.properties` file containing configuration properties for the specific Druid process corresponding to the directory (e.g., `historical`). The `jvm.config` files contain JVM flags such as heap sizing properties for each service. @@ -95,7 +95,7 @@ Many of Druid's external dependencies can be plugged in as modules. Extensions c |Property|Description|Default| |--------|-----------|-------| -|`druid.modules.excludeList`|A JSON array of canonical class names (e. g. `"org.apache.druid.somepackage.SomeModule"`) of module classes which shouldn't be loaded, even if they are found in extensions specified by `druid.extensions.loadList`, or in the list of core modules specified to be loaded on a particular Druid process type. Useful when some useful extension contains some module, which shouldn't be loaded on some Druid process type because some dependencies of that module couldn't be satisfied.|[]| +|`druid.modules.excludeList`|A JSON array of canonical class names (e.g., `"org.apache.druid.somepackage.SomeModule"`) of module classes which shouldn't be loaded, even if they are found in extensions specified by `druid.extensions.loadList`, or in the list of core modules specified to be loaded on a particular Druid process type. Useful when some useful extension contains some module, which shouldn't be loaded on some Druid process type because some dependencies of that module couldn't be satisfied.|[]| ### Zookeeper We recommend just setting the base ZK path and the ZK service host, but all ZK paths that Druid uses can be overwritten to absolute paths. @@ -161,7 +161,7 @@ That is, it allows Druid to keep the connections of Exhibitor-supervised ZooKeep |`druid.exhibitor.service.port`|The REST port used to connect to Exhibitor.|`8080`| |`druid.exhibitor.service.restUriPath`|The path of the REST call used to get the server set.|`/exhibitor/v1/cluster/list`| |`druid.exhibitor.service.useSsl`|Boolean flag for whether or not to use https protocol.|`false`| -|`druid.exhibitor.service.pollingMs`|How ofter to poll the exhibitors for the list|`10000`| +|`druid.exhibitor.service.pollingMs`|How often to poll the exhibitors for the list|`10000`| Note that `druid.zk.service.host` is used as a backup in case an Exhibitor instance can't be contacted and therefore should still be set. 
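For illustration only — the hostnames below are placeholders, and `druid.exhibitor.service.hosts` comes from the full property table rather than this hunk — the Exhibitor-related settings described above might be combined in a `common.runtime.properties` file roughly as follows:

```
# Hypothetical example; hostnames are placeholders.
# druid.zk.service.host remains set as a fallback in case Exhibitor can't be contacted.
druid.zk.service.host=zk1.example.com:2181
druid.exhibitor.service.hosts=["exhibitor1.example.com","exhibitor2.example.com"]
druid.exhibitor.service.port=8080
druid.exhibitor.service.restUriPath=/exhibitor/v1/cluster/list
druid.exhibitor.service.useSsl=false
druid.exhibitor.service.pollingMs=10000
```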
@@ -261,7 +261,7 @@ Daily request logs are stored on disk. |Property|Description|Default| |--------|-----------|-------| -|`druid.request.logging.dir`|Historical, Realtime and Broker processes maintain request logs of all of the requests they get (interacton is via POST, so normal request logs don’t generally capture information about the actual query), this specifies the directory to store the request logs in|none| +|`druid.request.logging.dir`|Historical, Realtime and Broker processes maintain request logs of all of the requests they get (interaction is via POST, so normal request logs don’t generally capture information about the actual query), this specifies the directory to store the request logs in|none| |`druid.request.logging.filePattern`|[Joda datetime format](http://www.joda.org/joda-time/apidocs/org/joda/time/format/DateTimeFormat.html) for each file|"yyyy-MM-dd'.log'"| The format of request logs is TSV, one line per requests, with five fields: timestamp, remote\_addr, native\_query, query\_context, sql\_query. @@ -286,7 +286,7 @@ Every request is emitted to some external location. #### SLF4J Request Logging -Every request is logged via SLF4J. Native queries are serialized into JSON in the log message regardless of the SJF4J format specification. They will be logged under the class `org.apache.druid.server.log.LoggingRequestLogger`. +Every request is logged via SLF4J. Native queries are serialized into JSON in the log message regardless of the SLF4J format specification. They will be logged under the class `org.apache.druid.server.log.LoggingRequestLogger`. |Property|Description|Default| |--------|-----------|-------| @@ -313,8 +313,8 @@ For native query, only request logs where query/time is above the threshold are |Property|Description|Default| |--------|-----------|-------| -|`druid.request.logging.queryTimeThresholdMs`|Threshold value for query/time in milliseconds.|0 i.e no filtering| -|`druid.request.logging.sqlQueryTimeThresholdMs`|Threshold value for sqlQuery/time in milliseconds.|0 i.e no filtering| +|`druid.request.logging.queryTimeThresholdMs`|Threshold value for query/time in milliseconds.|0, i.e., no filtering| +|`druid.request.logging.sqlQueryTimeThresholdMs`|Threshold value for sqlQuery/time in milliseconds.|0, i.e., no filtering| |`druid.request.logging.mutedQueryTypes` | Query requests of these types are not logged. Query types are defined as string objects corresponding to the "queryType" value for the specified query in the Druid's [native JSON query API](http://druid.apache.org/docs/latest/querying/querying.html). Misspelled query types will be ignored. Example to ignore scan and timeBoundary queries: ["scan", "timeBoundary"]| []| |`druid.request.logging.delegate.type`|Type of delegate request logger to log requests.|none| @@ -380,7 +380,7 @@ The Druid servers [emit various metrics](../operations/metrics.md) and alerts vi |--------|-----------|-------| |`druid.emitter.http.flushMillis`|How often the internal message buffer is flushed (data is sent).|60000| |`druid.emitter.http.flushCount`|How many messages the internal message buffer can hold before flushing (sending).|500| -|`druid.emitter.http.basicAuthentication`|Login and password for authentification in "login:password" form, e. g. 
`druid.emitter.http.basicAuthentication=admin:adminpassword`|not specified = no authentification| +|`druid.emitter.http.basicAuthentication`|Login and password for authentication in "login:password" form, e.g., `druid.emitter.http.basicAuthentication=admin:adminpassword`|not specified = no authentication| |`druid.emitter.http.flushTimeOut`|The timeout after which an event should be sent to the endpoint, even if internal buffers are not filled, in milliseconds.|not specified = no timeout| |`druid.emitter.http.batchingStrategy`|The strategy of how the batch is formatted. "ARRAY" means `[event1,event2]`, "NEWLINES" means `event1\nevent2`, ONLY_EVENTS means `event1event2`.|ARRAY| |`druid.emitter.http.maxBatchSize`|The maximum batch size, in bytes.|the minimum of (10% of JVM heap size divided by 2) or (5191680 (i. e. 5 MB))| @@ -410,20 +410,20 @@ The following properties allow the Http Emitter to use its own truststore config #### Parametrized Http Emitter Module `druid.emitter.parametrized.httpEmitting.*` configs correspond to the configs of Http Emitter Modules, see above. -Except `recipientBaseUrl`. E. g. `druid.emitter.parametrized.httpEmitting.flushMillis`, +Except `recipientBaseUrl`. E.g., `druid.emitter.parametrized.httpEmitting.flushMillis`, `druid.emitter.parametrized.httpEmitting.flushCount`, `druid.emitter.parametrized.httpEmitting.ssl.trustStorePath`, etc. The additional configs are: |Property|Description|Default| |--------|-----------|-------| -|`druid.emitter.parametrized.recipientBaseUrlPattern`|The URL pattern to send an event to, based on the event's feed. E. g. `http://foo.bar/{feed}`, that will send event to `http://foo.bar/metrics` if the event's feed is "metrics".|none, required config| +|`druid.emitter.parametrized.recipientBaseUrlPattern`|The URL pattern to send an event to, based on the event's feed. E.g., `http://foo.bar/{feed}`, that will send event to `http://foo.bar/metrics` if the event's feed is "metrics".|none, required config| #### Composing Emitter Module |Property|Description|Default| |--------|-----------|-------| -|`druid.emitter.composing.emitters`|List of emitter modules to load e.g. ["logging","http"].|[]| +|`druid.emitter.composing.emitters`|List of emitter modules to load, e.g., ["logging","http"].|[]| #### Graphite Emitter @@ -432,12 +432,12 @@ To use graphite as emitter set `druid.emitter=graphite`. For configuration detai ### Metadata storage -These properties specify the jdbc connection and other configuration around the metadata storage. The only processes that connect to the metadata storage with these properties are the [Coordinator](../design/coordinator.md) and [Overlord](../design/overlord.md). +These properties specify the JDBC connection and other configuration around the metadata storage. The only processes that connect to the metadata storage with these properties are the [Coordinator](../design/coordinator.md) and [Overlord](../design/overlord.md). |Property|Description|Default| |--------|-----------|-------| |`druid.metadata.storage.type`|The type of metadata storage to use. 
Choose from "mysql", "postgresql", or "derby".|derby| -|`druid.metadata.storage.connector.connectURI`|The jdbc uri for the database to connect to|none| +|`druid.metadata.storage.connector.connectURI`|The JDBC URI for the database to connect to|none| |`druid.metadata.storage.connector.user`|The username to connect with.|none| |`druid.metadata.storage.connector.password`|The [Password Provider](../operations/password-provider.md) or String password used to connect with.|none| |`druid.metadata.storage.connector.createTables`|If Druid requires a table and it doesn't exist, create it?|true| @@ -451,7 +451,7 @@ These properties specify the jdbc connection and other configuration around the |`druid.metadata.storage.tables.taskLog`|Used by the indexing service to store task logs.|druid_taskLog| |`druid.metadata.storage.tables.taskLock`|Used by the indexing service to store task locks.|druid_taskLock| |`druid.metadata.storage.tables.supervisors`|Used by the indexing service to store supervisor configurations.|druid_supervisors| -|`druid.metadata.storage.tables.audit`|The table to use for audit history of configuration changes e.g. Coordinator rules.|druid_audit| +|`druid.metadata.storage.tables.audit`|The table to use for audit history of configuration changes, e.g., Coordinator rules.|druid_audit| ### Deep storage @@ -517,7 +517,7 @@ If you are running the indexing service in remote mode, the task logs must be st |`druid.indexer.logs.type`|Choices:noop, s3, azure, google, hdfs, file. Where to store task logs|file| You can also configure the Overlord to automatically retain the task logs in log directory and entries in task-related metadata storage tables only for last x milliseconds by configuring following additional properties. -Caution: Automatic log file deletion typically works based on log file modification timestamp on the backing store, so large clock skews between druid processes and backing store nodes might result in un-intended behavior. +Caution: Automatic log file deletion typically works based on log file modification timestamp on the backing store, so large clock skews between druid processes and backing store nodes might result in unintended behavior. |Property|Description|Default| |--------|-----------|-------| @@ -621,7 +621,7 @@ the following properties. Prior to version 0.13.0 Druid's storage layer used a 32-bit float representation to store columns created by the doubleSum, doubleMin, and doubleMax aggregators at indexing time. Starting from version 0.13.0 the default will be 64-bit floats for Double columns. -Using 64-bit representation for double column will lead to avoid precesion loss at the cost of doubling the storage size of such columns. +Using 64-bit representation for double column will lead to avoid precision loss at the cost of doubling the storage size of such columns. To keep the old format set the system-wide property `druid.indexing.doubleStorage=float`. You can also use floatSum, floatMin and floatMax to use 32-bit float representation. Support for 64-bit floating point columns was released in Druid 0.11.0, so if you use this feature then older versions of Druid will not be able to read your data segments. @@ -660,14 +660,14 @@ These Coordinator static configurations can be defined in the `coordinator/runti |`druid.coordinator.period.indexingPeriod`|How often to send compact/merge/conversion tasks to the indexing service. 
It's recommended to be longer than `druid.manager.segments.pollDuration`|PT1800S (30 mins)| |`druid.coordinator.startDelay`|The operation of the Coordinator works on the assumption that it has an up-to-date view of the state of the world when it runs, the current ZK interaction code, however, is written in a way that doesn’t allow the Coordinator to know for a fact that it’s done loading the current state of the world. This delay is a hack to give it enough time to believe that it has all the data.|PT300S| |`druid.coordinator.load.timeout`|The timeout duration for when the Coordinator assigns a segment to a Historical process.|PT15M| -|`druid.coordinator.kill.pendingSegments.on`|Boolean flag for whether or not the Coordinator clean up old entries in the `pendingSegments` table of metadata store. If set to true, Coordinator will check the created time of most recently complete task. If it doesn't exist, it finds the created time of the earlist running/pending/waiting tasks. Once the created time is found, then for all dataSources not in the `killPendingSegmentsSkipList` (see [Dynamic configuration](#dynamic-configuration)), Coordinator will ask the Overlord to clean up the entries 1 day or more older than the found created time in the `pendingSegments` table. This will be done periodically based on `druid.coordinator.period` specified.|false| +|`druid.coordinator.kill.pendingSegments.on`|Boolean flag for whether or not the Coordinator clean up old entries in the `pendingSegments` table of metadata store. If set to true, Coordinator will check the created time of most recently complete task. If it doesn't exist, it finds the created time of the earliest running/pending/waiting tasks. Once the created time is found, then for all dataSources not in the `killPendingSegmentsSkipList` (see [Dynamic configuration](#dynamic-configuration)), Coordinator will ask the Overlord to clean up the entries 1 day or more older than the found created time in the `pendingSegments` table. This will be done periodically based on `druid.coordinator.period` specified.|false| |`druid.coordinator.kill.on`|Boolean flag for whether or not the Coordinator should submit kill task for unused segments, that is, hard delete them from metadata store and deep storage. If set to true, then for all whitelisted dataSources (or optionally all), Coordinator will submit tasks periodically based on `period` specified. These kill tasks will delete all segments except for the last `durationToRetain` period. Whitelist or All can be set via dynamic configuration `killAllDataSources` and `killDataSourceWhitelist` described later.|false| |`druid.coordinator.kill.period`|How often to send kill tasks to the indexing service. Value must be greater than `druid.coordinator.period.indexingPeriod`. Only applies if kill is turned on.|P1D (1 Day)| |`druid.coordinator.kill.durationToRetain`| Do not kill segments in last `durationToRetain`, must be greater or equal to 0. Only applies and MUST be specified if kill is turned on. Note that default value is invalid.|PT-1S (-1 seconds)| |`druid.coordinator.kill.maxSegments`|Kill at most n segments per kill task submission, must be greater than 0. Only applies and MUST be specified if kill is turned on. Note that default value is invalid.|0| |`druid.coordinator.balancer.strategy`|Specify the type of balancing strategy that the coordinator should use to distribute segments among the historicals. 
`cachingCost` is logically equivalent to `cost` but is more CPU-efficient on large clusters and will replace `cost` in the future versions, users are invited to try it. Use `diskNormalized` to distribute segments among processes so that the disks fill up uniformly and use `random` to randomly pick processes to distribute segments.|`cost`| |`druid.coordinator.balancer.cachingCost.awaitInitialization`|Whether to wait for segment view initialization before creating the `cachingCost` balancing strategy. This property is enabled only when `druid.coordinator.balancer.strategy` is `cachingCost`. If set to 'true', the Coordinator will not start to assign segments, until the segment view is initialized. If set to 'false', the Coordinator will fallback to use the `cost` balancing strategy only if the segment view is not initialized yet. Notes, it may take much time to wait for the initialization since the `cachingCost` balancing strategy involves much computing to build itself.|false| -|`druid.coordinator.loadqueuepeon.repeatDelay`|The start and repeat delay for the loadqueuepeon , which manages the load and drop of segments.|PT0.050S (50 ms)| +|`druid.coordinator.loadqueuepeon.repeatDelay`|The start and repeat delay for the loadqueuepeon, which manages the load and drop of segments.|PT0.050S (50 ms)| |`druid.coordinator.asOverlord.enabled`|Boolean value for whether this Coordinator process should act like an Overlord as well. This configuration allows users to simplify a druid cluster by not having to deploy any standalone Overlord processes. If set to true, then Overlord console is available at `http://coordinator-host:port/console.html` and be sure to set `druid.coordinator.asOverlord.overlordService` also. See next.|false| |`druid.coordinator.asOverlord.overlordService`| Required, if `druid.coordinator.asOverlord.enabled` is `true`. This must be same value as `druid.service` on standalone Overlord processes and `druid.selectors.indexing.serviceName` on Middle Managers.|NULL| @@ -695,7 +695,7 @@ These Coordinator static configurations can be defined in the `coordinator/runti #### Dynamic Configuration -The Coordinator has dynamic configuration to change certain behaviour on the fly. The Coordinator uses a JSON spec object from the Druid [metadata storage](../dependencies/metadata-storage.md) config table. This object is detailed below: +The Coordinator has dynamic configuration to change certain behavior on the fly. The Coordinator uses a JSON spec object from the Druid [metadata storage](../dependencies/metadata-storage.md) config table. This object is detailed below: It is recommended that you use the Coordinator Console to configure these parameters. However, if you need to do it via HTTP, the JSON object can be submitted to the Coordinator via a POST request at: @@ -971,12 +971,12 @@ http://:/druid/indexer/v1/worker/history?count= ##### Worker Select Strategy -Worker select strategies control how Druid assigns tasks to middleManagers. +Worker select strategies control how Druid assigns tasks to MiddleManagers. ###### Equal Distribution -Tasks are assigned to the middleManager with the most available capacity at the time the task begins running. This is -useful if you want work evenly distributed across your middleManagers. +Tasks are assigned to the MiddleManager with the most available capacity at the time the task begins running. This is +useful if you want work evenly distributed across your MiddleManagers. 
|Property|Description|Default| |--------|-----------|-------| @@ -986,11 +986,11 @@ useful if you want work evenly distributed across your middleManagers. ###### Fill Capacity Tasks are assigned to the worker with the most currently-running tasks at the time the task begins running. This is -useful in situations where you are elastically auto-scaling middleManagers, since it will tend to pack some full and +useful in situations where you are elastically auto-scaling MiddleManagers, since it will tend to pack some full and leave others empty. The empty ones can be safely terminated. Note that if `druid.indexer.runner.pendingTasksRunnerNumThreads` is set to _N_ > 1, then this strategy will fill _N_ -middleManagers up to capacity simultaneously, rather than a single middleManager. +MiddleManagers up to capacity simultaneously, rather than a single MiddleManager. |Property|Description|Default| |--------|-----------|-------| @@ -999,18 +999,18 @@ middleManagers up to capacity simultaneously, rather than a single middleManager -###### Javascript +###### JavaScript Allows defining arbitrary logic for selecting workers to run task using a JavaScript function. The function is passed remoteTaskRunnerConfig, map of workerId to available workers and task to be executed and returns the workerId on which the task should be run or null if the task cannot be run. It can be used for rapid development of missing features where the worker selection logic is to be changed or tuned often. -If the selection logic is quite complex and cannot be easily tested in javascript environment, +If the selection logic is quite complex and cannot be easily tested in JavaScript environment, its better to write a druid extension module with extending current worker selection strategies written in java. |Property|Description|Default| |--------|-----------|-------| |`type`|`javascript`.|required; must be `javascript`| -|`function`|String representing javascript function|| +|`function`|String representing JavaScript function|| Example: a function that sends batch_index_task to workers 10.0.0.1 and 10.0.0.2 and all other tasks to other available workers. @@ -1030,8 +1030,8 @@ field. If not provided, the default is to not use affinity at all. |Property|Description|Default| |--------|-----------|-------| -|`affinity`|JSON object mapping a datasource String name to a list of indexing service middleManager host:port String values. Druid doesn't perform DNS resolution, so the 'host' value must match what is configured on the middleManager and what the middleManager announces itself as (examine the Overlord logs to see what your middleManager announces itself as).|{}| -|`strong`|With weak affinity (the default), tasks for a dataSource may be assigned to other middleManagers if their affinity-mapped middleManagers are not able to run all pending tasks in the queue for that dataSource. With strong affinity, tasks for a dataSource will only ever be assigned to their affinity-mapped middleManagers, and will wait in the pending queue if necessary.|false| +|`affinity`|JSON object mapping a datasource String name to a list of indexing service MiddleManager host:port String values. 
Druid doesn't perform DNS resolution, so the 'host' value must match what is configured on the MiddleManager and what the MiddleManager announces itself as (examine the Overlord logs to see what your MiddleManager announces itself as).|{}| +|`strong`|With weak affinity (the default), tasks for a dataSource may be assigned to other MiddleManagers if their affinity-mapped MiddleManagers are not able to run all pending tasks in the queue for that dataSource. With strong affinity, tasks for a dataSource will only ever be assigned to their affinity-mapped MiddleManagers, and will wait in the pending queue if necessary.|false| ##### Autoscaler @@ -1076,11 +1076,11 @@ Middle managers pass their configurations down to their child peons. The MiddleM |`druid.indexer.runner.classpath`|Java classpath for the peon.|System.getProperty("java.class.path")| |`druid.indexer.runner.javaCommand`|Command required to execute java.|java| |`druid.indexer.runner.javaOpts`|*DEPRECATED* A string of -X Java options to pass to the peon's JVM. Quotable parameters or parameters with spaces are encouraged to use javaOptsArray|""| -|`druid.indexer.runner.javaOptsArray`|A json array of strings to be passed in as options to the peon's jvm. This is additive to javaOpts and is recommended for properly handling arguments which contain quotes or spaces like `["-XX:OnOutOfMemoryError=kill -9 %p"]`|`[]`| +|`druid.indexer.runner.javaOptsArray`|A JSON array of strings to be passed in as options to the peon's JVM. This is additive to javaOpts and is recommended for properly handling arguments which contain quotes or spaces like `["-XX:OnOutOfMemoryError=kill -9 %p"]`|`[]`| |`druid.indexer.runner.maxZnodeBytes`|The maximum size Znode in bytes that can be created in Zookeeper.|524288| |`druid.indexer.runner.startPort`|Starting port used for peon processes, should be greater than 1023 and less than 65536.|8100| |`druid.indexer.runner.endPort`|Ending port used for peon processes, should be greater than or equal to `druid.indexer.runner.startPort` and less than 65536.|65535| -|`druid.indexer.runner.ports`|A json array of integers to specify ports that used for peon processes. If provided and non-empty, ports for peon processes will be chosen from these ports. And `druid.indexer.runner.startPort/druid.indexer.runner.endPort` will be completely ignored.|`[]`| +|`druid.indexer.runner.ports`|A JSON array of integers to specify ports that used for peon processes. If provided and non-empty, ports for peon processes will be chosen from these ports. 
And `druid.indexer.runner.startPort/druid.indexer.runner.endPort` will be completely ignored.|`[]`| |`druid.worker.ip`|The IP of the worker.|localhost| |`druid.worker.version`|Version identifier for the MiddleManager.|0| |`druid.worker.capacity`|Maximum number of tasks the MiddleManager can accept.|Number of available processors - 1| @@ -1141,7 +1141,7 @@ Additional peon configs include: |`druid.indexer.task.directoryLockTimeout`|Wait this long for zombie peons to exit before giving up on their replacements.|PT10M| |`druid.indexer.task.gracefulShutdownTimeout`|Wait this long on middleManager restart for restorable tasks to gracefully exit.|PT5M| |`druid.indexer.task.hadoopWorkingPath`|Temporary working directory for Hadoop tasks.|`/tmp/druid-indexing`| -|`druid.indexer.task.restoreTasksOnRestart`|If true, middleManagers will attempt to stop tasks gracefully on shutdown and restore them on restart.|false| +|`druid.indexer.task.restoreTasksOnRestart`|If true, MiddleManagers will attempt to stop tasks gracefully on shutdown and restore them on restart.|false| |`druid.indexer.server.maxChatRequests`|Maximum number of concurrent requests served by a task's chat handler. Set to 0 to disable limiting.|0| If the peon is running in remote mode, there must be an Overlord up and running. Peons in remote mode can set the following configurations: @@ -1154,11 +1154,11 @@ If the peon is running in remote mode, there must be an Overlord up and running. ##### SegmentWriteOutMediumFactory -When new segments are created, Druid temporarily stores some pre-processed data in some buffers. Currently two types of +When new segments are created, Druid temporarily stores some preprocessed data in some buffers. Currently two types of *medium* exist for those buffers: *temporary files* and *off-heap memory*. *Temporary files* (`tmpFile`) are stored under the task working directory (see `druid.indexer.task.baseTaskDir` -configuration above) and thus share it's mounting properies, e. g. they could be backed by HDD, SSD or memory (tmpfs). +configuration above) and thus share it's mounting properties, e. g. they could be backed by HDD, SSD or memory (tmpfs). This type of medium may do unnecessary disk I/O and requires some disk space to be available. *Off-heap memory medium* (`offHeapMemory`) creates buffers in off-heap memory of a JVM process that is running a task. @@ -1462,9 +1462,9 @@ The Druid SQL server is configured through the following properties on the Broke |`druid.sql.planner.maxTopNLimit`|Maximum threshold for a [TopN query](../querying/topnquery.md). Higher limits will be planned as [GroupBy queries](../querying/groupbyquery.md) instead.|100000| |`druid.sql.planner.metadataRefreshPeriod`|Throttle for metadata refreshes.|PT1M| |`druid.sql.planner.selectThreshold`|Page size threshold for [Select queries](../querying/select-query.md). Select queries for larger resultsets will be issued back-to-back using pagination.|1000| -|`druid.sql.planner.useApproximateCountDistinct`|Whether to use an approximate cardinalty algorithm for `COUNT(DISTINCT foo)`.|true| +|`druid.sql.planner.useApproximateCountDistinct`|Whether to use an approximate cardinality algorithm for `COUNT(DISTINCT foo)`.|true| |`druid.sql.planner.useApproximateTopN`|Whether to use approximate [TopN queries](../querying/topnquery.md) when a SQL query could be expressed as such. 
If false, exact [GroupBy queries](../querying/groupbyquery.md) will be used instead.|true| -|`druid.sql.planner.requireTimeCondition`|Whether to require SQL to have filter conditions on __time column so that all generated native queries will have user specified intervals. If true, all queries wihout filter condition on __time column will fail|false| +|`druid.sql.planner.requireTimeCondition`|Whether to require SQL to have filter conditions on __time column so that all generated native queries will have user specified intervals. If true, all queries without filter condition on __time column will fail|false| |`druid.sql.planner.sqlTimeZone`|Sets the default time zone for the server, which will affect how time functions and timestamp literals behave. Should be a time zone name like "America/Los_Angeles" or offset like "-08:00".|UTC| |`druid.sql.planner.serializeComplexValues`|Whether to serialize "complex" output values, false will return the class name instead of the serialized value.|true| @@ -1497,10 +1497,10 @@ See [cache configuration](#cache-configuration) for how to configure cache setti This section describes caching configuration that is common to Broker, Historical, and MiddleManager/Peon processes. -Caching can optionally be enabled on the Broker, Historical, and MiddleManager/Peon processses. See [Broker](#broker-caching), +Caching can optionally be enabled on the Broker, Historical, and MiddleManager/Peon processes. See [Broker](#broker-caching), [Historical](#historical-caching), and [Peon](#peon-caching) configuration options for how to enable it for different processes. -Druid uses a local in-memory cache by default, unless a diffrent type of cache is specified. +Druid uses a local in-memory cache by default, unless a different type of cache is specified. Use the `druid.cache.type` configuration to set a different kind of cache. Cache settings are set globally, so the same configuration can be re-used @@ -1667,7 +1667,7 @@ Supported runtime properties: |`druid.query.groupBy.bufferGrouperInitialBuckets`|Initial number of buckets in the off-heap hash table used for grouping results. Set to 0 to use a reasonable default (1024).|0| |`druid.query.groupBy.bufferGrouperMaxLoadFactor`|Maximum load factor of the off-heap hash table used for grouping results. When the load factor exceeds this size, the table will be grown or spilled to disk. Set to 0 to use a reasonable default (0.7).|0| |`druid.query.groupBy.forceHashAggregation`|Force to use hash-based aggregation.|false| -|`druid.query.groupBy.intermediateCombineDegree`|Number of intermediate processes combined together in the combining tree. Higher degrees will need less threads which might be helpful to improve the query performance by reducing the overhead of too many threads if the server has sufficiently powerful cpu cores.|8| +|`druid.query.groupBy.intermediateCombineDegree`|Number of intermediate processes combined together in the combining tree. Higher degrees will need less threads which might be helpful to improve the query performance by reducing the overhead of too many threads if the server has sufficiently powerful CPU cores.|8| |`druid.query.groupBy.numParallelCombineThreads`|Hint for the number of parallel combining threads. This should be larger than 1 to turn on the parallel combining feature. 
The actual number of threads used for parallel combining is min(`druid.query.groupBy.numParallelCombineThreads`, `druid.processing.numThreads`).|1 (disabled)| Supported query contexts: @@ -1722,5 +1722,5 @@ Supported query contexts: |`druid.router.pollPeriod`|Any ISO8601 duration.|How often to poll for new rules.|PT1M| |`druid.router.strategies`|An ordered JSON array of objects.|Please see [Router Strategies](../design/router.html#router-strategies) for details.|[{"type":"timeBoundary"},{"type":"priority"}]| |`druid.router.avatica.balancer.type`|String representing an AvaticaConnectionBalancer name. Please see [Avatica Query Balancing](../design/router.html#avatica-query-balancing)|Class to use for balancing Avatica queries across Brokers|rendezvousHash| -|`druid.router.http.maxRequestBufferSize`|Maximum size of the buffer used to write requests when forwarding them to the Broker. This should be set to atleast the maxHeaderSize allowed on the Broker|8 * 1024| +|`druid.router.http.maxRequestBufferSize`|Maximum size of the buffer used to write requests when forwarding them to the Broker. This should be set to at least the maxHeaderSize allowed on the Broker|8 * 1024| |`druid.router.managementProxy.enabled`|Enables the Router's [management proxy](../design/router.html#router-as-management-proxy) functionality.|false| diff --git a/docs/dependencies/metadata-storage.md b/docs/dependencies/metadata-storage.md index bdfa6bd1b6c4..fab7641af4fe 100644 --- a/docs/dependencies/metadata-storage.md +++ b/docs/dependencies/metadata-storage.md @@ -54,7 +54,7 @@ See [postgresql-metadata-storage](../development/extensions-core/postgresql.md). ## Adding custom dbcp properties -NOTE: These properties are not settable through the druid.metadata.storage.connector.dbcp properties : username, password, connectURI, validationQuery, testOnBorrow. These must be set through druid.metadata.storage.connector properties. +NOTE: These properties are not settable through the `druid.metadata.storage.connector.dbcp properties`: `username`, `password`, `connectURI`, `validationQuery`, `testOnBorrow`. These must be set through `druid.metadata.storage.connector` properties. Example supported properties: @@ -78,7 +78,7 @@ system. The table has two main functional columns, the other columns are for indexing purposes. The `used` column is a boolean "tombstone". A 1 means that the segment should -be "used" by the cluster (i.e. it should be loaded and available for requests). +be "used" by the cluster (i.e., it should be loaded and available for requests). A 0 means that the segment should not be actively loaded into the cluster. We do this as a means of removing segments from the cluster without actually removing their metadata (which allows for simpler rolling back if that is ever @@ -138,4 +138,4 @@ The Metadata Storage is accessed only by: 2. Realtime Processes (if any) 3. Coordinator Processes -Thus you need to give permissions (eg in AWS Security Groups) only for these machines to access the Metadata storage. +Thus you need to give permissions (e.g., in AWS Security Groups) only for these machines to access the Metadata storage. diff --git a/docs/design/architecture.md b/docs/design/architecture.md index 51fa00ec6237..ca123686f33f 100644 --- a/docs/design/architecture.md +++ b/docs/design/architecture.md @@ -44,7 +44,7 @@ Druid processes can be deployed any way you like, but for ease of deployment we * **Query**: Runs Broker and optional Router processes, handles queries from external clients. 
* **Data**: Runs Historical and MiddleManager processes, executes ingestion workloads and stores all queryable data. -For more details on process and server organization, please see [Druid Processses and Servers](../design/processes.md). +For more details on process and server organization, please see [Druid Processes and Servers](../design/processes.md). ## External dependencies @@ -58,7 +58,7 @@ this is typically going to be local disk. Druid uses deep storage to store any d system. Druid uses deep storage only as a backup of your data and as a way to transfer data in the background between -Druid processes. To respond to queries, Historical processes do not read from deep storage, but instead read pre-fetched +Druid processes. To respond to queries, Historical processes do not read from deep storage, but instead read prefetched segments from their local disks before any queries are served. This means that Druid never needs to access deep storage during a query, helping it offer the best query latencies possible. It also means that you must have enough disk space both in deep storage and across your Historical processes for the data you plan to load. diff --git a/docs/design/auth.md b/docs/design/auth.md index ff0d89355873..9e5a2dc3ec3d 100644 --- a/docs/design/auth.md +++ b/docs/design/auth.md @@ -31,14 +31,14 @@ This document describes non-extension specific Apache Druid (incubating) authent |`druid.escalator.type`|String|Type of the Escalator that should be used for internal Druid communications. This Escalator must use an authentication scheme that is supported by an Authenticator in `druid.auth.authenticationChain`.|"noop"|no| |`druid.auth.authorizers`|JSON List of Strings|List of Authorizer type names |["allowAll"]|no| |`druid.auth.unsecuredPaths`| List of Strings|List of paths for which security checks will not be performed. All requests to these paths will be allowed.|[]|no| -|`druid.auth.allowUnauthenticatedHttpOptions`|Boolean|If true, skip authentication checks for HTTP OPTIONS requests. This is needed for certain use cases, such as supporting CORS pre-flight requests. Note that disabling authentication checks for OPTIONS requests will allow unauthenticated users to determine what Druid endpoints are valid (by checking if the OPTIONS request returns a 200 instead of 404), so enabling this option may reveal information about server configuration, including information about what extensions are loaded (if those extensions add endpoints).|false|no| +|`druid.auth.allowUnauthenticatedHttpOptions`|Boolean|If true, skip authentication checks for HTTP OPTIONS requests. This is needed for certain use cases, such as supporting CORS preflight requests. Note that disabling authentication checks for OPTIONS requests will allow unauthenticated users to determine what Druid endpoints are valid (by checking if the OPTIONS request returns a 200 instead of 404), so enabling this option may reveal information about server configuration, including information about what extensions are loaded (if those extensions add endpoints).|false|no| ## Enabling Authentication/AuthorizationLoadingLookupTest ## Authenticator chain Authentication decisions are handled by a chain of Authenticator instances. A request will be checked by Authenticators in the sequence defined by the `druid.auth.authenticatorChain`. -Authenticator implementions are provided by extensions. +Authenticator implementations are provided by extensions. 
For example, the following authentication chain definition enables the Kerberos and HTTP Basic authenticators, from the `druid-kerberos` and `druid-basic-security` core extensions, respectively: @@ -83,7 +83,7 @@ druid.auth.authenticator.anonymous.authorizerName=myBasicAuthorizer ## Escalator The `druid.escalator.type` property determines what authentication scheme should be used for internal Druid cluster communications (such as when a Broker process communicates with Historical processes for query processing). -The Escalator chosen for this property must use an authentication scheme that is supported by an Authenticator in `druid.auth.authenticationChain`. Authenticator extension implementors must also provide a corresponding Escalator implementation if they intend to use a particular authentication scheme for internal Druid communications. +The Escalator chosen for this property must use an authentication scheme that is supported by an Authenticator in `druid.auth.authenticationChain`. Authenticator extension implementers must also provide a corresponding Escalator implementation if they intend to use a particular authentication scheme for internal Druid communications. ### Noop escalator diff --git a/docs/design/broker.md b/docs/design/broker.md index 6d252423abd2..c1c517d489b4 100644 --- a/docs/design/broker.md +++ b/docs/design/broker.md @@ -50,5 +50,5 @@ To determine which processes to forward queries to, the Broker process first bui ### Caching -Broker processes employ a cache with a LRU cache invalidation strategy. The Broker cache stores per-segment results. The cache can be local to each Broker process or shared across multiple processes using an external distributed cache such as [memcached](http://memcached.org/). Each time a broker process receives a query, it first maps the query to a set of segments. A subset of these segment results may already exist in the cache and the results can be directly pulled from the cache. For any segment results that do not exist in the cache, the broker process will forward the query to the +Broker processes employ a cache with an LRU cache invalidation strategy. The Broker cache stores per-segment results. The cache can be local to each Broker process or shared across multiple processes using an external distributed cache such as [memcached](http://memcached.org/). Each time a broker process receives a query, it first maps the query to a set of segments. A subset of these segment results may already exist in the cache and the results can be directly pulled from the cache. For any segment results that do not exist in the cache, the broker process will forward the query to the Historical processes. Once the Historical processes return their results, the Broker will store those results in the cache. Real-time segments are never cached and hence requests for real-time data will always be forwarded to real-time processes. Real-time data is perpetually changing and caching the results would be unreliable. diff --git a/docs/design/coordinator.md b/docs/design/coordinator.md index df78c46b1f55..8c2483b71918 100644 --- a/docs/design/coordinator.md +++ b/docs/design/coordinator.md @@ -35,7 +35,7 @@ For a list of API endpoints supported by the Coordinator, see [Coordinator API]( The Druid Coordinator process is primarily responsible for segment management and distribution. More specifically, the Druid Coordinator process communicates to Historical processes to load or drop segments based on configurations. 
The Druid Coordinator is responsible for loading new segments, dropping outdated segments, managing segment replication, and balancing segment load. -The Druid Coordinator runs periodically and the time between each run is a configurable parameter. Each time the Druid Coordinator runs, it assesses the current state of the cluster before deciding on the appropriate actions to take. Similar to the Broker and Historical processses, the Druid Coordinator maintains a connection to a Zookeeper cluster for current cluster information. The Coordinator also maintains a connection to a database containing information about available segments and rules. Available segments are stored in a segment table and list all segments that should be loaded in the cluster. Rules are stored in a rule table and indicate how segments should be handled. +The Druid Coordinator runs periodically and the time between each run is a configurable parameter. Each time the Druid Coordinator runs, it assesses the current state of the cluster before deciding on the appropriate actions to take. Similar to the Broker and Historical processes, the Druid Coordinator maintains a connection to a Zookeeper cluster for current cluster information. The Coordinator also maintains a connection to a database containing information about available segments and rules. Available segments are stored in a segment table and list all segments that should be loaded in the cluster. Rules are stored in a rule table and indicate how segments should be handled. Before any unassigned segments are serviced by Historical processes, the available Historical processes for each tier are first sorted in terms of capacity, with least capacity servers having the highest priority. Unassigned segments are always assigned to the processes with least capacity to maintain a level of balance between processes. The Coordinator does not directly communicate with a historical process when assigning it a new segment; instead the Coordinator creates some temporary information about the new segment under load queue path of the historical process. Once this request is seen, the historical process will load the segment and begin servicing it. @@ -85,8 +85,8 @@ Once a compaction task fails, the Coordinator simply finds the segments for the #### Newest segment first policy At every coordinator run, this policy searches for segments to compact by iterating segments from the latest to the oldest. -Once it finds the latest segment among all dataSources, it checks if the segment is _compactible_ with other segments of the same dataSource which have the same or abutting intervals. -Note that segments are compactible if their total size is smaller than or equal to the configured `inputSegmentSizeBytes`. +Once it finds the latest segment among all dataSources, it checks if the segment is _compactable_ with other segments of the same dataSource which have the same or abutting intervals. +Note that segments are compactable if their total size is smaller than or equal to the configured `inputSegmentSizeBytes`. Here are some details with an example. Let us assume we have two dataSources (`foo`, `bar`) and 5 segments (`foo_2017-10-01T00:00:00.000Z_2017-11-01T00:00:00.000Z_VERSION`, `foo_2017-11-01T00:00:00.000Z_2017-12-01T00:00:00.000Z_VERSION`, `bar_2017-08-01T00:00:00.000Z_2017-09-01T00:00:00.000Z_VERSION`, `bar_2017-09-01T00:00:00.000Z_2017-10-01T00:00:00.000Z_VERSION`, `bar_2017-10-01T00:00:00.000Z_2017-11-01T00:00:00.000Z_VERSION`). 
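As a rough, hypothetical sketch of how the `inputSegmentSizeBytes` threshold mentioned above is typically supplied (the exact field set and the API for submitting compaction configuration are not part of this change), a per-dataSource compaction entry could look like:

```json
{
  "dataSource": "foo",
  "inputSegmentSizeBytes": 419430400
}
```

With such a setting, the two `foo` segments in the example above would only be grouped into a single compaction task if their combined size stayed at or below roughly 400 MB.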
diff --git a/docs/design/middlemanager.md b/docs/design/middlemanager.md index 26628e6e4281..694bab888c57 100644 --- a/docs/design/middlemanager.md +++ b/docs/design/middlemanager.md @@ -25,7 +25,7 @@ title: "MiddleManager Process" ### Configuration -For Apache Druid (incubating) Middlemanager Process Configuration, see [Indexing Service Configuration](../configuration/index.html#middlemanager-and-peons). +For Apache Druid (incubating) MiddleManager Process Configuration, see [Indexing Service Configuration](../configuration/index.html#middlemanager-and-peons). ### HTTP endpoints diff --git a/docs/design/overlord.md b/docs/design/overlord.md index 76fd647581b6..f1346d7eedaa 100644 --- a/docs/design/overlord.md +++ b/docs/design/overlord.md @@ -46,7 +46,7 @@ The Overlord provides a UI for managing tasks and workers. For more details, ple If a MiddleManager has task failures above a threshold, the Overlord will blacklist these MiddleManagers. No more than 20% of the MiddleManagers can be blacklisted. Blacklisted MiddleManagers will be periodically whitelisted. -The following vairables can be used to set the threshold and blacklist timeouts. +The following variables can be used to set the threshold and blacklist timeouts. ``` druid.indexer.runner.maxRetriesBeforeBlacklist diff --git a/docs/design/router.md b/docs/design/router.md index b00cf874c04c..ec917cabc422 100644 --- a/docs/design/router.md +++ b/docs/design/router.md @@ -155,7 +155,7 @@ To use this balancer, specify the following property: druid.router.avatica.balancer.type=consistentHash ``` -This is a non-default implementation that is provided for experimentation purposes. The consistent hasher has longer setup times on initialization and when the set of Brokers changes, but has a faster Broker assignment time than the rendezous hasher when tested with 5 Brokers. Benchmarks for both implementations have been provided in `ConsistentHasherBenchmark` and `RendezvousHasherBenchmark`. The consistent hasher also requires locking, while the rendezvous hasher does not. +This is a non-default implementation that is provided for experimentation purposes. The consistent hasher has longer setup times on initialization and when the set of Brokers changes, but has a faster Broker assignment time than the rendezvous hasher when tested with 5 Brokers. Benchmarks for both implementations have been provided in `ConsistentHasherBenchmark` and `RendezvousHasherBenchmark`. The consistent hasher also requires locking, while the rendezvous hasher does not. ### Example production configuration diff --git a/docs/design/segments.md b/docs/design/segments.md index 93daf3ce4efe..bd945f5efb6d 100644 --- a/docs/design/segments.md +++ b/docs/design/segments.md @@ -180,7 +180,7 @@ Each column is stored as two parts: 1. A Jackson-serialized ColumnDescriptor 2. The rest of the binary for the column -A ColumnDescriptor is essentially an object that allows us to use jackson’s polymorphic deserialization to add new and interesting methods of serialization with minimal impact to the code. It consists of some metadata about the column (what type is it, is it multi-value, etc.) and then a list of serde logic that can deserialize the rest of the binary. +A ColumnDescriptor is essentially an object that allows us to use Jackson's polymorphic deserialization to add new and interesting methods of serialization with minimal impact to the code. It consists of some metadata about the column (what type is it, is it multi-value, etc.) 
and then a list of serialization/deserialization logic that can deserialize the rest of the binary. ## Sharding Data to Create Segments diff --git a/docs/development/extensions-contrib/ambari-metrics-emitter.md b/docs/development/extensions-contrib/ambari-metrics-emitter.md index 30ba958bea61..e8a182cabbab 100644 --- a/docs/development/extensions-contrib/ambari-metrics-emitter.md +++ b/docs/development/extensions-contrib/ambari-metrics-emitter.md @@ -87,7 +87,7 @@ Same as for the `all` converter user has control of `.[.[[,=[,=]] =[,=] []` -where timestamp is in nano-seconds since epoch. +where timestamp is in nanoseconds since epoch. A typical service metric event as recorded by Druid's logging emitter is: `Event [{"feed":"metrics","timestamp":"2017-10-31T09:09:06.857Z","service":"druid/historical","host":"historical001:8083","version":"0.11.0-SNAPSHOT","metric":"query/cache/total/hits","value":34787256}]`. @@ -71,4 +71,4 @@ This gives the following String which can be POSTed to InfluxDB: `"druid_query,s The InfluxDB emitter has a white list of dimensions which will be added as a tag to the line protocol string if the metric has a dimension from the white list. -The value of the dimension is sanitized such that every occurence of a dot or whitespace is replaced with a `_` . +The value of the dimension is sanitized such that every occurrence of a dot or whitespace is replaced with a `_` . diff --git a/docs/development/extensions-contrib/materialized-view.md b/docs/development/extensions-contrib/materialized-view.md index 8a8e7b9ff112..484b9558d400 100644 --- a/docs/development/extensions-contrib/materialized-view.md +++ b/docs/development/extensions-contrib/materialized-view.md @@ -23,12 +23,12 @@ title: "Materialized View" --> -To use this Apache Druid (incubating) feature, make sure to only load `materialized-view-selection` on Broker and load `materialized-view-maintenance` on Overlord. In addtion, this feature currently requires a Hadoop cluster. +To use this Apache Druid (incubating) feature, make sure to only load `materialized-view-selection` on Broker and load `materialized-view-maintenance` on Overlord. In addition, this feature currently requires a Hadoop cluster. This feature enables Druid to greatly improve the query performance, especially when the query dataSource has a very large number of dimensions but the query only required several dimensions. This feature includes two parts. One is `materialized-view-maintenance`, and the other is `materialized-view-selection`. ## Materialized-view-maintenance -In materialized-view-maintenance, dataSouces user ingested are called "base-dataSource". For each base-dataSource, we can submit `derivativeDataSource` supervisors to create and maintain other dataSources which we called "derived-dataSource". The deminsions and metrics of derived-dataSources are the subset of base-dataSource's. +In materialized-view-maintenance, dataSources user ingested are called "base-dataSource". For each base-dataSource, we can submit `derivativeDataSource` supervisors to create and maintain other dataSources which we called "derived-dataSource". The dimensions and metrics of derived-dataSources are the subset of base-dataSource's. The `derivativeDataSource` supervisor is used to keep the timeline of derived-dataSource consistent with base-dataSource. Each `derivativeDataSource` supervisor is responsible for one derived-dataSource. 
A sample derivativeDataSource supervisor spec is shown below: @@ -76,7 +76,7 @@ A sample derivativeDataSource supervisor spec is shown below: |tuningConfig |TuningConfig must be HadoopTuningConfig. See [Hadoop tuning config](../../ingestion/hadoop.html#tuningconfig).|yes| |dataSource |The name of this derived dataSource. |no(default=baseDataSource-hashCode of supervisor)| |hadoopDependencyCoordinates |A JSON array of Hadoop dependency coordinates that Druid will use, this property will override the default Hadoop coordinates. Once specified, Druid will look for those Hadoop dependencies from the location specified by druid.extensions.hadoopDependenciesDir |no| -|classpathPrefix |Classpath that will be pre-appended for the Peon process. |no| +|classpathPrefix |Classpath that will be prepended for the Peon process. |no| |context |See below. |no| **Context** diff --git a/docs/development/extensions-contrib/momentsketch-quantiles.md b/docs/development/extensions-contrib/momentsketch-quantiles.md index a69e33956aef..0f9ae21b00b7 100644 --- a/docs/development/extensions-contrib/momentsketch-quantiles.md +++ b/docs/development/extensions-contrib/momentsketch-quantiles.md @@ -36,7 +36,7 @@ druid.extensions.loadList=["druid-momentsketch"] The result of the aggregation is a momentsketch that is the union of all sketches either built from raw data or read from the segments. -The `momentSketch` aggregator operates over raw data while the `momentSketchMerge` aggregator should be used when aggregating pre-computed sketches. +The `momentSketch` aggregator operates over raw data while the `momentSketchMerge` aggregator should be used when aggregating precomputed sketches. ```json { diff --git a/docs/development/extensions-contrib/moving-average-query.md b/docs/development/extensions-contrib/moving-average-query.md index 8cffcdec8a54..8ab60cadc38f 100644 --- a/docs/development/extensions-contrib/moving-average-query.md +++ b/docs/development/extensions-contrib/moving-average-query.md @@ -69,7 +69,7 @@ There are currently no configuration properties specific to Moving Average. |dimensions|A JSON list of [DimensionSpec](../../querying/dimensionspecs.md) (Notice that property is optional)|no| |limitSpec|See [LimitSpec](../../querying/limitspec.md)|no| |having|See [Having](../../querying/having.md)|no| -|granularity|A period granilarity; See [Period Granularities](../../querying/granularities.html#period-granularities)|yes| +|granularity|A period granularity; See [Period Granularities](../../querying/granularities.html#period-granularities)|yes| |filter|See [Filters](../../querying/filters.md)|no| |aggregations|Aggregations forms the input to Averagers; See [Aggregations](../../querying/aggregations.md)|yes| |postAggregations|Supports only aggregations as input; See [Post Aggregations](../../querying/post-aggregations.md)|no| diff --git a/docs/development/extensions-contrib/opentsdb-emitter.md b/docs/development/extensions-contrib/opentsdb-emitter.md index a3e5623e77d1..7167427cd80d 100644 --- a/docs/development/extensions-contrib/opentsdb-emitter.md +++ b/docs/development/extensions-contrib/opentsdb-emitter.md @@ -31,7 +31,7 @@ This extension emits druid metrics to [OpenTSDB](https://github.com/OpenTSDB/ope ## Configuration -All the configuration parameters for the opentsdb emitter are under `druid.emitter.opentsdb`. +All the configuration parameters for the OpenTSDB emitter are under `druid.emitter.opentsdb`. 
|property|description|required?|default| |--------|-----------|---------|-------| @@ -46,7 +46,7 @@ All the configuration parameters for the opentsdb emitter are under `druid.emitt ### Druid to OpenTSDB Event Converter -The opentsdb emitter will send only the desired metrics and dimensions which is defined in a JSON file. +The OpenTSDB emitter will send only the desired metrics and dimensions which are defined in a JSON file. If the user does not specify their own JSON file, a default file is used. All metrics are expected to be configured in the JSON file. Metrics which are not configured will be logged. Desired metrics and dimensions is organized using the following schema:` : [ ]`
e.g. diff --git a/docs/development/extensions-contrib/statsd.md b/docs/development/extensions-contrib/statsd.md index 1eb5e6891ce3..9f8ddde26bfa 100644 --- a/docs/development/extensions-contrib/statsd.md +++ b/docs/development/extensions-contrib/statsd.md @@ -43,7 +43,7 @@ All the configuration parameters for the StatsD emitter are under `druid.emitter |`druid.emitter.statsd.separator`|Metric name separator|no|.| |`druid.emitter.statsd.includeHost`|Flag to include the hostname as part of the metric name.|no|false| |`druid.emitter.statsd.dimensionMapPath`|JSON file defining the StatsD type, and desired dimensions for every Druid metric|no|Default mapping provided. See below.| -|`druid.emitter.statsd.blankHolder`|The blank character replacement as statsD does not support path with blank character|no|"-"| +|`druid.emitter.statsd.blankHolder`|The blank character replacement as StatsD does not support path with blank character|no|"-"| |`druid.emitter.statsd.dogstatsd`|Flag to enable [DogStatsD](https://docs.datadoghq.com/developers/dogstatsd/) support. Causes dimensions to be included as tags, not as a part of the metric name. `convertRange` fields will be ignored.|no|false| |`druid.emitter.statsd.dogstatsdConstantTags`|If `druid.emitter.statsd.dogstatsd` is true, the tags in the JSON list of strings will be sent with every event.|no|[]| |`druid.emitter.statsd.dogstatsdServiceAsTag`|If `druid.emitter.statsd.dogstatsd` and `druid.emitter.statsd.dogstatsdServiceAsTag` are true, druid service (e.g. `druid/broker`, `druid/coordinator`, etc) is reported as a tag (e.g. `druid_service:druid/broker`) instead of being included in metric name (e.g. `druid.broker.query.time`) and `druid` is used as metric prefix (e.g. `druid.query.time`).|no|false| diff --git a/docs/development/extensions-contrib/tdigestsketch-quantiles.md b/docs/development/extensions-contrib/tdigestsketch-quantiles.md index 6499cdecca2b..2c624c68b027 100644 --- a/docs/development/extensions-contrib/tdigestsketch-quantiles.md +++ b/docs/development/extensions-contrib/tdigestsketch-quantiles.md @@ -24,16 +24,16 @@ title: "T-Digest Quantiles Sketch module" This module provides Apache Druid (incubating) approximate sketch aggregators based on T-Digest. -T-Digest (https://github.com/tdunning/t-digest) is a popular datastructure for accurate on-line accumulation of +T-Digest (https://github.com/tdunning/t-digest) is a popular data structure for accurate on-line accumulation of rank-based statistics such as quantiles and trimmed means. -The datastructure is also designed for parallel programming use cases like distributed aggregations or map reduce jobs by making combining two intermediate t-digests easy and efficient. +The data structure is also designed for parallel programming use cases like distributed aggregations or map reduce jobs by making combining two intermediate t-digests easy and efficient. The tDigestSketch aggregator is capable of generating sketches from raw numeric values as well as aggregating/combining pre-generated T-Digest sketches generated using the tDigestSketch aggregator itself. While one can generate sketches on the fly during the query time itself, it generally is more performant to generate sketches during ingestion time itself and then combining them during query time. The module also provides a postAggregator, quantilesFromTDigestSketch, that can be used to compute approximate -quantiles from T-Digest sketches generated by the tDigestSketch aggreator. 
+quantiles from T-Digest sketches generated by the tDigestSketch aggregator. To use this aggregator, make sure you [include](../../development/extensions.md#loading-extensions) the extension in your config file: diff --git a/docs/development/extensions-contrib/thrift.md b/docs/development/extensions-contrib/thrift.md index 746f07b8eba6..68497c11a5d7 100644 --- a/docs/development/extensions-contrib/thrift.md +++ b/docs/development/extensions-contrib/thrift.md @@ -39,7 +39,7 @@ If you plan to read LZO-compressed Thrift files, you will need to download versi | Field | Type | Description | Required | | ----------- | ----------- | ---------------------------------------- | -------- | | type | String | This should say `thrift` | yes | -| parseSpec | JSON Object | Specifies the timestamp and dimensions of the data. Should be a Json parseSpec. | yes | +| parseSpec | JSON Object | Specifies the timestamp and dimensions of the data. Should be a JSON parseSpec. | yes | | thriftJar | String | path of thrift jar, if not provided, it will try to find the thrift class in classpath. Thrift jar in batch ingestion should be uploaded to HDFS first and configure `jobProperties` with `"tmpjars":"/path/to/your/thrift.jar"` | no | | thriftClass | String | classname of thrift | yes | @@ -85,7 +85,7 @@ Hadoop-client is also needed, you may copy all the hadoop-client dependency jars - Batch Ingestion - `inputFormat` and `tmpjars` should be set. -This is for batch ingestion using the HadoopDruidIndexer. The inputFormat of inputSpec in ioConfig could be one of `"org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat"` and `com.twitter.elephantbird.mapreduce.input.LzoThriftBlockInputFormat`. Be carefull, when `LzoThriftBlockInputFormat` is used, thrift class must be provided twice. +This is for batch ingestion using the HadoopDruidIndexer. The inputFormat of inputSpec in ioConfig could be one of `"org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat"` and `com.twitter.elephantbird.mapreduce.input.LzoThriftBlockInputFormat`. Be careful, when `LzoThriftBlockInputFormat` is used, thrift class must be provided twice. ```json { diff --git a/docs/development/extensions-core/approximate-histograms.md b/docs/development/extensions-core/approximate-histograms.md index 6f97d64ef592..b235c450951b 100644 --- a/docs/development/extensions-core/approximate-histograms.md +++ b/docs/development/extensions-core/approximate-histograms.md @@ -72,7 +72,7 @@ hundred centroids. To get good accuracy readings on 95th percentiles with millions of rows of data, you may want to use several thousand centroids, especially with long tails, since that's where the approximation will be worse. -### Creating approxiate histogram sketches at ingestion time +### Creating approximate histogram sketches at ingestion time To use this feature, an "approxHistogram" or "approxHistogramFold" aggregator must be included at indexing time. The ingestion aggregator can only apply to numeric values. If you use "approxHistogram" @@ -159,7 +159,7 @@ The histogram aggregator's output object has the following fields: - `upperLimit`: Upper limit of the histogram - `numBuckets`: Number of histogram buckets - `outlierHandlingMode`: Outlier handling mode -- `count`: Total number of values contained in the histgram, excluding outliers +- `count`: Total number of values contained in the histogram, excluding outliers - `lowerOutlierCount`: Count of outlier values below `lowerLimit`. Only used if the outlier mode is `overflow`. 
- `upperOutlierCount`: Count of outlier values above `upperLimit`. Only used if the outlier mode is `overflow`. - `missingValueCount`: Count of null values seen by the histogram. diff --git a/docs/development/extensions-core/avro.md b/docs/development/extensions-core/avro.md index 44e33e54b641..da11af606acc 100644 --- a/docs/development/extensions-core/avro.md +++ b/docs/development/extensions-core/avro.md @@ -72,7 +72,7 @@ If `type` is not included, the avroBytesDecoder defaults to `schema_repo`. > may need to migrate schemas in the future, consider one of the other decoders, all of which use a message header that > allows the parser to identify the proper Avro schema for reading records. -This decoder can be used if all the input events can be read using the same schema. In that case schema can be specified in the input task json itself as described below. +This decoder can be used if all the input events can be read using the same schema. In that case schema can be specified in the input task JSON itself as described below. ``` ... @@ -94,7 +94,7 @@ This decoder can be used if all the input events can be read using the same sche ##### Multiple Inline Schemas Based Avro Bytes Decoder -This decoder can be used if different input events can have different read schema. In that case schema can be specified in the input task json itself as described below. +This decoder can be used if different input events can have different read schema. In that case schema can be specified in the input task JSON itself as described below. ``` ... @@ -149,7 +149,7 @@ This section describes the format of the `subjectAndIdConverter` object for the | Field | Type | Description | Required | |-------|------|-------------|----------| | type | String | This should say `avro_1124`. | no | -| topic | String | Specifies the topic of your kafka stream. | yes | +| topic | String | Specifies the topic of your Kafka stream. | yes | ###### Avro-1124 Schema Repository @@ -183,7 +183,7 @@ Details can be found in Schema Registry [documentation](http://docs.confluent.io ### Avro Hadoop Parser -This is for batch ingestion using the `HadoopDruidIndexer`. The `inputFormat` of `inputSpec` in `ioConfig` must be set to `"org.apache.druid.data.input.avro.AvroValueInputFormat"`. You may want to set Avro reader's schema in `jobProperties` in `tuningConfig`, eg: `"avro.schema.input.value.path": "/path/to/your/schema.avsc"` or `"avro.schema.input.value": "your_schema_JSON_object"`, if reader's schema is not set, the schema in Avro object container file will be used, see [Avro specification](http://avro.apache.org/docs/1.7.7/spec.html#Schema+Resolution). Make sure to include "org.apache.druid.extensions:druid-avro-extensions" as an extension. +This is for batch ingestion using the `HadoopDruidIndexer`. The `inputFormat` of `inputSpec` in `ioConfig` must be set to `"org.apache.druid.data.input.avro.AvroValueInputFormat"`. You may want to set Avro reader's schema in `jobProperties` in `tuningConfig`, e.g.: `"avro.schema.input.value.path": "/path/to/your/schema.avsc"` or `"avro.schema.input.value": "your_schema_JSON_object"`, if reader's schema is not set, the schema in Avro object container file will be used, see [Avro specification](http://avro.apache.org/docs/1.7.7/spec.html#Schema+Resolution). Make sure to include "org.apache.druid.extensions:druid-avro-extensions" as an extension. 
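As a rough illustration of where that reader's-schema property sits, a `tuningConfig` fragment along these lines could carry it; the schema path is a placeholder and the rest of the ingestion spec is elided:

```json
{
  "tuningConfig": {
    "type": "hadoop",
    "jobProperties": {
      "avro.schema.input.value.path": "/path/to/your/schema.avsc"
    }
  }
}
```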
| Field | Type | Description | Required | |-------|------|-------------|----------| diff --git a/docs/development/extensions-core/bloom-filter.md b/docs/development/extensions-core/bloom-filter.md index f558eb8fb4a2..2e21f906cb4a 100644 --- a/docs/development/extensions-core/bloom-filter.md +++ b/docs/development/extensions-core/bloom-filter.md @@ -27,13 +27,13 @@ This Apache Druid (incubating) extension adds the ability to both construct bloo against a bloom filter. Make sure to [include](../../development/extensions.md#loading-extensions) `druid-bloom-filter` as an extension. -A BloomFilter is a probabilistic data structure for performing a set membership check. A bloom filter is a good candidate +A Bloom filter is a probabilistic data structure for performing a set membership check. A bloom filter is a good candidate to use with Druid for cases where an explicit filter is impossible, e.g. filtering a query against a set of millions of values. -Following are some characteristics of BloomFilters: +Following are some characteristics of Bloom filters: -- BloomFilters are highly space efficient when compared to using a HashSet. +- Bloom filters are highly space efficient when compared to using a HashSet. - Because of the probabilistic nature of bloom filters, false positive results are possible (element was not actually inserted into a bloom filter during construction, but `test()` says true) - False negatives are not possible (if element is present then `test()` will never say false). @@ -57,7 +57,7 @@ BloomKFilter.serialize(byteArrayOutputStream, bloomFilter); String base64Serialized = Base64.encodeBase64String(byteArrayOutputStream.toByteArray()); ``` -This string can then be used in the native or sql Druid query. +This string can then be used in the native or SQL Druid query. ## Filtering queries with a Bloom Filter @@ -110,7 +110,7 @@ bloom_filter_test(, '') Input for a `bloomKFilter` can also be created from a druid query with the `bloom` aggregator. Note that it is very important to set a reasonable value for the `maxNumEntries` parameter, which is the maximum number of distinct entries -that the bloom filter can represent without increasing the false postive rate. It may be worth performing a query using +that the bloom filter can represent without increasing the false positive rate. It may be worth performing a query using one of the unique count sketches to calculate the value for this parameter in order to build a bloom filter appropriate for the query. @@ -176,4 +176,4 @@ SELECT BLOOM_FILTER(, ) FROM druid.foo WHERE ``` but requires the setting `druid.sql.planner.serializeComplexValues` to be set to `true`. Bloom filter results in an SQL - response are serialized into a base64 string, which can then be used in subsequent queries as a filter. \ No newline at end of file + response are serialized into a base64 string, which can then be used in subsequent queries as a filter. diff --git a/docs/development/extensions-core/datasketches-quantiles.md b/docs/development/extensions-core/datasketches-quantiles.md index babc6d374cf9..e91c33371a94 100644 --- a/docs/development/extensions-core/datasketches-quantiles.md +++ b/docs/development/extensions-core/datasketches-quantiles.md @@ -23,7 +23,7 @@ title: "DataSketches Quantiles Sketch module" --> -This module provides Apache Druid (incubating) aggregators based on numeric quantiles DoublesSketch from [datasketches](https://datasketches.github.io/) library. 
Quantiles sketch is a mergeable streaming algorithm to estimate the distribution of values, and approximately answer queries about the rank of a value, probability mass function of the distribution (PMF) or histogram, cummulative distribution function (CDF), and quantiles (median, min, max, 95th percentile and such). See [Quantiles Sketch Overview](https://datasketches.github.io/docs/Quantiles/QuantilesOverview.html). +This module provides Apache Druid (incubating) aggregators based on numeric quantiles DoublesSketch from [datasketches](https://datasketches.github.io/) library. Quantiles sketch is a mergeable streaming algorithm to estimate the distribution of values, and approximately answer queries about the rank of a value, probability mass function of the distribution (PMF) or histogram, cumulative distribution function (CDF), and quantiles (median, min, max, 95th percentile and such). See [Quantiles Sketch Overview](https://datasketches.github.io/docs/Quantiles/QuantilesOverview.html). There are three major modes of operation: diff --git a/docs/development/extensions-core/datasketches-tuple.md b/docs/development/extensions-core/datasketches-tuple.md index ee655fc1a1c6..202a231ac05f 100644 --- a/docs/development/extensions-core/datasketches-tuple.md +++ b/docs/development/extensions-core/datasketches-tuple.md @@ -51,7 +51,7 @@ druid.extensions.loadList=["druid-datasketches"] |fieldName|A String for the name of the input field.|yes| |nominalEntries|Parameter that determines the accuracy and size of the sketch. Higher k means higher accuracy but more space to store sketches. Must be a power of 2. See the [Theta sketch accuracy](https://datasketches.github.io/docs/Theta/ThetaErrorTable.html) for details. |no, defaults to 16384| |numberOfValues|Number of values associated with each distinct key. |no, defaults to 1| -|metricCoulumns|If building sketches from raw data, an array of names of the input columns containing numeric vaues to be associated with each distinct key.|no, defaults to empty array| +|metricColumns|If building sketches from raw data, an array of names of the input columns containing numeric values to be associated with each distinct key.|no, defaults to empty array| ### Post Aggregators @@ -118,7 +118,7 @@ Returns a list of variance values from a given ArrayOfDoublesSketch. The result #### Quantiles sketch from a column -Returns a quanitles DoublesSketch constructed from a given column of values from a given ArrayOfDoublesSketch using optional parameter k that determines the accuracy and size of the quantiles sketch. See [Quantiles Sketch Module](datasketches-quantiles.html) +Returns a quantiles DoublesSketch constructed from a given column of values from a given ArrayOfDoublesSketch using optional parameter k that determines the accuracy and size of the quantiles sketch. See [Quantiles Sketch Module](datasketches-quantiles.html) * The column number is 1-based and is optional (the default is 1). * The parameter k is optional (the default is defined in the sketch library). 
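A hedged sketch of what such a post aggregator could look like, assuming an ArrayOfDoublesSketch aggregator named `sketch` is present in the same query; the names and the value of k are illustrative, not authoritative:

```json
{
  "type": "arrayOfDoublesSketchToQuantilesSketch",
  "name": "quantilesFromSketchColumn",
  "field": { "type": "fieldAccess", "fieldName": "sketch" },
  "column": 1,
  "k": 128
}
```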
diff --git a/docs/development/extensions-core/druid-basic-security.md b/docs/development/extensions-core/druid-basic-security.md index 0c66e69a6385..03c2fa2c40ac 100644 --- a/docs/development/extensions-core/druid-basic-security.md +++ b/docs/development/extensions-core/druid-basic-security.md @@ -93,7 +93,7 @@ druid.escalator.authorizerName=MyBasicAuthorizer #### Properties |Property|Description|Default|required| |--------|-----------|-------|--------| -|`druid.escalator.internalClientUsername`|The escalator will use this username for requests made as the internal systerm user.|n/a|Yes| +|`druid.escalator.internalClientUsername`|The escalator will use this username for requests made as the internal system user.|n/a|Yes| |`druid.escalator.internalClientPassword`|The escalator will use this [Password Provider](../../operations/password-provider.md) for requests made as the internal system user.|n/a|Yes| |`druid.escalator.authorizerName`|Authorizer that requests should be directed to.|n/a|Yes| diff --git a/docs/development/extensions-core/druid-kerberos.md b/docs/development/extensions-core/druid-kerberos.md index 43105ef082a4..bfce6fb55395 100644 --- a/docs/development/extensions-core/druid-kerberos.md +++ b/docs/development/extensions-core/druid-kerberos.md @@ -50,10 +50,10 @@ The configuration examples in the rest of this document will use "kerberos" as t ### Properties |Property|Possible Values|Description|Default|required| |--------|---------------|-----------|-------|--------| -|`druid.auth.authenticator.kerberos.serverPrincipal`|`HTTP/_HOST@EXAMPLE.COM`| SPNego service principal used by druid processes|empty|Yes| +|`druid.auth.authenticator.kerberos.serverPrincipal`|`HTTP/_HOST@EXAMPLE.COM`| SPNEGO service principal used by druid processes|empty|Yes| |`druid.auth.authenticator.kerberos.serverKeytab`|`/etc/security/keytabs/spnego.service.keytab`|SPNego service keytab used by druid processes|empty|Yes| |`druid.auth.authenticator.kerberos.authToLocal`|`RULE:[1:$1@$0](druid@EXAMPLE.COM)s/.*/druid DEFAULT`|It allows you to set a general rule for mapping principal names to local user names. It will be used if there is not an explicit mapping for the principal name that is being translated.|DEFAULT|No| -|`druid.auth.authenticator.kerberos.cookieSignatureSecret`|`secretString`| Secret used to sign authentication cookies. It is advisable to explicitly set it, if you have multiple druid ndoes running on same machine with different ports as the Cookie Specification does not guarantee isolation by port.||No| +|`druid.auth.authenticator.kerberos.cookieSignatureSecret`|`secretString`| Secret used to sign authentication cookies. It is advisable to explicitly set it, if you have multiple druid nodes running on same machine with different ports as the Cookie Specification does not guarantee isolation by port.||No| |`druid.auth.authenticator.kerberos.authorizerName`|Depends on available authorizers|Authorizer that requests should be directed to|Empty|Yes| As a note, it is required that the SPNego principal in use by the druid processes must start with HTTP (This specified by [RFC-4559](https://tools.ietf.org/html/rfc4559)) and must be of the form "HTTP/_HOST@REALM". @@ -65,7 +65,7 @@ In older releases, the Kerberos authenticator had an `excludedPaths` property th ### Auth to Local Syntax `druid.auth.authenticator.kerberos.authToLocal` allows you to set a general rules for mapping principal names to local user names. -The syntax for mapping rules is `RULE:\[n:string](regexp)s/pattern/replacement/g`. 
The integer n indicates how many components the target principal should have. If this matches, then a string will be formed from string, substituting the realm of the principal for $0 and the n‘th component of the principal for $n. e.g. if the principal was druid/admin then `\[2:$2$1suffix]` would result in the string `admindruidsuffix`. +The syntax for mapping rules is `RULE:\[n:string](regexp)s/pattern/replacement/g`. The integer n indicates how many components the target principal should have. If this matches, then a string will be formed from string, substituting the realm of the principal for $0 and the nth component of the principal for $n. e.g. if the principal was druid/admin then `\[2:$2$1suffix]` would result in the string `admindruidsuffix`. If this string matches regexp, then the s//\[g] substitution command will be run over the string. The optional g will cause the substitution to be global over the string, instead of replacing only the first match in the string. If required, multiple rules can be be joined by newline character and specified as a String. diff --git a/docs/development/extensions-core/druid-lookups.md b/docs/development/extensions-core/druid-lookups.md index 23e0dc313c26..e5eb66637929 100644 --- a/docs/development/extensions-core/druid-lookups.md +++ b/docs/development/extensions-core/druid-lookups.md @@ -48,13 +48,13 @@ This extension comes with two different caching strategies. First strategy is a The poll strategy cache strategy will fetch and swap all the pair of key/values periodically from the lookup source. Hence, user should make sure that the cache can fit all the data. -The current implementation provides 2 type of poll cache, the first is onheap (uses immutable map), while the second uses MapBD based offheap map. +The current implementation provides 2 type of poll cache, the first is on-heap (uses immutable map), while the second uses MapDB based off-heap map. User can also implement a different lookup polling cache by implementing `PollingCacheFactory` and `PollingCache` interfaces. #### Loading lookup Loading cache strategy will load the key/value pair upon request on the key it self, the general algorithm is load key if absent. Once the key/value pair is loaded eviction will occur according to the cache eviction policy. -This module comes with two loading lookup implementation, the first is onheap backed by a Guava cache implementation, the second is MapDB offheap implementation. +This module comes with two loading lookup implementation, the first is on-heap backed by a Guava cache implementation, the second is MapDB off-heap implementation. Both implementations offer various eviction strategies. Same for Loading cache, developer can implement a new type of loading cache by implementing `LookupLoadingCache` interface. 
@@ -67,8 +67,8 @@ Same for Loading cache, developer can implement a new type of loading cache by i |Field|Type|Description|Required|default| |-----|----|-----------|--------|-------| -|dataFetcher|Json object|Specifies the lookup data fetcher type to use in order to fetch data|yes|null| -|cacheFactory|Json Object|Cache factory implementation|no |onHeapPolling| +|dataFetcher|JSON object|Specifies the lookup data fetcher type to use in order to fetch data|yes|null| +|cacheFactory|JSON Object|Cache factory implementation|no |onHeapPolling| |pollPeriod|Period|polling period |no |null (poll once)| @@ -102,9 +102,9 @@ This example demonstrates an off-heap lookup that will be cached once and never |Field|Type|Description|Required|default| |-----|----|-----------|--------|-------| -|dataFetcher|Json object|Specifies the lookup data fetcher type to use in order to fetch data|yes|null| -|loadingCacheSpec|Json Object|Lookup cache spec implementation|yes |null| -|reverseLoadingCacheSpec|Json Object| Reverse lookup cache implementation|yes |null| +|dataFetcher|JSON object|Specifies the lookup data fetcher type to use in order to fetch data|yes|null| +|loadingCacheSpec|JSON Object|Lookup cache spec implementation|yes |null| +|reverseLoadingCacheSpec|JSON Object| Reverse lookup cache implementation|yes |null| ##### Example Loading On-heap Guava diff --git a/docs/development/extensions-core/kafka-extraction-namespace.md b/docs/development/extensions-core/kafka-extraction-namespace.md index 92085c3f0c6a..1fc2d75f383d 100644 --- a/docs/development/extensions-core/kafka-extraction-namespace.md +++ b/docs/development/extensions-core/kafka-extraction-namespace.md @@ -26,7 +26,7 @@ title: "Apache Kafka Lookups" To use this Apache Druid (incubating) extension, make sure to [include](../../development/extensions.md#loading-extensions) `druid-lookups-cached-global` and `druid-kafka-extraction-namespace` as an extension. -If you need updates to populate as promptly as possible, it is possible to plug into a kafka topic whose key is the old value and message is the desired new value (both in UTF-8) as a LookupExtractorFactory. +If you need updates to populate as promptly as possible, it is possible to plug into a Kafka topic whose key is the old value and message is the desired new value (both in UTF-8) as a LookupExtractorFactory. ```json { @@ -38,12 +38,12 @@ If you need updates to populate as promptly as possible, it is possible to plug |Parameter|Description|Required|Default| |---------|-----------|--------|-------| -|`kafkaTopic`|The kafka topic to read the data from|Yes|| +|`kafkaTopic`|The Kafka topic to read the data from|Yes|| |`kafkaProperties`|Kafka consumer properties. At least"zookeeper.connect" must be specified. Only the zookeeper connector is supported|Yes|| |`connectTimeout`|How long to wait for an initial connection|No|`0` (do not wait)| |`isOneToOne`|The map is a one-to-one (see [Lookup DimensionSpecs](../../querying/dimensionspecs.md))|No|`false`| -The extension `kafka-extraction-namespace` enables reading from a kafka feed which has name/key pairs to allow renaming of dimension values. An example use case would be to rename an ID to a human readable format. +The extension `kafka-extraction-namespace` enables reading from a Kafka feed which has name/key pairs to allow renaming of dimension values. An example use case would be to rename an ID to a human readable format. 
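Putting the parameters above together, a minimal extractor factory of this kind might look like the following sketch; the topic name and ZooKeeper address are placeholders:

```json
{
  "type": "kafka",
  "kafkaTopic": "testTopic",
  "kafkaProperties": { "zookeeper.connect": "localhost:2181" },
  "connectTimeout": 0,
  "isOneToOne": false
}
```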
The consumer properties `group.id` and `auto.offset.reset` CANNOT be set in `kafkaProperties` as they are set by the extension as `UUID.randomUUID().toString()` and `smallest` respectively. @@ -51,13 +51,13 @@ See [lookups](../../querying/lookups.md) for how to configure and use lookups. ## Limitations -Currently the Kafka lookup extractor feeds the entire kafka stream into a local cache. If you are using OnHeap caching, this can easily clobber your java heap if the kafka stream spews a lot of unique keys. -OffHeap caching should alleviate these concerns, but there is still a limit to the quantity of data that can be stored. +Currently the Kafka lookup extractor feeds the entire Kafka stream into a local cache. If you are using on-heap caching, this can easily clobber your java heap if the Kafka stream spews a lot of unique keys. +Off-heap caching should alleviate these concerns, but there is still a limit to the quantity of data that can be stored. There is currently no eviction policy. ## Testing the Kafka rename functionality -To test this setup, you can send key/value pairs to a kafka stream via the following producer console: +To test this setup, you can send key/value pairs to a Kafka stream via the following producer console: ``` ./bin/kafka-console-producer.sh --property parse.key=true --property key.separator="->" --broker-list localhost:9092 --topic testTopic diff --git a/docs/development/extensions-core/kafka-ingestion.md b/docs/development/extensions-core/kafka-ingestion.md index c52af797c851..37c17055cdb4 100644 --- a/docs/development/extensions-core/kafka-ingestion.md +++ b/docs/development/extensions-core/kafka-ingestion.md @@ -198,7 +198,7 @@ For Roaring bitmaps: |-----|----|-----------|--------| |`topic`|String|The Kafka topic to read from. This must be a specific topic as topic patterns are not supported.|yes| |`consumerProperties`|Map|A map of properties to be passed to the Kafka consumer. This must contain a property `bootstrap.servers` with a list of Kafka brokers in the form: `:,:,...`. For SSL connections, the `keystore`, `truststore` and `key` passwords can be provided as a [Password Provider](../../operations/password-provider.md) or String password.|yes| -|`pollTimeout`|Long|The length of time to wait for the kafka consumer to poll records, in milliseconds|no (default == 100)| +|`pollTimeout`|Long|The length of time to wait for the Kafka consumer to poll records, in milliseconds|no (default == 100)| |`replicas`|Integer|The number of replica sets, where 1 means a single set of tasks (no replication). Replica tasks will always be assigned to different workers to provide resiliency against process failure.|no (default == 1)| |`taskCount`|Integer|The maximum number of *reading* tasks in a *replica set*. This means that the maximum number of reading tasks will be `taskCount * replicas` and the total number of tasks (*reading* + *publishing*) will be higher than this. See 'Capacity Planning' below for more details. The number of reading tasks will be less than `taskCount` if `taskCount > {numKafkaPartitions}`.|no (default == 1)| |`taskDuration`|ISO8601 Period|The length of time before tasks stop reading and begin publishing their segment.|no (default == PT1H)| @@ -318,9 +318,9 @@ may cause some Kafka messages to be skipped or to be read twice. `POST /druid/indexer/v1/supervisor//terminate` terminates a supervisor and causes all associated indexing tasks managed by this supervisor to immediately stop and begin publishing their segments.
This supervisor will still exist in the metadata store and it's history may be retrieved -with the supervisor history api, but will not be listed in the 'get supervisors' api response nor can it's configuration +with the supervisor history API, but will not be listed in the 'get supervisors' API response nor can it's configuration or status report be retrieved. The only way this supervisor can start again is by submitting a functioning supervisor -spec to the create api. +spec to the create API. ### Capacity Planning @@ -370,7 +370,7 @@ A supervisor is stopped via the `POST /druid/indexer/v1/supervisor/ {numKinesisshards}`.|no (default == 1)| +|`taskCount`|Integer|The maximum number of *reading* tasks in a *replica set*. This means that the maximum number of reading tasks will be `taskCount * replicas` and the total number of tasks (*reading* + *publishing*) will be higher than this. See 'Capacity Planning' below for more details. The number of reading tasks will be less than `taskCount` if `taskCount > {numKinesisShards}`.|no (default == 1)| |`taskDuration`|ISO8601 Period|The length of time before tasks stop reading and begin publishing their segment.|no (default == PT1H)| |`startDelay`|ISO8601 Period|The period to wait before the supervisor starts managing tasks.|no (default == PT5S)| |`period`|ISO8601 Period|How often the supervisor will execute its management logic. Note that the supervisor will also run in response to certain events (such as tasks succeeding, failing, and reaching their taskDuration) so this value specifies the maximum time between iterations.|no (default == PT30S)| diff --git a/docs/development/extensions-core/lookups-cached-global.md b/docs/development/extensions-core/lookups-cached-global.md index 4db02af8370e..585e6f9a9837 100644 --- a/docs/development/extensions-core/lookups-cached-global.md +++ b/docs/development/extensions-core/lookups-cached-global.md @@ -95,7 +95,7 @@ Proper functionality of globally cached lookups requires the following extension ## Example configuration -In a simple case where only one [tier](../../querying/lookups.html#dynamic-configuration) exists (`realtime_customer2`) with one `cachedNamespace` lookup called `country_code`, the resulting configuration json looks similar to the following: +In a simple case where only one [tier](../../querying/lookups.html#dynamic-configuration) exists (`realtime_customer2`) with one `cachedNamespace` lookup called `country_code`, the resulting configuration JSON looks similar to the following: ```json { @@ -162,7 +162,7 @@ setting namespaces (Broker, Peon, Historical) |--------|-----------|-------| |`druid.lookup.namespace.cache.type`|Specifies the type of caching to be used by the namespaces. May be one of [`offHeap`, `onHeap`]. `offHeap` uses a temporary file for off-heap storage of the namespace (memory mapped files). `onHeap` stores all cache on the heap in standard java map types.|`onHeap`| |`druid.lookup.namespace.numExtractionThreads`|The number of threads in the thread pool dedicated for lookup extraction and updates. This number may need to be scaled up, if you have a lot of lookups and they take long time to extract, to avoid timeouts.|2| -|`druid.lookup.namespace.numBufferedEntries`|If using offHeap caching, the number of records to be stored on an on-heap buffer.|100,000| +|`druid.lookup.namespace.numBufferedEntries`|If using off-heap caching, the number of records to be stored on an on-heap buffer.|100,000| The cache is populated in different ways depending on the settings below. 
In general, most namespaces employ a `pollPeriod` at the end of which time they poll the remote resource of interest for updates. @@ -178,7 +178,7 @@ For additional lookups, please see our [extensions list](../extensions.md). ### URI lookup -The remapping values for each globally cached lookup can be specified by a json object as per the following examples: +The remapping values for each globally cached lookup can be specified by a JSON object as per the following examples: ```json { @@ -317,7 +317,7 @@ With customJson parsing, if the value field for a particular row is missing or n will not be included in the lookup. #### simpleJson lookupParseSpec -The `simpleJson` lookupParseSpec does not take any parameters. It is simply a line delimited json file where the field is the key, and the field's value is the value. +The `simpleJson` lookupParseSpec does not take any parameters. It is simply a line delimited JSON file where the field is the key, and the field's value is the value. *example input* @@ -337,7 +337,7 @@ The `simpleJson` lookupParseSpec does not take any parameters. It is simply a li ### JDBC lookup -The JDBC lookups will poll a database to populate its local cache. If the `tsColumn` is set it must be able to accept comparisons in the format `'2015-01-01 00:00:00'`. For example, the following must be valid sql for the table `SELECT * FROM some_lookup_table WHERE timestamp_column > '2015-01-01 00:00:00'`. If `tsColumn` is set, the caching service will attempt to only poll values that were written *after* the last sync. If `tsColumn` is not set, the entire table is pulled every time. +The JDBC lookups will poll a database to populate its local cache. If the `tsColumn` is set it must be able to accept comparisons in the format `'2015-01-01 00:00:00'`. For example, the following must be valid SQL for the table `SELECT * FROM some_lookup_table WHERE timestamp_column > '2015-01-01 00:00:00'`. If `tsColumn` is set, the caching service will attempt to only poll values that were written *after* the last sync. If `tsColumn` is not set, the entire table is pulled every time. |Parameter|Description|Required|Default| |---------|-----------|--------|-------| diff --git a/docs/development/extensions-core/orc.md b/docs/development/extensions-core/orc.md index 3bc1483c4e21..1195d905cc18 100644 --- a/docs/development/extensions-core/orc.md +++ b/docs/development/extensions-core/orc.md @@ -264,7 +264,7 @@ setting `"mapreduce.job.user.classpath.first": "true"`, then this will not be an ### Migration from 'contrib' extension This extension, first available in version 0.15.0, replaces the previous 'contrib' extension which was available until -0.14.0-incubating. While this extension can index any data the 'contrib' extension could, the json spec for the +0.14.0-incubating. While this extension can index any data the 'contrib' extension could, the JSON spec for the ingestion task is *incompatible*, and will need modified to work with the newer 'core' extension. To migrate to 0.15.0+: diff --git a/docs/development/extensions-core/parquet.md b/docs/development/extensions-core/parquet.md index f1f5bac50fb7..645cf2418d00 100644 --- a/docs/development/extensions-core/parquet.md +++ b/docs/development/extensions-core/parquet.md @@ -47,12 +47,12 @@ the `ioConfig`: Both parse options support auto field discovery and flattening if provided with a [`flattenSpec`](../../ingestion/index.md#flattenspec) with `parquet` or `avro` as the format. 
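As a rough sketch of the kind of `flattenSpec` this refers to, the following pulls one nested Parquet field out into a top-level column via a JSON path expression while still auto-discovering the remaining fields; the field names are invented for illustration:

```json
{
  "useFieldDiscovery": true,
  "fields": [
    { "type": "path", "name": "nested_metric", "expr": "$.nestedData.metric" }
  ]
}
```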
Parquet nested list and map [logical types](https://github.com/apache/parquet-format/blob/master/LogicalTypes.md) _should_ operate correctly with -json path expressions for all supported types. `parquet-avro` sets a hadoop job property +JSON path expressions for all supported types. `parquet-avro` sets a hadoop job property `parquet.avro.add-list-element-records` to `false` (which normally defaults to `true`), in order to 'unwrap' primitive list elements into multi-value dimensions. The `parquet` parser supports `int96` Parquet values, while `parquet-avro` does not. There may also be some subtle -differences in the behavior of json path expression evaluation of `flattenSpec`. +differences in the behavior of JSON path expression evaluation of `flattenSpec`. We suggest using `parquet` over `parquet-avro` to allow ingesting data beyond the schema constraints of Avro conversion. However, `parquet-avro` was the original basis for this extension, and as such it is a bit more mature. diff --git a/docs/development/extensions-core/postgresql.md b/docs/development/extensions-core/postgresql.md index ff4999a66029..51977433fd33 100644 --- a/docs/development/extensions-core/postgresql.md +++ b/docs/development/extensions-core/postgresql.md @@ -69,7 +69,7 @@ To use this Apache Druid (incubating) extension, make sure to [include](../../de ## Configuration -In most cases, the configuration options map directly to the [postgres jdbc connection options](https://jdbc.postgresql.org/documentation/head/connect.html). +In most cases, the configuration options map directly to the [postgres JDBC connection options](https://jdbc.postgresql.org/documentation/head/connect.html). |Property|Description|Default|Required| |--------|-----------|-------|--------| diff --git a/docs/development/extensions-core/protobuf.md b/docs/development/extensions-core/protobuf.md index 7bba10851aac..b4afaf0c45a3 100644 --- a/docs/development/extensions-core/protobuf.md +++ b/docs/development/extensions-core/protobuf.md @@ -33,7 +33,7 @@ This Apache Druid (incubating) extension enables Druid to ingest and understand | type | String | This should say `protobuf`. | no | | descriptor | String | Protobuf descriptor file name in the classpath or URL. | yes | | protoMessageType | String | Protobuf message type in the descriptor. Both short name and fully qualified name are accepted. The parser uses the first message type found in the descriptor if not specified. | no | -| parseSpec | JSON Object | Specifies the timestamp and dimensions of the data. The format must be json. See [JSON ParseSpec](../../ingestion/index.md) for more configuration options. Please note timeAndDims parseSpec is no longer supported. | yes | +| parseSpec | JSON Object | Specifies the timestamp and dimensions of the data. The format must be JSON. See [JSON ParseSpec](../../ingestion/index.md) for more configuration options. Please note timeAndDims parseSpec is no longer supported. | yes | ## Example: Load Protobuf messages from Kafka @@ -96,7 +96,7 @@ Please make sure these keys are properly configured for successful ingestion. - `protoMessageType` from the proto definition. - parseSpec `format` must be `json`. - `topic` to subscribe. The topic is "metrics_pb" instead of "metrics". -- `bootstrap.server` is the kafka broker host. +- `bootstrap.server` is the Kafka broker host. 
```json { diff --git a/docs/development/extensions-core/s3.md b/docs/development/extensions-core/s3.md index 65e883ee9379..6d1281562534 100644 --- a/docs/development/extensions-core/s3.md +++ b/docs/development/extensions-core/s3.md @@ -40,9 +40,9 @@ As an example, to set the region to 'us-east-1' through system properties: |Property|Description|Default| |--------|-----------|-------| -|`druid.s3.accessKey`|S3 access key.See [S3 authentication methods](#s3-authentication-methods) for more details|Can be ommitted according to authentication methods chosen.| -|`druid.s3.secretKey`|S3 secret key.See [S3 authentication methods](#s3-authentication-methods) for more details|Can be ommitted according to authentication methods chosen.| -|`druid.s3.fileSessionCredentials`|Path to properties file containing `sessionToken`, `accessKey` and `secretKey` value. One key/value pair per line (format `key=value`). See [S3 authentication methods](#s3-authentication-methods) for more details |Can be ommitted according to authentication methods chosen.| +|`druid.s3.accessKey`|S3 access key. See [S3 authentication methods](#s3-authentication-methods) for more details|Can be omitted according to authentication methods chosen.| +|`druid.s3.secretKey`|S3 secret key. See [S3 authentication methods](#s3-authentication-methods) for more details|Can be omitted according to authentication methods chosen.| +|`druid.s3.fileSessionCredentials`|Path to properties file containing `sessionToken`, `accessKey` and `secretKey` value. One key/value pair per line (format `key=value`). See [S3 authentication methods](#s3-authentication-methods) for more details |Can be omitted according to authentication methods chosen.| |`druid.s3.protocol`|Communication protocol type to use when sending requests to AWS. `http` or `https` can be used. This configuration would be ignored if `druid.s3.endpoint.url` is filled with a URL with a different protocol.|`https`| |`druid.s3.disableChunkedEncoding`|Disables chunked encoding. See [AWS document](https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/services/s3/AmazonS3Builder.html#disableChunkedEncoding--) for details.|false| |`druid.s3.enablePathStyleAccess`|Enables path style access. See [AWS document](https://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/services/s3/AmazonS3Builder.html#enablePathStyleAccess--) for details.|false| @@ -75,15 +75,15 @@ To connect to your S3 bucket (whether deep storage bucket or source bucket), Dru |order|type|details| |--------|-----------|-------| |1|Druid config file|Based on your runtime.properties if it contains values `druid.s3.accessKey` and `druid.s3.secretKey` | -|2|Custom properties file| Based on custom properties file where you can supply `sessionToken`, `accessKey` and `secretKey` values. This file is provided to Druid through `druid.s3.fileSessionCredentials` propertie| +|2|Custom properties file| Based on custom properties file where you can supply `sessionToken`, `accessKey` and `secretKey` values. 
This file is provided to Druid through the `druid.s3.fileSessionCredentials` property| |3|Environment variables|Based on environment variables `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY`| |4|Java system properties|Based on JVM properties `aws.accessKeyId` and `aws.secretKey` | -|5|Profile informations|Based on credentials you may have on your druid instance (generally in `~/.aws/credentials`)| -|6|Instance profile informations|Based on the instance profile you may have attached to your druid instance| +|5|Profile information|Based on credentials you may have on your druid instance (generally in `~/.aws/credentials`)| +|6|Instance profile information|Based on the instance profile you may have attached to your druid instance| -You can find more informations about authentication method [here](https://docs.aws.amazon.com/fr_fr/sdk-for-java/v1/developer-guide/credentials.html)
+You can find more information about authentication methods [here](https://docs.aws.amazon.com/fr_fr/sdk-for-java/v1/developer-guide/credentials.html)
**Note :** *Order is important here as it indicates the precedence of authentication methods.
-So if you are trying to use Instance profile informations, you **must not** set `druid.s3.accessKey` and `druid.s3.secretKey` in your Druid runtime.properties* +So if you are trying to use Instance profile information, you **must not** set `druid.s3.accessKey` and `druid.s3.secretKey` in your Druid runtime.properties* ## Server-side encryption diff --git a/docs/development/extensions-core/test-stats.md b/docs/development/extensions-core/test-stats.md index a860d423d12e..e517b356525f 100644 --- a/docs/development/extensions-core/test-stats.md +++ b/docs/development/extensions-core/test-stats.md @@ -25,7 +25,7 @@ title: "Test Stats Aggregators" This Apache Druid (incubating) extension incorporates test statistics related aggregators, including z-score and p-value. Please refer to [https://www.paypal-engineering.com/2017/06/29/democratizing-experimentation-data-for-product-innovations/](https://www.paypal-engineering.com/2017/06/29/democratizing-experimentation-data-for-product-innovations/) for math background and details. -Make sure to include `druid-stats` extension in order to use these aggregrators. +Make sure to include `druid-stats` extension in order to use these aggregators. ## Z-Score for two sample ztests post aggregator diff --git a/docs/development/extensions.md b/docs/development/extensions.md index b194dbce8b12..d7638ccd0ad8 100644 --- a/docs/development/extensions.md +++ b/docs/development/extensions.md @@ -78,7 +78,7 @@ All of these community extensions can be downloaded using [pull-deps](../operati |druid-distinctcount|DistinctCount aggregator|[link](../development/extensions-contrib/distinctcount.md)| |druid-redis-cache|A cache implementation for Druid based on Redis.|[link](../development/extensions-contrib/redis-cache.md)| |druid-time-min-max|Min/Max aggregator for timestamp.|[link](../development/extensions-contrib/time-min-max.md)| -|sqlserver-metadata-storage|Microsoft SqlServer deep storage.|[link](../development/extensions-contrib/sqlserver.md)| +|sqlserver-metadata-storage|Microsoft SQLServer deep storage.|[link](../development/extensions-contrib/sqlserver.md)| |graphite-emitter|Graphite metrics emitter|[link](../development/extensions-contrib/graphite.md)| |statsd-emitter|StatsD metrics emitter|[link](../development/extensions-contrib/statsd.md)| |kafka-emitter|Kafka metrics emitter|[link](../development/extensions-contrib/kafka-emitter.md)| diff --git a/docs/development/javascript.md b/docs/development/javascript.md index a50d7abaed68..3e4019bb34f7 100644 --- a/docs/development/javascript.md +++ b/docs/development/javascript.md @@ -45,8 +45,8 @@ Druid uses the Mozilla Rhino engine at optimization level 9 to compile and execu ## Security -Druid does not execute JavaScript functions in a sandbox, so they have full access to the machine. So Javascript -functions allow users to execute arbitrary code inside druid process. So, by default, Javascript is disabled. +Druid does not execute JavaScript functions in a sandbox, so they have full access to the machine. So JavaScript +functions allow users to execute arbitrary code inside druid process. So, by default, JavaScript is disabled. However, on dev/staging environments or secured production environments you can enable those by setting the [configuration property](../configuration/index.html#javascript) `druid.javascript.enabled = true`. 
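For orientation only: once the feature is enabled, user-supplied JavaScript is embedded as a string inside query components, for example an extraction function along the lines of the sketch below; the function body is an illustrative placeholder, not a recommended pattern:

```json
{
  "type": "javascript",
  "function": "function(value) { return value.substring(0, 3); }"
}
```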
diff --git a/docs/development/modules.md b/docs/development/modules.md index 1d7977c3915d..b424e12ae3bc 100644 --- a/docs/development/modules.md +++ b/docs/development/modules.md @@ -164,7 +164,7 @@ public List getJacksonModules() } ``` -This is registering the FirehoseFactory with Jackson's polymorphic serde layer. More concretely, having this will mean that if you specify a `"firehose": { "type": "static-s3", ... }` in your realtime config, then the system will load this FirehoseFactory for your firehose. +This is registering the FirehoseFactory with Jackson's polymorphic serialization/deserialization layer. More concretely, having this will mean that if you specify a `"firehose": { "type": "static-s3", ... }` in your realtime config, then the system will load this FirehoseFactory for your firehose. Note that inside of Druid, we have made the @JacksonInject annotation for Jackson deserialized objects actually use the base Guice injector to resolve the object to be injected. So, if your FirehoseFactory needs access to some object, you can add a @JacksonInject annotation on a setter and it will get set on instantiation. @@ -174,7 +174,7 @@ Adding AggregatorFactory objects is very similar to Firehose objects. They oper ### Adding Complex Metrics -Adding ComplexMetrics is a little ugly in the current version. The method of getting at complex metrics is through registration with the `ComplexMetrics.registerSerde()` method. There is no special Guice stuff to get this working, just in your `configure(Binder)` method register the serde. +Adding ComplexMetrics is a little ugly in the current version. The method of getting at complex metrics is through registration with the `ComplexMetrics.registerSerde()` method. There is no special Guice stuff to get this working, just in your `configure(Binder)` method register the serialization/deserialization. ### Adding new Query types diff --git a/docs/ingestion/data-management.md b/docs/ingestion/data-management.md index 07dc89a4cd6e..cf22c04386e8 100644 --- a/docs/ingestion/data-management.md +++ b/docs/ingestion/data-management.md @@ -117,7 +117,7 @@ Compaction tasks merge all segments of the given interval. The syntax is: |`dimensionsSpec`|Custom dimensionsSpec. Compaction task will use this dimensionsSpec if exist instead of generating one. See below for more details.|No| |`metricsSpec`|Custom metricsSpec. Compaction task will use this metricsSpec if specified rather than generating one.|No| |`segmentGranularity`|If this is set, compactionTask will change the segment granularity for the given interval. See `segmentGranularity` of [`granularitySpec`](index.md#granularityspec) for more details. See the below table for the behavior.|No| -|`targetCompactionSizeBytes`|Target segment size after comapction. Cannot be used with `maxRowsPerSegment`, `maxTotalRows`, and `numShards` in tuningConfig.|No| +|`targetCompactionSizeBytes`|Target segment size after compaction. Cannot be used with `maxRowsPerSegment`, `maxTotalRows`, and `numShards` in tuningConfig.|No| |`tuningConfig`|[Index task tuningConfig](../ingestion/native-batch.md#tuningconfig)|No| |`context`|[Task context](../ingestion/tasks.md#context)|No| diff --git a/docs/ingestion/hadoop.md b/docs/ingestion/hadoop.md index 383a10301c8c..7ca812ac85a8 100644 --- a/docs/ingestion/hadoop.md +++ b/docs/ingestion/hadoop.md @@ -109,7 +109,7 @@ A sample task is shown below: |type|The task type, this should always be "index_hadoop".|yes| |spec|A Hadoop Index Spec. 
See [Ingestion](../ingestion/index.md)|yes| |hadoopDependencyCoordinates|A JSON array of Hadoop dependency coordinates that Druid will use, this property will override the default Hadoop coordinates. Once specified, Druid will look for those Hadoop dependencies from the location specified by `druid.extensions.hadoopDependenciesDir`|no| -|classpathPrefix|Classpath that will be pre-appended for the Peon process.|no| +|classpathPrefix|Classpath that will be prepended for the Peon process.|no| Also note that Druid automatically computes the classpath for Hadoop job containers that run in the Hadoop cluster. But in case of conflicts between Hadoop and Druid's dependencies, you can manually specify the classpath by setting `druid.extensions.hadoopContainerDruidClasspath` property. See the extensions config in [base druid configuration](../configuration/index.html#extensions). @@ -186,7 +186,7 @@ Here is what goes inside `ingestionSpec`: |-----|----|-----------|--------| |dataSource|String|Druid dataSource name from which you are loading the data.|yes| |intervals|List|A list of strings representing ISO-8601 Intervals.|yes| -|segments|List|List of segments from which to read data from, by default it is obtained automatically. You can obtain list of segments to put here by making a POST query to Coordinator at url /druid/coordinator/v1/metadata/datasources/segments?full with list of intervals specified in the request paylod e.g. ["2012-01-01T00:00:00.000/2012-01-03T00:00:00.000", "2012-01-05T00:00:00.000/2012-01-07T00:00:00.000"]. You may want to provide this list manually in order to ensure that segments read are exactly same as they were at the time of task submission, task would fail if the list provided by the user does not match with state of database when the task actually runs.|no| +|segments|List|List of segments from which to read data from, by default it is obtained automatically. You can obtain list of segments to put here by making a POST query to Coordinator at url /druid/coordinator/v1/metadata/datasources/segments?full with list of intervals specified in the request payload, e.g. ["2012-01-01T00:00:00.000/2012-01-03T00:00:00.000", "2012-01-05T00:00:00.000/2012-01-07T00:00:00.000"]. You may want to provide this list manually in order to ensure that segments read are exactly same as they were at the time of task submission, task would fail if the list provided by the user does not match with state of database when the task actually runs.|no| |filter|JSON|See [Filters](../querying/filters.md)|no| |dimensions|Array of String|Name of dimension columns to load. By default, the list will be constructed from parseSpec. If parseSpec does not have an explicit list of dimensions then all the dimension columns present in stored data will be read.|no| |metrics|Array of String|Name of metric columns to load. By default, the list will be constructed from the "name" of all the configured aggregators.|no| @@ -391,7 +391,7 @@ on your EMR master. ## Kerberized Hadoop clusters -By default druid can use the exisiting TGT kerberos ticket available in local kerberos key cache. +By default druid can use the existing TGT kerberos ticket available in local kerberos key cache. Although TGT ticket has a limited life cycle, therefore you need to call `kinit` command periodically to ensure validity of TGT ticket. 
To avoid this extra external cron job script calling `kinit` periodically, diff --git a/docs/ingestion/index.md b/docs/ingestion/index.md index 66de3811a916..4725b130ac75 100644 --- a/docs/ingestion/index.md +++ b/docs/ingestion/index.md @@ -487,7 +487,7 @@ A `timestampSpec` can have the following components: |Field|Description|Default| |-----|-----------|-------| |column|Input row field to read the primary timestamp from.

Regardless of the name of this input field, the primary timestamp will always be stored as a column named `__time` in your Druid datasource.|timestamp| -|format|Timestamp format. Options are:
  • `iso`: ISO8601 with 'T' separator, like "2000-01-01T01:02:03.456"
  • `posix`: seconds since epoch
  • `millis`: milliseconds since epoch
  • `micro`: microseconds since epoch
  • `nano`: nanoseconds since epoch
  • `auto`: automatically detects iso (either 'T' or space separator) or millis format
  • any [Joda DateTimeFormat string](http://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html)
|auto| +|format|Timestamp format. Options are:
  • `iso`: ISO8601 with 'T' separator, like "2000-01-01T01:02:03.456"
  • `posix`: seconds since epoch
  • `millis`: milliseconds since epoch
  • `micro`: microseconds since epoch
  • `nano`: nanoseconds since epoch
  • `auto`: automatically detects ISO (either 'T' or space separator) or millis format
  • any [Joda DateTimeFormat string](http://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html)
|auto| |missingValue|Timestamp to use for input records that have a null or missing timestamp `column`. Should be in ISO8601 format, like `"2000-01-01T01:02:03.456"`, even if you have specified something else for `format`. Since Druid requires a primary timestamp, this setting can be useful for ingesting datasets that do not have any per-record timestamps at all. |none| ### `dimensionsSpec` diff --git a/docs/ingestion/native-batch.md b/docs/ingestion/native-batch.md index 55ffb5949d51..cdd92d78775d 100644 --- a/docs/ingestion/native-batch.md +++ b/docs/ingestion/native-batch.md @@ -57,11 +57,11 @@ In the first phase, each sub task partitions input data based on `segmentGranula and `partitionDimensions` (secondary partition key) in `partitionsSpec`. The partitioned data is served by the [middleManager](../design/middlemanager.md) or the [indexer](../design/indexer.md) where the first phase tasks ran. In the second phase, each sub task fetches -partitioned data from middleManagers or indexers and merges them to create the final segments. +partitioned data from MiddleManagers or indexers and merges them to create the final segments. As in the single phase execution, the created segments are reported to the supervisor task to publish at once. To use this task, the `firehose` in `ioConfig` should be _splittable_ and `maxNumConcurrentSubTasks` should be set something larger than 1 in `tuningConfig`. -Otherwise, this task runs sequentially. Here is the list of currently splittable fireshoses. +Otherwise, this task runs sequentially. Here is the list of currently splittable firehoses. - [`LocalFirehose`](#local-firehose) - [`IngestSegmentFirehose`](#segment-firehose) @@ -178,7 +178,7 @@ See [Ingestion Spec DataSchema](../ingestion/index.md#dataschema) If you specify `intervals` explicitly in your dataSchema's granularitySpec, batch ingestion will lock the full intervals specified when it starts up, and you will learn quickly if the specified interval overlaps with locks held by other -tasks (eg, Kafka ingestion). Otherwise, batch ingestion will lock each interval as it is discovered, so you may only +tasks (e.g., Kafka ingestion). Otherwise, batch ingestion will lock each interval as it is discovered, so you may only learn that the task overlaps with a higher-priority task later in ingestion. If you specify `intervals` explicitly, any rows outside the specified intervals will be thrown away. We recommend setting `intervals` explicitly if you know the time range of the data so that locking failure happens faster, and so that you don't accidentally replace data outside @@ -216,7 +216,7 @@ The tuningConfig is optional and default parameters will be used if no tuningCon |maxRetry|Maximum number of retries on task failures.|3|no| |maxNumSegmentsToMerge|Max limit for the number of segments that a single task can merge at the same time in the second phase. 
Used only `forceGuaranteedRollup` is set.|100|no| |totalNumMergeTasks|Total number of tasks to merge segments in the second phase when `forceGuaranteedRollup` is set.|10|no| -|taskStatusCheckPeriodMs|Polling period in milleseconds to check running task statuses.|1000|no| +|taskStatusCheckPeriodMs|Polling period in milliseconds to check running task statuses.|1000|no| |chatHandlerTimeout|Timeout for reporting the pushed segments in worker tasks.|PT10S|no| |chatHandlerNumRetries|Retries for reporting the pushed segments in worker tasks.|5|no| diff --git a/docs/ingestion/tasks.md b/docs/ingestion/tasks.md index fac8cc1bc541..e51ed7f68bec 100644 --- a/docs/ingestion/tasks.md +++ b/docs/ingestion/tasks.md @@ -175,7 +175,7 @@ and Kinesis indexing services. ## Task lock system This section explains the task locking system in Druid. Druid's locking system -and versioning system are tighly coupled with each other to guarantee the correctness of ingested data. +and versioning system are tightly coupled with each other to guarantee the correctness of ingested data. ## "Overshadowing" between segments @@ -247,7 +247,7 @@ then this task will _preempt_ the other task of a lower priority. The lock of the lower-prioritized task will be revoked and the higher-prioritized task will acquire a new lock. This lock preemption can happen at any time while a task is running except -when it is _publishing segments_ in a critical section. Its locks become preemptable again once publishing segments is finished. +when it is _publishing segments_ in a critical section. Its locks become preemptible again once publishing segments is finished. Note that locks are shared by the tasks of the same groupId. For example, Kafka indexing tasks of the same supervisor have the same groupId and share all locks with each other. diff --git a/docs/misc/math-expr.md b/docs/misc/math-expr.md index 31a1444d4875..27b799d0c72d 100644 --- a/docs/misc/math-expr.md +++ b/docs/misc/math-expr.md @@ -105,7 +105,7 @@ The following built-in functions are available. |name|description| |----|-----------| -|timestamp|timestamp(expr[,format-string]) parses string expr into date then returns milli-seconds from java epoch. without 'format-string' it's regarded as ISO datetime format | +|timestamp|timestamp(expr[,format-string]) parses string expr into date then returns milliseconds from java epoch. without 'format-string' it's regarded as ISO datetime format | |unix_timestamp|same with 'timestamp' function but returns seconds instead | |timestamp_ceil|timestamp_ceil(expr, period, \[origin, \[timezone\]\]) rounds up a timestamp, returning it as a new timestamp. Period can be any ISO8601 period, like P3M (quarters) or PT12H (half-days). The time zone, if provided, should be a time zone name like "America/Los_Angeles" or offset like "-08:00".| |timestamp_floor|timestamp_floor(expr, period, \[origin, [timezone\]\]) rounds down a timestamp, returning it as a new timestamp. Period can be any ISO8601 period, like P3M (quarters) or PT12H (half-days). The time zone, if provided, should be a time zone name like "America/Los_Angeles" or offset like "-08:00".| @@ -186,10 +186,10 @@ See javadoc of java.lang.Math for detailed explanation for each function. | function | description | | --- | --- | | map(lambda,arr) | applies a transform specified by a single argument lambda expression to all elements of arr, returning a new array | -| cartesian_map(lambda,arr1,arr2,...) 
| applies a transform specified by a multi argument lambda expression to all elements of the cartesian product of all input arrays, returning a new array; the number of lambda arguments and array inputs must be the same | +| cartesian_map(lambda,arr1,arr2,...) | applies a transform specified by a multi argument lambda expression to all elements of the Cartesian product of all input arrays, returning a new array; the number of lambda arguments and array inputs must be the same | | filter(lambda,arr) | filters arr by a single argument lambda, returning a new array with all matching elements, or null if no elements match | | fold(lambda,arr) | folds a 2 argument lambda across arr. The first argument of the lambda is the array element and the second the accumulator, returning a single accumulated value. | -| cartesian_fold(lambda,arr1,arr2,...) | folds a multi argument lambda across the cartesian product of all input arrays. The first arguments of the lambda is the array element and the last is the accumulator, returning a single accumulated value. | +| cartesian_fold(lambda,arr1,arr2,...) | folds a multi argument lambda across the Cartesian product of all input arrays. The first arguments of the lambda is the array element and the last is the accumulator, returning a single accumulated value. | | any(lambda,arr) | returns 1 if any element in the array matches the lambda expression, else 0 | | all(lambda,arr) | returns 1 if all elements in the array matches the lambda expression, else 0 | diff --git a/docs/operations/api-reference.md b/docs/operations/api-reference.md index 9b06eeed0d3b..a3cf416bdcfb 100644 --- a/docs/operations/api-reference.md +++ b/docs/operations/api-reference.md @@ -358,7 +358,7 @@ Returns a compaction config of a dataSource. * `/druid/coordinator/v1/config/compaction/taskslots?ratio={someRatio}&max={someMaxSlots}` Update the capacity for compaction tasks. `ratio` and `max` are used to limit the max number of compaction tasks. -They mean the ratio of the total task slots to the copmaction task slots and the maximum number of task slots for compaction tasks, respectively. +They mean the ratio of the total task slots to the compaction task slots and the maximum number of task slots for compaction tasks, respectively. The actual max number of compaction tasks is `min(max, ratio * total task slots)`. Note that `ratio` and `max` are optional and can be omitted. If they are omitted, default values (0.1 and unbounded) will be set for them. diff --git a/docs/operations/druid-console.md b/docs/operations/druid-console.md index 8c030617f001..24856e7afdd6 100644 --- a/docs/operations/druid-console.md +++ b/docs/operations/druid-console.md @@ -30,7 +30,7 @@ In addition, the following cluster settings must be enabled: - the Router's [management proxy](../design/router.html#enabling-the-management-proxy) must be enabled. - the Broker processes in the cluster must have [Druid SQL](../querying/sql.md) enabled. 
-After enabling Druid SQL on the Brokers and deploying a Router with the managment proxy enabled, the Druid console can be accessed at: +After enabling Druid SQL on the Brokers and deploying a Router with the management proxy enabled, the Druid console can be accessed at: ``` http://: diff --git a/docs/operations/management-uis.md index 68eb4e4a2a62..bb3e80bf5e88 100644 --- a/docs/operations/management-uis.md +++ b/docs/operations/management-uis.md @@ -34,7 +34,7 @@ In addition, the following cluster settings must be enabled: - the Router's [management proxy](../design/router.html#enabling-the-management-proxy) must be enabled. - the Broker processes in the cluster must have [Druid SQL](../querying/sql.md) enabled. -After enabling Druid SQL on the Brokers and deploying a Router with the managment proxy enabled, the Druid console can be accessed at: +After enabling Druid SQL on the Brokers and deploying a Router with the management proxy enabled, the Druid console can be accessed at: ``` http://: diff --git a/docs/operations/metrics.md index cdcc0c04d752..f5a72dbb354e 100644 --- a/docs/operations/metrics.md +++ b/docs/operations/metrics.md @@ -265,8 +265,8 @@ These metrics are only available if the SysMonitor module is included. |`sys/swap/pageOut`|Paged out swap.||Varies.| |`sys/disk/write/count`|Writes to disk.|fsDevName, fsDirName, fsTypeName, fsSysTypeName, fsOptions.|Varies.| |`sys/disk/read/count`|Reads from disk.|fsDevName, fsDirName, fsTypeName, fsSysTypeName, fsOptions.|Varies.| -|`sys/disk/write/size`|Bytes written to disk. Can we used to determine how much paging is occuring with regards to segments.|fsDevName, fsDirName, fsTypeName, fsSysTypeName, fsOptions.|Varies.| -|`sys/disk/read/size`|Bytes read from disk. Can we used to determine how much paging is occuring with regards to segments.|fsDevName, fsDirName, fsTypeName, fsSysTypeName, fsOptions.|Varies.| +|`sys/disk/write/size`|Bytes written to disk. Can be used to determine how much paging is occurring with regard to segments.|fsDevName, fsDirName, fsTypeName, fsSysTypeName, fsOptions.|Varies.| +|`sys/disk/read/size`|Bytes read from disk. Can be used to determine how much paging is occurring with regard to segments.|fsDevName, fsDirName, fsTypeName, fsSysTypeName, fsOptions.|Varies.| |`sys/net/write/size`|Bytes written to the network.|netName, netAddress, netHwaddr|Varies.| |`sys/net/read/size`|Bytes read from the network.|netName, netAddress, netHwaddr|Varies.| |`sys/fs/used`|Filesystem bytes used.|fsDevName, fsDirName, fsTypeName, fsSysTypeName, fsOptions.|< max| diff --git a/docs/operations/other-hadoop.md index 87cc9ea8aad3..303a0ac9f473 100644 --- a/docs/operations/other-hadoop.md +++ b/docs/operations/other-hadoop.md @@ -209,7 +209,7 @@ addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.13.0") **Alternate workaround - 3** -If sbt is not your choice, you can also use `maven-shade-plugin` to make a fat jar: relocation all jackson packages will resolve it too. In this way, druid will not be affected by jackson library embedded in hadoop. Please follow the steps below: +If sbt is not your choice, you can also use `maven-shade-plugin` to make a fat jar: relocating all Jackson packages will resolve it too. In this way, Druid will not be affected by the Jackson library embedded in Hadoop.
Please follow the steps below: (1) Add all extensions you needed to `services/pom.xml` like @@ -239,7 +239,7 @@ If sbt is not your choice, you can also use `maven-shade-plugin` to make a fat j ``` -(2) Shade jackson packages and assemble a fat jar. +(2) Shade Jackson packages and assemble a fat jar. ```xml diff --git a/docs/operations/pull-deps.md index b93b4b9ea83f..8faf1a1baf68 100644 --- a/docs/operations/pull-deps.md +++ b/docs/operations/pull-deps.md @@ -41,11 +41,11 @@ Don't pull down the default hadoop coordinate, i.e., org.apache.hadoop:hadoop-cl `--clean` -Remove exisiting extension and hadoop dependencies directories before pulling down dependencies. +Remove existing extension and hadoop dependencies directories before pulling down dependencies. `-l` or `--localRepository` -A local repostiry that Maven will use to put downloaded files. Then pull-deps will lay these files out into the extensions directory as needed. +A local repository where Maven will put downloaded files. Then pull-deps will lay these files out into the extensions directory as needed. `-r` or `--remoteRepository` diff --git a/docs/operations/recommendations.md index 2d6bdb4610b7..20ce567655cb 100644 --- a/docs/operations/recommendations.md +++ b/docs/operations/recommendations.md @@ -52,16 +52,16 @@ JVM Flags: `ExitOnOutOfMemoryError` flag is only supported starting JDK 8u92 . For older versions, `-XX:OnOutOfMemoryError='kill -9 %p'` can be used. -`MaxDirectMemorySize` restricts jvm from allocating more than specified limit, by setting it to unlimited jvm restriction is lifted and OS level memory limits would still be effective. It's still important to make sure that Druid is not configured to allocate more off-heap memory than your machine has available. Important settings here include druid.processing.numThreads, druid.processing.numMergeBuffers, and druid.processing.buffer.sizeBytes. +`MaxDirectMemorySize` restricts the JVM from allocating more direct memory than the specified limit; setting it to unlimited lifts the JVM restriction, and OS-level memory limits still apply. It's still important to make sure that Druid is not configured to allocate more off-heap memory than your machine has available. Important settings here include druid.processing.numThreads, druid.processing.numMergeBuffers, and druid.processing.buffer.sizeBytes. Please note that above flags are general guidelines only. Be cautious and feel free to change them if necessary for the specific deployment. -Additionally, for large jvm heaps, here are a few Garbage Collection efficiency guidelines that have been known to help in some cases. +Additionally, for large JVM heaps, here are a few Garbage Collection efficiency guidelines that have been known to help in some cases. - Mount /tmp on tmpfs ( See http://www.evanjones.ca/jvm-mmap-pause.html ) - On Disk-IO intensive processes (e.g. Historical and MiddleManager), GC and Druid logs should be written to a different disk than where data is written. - Disable Transparent Huge Pages ( See https://blogs.oracle.com/linux/performance-issues-with-transparent-huge-pages-thp ) -- Try disabling biased locking by using `-XX:-UseBiasedLocking` jvm flag. ( See https://dzone.com/articles/logging-stop-world-pauses-jvm ) +- Try disabling biased locking by using the `-XX:-UseBiasedLocking` JVM flag.
( See https://dzone.com/articles/logging-stop-world-pauses-jvm ) ## Use UTC timezone @@ -81,7 +81,7 @@ Timeseries and TopN queries are much more optimized and significantly faster tha ## Segment sizes matter -Segments should generally be between 300MB-700MB in size. Too many small segments results in inefficient CPU utilizations and +Segments should generally be between 300MB-700MB in size. Too many small segments result in inefficient CPU utilization and too many large segments impacts query performance, most notably with TopN queries. ## FAQs and Guides diff --git a/docs/operations/segment-optimization.md index f0d19c1b9ec8..4f8e501ec3b0 100644 --- a/docs/operations/segment-optimization.md +++ b/docs/operations/segment-optimization.md @@ -38,7 +38,7 @@ In Apache Druid (incubating), it's important to optimize the segment size becaus It would be best if you can optimize the segment size at ingestion time, but sometimes it's not easy especially when it comes to stream ingestion because the amount of data ingested might vary over time. In this case, -you can create segments with a sub-optimzed size first and optimize them later. +you can create segments with a sub-optimized size first and optimize them later. You may need to consider the followings to optimize your segments. diff --git a/docs/querying/dimensionspecs.md index 4604be9be116..0120813c1e83 100644 --- a/docs/querying/dimensionspecs.md +++ b/docs/querying/dimensionspecs.md @@ -79,7 +79,7 @@ The following filtered dimension spec acts as a whitelist or blacklist for value { "type" : "listFiltered", "delegate" : , "values": , "isWhitelist": } ``` -Following filtered dimension spec retains only the values matching regex. Note that `listFiltered` is faster than this and one should use that for whitelist or blacklist usecase. +The following filtered dimension spec retains only the values matching a regex. Note that `listFiltered` is faster than this and one should use that for the whitelist or blacklist use case. ```json { "type" : "regexFiltered", "delegate" : , "pattern": } ``` -### Javascript Extraction Function +### JavaScript Extraction Function Returns the dimension value, as transformed by the given JavaScript function. @@ -327,7 +327,7 @@ Example for a regular dimension } ``` -A property of `injective` specifies if the javascript function preserves uniqueness. The default value is `false` meaning uniqueness is not preserved +A property of `injective` specifies if the JavaScript function preserves uniqueness. The default value is `false`, meaning uniqueness is not preserved. Example for the `__time` dimension: @@ -465,7 +465,7 @@ Provides chained execution of extraction functions. A property of `extractionFns` contains an array of any extraction functions, which is executed in the array index order. -Example for chaining [regular expression extraction function](#regular-expression-extraction-function), [javascript extraction function](#javascript-extraction-function), and [substring extraction function](#substring-extraction-function) is as followings. +An example of chaining the [regular expression extraction function](#regular-expression-extraction-function), [JavaScript extraction function](#javascript-extraction-function), and [substring extraction function](#substring-extraction-function) is as follows.
```json { @@ -490,7 +490,7 @@ Example for chaining [regular expression extraction function](#regular-expressio ``` It will transform dimension values with specified extraction functions in the order named. -For example, `'/druid/prod/historical'` is transformed to `'the dru'` as regular expression extraction function first transforms it to `'druid'` and then, javascript extraction function transforms it to `'the druid'`, and lastly, substring extraction function transforms it to `'the dru'`. +For example, `'/druid/prod/historical'` is transformed to `'the dru'` as regular expression extraction function first transforms it to `'druid'` and then, JavaScript extraction function transforms it to `'the druid'`, and lastly, substring extraction function transforms it to `'the dru'`. ### String Format Extraction Function diff --git a/docs/querying/groupbyquery.md b/docs/querying/groupbyquery.md index 0cac52501cde..5e3e7cfba9d1 100644 --- a/docs/querying/groupbyquery.md +++ b/docs/querying/groupbyquery.md @@ -308,7 +308,7 @@ Druid pushes down the `limit` spec in groupBy queries to the segments on Histori ##### Optimizing hash table -The groupBy v2 engine uses an open addressing hash table for aggregation. The hash table is initalized with a given initial bucket number and gradually grows on buffer full. On hash collisions, the linear probing technique is used. +The groupBy v2 engine uses an open addressing hash table for aggregation. The hash table is initialized with a given initial bucket number and gradually grows on buffer full. On hash collisions, the linear probing technique is used. The default number of initial buckets is 1024 and the default max load factor of the hash table is 0.7. If you can see too many collisions in the hash table, you can adjust these numbers. See `bufferGrouperInitialBuckets` and `bufferGrouperMaxLoadFactor` in [Advanced groupBy v2 configurations](#groupby-v2-configurations). diff --git a/docs/querying/hll-old.md b/docs/querying/hll-old.md index 3323321b07f9..2b6ea1ff83a0 100644 --- a/docs/querying/hll-old.md +++ b/docs/querying/hll-old.md @@ -133,7 +133,7 @@ Uses [HyperLogLog](http://algo.inria.fr/flajolet/Publications/FlFuGaMe07.pdf) to } ``` -"isInputHyperUnique" can be set to true to index pre-computed HLL (Base64 encoded output from druid-hll is expected). +"isInputHyperUnique" can be set to true to index precomputed HLL (Base64 encoded output from druid-hll is expected). The "isInputHyperUnique" field only affects ingestion-time behavior, and is ignored at query-time. The HyperLogLog algorithm generates decimal estimates with some error. "round" can be set to true to round off estimated diff --git a/docs/querying/segmentmetadataquery.md b/docs/querying/segmentmetadataquery.md index f842424fb36f..f23ce3e8ef81 100644 --- a/docs/querying/segmentmetadataquery.md +++ b/docs/querying/segmentmetadataquery.md @@ -87,7 +87,7 @@ Timestamp column will have type `LONG`. If the `errorMessage` field is non-null, you should not trust the other fields in the response. Their contents are undefined. -Only columns which are dimensions (ie, have type `STRING`) will have any cardinality. Rest of the columns (timestamp and metric columns) will show cardinality as `null`. +Only columns which are dimensions (i.e., have type `STRING`) will have any cardinality. Rest of the columns (timestamp and metric columns) will show cardinality as `null`. 
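As an illustration of the column analysis described above, here is a minimal sketch of a segmentMetadata query that asks for cardinality and min/max analysis; the datasource name and interval below are placeholders rather than values taken from this page.

```json
{
  "queryType": "segmentMetadata",
  "dataSource": "wikipedia",
  "intervals": ["2013-01-01/2014-01-01"],
  "analysisTypes": ["cardinality", "minmax"],
  "merge": true
}
```

In the response to a query like this, only the dimension (`STRING`) columns would report a non-null `cardinality`, as noted above.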
### intervals @@ -184,5 +184,5 @@ with unknown aggregators, or any conflicts of any kind, the merged aggregators l merging, segments with unknown aggregators will be ignored, and conflicts between aggregators will only null out the aggregator for that particular column. -In particular, with lenient merging, it is possible for an invidiual column's aggregator to be `null`. This will not +In particular, with lenient merging, it is possible for an individual column's aggregator to be `null`. This will not occur with strict merging. diff --git a/docs/querying/sorting-orders.md index ee8960bdbb8b..ea2cd7bdb29c 100644 --- a/docs/querying/sorting-orders.md +++ b/docs/querying/sorting-orders.md @@ -26,7 +26,7 @@ title: "Sorting Orders" These sorting orders are used by the [TopNMetricSpec](./topnmetricspec.md), [SearchQuery](./searchquery.md), GroupByQuery's [LimitSpec](./limitspec.md), and [BoundFilter](./filters.html#bound-filter). ## Lexicographic -Sorts values by converting Strings to their UTF-8 byte array representations and comparing lexicgraphically, byte-by-byte. +Sorts values by converting Strings to their UTF-8 byte array representations and comparing lexicographically, byte-by-byte. ## Alphanumeric Suitable for strings with both numeric and non-numeric content, e.g.: "file12 sorts after file2" diff --git a/docs/querying/sql.md index 4fdd2575b50d..71c0b0205d1c 100644 --- a/docs/querying/sql.md +++ b/docs/querying/sql.md @@ -698,7 +698,7 @@ GROUP BY 1 ORDER BY 2 DESC ``` -*Caveat:* Note that a segment can be served by more than one stream ingestion tasks or Historical processes, in that case it would have multiple replicas. These replicas are weakly consistent with each other when served by multiple ingestion tasks, until a segment is eventually served by a Historical, at that point the segment is immutable. Broker prefers to query a segment from Historical over an ingestion task. But if a segment has multiple realtime replicas, for eg. kafka index tasks, and one task is slower than other, then the sys.segments query results can vary for the duration of the tasks because only one of the ingestion tasks is queried by the Broker and it is not gauranteed that the same task gets picked everytime. The `num_rows` column of segments table can have inconsistent values during this period. There is an open [issue](https://github.com/apache/incubator-druid/issues/5915) about this inconsistency with stream ingestion tasks. +*Caveat:* Note that a segment can be served by more than one stream ingestion task or Historical process, in which case it has multiple replicas. These replicas are weakly consistent with each other while served by multiple ingestion tasks; once a segment is eventually served by a Historical, it is immutable. The Broker prefers to query a segment from a Historical over an ingestion task. But if a segment has multiple realtime replicas, e.g., Kafka index tasks, and one task is slower than the other, then the sys.segments query results can vary for the duration of the tasks because only one of the ingestion tasks is queried by the Broker and it is not guaranteed that the same task gets picked every time. The `num_rows` column of the segments table can have inconsistent values during this period. There is an open [issue](https://github.com/apache/incubator-druid/issues/5915) about this inconsistency with stream ingestion tasks.
#### SERVERS table @@ -790,7 +790,7 @@ The Druid SQL server is configured through the following properties on the Broke |`druid.sql.planner.maxSemiJoinRowsInMemory`|Maximum number of rows to keep in memory for executing two-stage semi-join queries like `SELECT * FROM Employee WHERE DeptName IN (SELECT DeptName FROM Dept)`.|100000| |`druid.sql.planner.maxTopNLimit`|Maximum threshold for a [TopN query](../querying/topnquery.md). Higher limits will be planned as [GroupBy queries](../querying/groupbyquery.md) instead.|100000| |`druid.sql.planner.metadataRefreshPeriod`|Throttle for metadata refreshes.|PT1M| -|`druid.sql.planner.useApproximateCountDistinct`|Whether to use an approximate cardinalty algorithm for `COUNT(DISTINCT foo)`.|true| +|`druid.sql.planner.useApproximateCountDistinct`|Whether to use an approximate cardinality algorithm for `COUNT(DISTINCT foo)`.|true| |`druid.sql.planner.useApproximateTopN`|Whether to use approximate [TopN queries](../querying/topnquery.html) when a SQL query could be expressed as such. If false, exact [GroupBy queries](../querying/groupbyquery.html) will be used instead.|true| |`druid.sql.planner.requireTimeCondition`|Whether to require SQL to have filter conditions on __time column so that all generated native queries will have user specified intervals. If true, all queries without filter condition on __time column will fail|false| |`druid.sql.planner.sqlTimeZone`|Sets the default time zone for the server, which will affect how time functions and timestamp literals behave. Should be a time zone name like "America/Los_Angeles" or offset like "-08:00".|UTC| diff --git a/docs/tutorials/cluster.md b/docs/tutorials/cluster.md index 476c316cfa80..acd2c99d7087 100644 --- a/docs/tutorials/cluster.md +++ b/docs/tutorials/cluster.md @@ -118,7 +118,7 @@ When choosing the Data server hardware, you can choose a split factor `N`, divid Instructions for adjusting the Historical/MiddleManager configs for the split are described in a later section in this guide. -#### Query derver +#### Query server The main considerations for the Query server are available CPUs and RAM for the Broker heap + direct memory, and Router heap. @@ -478,7 +478,7 @@ tar -xzf tranquility-distribution-0.8.3.tgz mv tranquility-distribution-0.8.3 tranquility ``` -Afterwards, in `conf/supervise/cluster/data.conf`, uncomment out the `tranquility-server` line, and restart the Data server proceses. +Afterwards, in `conf/supervise/cluster/data.conf`, uncomment out the `tranquility-server` line, and restart the Data server processes. ## Start Query Server diff --git a/docs/tutorials/tutorial-batch-hadoop.md b/docs/tutorials/tutorial-batch-hadoop.md index 7b23f284a245..b155c90e8466 100644 --- a/docs/tutorials/tutorial-batch-hadoop.md +++ b/docs/tutorials/tutorial-batch-hadoop.md @@ -196,7 +196,7 @@ druid.indexer.logs.directory=/druid/indexing-logs Once the Hadoop .xml files have been copied to the Druid cluster and the segment/log storage configuration has been updated to use HDFS, the Druid cluster needs to be restarted for the new configurations to take effect. -If the cluster is still running, CTRL-C to terminate the `bin/start-micro-quickstart` script, and re-reun it to bring the Druid services back up. +If the cluster is still running, CTRL-C to terminate the `bin/start-micro-quickstart` script, and re-run it to bring the Druid services back up. 
## Load batch data diff --git a/docs/tutorials/tutorial-ingestion-spec.md b/docs/tutorials/tutorial-ingestion-spec.md index 96926c429fde..de3589fdb84d 100644 --- a/docs/tutorials/tutorial-ingestion-spec.md +++ b/docs/tutorials/tutorial-ingestion-spec.md @@ -146,7 +146,7 @@ When ingesting data, we must consider whether we wish to use rollup or not. For this tutorial, let's enable rollup. This is specified with a `granularitySpec` on the `dataSchema`. -Note that the `granularitySpec` lies outside of the `parser`. We will revist the `parser` soon when we define our dimensions and metrics. +Note that the `granularitySpec` lies outside of the `parser`. We will revisit the `parser` soon when we define our dimensions and metrics. ```json "dataSchema" : { diff --git a/docs/tutorials/tutorial-query.md b/docs/tutorials/tutorial-query.md index 5f6471601561..a578fb82607b 100644 --- a/docs/tutorials/tutorial-query.md +++ b/docs/tutorials/tutorial-query.md @@ -62,7 +62,7 @@ You can also configure extra context flags to be sent with the query from the mo ![Query options](../assets/tutorial-query-02.png "Query options") -Note that the console will by default wrap your SQL queries in a limit so that you can issue queries like `SELECT * FROM wikipedia` without much hesitation - you can turn off this behaviour. +Note that the console will by default wrap your SQL queries in a limit so that you can issue queries like `SELECT * FROM wikipedia` without much hesitation - you can turn off this behavior. ### Query SQL via dsql diff --git a/docs/tutorials/tutorial-retention.md b/docs/tutorials/tutorial-retention.md index adf352d24085..cd1912a8cfeb 100644 --- a/docs/tutorials/tutorial-retention.md +++ b/docs/tutorials/tutorial-retention.md @@ -47,7 +47,7 @@ This view shows the available datasources and a summary of the retention rules f ![Summary](../assets/tutorial-retention-01.png "Summary") -Currently there are no rules set for the `retention-tutorial` datasource. Note that there are default rules for the cluster: load forever with 2 replicants in `_default_tier`. +Currently there are no rules set for the `retention-tutorial` datasource. Note that there are default rules for the cluster: load forever with 2 replicas in `_default_tier`. This means that all data will be loaded regardless of timestamp, and each segment will be replicated to two Historical processes in the default tier. @@ -71,7 +71,7 @@ A rule configuration window will appear: Now click the `+ New rule` button twice. -In the upper rule box, select `Load` and `by interval`, and then enter `2015-09-12T12:00:00.000Z/2015-09-13T00:00:00.000Z` in field next to `by interval`. Replicants can remain at 2 in the `_default_tier`. +In the upper rule box, select `Load` and `by interval`, and then enter `2015-09-12T12:00:00.000Z/2015-09-13T00:00:00.000Z` in field next to `by interval`. Replicas can remain at 2 in the `_default_tier`. In the lower rule box, select `Drop` and `forever`. diff --git a/website/.spelling b/website/.spelling new file mode 100644 index 000000000000..5e4553a04d87 --- /dev/null +++ b/website/.spelling @@ -0,0 +1,1622 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# markdown-spellcheck spelling configuration file +# Format - lines beginning # are comments +# global dictionary is at the start, file overrides afterwards +# one word per line, to define a file override use ' - filename' +# where filename is relative to this configuration file +32-bit +64-bit +ACL +APIs +AWS +Actian +Authorizer +Avatica +Avro +Azul +BCP +Base64 +Base64-encoded +ByteBuffer +CIDR +CORS +CPUs +CSVs +Ceph +ColumnDescriptor +Corretto +DDL +DML +DNS +DRUIDVERSION +DataSketches +DateTime +DateType +DimensionSpec +DimensionSpecs +Dockerfile +DogStatsD +Double.NEGATIVE_INFINITY +Double.NEGATIVE_INFINITY. +Double.POSITIVE_INFINITY +Double.POSITIVE_INFINITY. +DruidSQL +EC2 +EMR +EMRFS +ETL +Elasticsearch +FirehoseFactory +Float.NEGATIVE_INFINITY +Float.POSITIVE_INFINITY +GC +GPG +GSSAPI +GUIs +GroupBy +Guice +HDFS +HLL +HashSet +Homebrew +HyperLogLog +IANA +IETF +IPv4 +IS0 +ISO-8601 +ISO8601 +IndexSpec +IndexTask +InfluxDB +Integer.MAX_VALUE +JDBC +JDK +JDK7 +JDK8 +JKS +JRE +JS +JSON +JVM +JVMs +Joda +JsonProperty +KMS +Kerberized +Kerberos +Kinesis +LRU +LZ4 +LZO +LimitSpec +Long.MAX_VALUE +Long.MIN_VALUE +Lucene +MapBD +MapDB +MiddleManager +MiddleManagers +Montréal +Murmur3 +NFS +OLAP +OOMs +OpenJDK +OpenTSDB +OutputStream +ParAccel +ParseSpec +ParseSpecs +Protobuf +RDBMS +RDDs +Rackspace +Redis +S3 +SDK +SIGAR +SPNEGO +SQLServer +SSD +SSDs +SSL +Samza +Splunk +SqlFirehose +StatsD +TCP +TGT +TLS +TopN +TopNs +UI +UIs +URI +URIs +UTF-16 +UTF-8 +UTF8 +XMLs +ZK +accessor +ad-hoc +aggregator +aggregators +ambari +analytics +authorizer +authorizers +autocomplete +autodiscovery +autoscaler +autoscaling +averager +averagers +backend +backfills +backpressure +base64 +big-endian +blobstore +boolean +breakpoint +broadcasted +checksums +classpath +clickstream +codebase +codec +colocated +colocation +compactable +config +configs +cron +csv +customizable +dataset +datasets +datasketches +datasource +datasources +dbcp +denormalization +denormalize +denormalized +dequeued +deserialization +deserialize +deserialized +downtimes +e.g. +encodings +endian +enum +failover +filenames +filesystem +firefox +firehose +firehoses +frontends +granularities +gzip +gzipped +hadoop +hasher +hashtable +historicals +hostname +hostnames +http +https +i.e. 
+influxdb +injective +inlined +interruptible +javadoc +kerberos +keystore +keytab +lifecycle +localhost +log4j +log4j2 +log4j2.xml +lookback +lookups +mapreduce +masse +memcached +mergeable +metadata +millis +misconfiguration +multitenancy +multitenant +mysql +namespace +namespaced +namespaces +natively +netflow +non-nullable +noop +numerics +parameterized +parseable +partitioner +performant +plaintext +pluggable +postgres +postgresql +pre-aggregated +pre-aggregates +pre-aggregating +pre-aggregation +pre-computation +pre-compute +pre-computing +pre-configured +pre-generated +pre-made +pre-processing +preemptible +prefetch +prefetched +prefetching +prepend +prepended +prepending +prepends +preprocessing +priori +programmatically +proto +proxied +quantile +quantiles +queryable +quickstart +realtime +rebalance +redis +regexes +reimported +reindex +reindexing +reingest +reingesting +reingestion +repo +rollup +rollups +rsync +runtime +schemas +searchable +sharded +sharding +smooshed +splittable +stdout +storages +stringified +subarray +subnet +subqueries +subquery +subsecond +substring +symlink +tiering +timeseries +timestamp +timestamps +tradeoffs +tsv +unannounce +unannouncements +unary +unassign +uncomment +underutilization +unintuitive +unioned +unmergeable +unmerged +unparseable +unparsed +uptime +v1 +v2 +vCPUs +validator +vectorizable +vectorize +versioning +w.r.t. +whitelist +whitelisted +whitespace +wildcard +xml +znode +znodes + - ../docs/comparisons/druid-vs-elasticsearch.md +100x + - ../docs/configuration/logging.md +_common + - ../docs/dependencies/deep-storage.md +druid-hdfs-storage +druid-s3-extensions + - ../docs/dependencies/metadata-storage.md +BasicDataSource + - ../docs/dependencies/zookeeper.md +LeadershipLatch + - ../docs/design/auth.md +AllowAll +AuthenticationResult +AuthorizationLoadingLookupTest +HttpClient +allowAll +authenticatorChain +defaultUser + - ../docs/design/coordinator.md +inputSegmentSizeBytes +skipOffsetFromLatest + - ../docs/design/router.md +c3.2xlarge +maxPriority +minPriority +runtime.properties +timeBoundary + - ../docs/design/segments.md +0x0 +0x9 +2GB +300mb-700mb +Bieber +IndexTask-based +Ke +datasource_intervalStart_intervalEnd_version_partitionNum +partitionNum +v9 + - ../docs/development/build.md +3.x +8u92 +DskipTests +Papache-release +Pdist + - ../docs/development/extensions-contrib/ambari-metrics-emitter.md +ambari-metrics +metricName +trustStore + - ../docs/development/extensions-contrib/azure.md +StaticAzureBlobStoreFirehose +StaticS3Firehose +fetchTimeout +gz +maxCacheCapacityBytes +maxFetchCapacityBytes +maxFetchRetry +prefetchTriggerBytes +shardSpecs + - ../docs/development/extensions-contrib/cloudfiles.md +StaticCloudFilesFirehose +cloudfiles +rackspace-cloudfiles-uk +rackspace-cloudfiles-us + - ../docs/development/extensions-contrib/distinctcount.md +distinctCount +groupBy +maxIntermediateRows +numValuesPerPass +queryGranularity +segmentGranularity +topN +visitor_id + - ../docs/development/extensions-contrib/influx.md +cpu +web_requests + - ../docs/development/extensions-contrib/influxdb-emitter.md +_ +druid_ +druid_cache_total +druid_hits +druid_query +historical001 + - ../docs/development/extensions-contrib/materialized-view.md +HadoopTuningConfig +TuningConfig +base-dataSource's +baseDataSource +baseDataSource-hashCode +classpathPrefix +derivativeDataSource +dimensionsSpec +druid.extensions.hadoopDependenciesDir +hadoopDependencyCoordinates +maxTaskCount +metricsSpec +queryType +tuningConfig + - 
../docs/development/extensions-contrib/momentsketch-quantiles.md +arcsinh +fieldName +momentSketchMerge +momentsketch + - ../docs/development/extensions-contrib/moving-average-query.md +10-minutes +MeanNoNulls +P1D +cycleSize +doubleMax +doubleMean +doubleMeanNoNulls +doubleMin +druid.generic.useDefaultValueForNull +limitSpec +longMax +longMean +longMeanNoNulls +longMin +movingAverage +postAggregations +postAveragers + - ../docs/development/extensions-contrib/opentsdb-emitter.md +defaultMetrics.json +src + - ../docs/development/extensions-contrib/redis-cache.md +loadList + - ../docs/development/extensions-contrib/sqlserver.md +com.microsoft.sqlserver.jdbc.SQLServerDriver +sqljdbc + - ../docs/development/extensions-contrib/statsd.md +convertRange + - ../docs/development/extensions-contrib/tdigestsketch-quantiles.md +postAggregator +quantileFromTDigestSketch +quantilesFromTDigestSketch +tDigestSketch + - ../docs/development/extensions-contrib/thrift.md +HadoopDruidIndexer +LzoThriftBlock +SequenceFile +classname +hadoop-lzo +inputFormat +inputSpec +ioConfig +parseSpec +thriftClass +thriftJar + - ../docs/development/extensions-contrib/time-min-max.md +timeMax +timeMin + - ../docs/development/extensions-core/approximate-histograms.md +approxHistogram +approxHistogramFold +bucketNum +lowerLimit +numBuckets +upperLimit + - ../docs/development/extensions-core/avro.md +AVRO-1124 +Avro-1124 +SchemaRepo +avro +avroBytesDecoder +jq +org.apache.druid.extensions +schemaRepository +schema_inline +subjectAndIdConverter +url + - ../docs/development/extensions-core/bloom-filter.md +BloomKFilter +bitset +outputStream + - ../docs/development/extensions-core/datasketches-hll.md +HLLSketchBuild +HLLSketchMerge +lgK +log2 +tgtHllType + - ../docs/development/extensions-core/datasketches-quantiles.md +CDF +DoublesSketch +PMF +quantilesDoublesSketch +toString + - ../docs/development/extensions-core/datasketches-theta.md +isInputThetaSketch +thetaSketch +user_id + - ../docs/development/extensions-core/datasketches-tuple.md +ArrayOfDoublesSketch +arrayOfDoublesSketch +metricColumns +nominalEntries +numberOfValues + - ../docs/development/extensions-core/druid-basic-security.md +INFORMATION_SCHEMA +MyBasicAuthenticator +MyBasicAuthorizer +authenticatorName +authorizerName +druid_system +pollingPeriod +roleName + - ../docs/development/extensions-core/druid-kerberos.md +8Kb +HttpComponents +MyKerberosAuthenticator +RFC-4559 +SPNego +_HOST + - ../docs/development/extensions-core/druid-lookups.md +cacheFactory +concurrencyLevel +dataFetcher +expireAfterAccess +expireAfterWrite +initialCapacity +loadingCacheSpec +maxEntriesSize +maxStoreSize +maximumSize +onHeapPolling +pollPeriod +reverseLoadingCacheSpec + - ../docs/development/extensions-core/google.md +GCS +StaticGoogleBlobStoreFirehose + - ../docs/development/extensions-core/hdfs.md +gcs-connector +hadoop2 +hdfs + - ../docs/development/extensions-core/kafka-extraction-namespace.md +LookupExtractorFactory +zookeeper.connect + - ../docs/development/extensions-core/kafka-ingestion.md +0.11.x. 
+00Z +2016-01-01T11 +2016-01-01T12 +2016-01-01T14 +CONNECTING_TO_STREAM +CREATING_TASKS +DISCOVERING_INITIAL_TASKS +KafkaSupervisorIOConfig +KafkaSupervisorTuningConfig +LOST_CONTACT_WITH_STREAM +OffsetOutOfRangeException +P2147483647D +PT10M +PT10S +PT1H +PT30M +PT30S +PT5S +PT80S +SegmentWriteOutMediumFactory +UNABLE_TO_CONNECT_TO_STREAM +UNHEALTHY_SUPERVISOR +UNHEALTHY_TASKS +dimensionCompression +earlyMessageRejectionPeriod +indexSpec +intermediateHandoffPeriod +longEncoding +maxBytesInMemory +maxPendingPersists +maxRowsInMemory +maxRowsPerSegment +maxSavedParseExceptions +maxTotalRows +metricCompression +numKafkaPartitions +taskCount +taskDuration + - ../docs/development/extensions-core/kinesis-ingestion.md +9.2dist +KinesisSupervisorIOConfig +KinesisSupervisorTuningConfig +LZ4LZFuncompressedLZ4LZ4LZFuncompressednoneLZ4autolongsautolongslongstypeconcisetyperoaringcompressRunOnSerializationtruetypestreamendpointreplicastaskCounttaskCount +deaggregate +druid-kinesis-indexing-service +maxRecordsPerPoll +maxRecordsPerPollrecordsPerFetchfetchDelayMillisreplicasfetchDelayMillisrecordsPerFetchfetchDelayMillismaxRecordsPerPollamazon-kinesis-client1 +numKinesisShards +numProcessors +q.size +replicastaskCounttaskCount +resetuseEarliestSequenceNumberPOST +resumePOST +statusrecentErrorsdruid.supervisor.maxStoredExceptionEventsstatedetailedStatestatedetailedStatestatestatePENDINGRUNNINGSUSPENDEDSTOPPINGUNHEALTHY_SUPERVISORUNHEALTHY_TASKSdetailedStatestatedruid.supervisor.unhealthinessThresholddruid.supervisor.taskUnhealthinessThresholdtaskDurationtaskCountreplicasdetailedStatedetailedStateRUNNINGPOST +supervisorPOST +supervisorfetchThreadsfetchDelayMillisrecordsPerFetchmaxRecordsPerPollpoll +suspendPOST +taskCounttaskDurationreplicas +taskCounttaskDurationtaskDurationPOST +taskDurationstartDelayperioduseEarliestSequenceNumbercompletionTimeouttaskDurationlateMessageRejectionPeriodPT1HearlyMessageRejectionPeriodPT1HPT1HrecordsPerFetchfetchDelayMillisawsAssumedRoleArnawsExternalIddeaggregateGET +terminatePOST +terminatedruid.worker.capacitytaskDurationcompletionTimeoutreplicastaskCountreplicas + - ../docs/development/extensions-core/lookups-cached-global.md +baz +customJson +lookupParseSpec +namespaceParseSpec +simpleJson + - ../docs/development/extensions-core/mysql.md +x.xx.jar + - ../docs/development/extensions-core/orc.md +dimensionSpec +flattenSpec + - ../docs/development/extensions-core/parquet.md +binaryAsString + - ../docs/development/extensions-core/postgresql.md +sslFactory's +sslMode + - ../docs/development/extensions-core/protobuf.md +Proto +metrics.desc +metrics.desc. +metrics.proto. +metrics_pb +protoMessageType +timeAndDims +tmp + - ../docs/development/extensions-core/s3.md +SigV4 +jvm.config +kms +s3 +s3a +s3n +uris + - ../docs/development/extensions-core/simple-client-sslcontext.md +KeyManager +SSLContext +TrustManager + - ../docs/development/extensions-core/stats.md +GenericUDAFVariance +Golub +J.L. +LeVeque +Numer +chunk1 +chunk2 +stddev +t1 +t2 +variance1 +variance2 +varianceFold +variance_pop +variance_sample + - ../docs/development/extensions-core/test-stats.md +Berry_statbook +Berry_statbook_chpt6.pdf +S.E. 
+engineering.com +jcb0773 +n1 +n2 +p1 +p2 +pvalue2tailedZtest +sqrt +successCount1 +successCount2 +www.isixsigma.com +www.paypal +www.ucs.louisiana.edu +zscore +zscore2sample +ztests + - ../docs/development/extensions.md +DistinctCount +artifactId +com.example +common.runtime.properties +druid-cassandra-storage +druid-distinctcount +druid-ec2-extensions +druid-kafka-extraction-namespace +druid-kafka-indexing-service +druid-opentsdb-emitter +druid-protobuf-extensions +druid-tdigestsketch +druid.apache.org +groupId +jvm-global +kafka-emitter +org.apache.druid.extensions.contrib. +pull-deps +simple-client-sslcontext +sqlserver-metadata-storage +statsd-emitter + - ../docs/development/geo.md +coords +dimName +maxCoords +minCoords + - ../docs/development/javascript.md +Metaspace +dev + - ../docs/development/modules.md +AggregatorFactory +ArchiveTask +ComplexMetrics +DataSegmentArchiver +DataSegmentKiller +DataSegmentMover +DataSegmentPuller +DataSegmentPusher +DruidModule +ExtractionFns +HdfsStorageDruidModule +JacksonInject +MapBinder +MoveTask +ObjectMapper +PasswordProvider +PostAggregators +QueryRunnerFactory +SegmentMetadataQuery +SegmentMetadataQueryQueryToolChest +StaticS3FirehoseFactory +loadSpec +multibind +pom.xml + - ../docs/ingestion/data-formats.md +0.6.x +0.7.x +0.7.x. +TimeAndDims +column2 +column_1 +column_n +com.opencsv +ctrl +jsonLowercase +listDelimiter +timestampSpec + - ../docs/ingestion/data-management.md +1GB +compactionTask +compactionTasks +ingestSegmentFirehose +numShards + - ../docs/ingestion/faq.md +IngestSegment +IngestSegmentFirehose +maxSizes +windowPeriod + - ../docs/ingestion/hadoop.md +2012-01-01T00 +2012-01-03T00 +2012-01-05T00 +2012-01-07T00 +500MB +CombineTextInputFormat +HadoopIndexTask +InputFormat +InputSplit +JobHistory +a.example.com +assumeGrouped +cleanupOnFailure +combineText +connectURI +dataGranularity +datetime +f.example.com +filePattern +forceExtendableShardSpecs +granularitySpec +ignoreInvalidRows +ignoreWhenNoSegments +indexSpecForIntermediatePersists +index_hadoop +ingestionSpec +inputPath +inputSpecs +interval1 +interval2 +jobProperties +leaveIntermediate +logParseExceptions +mapred.map.tasks +mapreduce.job.maps +maxParseExceptions +maxPartitionSize +maxSplitSize +metadataUpdateSpec +numBackgroundPersistThreads +overwriteFiles +partitionDimension +partitionDimensions +partitionSpec +partitionsSpec +pathFormat +segmentOutputPath +segmentTable +shardSpec +single_dim +targetPartitionSize +useCombiner +useExplicitVersion +useNewAggs +useYarnRMJobStatusFallback +workingPath +z.example.com + - ../docs/ingestion/native-batch.md +150MB +CombiningFirehose +DataSchema +DefaultPassword +EnvironmentVariablePasswordProvider +HttpFirehose +IOConfig +InlineFirehose +LocalFirehose +PartitionsSpec +PasswordProviders +appendToExisting +baseDir +chatHandlerNumRetries +chatHandlerTimeout +connectorConfig +dataSchema's +foldCase +forceGuaranteedRollup +httpAuthenticationPassword +httpAuthenticationUsername +ingestSegment +maxInputSegmentBytesPerTask +maxNumConcurrentSubTasks +maxNumSegmentsToMerge +maxRetry +pushTimeout +reportParseExceptions +segmentWriteOutMediumFactory +sql +sqls +taskStatusCheckPeriodMs +timeChunk +totalNumMergeTasks + - ../docs/ingestion/schema-design.md +product_category +product_id +product_name + - ../docs/ingestion/tasks.md +BUILD_SEGMENTS +DETERMINE_PARTITIONS +forceTimeChunkLock +taskLockTimeout + - ../docs/misc/math-expr.md +DOUBLE_ARRAY +DOY +DateTimeFormat +LONG_ARRAY +Los_Angeles +P3M +PT12H +STRING_ARRAY +String.format +acos +args 
+arr1 +arr2 +array_append +array_concat +array_contains +array_length +array_offset +array_offset_of +array_ordinal +array_ordinal_of +array_overlap +array_prepend +array_slice +array_to_string +asin +atan +atan2 +bloom_filter_test +cartesian_fold +cartesian_map +case_searched +case_simple +cbrt +concat +copysign +expm1 +expr +expr1 +expr2 +fromIndex +getExponent +hypot +ipv4_match +ipv4_parse +ipv4_stringify +java.lang.Math +java.lang.String +log10 +log1p +lpad +ltrim +nextUp +nextafter +nvl +parse_long +regexp_extract +result1 +result2 +rint +rpad +rtrim +scalb +signum +str1 +str2 +string_to_array +strlen +strpos +timestamp_ceil +timestamp_extract +timestamp_floor +timestamp_format +timestamp_parse +timestamp_shift +todegrees +toradians +ulp +unix_timestamp +value1 +value2 +valueOf + - ../docs/misc/papers-and-talks.md +RADStack + - ../docs/operations/api-reference.md +00.000Z +2015-09-12T03 +2015-09-12T05 +2016-06-27_2016-06-28 +Param +SupervisorSpec +dropRule +druid.query.segmentMetadata.defaultHistory +isointerval +json +loadRule +maxTime +minTime +numCandidates +param +segmentId1 +segmentId2 +taskId +taskid +un + - ../docs/operations/basic-cluster-tuning.md +100MB +128MB +15ms +2.5MB +24GB +256MB +30GB-60GB +4G +5MB +64KB +8G +G1GC +GroupBys +QoS-type + - ../docs/operations/dump-segment.md +DumpSegment +SegmentMetadata +__time +bitmapSerdeFactory +columnName +index.zip +time-iso8601 + - ../docs/operations/export-metadata.md +hadoopStorageDirectory + - ../docs/operations/insert-segment-to-db.md +0.14.x + - ../docs/operations/metrics.md +0.14.x +1s +Bufferpool +EventReceiverFirehose +EventReceiverFirehoseMonitor +Filesystesm +JVMMonitor +QueryCountStatsMonitor +RealtimeMetricsMonitor +Sys +SysMonitor +TaskCountStatsMonitor +bufferCapacity +bufferPoolName +chunkInterval +cms +cpuName +cpuTime +fsDevName +fsDirName +fsOptions +fsSysTypeName +fsTypeName +g1 +gcGen +gcName +handoffed +hasFilters +memKind +nativeQueryIds +netAddress +netHwaddr +netName +numComplexMetrics +numDimensions +numMetrics +poolKind +poolName +remoteAddress +serviceName +taskStatus +taskType + - ../docs/operations/other-hadoop.md +CDH +Classloader +assembly.sbt +build.sbt +classloader +druid_build +mapred-default +mapred-site +sbt +scala-2 + - ../docs/operations/pull-deps.md +org.apache.hadoop +proxy.com. +remoteRepository + - ../docs/operations/recommendations.md +JBOD +druid.processing.buffer.sizeBytes. 
+druid.processing.numMergeBuffers +druid.processing.numThreads +tmpfs + - ../docs/operations/rule-configuration.md +broadcastByInterval +broadcastByPeriod +broadcastForever +colocatedDataSources +dropBeforeByPeriod +dropByInterval +dropByPeriod +dropForever +loadByInterval +loadByPeriod +loadForever + - ../docs/operations/segment-optimization.md +700MB + - ../docs/operations/single-server.md +128GB +16GB +256GB +4GB +512GB +64GB +Nano-Quickstart +i3 +i3.16xlarge +i3.2xlarge +i3.4xlarge +i3.8xlarge + - ../docs/operations/tls-support.md +CN +subjectAltNames + - ../docs/querying/aggregations.md +HyperUnique +hyperUnique +longSum + - ../docs/querying/datasource.md +groupBys + - ../docs/querying/datasourcemetadataquery.md +dataSourceMetadata + - ../docs/querying/dimensionspecs.md +ExtractionDimensionSpec +SimpleDateFormat +bar_1 +dimensionSpecs +isWhitelist +joda +nullHandling +product_1 +product_3 +registeredLookup +timeFormat +tz +v3 +weekyears + - ../docs/querying/filters.md +___bar +caseSensitive +extractionFn +insensitive_contains +last_name +lowerStrict +upperStrict + - ../docs/querying/granularities.md +1970-01-01T00 +P2W +PT0.750S +PT1H30M +TimeseriesQuery + - ../docs/querying/groupbyquery.md +D1 +D2 +D3 +chunkPeriod +druid.query.groupBy.defaultStrategy +druid.query.groupBy.maxMergingDictionarySize +druid.query.groupBy.maxOnDiskStorage +druid.query.groupBy.maxResults. +groupByStrategy +maxOnDiskStorage +maxResults +orderby +orderbys +outputName +pushdown +row1 +subtotalsSpec + - ../docs/querying/having.md +HavingSpec +HavingSpecs +dimSelector +equalTo +greaterThan +lessThan + - ../docs/querying/hll-old.md +DefaultDimensionSpec +druid-hll +isInputHyperUnique + - ../docs/querying/joins.md +pre-join + - ../docs/querying/limitspec.md +DefaultLimitSpec +OrderByColumnSpec +OrderByColumnSpecs +dimensionOrder + - ../docs/querying/lookups.md +60_000 +kafka-extraction-namespace +mins +tierName + - ../docs/querying/multi-value-dimensions.md +row2 +row3 +row4 +t3 +t4 +t5 + - ../docs/querying/multitenancy.md +500ms +tenant_id + - ../docs/querying/post-aggregations.md +fieldAccess +finalizingFieldAccess +hyperUniqueCardinality + - ../docs/querying/query-context.md +bySegment +doubleSum +druid.broker.cache.populateCache +druid.broker.cache.populateResultLevelCache +druid.broker.cache.useCache +druid.broker.cache.useResultLevelCache +druid.historical.cache.populateCache +druid.historical.cache.useCache +floatSum +maxQueuedBytes +maxScatterGatherBytes +minTopNThreshold +populateCache +populateResultLevelCache +queryId +row-matchers +serializeDateTimeAsLong +serializeDateTimeAsLongInner +skipEmptyBuckets +useCache +useResultLevelCache +vectorSize + - ../docs/querying/querying.md +DatasourceMetadata +TimeBoundary +errorClass +errorMessage +x-jackson-smile + - ../docs/querying/scan-query.md +batchSize +compactedList +druid.query.scan.legacy +druid.query.scan.maxRowsQueuedForOrdering +druid.query.scan.maxSegmentPartitionsOrderedInMemory +maxRowsQueuedForOrdering +maxSegmentPartitionsOrderedInMemory +resultFormat +valueVector + - ../docs/querying/searchquery.md +SearchQuerySpec +cursorOnly +druid.query.search.searchStrategy +queryableIndexSegment +searchDimensions +searchStrategy +useIndexes + - ../docs/querying/searchqueryspec.md +ContainsSearchQuerySpec +FragmentSearchQuerySpec +InsensitiveContainsSearchQuerySpec +RegexSearchQuerySpec + - ../docs/querying/segmentmetadataquery.md +analysisType +analysisTypes +lenientAggregatorMerge +minmax +segmentMetadata +toInclude + - ../docs/querying/select-query.md 
+PagingSpec +fromNext +pagingSpec + - ../docs/querying/sorting-orders.md +BoundFilter +GroupByQuery's +SearchQuery +TopNMetricSpec +compareTo +file12 +file2 + - ../docs/querying/sql.md +APPROX_COUNT_DISTINCT +APPROX_QUANTILE +BIGINT +CATALOG_NAME +CHARACTER_MAXIMUM_LENGTH +CHARACTER_OCTET_LENGTH +CHARACTER_SET_NAME +COLLATION_NAME +COLUMN_DEFAULT +COLUMN_NAME +Concats +DATA_TYPE +DATETIME_PRECISION +DEFAULT_CHARACTER_SET_CATALOG +DEFAULT_CHARACTER_SET_NAME +DEFAULT_CHARACTER_SET_SCHEMA +ISODOW +ISOYEAR +IS_NULLABLE +JDBC_TYPE +MIDDLE_MANAGER +NULLable +NUMERIC_PRECISION +NUMERIC_PRECISION_RADIX +NUMERIC_SCALE +ORDINAL_POSITION +PT1M +PT5M +SCHEMA_NAME +SCHEMA_OWNER +SERVER_SEGMENTS +SMALLINT +SQL_PATH +SYSTEM_TABLE +TABLE_CATALOG +TABLE_NAME +TABLE_SCHEMA +TABLE_TYPE +TIME_PARSE +TIME_SHIFT +TINYINT +VARCHAR +avg_num_rows +avg_size +created_time +current_size +druid.server.maxSize +druid.server.tier +druid.sql.planner.maxSemiJoinRowsInMemory +druid.sql.planner.sqlTimeZone +druid.sql.planner.useApproximateCountDistinct +druid.sql.planner.useApproximateTopN +error_msg +exprs +group_id +interval_expr +is_available +is_overshadowed +is_published +is_realtime +java.sql.Types +max_size +num_replicas +num_rows +num_segments +partition_num +plaintext_port +queue_insertion_time +runner_status +segment_id +server_type +sqlTimeZone +sys +sys.segments +task_id +timestamp_expr +tls_port +total_size +useApproximateCountDistinct +useApproximateTopN +wikipedia + - ../docs/querying/timeseriesquery.md +fieldName1 +fieldName2 + - ../docs/querying/topnmetricspec.md +DimensionTopNMetricSpec +metricSpec +previousStop + - ../docs/querying/topnquery.md +GroupByQuery +top500 + - ../docs/querying/virtual-columns.md +outputType + - ../docs/tutorials/cluster.md +1.9TB +16CPU +WebUpd8 +m5.2xlarge +metadata.storage. 
+ - ../docs/tutorials/tutorial-batch-hadoop.md +PATH_TO_DRUID +namenode + - ../docs/tutorials/tutorial-delete-data.md +segmentID +segmentIds + - ../docs/tutorials/tutorial-ingestion-spec.md +dstIP +dstPort +srcIP +srcPort + - ../docs/tutorials/tutorial-kerberos-hadoop.md +common_runtime_properties +druid.extensions.directory +druid.extensions.loadList +druid.hadoop.security.kerberos.keytab +druid.hadoop.security.kerberos.principal +druid.indexer.logs.directory +druid.indexer.logs.type +druid.storage.storageDirectory +druid.storage.type +hdfs.headless.keytab +indexing_log +keytabs + - ../docs/tutorials/tutorial-query.md +dsql + - ../docs/tutorials/tutorial-retention.md +2015-09-12T12 + - ../docs/tutorials/tutorial-update-data.md +bear-111 + - ../docs/configuration/index.md +00.000Z +100ms +10ms +1GB +1_000_000 +2012-01-01T00 +2GB +30_000 +524288000L +5MB +8u60 +Autoscaler +AvaticaConnectionBalancer +EventReceiverFirehose +File.getFreeSpace +File.getTotalSpace +ForkJoinPool +HadoopIndexTasks +HttpEmitter +HttpPostEmitter +InetAddress.getLocalHost +JRE8u60 +KeyManager +L1 +L2 +LoadSpec +LoggingEmitter +Los_Angeles +MDC +NoopServiceEmitter +ONLY_EVENTS +P1D +P1W +PT-1S +PT0.050S +PT10M +PT10S +PT15M +PT1800S +PT1M +PT1S +PT24H +PT300S +PT30S +PT5M +PT5S +PT60S +PT90M +Param +Runtime.maxMemory +SSLContext +SegmentMetadata +SegmentWriteOutMediumFactory +ServiceEmitter +System.getProperty +TLSv1.2 +TrustManager +TuningConfig +_N_ +_default +_default_tier +addr +affinityConfig +allowAll +array_mod +batch_index_task +cgroup +chunkPeriod +classloader +com.metamx +common.runtime.properties +cpuacct +dataSourceName +datetime +defaultHistory +doubleMax +doubleMin +doubleSum +druid.enableTlsPort +druid.indexer.autoscale.workerVersion +druid.service +druid.storage.disableAcl +druid_audit +druid_config +druid_dataSource +druid_pendingSegments +druid_rules +druid_segments +druid_supervisors +druid_taskLock +druid_taskLog +druid_tasks +ec2 +equalDistribution +extractionFn +file.encoding +fillCapacity +first_location +floatMax +floatMin +floatSum +freeSpacePercent +getCanonicalHostName +groupBy +hdfs +httpRemote +indexTask +info_dir +java.class.path +java.io.tmpdir +javaOpts +javaOptsArray +loadList +loadqueuepeon +loadspec +localStorage +maxHeaderSize +maxQueuedBytes +maxSize +middlemanager +minTimeMs +minmax +mins +orderby +orderbys +org.apache.druid +org.apache.druid.jetty.RequestLog +org.apache.hadoop +overlord.html +pendingSegments +pre-flight +queryType +remoteTaskRunnerConfig +rendezvousHash +resultsets +runtime.properties +runtime.properties. 
+s3 +s3a +s3n +slf4j +sql +sqlQuery +successfulSending +taskBlackListCleanupPeriod +tasklogs +timeBoundary +tmp +tmpfs +truststore +tuningConfig +useIndexes +user.timezone +v0.12.0 +versionReplacementString +workerId +yyyy-MM-dd + - ../docs/design/index.md +logsearch + - ../docs/ingestion/index.md +2000-01-01T01 +DateTimeFormat +JsonPath +autodetect +createBitmapIndex +dimensionExclusions +expr +jackson-jq +missingValue +schemaless +spatialDimensions +useFieldDiscovery + - ../docs/tutorials/index.md +4CPU +cityName +countryIsoCode +countryName +isAnonymous +isMinor +isNew +isRobot +isUnpatrolled +metroCode +regionIsoCode +regionName diff --git a/website/package-lock.json b/website/package-lock.json index 7689d7a9f1b7..407dbf96b787 100644 --- a/website/package-lock.json +++ b/website/package-lock.json @@ -2381,6 +2381,16 @@ "parse-json": "^4.0.0" } }, + "create-thenable": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/create-thenable/-/create-thenable-1.0.2.tgz", + "integrity": "sha1-4gMXIMzJV12M+jH1wUbnYqgMBTQ=", + "dev": true, + "requires": { + "object.omit": "~2.0.0", + "unique-concat": "~0.2.2" + } + }, "cross-spawn": { "version": "6.0.5", "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-6.0.5.tgz", @@ -3275,6 +3285,12 @@ } } }, + "exit-hook": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/exit-hook/-/exit-hook-1.1.1.tgz", + "integrity": "sha1-8FyiM7SMBdVP/wd2XfhQfpXAL/g=", + "dev": true + }, "expand-brackets": { "version": "2.1.4", "resolved": "https://registry.npmjs.org/expand-brackets/-/expand-brackets-2.1.4.tgz", @@ -3747,6 +3763,15 @@ "integrity": "sha1-gQaNKVqBQuwKxybG4iAMMPttXoA=", "dev": true }, + "for-own": { + "version": "0.1.5", + "resolved": "https://registry.npmjs.org/for-own/-/for-own-0.1.5.tgz", + "integrity": "sha1-UmXGgaTylNq78XyVCbZ2OqhFEM4=", + "dev": true, + "requires": { + "for-in": "^1.0.1" + } + }, "forever-agent": { "version": "0.6.1", "resolved": "https://registry.npmjs.org/forever-agent/-/forever-agent-0.6.1.tgz", @@ -3888,8 +3913,7 @@ "ansi-regex": { "version": "2.1.1", "bundled": true, - "dev": true, - "optional": true + "dev": true }, "aproba": { "version": "1.2.0", @@ -3910,14 +3934,12 @@ "balanced-match": { "version": "1.0.0", "bundled": true, - "dev": true, - "optional": true + "dev": true }, "brace-expansion": { "version": "1.1.11", "bundled": true, "dev": true, - "optional": true, "requires": { "balanced-match": "^1.0.0", "concat-map": "0.0.1" @@ -3932,20 +3954,17 @@ "code-point-at": { "version": "1.1.0", "bundled": true, - "dev": true, - "optional": true + "dev": true }, "concat-map": { "version": "0.0.1", "bundled": true, - "dev": true, - "optional": true + "dev": true }, "console-control-strings": { "version": "1.1.0", "bundled": true, - "dev": true, - "optional": true + "dev": true }, "core-util-is": { "version": "1.0.2", @@ -4062,8 +4081,7 @@ "inherits": { "version": "2.0.3", "bundled": true, - "dev": true, - "optional": true + "dev": true }, "ini": { "version": "1.3.5", @@ -4075,7 +4093,6 @@ "version": "1.0.0", "bundled": true, "dev": true, - "optional": true, "requires": { "number-is-nan": "^1.0.0" } @@ -4090,7 +4107,6 @@ "version": "3.0.4", "bundled": true, "dev": true, - "optional": true, "requires": { "brace-expansion": "^1.1.7" } @@ -4098,14 +4114,12 @@ "minimist": { "version": "0.0.8", "bundled": true, - "dev": true, - "optional": true + "dev": true }, "minipass": { "version": "2.3.5", "bundled": true, "dev": true, - "optional": true, "requires": { "safe-buffer": "^5.1.2", "yallist": 
"^3.0.0" @@ -4124,7 +4138,6 @@ "version": "0.5.1", "bundled": true, "dev": true, - "optional": true, "requires": { "minimist": "0.0.8" } @@ -4205,8 +4218,7 @@ "number-is-nan": { "version": "1.0.1", "bundled": true, - "dev": true, - "optional": true + "dev": true }, "object-assign": { "version": "4.1.1", @@ -4218,7 +4230,6 @@ "version": "1.4.0", "bundled": true, "dev": true, - "optional": true, "requires": { "wrappy": "1" } @@ -4304,8 +4315,7 @@ "safe-buffer": { "version": "5.1.2", "bundled": true, - "dev": true, - "optional": true + "dev": true }, "safer-buffer": { "version": "2.1.2", @@ -4341,7 +4351,6 @@ "version": "1.0.2", "bundled": true, "dev": true, - "optional": true, "requires": { "code-point-at": "^1.0.0", "is-fullwidth-code-point": "^1.0.0", @@ -4361,7 +4370,6 @@ "version": "3.0.1", "bundled": true, "dev": true, - "optional": true, "requires": { "ansi-regex": "^2.0.0" } @@ -4405,14 +4413,12 @@ "wrappy": { "version": "1.0.2", "bundled": true, - "dev": true, - "optional": true + "dev": true }, "yallist": { "version": "3.0.3", "bundled": true, - "dev": true, - "optional": true + "dev": true } } }, @@ -5297,6 +5303,12 @@ "sshpk": "^1.7.0" } }, + "hunspell-spellchecker": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/hunspell-spellchecker/-/hunspell-spellchecker-1.0.2.tgz", + "integrity": "sha1-oQsL0voAplq2Kkxrc0zkltMYkQ4=", + "dev": true + }, "iconv-lite": { "version": "0.4.24", "resolved": "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz", @@ -6397,6 +6409,194 @@ "integrity": "sha1-MsXGUZmmRXMWMi0eQinRNAfIx88=", "dev": true }, + "markdown-spellcheck": { + "version": "1.3.1", + "resolved": "https://registry.npmjs.org/markdown-spellcheck/-/markdown-spellcheck-1.3.1.tgz", + "integrity": "sha512-9uyovbDg3Kh2H89VDtqOkXKS9wuRgpLvOHXzPYWMR71tHQZWt2CAf28EIpXNhkFqqoEjXYAx+fXLuKufApYHRQ==", + "dev": true, + "requires": { + "async": "^2.1.4", + "chalk": "^2.0.1", + "commander": "^2.8.1", + "globby": "^6.1.0", + "hunspell-spellchecker": "^1.0.2", + "inquirer": "^1.0.0", + "js-yaml": "^3.10.0", + "marked": "^0.3.5", + "sinon-as-promised": "^4.0.0" + }, + "dependencies": { + "ansi-escapes": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/ansi-escapes/-/ansi-escapes-1.4.0.tgz", + "integrity": "sha1-06ioOzGapneTZisT52HHkRQiMG4=", + "dev": true + }, + "ansi-regex": { + "version": "2.1.1", + "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-2.1.1.tgz", + "integrity": "sha1-w7M6te42DYbg5ijwRorn7yfWVN8=", + "dev": true + }, + "ansi-styles": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-2.2.1.tgz", + "integrity": "sha1-tDLdM1i2NM914eRmQ2gkBTPB3b4=", + "dev": true + }, + "async": { + "version": "2.6.3", + "resolved": "https://registry.npmjs.org/async/-/async-2.6.3.tgz", + "integrity": "sha512-zflvls11DCy+dQWzTW2dzuilv8Z5X/pjfmZOWba6TNIVDm+2UDaJmXSOXlasHKfNBs8oo3M0aT50fDEWfKZjXg==", + "dev": true, + "requires": { + "lodash": "^4.17.14" + } + }, + "cli-cursor": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/cli-cursor/-/cli-cursor-1.0.2.tgz", + "integrity": "sha1-ZNo/fValRBLll5S9Ytw1KV6PKYc=", + "dev": true, + "requires": { + "restore-cursor": "^1.0.1" + } + }, + "external-editor": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/external-editor/-/external-editor-1.1.1.tgz", + "integrity": "sha1-Etew24UPf/fnCBuvQAVwAGDEYAs=", + "dev": true, + "requires": { + "extend": "^3.0.0", + "spawn-sync": "^1.0.15", + "tmp": "^0.0.29" + } + }, + "globby": { + "version": 
"6.1.0", + "resolved": "https://registry.npmjs.org/globby/-/globby-6.1.0.tgz", + "integrity": "sha1-9abXDoOV4hyFj7BInWTfAkJNUGw=", + "dev": true, + "requires": { + "array-union": "^1.0.1", + "glob": "^7.0.3", + "object-assign": "^4.0.1", + "pify": "^2.0.0", + "pinkie-promise": "^2.0.0" + } + }, + "inquirer": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/inquirer/-/inquirer-1.2.3.tgz", + "integrity": "sha1-TexvMvN+97sLLtPx0aXD9UUHSRg=", + "dev": true, + "requires": { + "ansi-escapes": "^1.1.0", + "chalk": "^1.0.0", + "cli-cursor": "^1.0.1", + "cli-width": "^2.0.0", + "external-editor": "^1.1.0", + "figures": "^1.3.5", + "lodash": "^4.3.0", + "mute-stream": "0.0.6", + "pinkie-promise": "^2.0.0", + "run-async": "^2.2.0", + "rx": "^4.1.0", + "string-width": "^1.0.1", + "strip-ansi": "^3.0.0", + "through": "^2.3.6" + }, + "dependencies": { + "chalk": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/chalk/-/chalk-1.1.3.tgz", + "integrity": "sha1-qBFcVeSnAv5NFQq9OHKCKn4J/Jg=", + "dev": true, + "requires": { + "ansi-styles": "^2.2.1", + "escape-string-regexp": "^1.0.2", + "has-ansi": "^2.0.0", + "strip-ansi": "^3.0.0", + "supports-color": "^2.0.0" + } + } + } + }, + "is-fullwidth-code-point": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-1.0.0.tgz", + "integrity": "sha1-754xOG8DGn8NZDr4L95QxFfvAMs=", + "dev": true, + "requires": { + "number-is-nan": "^1.0.0" + } + }, + "mute-stream": { + "version": "0.0.6", + "resolved": "https://registry.npmjs.org/mute-stream/-/mute-stream-0.0.6.tgz", + "integrity": "sha1-SJYrGeFp/R38JAs/HnMXYnu8R9s=", + "dev": true + }, + "onetime": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/onetime/-/onetime-1.1.0.tgz", + "integrity": "sha1-ofeDj4MUxRbwXs78vEzP4EtO14k=", + "dev": true + }, + "pify": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/pify/-/pify-2.3.0.tgz", + "integrity": "sha1-7RQaasBDqEnqWISY59yosVMw6Qw=", + "dev": true + }, + "restore-cursor": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/restore-cursor/-/restore-cursor-1.0.1.tgz", + "integrity": "sha1-NGYfRohjJ/7SmRR5FSJS35LapUE=", + "dev": true, + "requires": { + "exit-hook": "^1.0.0", + "onetime": "^1.0.0" + } + }, + "string-width": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-1.0.2.tgz", + "integrity": "sha1-EYvfW4zcUaKn5w0hHgfisLmxB9M=", + "dev": true, + "requires": { + "code-point-at": "^1.0.0", + "is-fullwidth-code-point": "^1.0.0", + "strip-ansi": "^3.0.0" + } + }, + "strip-ansi": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-3.0.1.tgz", + "integrity": "sha1-ajhfuIU9lS1f8F0Oiq+UJ43GPc8=", + "dev": true, + "requires": { + "ansi-regex": "^2.0.0" + } + }, + "supports-color": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-2.0.0.tgz", + "integrity": "sha1-U10EXOa2Nj+kARcIRimZXp3zJMc=", + "dev": true + }, + "tmp": { + "version": "0.0.29", + "resolved": "https://registry.npmjs.org/tmp/-/tmp-0.0.29.tgz", + "integrity": "sha1-8lEl/w3Z2jzLDC3Tce4SiLuRKMA=", + "dev": true, + "requires": { + "os-tmpdir": "~1.0.1" + } + } + } + }, "markdown-toc": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/markdown-toc/-/markdown-toc-1.2.0.tgz", @@ -6417,6 +6617,12 @@ "strip-color": "^0.1.0" } }, + "marked": { + "version": "0.3.19", + "resolved": "https://registry.npmjs.org/marked/-/marked-0.3.19.tgz", + 
"integrity": "sha512-ea2eGWOqNxPcXv8dyERdSr/6FmzvWwzjMxpfGB/sbMccXoct+xY+YukPD+QTUZwyvK7BZwcr4m21WBOW41pAkg==", + "dev": true + }, "math-random": { "version": "1.0.4", "resolved": "https://registry.npmjs.org/math-random/-/math-random-1.0.4.tgz", @@ -6607,6 +6813,12 @@ "to-regex": "^3.0.1" } }, + "native-promise-only": { + "version": "0.8.1", + "resolved": "https://registry.npmjs.org/native-promise-only/-/native-promise-only-0.8.1.tgz", + "integrity": "sha1-IKMYwwy0X3H+et+/eyHJnBRy7xE=", + "dev": true + }, "negotiator": { "version": "0.6.2", "resolved": "https://registry.npmjs.org/negotiator/-/negotiator-0.6.2.tgz", @@ -6918,6 +7130,16 @@ "es-abstract": "^1.5.1" } }, + "object.omit": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/object.omit/-/object.omit-2.0.1.tgz", + "integrity": "sha1-Gpx0SCnznbuFjHbKNXmuKlTr0fo=", + "dev": true, + "requires": { + "for-own": "^0.1.4", + "is-extendable": "^0.1.1" + } + }, "object.pick": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/object.pick/-/object.pick-1.3.0.tgz", @@ -7018,6 +7240,12 @@ "lcid": "^1.0.0" } }, + "os-shim": { + "version": "0.1.3", + "resolved": "https://registry.npmjs.org/os-shim/-/os-shim-0.1.3.tgz", + "integrity": "sha1-a2LDeRz3kJ6jXtRuF2WLtBfLORc=", + "dev": true + }, "os-tmpdir": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/os-tmpdir/-/os-tmpdir-1.0.2.tgz", @@ -8451,6 +8679,12 @@ "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.1.9.tgz", "integrity": "sha512-DEqnSRTDw/Tc3FXf49zedI638Z9onwUotBMiUFKmrO2sdFKIbXamXGQ3Axd4qgphxKB4kw/qP1w5kTxnfU1B9Q==" }, + "rx": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/rx/-/rx-4.1.0.tgz", + "integrity": "sha1-pfE/957zt0D+MKqAP7CfmIBdR4I=", + "dev": true + }, "rxjs": { "version": "6.5.2", "resolved": "https://registry.npmjs.org/rxjs/-/rxjs-6.5.2.tgz", @@ -8856,6 +9090,16 @@ "is-arrayish": "^0.3.1" } }, + "sinon-as-promised": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/sinon-as-promised/-/sinon-as-promised-4.0.3.tgz", + "integrity": "sha1-wFRbFoX9gTWIpO1pcBJIftEdFRs=", + "dev": true, + "requires": { + "create-thenable": "~1.0.0", + "native-promise-only": "~0.8.1" + } + }, "sitemap": { "version": "3.2.2", "resolved": "https://registry.npmjs.org/sitemap/-/sitemap-3.2.2.tgz", @@ -9082,6 +9326,16 @@ "integrity": "sha1-PpNdfd1zYxuXZZlW1VEo6HtQhKM=", "dev": true }, + "spawn-sync": { + "version": "1.0.15", + "resolved": "https://registry.npmjs.org/spawn-sync/-/spawn-sync-1.0.15.tgz", + "integrity": "sha1-sAeZVX63+wyDdsKdROih6mfldHY=", + "dev": true, + "requires": { + "concat-stream": "^1.4.7", + "os-shim": "^0.1.2" + } + }, "spdx-correct": { "version": "3.1.0", "resolved": "https://registry.npmjs.org/spdx-correct/-/spdx-correct-3.1.0.tgz", @@ -9773,6 +10027,12 @@ "integrity": "sha1-/+3ks2slKQaW5uFl1KWe25mOawI=", "dev": true }, + "unique-concat": { + "version": "0.2.2", + "resolved": "https://registry.npmjs.org/unique-concat/-/unique-concat-0.2.2.tgz", + "integrity": "sha1-khD5vcqsxeHjkpSQ18AZ35bxhxI=", + "dev": true + }, "universalify": { "version": "0.1.2", "resolved": "https://registry.npmjs.org/universalify/-/universalify-0.1.2.tgz", diff --git a/website/package.json b/website/package.json index 78df279c09d3..b897eabcfab1 100644 --- a/website/package.json +++ b/website/package.json @@ -10,10 +10,12 @@ "rename-version": "docusaurus-rename-version", "compile-scss": "node-sass scss/custom.scss > static/css/custom.css", "link-lint": "docusaurus-build && node script/link-lint.js", - "lint": "npm 
run link-lint" + "lint": "npm run link-lint", + "spellcheck": "mdspell --en-us --ignore-numbers --report '../docs/**/*.md'" }, "devDependencies": { "docusaurus": "^1.12.0", + "markdown-spellcheck": "^1.3.1", "node-sass": "^4.12.0" }, "dependencies": {