-
Notifications
You must be signed in to change notification settings - Fork 3.8k
Add pending task based resource management autoscaling strategy #2086
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,123 @@ | ||
| /* | ||
| * Licensed to Metamarkets Group Inc. (Metamarkets) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. Metamarkets licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
|
|
||
| package io.druid.indexing.overlord.autoscaling; | ||
|
|
||
| import com.metamx.common.concurrent.ScheduledExecutors; | ||
| import com.metamx.emitter.EmittingLogger; | ||
| import io.druid.granularity.PeriodGranularity; | ||
| import io.druid.indexing.overlord.WorkerTaskRunner; | ||
| import org.joda.time.DateTime; | ||
| import org.joda.time.Duration; | ||
| import org.joda.time.Period; | ||
|
|
||
| import java.util.concurrent.ScheduledExecutorService; | ||
|
|
||
| /** | ||
| * Base class for {@link ResourceManagementStrategy} implementations that manage a {@link WorkerTaskRunner}. | ||
| * | ||
| * <p>{@link #startManagement} schedules two recurring jobs on the executor passed to the constructor: | ||
| * {@link #doProvision} at the configured provision period, and {@link #doTerminate} at the configured | ||
| * terminate period. The first terminate run is delayed until a start time derived from a | ||
| * {@link PeriodGranularity} built from the terminate period and the configured origin time; presumably this | ||
| * aligns terminations to fixed period boundaries regardless of when the process started (TODO confirm | ||
| * against PeriodGranularity). Subclasses supply the actual provisioning and termination logic. | ||
| * | ||
| * <p>Starting and stopping are guarded by a private lock and are no-ops when the strategy is already in the | ||
| * requested state. {@link #stopManagement} shuts the executor down. NOTE(review): a shut-down executor | ||
| * presumably cannot be scheduled on again, so start-after-stop on the same instance is probably | ||
| * unsupported; verify. | ||
| * | ||
| * <p>NOTE(review): the configured periods are converted with {@code Period.toStandardDuration()} when | ||
| * management starts; Joda-Time documents that call as throwing UnsupportedOperationException for periods | ||
| * containing months or years, so such a configuration would fail at start rather than at config load. | ||
| */ | ||
| public abstract class AbstractWorkerResourceManagementStrategy implements ResourceManagementStrategy<WorkerTaskRunner> | ||
| { | ||
| private static final EmittingLogger log = new EmittingLogger(AbstractWorkerResourceManagementStrategy.class); | ||
|
|
||
| private final ResourceManagementSchedulerConfig resourceManagementSchedulerConfig; | ||
| private final ScheduledExecutorService exec; | ||
| private final Object lock = new Object(); | ||
|
|
||
| private volatile boolean started = false; | ||
|
|
||
| protected AbstractWorkerResourceManagementStrategy( | ||
| ResourceManagementSchedulerConfig resourceManagementSchedulerConfig, | ||
| ScheduledExecutorService exec | ||
| ) | ||
| { | ||
| this.resourceManagementSchedulerConfig = resourceManagementSchedulerConfig; | ||
| this.exec = exec; | ||
| } | ||
|
|
||
| @Override | ||
| public void startManagement(final WorkerTaskRunner runner) | ||
| { | ||
| synchronized (lock) { | ||
| if (started) { | ||
| return; | ||
| } | ||
|
|
||
| log.info("Started Resource Management Scheduler"); | ||
|
|
||
| ScheduledExecutors.scheduleAtFixedRate( | ||
| exec, | ||
| resourceManagementSchedulerConfig.getProvisionPeriod().toStandardDuration(), | ||
| new Runnable() | ||
| { | ||
| @Override | ||
| public void run() | ||
| { | ||
| // Any Errors are caught by ScheduledExecutors | ||
| doProvision(runner); | ||
| } | ||
| } | ||
| ); | ||
|
|
||
| // Schedule periodic termination of worker nodes; the first run waits until startTime, computed below from the terminate period and the configured origin time | ||
| Period period = resourceManagementSchedulerConfig.getTerminatePeriod(); | ||
| PeriodGranularity granularity = new PeriodGranularity( | ||
| period, | ||
| resourceManagementSchedulerConfig.getOriginTime(), | ||
| null | ||
| ); | ||
| final long startTime = granularity.next(granularity.truncate(new DateTime().getMillis())); | ||
|
|
||
| ScheduledExecutors.scheduleAtFixedRate( | ||
| exec, | ||
| new Duration(System.currentTimeMillis(), startTime), | ||
| resourceManagementSchedulerConfig.getTerminatePeriod().toStandardDuration(), | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. minor nit, but toStandardDuration could potentially throw an UnsupportedOperationException, we might want to convert to duration upfront in the config.
Member
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I kept the existing logic unchanged, just abstracted out the code. |
||
| new Runnable() | ||
| { | ||
| @Override | ||
| public void run() | ||
| { | ||
| // Any Errors are caught by ScheduledExecutors | ||
| doTerminate(runner); | ||
| } | ||
| } | ||
| ); | ||
|
|
||
| started = true; | ||
|
|
||
| } | ||
| } | ||
|
|
||
| abstract boolean doTerminate(WorkerTaskRunner runner); | ||
|
|
||
| abstract boolean doProvision(WorkerTaskRunner runner); | ||
|
|
||
| @Override | ||
| public void stopManagement() | ||
| { | ||
| synchronized (lock) { | ||
| if (!started) { | ||
| return; | ||
| } | ||
| log.info("Stopping Resource Management Scheduler"); | ||
| exec.shutdown(); | ||
| started = false; | ||
| } | ||
| } | ||
|
|
||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,74 @@ | ||
| /* | ||
| * Licensed to Metamarkets Group Inc. (Metamarkets) under one | ||
| * or more contributor license agreements. See the NOTICE file | ||
| * distributed with this work for additional information | ||
| * regarding copyright ownership. Metamarkets licenses this file | ||
| * to you under the Apache License, Version 2.0 (the | ||
| * "License"); you may not use this file except in compliance | ||
| * with the License. You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, | ||
| * software distributed under the License is distributed on an | ||
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| * KIND, either express or implied. See the License for the | ||
| * specific language governing permissions and limitations | ||
| * under the License. | ||
| */ | ||
|
|
||
| package io.druid.indexing.overlord.autoscaling; | ||
|
|
||
| import com.fasterxml.jackson.annotation.JsonProperty; | ||
| import org.joda.time.Period; | ||
|
|
||
| /** | ||
| * Configuration for the pending-task-based worker resource management strategy. | ||
| * | ||
| * <p>Extends {@link SimpleWorkerResourceManagementConfig} with one additional JSON property, | ||
| * {@code maxScalingStep}, which defaults to 10. NOTE(review): presumably this caps how many workers a | ||
| * single provisioning pass may add; confirm against the strategy that reads it. | ||
| * | ||
| * <p>The setters for the inherited settings (worker idle timeout, max scaling duration, number of events | ||
| * to track, worker version, pending task timeout) are redeclared here. Each one delegates to the parent | ||
| * setter and returns this object typed as {@code PendingTaskBasedWorkerResourceManagementConfig}, so | ||
| * calls can be chained without losing the subclass type. | ||
| */ | ||
| public class PendingTaskBasedWorkerResourceManagementConfig extends SimpleWorkerResourceManagementConfig | ||
| { | ||
| @JsonProperty | ||
| private int maxScalingStep = 10; | ||
|
|
||
|
|
||
| public int getMaxScalingStep() | ||
| { | ||
| return maxScalingStep; | ||
| } | ||
|
|
||
| public PendingTaskBasedWorkerResourceManagementConfig setMaxScalingStep(int maxScalingStep) | ||
| { | ||
| this.maxScalingStep = maxScalingStep; | ||
| return this; | ||
| } | ||
|
|
||
| public PendingTaskBasedWorkerResourceManagementConfig setWorkerIdleTimeout(Period workerIdleTimeout) | ||
| { | ||
| super.setWorkerIdleTimeout(workerIdleTimeout); | ||
| return this; | ||
| } | ||
|
|
||
| public PendingTaskBasedWorkerResourceManagementConfig setMaxScalingDuration(Period maxScalingDuration) | ||
| { | ||
| super.setMaxScalingDuration(maxScalingDuration); | ||
| return this; | ||
| } | ||
|
|
||
| public PendingTaskBasedWorkerResourceManagementConfig setNumEventsToTrack(int numEventsToTrack) | ||
| { | ||
| super.setNumEventsToTrack(numEventsToTrack); | ||
| return this; | ||
| } | ||
|
|
||
| public PendingTaskBasedWorkerResourceManagementConfig setWorkerVersion(String workerVersion) | ||
| { | ||
| super.setWorkerVersion(workerVersion); | ||
| return this; | ||
| } | ||
|
|
||
| public PendingTaskBasedWorkerResourceManagementConfig setPendingTaskTimeout(Period pendingTaskTimeout) | ||
| { | ||
| super.setPendingTaskTimeout(pendingTaskTimeout); | ||
| return this; | ||
| } | ||
|
|
||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
any reason we truncate the time here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I kept the existing logic and just abstracted it out from SimpleResourceManagementStrategy. I believe it's related to the introduction of origin time in the resource management scheduler.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@fjy any clues as to why you originally added originTime and timestamp truncation as opposed to just taking now + duration as the next timestamp?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@fjy: any thoughts?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@xvrl: I checked the code in more detail; it seems to make sure that after every overlord restart the scheduler runs at the same time. I think it's helpful in case someone wants to terminate workers only at specific fixed absolute intervals.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It was made for the way Amazon prices, where I believe they only charge at the top of the hour.