-
Notifications
You must be signed in to change notification settings - Fork 3.4k
HBASE-25739 TableSkewCostFunction needs to use aggregated deviation #3415
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -72,31 +72,13 @@ private static double computeCost(double[] stats) { | |
| double count = stats.length; | ||
| double mean = total / count; | ||
|
|
||
| // Compute max as if all region servers had 0 and one had the sum of all costs. This must be | ||
| // a zero sum cost for this to make sense. | ||
| double max = ((count - 1) * mean) + (total - mean); | ||
|
|
||
| // It's possible that there aren't enough regions to go around | ||
| double min; | ||
| if (count > total) { | ||
| min = ((count - total) * mean) + ((1 - mean) * total); | ||
| } else { | ||
| // Some will have 1 more than everything else. | ||
| int numHigh = (int) (total - (Math.floor(mean) * count)); | ||
| int numLow = (int) (count - numHigh); | ||
|
|
||
| min = (numHigh * (Math.ceil(mean) - mean)) + (numLow * (mean - Math.floor(mean))); | ||
|
|
||
| } | ||
| min = Math.max(0, min); | ||
| for (int i = 0; i < stats.length; i++) { | ||
| double n = stats[i]; | ||
| double diff = Math.abs(mean - n); | ||
| totalCost += diff; | ||
| } | ||
|
|
||
| double scaled = CostFunction.scale(min, max, totalCost); | ||
| return scaled; | ||
| return CostFunction.scale(getMinSkew(total, count), | ||
| getMaxSkew(total, count), totalCost); | ||
| } | ||
|
|
||
| private static double getSum(double[] stats) { | ||
|
|
@@ -106,4 +88,33 @@ private static double getSum(double[] stats) { | |
| } | ||
| return total; | ||
| } | ||
|
|
||
| /** | ||
| * Return the min skew of distribution | ||
| * The result is a sum of per-server distances from the mean, where mean is total / numServers. | ||
| * When numServers is greater than total, (numServers - total) servers each contribute mean and | ||
| * total servers each contribute (1 - mean). Otherwise numHigh servers each contribute | ||
| * (ceil(mean) - mean) and numLow servers each contribute (mean - floor(mean)), where numHigh is | ||
| * total - floor(mean) * numServers and numLow is numServers - numHigh. | ||
| * @param total is total number of regions | ||
| * @param numServers is the number of servers that total is divided by to obtain the mean; | ||
| * assumed to be non-zero (the division is not guarded here) - TODO confirm with callers | ||
| * @return the min skew computed as described above; it is 0 when mean is a whole number and | ||
| * numServers is not greater than total, because ceil(mean) and floor(mean) then equal mean | ||
| */ | ||
| public static double getMinSkew(double total, double numServers) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yes.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: one way of addressing the nick comment would be to add javadoc for total that said what it was.....
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why are the input arguments
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It is to convert the input from integer to double for computation in the function. |
||
| double mean = total / numServers; | ||
| // It's possible that there aren't enough regions to go around | ||
| double min; | ||
| if (numServers > total) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. should this be
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is the case when we have more nodes than regions, we will have nodes without regions and it is balanced. |
||
| min = ((numServers - total) * mean + (1 - mean) * total) ; | ||
| } else { | ||
| // Some will have 1 more than everything else. | ||
| int numHigh = (int) (total - (Math.floor(mean) * numServers)); | ||
| int numLow = (int) (numServers - numHigh); | ||
| min = numHigh * (Math.ceil(mean) - mean) + numLow * (mean - Math.floor(mean)); | ||
| } | ||
| return min; | ||
| } | ||
|
|
||
| /** | ||
| * Return the max deviation of distribution | ||
| * Compute max as if all region servers had 0 and one had the sum of all costs. This must be | ||
| * a zero sum cost for this to make sense. | ||
| * In that layout the single loaded server contributes (total - mean) and each of the other | ||
| * (numServers - 1) servers contributes mean, where mean is total / numServers. | ||
| * @param total is the sum that is divided by numServers to obtain the mean | ||
| * @param numServers is the number of servers; assumed to be non-zero (the division is not | ||
| * guarded here) - TODO confirm with callers | ||
| * @return (total - mean) + (numServers - 1) * mean | ||
| */ | ||
| public static double getMaxSkew(double total, double numServers) { | ||
| double mean = total / numServers; | ||
| return (total - mean) + (numServers - 1) * mean; | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unused?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
applied later by prior code change.