From a7321e891aa97aa89c105e74bbaf11d3daf30161 Mon Sep 17 00:00:00 2001 From: ChungHo Chen Date: Mon, 10 Jul 2017 14:47:56 -0700 Subject: [PATCH 1/6] adding new post aggregators of test stats to druid-stats extension --- .../development/extensions-core/test-stats.md | 105 ++++++++++++ extensions-core/stats/pom.xml | 5 + .../aggregation/stats/DruidStatsModule.java | 6 +- .../PvaluefromZscorePostAggregator.java | 120 +++++++++++++ .../teststats/ZtestPostAggregator.java | 159 ++++++++++++++++++ .../aggregation/teststats/TestStatsTest.java | 77 +++++++++ .../aggregation/post/PostAggregatorIds.java | 2 + 7 files changed, 473 insertions(+), 1 deletion(-) create mode 100644 docs/content/development/extensions-core/test-stats.md create mode 100644 extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/PvaluefromZscorePostAggregator.java create mode 100644 extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/ZtestPostAggregator.java create mode 100644 extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/TestStatsTest.java diff --git a/docs/content/development/extensions-core/test-stats.md b/docs/content/development/extensions-core/test-stats.md new file mode 100644 index 000000000000..5d3dd5198095 --- /dev/null +++ b/docs/content/development/extensions-core/test-stats.md @@ -0,0 +1,105 @@ +--- +layout: doc_page +--- + +# Test Stats Aggregators + +Incorporates test statistics related aggregators, including z-score and p-value. Please refer to [https://www.paypal-engineering.com/2017/06/29/democratizing-experimentation-data-for-product-innovations/](https://www.paypal-engineering.com/2017/06/29/democratizing-experimentation-data-for-product-innovations/) for background and details. + +Make sure to include `druid-stats` extension in order to use these aggregrators. + +## Z-Score for two sample ztests post aggregator + +Please refer to [https://www.isixsigma.com/tools-templates/hypothesis-testing/making-sense-two-proportions-test/](https://www.isixsigma.com/tools-templates/hypothesis-testing/making-sense-two-proportions-test/) and [http://www.ucs.louisiana.edu/~jcb0773/Berry_statbook/Berry_statbook_chpt6.pdf](http://www.ucs.louisiana.edu/~jcb0773/Berry_statbook/Berry_statbook_chpt6.pdf) for more details. + +z = (p1 - p2) / S.E. (assuming null hypothesis is true) + +where S.E. stands for standard error, and + +S.E. = sqrt{ p1 * ( 1 - p1 )/n1 + p2 * (1 - p2)/n2) } + +(p1 – p2) is the observed difference between two sample proportions. + +### zscore2sample post aggregator +* **`zscore2sample`**: calculate the z-score using two-sample z-test while converting binary variables (***e.g.*** success or not) to continuous variables (***e.g.*** conversion rate). + +```json +{ + "type": "zscore2sample", + "name": "", + "fields": [, , , ] +} +``` +Please note as the post aggregator will be converting binary variables to continuous variables for two population proportions, it is sensitive to the ordering of the post aggregators. In other words, + +p1 = (count 1) / (sample size 1) + +p2 = (count 2) / (sample size 2) + +For example, + +``` +"fields": [, , , ] +``` + +### pvalue2tailedztest post aggregator + +* **`pvalue2tailedztest`**: calculate p-value for two sided z-test from zscore + - ***pvalue2tailedZtest(zscore)*** - the input is the z-score calculated using zscore2samples post aggregator + + +```json +{ + "type": "pvalue2tailedztest", + "name": "", + "field": "" +} +``` + +For example, + +``` + "type": "pvalue2tailedztest", + "name": "pvalue", + "field": +``` + +## Example Usage + +In this example, we use zscore2sample post aggregator to calculate z-score, and feed the z-score to pvalue2tailedztest post aggregator to calculate p-value. + +A JSON query example can be as follows: + +```json +{ + ... + "postAggregations" : { + "type" : "pvalue2tailedztest", + "name" : "pvalue", + "field" : + { + "type" : "zscore2sample", + "name" : "zscore", + "fields" : [ + { "type" : "constant", + "name" : "successCountPopulation1", + "value" : 300 + }, + { "type" : "constant", + "name" : "sampleSizePopulation1", + "value" : 500 + }, + { "type" : "constant", + "name" : "successCountPopulation2", + "value" : 450 + }, + { "type" : "constant", + "name" : "sampleSizePopulation2", + "value" : 600 + } + ] + } + } +} + +``` diff --git a/extensions-core/stats/pom.xml b/extensions-core/stats/pom.xml index 44ce7c53a7dd..24d74fd10da6 100644 --- a/extensions-core/stats/pom.xml +++ b/extensions-core/stats/pom.xml @@ -40,6 +40,11 @@ ${project.parent.version} provided + + org.apache.commons + commons-math3 + 3.6.1 + diff --git a/extensions-core/stats/src/main/java/io/druid/query/aggregation/stats/DruidStatsModule.java b/extensions-core/stats/src/main/java/io/druid/query/aggregation/stats/DruidStatsModule.java index cc136f5f9dfa..b894af68a4b0 100644 --- a/extensions-core/stats/src/main/java/io/druid/query/aggregation/stats/DruidStatsModule.java +++ b/extensions-core/stats/src/main/java/io/druid/query/aggregation/stats/DruidStatsModule.java @@ -24,6 +24,8 @@ import com.google.common.collect.ImmutableList; import com.google.inject.Binder; import io.druid.initialization.DruidModule; +import io.druid.query.aggregation.teststats.PvaluefromZscorePostAggregator; +import io.druid.query.aggregation.teststats.ZtestPostAggregator; import io.druid.query.aggregation.variance.StandardDeviationPostAggregator; import io.druid.query.aggregation.variance.VarianceAggregatorFactory; import io.druid.query.aggregation.variance.VarianceFoldingAggregatorFactory; @@ -43,7 +45,9 @@ public List getJacksonModules() new SimpleModule().registerSubtypes( VarianceAggregatorFactory.class, VarianceFoldingAggregatorFactory.class, - StandardDeviationPostAggregator.class + StandardDeviationPostAggregator.class, + ZtestPostAggregator.class, + PvaluefromZscorePostAggregator.class ) ); } diff --git a/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/PvaluefromZscorePostAggregator.java b/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/PvaluefromZscorePostAggregator.java new file mode 100644 index 000000000000..1d0a8945460d --- /dev/null +++ b/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/PvaluefromZscorePostAggregator.java @@ -0,0 +1,120 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.query.aggregation.teststats; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonTypeName; +import com.google.common.base.Preconditions; +import com.google.common.collect.Sets; +import io.druid.query.aggregation.AggregatorFactory; +import io.druid.query.aggregation.PostAggregator; +import io.druid.query.aggregation.post.PostAggregatorIds; +import io.druid.query.cache.CacheKeyBuilder; +import org.apache.commons.math3.distribution.NormalDistribution; + +import java.util.Comparator; +import java.util.Map; +import java.util.Set; + +/** + * Created by chunchen on 4/5/17. + */ +@JsonTypeName("pvalue2tailedztest") +public class PvaluefromZscorePostAggregator implements PostAggregator { + private final String name; + private final PostAggregator field; + + @JsonCreator + public PvaluefromZscorePostAggregator( + @JsonProperty("name") String name, + @JsonProperty("field") PostAggregator field + ) { + Preconditions.checkNotNull(name, "Must have a valid, non-null post-aggregator"); + this.name = name; + this.field = field; + } + + @Override + public Set getDependentFields() { + + Set dependentFields = Sets.newHashSet(); + + dependentFields.addAll(field.getDependentFields()); + + return dependentFields; + } + + @Override + public Comparator getComparator() { + throw new UnsupportedOperationException(); + } + + @Override + public Object compute(Map combinedAggregators) { + + double zScore = + ((Number) field.compute(combinedAggregators)).doubleValue(); + + zScore = Math.abs(zScore); + return 2 * (1 - cumulativeProbability(zScore)); + } + + private double cumulativeProbability(double x) { + try { + NormalDistribution normDist = new NormalDistribution(); + return normDist.cumulativeProbability(x); + } catch (Exception ex) { + return Double.NaN; + } + } + + @Override + @JsonProperty + public String getName() { + return name; + } + + @Override + public PostAggregator decorate(Map aggregators) { + return this; + } + + @JsonProperty + public PostAggregator getField() { + return field; + } + + @Override + public String toString() { + return "PvaluefromZscorePostAggregator{" + + "name'" + name + '\'' + + ", field=" + field + + "}"; + } + + @Override + public byte[] getCacheKey() { + return new CacheKeyBuilder( + PostAggregatorIds.PVALUE_FROM_ZTEST) + .appendCacheable(field) + .build(); + } +} diff --git a/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/ZtestPostAggregator.java b/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/ZtestPostAggregator.java new file mode 100644 index 000000000000..aa8f48a62175 --- /dev/null +++ b/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/ZtestPostAggregator.java @@ -0,0 +1,159 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.query.aggregation.teststats; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonTypeName; +import com.google.common.base.Preconditions; +import com.google.common.collect.Sets; +import io.druid.query.aggregation.AggregatorFactory; +import io.druid.query.aggregation.PostAggregator; +import io.druid.query.aggregation.post.ArithmeticPostAggregator; +import io.druid.query.aggregation.post.PostAggregatorIds; +import io.druid.query.cache.CacheKeyBuilder; + +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/* + * 1. calculating zscore using two-sample Z-Test. IOW, + * using z-test statistic for testing the difference of + * two population proportions. + * 2. converting binary variables (e.g. success or not) to continuous variables (e.g. conversion rate). + * + Please refer to http://math.mercyhurst.edu/~griff/courses/m109/Lectures/old/Sum_06/sect8.1.pdf + for more details. + http://facweb.cs.depaul.edu/sjost/csc423/documents/test-descriptions/indep-z.pdf +*/ +@JsonTypeName("zscore2sample") +public class ZtestPostAggregator implements PostAggregator { + private final String name; + private final List fields; + + @JsonCreator + public ZtestPostAggregator( + @JsonProperty("name") String name, + @JsonProperty("fields") List fields + ) { + Preconditions.checkNotNull(name, "Must have a valid, non-null post-aggregator name"); + Preconditions.checkArgument(fields.size() == 4, "Must have 4 fields; " + + "\"fields\": , ," + + " , "); + this.name = name; + this.fields = fields; + } + + @Override + public Set getDependentFields() { + Set dependentFields = Sets.newLinkedHashSet(); + for (PostAggregator field : fields) { + dependentFields.addAll(field.getDependentFields()); + } + return dependentFields; + } + + @Override + public Comparator getComparator() { + return ArithmeticPostAggregator.DEFAULT_COMPARATOR; + } + + @Override + public Object compute(Map combinedAggregators) { + + PostAggregator[] args = new PostAggregator[fields.size()]; + for (int j = 0; j < fields.size(); j++) { + args[j] = fields.get(j); + } + + return zScoreTwoSamples( + ((Number) args[0].compute(combinedAggregators)).doubleValue(), + ((Number) args[1].compute(combinedAggregators)).doubleValue(), + ((Number) args[2].compute(combinedAggregators)).doubleValue(), + ((Number) args[3].compute(combinedAggregators)).doubleValue()); + } + + @Override + @JsonProperty + public String getName() { + return name; + } + + @Override + public PostAggregator decorate(Map aggregators) { + return this; + } + + @JsonProperty + public List getFields() { + return fields; + } + + /** + * 1. calculating zscore for two-sample Z test. IOW, + * using z-test statistic for testing the difference of two population proportions. + * 2. converting binary variables (e.g. success or not) to continuous variables (e.g. conversion rate). + * + * @param the success count of population 1 + * @param param sample size of population 1 + * @param the success count of population 2 + * @param sample size of population 2 + */ + private double zScoreTwoSamples(Double s1count, Double p1count, Double s2count, Double p2count) { + double convertRate1; + double convertRate2; + Preconditions.checkState(s1count >= 0, "success count can't be negative."); + Preconditions.checkState(s2count >= 0, "success count can't be negative."); + Preconditions.checkState(p1count >= s1count, "sample size can't be smaller than the success count."); + Preconditions.checkState(p2count >= s2count, "sample size can't be smaller than the success count."); + + try { + convertRate1 = s1count / p1count; + convertRate2 = s2count / p2count; + + return (convertRate1 - convertRate2) / + Math.sqrt((convertRate1 * (1 - convertRate1) / p1count) + + (convertRate2 * (1 - convertRate2) / p2count)); + } catch (Exception ex) { + return 0; + } + } + + @Override + public String toString() { + return "ZtestPostAggregator{" + + "name='" + + name + + '\'' + + ", fields=" + + fields + + "}"; + } + + @Override + public byte[] getCacheKey() { + return new CacheKeyBuilder( + PostAggregatorIds.ZTEST) + .appendCacheables(fields) + .build(); + } +} diff --git a/extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/TestStatsTest.java b/extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/TestStatsTest.java new file mode 100644 index 000000000000..bb6ec417c6b2 --- /dev/null +++ b/extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/TestStatsTest.java @@ -0,0 +1,77 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.query.aggregation.teststats; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; +import io.druid.query.aggregation.PostAggregator; +import io.druid.query.aggregation.post.ConstantPostAggregator; +import org.junit.Assert; +import org.junit.Test; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Created by chunchen on 4/23/17. + */ +public class TestStatsTest { + + @Test + public void testCompute() { + ZtestPostAggregator ztestPostAggregator; + PvaluefromZscorePostAggregator pvaluePostAggregator; + ConstantPostAggregator constPostAgg1, constPostAgg2, constPostAgg3, constPostAgg4; + + constPostAgg1 = new ConstantPostAggregator("successCountPopulation1", 39244); + constPostAgg2 = new ConstantPostAggregator("sampleSizePopulation1", 394298); + constPostAgg3 = new ConstantPostAggregator("successCountPopulation2", 8991275); + constPostAgg4 = new ConstantPostAggregator("sampleSizePopulation2", 9385573); + + List postAggregatorList = + Lists.newArrayList( + (PostAggregator) constPostAgg1, + constPostAgg2, + constPostAgg3, + constPostAgg4 + ); + + Map metricValues = new HashMap(); + for (PostAggregator pa : postAggregatorList) { + metricValues.put(pa.getName(), ((ConstantPostAggregator) pa).getConstantValue()); + } + + ztestPostAggregator = new ZtestPostAggregator("zscore", postAggregatorList); + + double zscore = ((Number) ztestPostAggregator.compute(metricValues)).doubleValue(); + + pvaluePostAggregator = new PvaluefromZscorePostAggregator("pvalue", ztestPostAggregator); + + System.out.print("zscore = " + zscore + "\n"); + System.out.print("pvalue = " + + pvaluePostAggregator.compute(ImmutableMap.of("zscore", -1783.8762354220219))); + + Assert.assertEquals(-1783.8762354220219, + zscore, 0.0001); + Assert.assertNotEquals(0.0, + ztestPostAggregator.compute(metricValues)); + } +} diff --git a/processing/src/main/java/io/druid/query/aggregation/post/PostAggregatorIds.java b/processing/src/main/java/io/druid/query/aggregation/post/PostAggregatorIds.java index fcca9314e079..bfb7d4df108e 100644 --- a/processing/src/main/java/io/druid/query/aggregation/post/PostAggregatorIds.java +++ b/processing/src/main/java/io/druid/query/aggregation/post/PostAggregatorIds.java @@ -42,4 +42,6 @@ public class PostAggregatorIds public static final byte DATA_SKETCHES_SKETCH_SET = 18; public static final byte VARIANCE_STANDARD_DEVIATION = 19; public static final byte FINALIZING_FIELD_ACCESS = 20; + public static final byte ZTEST = 21; + public static final byte PVALUE_FROM_ZTEST = 22; } From ccdf21e7c2bd15deed657bb684e3c1ffe45fbaae Mon Sep 17 00:00:00 2001 From: ChungHo Chen Date: Tue, 22 Aug 2017 13:20:52 +0800 Subject: [PATCH 2/6] changes to address code review comments --- .../development/extensions-core/test-stats.md | 59 ++++---- extensions-core/stats/pom.xml | 1 - .../PvaluefromZscorePostAggregator.java | 81 ++++++----- .../teststats/ZtestPostAggregator.java | 133 +++++++++++------- .../aggregation/teststats/TestStatsTest.java | 34 +++-- 5 files changed, 172 insertions(+), 136 deletions(-) diff --git a/docs/content/development/extensions-core/test-stats.md b/docs/content/development/extensions-core/test-stats.md index 5d3dd5198095..ef4311f37873 100644 --- a/docs/content/development/extensions-core/test-stats.md +++ b/docs/content/development/extensions-core/test-stats.md @@ -4,7 +4,7 @@ layout: doc_page # Test Stats Aggregators -Incorporates test statistics related aggregators, including z-score and p-value. Please refer to [https://www.paypal-engineering.com/2017/06/29/democratizing-experimentation-data-for-product-innovations/](https://www.paypal-engineering.com/2017/06/29/democratizing-experimentation-data-for-product-innovations/) for background and details. +Incorporates test statistics related aggregators, including z-score and p-value. Please refer to [https://www.paypal-engineering.com/2017/06/29/democratizing-experimentation-data-for-product-innovations/](https://www.paypal-engineering.com/2017/06/29/democratizing-experimentation-data-for-product-innovations/) for math background and details, although its input spec and example are out of date. Make sure to include `druid-stats` extension in order to use these aggregrators. @@ -14,7 +14,8 @@ Please refer to [https://www.isixsigma.com/tools-templates/hypothesis-testing/ma z = (p1 - p2) / S.E. (assuming null hypothesis is true) -where S.E. stands for standard error, and +Please see below for p1 and p2. +Please note S.E. stands for standard error where S.E. = sqrt{ p1 * ( 1 - p1 )/n1 + p2 * (1 - p2)/n2) } @@ -27,46 +28,36 @@ S.E. = sqrt{ p1 * ( 1 - p1 )/n1 + p2 * (1 - p2)/n2) } { "type": "zscore2sample", "name": "", - "fields": [, , , ] + "successCount1": success count of sample 1, + "sample1Size": sample 1 size, + "successCount2": success count of sample 2, + "sample2size" : sample 2 size } ``` -Please note as the post aggregator will be converting binary variables to continuous variables for two population proportions, it is sensitive to the ordering of the post aggregators. In other words, -p1 = (count 1) / (sample size 1) +Please note the post aggregator will be converting binary variables to continuous variables for two population proportions. Specifically -p2 = (count 2) / (sample size 2) +p1 = (successCount1) / (sample size 1) -For example, +p2 = (successCount2) / (sample size 2) -``` -"fields": [, , , ] -``` +### pvalue2tailedZtest post aggregator -### pvalue2tailedztest post aggregator - -* **`pvalue2tailedztest`**: calculate p-value for two sided z-test from zscore - - ***pvalue2tailedZtest(zscore)*** - the input is the z-score calculated using zscore2samples post aggregator +* **`pvalue2tailedZtest`**: calculate p-value of two-sided z-test from zscore + - ***pvalue2tailedZtest(zscore)*** - the input is a z-score which can be calculated using the zscore2sample post aggregator ```json { - "type": "pvalue2tailedztest", + "type": "pvalue2tailedZtest", "name": "", - "field": "" + "zScore": } ``` - -For example, - -``` - "type": "pvalue2tailedztest", - "name": "pvalue", - "field": -``` ## Example Usage -In this example, we use zscore2sample post aggregator to calculate z-score, and feed the z-score to pvalue2tailedztest post aggregator to calculate p-value. +In this example, we use zscore2sample post aggregator to calculate z-score, and then feed the z-score to pvalue2tailedZtest post aggregator to calculate p-value. A JSON query example can be as follows: @@ -74,30 +65,32 @@ A JSON query example can be as follows: { ... "postAggregations" : { - "type" : "pvalue2tailedztest", + "type" : "pvalue2tailedZtest", "name" : "pvalue", - "field" : + "zScore" : { "type" : "zscore2sample", "name" : "zscore", - "fields" : [ + "successCount1" : { "type" : "constant", - "name" : "successCountPopulation1", + "name" : "successCountFromPopulation1Sample", "value" : 300 }, + "sample1Size" : { "type" : "constant", - "name" : "sampleSizePopulation1", + "name" : "sampleSizeOfPopulation1", "value" : 500 }, + "successCount2": { "type" : "constant", - "name" : "successCountPopulation2", + "name" : "successCountFromPopulation2Sample", "value" : 450 }, + "sample2Size" : { "type" : "constant", - "name" : "sampleSizePopulation2", + "name" : "sampleSizeOfPopulation2", "value" : 600 } - ] } } } diff --git a/extensions-core/stats/pom.xml b/extensions-core/stats/pom.xml index 24d74fd10da6..a9fd40830d79 100644 --- a/extensions-core/stats/pom.xml +++ b/extensions-core/stats/pom.xml @@ -43,7 +43,6 @@ org.apache.commons commons-math3 - 3.6.1 diff --git a/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/PvaluefromZscorePostAggregator.java b/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/PvaluefromZscorePostAggregator.java index 1d0a8945460d..6a17271c0b1c 100644 --- a/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/PvaluefromZscorePostAggregator.java +++ b/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/PvaluefromZscorePostAggregator.java @@ -23,98 +23,113 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; import com.google.common.base.Preconditions; +import com.google.common.collect.Iterables; import com.google.common.collect.Sets; +import io.druid.query.Queries; import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.PostAggregator; +import io.druid.query.aggregation.post.ArithmeticPostAggregator; import io.druid.query.aggregation.post.PostAggregatorIds; import io.druid.query.cache.CacheKeyBuilder; import org.apache.commons.math3.distribution.NormalDistribution; +import java.util.Collections; import java.util.Comparator; import java.util.Map; import java.util.Set; -/** - * Created by chunchen on 4/5/17. - */ -@JsonTypeName("pvalue2tailedztest") -public class PvaluefromZscorePostAggregator implements PostAggregator { +@JsonTypeName("pvalue2tailedZtest") +public class PvaluefromZscorePostAggregator implements PostAggregator +{ private final String name; - private final PostAggregator field; + private final PostAggregator zScore; @JsonCreator public PvaluefromZscorePostAggregator( @JsonProperty("name") String name, - @JsonProperty("field") PostAggregator field - ) { + @JsonProperty("zScore") PostAggregator zScore + ) + { Preconditions.checkNotNull(name, "Must have a valid, non-null post-aggregator"); + Preconditions.checkNotNull(zScore, "Must have a valid, non-null post-aggregator"); this.name = name; - this.field = field; + this.zScore = zScore; } @Override - public Set getDependentFields() { - + public Set getDependentFields() + { Set dependentFields = Sets.newHashSet(); - dependentFields.addAll(field.getDependentFields()); + dependentFields.addAll(zScore.getDependentFields()); return dependentFields; } @Override - public Comparator getComparator() { - throw new UnsupportedOperationException(); + public Comparator getComparator() + { + return ArithmeticPostAggregator.DEFAULT_COMPARATOR; } @Override - public Object compute(Map combinedAggregators) { + public Object compute(Map combinedAggregators) + { - double zScore = - ((Number) field.compute(combinedAggregators)).doubleValue(); + double zScoreValue = + ((Number) zScore.compute(combinedAggregators)).doubleValue(); - zScore = Math.abs(zScore); - return 2 * (1 - cumulativeProbability(zScore)); + zScoreValue = Math.abs(zScoreValue); + return 2 * (1 - cumulativeProbability(zScoreValue)); } - private double cumulativeProbability(double x) { + private double cumulativeProbability(double x) + { try { NormalDistribution normDist = new NormalDistribution(); return normDist.cumulativeProbability(x); - } catch (Exception ex) { + } + catch (IllegalArgumentException ex) { return Double.NaN; } } @Override @JsonProperty - public String getName() { + public String getName() + { return name; } @Override - public PostAggregator decorate(Map aggregators) { - return this; + public PostAggregator decorate(Map aggregators) + { + return new PvaluefromZscorePostAggregator(name, Iterables.getOnlyElement(Queries.decoratePostAggregators(Collections.singletonList(zScore), aggregators))); } - @JsonProperty - public PostAggregator getField() { - return field; + @Override + public int hashCode() + { + int result = name != null ? name.hashCode() : 0; + result = 31 * result + zScore.hashCode(); + return result; } @Override - public String toString() { + public String toString() + { return "PvaluefromZscorePostAggregator{" + - "name'" + name + '\'' + - ", field=" + field + - "}"; + "name'" + name + '\'' + + ", zScore=" + zScore + + '}'; } @Override - public byte[] getCacheKey() { + public byte[] getCacheKey() + { return new CacheKeyBuilder( PostAggregatorIds.PVALUE_FROM_ZTEST) - .appendCacheable(field) + .appendCacheable(zScore) .build(); } } diff --git a/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/ZtestPostAggregator.java b/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/ZtestPostAggregator.java index aa8f48a62175..58004b77d1c1 100644 --- a/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/ZtestPostAggregator.java +++ b/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/ZtestPostAggregator.java @@ -23,15 +23,17 @@ import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.annotation.JsonTypeName; import com.google.common.base.Preconditions; +import com.google.common.collect.Iterables; import com.google.common.collect.Sets; +import io.druid.query.Queries; import io.druid.query.aggregation.AggregatorFactory; import io.druid.query.aggregation.PostAggregator; import io.druid.query.aggregation.post.ArithmeticPostAggregator; import io.druid.query.aggregation.post.PostAggregatorIds; import io.druid.query.cache.CacheKeyBuilder; +import java.util.Collections; import java.util.Comparator; -import java.util.List; import java.util.Map; import java.util.Set; @@ -46,79 +48,92 @@ http://facweb.cs.depaul.edu/sjost/csc423/documents/test-descriptions/indep-z.pdf */ @JsonTypeName("zscore2sample") -public class ZtestPostAggregator implements PostAggregator { +public class ZtestPostAggregator implements PostAggregator +{ private final String name; - private final List fields; + private final PostAggregator successCount1; + private final PostAggregator sample1Size; + private final PostAggregator successCount2; + private final PostAggregator sample2Size; + @JsonCreator public ZtestPostAggregator( @JsonProperty("name") String name, - @JsonProperty("fields") List fields - ) { + @JsonProperty("successCount1") PostAggregator successCount1, + @JsonProperty("sample1Size") PostAggregator sample1Size, + @JsonProperty("successCount2") PostAggregator successCount2, + @JsonProperty("sample2Size") PostAggregator sample2Size + + ) + { Preconditions.checkNotNull(name, "Must have a valid, non-null post-aggregator name"); - Preconditions.checkArgument(fields.size() == 4, "Must have 4 fields; " + - "\"fields\": , ," + - " , "); + Preconditions.checkNotNull(successCount1, "success count from sample 1 can not be null"); + Preconditions.checkNotNull(sample1Size, "sample size of population 1 can not null"); + Preconditions.checkNotNull(successCount2, "success count from sample 2 can not be null"); + Preconditions.checkNotNull(sample2Size, "sample size of population 2 can not be null"); + this.name = name; - this.fields = fields; + this.successCount1 = successCount1; + this.sample1Size = sample1Size; + this.successCount2 = successCount2; + this.sample2Size = sample2Size; } @Override - public Set getDependentFields() { + public Set getDependentFields() + { Set dependentFields = Sets.newLinkedHashSet(); - for (PostAggregator field : fields) { - dependentFields.addAll(field.getDependentFields()); - } + dependentFields.addAll(successCount1.getDependentFields()); + dependentFields.addAll(sample1Size.getDependentFields()); + dependentFields.addAll(successCount2.getDependentFields()); + dependentFields.addAll(sample2Size.getDependentFields()); + return dependentFields; } @Override - public Comparator getComparator() { + public Comparator getComparator() + { return ArithmeticPostAggregator.DEFAULT_COMPARATOR; } @Override - public Object compute(Map combinedAggregators) { - - PostAggregator[] args = new PostAggregator[fields.size()]; - for (int j = 0; j < fields.size(); j++) { - args[j] = fields.get(j); - } - + public Object compute(Map combinedAggregators) + { return zScoreTwoSamples( - ((Number) args[0].compute(combinedAggregators)).doubleValue(), - ((Number) args[1].compute(combinedAggregators)).doubleValue(), - ((Number) args[2].compute(combinedAggregators)).doubleValue(), - ((Number) args[3].compute(combinedAggregators)).doubleValue()); + ((Number) successCount1.compute(combinedAggregators)).doubleValue(), + ((Number) sample1Size.compute(combinedAggregators)).doubleValue(), + ((Number) successCount2.compute(combinedAggregators)).doubleValue(), + ((Number) sample2Size.compute(combinedAggregators)).doubleValue() + ); } @Override @JsonProperty - public String getName() { + public String getName() + { return name; } @Override - public PostAggregator decorate(Map aggregators) { - return this; - } - - @JsonProperty - public List getFields() { - return fields; + public ZtestPostAggregator decorate(Map aggregators) + { + return new ZtestPostAggregator(name, Iterables.getOnlyElement(Queries.decoratePostAggregators(Collections.singletonList(successCount1), aggregators)), Iterables.getOnlyElement(Queries.decoratePostAggregators(Collections.singletonList(sample1Size), aggregators)), Iterables.getOnlyElement(Queries.decoratePostAggregators(Collections.singletonList(successCount2), aggregators)), Iterables.getOnlyElement(Queries.decoratePostAggregators(Collections.singletonList(sample2Size), aggregators))); } /** * 1. calculating zscore for two-sample Z test. IOW, - * using z-test statistic for testing the difference of two population proportions. + * using z-test statistic for testing the difference of two population proportions. * 2. converting binary variables (e.g. success or not) to continuous variables (e.g. conversion rate). * - * @param the success count of population 1 - * @param param sample size of population 1 - * @param the success count of population 2 - * @param sample size of population 2 + * @param s1count - success count of population 1 + * @param p1count - sample size of population 1 + * @param s2count - the success count of population 2 + * @param p2count - sample size of population 2 */ - private double zScoreTwoSamples(Double s1count, Double p1count, Double s2count, Double p2count) { + private double zScoreTwoSamples(double s1count, double p1count, double s2count, double p2count) + { double convertRate1; double convertRate2; Preconditions.checkState(s1count >= 0, "success count can't be negative."); @@ -131,29 +146,45 @@ private double zScoreTwoSamples(Double s1count, Double p1count, Double s2count, convertRate2 = s2count / p2count; return (convertRate1 - convertRate2) / - Math.sqrt((convertRate1 * (1 - convertRate1) / p1count) + - (convertRate2 * (1 - convertRate2) / p2count)); - } catch (Exception ex) { + Math.sqrt((convertRate1 * (1 - convertRate1) / p1count) + + (convertRate2 * (1 - convertRate2) / p2count)); + } + catch (IllegalArgumentException ex) { return 0; } } + public int hashCode() + { + int result = name != null ? name.hashCode() : 0; + result = 31 * result + successCount1.hashCode(); + result = 31 * result + sample1Size.hashCode(); + result = 31 * result + successCount2.hashCode(); + result = 31 * result + sample2Size.hashCode(); + return result; + } + @Override - public String toString() { + public String toString() + { return "ZtestPostAggregator{" + - "name='" - + name - + '\'' - + ", fields=" - + fields - + "}"; + "name='" + name + '\'' + + ", successCount1" + successCount1 + + ", sample1Size" + sample1Size + + ", successCount2" + successCount2 + + ", sample2size" + sample2Size + + '}'; } @Override - public byte[] getCacheKey() { + public byte[] getCacheKey() + { return new CacheKeyBuilder( PostAggregatorIds.ZTEST) - .appendCacheables(fields) + .appendCacheable(successCount1) + .appendCacheable(sample1Size) + .appendCacheable(successCount2) + .appendCacheable(sample2Size) .build(); } } diff --git a/extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/TestStatsTest.java b/extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/TestStatsTest.java index bb6ec417c6b2..3d2b93684b33 100644 --- a/extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/TestStatsTest.java +++ b/extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/TestStatsTest.java @@ -30,36 +30,34 @@ import java.util.List; import java.util.Map; -/** - * Created by chunchen on 4/23/17. - */ public class TestStatsTest { @Test public void testCompute() { ZtestPostAggregator ztestPostAggregator; PvaluefromZscorePostAggregator pvaluePostAggregator; - ConstantPostAggregator constPostAgg1, constPostAgg2, constPostAgg3, constPostAgg4; + ConstantPostAggregator successCount1, sample1Size, successCount2, sample2Size; - constPostAgg1 = new ConstantPostAggregator("successCountPopulation1", 39244); - constPostAgg2 = new ConstantPostAggregator("sampleSizePopulation1", 394298); - constPostAgg3 = new ConstantPostAggregator("successCountPopulation2", 8991275); - constPostAgg4 = new ConstantPostAggregator("sampleSizePopulation2", 9385573); + successCount1 = new ConstantPostAggregator("successCountPopulation1", 39244); + sample1Size = new ConstantPostAggregator("sampleSizePopulation1", 394298); + successCount2 = new ConstantPostAggregator("successCountPopulation2", 8991275); + sample2Size = new ConstantPostAggregator("sampleSizePopulation2", 9385573); - List postAggregatorList = - Lists.newArrayList( - (PostAggregator) constPostAgg1, - constPostAgg2, - constPostAgg3, - constPostAgg4 - ); + List postAggregatorList; + postAggregatorList = Lists.newArrayList( + successCount1, + sample1Size, + successCount2, + sample2Size + ); - Map metricValues = new HashMap(); + Map metricValues = new HashMap<>(); for (PostAggregator pa : postAggregatorList) { metricValues.put(pa.getName(), ((ConstantPostAggregator) pa).getConstantValue()); } - ztestPostAggregator = new ZtestPostAggregator("zscore", postAggregatorList); + ztestPostAggregator = new ZtestPostAggregator("zscore", successCount1, + sample1Size, successCount2, sample2Size); double zscore = ((Number) ztestPostAggregator.compute(metricValues)).doubleValue(); @@ -67,7 +65,7 @@ public void testCompute() { System.out.print("zscore = " + zscore + "\n"); System.out.print("pvalue = " + - pvaluePostAggregator.compute(ImmutableMap.of("zscore", -1783.8762354220219))); + pvaluePostAggregator.compute(ImmutableMap.of("zscore", -1783.8762354220219))); Assert.assertEquals(-1783.8762354220219, zscore, 0.0001); From d8c16b200951578e69668be92f21f8b7c4fd2871 Mon Sep 17 00:00:00 2001 From: ChungHo Chen Date: Fri, 25 Aug 2017 16:01:53 +0800 Subject: [PATCH 3/6] fix checkstyle violations using druid_intellij_formatting.xml after merge upstream/master --- .../PvaluefromZscorePostAggregator.java | 32 +++++++++++-------- .../teststats/ZtestPostAggregator.java | 18 ++++++++--- .../aggregation/teststats/TestStatsTest.java | 20 ++++++++---- 3 files changed, 45 insertions(+), 25 deletions(-) diff --git a/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/PvaluefromZscorePostAggregator.java b/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/PvaluefromZscorePostAggregator.java index 6a17271c0b1c..62bda43c3d8d 100644 --- a/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/PvaluefromZscorePostAggregator.java +++ b/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/PvaluefromZscorePostAggregator.java @@ -50,8 +50,14 @@ public PvaluefromZscorePostAggregator( @JsonProperty("zScore") PostAggregator zScore ) { - Preconditions.checkNotNull(name, "Must have a valid, non-null post-aggregator"); - Preconditions.checkNotNull(zScore, "Must have a valid, non-null post-aggregator"); + Preconditions.checkNotNull( + name, + "Must have a valid, non-null post-aggregator" + ); + Preconditions.checkNotNull( + zScore, + "Must have a valid, non-null post-aggregator" + ); this.name = name; this.zScore = zScore; } @@ -76,8 +82,8 @@ public Comparator getComparator() public Object compute(Map combinedAggregators) { - double zScoreValue = - ((Number) zScore.compute(combinedAggregators)).doubleValue(); + double zScoreValue = ((Number) zScore.compute(combinedAggregators)) + .doubleValue(); zScoreValue = Math.abs(zScoreValue); return 2 * (1 - cumulativeProbability(zScoreValue)); @@ -104,7 +110,11 @@ public String getName() @Override public PostAggregator decorate(Map aggregators) { - return new PvaluefromZscorePostAggregator(name, Iterables.getOnlyElement(Queries.decoratePostAggregators(Collections.singletonList(zScore), aggregators))); + return new PvaluefromZscorePostAggregator( + name, + Iterables.getOnlyElement(Queries.decoratePostAggregators( + Collections.singletonList(zScore), aggregators)) + ); } @Override @@ -118,18 +128,14 @@ public int hashCode() @Override public String toString() { - return "PvaluefromZscorePostAggregator{" + - "name'" + name + '\'' + - ", zScore=" + zScore + - '}'; + return "PvaluefromZscorePostAggregator{" + "name'" + name + '\'' + + ", zScore=" + zScore + '}'; } @Override public byte[] getCacheKey() { - return new CacheKeyBuilder( - PostAggregatorIds.PVALUE_FROM_ZTEST) - .appendCacheable(zScore) - .build(); + return new CacheKeyBuilder(PostAggregatorIds.PVALUE_FROM_ZTEST) + .appendCacheable(zScore).build(); } } diff --git a/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/ZtestPostAggregator.java b/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/ZtestPostAggregator.java index 58004b77d1c1..e68f84c06b05 100644 --- a/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/ZtestPostAggregator.java +++ b/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/ZtestPostAggregator.java @@ -56,7 +56,6 @@ public class ZtestPostAggregator implements PostAggregator private final PostAggregator successCount2; private final PostAggregator sample2Size; - @JsonCreator public ZtestPostAggregator( @JsonProperty("name") String name, @@ -119,13 +118,22 @@ public String getName() @Override public ZtestPostAggregator decorate(Map aggregators) { - return new ZtestPostAggregator(name, Iterables.getOnlyElement(Queries.decoratePostAggregators(Collections.singletonList(successCount1), aggregators)), Iterables.getOnlyElement(Queries.decoratePostAggregators(Collections.singletonList(sample1Size), aggregators)), Iterables.getOnlyElement(Queries.decoratePostAggregators(Collections.singletonList(successCount2), aggregators)), Iterables.getOnlyElement(Queries.decoratePostAggregators(Collections.singletonList(sample2Size), aggregators))); + return new ZtestPostAggregator( + name, + Iterables + .getOnlyElement(Queries.decoratePostAggregators(Collections.singletonList(successCount1), aggregators)), + Iterables.getOnlyElement(Queries.decoratePostAggregators(Collections.singletonList(sample1Size), aggregators)), + Iterables + .getOnlyElement(Queries.decoratePostAggregators(Collections.singletonList(successCount2), aggregators)), + Iterables.getOnlyElement(Queries.decoratePostAggregators(Collections.singletonList(sample2Size), aggregators)) + ); } /** - * 1. calculating zscore for two-sample Z test. IOW, - * using z-test statistic for testing the difference of two population proportions. - * 2. converting binary variables (e.g. success or not) to continuous variables (e.g. conversion rate). + * 1. calculating zscore for two-sample Z test. IOW, using z-test statistic + * for testing the difference of two population proportions. 2. converting + * binary variables (e.g. success or not) to continuous variables (e.g. + * conversion rate). * * @param s1count - success count of population 1 * @param p1count - sample size of population 1 diff --git a/extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/TestStatsTest.java b/extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/TestStatsTest.java index 3d2b93684b33..9d40ae6ec556 100644 --- a/extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/TestStatsTest.java +++ b/extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/TestStatsTest.java @@ -30,10 +30,12 @@ import java.util.List; import java.util.Map; -public class TestStatsTest { +public class TestStatsTest +{ @Test - public void testCompute() { + public void testCompute() + { ZtestPostAggregator ztestPostAggregator; PvaluefromZscorePostAggregator pvaluePostAggregator; ConstantPostAggregator successCount1, sample1Size, successCount2, sample2Size; @@ -57,7 +59,8 @@ public void testCompute() { } ztestPostAggregator = new ZtestPostAggregator("zscore", successCount1, - sample1Size, successCount2, sample2Size); + sample1Size, successCount2, sample2Size + ); double zscore = ((Number) ztestPostAggregator.compute(metricValues)).doubleValue(); @@ -65,11 +68,14 @@ public void testCompute() { System.out.print("zscore = " + zscore + "\n"); System.out.print("pvalue = " + - pvaluePostAggregator.compute(ImmutableMap.of("zscore", -1783.8762354220219))); + pvaluePostAggregator.compute(ImmutableMap.of("zscore", -1783.8762354220219))); Assert.assertEquals(-1783.8762354220219, - zscore, 0.0001); - Assert.assertNotEquals(0.0, - ztestPostAggregator.compute(metricValues)); + zscore, 0.0001 + ); + Assert.assertNotEquals( + 0.0, + ztestPostAggregator.compute(metricValues) + ); } } From b937e78c99dc0a1427177e039002691313e9d277 Mon Sep 17 00:00:00 2001 From: ChungHo Chen Date: Fri, 25 Aug 2017 19:27:17 +0800 Subject: [PATCH 4/6] add @Override annotation per CI log --- .../druid/query/aggregation/teststats/ZtestPostAggregator.java | 1 + 1 file changed, 1 insertion(+) diff --git a/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/ZtestPostAggregator.java b/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/ZtestPostAggregator.java index e68f84c06b05..d7ba9b96a0ef 100644 --- a/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/ZtestPostAggregator.java +++ b/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/ZtestPostAggregator.java @@ -162,6 +162,7 @@ private double zScoreTwoSamples(double s1count, double p1count, double s2count, } } + @Override public int hashCode() { int result = name != null ? name.hashCode() : 0; From 2604aee030339b49bd9991731f10ea640eb7e8e4 Mon Sep 17 00:00:00 2001 From: ChungHo Chen Date: Tue, 19 Sep 2017 22:36:53 -0700 Subject: [PATCH 5/6] make changes per review comments/discussions --- .../development/extensions-core/test-stats.md | 4 +- .../PvaluefromZscorePostAggregator.java | 30 +++++++- .../teststats/ZtestPostAggregator.java | 74 +++++++++++++++---- .../PvaluefromZscorePostAggregatorTest.java | 65 ++++++++++++++++ ...Test.java => ZtestPostAggregatorTest.java} | 42 ++++++----- 5 files changed, 179 insertions(+), 36 deletions(-) create mode 100644 extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/PvaluefromZscorePostAggregatorTest.java rename extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/{TestStatsTest.java => ZtestPostAggregatorTest.java} (73%) diff --git a/docs/content/development/extensions-core/test-stats.md b/docs/content/development/extensions-core/test-stats.md index ef4311f37873..681851af89d1 100644 --- a/docs/content/development/extensions-core/test-stats.md +++ b/docs/content/development/extensions-core/test-stats.md @@ -4,7 +4,7 @@ layout: doc_page # Test Stats Aggregators -Incorporates test statistics related aggregators, including z-score and p-value. Please refer to [https://www.paypal-engineering.com/2017/06/29/democratizing-experimentation-data-for-product-innovations/](https://www.paypal-engineering.com/2017/06/29/democratizing-experimentation-data-for-product-innovations/) for math background and details, although its input spec and example are out of date. +Incorporates test statistics related aggregators, including z-score and p-value. Please refer to [https://www.paypal-engineering.com/2017/06/29/democratizing-experimentation-data-for-product-innovations/](https://www.paypal-engineering.com/2017/06/29/democratizing-experimentation-data-for-product-innovations/) for math background and details. Make sure to include `druid-stats` extension in order to use these aggregrators. @@ -31,7 +31,7 @@ S.E. = sqrt{ p1 * ( 1 - p1 )/n1 + p2 * (1 - p2)/n2) } "successCount1": success count of sample 1, "sample1Size": sample 1 size, "successCount2": success count of sample 2, - "sample2size" : sample 2 size + "sample2Size" : sample 2 size } ``` diff --git a/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/PvaluefromZscorePostAggregator.java b/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/PvaluefromZscorePostAggregator.java index 62bda43c3d8d..f933e7b8cc18 100644 --- a/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/PvaluefromZscorePostAggregator.java +++ b/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/PvaluefromZscorePostAggregator.java @@ -117,6 +117,31 @@ public PostAggregator decorate(Map aggregators) ); } + @JsonProperty + public PostAggregator getZscore() + { + return zScore; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + PvaluefromZscorePostAggregator that = (PvaluefromZscorePostAggregator) o; + + if (!name.equals(that.name)) { + return false; + } + + return (zScore.equals(that.zScore)); + } + @Override public int hashCode() { @@ -128,8 +153,9 @@ public int hashCode() @Override public String toString() { - return "PvaluefromZscorePostAggregator{" + "name'" + name + '\'' - + ", zScore=" + zScore + '}'; + return "PvaluefromZscorePostAggregator{" + + "name='" + name + '\'' + + ", zScore=" + zScore + '}'; } @Override diff --git a/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/ZtestPostAggregator.java b/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/ZtestPostAggregator.java index d7ba9b96a0ef..cfabece347c3 100644 --- a/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/ZtestPostAggregator.java +++ b/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/ZtestPostAggregator.java @@ -37,16 +37,16 @@ import java.util.Map; import java.util.Set; -/* - * 1. calculating zscore using two-sample Z-Test. IOW, - * using z-test statistic for testing the difference of - * two population proportions. - * 2. converting binary variables (e.g. success or not) to continuous variables (e.g. conversion rate). - * - Please refer to http://math.mercyhurst.edu/~griff/courses/m109/Lectures/old/Sum_06/sect8.1.pdf - for more details. - http://facweb.cs.depaul.edu/sjost/csc423/documents/test-descriptions/indep-z.pdf -*/ +/** + * 1. calculating zscore using two-sample Z-Test. IOW, + * using z-test statistic for testing the difference of + * two population proportions. + * 2. converting binary variables (e.g. success or not) to continuous variables (e.g. conversion rate). + *

+ * Please refer to http://math.mercyhurst.edu/~griff/courses/m109/Lectures/old/Sum_06/sect8.1.pdf + * and http://facweb.cs.depaul.edu/sjost/csc423/documents/test-descriptions/indep-z.pdf + * for more details. + */ @JsonTypeName("zscore2sample") public class ZtestPostAggregator implements PostAggregator { @@ -63,7 +63,6 @@ public ZtestPostAggregator( @JsonProperty("sample1Size") PostAggregator sample1Size, @JsonProperty("successCount2") PostAggregator successCount2, @JsonProperty("sample2Size") PostAggregator sample2Size - ) { Preconditions.checkNotNull(name, "Must have a valid, non-null post-aggregator name"); @@ -162,6 +161,51 @@ private double zScoreTwoSamples(double s1count, double p1count, double s2count, } } + @JsonProperty + public PostAggregator getSuccessCount1() + { + return successCount1; + } + + @JsonProperty + public PostAggregator getSample1Size() + { + return sample1Size; + } + + @JsonProperty + public PostAggregator getSuccessCount2() + { + return successCount2; + } + + @JsonProperty + public PostAggregator getSample2Size() + { + return sample2Size; + } + + @Override + public boolean equals(Object o) + { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + ZtestPostAggregator that = (ZtestPostAggregator) o; + + if (!name.equals(that.name)) { + return false; + } + + return (successCount1.equals(that.successCount1) && + sample1Size.equals(that.sample1Size) && + successCount2.equals(that.successCount2) && sample2Size.equals(that.sample2Size)); + } + @Override public int hashCode() { @@ -178,10 +222,10 @@ public String toString() { return "ZtestPostAggregator{" + "name='" + name + '\'' + - ", successCount1" + successCount1 + - ", sample1Size" + sample1Size + - ", successCount2" + successCount2 + - ", sample2size" + sample2Size + + ", successCount1='" + successCount1 + '\'' + + ", sample1Size='" + sample1Size + '\'' + + ", successCount2='" + successCount2 + '\'' + + ", sample2size='" + sample2Size + '}'; } diff --git a/extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/PvaluefromZscorePostAggregatorTest.java b/extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/PvaluefromZscorePostAggregatorTest.java new file mode 100644 index 000000000000..078225b62eed --- /dev/null +++ b/extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/PvaluefromZscorePostAggregatorTest.java @@ -0,0 +1,65 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package io.druid.query.aggregation.teststats; + +import com.google.common.collect.ImmutableMap; +import io.druid.jackson.DefaultObjectMapper; +import io.druid.query.aggregation.post.ConstantPostAggregator; +import org.junit.Assert; +import org.junit.Test; + +public class PvaluefromZscorePostAggregatorTest +{ + PvaluefromZscorePostAggregator pvaluefromZscorePostAggregator; + ConstantPostAggregator zscore; + + @Test + public void testPvaluefromZscorePostAggregator() throws Exception + { + zscore = new ConstantPostAggregator("zscore", -1783.8762354220219); + + pvaluefromZscorePostAggregator = new PvaluefromZscorePostAggregator("pvalue", zscore); + + System.out.print("zscore = " + zscore + "\n"); + + double pvalue = ((Number) pvaluefromZscorePostAggregator.compute(ImmutableMap.of( + "zscore", + -1783.8762354220219 + ))).doubleValue(); + + System.out.print("pvalue = " + pvalue + "\n"); + + /* Assert P-value is positive and very small */ + Assert.assertTrue(pvalue >= 0 && pvalue < 0.00001); + + System.out.print(pvaluefromZscorePostAggregator.toString()); + } + + @Test + public void testSerde() throws Exception + { + DefaultObjectMapper mapper = new DefaultObjectMapper(); + PvaluefromZscorePostAggregator postAggregator1 = mapper.readValue(mapper.writeValueAsString( + pvaluefromZscorePostAggregator), PvaluefromZscorePostAggregator.class); + + Assert.assertEquals(pvaluefromZscorePostAggregator, postAggregator1); + } + +} diff --git a/extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/TestStatsTest.java b/extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/ZtestPostAggregatorTest.java similarity index 73% rename from extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/TestStatsTest.java rename to extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/ZtestPostAggregatorTest.java index 9d40ae6ec556..e4e93cf8b200 100644 --- a/extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/TestStatsTest.java +++ b/extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/ZtestPostAggregatorTest.java @@ -19,8 +19,8 @@ package io.druid.query.aggregation.teststats; -import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; +import io.druid.jackson.DefaultObjectMapper; import io.druid.query.aggregation.PostAggregator; import io.druid.query.aggregation.post.ConstantPostAggregator; import org.junit.Assert; @@ -30,14 +30,13 @@ import java.util.List; import java.util.Map; -public class TestStatsTest +public class ZtestPostAggregatorTest { + ZtestPostAggregator ztestPostAggregator; @Test - public void testCompute() + public void testZtestPostAggregator() throws Exception { - ZtestPostAggregator ztestPostAggregator; - PvaluefromZscorePostAggregator pvaluePostAggregator; ConstantPostAggregator successCount1, sample1Size, successCount2, sample2Size; successCount1 = new ConstantPostAggregator("successCountPopulation1", 39244); @@ -58,24 +57,33 @@ public void testCompute() metricValues.put(pa.getName(), ((ConstantPostAggregator) pa).getConstantValue()); } - ztestPostAggregator = new ZtestPostAggregator("zscore", successCount1, - sample1Size, successCount2, sample2Size + ztestPostAggregator = new ZtestPostAggregator( + "zscore", + successCount1, + sample1Size, + successCount2, + sample2Size ); double zscore = ((Number) ztestPostAggregator.compute(metricValues)).doubleValue(); - pvaluePostAggregator = new PvaluefromZscorePostAggregator("pvalue", ztestPostAggregator); - - System.out.print("zscore = " + zscore + "\n"); - System.out.print("pvalue = " + - pvaluePostAggregator.compute(ImmutableMap.of("zscore", -1783.8762354220219))); - Assert.assertEquals(-1783.8762354220219, zscore, 0.0001 ); - Assert.assertNotEquals( - 0.0, - ztestPostAggregator.compute(metricValues) - ); + + System.out.print(ztestPostAggregator.toString()); + } + + @Test + public void testSerde() throws Exception + { + DefaultObjectMapper mapper = new DefaultObjectMapper(); + ZtestPostAggregator postAggregator1 = + mapper.readValue( + mapper.writeValueAsString(ztestPostAggregator), + ZtestPostAggregator.class + ); + + Assert.assertEquals(ztestPostAggregator, postAggregator1); } } From f89472cb124f679971c7cdbc419244544cca8d80 Mon Sep 17 00:00:00 2001 From: ChungHo Chen Date: Thu, 5 Oct 2017 15:25:25 -0700 Subject: [PATCH 6/6] remove some blocks per review comments --- .../teststats/PvaluefromZscorePostAggregator.java | 2 +- .../query/aggregation/teststats/ZtestPostAggregator.java | 2 +- .../teststats/PvaluefromZscorePostAggregatorTest.java | 6 ------ .../aggregation/teststats/ZtestPostAggregatorTest.java | 2 -- 4 files changed, 2 insertions(+), 10 deletions(-) diff --git a/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/PvaluefromZscorePostAggregator.java b/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/PvaluefromZscorePostAggregator.java index f933e7b8cc18..9e4099797c69 100644 --- a/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/PvaluefromZscorePostAggregator.java +++ b/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/PvaluefromZscorePostAggregator.java @@ -145,7 +145,7 @@ public boolean equals(Object o) @Override public int hashCode() { - int result = name != null ? name.hashCode() : 0; + int result = name.hashCode(); result = 31 * result + zScore.hashCode(); return result; } diff --git a/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/ZtestPostAggregator.java b/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/ZtestPostAggregator.java index cfabece347c3..9ac448b0ece0 100644 --- a/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/ZtestPostAggregator.java +++ b/extensions-core/stats/src/main/java/io/druid/query/aggregation/teststats/ZtestPostAggregator.java @@ -209,7 +209,7 @@ public boolean equals(Object o) @Override public int hashCode() { - int result = name != null ? name.hashCode() : 0; + int result = name.hashCode(); result = 31 * result + successCount1.hashCode(); result = 31 * result + sample1Size.hashCode(); result = 31 * result + successCount2.hashCode(); diff --git a/extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/PvaluefromZscorePostAggregatorTest.java b/extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/PvaluefromZscorePostAggregatorTest.java index 078225b62eed..811a5fb8f256 100644 --- a/extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/PvaluefromZscorePostAggregatorTest.java +++ b/extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/PvaluefromZscorePostAggregatorTest.java @@ -37,19 +37,13 @@ public void testPvaluefromZscorePostAggregator() throws Exception pvaluefromZscorePostAggregator = new PvaluefromZscorePostAggregator("pvalue", zscore); - System.out.print("zscore = " + zscore + "\n"); - double pvalue = ((Number) pvaluefromZscorePostAggregator.compute(ImmutableMap.of( "zscore", -1783.8762354220219 ))).doubleValue(); - System.out.print("pvalue = " + pvalue + "\n"); - /* Assert P-value is positive and very small */ Assert.assertTrue(pvalue >= 0 && pvalue < 0.00001); - - System.out.print(pvaluefromZscorePostAggregator.toString()); } @Test diff --git a/extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/ZtestPostAggregatorTest.java b/extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/ZtestPostAggregatorTest.java index e4e93cf8b200..74002a421647 100644 --- a/extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/ZtestPostAggregatorTest.java +++ b/extensions-core/stats/src/test/java/io/druid/query/aggregation/teststats/ZtestPostAggregatorTest.java @@ -70,8 +70,6 @@ public void testZtestPostAggregator() throws Exception Assert.assertEquals(-1783.8762354220219, zscore, 0.0001 ); - - System.out.print(ztestPostAggregator.toString()); } @Test