From ad64906c8869ebbe6c903c4c41c912a3e9432253 Mon Sep 17 00:00:00 2001 From: Gabor Szadovszky Date: Thu, 24 Jun 2021 13:52:08 +0200 Subject: [PATCH] PARQUET-2043: Fail for undeclared dependencies The purpose of this change is to fail the build if any classes are used from dependencies that are not declared directly (i.e. transitive dependencies). Only classes from direct dependencies shall be used. This change also fixes some references that broke this rule. --- parquet-arrow/pom.xml | 20 ------ parquet-avro/pom.xml | 24 ++++--- parquet-benchmarks/pom.xml | 43 +++++++++---- parquet-cli/pom.xml | 62 ++++++++++++++++++- parquet-column/pom.xml | 10 +++ parquet-common/pom.xml | 6 -- parquet-encoding/pom.xml | 6 ++ parquet-format-structures/pom.xml | 12 +--- parquet-hadoop-bundle/pom.xml | 15 +++++ parquet-hadoop/pom.xml | 50 +++++++++++++-- .../SchemaControlEncryptionTest.java | 15 +++-- parquet-jackson/pom.xml | 15 +++++ parquet-pig-bundle/pom.xml | 15 +++++ parquet-pig/pom.xml | 32 +++++++--- parquet-protobuf/pom.xml | 16 ++++- parquet-scala/pom.xml | 6 ++ parquet-thrift/pom.xml | 42 +++++++++---- pom.xml | 55 ++++++++++++++++ 18 files changed, 353 insertions(+), 91 deletions(-) diff --git a/parquet-arrow/pom.xml b/parquet-arrow/pom.xml index 76e018165e..81d6e7ab49 100644 --- a/parquet-arrow/pom.xml +++ b/parquet-arrow/pom.xml @@ -42,26 +42,6 @@ arrow-vector ${arrow.version} - - org.apache.arrow - arrow-memory - ${arrow.version} - - - org.apache.arrow - arrow-format - ${arrow.version} - - - org.apache.parquet - parquet-common - ${project.version} - - - org.apache.parquet - parquet-encoding - ${project.version} - org.apache.parquet parquet-column diff --git a/parquet-avro/pom.xml b/parquet-avro/pom.xml index 2679ea06e5..c4d12e484f 100644 --- a/parquet-avro/pom.xml +++ b/parquet-avro/pom.xml @@ -45,7 +45,7 @@ org.apache.parquet - parquet-format-structures + parquet-common ${project.version} @@ -53,6 +53,11 @@ avro ${avro.version} + + org.slf4j + slf4j-api + ${slf4j.version} + it.unimi.dsi fastutil @@ -61,14 +66,17 @@
org.apache.hadoop hadoop-client - ${hadoop.version} provided - - - org.slf4j - slf4j-log4j12 - - + + + org.apache.hadoop + hadoop-mapreduce-client-core + provided + + + org.apache.hadoop + hadoop-common + provided com.google.guava diff --git a/parquet-benchmarks/pom.xml b/parquet-benchmarks/pom.xml index 91f7106a20..673ff86c6a 100644 --- a/parquet-benchmarks/pom.xml +++ b/parquet-benchmarks/pom.xml @@ -37,26 +37,24 @@ + + org.apache.parquet + parquet-hadoop + ${project.version} + org.apache.parquet - parquet-encoding + parquet-column ${project.version} - org.apache.parquet - parquet-hadoop - ${project.version} + org.apache.parquet + parquet-common + ${project.version} org.apache.hadoop hadoop-client - ${hadoop.version} - - - org.slf4j - slf4j-log4j12 - - org.openjdk.jmh @@ -79,6 +77,11 @@ fastutil ${fastutil.version} + + org.slf4j + slf4j-api + ${slf4j.version} + @@ -119,6 +122,24 @@ + + + org.apache.maven.plugins + maven-dependency-plugin + + + + analyze-only + + + true + + + + diff --git a/parquet-cli/pom.xml b/parquet-cli/pom.xml index 9c7dc13c1c..64a4549204 100644 --- a/parquet-cli/pom.xml +++ b/parquet-cli/pom.xml @@ -38,6 +38,26 @@ parquet-avro ${project.version} + + org.apache.parquet + parquet-format-structures + ${project.version} + + + org.apache.parquet + parquet-common + ${project.version} + + + org.apache.parquet + parquet-column + ${project.version} + + + org.apache.parquet + parquet-hadoop + ${project.version} + org.apache.avro avro @@ -47,6 +67,7 @@ com.github.luben zstd-jni ${zstd-jni.version} + runtime org.slf4j @@ -67,12 +88,23 @@ org.apache.parquet parquet-jackson ${project.version} + runtime ${jackson.groupId} jackson-databind ${jackson-databind.version} + + ${jackson.groupId} + jackson-core + ${jackson.version} + + + ${jackson.groupId} + jackson-annotations + ${jackson.version} + com.beust jcommander @@ -93,7 +125,35 @@ org.apache.hadoop hadoop-client - ${hadoop.version} + provided + + + org.apache.hadoop + hadoop-common + provided + + + 
com.google.code.findbugs + jsr305 + ${jsr305.version} + provided + + + log4j + log4j + 1.2.17 + provided + + + commons-io + commons-io + 2.4 + provided + + + commons-logging + commons-logging + 1.1.3 provided diff --git a/parquet-column/pom.xml b/parquet-column/pom.xml index a80fe0b1b8..ab10a5f37d 100644 --- a/parquet-column/pom.xml +++ b/parquet-column/pom.xml @@ -70,6 +70,16 @@ zero-allocation-hashing ${net.openhft.version} + + org.apache.yetus + audience-annotations + ${yetus.audience-annotations.version} + + + org.slf4j + slf4j-api + ${slf4j.version} + com.carrotsearch diff --git a/parquet-common/pom.xml b/parquet-common/pom.xml index c090fcba60..1a0f2f9f5e 100644 --- a/parquet-common/pom.xml +++ b/parquet-common/pom.xml @@ -61,12 +61,6 @@ ${slf4j.version} test - - - org.apache.yetus - audience-annotations - 0.12.0 - diff --git a/parquet-encoding/pom.xml b/parquet-encoding/pom.xml index e5485ca96c..2b27c19eaa 100644 --- a/parquet-encoding/pom.xml +++ b/parquet-encoding/pom.xml @@ -42,6 +42,12 @@ ${project.version} + + org.slf4j + slf4j-api + ${slf4j.version} + + org.slf4j slf4j-simple diff --git a/parquet-format-structures/pom.xml b/parquet-format-structures/pom.xml index a48ef96981..ce72ed5353 100644 --- a/parquet-format-structures/pom.xml +++ b/parquet-format-structures/pom.xml @@ -47,7 +47,7 @@ unpack - generate-sources + initialize unpack @@ -151,21 +151,11 @@ - - org.slf4j - slf4j-api - ${slf4j.version} - org.apache.thrift libthrift ${format.thrift.version} - - javax.annotation - javax.annotation-api - 1.3.2 - diff --git a/parquet-hadoop-bundle/pom.xml b/parquet-hadoop-bundle/pom.xml index 2aae6e5d5c..d15792f241 100644 --- a/parquet-hadoop-bundle/pom.xml +++ b/parquet-hadoop-bundle/pom.xml @@ -84,6 +84,21 @@ + + + org.apache.maven.plugins + maven-dependency-plugin + + + + analyze-only + + + true + + + + diff --git a/parquet-hadoop/pom.xml b/parquet-hadoop/pom.xml index 8e42d42096..d676efac51 100644 --- a/parquet-hadoop/pom.xml +++ b/parquet-hadoop/pom.xml 
@@ -46,6 +46,11 @@ parquet-format-structures ${project.version} + + org.apache.parquet + parquet-common + ${project.version} + org.apache.parquet parquet-column @@ -56,19 +61,29 @@ org.apache.hadoop hadoop-client + provided + + + org.apache.hadoop + hadoop-common + provided + + + org.apache.hadoop + hadoop-mapreduce-client-core + provided + + + org.apache.hadoop + hadoop-annotations ${hadoop.version} provided - - - org.slf4j - slf4j-log4j12 - - org.apache.parquet parquet-jackson ${project.version} + runtime ${jackson.groupId} @@ -127,6 +142,28 @@ 4.6.0 test + + org.slf4j + slf4j-api + ${slf4j.version} + + + commons-io + commons-io + 2.4 + test + + + net.openhft + zero-allocation-hashing + ${net.openhft.version} + test + + + org.apache.yetus + audience-annotations + ${yetus.audience-annotations.version} + @@ -159,6 +196,7 @@ com.github.rdblue brotli-codec ${brotli-codec.version} + runtime true diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/crypto/propertiesfactory/SchemaControlEncryptionTest.java b/parquet-hadoop/src/test/java/org/apache/parquet/crypto/propertiesfactory/SchemaControlEncryptionTest.java index 17fda97d19..862ae672c6 100644 --- a/parquet-hadoop/src/test/java/org/apache/parquet/crypto/propertiesfactory/SchemaControlEncryptionTest.java +++ b/parquet-hadoop/src/test/java/org/apache/parquet/crypto/propertiesfactory/SchemaControlEncryptionTest.java @@ -19,8 +19,6 @@ package org.apache.parquet.crypto.propertiesfactory; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.parquet.column.ColumnDescriptor; @@ -28,7 +26,6 @@ import org.apache.parquet.crypto.ParquetCipher; import org.apache.parquet.example.data.Group; import org.apache.parquet.example.data.simple.SimpleGroup; -import org.apache.parquet.format.EncryptionAlgorithm; import org.apache.parquet.hadoop.ParquetReader; import 
org.apache.parquet.hadoop.ParquetWriter; import org.apache.parquet.hadoop.api.WriteSupport; @@ -39,6 +36,8 @@ import org.apache.parquet.schema.PrimitiveType; import org.junit.Before; import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.io.IOException; import java.nio.file.Files; @@ -58,13 +57,13 @@ public class SchemaControlEncryptionTest { - private final static Log LOG = LogFactory.getLog(SchemaControlEncryptionTest.class); + private final static Logger LOG = LoggerFactory.getLogger(SchemaControlEncryptionTest.class); private final static int numRecord = 1000; private Random rnd = new Random(5); - + // In the test We use a map to tell WriteSupport which columns to be encrypted with what key. In real use cases, people - // can find whatever easy way to do so basing on how do they get these information, for example people can choose to - // store in HMS, or other metastore. + // can find whatever easy way to do so basing on how do they get these information, for example people can choose to + // store in HMS, or other metastore. 
private Map> cryptoMetadata = new HashMap<>(); private Map testData = new HashMap<>(); @@ -122,7 +121,7 @@ private void runTest(Configuration conf ) throws Exception { encryptParquetFile(file, conf); decryptParquetFileAndValid(file, conf); } - + private void markEncryptColumns() { Map ageMetadata = new HashMap<>(); ageMetadata.put("columnKeyMetaData", "age_key_id"); diff --git a/parquet-jackson/pom.xml b/parquet-jackson/pom.xml index 1aa6fe2071..8121832813 100644 --- a/parquet-jackson/pom.xml +++ b/parquet-jackson/pom.xml @@ -90,6 +90,21 @@ + + + org.apache.maven.plugins + maven-dependency-plugin + + + + analyze-only + + + true + + + + diff --git a/parquet-pig-bundle/pom.xml b/parquet-pig-bundle/pom.xml index e9fa806c88..63661d497e 100644 --- a/parquet-pig-bundle/pom.xml +++ b/parquet-pig-bundle/pom.xml @@ -74,6 +74,21 @@ + + + org.apache.maven.plugins + maven-dependency-plugin + + + + analyze-only + + + true + + + + diff --git a/parquet-pig/pom.xml b/parquet-pig/pom.xml index eaf4e15866..87f37333d3 100644 --- a/parquet-pig/pom.xml +++ b/parquet-pig/pom.xml @@ -48,9 +48,15 @@ org.apache.parquet - parquet-format-structures + parquet-common ${project.version} + + org.apache.parquet + parquet-jackson + ${project.version} + runtime + org.apache.pig pig @@ -65,20 +71,27 @@ provided - org.apache.parquet - parquet-jackson - ${project.version} + org.apache.hadoop + hadoop-mapreduce-client-core + ${hadoop.version} + provided - ${jackson.groupId} - jackson-core - ${jackson.version} + org.apache.hadoop + hadoop-common + ${hadoop.version} + provided ${jackson.groupId} jackson-databind ${jackson-databind.version} + + ${jackson.groupId} + jackson-annotations + ${jackson.version} + org.apache.parquet parquet-column @@ -110,6 +123,11 @@ ${slf4j.version} test + + org.slf4j + slf4j-api + ${slf4j.version} + diff --git a/parquet-protobuf/pom.xml b/parquet-protobuf/pom.xml index d897c02421..c6ae71c6f6 100644 --- a/parquet-protobuf/pom.xml +++ b/parquet-protobuf/pom.xml @@ -87,9 +87,23 
@@ org.apache.hadoop hadoop-client - ${hadoop.version} provided + + org.apache.hadoop + hadoop-mapreduce-client-core + provided + + + org.apache.hadoop + hadoop-common + provided + + + org.slf4j + slf4j-api + ${slf4j.version} + org.slf4j slf4j-simple diff --git a/parquet-scala/pom.xml b/parquet-scala/pom.xml index e33d6f40bd..4420cfe2c0 100644 --- a/parquet-scala/pom.xml +++ b/parquet-scala/pom.xml @@ -64,6 +64,12 @@ 3.0.1 test + + org.scalactic + scalactic_${scala.binary.version} + 3.0.1 + test + org.slf4j slf4j-simple diff --git a/parquet-thrift/pom.xml b/parquet-thrift/pom.xml index 07020de222..7f08ca6176 100644 --- a/parquet-thrift/pom.xml +++ b/parquet-thrift/pom.xml @@ -47,17 +47,25 @@ parquet-hadoop ${project.version} + + org.apache.parquet + parquet-common + ${project.version} + org.apache.hadoop hadoop-client - ${hadoop.version} provided - - - org.slf4j - slf4j-log4j12 - - + + + org.apache.hadoop + hadoop-mapreduce-client-core + provided + + + org.apache.hadoop + hadoop-common + provided com.twitter.elephantbird @@ -87,16 +95,22 @@ org.apache.parquet parquet-jackson ${project.version} + runtime ${jackson.groupId} - jackson-core - ${jackson.version} + jackson-databind + ${jackson-databind.version} ${jackson.groupId} - jackson-databind - ${jackson-databind.version} + jackson-annotations + ${jackson.version} + + + com.google.guava + guava + ${guava.version} org.apache.parquet @@ -135,6 +149,11 @@ ${thrift.version} provided + + org.slf4j + slf4j-api + ${slf4j.version} + org.slf4j slf4j-simple @@ -155,7 +174,6 @@ test-jar test - diff --git a/pom.xml b/pom.xml index 090ae96269..f143b08e87 100644 --- a/pom.xml +++ b/pom.xml @@ -101,12 +101,14 @@ 1.10.19 0.9 1.6.0 + 0.13.0 2.3 1.72 1.5.0-1 1.8 + 3.0.2 INFO @@ -147,6 +149,44 @@ + + + + org.apache.hadoop + hadoop-mapreduce-client-core + ${hadoop.version} + + + org.slf4j + slf4j-log4j12 + + + + + org.apache.hadoop + hadoop-client + ${hadoop.version} + + + org.slf4j + slf4j-log4j12 + + + + + org.apache.hadoop + 
hadoop-common + ${hadoop.version} + + + org.slf4j + slf4j-log4j12 + + + + + + @@ -490,6 +530,21 @@ + + org.apache.maven.plugins + maven-dependency-plugin + + + + analyze-only + + + true + true + + + +