From ff99f1e6caa9039053c716b41092534748d046cb Mon Sep 17 00:00:00 2001 From: alanzhao Date: Fri, 19 May 2023 16:43:13 +0800 Subject: [PATCH 01/20] fix:#404 [Bug] Spark loader meet Exception: Class is not registered --- .../loader/spark/HugeGraphSparkLoader.java | 55 +++++++++++-------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/spark/HugeGraphSparkLoader.java b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/spark/HugeGraphSparkLoader.java index 849c818ca..a68f3341e 100644 --- a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/spark/HugeGraphSparkLoader.java +++ b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/spark/HugeGraphSparkLoader.java @@ -17,11 +17,14 @@ package org.apache.hugegraph.loader.spark; +import org.apache.hadoop.hbase.KeyValue; +import org.apache.hadoop.hbase.io.ImmutableBytesWritable; import org.apache.hugegraph.driver.GraphManager; import org.apache.hugegraph.loader.builder.EdgeBuilder; import org.apache.hugegraph.loader.builder.ElementBuilder; import org.apache.hugegraph.loader.builder.VertexBuilder; import org.apache.hugegraph.loader.direct.loader.HBaseDirectLoader; +import org.apache.hugegraph.loader.exception.LoadException; import org.apache.hugegraph.loader.executor.LoadContext; import org.apache.hugegraph.loader.executor.LoadOptions; import org.apache.hugegraph.loader.metrics.LoadDistributeMetrics; @@ -95,36 +98,42 @@ public HugeGraphSparkLoader(String[] args) { this.executor = Executors.newCachedThreadPool(); } - public void load() throws ExecutionException, InterruptedException { - LoadMapping mapping = LoadMapping.of(this.loadOptions.file); - List structs = mapping.structs(); - boolean sinkType = this.loadOptions.sinkType; - if (!sinkType) { - this.loadOptions.copyBackendStoreInfo(mapping.getBackendStoreInfo()); - } - // kryo序列化 - SparkConf conf = new SparkConf().set("spark.serializer", - "org.apache.spark.serializer.KryoSerializer") - .set("spark.kryo.registrationRequired", "true"); + private void registerKryoClasses (SparkConf conf) { try { - conf.registerKryoClasses(new Class[]{ - org.apache.hadoop.hbase.io.ImmutableBytesWritable.class, - org.apache.hadoop.hbase.KeyValue.class, + conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer") + .set("spark.kryo.registrationRequired", "true") + .registerKryoClasses(new Class[] { + ImmutableBytesWritable.class, + KeyValue.class, org.apache.spark.sql.types.StructType.class, StructField[].class, StructField.class, org.apache.spark.sql.types.LongType$.class, org.apache.spark.sql.types.Metadata.class, org.apache.spark.sql.types.StringType$.class, - Class.forName( - "org.apache.spark.internal.io.FileCommitProtocol$TaskCommitMessage"), - Class.forName("scala.reflect.ClassTag$$anon$1"), + org.apache.spark.sql.catalyst.InternalRow.class, + org.apache.spark.sql.catalyst.InternalRow[].class, + Class.forName("org.apache.spark.internal.io." + + "FileCommitProtocol$TaskCommitMessage"), Class.forName("scala.collection.immutable.Set$EmptySet$"), Class.forName("org.apache.spark.sql.types.DoubleType$") - }); + }); } catch (ClassNotFoundException e) { LOG.error("spark kryo serialized registration failed"); + throw new LoadException("spark kryo serialized registration failed", e); } + } + + public void load() throws ExecutionException, InterruptedException { + LoadMapping mapping = LoadMapping.of(this.loadOptions.file); + List structs = mapping.structs(); + boolean sinkType = this.loadOptions.sinkType; + if (!sinkType) { + this.loadOptions.copyBackendStoreInfo(mapping.getBackendStoreInfo()); + } + + SparkConf conf = new SparkConf(); + registerKryoClasses(conf); SparkSession session = SparkSession.builder().config(conf).getOrCreate(); SparkContext sc = session.sparkContext(); @@ -325,15 +334,15 @@ private void flush(Map.Entry> builderMap, BatchVertexRequest.Builder req = new BatchVertexRequest.Builder(); req.vertices((List) (Object) graphElements) - .updatingStrategies(updateStrategyMap) - .createIfNotExist(true); + .updatingStrategies(updateStrategyMap) + .createIfNotExist(true); g.updateVertices(req.build()); } else { BatchEdgeRequest.Builder req = new BatchEdgeRequest.Builder(); req.edges((List) (Object) graphElements) - .updatingStrategies(updateStrategyMap) - .checkVertex(isCheckVertex) - .createIfNotExist(true); + .updatingStrategies(updateStrategyMap) + .checkVertex(isCheckVertex) + .createIfNotExist(true); g.updateEdges(req.build()); } } From b12af0a881296af26e8eff53ec5173139a85738c Mon Sep 17 00:00:00 2001 From: alanzhao Date: Fri, 19 May 2023 16:49:44 +0800 Subject: [PATCH 02/20] fix:#404 [Bug] Spark loader meet Exception: Class is not registered --- .../hugegraph/loader/spark/HugeGraphSparkLoader.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/spark/HugeGraphSparkLoader.java b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/spark/HugeGraphSparkLoader.java index a68f3341e..b0b24cba7 100644 --- a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/spark/HugeGraphSparkLoader.java +++ b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/spark/HugeGraphSparkLoader.java @@ -334,15 +334,15 @@ private void flush(Map.Entry> builderMap, BatchVertexRequest.Builder req = new BatchVertexRequest.Builder(); req.vertices((List) (Object) graphElements) - .updatingStrategies(updateStrategyMap) - .createIfNotExist(true); + .updatingStrategies(updateStrategyMap) + .createIfNotExist(true); g.updateVertices(req.build()); } else { BatchEdgeRequest.Builder req = new BatchEdgeRequest.Builder(); req.edges((List) (Object) graphElements) - .updatingStrategies(updateStrategyMap) - .checkVertex(isCheckVertex) - .createIfNotExist(true); + .updatingStrategies(updateStrategyMap) + .checkVertex(isCheckVertex) + .createIfNotExist(true); g.updateEdges(req.build()); } } From bc2c8e93769d869f3dea24beed4227304c972167 Mon Sep 17 00:00:00 2001 From: alanzhao Date: Wed, 14 Jun 2023 12:09:33 +0800 Subject: [PATCH 03/20] fix:#464 Spark loader has dependency conflicts --- hugegraph-loader/pom.xml | 49 +++++++++++++++++++ .../shaded.jakarta.ws.rs.client.ClientBuilder | 1 + ...fish.hk2.extension.ServiceLocatorGenerator | 1 + 3 files changed, 51 insertions(+) create mode 100644 hugegraph-loader/src/main/resources/META-INF/services/shaded.jakarta.ws.rs.client.ClientBuilder create mode 100644 hugegraph-loader/src/main/resources/META-INF/services/shaded.org.glassfish.hk2.extension.ServiceLocatorGenerator diff --git a/hugegraph-loader/pom.xml b/hugegraph-loader/pom.xml index 078e90084..448ae18a2 100644 --- a/hugegraph-loader/pom.xml +++ b/hugegraph-loader/pom.xml @@ -612,6 +612,55 @@ org.apache.maven.plugins maven-compiler-plugin + + org.apache.maven.plugins + maven-shade-plugin + 3.4.1 + + + package + + shade + + + + + org.glassfish.jersey + shaded.org.glassfish.jersey + + + jakarta.ws.rs + shaded.jakarta.ws.rs + + + com.google.common.base + shaded.com.google.common.base + + + org.glassfish.hk2 + shaded.org.glassfish.hk2 + + + org.jvnet.hk2 + shaded.org.jvnet.hk2 + + + + + + + + + *:* + + module-info.class + + + + + + + org.apache.maven.plugins maven-assembly-plugin diff --git a/hugegraph-loader/src/main/resources/META-INF/services/shaded.jakarta.ws.rs.client.ClientBuilder b/hugegraph-loader/src/main/resources/META-INF/services/shaded.jakarta.ws.rs.client.ClientBuilder new file mode 100644 index 000000000..951a606f6 --- /dev/null +++ b/hugegraph-loader/src/main/resources/META-INF/services/shaded.jakarta.ws.rs.client.ClientBuilder @@ -0,0 +1 @@ +shaded.org.glassfish.jersey.client.JerseyClientBuilder \ No newline at end of file diff --git a/hugegraph-loader/src/main/resources/META-INF/services/shaded.org.glassfish.hk2.extension.ServiceLocatorGenerator b/hugegraph-loader/src/main/resources/META-INF/services/shaded.org.glassfish.hk2.extension.ServiceLocatorGenerator new file mode 100644 index 000000000..9daab8813 --- /dev/null +++ b/hugegraph-loader/src/main/resources/META-INF/services/shaded.org.glassfish.hk2.extension.ServiceLocatorGenerator @@ -0,0 +1 @@ +org.jvnet.hk2.external.generator.ServiceLocatorGeneratorImpl \ No newline at end of file From de4275196f3954e0214eb68b86faabdfb6997056 Mon Sep 17 00:00:00 2001 From: alanzhao Date: Wed, 14 Jun 2023 14:07:10 +0800 Subject: [PATCH 04/20] fix:#464 Spark loader has dependency conflicts --- .../loader/spark/HugeGraphSparkLoader.java | 4 ++-- .../shaded.jakarta.ws.rs.client.ClientBuilder | 15 +++++++++++++++ ...lassfish.hk2.extension.ServiceLocatorGenerator | 15 +++++++++++++++ 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/spark/HugeGraphSparkLoader.java b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/spark/HugeGraphSparkLoader.java index 228f2db03..9ec7b2962 100644 --- a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/spark/HugeGraphSparkLoader.java +++ b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/spark/HugeGraphSparkLoader.java @@ -293,8 +293,8 @@ private void parse(Row row, Map.Entry> builde case HDFS: FileSource fileSource = struct.input().asFileSource(); String delimiter = fileSource.delimiter(); - elements = builder.build(fileSource.header(), - row.mkString(delimiter).split(delimiter)); + + elements = builder.build(row); break; case JDBC: Object[] structFields = JavaConverters.asJavaCollection(row.schema().toList()) diff --git a/hugegraph-loader/src/main/resources/META-INF/services/shaded.jakarta.ws.rs.client.ClientBuilder b/hugegraph-loader/src/main/resources/META-INF/services/shaded.jakarta.ws.rs.client.ClientBuilder index 951a606f6..b8956aaf6 100644 --- a/hugegraph-loader/src/main/resources/META-INF/services/shaded.jakarta.ws.rs.client.ClientBuilder +++ b/hugegraph-loader/src/main/resources/META-INF/services/shaded.jakarta.ws.rs.client.ClientBuilder @@ -1 +1,16 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. shaded.org.glassfish.jersey.client.JerseyClientBuilder \ No newline at end of file diff --git a/hugegraph-loader/src/main/resources/META-INF/services/shaded.org.glassfish.hk2.extension.ServiceLocatorGenerator b/hugegraph-loader/src/main/resources/META-INF/services/shaded.org.glassfish.hk2.extension.ServiceLocatorGenerator index 9daab8813..5293215e7 100644 --- a/hugegraph-loader/src/main/resources/META-INF/services/shaded.org.glassfish.hk2.extension.ServiceLocatorGenerator +++ b/hugegraph-loader/src/main/resources/META-INF/services/shaded.org.glassfish.hk2.extension.ServiceLocatorGenerator @@ -1 +1,16 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. org.jvnet.hk2.external.generator.ServiceLocatorGeneratorImpl \ No newline at end of file From 7a3a0a10c6357671fb65b951757e3b9aee16f1be Mon Sep 17 00:00:00 2001 From: alanzhao Date: Wed, 14 Jun 2023 15:02:46 +0800 Subject: [PATCH 05/20] fix:#464 Spark loader has dependency conflicts --- hugegraph-loader/pom.xml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/hugegraph-loader/pom.xml b/hugegraph-loader/pom.xml index 448ae18a2..9d172824e 100644 --- a/hugegraph-loader/pom.xml +++ b/hugegraph-loader/pom.xml @@ -346,6 +346,10 @@ org.apache.commons commons-compress + + gson + com.google.code.gson + @@ -502,6 +506,10 @@ org.apache.hadoop hadoop-mapreduce-client-core + + gson + com.google.code.gson + From eac9b18f643c8cb491509fb39f4c15980562c50b Mon Sep 17 00:00:00 2001 From: alanzhao Date: Wed, 14 Jun 2023 15:25:40 +0800 Subject: [PATCH 06/20] fix:#464 Spark loader has dependency conflicts --- hugegraph-loader/pom.xml | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/hugegraph-loader/pom.xml b/hugegraph-loader/pom.xml index 9d172824e..efd7cb6c3 100644 --- a/hugegraph-loader/pom.xml +++ b/hugegraph-loader/pom.xml @@ -84,6 +84,10 @@ jersey-server org.glassfish.jersey.core + + gson + com.google.code.gson + @@ -346,10 +350,6 @@ org.apache.commons commons-compress - - gson - com.google.code.gson - @@ -506,10 +506,6 @@ org.apache.hadoop hadoop-mapreduce-client-core - - gson - com.google.code.gson - From 48faf3c2e75728f2ffac2b05721d15cf3421becd Mon Sep 17 00:00:00 2001 From: alanzhao Date: Wed, 14 Jun 2023 17:06:10 +0800 Subject: [PATCH 07/20] fix:#464 Spark loader has dependency conflicts --- hugegraph-loader/pom.xml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/hugegraph-loader/pom.xml b/hugegraph-loader/pom.xml index efd7cb6c3..d2dce394a 100644 --- a/hugegraph-loader/pom.xml +++ b/hugegraph-loader/pom.xml @@ -41,12 +41,18 @@ apache-${release.name}-incubating-${project.version} 2.12.3 3.3.1 + 2.8.5 ${project.basedir}/assembly ${assembly.dir}/descriptor ${assembly.dir}/static + + gson + com.google.code.gson + ${gson.version} + com.fasterxml.jackson.module From d782b57786bd77f691344678de18797f8729e430 Mon Sep 17 00:00:00 2001 From: alanzhao Date: Thu, 15 Jun 2023 15:47:39 +0800 Subject: [PATCH 08/20] fix:#464 Spark loader has dependency conflicts --- hugegraph-loader/assembly/descriptor/assembly.xml | 3 +++ .../assembly/static/bin/hugegraph-spark-loader.sh | 2 +- hugegraph-loader/pom.xml | 12 +++++++----- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/hugegraph-loader/assembly/descriptor/assembly.xml b/hugegraph-loader/assembly/descriptor/assembly.xml index 62fe67935..efac0b5c8 100644 --- a/hugegraph-loader/assembly/descriptor/assembly.xml +++ b/hugegraph-loader/assembly/descriptor/assembly.xml @@ -48,6 +48,9 @@ ${project.build.directory} lib + + hugegraph-loader-1.0.0.jar + *.jar diff --git a/hugegraph-loader/assembly/static/bin/hugegraph-spark-loader.sh b/hugegraph-loader/assembly/static/bin/hugegraph-spark-loader.sh index c6bb08b84..dfcb556c6 100755 --- a/hugegraph-loader/assembly/static/bin/hugegraph-spark-loader.sh +++ b/hugegraph-loader/assembly/static/bin/hugegraph-spark-loader.sh @@ -25,7 +25,7 @@ get_params "$@" echo "engine_params: $ENGINE_PARAMS" echo "hugegraph_params: $HUGEGRAPH_PARAMS" -ASSEMBLY_JAR_NAME=$(find "${LIB_DIR}" -name 'hugegraph-loader*.jar') +ASSEMBLY_JAR_NAME=$(find "${LIB_DIR}" -name 'apache-hugegraph-loader-incubating*.jar') DEFAULT_APP_NAME="hugegraph-spark-loader" APP_NAME=${APP_NAME:-$DEFAULT_APP_NAME} diff --git a/hugegraph-loader/pom.xml b/hugegraph-loader/pom.xml index d2dce394a..b5ff5ff53 100644 --- a/hugegraph-loader/pom.xml +++ b/hugegraph-loader/pom.xml @@ -48,11 +48,6 @@ - - gson - com.google.code.gson - ${gson.version} - com.fasterxml.jackson.module @@ -96,6 +91,11 @@ + + gson + com.google.code.gson + ${gson.version} + org.apache.spark spark-sql_${scala.version} @@ -633,6 +633,8 @@ shade + false + ${final.name}-shaded org.glassfish.jersey From 074b6ab6c33d0947817fc9a69d558ecc7130206d Mon Sep 17 00:00:00 2001 From: alanzhao Date: Thu, 15 Jun 2023 16:55:31 +0800 Subject: [PATCH 09/20] fix:#464 Spark loader has dependency conflicts --- hugegraph-dist/scripts/dependency/known-dependencies.txt | 2 +- hugegraph-loader/assembly/static/bin/hugegraph-spark-loader.sh | 2 +- hugegraph-loader/pom.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hugegraph-dist/scripts/dependency/known-dependencies.txt b/hugegraph-dist/scripts/dependency/known-dependencies.txt index 77463237a..17e2bbcf3 100644 --- a/hugegraph-dist/scripts/dependency/known-dependencies.txt +++ b/hugegraph-dist/scripts/dependency/known-dependencies.txt @@ -77,7 +77,7 @@ flatbuffers-1.2.0-3f79e055.jar flatbuffers-java-1.9.0.jar groovy-all-2.4.21.jar gson-2.2.4.jar -gson-2.8.5.jar +gson-2.8.9.jar guava-25.1-jre.jar guava-30.0-jre.jar h2-1.4.199.jar diff --git a/hugegraph-loader/assembly/static/bin/hugegraph-spark-loader.sh b/hugegraph-loader/assembly/static/bin/hugegraph-spark-loader.sh index dfcb556c6..dfbbb5dd7 100755 --- a/hugegraph-loader/assembly/static/bin/hugegraph-spark-loader.sh +++ b/hugegraph-loader/assembly/static/bin/hugegraph-spark-loader.sh @@ -25,7 +25,7 @@ get_params "$@" echo "engine_params: $ENGINE_PARAMS" echo "hugegraph_params: $HUGEGRAPH_PARAMS" -ASSEMBLY_JAR_NAME=$(find "${LIB_DIR}" -name 'apache-hugegraph-loader-incubating*.jar') +ASSEMBLY_JAR_NAME=$(find "${LIB_DIR}" -name '*hugegraph-loader-incubating*.jar') DEFAULT_APP_NAME="hugegraph-spark-loader" APP_NAME=${APP_NAME:-$DEFAULT_APP_NAME} diff --git a/hugegraph-loader/pom.xml b/hugegraph-loader/pom.xml index b5ff5ff53..1687f760f 100644 --- a/hugegraph-loader/pom.xml +++ b/hugegraph-loader/pom.xml @@ -41,7 +41,7 @@ apache-${release.name}-incubating-${project.version} 2.12.3 3.3.1 - 2.8.5 + 2.8.9 ${project.basedir}/assembly ${assembly.dir}/descriptor ${assembly.dir}/static From 4e8b7ff08dd2d9773890df180a3e6f25b9ae8543 Mon Sep 17 00:00:00 2001 From: alanzhao Date: Thu, 15 Jun 2023 17:40:57 +0800 Subject: [PATCH 10/20] fix:#464 Spark loader has dependency conflicts --- hugegraph-dist/scripts/dependency/known-dependencies.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/hugegraph-dist/scripts/dependency/known-dependencies.txt b/hugegraph-dist/scripts/dependency/known-dependencies.txt index 17e2bbcf3..c2edea448 100644 --- a/hugegraph-dist/scripts/dependency/known-dependencies.txt +++ b/hugegraph-dist/scripts/dependency/known-dependencies.txt @@ -77,6 +77,7 @@ flatbuffers-1.2.0-3f79e055.jar flatbuffers-java-1.9.0.jar groovy-all-2.4.21.jar gson-2.2.4.jar +gson-2.8.5.jar gson-2.8.9.jar guava-25.1-jre.jar guava-30.0-jre.jar From d51053a5df4eb418fef58f9d77c585aa59b5ea7a Mon Sep 17 00:00:00 2001 From: alanzhao Date: Thu, 15 Jun 2023 18:16:16 +0800 Subject: [PATCH 11/20] fix:#464 Spark loader has dependency conflicts --- .../apache/hugegraph/loader/spark/HugeGraphSparkLoader.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/spark/HugeGraphSparkLoader.java b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/spark/HugeGraphSparkLoader.java index 9ec7b2962..228f2db03 100644 --- a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/spark/HugeGraphSparkLoader.java +++ b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/spark/HugeGraphSparkLoader.java @@ -293,8 +293,8 @@ private void parse(Row row, Map.Entry> builde case HDFS: FileSource fileSource = struct.input().asFileSource(); String delimiter = fileSource.delimiter(); - - elements = builder.build(row); + elements = builder.build(fileSource.header(), + row.mkString(delimiter).split(delimiter)); break; case JDBC: Object[] structFields = JavaConverters.asJavaCollection(row.schema().toList()) From 8f3b5285a249ec0c1299e7bf5286b39001e8869c Mon Sep 17 00:00:00 2001 From: alanzhao Date: Mon, 19 Jun 2023 15:03:26 +0800 Subject: [PATCH 12/20] fix:#464 Spark loader has dependency conflicts --- hugegraph-loader/pom.xml | 60 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/hugegraph-loader/pom.xml b/hugegraph-loader/pom.xml index 6ff5e69ce..c62b8b28d 100644 --- a/hugegraph-loader/pom.xml +++ b/hugegraph-loader/pom.xml @@ -90,8 +90,17 @@ jersey-server org.glassfish.jersey.core + + gson + com.google.code.gson + + + gson + com.google.code.gson + ${gson.version} + org.apache.spark spark-sql_${scala.version} @@ -614,6 +623,57 @@ org.apache.maven.plugins maven-compiler-plugin + + org.apache.maven.plugins + maven-shade-plugin + 3.4.1 + + + package + + shade + + + false + ${final.name}-shaded + + + org.glassfish.jersey + shaded.org.glassfish.jersey + + + jakarta.ws.rs + shaded.jakarta.ws.rs + + + com.google.common.base + shaded.com.google.common.base + + + org.glassfish.hk2 + shaded.org.glassfish.hk2 + + + org.jvnet.hk2 + shaded.org.jvnet.hk2 + + + + + + + + + *:* + + module-info.class + + + + + + + org.apache.maven.plugins maven-assembly-plugin From 88963c157844131db961d9b17667c6f20a6ac3a9 Mon Sep 17 00:00:00 2001 From: alanzhao Date: Mon, 19 Jun 2023 15:44:12 +0800 Subject: [PATCH 13/20] fix:#464 Spark loader has dependency conflicts --- .../META-INF/services/shaded.jakarta.ws.rs.client.ClientBuilder | 2 +- .../shaded.org.glassfish.hk2.extension.ServiceLocatorGenerator | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hugegraph-loader/src/main/resources/META-INF/services/shaded.jakarta.ws.rs.client.ClientBuilder b/hugegraph-loader/src/main/resources/META-INF/services/shaded.jakarta.ws.rs.client.ClientBuilder index b8956aaf6..7205ceb4f 100644 --- a/hugegraph-loader/src/main/resources/META-INF/services/shaded.jakarta.ws.rs.client.ClientBuilder +++ b/hugegraph-loader/src/main/resources/META-INF/services/shaded.jakarta.ws.rs.client.ClientBuilder @@ -13,4 +13,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -shaded.org.glassfish.jersey.client.JerseyClientBuilder \ No newline at end of file +shaded.org.glassfish.jersey.client.JerseyClientBuilder diff --git a/hugegraph-loader/src/main/resources/META-INF/services/shaded.org.glassfish.hk2.extension.ServiceLocatorGenerator b/hugegraph-loader/src/main/resources/META-INF/services/shaded.org.glassfish.hk2.extension.ServiceLocatorGenerator index 5293215e7..bad820aa7 100644 --- a/hugegraph-loader/src/main/resources/META-INF/services/shaded.org.glassfish.hk2.extension.ServiceLocatorGenerator +++ b/hugegraph-loader/src/main/resources/META-INF/services/shaded.org.glassfish.hk2.extension.ServiceLocatorGenerator @@ -13,4 +13,4 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -org.jvnet.hk2.external.generator.ServiceLocatorGeneratorImpl \ No newline at end of file +org.jvnet.hk2.external.generator.ServiceLocatorGeneratorImpl From d8b31fcd1dfde4782939c845329e4a2eefa3d0a0 Mon Sep 17 00:00:00 2001 From: alanzhao Date: Mon, 10 Jul 2023 17:18:22 +0800 Subject: [PATCH 14/20] fix:#488 bypass server for hbase bulk delete massive vertices and edges --- .../assembly/static/bin/get-params.sh | 3 +- .../hugegraph/loader/constant/Constants.java | 1 + .../direct/loader/HBaseDirectLoader.java | 41 +++++++++++-------- .../loader/executor/LoadOptions.java | 4 ++ 4 files changed, 31 insertions(+), 18 deletions(-) diff --git a/hugegraph-loader/assembly/static/bin/get-params.sh b/hugegraph-loader/assembly/static/bin/get-params.sh index dde3f0c7c..25f2cac82 100644 --- a/hugegraph-loader/assembly/static/bin/get-params.sh +++ b/hugegraph-loader/assembly/static/bin/get-params.sh @@ -27,7 +27,8 @@ function get_params() { --incremental-mode | --failure-mode | --batch-insert-threads | --single-insert-threads | \ --max-conn | --max-conn-per-route | --batch-size | --max-parse-errors | --max-insert-errors | \ --timeout | --shutdown-timeout | --retry-times | --retry-interval | --check-vertex | \ - --print-progress | --dry-run | --sink-type | --vertex-partitions | --edge-partitions | --help ) + --print-progress | --dry-run | --sink-type | --vertex-partitions | --edge-partitions | \ + --action-type | --help ) HUGEGRAPH_PARAMS="$HUGEGRAPH_PARAMS $1 $2" shift 2 ;; diff --git a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/constant/Constants.java b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/constant/Constants.java index 563cd401c..291721aee 100644 --- a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/constant/Constants.java +++ b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/constant/Constants.java @@ -81,4 +81,5 @@ public final class Constants { public static final String LOAD_DATA_PARSE_SUFFIX = "parse"; public static final String LOAD_DATA_SER_SUFFIX = "ser"; public static final String LOAD_DATA_INSERT_SUFFIX = "insert"; + public static final String DELETE_ACTION = "delete"; } diff --git a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/direct/loader/HBaseDirectLoader.java b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/direct/loader/HBaseDirectLoader.java index 7456e9271..2dd15ec1b 100644 --- a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/direct/loader/HBaseDirectLoader.java +++ b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/direct/loader/HBaseDirectLoader.java @@ -26,6 +26,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FsShell; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; @@ -200,23 +201,29 @@ List> buildAndSer(HBaseSerializer seria struct.input().type())); } - boolean isVertex = builder.mapping().type().isVertex(); - if (isVertex) { - for (Vertex vertex : (List) (Object) elementsElement) { - LOG.debug("vertex already build done {} ", vertex.toString()); - Tuple2 tuple2 = - vertexSerialize(serializer, vertex); - loadDistributeMetrics.increaseDisVertexInsertSuccess(builder.mapping()); - result.add(tuple2); - } - } else { - for (Edge edge : (List) (Object) elementsElement) { - LOG.debug("edge already build done {}", edge.toString()); - Tuple2 tuple2 = - edgeSerialize(serializer, edge); - loadDistributeMetrics.increaseDisEdgeInsertSuccess(builder.mapping()); - result.add(tuple2); - + if (elementsElement != null) { + for (GraphElement graphElement : elementsElement) { + final byte[] rowkey = serializer.getKeyBytes(graphElement); + final byte[] values = serializer.getValueBytes(graphElement); + ImmutableBytesWritable rowKey = new ImmutableBytesWritable(); + rowKey.set(rowkey); + if (loadOptions.actionType.equals(Constants.DELETE_ACTION)) { + KeyValue keyValue = new KeyValue( + rowkey,Bytes.toBytes(Constants.HBASE_COL_FAMILY), + Bytes.toBytes(Constants.EMPTY_STR),HConstants.LATEST_TIMESTAMP, + KeyValue.Type.DeleteFamily); + Tuple2 tuple2 = + new Tuple2<>(rowKey,keyValue); + result.add(tuple2); + } else { + KeyValue keyValue = new KeyValue(rowkey, + Bytes.toBytes(Constants.HBASE_COL_FAMILY), + Bytes.toBytes(Constants.EMPTY_STR), + values); + Tuple2 tuple2 = + new Tuple2<>(rowKey,keyValue); + result.add(tuple2); + } } } } diff --git a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/executor/LoadOptions.java b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/executor/LoadOptions.java index fb2a7d9dc..f42902125 100644 --- a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/executor/LoadOptions.java +++ b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/executor/LoadOptions.java @@ -233,6 +233,10 @@ public class LoadOptions implements Serializable { description = "hbaseZKParent") public String hbaseZKParent; + @Parameter(names = {"--action-type"}, arity = 1, + description = "add/update or delete") + public String actionType="add"; + public String workModeString() { if (this.incrementalMode) { return "INCREMENTAL MODE"; From 053d27b7b78a7fee12099df073d82550b908f7e5 Mon Sep 17 00:00:00 2001 From: alanzhao Date: Tue, 11 Jul 2023 09:53:11 +0800 Subject: [PATCH 15/20] fix:#488 bypass server for hbase bulk delete massive vertices and edges --- .../hugegraph/loader/constant/Constants.java | 3 +- .../direct/loader/HBaseDirectLoader.java | 47 ++++++++++--------- .../loader/executor/LoadOptions.java | 24 +++++----- 3 files changed, 38 insertions(+), 36 deletions(-) diff --git a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/constant/Constants.java b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/constant/Constants.java index 291721aee..285807c29 100644 --- a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/constant/Constants.java +++ b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/constant/Constants.java @@ -81,5 +81,6 @@ public final class Constants { public static final String LOAD_DATA_PARSE_SUFFIX = "parse"; public static final String LOAD_DATA_SER_SUFFIX = "ser"; public static final String LOAD_DATA_INSERT_SUFFIX = "insert"; - public static final String DELETE_ACTION = "delete"; + public static final String ACTION_DELETE = "delete"; + public static final String ACTION_UPDATE = "update"; } diff --git a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/direct/loader/HBaseDirectLoader.java b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/direct/loader/HBaseDirectLoader.java index 2dd15ec1b..879512204 100644 --- a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/direct/loader/HBaseDirectLoader.java +++ b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/direct/loader/HBaseDirectLoader.java @@ -201,29 +201,30 @@ List> buildAndSer(HBaseSerializer seria struct.input().type())); } - if (elementsElement != null) { - for (GraphElement graphElement : elementsElement) { - final byte[] rowkey = serializer.getKeyBytes(graphElement); - final byte[] values = serializer.getValueBytes(graphElement); - ImmutableBytesWritable rowKey = new ImmutableBytesWritable(); - rowKey.set(rowkey); - if (loadOptions.actionType.equals(Constants.DELETE_ACTION)) { - KeyValue keyValue = new KeyValue( - rowkey,Bytes.toBytes(Constants.HBASE_COL_FAMILY), - Bytes.toBytes(Constants.EMPTY_STR),HConstants.LATEST_TIMESTAMP, - KeyValue.Type.DeleteFamily); - Tuple2 tuple2 = - new Tuple2<>(rowKey,keyValue); - result.add(tuple2); - } else { - KeyValue keyValue = new KeyValue(rowkey, - Bytes.toBytes(Constants.HBASE_COL_FAMILY), - Bytes.toBytes(Constants.EMPTY_STR), - values); - Tuple2 tuple2 = - new Tuple2<>(rowKey,keyValue); - result.add(tuple2); - } + if (elementsElement == null) { + continue; + } + for (GraphElement graphElement : elementsElement) { + final byte[] rowkey = serializer.getKeyBytes(graphElement); + final byte[] values = serializer.getValueBytes(graphElement); + ImmutableBytesWritable rowKey = new ImmutableBytesWritable(); + rowKey.set(rowkey); + if (loadOptions.actionType.equals(Constants.ACTION_DELETE)) { + KeyValue keyValue = new KeyValue( + rowkey,Bytes.toBytes(Constants.HBASE_COL_FAMILY), + Bytes.toBytes(Constants.EMPTY_STR),HConstants.LATEST_TIMESTAMP, + KeyValue.Type.DeleteFamily); + Tuple2 tuple2 = + new Tuple2<>(rowKey, keyValue); + result.add(tuple2); + } else { + KeyValue keyValue = new KeyValue(rowkey, + Bytes.toBytes(Constants.HBASE_COL_FAMILY), + Bytes.toBytes(Constants.EMPTY_STR), + values); + Tuple2 tuple2 = + new Tuple2<>(rowKey, keyValue); + result.add(tuple2); } } } diff --git a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/executor/LoadOptions.java b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/executor/LoadOptions.java index f42902125..a6e1e8949 100644 --- a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/executor/LoadOptions.java +++ b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/executor/LoadOptions.java @@ -217,25 +217,25 @@ public class LoadOptions implements Serializable { description = "The number of partitions of the HBase vertex table") public int vertexPartitions = 64; - @Parameter(names = {"edgeTablename"}, arity = 1, - description = "edgeTablename") + @Parameter(names = {"--edge-table-name"}, arity = 1, + description = "The name of the the HBase edge table") public String edgeTablename; - @Parameter(names = {"vertexTablename"}, arity = 1, - description = "vertexTablename") + @Parameter(names = {"--vertex-table-name"}, arity = 1, + description = "The name of the the HBase vertex table") public String vertexTablename; - @Parameter(names = {"hbaseZKQuorum"}, arity = 1, - description = "hbaseZKQuorum") + @Parameter(names = {"--zk-quorum"}, arity = 1, + description = "Address of the zookeeper on which hbase depends") public String hbaseZKQuorum; - @Parameter(names = {"hbaseZKPort"}, arity = 1, - description = "hbaseZKPort") + @Parameter(names = {"--zk-port"}, arity = 1, + description = "port of the zookeeper on which hbase depends") public String hbaseZKPort; - @Parameter(names = {"hbaseZKParent"}, arity = 1, - description = "hbaseZKParent") + @Parameter(names = {"--zk-parent"}, arity = 1, + description = "hbase root node on the zookeeper") public String hbaseZKParent; @Parameter(names = {"--action-type"}, arity = 1, - description = "add/update or delete") - public String actionType="add"; + description = "The type of operation performed on the graph,update or delete") + public String actionType="update"; public String workModeString() { if (this.incrementalMode) { From f817163566c4d9eb18d28e272378c2d6aa3d6bd7 Mon Sep 17 00:00:00 2001 From: alanzhao Date: Wed, 12 Jul 2023 22:34:59 +0800 Subject: [PATCH 16/20] fix:#488 bypass server for hbase bulk delete massive vertices and edges --- .../java/org/apache/hugegraph/loader/executor/LoadOptions.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/executor/LoadOptions.java b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/executor/LoadOptions.java index a6e1e8949..9e48ba108 100644 --- a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/executor/LoadOptions.java +++ b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/executor/LoadOptions.java @@ -234,7 +234,7 @@ public class LoadOptions implements Serializable { public String hbaseZKParent; @Parameter(names = {"--action-type"}, arity = 1, - description = "The type of operation performed on the graph,update or delete") + description = "The type of operation performed on the graph, update or delete") public String actionType="update"; public String workModeString() { From e610fb7f27e721015b19652dedd96477721577f6 Mon Sep 17 00:00:00 2001 From: alanzhao Date: Wed, 2 Aug 2023 10:29:39 +0800 Subject: [PATCH 17/20] Revert "fix:#488 bypass server for hbase bulk delete massive vertices and edges" This reverts commit 053d27b7 --- .../hugegraph/loader/constant/Constants.java | 3 +- .../direct/loader/HBaseDirectLoader.java | 47 +++++++++---------- .../loader/executor/LoadOptions.java | 24 +++++----- 3 files changed, 36 insertions(+), 38 deletions(-) diff --git a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/constant/Constants.java b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/constant/Constants.java index 285807c29..291721aee 100644 --- a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/constant/Constants.java +++ b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/constant/Constants.java @@ -81,6 +81,5 @@ public final class Constants { public static final String LOAD_DATA_PARSE_SUFFIX = "parse"; public static final String LOAD_DATA_SER_SUFFIX = "ser"; public static final String LOAD_DATA_INSERT_SUFFIX = "insert"; - public static final String ACTION_DELETE = "delete"; - public static final String ACTION_UPDATE = "update"; + public static final String DELETE_ACTION = "delete"; } diff --git a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/direct/loader/HBaseDirectLoader.java b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/direct/loader/HBaseDirectLoader.java index 879512204..2dd15ec1b 100644 --- a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/direct/loader/HBaseDirectLoader.java +++ b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/direct/loader/HBaseDirectLoader.java @@ -201,30 +201,29 @@ List> buildAndSer(HBaseSerializer seria struct.input().type())); } - if (elementsElement == null) { - continue; - } - for (GraphElement graphElement : elementsElement) { - final byte[] rowkey = serializer.getKeyBytes(graphElement); - final byte[] values = serializer.getValueBytes(graphElement); - ImmutableBytesWritable rowKey = new ImmutableBytesWritable(); - rowKey.set(rowkey); - if (loadOptions.actionType.equals(Constants.ACTION_DELETE)) { - KeyValue keyValue = new KeyValue( - rowkey,Bytes.toBytes(Constants.HBASE_COL_FAMILY), - Bytes.toBytes(Constants.EMPTY_STR),HConstants.LATEST_TIMESTAMP, - KeyValue.Type.DeleteFamily); - Tuple2 tuple2 = - new Tuple2<>(rowKey, keyValue); - result.add(tuple2); - } else { - KeyValue keyValue = new KeyValue(rowkey, - Bytes.toBytes(Constants.HBASE_COL_FAMILY), - Bytes.toBytes(Constants.EMPTY_STR), - values); - Tuple2 tuple2 = - new Tuple2<>(rowKey, keyValue); - result.add(tuple2); + if (elementsElement != null) { + for (GraphElement graphElement : elementsElement) { + final byte[] rowkey = serializer.getKeyBytes(graphElement); + final byte[] values = serializer.getValueBytes(graphElement); + ImmutableBytesWritable rowKey = new ImmutableBytesWritable(); + rowKey.set(rowkey); + if (loadOptions.actionType.equals(Constants.DELETE_ACTION)) { + KeyValue keyValue = new KeyValue( + rowkey,Bytes.toBytes(Constants.HBASE_COL_FAMILY), + Bytes.toBytes(Constants.EMPTY_STR),HConstants.LATEST_TIMESTAMP, + KeyValue.Type.DeleteFamily); + Tuple2 tuple2 = + new Tuple2<>(rowKey,keyValue); + result.add(tuple2); + } else { + KeyValue keyValue = new KeyValue(rowkey, + Bytes.toBytes(Constants.HBASE_COL_FAMILY), + Bytes.toBytes(Constants.EMPTY_STR), + values); + Tuple2 tuple2 = + new Tuple2<>(rowKey,keyValue); + result.add(tuple2); + } } } } diff --git a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/executor/LoadOptions.java b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/executor/LoadOptions.java index 9e48ba108..f42902125 100644 --- a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/executor/LoadOptions.java +++ b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/executor/LoadOptions.java @@ -217,25 +217,25 @@ public class LoadOptions implements Serializable { description = "The number of partitions of the HBase vertex table") public int vertexPartitions = 64; - @Parameter(names = {"--edge-table-name"}, arity = 1, - description = "The name of the the HBase edge table") + @Parameter(names = {"edgeTablename"}, arity = 1, + description = "edgeTablename") public String edgeTablename; - @Parameter(names = {"--vertex-table-name"}, arity = 1, - description = "The name of the the HBase vertex table") + @Parameter(names = {"vertexTablename"}, arity = 1, + description = "vertexTablename") public String vertexTablename; - @Parameter(names = {"--zk-quorum"}, arity = 1, - description = "Address of the zookeeper on which hbase depends") + @Parameter(names = {"hbaseZKQuorum"}, arity = 1, + description = "hbaseZKQuorum") public String hbaseZKQuorum; - @Parameter(names = {"--zk-port"}, arity = 1, - description = "port of the zookeeper on which hbase depends") + @Parameter(names = {"hbaseZKPort"}, arity = 1, + description = "hbaseZKPort") public String hbaseZKPort; - @Parameter(names = {"--zk-parent"}, arity = 1, - description = "hbase root node on the zookeeper") + @Parameter(names = {"hbaseZKParent"}, arity = 1, + description = "hbaseZKParent") public String hbaseZKParent; @Parameter(names = {"--action-type"}, arity = 1, - description = "The type of operation performed on the graph, update or delete") - public String actionType="update"; + description = "add/update or delete") + public String actionType="add"; public String workModeString() { if (this.incrementalMode) { From b0918d5bced17fdb25a9f5d2c1f7601d269a6d00 Mon Sep 17 00:00:00 2001 From: alanzhao Date: Wed, 2 Aug 2023 10:30:10 +0800 Subject: [PATCH 18/20] Revert "fix:#488 bypass server for hbase bulk delete massive vertices and edges" This reverts commit d8b31fcd --- .../assembly/static/bin/get-params.sh | 3 +- .../hugegraph/loader/constant/Constants.java | 1 - .../direct/loader/HBaseDirectLoader.java | 41 ++++++++----------- .../loader/executor/LoadOptions.java | 4 -- 4 files changed, 18 insertions(+), 31 deletions(-) diff --git a/hugegraph-loader/assembly/static/bin/get-params.sh b/hugegraph-loader/assembly/static/bin/get-params.sh index 25f2cac82..dde3f0c7c 100644 --- a/hugegraph-loader/assembly/static/bin/get-params.sh +++ b/hugegraph-loader/assembly/static/bin/get-params.sh @@ -27,8 +27,7 @@ function get_params() { --incremental-mode | --failure-mode | --batch-insert-threads | --single-insert-threads | \ --max-conn | --max-conn-per-route | --batch-size | --max-parse-errors | --max-insert-errors | \ --timeout | --shutdown-timeout | --retry-times | --retry-interval | --check-vertex | \ - --print-progress | --dry-run | --sink-type | --vertex-partitions | --edge-partitions | \ - --action-type | --help ) + --print-progress | --dry-run | --sink-type | --vertex-partitions | --edge-partitions | --help ) HUGEGRAPH_PARAMS="$HUGEGRAPH_PARAMS $1 $2" shift 2 ;; diff --git a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/constant/Constants.java b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/constant/Constants.java index 291721aee..563cd401c 100644 --- a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/constant/Constants.java +++ b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/constant/Constants.java @@ -81,5 +81,4 @@ public final class Constants { public static final String LOAD_DATA_PARSE_SUFFIX = "parse"; public static final String LOAD_DATA_SER_SUFFIX = "ser"; public static final String LOAD_DATA_INSERT_SUFFIX = "insert"; - public static final String DELETE_ACTION = "delete"; } diff --git a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/direct/loader/HBaseDirectLoader.java b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/direct/loader/HBaseDirectLoader.java index 2dd15ec1b..7456e9271 100644 --- a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/direct/loader/HBaseDirectLoader.java +++ b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/direct/loader/HBaseDirectLoader.java @@ -26,7 +26,6 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FsShell; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; @@ -201,29 +200,23 @@ List> buildAndSer(HBaseSerializer seria struct.input().type())); } - if (elementsElement != null) { - for (GraphElement graphElement : elementsElement) { - final byte[] rowkey = serializer.getKeyBytes(graphElement); - final byte[] values = serializer.getValueBytes(graphElement); - ImmutableBytesWritable rowKey = new ImmutableBytesWritable(); - rowKey.set(rowkey); - if (loadOptions.actionType.equals(Constants.DELETE_ACTION)) { - KeyValue keyValue = new KeyValue( - rowkey,Bytes.toBytes(Constants.HBASE_COL_FAMILY), - Bytes.toBytes(Constants.EMPTY_STR),HConstants.LATEST_TIMESTAMP, - KeyValue.Type.DeleteFamily); - Tuple2 tuple2 = - new Tuple2<>(rowKey,keyValue); - result.add(tuple2); - } else { - KeyValue keyValue = new KeyValue(rowkey, - Bytes.toBytes(Constants.HBASE_COL_FAMILY), - Bytes.toBytes(Constants.EMPTY_STR), - values); - Tuple2 tuple2 = - new Tuple2<>(rowKey,keyValue); - result.add(tuple2); - } + boolean isVertex = builder.mapping().type().isVertex(); + if (isVertex) { + for (Vertex vertex : (List) (Object) elementsElement) { + LOG.debug("vertex already build done {} ", vertex.toString()); + Tuple2 tuple2 = + vertexSerialize(serializer, vertex); + loadDistributeMetrics.increaseDisVertexInsertSuccess(builder.mapping()); + result.add(tuple2); + } + } else { + for (Edge edge : (List) (Object) elementsElement) { + LOG.debug("edge already build done {}", edge.toString()); + Tuple2 tuple2 = + edgeSerialize(serializer, edge); + loadDistributeMetrics.increaseDisEdgeInsertSuccess(builder.mapping()); + result.add(tuple2); + } } } diff --git a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/executor/LoadOptions.java b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/executor/LoadOptions.java index f42902125..fb2a7d9dc 100644 --- a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/executor/LoadOptions.java +++ b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/executor/LoadOptions.java @@ -233,10 +233,6 @@ public class LoadOptions implements Serializable { description = "hbaseZKParent") public String hbaseZKParent; - @Parameter(names = {"--action-type"}, arity = 1, - description = "add/update or delete") - public String actionType="add"; - public String workModeString() { if (this.incrementalMode) { return "INCREMENTAL MODE"; From 5658a87b9400204681cde1e5ab5f0f7a76814b30 Mon Sep 17 00:00:00 2001 From: alanzhao Date: Wed, 2 Aug 2023 14:40:30 +0800 Subject: [PATCH 19/20] fix:#464 remove incubating --- hugegraph-loader/assembly/static/bin/hugegraph-spark-loader.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hugegraph-loader/assembly/static/bin/hugegraph-spark-loader.sh b/hugegraph-loader/assembly/static/bin/hugegraph-spark-loader.sh index dfbbb5dd7..ee93c63f0 100755 --- a/hugegraph-loader/assembly/static/bin/hugegraph-spark-loader.sh +++ b/hugegraph-loader/assembly/static/bin/hugegraph-spark-loader.sh @@ -25,7 +25,7 @@ get_params "$@" echo "engine_params: $ENGINE_PARAMS" echo "hugegraph_params: $HUGEGRAPH_PARAMS" -ASSEMBLY_JAR_NAME=$(find "${LIB_DIR}" -name '*hugegraph-loader-incubating*.jar') +ASSEMBLY_JAR_NAME=$(find "${LIB_DIR}" -name 'apache-hugegraph-loader*.jar') DEFAULT_APP_NAME="hugegraph-spark-loader" APP_NAME=${APP_NAME:-$DEFAULT_APP_NAME} From 841e7741ac3c92cb2739bcc7f6161eb7674a4865 Mon Sep 17 00:00:00 2001 From: alanzhao Date: Fri, 4 Aug 2023 18:09:12 +0800 Subject: [PATCH 20/20] fix:#464 fix delimiter is null --- .../hugegraph/loader/spark/HugeGraphSparkLoader.java | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/spark/HugeGraphSparkLoader.java b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/spark/HugeGraphSparkLoader.java index 228f2db03..5ee060724 100644 --- a/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/spark/HugeGraphSparkLoader.java +++ b/hugegraph-loader/src/main/java/org/apache/hugegraph/loader/spark/HugeGraphSparkLoader.java @@ -60,6 +60,7 @@ import org.slf4j.Logger; import java.io.Serializable; +import java.util.Optional; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; @@ -292,9 +293,13 @@ private void parse(Row row, Map.Entry> builde case FILE: case HDFS: FileSource fileSource = struct.input().asFileSource(); - String delimiter = fileSource.delimiter(); - elements = builder.build(fileSource.header(), - row.mkString(delimiter).split(delimiter)); + String delimiter = fileSource.delimiter() ; + if (Optional.ofNullable(delimiter).isPresent()) { + elements = builder.build(fileSource.header(), + row.mkString(delimiter).split(delimiter)); + }else { + elements = builder.build(row); + } break; case JDBC: Object[] structFields = JavaConverters.asJavaCollection(row.schema().toList())