From 13f323dbe595d3e423483dbe86cf5bb924bd1f5b Mon Sep 17 00:00:00 2001 From: Hongze Zhang Date: Thu, 9 Jan 2025 17:23:00 +0800 Subject: [PATCH 1/4] fixup --- .../org/apache/gluten/utils/ResourceUtil.java | 100 +++++++++++------- .../apache/gluten/component/Discovery.scala | 10 +- .../apache/gluten/util/ResourceUtilTest.java | 46 ++++++++ 3 files changed, 111 insertions(+), 45 deletions(-) create mode 100644 gluten-core/src/test/java/org/apache/gluten/util/ResourceUtilTest.java diff --git a/gluten-core/src/main/java/org/apache/gluten/utils/ResourceUtil.java b/gluten-core/src/main/java/org/apache/gluten/utils/ResourceUtil.java index ac66c57a8c3a..73e111712edb 100644 --- a/gluten-core/src/main/java/org/apache/gluten/utils/ResourceUtil.java +++ b/gluten-core/src/main/java/org/apache/gluten/utils/ResourceUtil.java @@ -16,16 +16,20 @@ */ package org.apache.gluten.utils; +import org.apache.gluten.exception.GlutenException; + +import com.google.common.base.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; +import java.net.URL; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.Enumeration; import java.util.List; +import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.zip.ZipEntry; import java.util.zip.ZipException; @@ -37,58 +41,73 @@ * and then modified for Gluten's use. */ public class ResourceUtil { - private static final Logger LOG = LoggerFactory.getLogger(ResourceUtil.class); /** - * Get a collection of resource paths by the input RegEx pattern. + * Get a collection of resource paths by the input RegEx pattern in a certain container folder. * - * @param pattern The pattern to match. + * @param container The container folder. E.g., `META-INF`. Should not be left empty, because + * Classloader requires for at a meaningful file name to search inside the loaded jar files. + * @param pattern The pattern to match on the file names. * @return The relative resource paths in the order they are found. */ - public static List getResources(final Pattern pattern) { + public static List getResources(String container, final Pattern pattern) { + Preconditions.checkArgument( + !container.isEmpty(), + "Resource search should only be used under a certain container folder"); + Preconditions.checkArgument( + !container.startsWith("/") && !container.endsWith("/"), + "Resource container should not starts or end with\"/\""); final List buffer = new ArrayList<>(); - String classPath = System.getProperty("java.class.path"); - processClassPathElements(classPath, pattern, buffer); - return Collections.unmodifiableList(buffer); - } - - private static void processClassPathElements( - String classPath, Pattern pattern, List buffer) { - if (classPath == null || classPath.isEmpty()) { - return; + final Enumeration containerUrls; + try { + containerUrls = Thread.currentThread().getContextClassLoader().getResources(container); + } catch (IOException e) { + throw new GlutenException(e); } - String[] classPathElements = classPath.split(File.pathSeparator); - Arrays.stream(classPathElements).forEach(element -> getResources(element, pattern, buffer)); - // the Gluten project may wrapped by the other service to use the Native Engine. - // As a result, the java.class.path points to xxx/other.jar instead of xxx/gluten.jar. - // This will result in the failure to properly load the required Components. - if (buffer.isEmpty()) { - classPath = ResourceUtil.class.getProtectionDomain().getCodeSource().getLocation().getPath(); - classPathElements = classPath.split(File.pathSeparator); - Arrays.stream(classPathElements).forEach(element -> getResources(element, pattern, buffer)); + while (containerUrls.hasMoreElements()) { + final URL containerUrl = containerUrls.nextElement(); + getResources(containerUrl, pattern, buffer); } + return Collections.unmodifiableList(buffer); } private static void getResources( - final String element, final Pattern pattern, final List buffer) { - final File file = new File(element); - if (!file.exists()) { - LOG.info("Skip non-existing classpath: {}", element); - return; - } - if (file.isDirectory()) { - getResourcesFromDirectory(file, file, pattern, buffer); - } else { - getResourcesFromJarFile(file, pattern, buffer); + final URL containerUrl, final Pattern pattern, final List buffer) { + final String protocol = containerUrl.getProtocol(); + switch (protocol) { + case "file": + final File fileContainer = new File(containerUrl.getPath()); + Preconditions.checkState( + fileContainer.exists() && fileContainer.isDirectory(), + "Specified file container " + containerUrl + " is not a directory or not a file"); + getResourcesFromDirectory(fileContainer, fileContainer, pattern, buffer); + break; + case "jar": + final String jarContainerPath = containerUrl.getPath(); + final Pattern jarContainerPattern = Pattern.compile("file:([^!]+)!/(.+)"); + final Matcher m = jarContainerPattern.matcher(jarContainerPath); + if (!m.matches()) { + throw new GlutenException("Illegal Jar container URL: " + containerUrl); + } + final String jarPath = m.group(1); + final File jarFile = new File(jarPath); + Preconditions.checkState( + jarFile.exists() && jarFile.isFile(), + "Specified Jar container " + containerUrl + " is not a Jar file"); + final String dir = m.group(2); + getResourcesFromJarFile(jarFile, dir, pattern, buffer); + break; + default: + throw new GlutenException("Unrecognizable resource protocol: " + protocol); } } private static void getResourcesFromJarFile( - final File file, final Pattern pattern, final List buffer) { - ZipFile zf; + final File jarFile, final String dir, final Pattern pattern, final List buffer) { + final ZipFile zf; try { - zf = new ZipFile(file); + zf = new ZipFile(jarFile); } catch (final ZipException e) { throw new RuntimeException(e); } catch (final IOException e) { @@ -98,9 +117,14 @@ private static void getResourcesFromJarFile( while (e.hasMoreElements()) { final ZipEntry ze = (ZipEntry) e.nextElement(); final String fileName = ze.getName(); - final boolean accept = pattern.matcher(fileName).matches(); + if (!fileName.startsWith(dir)) { + continue; + } + final String relativeFileName = + new File(dir).toURI().relativize(new File(fileName).toURI()).getPath(); + final boolean accept = pattern.matcher(relativeFileName).matches(); if (accept) { - buffer.add(fileName); + buffer.add(relativeFileName); } } try { diff --git a/gluten-core/src/main/scala/org/apache/gluten/component/Discovery.scala b/gluten-core/src/main/scala/org/apache/gluten/component/Discovery.scala index 2b8f060a69f7..ffa2ba9b7706 100644 --- a/gluten-core/src/main/scala/org/apache/gluten/component/Discovery.scala +++ b/gluten-core/src/main/scala/org/apache/gluten/component/Discovery.scala @@ -14,7 +14,6 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - package org.apache.gluten.component import org.apache.gluten.exception.GlutenException @@ -26,11 +25,8 @@ import org.apache.spark.util.SparkReflectionUtil import scala.collection.JavaConverters._ import scala.collection.mutable import scala.util.matching.Regex - - - - // format: off + /** * Gluten's global discovery to find all [[Component]] definitions in the classpath. * @@ -54,12 +50,12 @@ import scala.util.matching.Regex // format: on private object Discovery extends Logging { private val container: String = "META-INF/gluten-components" - private val componentFilePattern: Regex = s"^$container/(.+)$$".r + private val componentFilePattern: Regex = s"^(.+)$$".r def discoverAll(): Seq[Component] = { logInfo("Start discovering components in the current classpath... ") val prev = System.currentTimeMillis() - val allFiles = ResourceUtil.getResources(componentFilePattern.pattern).asScala + val allFiles = ResourceUtil.getResources(container, componentFilePattern.pattern).asScala val duration = System.currentTimeMillis() - prev logInfo(s"Discovered component files: ${allFiles.mkString(", ")}. Duration: $duration ms.") val deDup = mutable.Set[String]() diff --git a/gluten-core/src/test/java/org/apache/gluten/util/ResourceUtilTest.java b/gluten-core/src/test/java/org/apache/gluten/util/ResourceUtilTest.java new file mode 100644 index 000000000000..c854bb7d4414 --- /dev/null +++ b/gluten-core/src/test/java/org/apache/gluten/util/ResourceUtilTest.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.gluten.util; + +import org.apache.gluten.utils.ResourceUtil; + +import org.junit.Assert; +import org.junit.Test; + +import java.util.List; +import java.util.regex.Pattern; + +public class ResourceUtilTest { + @Test + public void testFile() { + // Use the class file of this test to verify the sanity of ResourceUtil. + List classes = + ResourceUtil.getResources( + "org", Pattern.compile("apache/gluten/util/ResourceUtilTest\\.class")); + Assert.assertEquals(1, classes.size()); + Assert.assertEquals("apache/gluten/util/ResourceUtilTest.class", classes.get(0)); + } + + @Test + public void testJar() { + // Use the class file of this test to verify the sanity of ResourceUtil. + List classes = + ResourceUtil.getResources("org", Pattern.compile("apache/spark/SparkContext\\.class")); + Assert.assertEquals(1, classes.size()); + Assert.assertEquals("apache/spark/SparkContext.class", classes.get(0)); + } +} From a9d75feb0aab37cc7ef5f98489b6bdecfa725738 Mon Sep 17 00:00:00 2001 From: Hongze Zhang Date: Thu, 9 Jan 2025 17:31:15 +0800 Subject: [PATCH 2/4] fixup --- .../src/test/java/org/apache/gluten/util/ResourceUtilTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gluten-core/src/test/java/org/apache/gluten/util/ResourceUtilTest.java b/gluten-core/src/test/java/org/apache/gluten/util/ResourceUtilTest.java index c854bb7d4414..570e5a6e4d03 100644 --- a/gluten-core/src/test/java/org/apache/gluten/util/ResourceUtilTest.java +++ b/gluten-core/src/test/java/org/apache/gluten/util/ResourceUtilTest.java @@ -37,7 +37,7 @@ public void testFile() { @Test public void testJar() { - // Use the class file of this test to verify the sanity of ResourceUtil. + // Use the class file of Spark code to verify the sanity of ResourceUtil. List classes = ResourceUtil.getResources("org", Pattern.compile("apache/spark/SparkContext\\.class")); Assert.assertEquals(1, classes.size()); From 5cdbe0577db1371b246e363949d30e061a7164fb Mon Sep 17 00:00:00 2001 From: Hongze Zhang Date: Thu, 9 Jan 2025 17:32:30 +0800 Subject: [PATCH 3/4] fixup --- .../src/main/java/org/apache/gluten/utils/ResourceUtil.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gluten-core/src/main/java/org/apache/gluten/utils/ResourceUtil.java b/gluten-core/src/main/java/org/apache/gluten/utils/ResourceUtil.java index 73e111712edb..13e6e2858123 100644 --- a/gluten-core/src/main/java/org/apache/gluten/utils/ResourceUtil.java +++ b/gluten-core/src/main/java/org/apache/gluten/utils/ResourceUtil.java @@ -51,7 +51,7 @@ public class ResourceUtil { * @param pattern The pattern to match on the file names. * @return The relative resource paths in the order they are found. */ - public static List getResources(String container, final Pattern pattern) { + public static List getResources(final String container, final Pattern pattern) { Preconditions.checkArgument( !container.isEmpty(), "Resource search should only be used under a certain container folder"); From c2132a939e4afbba645c7f15328de9f172519491 Mon Sep 17 00:00:00 2001 From: Hongze Zhang Date: Thu, 9 Jan 2025 17:33:57 +0800 Subject: [PATCH 4/4] fixup --- .../src/main/java/org/apache/gluten/utils/ResourceUtil.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gluten-core/src/main/java/org/apache/gluten/utils/ResourceUtil.java b/gluten-core/src/main/java/org/apache/gluten/utils/ResourceUtil.java index 13e6e2858123..89f48d4984f6 100644 --- a/gluten-core/src/main/java/org/apache/gluten/utils/ResourceUtil.java +++ b/gluten-core/src/main/java/org/apache/gluten/utils/ResourceUtil.java @@ -57,7 +57,7 @@ public static List getResources(final String container, final Pattern pa "Resource search should only be used under a certain container folder"); Preconditions.checkArgument( !container.startsWith("/") && !container.endsWith("/"), - "Resource container should not starts or end with\"/\""); + "Resource container should not start or end with\"/\""); final List buffer = new ArrayList<>(); final Enumeration containerUrls; try {