Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 62 additions & 38 deletions gluten-core/src/main/java/org/apache/gluten/utils/ResourceUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,20 @@
*/
package org.apache.gluten.utils;

import org.apache.gluten.exception.GlutenException;

import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Enumeration;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;
Expand All @@ -37,58 +41,73 @@
* and then modified for Gluten's use.
*/
public class ResourceUtil {

private static final Logger LOG = LoggerFactory.getLogger(ResourceUtil.class);

/**
* Get a collection of resource paths by the input RegEx pattern.
* Get a collection of resource paths by the input RegEx pattern in a certain container folder.
*
* @param pattern The pattern to match.
* @param container The container folder. E.g., `META-INF`. Must not be empty, because the
* class loader needs a meaningful resource name to search for inside the loaded jar files.
* @param pattern The pattern to match on the file names.
* @return The relative resource paths in the order they are found.
*/
public static List<String> getResources(final Pattern pattern) {
public static List<String> getResources(final String container, final Pattern pattern) {
Preconditions.checkArgument(
!container.isEmpty(),
"Resource search should only be used under a certain container folder");
Preconditions.checkArgument(
!container.startsWith("/") && !container.endsWith("/"),
"Resource container should not start or end with\"/\"");
final List<String> buffer = new ArrayList<>();
String classPath = System.getProperty("java.class.path");
processClassPathElements(classPath, pattern, buffer);
return Collections.unmodifiableList(buffer);
}

private static void processClassPathElements(
String classPath, Pattern pattern, List<String> buffer) {
if (classPath == null || classPath.isEmpty()) {
return;
final Enumeration<URL> containerUrls;
try {
containerUrls = Thread.currentThread().getContextClassLoader().getResources(container);
} catch (IOException e) {
throw new GlutenException(e);
}
String[] classPathElements = classPath.split(File.pathSeparator);
Arrays.stream(classPathElements).forEach(element -> getResources(element, pattern, buffer));
// The Gluten project may be wrapped by another service in order to use the native engine.
// As a result, java.class.path points to xxx/other.jar instead of xxx/gluten.jar,
// which causes the required Components to fail to load properly.
if (buffer.isEmpty()) {
classPath = ResourceUtil.class.getProtectionDomain().getCodeSource().getLocation().getPath();
classPathElements = classPath.split(File.pathSeparator);
Arrays.stream(classPathElements).forEach(element -> getResources(element, pattern, buffer));
while (containerUrls.hasMoreElements()) {
final URL containerUrl = containerUrls.nextElement();
getResources(containerUrl, pattern, buffer);
}
return Collections.unmodifiableList(buffer);
}

private static void getResources(
final String element, final Pattern pattern, final List<String> buffer) {
final File file = new File(element);
if (!file.exists()) {
LOG.info("Skip non-existing classpath: {}", element);
return;
}
if (file.isDirectory()) {
getResourcesFromDirectory(file, file, pattern, buffer);
} else {
getResourcesFromJarFile(file, pattern, buffer);
final URL containerUrl, final Pattern pattern, final List<String> buffer) {
final String protocol = containerUrl.getProtocol();
switch (protocol) {
case "file":
final File fileContainer = new File(containerUrl.getPath());
Preconditions.checkState(
fileContainer.exists() && fileContainer.isDirectory(),
"Specified file container " + containerUrl + " is not a directory or not a file");
getResourcesFromDirectory(fileContainer, fileContainer, pattern, buffer);
break;
case "jar":
final String jarContainerPath = containerUrl.getPath();
final Pattern jarContainerPattern = Pattern.compile("file:([^!]+)!/(.+)");
final Matcher m = jarContainerPattern.matcher(jarContainerPath);
if (!m.matches()) {
throw new GlutenException("Illegal Jar container URL: " + containerUrl);
}
final String jarPath = m.group(1);
final File jarFile = new File(jarPath);
Preconditions.checkState(
jarFile.exists() && jarFile.isFile(),
"Specified Jar container " + containerUrl + " is not a Jar file");
final String dir = m.group(2);
getResourcesFromJarFile(jarFile, dir, pattern, buffer);
break;
default:
throw new GlutenException("Unrecognizable resource protocol: " + protocol);
}
}

private static void getResourcesFromJarFile(
final File file, final Pattern pattern, final List<String> buffer) {
ZipFile zf;
final File jarFile, final String dir, final Pattern pattern, final List<String> buffer) {
final ZipFile zf;
try {
zf = new ZipFile(file);
zf = new ZipFile(jarFile);
} catch (final ZipException e) {
throw new RuntimeException(e);
} catch (final IOException e) {
Expand All @@ -98,9 +117,14 @@ private static void getResourcesFromJarFile(
while (e.hasMoreElements()) {
final ZipEntry ze = (ZipEntry) e.nextElement();
final String fileName = ze.getName();
final boolean accept = pattern.matcher(fileName).matches();
if (!fileName.startsWith(dir)) {
continue;
}
final String relativeFileName =
new File(dir).toURI().relativize(new File(fileName).toURI()).getPath();
final boolean accept = pattern.matcher(relativeFileName).matches();
if (accept) {
buffer.add(fileName);
buffer.add(relativeFileName);
}
}
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.gluten.component

import org.apache.gluten.exception.GlutenException
Expand All @@ -26,11 +25,8 @@ import org.apache.spark.util.SparkReflectionUtil
import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.util.matching.Regex




// format: off

/**
* Gluten's global discovery to find all [[Component]] definitions in the classpath.
*
Expand All @@ -54,12 +50,12 @@ import scala.util.matching.Regex
// format: on
private object Discovery extends Logging {
private val container: String = "META-INF/gluten-components"
private val componentFilePattern: Regex = s"^$container/(.+)$$".r
private val componentFilePattern: Regex = s"^(.+)$$".r

def discoverAll(): Seq[Component] = {
logInfo("Start discovering components in the current classpath... ")
val prev = System.currentTimeMillis()
val allFiles = ResourceUtil.getResources(componentFilePattern.pattern).asScala
val allFiles = ResourceUtil.getResources(container, componentFilePattern.pattern).asScala
val duration = System.currentTimeMillis() - prev
logInfo(s"Discovered component files: ${allFiles.mkString(", ")}. Duration: $duration ms.")
val deDup = mutable.Set[String]()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.gluten.util;

import org.apache.gluten.utils.ResourceUtil;

import org.junit.Assert;
import org.junit.Test;

import java.util.List;
import java.util.regex.Pattern;

/**
 * Sanity tests for {@link ResourceUtil#getResources(String, Pattern)}. Each test looks up one
 * well-known class file beneath the {@code org} container and expects exactly one hit.
 */
public class ResourceUtilTest {

  /** Looks up the compiled class file of this very test. */
  @Test
  public void testFile() {
    assertSingleMatch(
        "apache/gluten/util/ResourceUtilTest\\.class",
        "apache/gluten/util/ResourceUtilTest.class");
  }

  /** Looks up a class file that belongs to Spark code rather than to this test. */
  @Test
  public void testJar() {
    assertSingleMatch("apache/spark/SparkContext\\.class", "apache/spark/SparkContext.class");
  }

  /**
   * Searches the {@code org} container with the given regular expression and asserts that exactly
   * one resource is returned and that its relative path equals {@code expectedPath}.
   *
   * @param regex the regular expression applied to the resource paths
   * @param expectedPath the single relative path the search is expected to return
   */
  private static void assertSingleMatch(final String regex, final String expectedPath) {
    final Pattern filePattern = Pattern.compile(regex);
    final List<String> matches = ResourceUtil.getResources("org", filePattern);
    Assert.assertEquals(1, matches.size());
    Assert.assertEquals(expectedPath, matches.get(0));
  }
}