From c3988e9e7dc68d69ec22dafbcd3499bf3eb80b6e Mon Sep 17 00:00:00 2001 From: Thejdeep Gudivada Date: Mon, 4 Oct 2021 15:24:00 -0700 Subject: [PATCH] [SPARK-36834][SHUFFLE] Add support for namespacing log lines emitted by ESS ### What changes were proposed in this pull request? Added a config `spark.yarn.shuffle.service.logs.namespace` which can be used to add a namespace suffix to log lines emitted by the External Shuffle Service. ### Why are the changes needed? Since many instances of ESS can be running on the same NM, it would be easier to distinguish between them. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? N/A --- .../spark/network/yarn/YarnShuffleService.java | 18 ++++++++++++++++-- docs/running-on-yarn.md | 11 +++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java index ac163692c4747..f1b894139149d 100644 --- a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java +++ b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java @@ -93,7 +93,8 @@ * This {@code classpath} configuration is only supported on YARN versions >= 2.9.0. */ public class YarnShuffleService extends AuxiliaryService { - private static final Logger logger = LoggerFactory.getLogger(YarnShuffleService.class); + private static final Logger defaultLogger = LoggerFactory.getLogger(YarnShuffleService.class); + private Logger logger = defaultLogger; // Port on which the shuffle server listens for fetch requests private static final String SPARK_SHUFFLE_SERVICE_PORT_KEY = "spark.shuffle.service.port"; @@ -107,6 +108,12 @@ public class YarnShuffleService extends AuxiliaryService { "spark.yarn.shuffle.service.metrics.namespace"; private static final String DEFAULT_SPARK_SHUFFLE_SERVICE_METRICS_NAME = "sparkShuffleService"; + /** + * The namespace to use for the logs produced by the shuffle service + */ + static final String SPARK_SHUFFLE_SERVICE_LOGS_NAMESPACE_KEY = + "spark.yarn.shuffle.service.logs.namespace"; + // Whether the shuffle server should authenticate fetch requests private static final String SPARK_AUTHENTICATE_KEY = "spark.authenticate"; private static final boolean DEFAULT_SPARK_AUTHENTICATE = false; @@ -204,6 +211,13 @@ protected void serviceInit(Configuration externalConf) throws Exception { confOverlayUrl); _conf.addResource(confOverlayUrl); } + + String logsNamespace = _conf.get(SPARK_SHUFFLE_SERVICE_LOGS_NAMESPACE_KEY, ""); + if (!logsNamespace.isEmpty()) { + String className = YarnShuffleService.class.getName(); + logger = LoggerFactory.getLogger(className + "." + logsNamespace); + } + super.serviceInit(_conf); boolean stopOnFailure = _conf.getBoolean(STOP_ON_FAILURE_KEY, DEFAULT_STOP_ON_FAILURE); @@ -284,7 +298,7 @@ static MergedShuffleFileManager newMergedShuffleFileManagerInstance(TransportCon // will also need the transport configuration. return mergeManagerSubClazz.getConstructor(TransportConf.class).newInstance(conf); } catch (Exception e) { - logger.error("Unable to create an instance of {}", mergeManagerImplClassName); + defaultLogger.error("Unable to create an instance of {}", mergeManagerImplClassName); return new NoOpMergedShuffleFileManager(conf); } } diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index 8b7ed180af460..52d365a0694cf 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -806,6 +806,17 @@ The following extra configuration options are available when the shuffle service NodeManager. + + spark.yarn.shuffle.service.logs.namespace + (not set) + + A namespace which will be appended to the class name when forming the logger name to use for + emitting logs from the YARN shuffle service, like + org.apache.spark.network.yarn.YarnShuffleService.logsNamespaceValue. Since some logging frameworks + may expect the logger name to look like a class name, it's generally recommended to provide a value which + would be a valid Java package or class name and not include spaces. + + Please note that the instructions above assume that the default shuffle service name,