From 7bc74d4b35ebdd6a6eae0cd80f1520095eff6f08 Mon Sep 17 00:00:00 2001 From: Prabodh Agarwal Date: Fri, 9 Feb 2024 20:20:43 +0530 Subject: [PATCH] add support for pyspark as well --- bin/comet-pyspark | 84 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100755 bin/comet-pyspark diff --git a/bin/comet-pyspark b/bin/comet-pyspark new file mode 100755 index 0000000000..52697cfe29 --- /dev/null +++ b/bin/comet-pyspark @@ -0,0 +1,84 @@ +#!/bin/bash + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +set -e + +if [[ -z ${SPARK_HOME} ]]; then + echo "ERROR: SPARK_HOME is not set: please set it to point to a Spark 3.4.0 distribution" + exit 1 +fi + +POSITIONAL=() +while [[ $# -gt 0 ]]; do + key="$1" + + case $key in + -r|--rebuild) + REBUILD=YES + shift + ;; + -o|--outdir) + COMET_OUTDIR=$2 + shift + shift + ;; + -d|--comet-dir) + COMET_DIR=$2 + shift + shift + ;; + *) + POSITIONAL+=("$1") + shift + ;; + esac +done + +set -- "${POSITIONAL[@]}" + +COMET_DIR="${COMET_DIR:-$HOME/git/comet}" +SPARK_VERSION_SHORT=3.4 +SCALA_BINARY_VERSION=2.12 +COMET_VERSION=0.1.0-SNAPSHOT +COMET_SPARK_JAR=comet-spark-spark${SPARK_VERSION_SHORT}_${SCALA_BINARY_VERSION}-${COMET_VERSION}.jar +COMET_OUTDIR="${COMET_OUTDIR:-/tmp}" + +if [[ ! -d $COMET_DIR ]]; then + echo "Comet repo: $COMET_DIR, doesn't exist" + exit 1 +fi + +if [[ ! -d $COMET_OUTDIR ]]; then + echo "Output directory for Comet Spark library: $COMET_OUTDIR, doesn't exist" + exit 1 +fi + +if [[ "X$REBUILD" == "XYES" ]]; then + cd $COMET_DIR && make release + cd $COMET_DIR/spark && cp target/${COMET_SPARK_JAR} $COMET_OUTDIR/${COMET_SPARK_JAR} +fi + +RUST_BACKTRACE=1 $SPARK_HOME/bin/pyspark \ + --jars $COMET_OUTDIR/${COMET_SPARK_JAR} \ + --conf spark.sql.extensions=org.apache.comet.CometSparkSessionExtensions \ + --conf spark.comet.enabled=true \ + --conf spark.comet.exec.enabled=true \ + --conf spark.comet.exec.all.enabled=true \ +$@