author     François Garillot    2016-07-12 00:32:25 +0200
committer  François Garillot    2016-07-12 00:42:23 +0200
commit     6c2bfc0675639904047b6fbb38789f5c0c569823 (patch)
tree       36c63a87cbc0766d5de4a4dc30acc9efd5d45234
parent     59c75dba2625d4dff23108983d7396c189355a62 (diff)
download   aur-6c2bfc0675639904047b6fbb38789f5c0c569823.tar.gz
An update from Xiang Gao <qasdfgtyuiop@gmail.com>
Squashed commit of the following:

commit 1700b03fcd6b95198564e07345c482dbc85c7c92
Author: Xiang Gao <qasdfgtyuiop@gmail.com>
Date:   Sun Jul 3 08:39:50 2016 -0400

    add support for slave service

commit 70aecf80a40341836dfa3b1cae222b1db74801f3
Author: Xiang Gao <qasdfgtyuiop@gmail.com>
Date:   Sun Jul 3 08:12:07 2016 -0400

    fix usage

commit f810e7d98b67732af1814a36a65843f1a9b0fae9
Author: Xiang Gao <qasdfgtyuiop@gmail.com>
Date:   Sun Jul 3 08:05:57 2016 -0400

    add service for master

commit 0f597e790c48b3487f47508d1eac785b45248bb0
Author: Xiang Gao <qasdfgtyuiop@gmail.com>
Date:   Sun Jul 3 04:32:16 2016 -0400

    add scripts to run spark in foreground

commit 4eace18e92513bfa5f3fc8fe91a020f55fd67800
Author: Xiang Gao <qasdfgtyuiop@gmail.com>
Date:   Fri Jul 1 05:32:16 2016 -0400

    add rsync as opt deps

commit a5ea131071667416b14cf95ffbe18bef4e9c968f
Author: Xiang Gao <qasdfgtyuiop@gmail.com>
Date:   Wed Jun 29 22:44:05 2016 -0400

    add hadoop as dependency

commit bde492e3354cb2164855f79c49c42037c14d42a4
Author: Xiang Gao <qasdfgtyuiop@gmail.com>
Date:   Wed Jun 29 22:22:26 2016 -0400

    change log files to /var/log/apache-spark
    fix sparkenv.sh to load hadoop classpaths

commit 4625ceb7b05832217a4e1a1c46fe16f645d30fc4
Author: Xiang Gao <qasdfgtyuiop@gmail.com>
Date:   Wed Jun 29 07:20:35 2016 -0400

    fix systemd service file

commit 2354f598de49424942804051f62342b42b3eb1f3
Author: Xiang Gao <qasdfgtyuiop@gmail.com>
Date:   Wed Jun 29 07:04:03 2016 -0400

    Make a lot of changes:
    * upgrade to 1.6.2
    * use pre-built binaries "spark--bin-without-hadoop.tgz" instead of compiling from sources
    * remove dependency on scala and maven which is already built in spark
    * move python2 and hadoop from depends to optdepends
    * add r as optdepends
    * move templates for conf to conf-templates
    * move the whole conf directory to /etc/apache-spark
    * move apache-spark directory to /opt
    * move the work directory to /var/lib/apache-spark
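For orientation, a minimal sketch of how the reworked units would be driven after installing the package (unit names come from this commit; the slave instance shown is the shipped default):

    # start the standalone master now and at boot
    systemctl enable --now apache-spark-master.service

    # start a worker; the template instance (%i) carries the master address
    systemctl enable --now apache-spark-slave@127.0.0.1:7077.service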
-rw-r--r--  .SRCINFO                           35
-rw-r--r--  PKGBUILD                           72
-rw-r--r--  apache-spark-master.service        12
-rw-r--r--  apache-spark-slave@.service        13
-rw-r--r--  apache-spark-standalone.service    14
-rw-r--r--  apache-spark.install                6
-rwxr-xr-x  run-master.sh                      81
-rwxr-xr-x  run-slave.sh                       91
-rwxr-xr-x  spark-daemon-run.sh               139
-rw-r--r--  spark-env.sh                        2

10 files changed, 402 insertions(+), 63 deletions(-)
diff --git a/.SRCINFO b/.SRCINFO
index 76c61983a6da..a602d067ae0a 100644
--- a/.SRCINFO
+++ b/.SRCINFO
@@ -1,27 +1,36 @@
# Generated by mksrcinfo v8
-# Tue Mar 22 10:57:14 UTC 2016
+# Mon Jul 11 22:42:17 UTC 2016
pkgbase = apache-spark
pkgdesc = fast and general engine for large-scale data processing
- pkgver = 1.6.1
+ pkgver = 1.6.2
pkgrel = 1
url = http://spark.apache.org
install = apache-spark.install
arch = any
license = APACHE
- depends = maven>=3.3.3
depends = java-environment>=6
- depends = scala
- depends = python2>=2.7
- depends = hadoop>=2.6
- optdepends = python: PYSPARK_PYTHON=python3 pyspark
- optdepends = ipython: PYSPARK_DRIVER_PYTHON=ipython pyspark; IPYTHON=1 pyspark
+ depends = hadoop
+ optdepends = python2: python2 support for pyspark
+ optdepends = ipython2: ipython2 support for pyspark
+ optdepends = python: python3 support for pyspark
+ optdepends = ipython: ipython3 support for pyspark
+ optdepends = r: support for sparkR
+ optdepends = rsync: support rsync hadoop binaries from master
backup = etc/apache-spark/spark-env.sh
- source = http://d3kbcqa49mib13.cloudfront.net/spark-1.6.1.tgz
- source = apache-spark-standalone.service
+ source = http://d3kbcqa49mib13.cloudfront.net/spark-1.6.2-bin-without-hadoop.tgz
+ source = apache-spark-master.service
+ source = apache-spark-slave@.service
source = spark-env.sh
- md5sums = 12e1368138840b62f08ed22a8637955d
- md5sums = bb7d8b85366e6f9cc0b2777eaea161a8
- md5sums = 0913001583e607849270090555dbd309
+ source = spark-daemon-run.sh
+ source = run-master.sh
+ source = run-slave.sh
+ md5sums = 304394fbe2899211217f0cd9e9b2b5d9
+ md5sums = 9ffe1f9c4bb2ea4e5a75ab6469fe76d4
+ md5sums = 8d34bd4cc946f46625597ca606da8ab6
+ md5sums = f8cc449543df418b8adfcc36a3afb384
+ md5sums = 8ff953f0436209b6190add59703a34f0
+ md5sums = 028472b82e9def7d5d409f008d064fe2
+ md5sums = 99115eedc453c9b8ca04cca2e32e4537
pkgname = apache-spark
diff --git a/PKGBUILD b/PKGBUILD
index d5424f361816..cb5bf5f1d634 100644
--- a/PKGBUILD
+++ b/PKGBUILD
@@ -2,66 +2,72 @@
# Contributor: Christian Krause ("wookietreiber") <kizkizzbangbang@gmail.com>
pkgname=apache-spark
-pkgver=1.6.1
+pkgver=1.6.2
pkgrel=1
pkgdesc="fast and general engine for large-scale data processing"
arch=('any')
url="http://spark.apache.org"
license=('APACHE')
-depends=('maven>=3.3.3' 'java-environment>=6' 'scala' 'python2>=2.7' 'hadoop>=2.6')
-optdepends=('python: PYSPARK_PYTHON=python3 pyspark'
- 'ipython: PYSPARK_DRIVER_PYTHON=ipython pyspark; IPYTHON=1 pyspark')
+depends=('java-environment>=6' 'hadoop')
+optdepends=('python2: python2 support for pyspark'
+ 'ipython2: ipython2 support for pyspark'
+ 'python: python3 support for pyspark'
+ 'ipython: ipython3 support for pyspark'
+ 'r: support for sparkR'
+ 'rsync: support rsync hadoop binaries from master')
install=apache-spark.install
-source=("http://d3kbcqa49mib13.cloudfront.net/spark-$pkgver.tgz"
- 'apache-spark-standalone.service'
- 'spark-env.sh')
-md5sums=('12e1368138840b62f08ed22a8637955d'
- 'bb7d8b85366e6f9cc0b2777eaea161a8'
- '0913001583e607849270090555dbd309')
+source=("http://d3kbcqa49mib13.cloudfront.net/spark-${pkgver}-bin-without-hadoop.tgz"
+ 'apache-spark-master.service'
+ 'apache-spark-slave@.service'
+ 'spark-env.sh'
+ 'spark-daemon-run.sh'
+ 'run-master.sh'
+ 'run-slave.sh')
+md5sums=('304394fbe2899211217f0cd9e9b2b5d9'
+ '9ffe1f9c4bb2ea4e5a75ab6469fe76d4'
+ '8d34bd4cc946f46625597ca606da8ab6'
+ 'f8cc449543df418b8adfcc36a3afb384'
+ '8ff953f0436209b6190add59703a34f0'
+ '028472b82e9def7d5d409f008d064fe2'
+ '99115eedc453c9b8ca04cca2e32e4537')
backup=('etc/apache-spark/spark-env.sh')
PKGEXT=${PKGEXT:-'.pkg.tar.xz'}
prepare() {
- mkdir -p "$srcdir/spark-$pkgver"
- cd "$srcdir/spark-$pkgver"
-
- sed -i 's|pid=$SPARK_PID_DIR/spark-$SPARK_IDENT_STRING-$command-$instance.pid|pid=/var/lib/apache-spark/spark-daemon.pid|' sbin/spark-daemon.sh
-}
-
-build() {
- cd "$srcdir/spark-$pkgver"
-
- export MAVEN_OPTS="-Xmx2g -XX:MaxPermSize=512M -XX:ReservedCodeCacheSize=512m"
-
- dev/change-scala-version.sh 2.11
-
- JAVA_HOME=/usr/lib/jvm/default-runtime ./make-distribution.sh -Pscala-2.11 -DskipTests -Dmaven.repo.local=/tmp -DautoVersionSubmodules=true -U -Djline.version=2.13 -Djline.groupid=jline -Pyarn -Phadoop-2.6
+ cd "$srcdir/spark-${pkgver}-bin-without-hadoop"
}
package() {
- cd "$srcdir/spark-$pkgver"
+ cd "$srcdir/spark-${pkgver}-bin-without-hadoop"
- install -d "$pkgdir/usr/bin" "$pkgdir/usr/share"
+ install -d "$pkgdir/usr/bin" "$pkgdir/opt" "$pkgdir/var/log/apache-spark"
- cp -r "$srcdir/spark-$pkgver/dist" "$pkgdir/usr/share/apache-spark/"
+ cp -r "$srcdir/spark-${pkgver}-bin-without-hadoop" "$pkgdir/opt/apache-spark/"
cd "$pkgdir/usr/bin"
for binary in beeline pyspark sparkR spark-class spark-shell spark-sql spark-submit load-spark-env.sh; do
- binpath="/usr/share/apache-spark/bin/$binary"
+ binpath="/opt/apache-spark/bin/$binary"
ln -s "$binpath" $binary
- sed -i 's|^export SPARK_HOME=.*$|export SPARK_HOME=/usr/share/apache-spark|' "$pkgdir/$binpath"
+ sed -i 's|^export SPARK_HOME=.*$|export SPARK_HOME=/opt/apache-spark|' "$pkgdir/$binpath"
done
mkdir -p $pkgdir/etc/profile.d
echo '#!/bin/sh' > $pkgdir/etc/profile.d/apache-spark.sh
- echo 'SPARK_HOME=/usr/share/apache-spark' >> $pkgdir/etc/profile.d/apache-spark.sh
+ echo 'SPARK_HOME=/opt/apache-spark' >> $pkgdir/etc/profile.d/apache-spark.sh
echo 'export SPARK_HOME' >> $pkgdir/etc/profile.d/apache-spark.sh
chmod 755 $pkgdir/etc/profile.d/apache-spark.sh
- install -Dm644 "$srcdir/apache-spark-standalone.service" "$pkgdir/usr/lib/systemd/system/apache-spark-standalone.service"
+ install -Dm644 "$srcdir/apache-spark-master.service" "$pkgdir/usr/lib/systemd/system/apache-spark-master.service"
+ install -Dm644 "$srcdir/apache-spark-slave@.service" "$pkgdir/usr/lib/systemd/system/apache-spark-slave@.service"
install -Dm644 "$srcdir/spark-env.sh" "$pkgdir/etc/apache-spark/spark-env.sh"
+ for script in run-master.sh run-slave.sh spark-daemon-run.sh; do
+ install -Dm755 "$srcdir/$script" "$pkgdir/opt/apache-spark/sbin/$script"
+ done
+ install -Dm644 "$srcdir/spark-${pkgver}-bin-without-hadoop/conf"/* "$pkgdir/etc/apache-spark"
- cd "$pkgdir/usr/share/apache-spark/conf"
- ln -sf "/etc/apache-spark/spark-env.sh" .
+ cd "$pkgdir/opt/apache-spark"
+ mv conf conf-templates
+ ln -sf "/etc/apache-spark" conf
+ ln -sf "/var/lib/apache-spark/work" .
}
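The PKGBUILD above no longer compiles anything; a sketch of the usual AUR workflow that would exercise it (standard makepkg usage, not part of the commit):

    git clone https://aur.archlinux.org/apache-spark.git
    cd apache-spark
    # fetches spark-1.6.2-bin-without-hadoop.tgz, checks md5sums, builds and installs
    makepkg -si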
diff --git a/apache-spark-master.service b/apache-spark-master.service
new file mode 100644
index 000000000000..b8bc98bce44c
--- /dev/null
+++ b/apache-spark-master.service
@@ -0,0 +1,12 @@
+[Unit]
+Description=Apache Spark Standalone Master
+After=network.target
+
+[Service]
+User=apache-spark
+Group=apache-spark
+Environment=SPARK_LOG_DIR=/var/log/apache-spark
+ExecStart=/opt/apache-spark/sbin/run-master.sh
+
+[Install]
+WantedBy=multi-user.target
diff --git a/apache-spark-slave@.service b/apache-spark-slave@.service
new file mode 100644
index 000000000000..453b3465ce36
--- /dev/null
+++ b/apache-spark-slave@.service
@@ -0,0 +1,13 @@
+[Unit]
+Description=Apache Spark Standalone Slave
+After=network.target
+
+[Service]
+User=apache-spark
+Group=apache-spark
+Environment=SPARK_LOG_DIR=/var/log/apache-spark
+ExecStart=/opt/apache-spark/sbin/run-slave.sh %i
+
+[Install]
+WantedBy=multi-user.target
+DefaultInstance=127.0.0.1:7077
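Note that the template carries the master address in its instance name, so one host can run workers against several masters. A sketch, with an illustrative hostname:

    # worker pointed at a remote master (hostname is hypothetical)
    systemctl start 'apache-spark-slave@master.example.org:7077.service'

    # enabling the bare template falls back to DefaultInstance (127.0.0.1:7077)
    systemctl enable apache-spark-slave@.service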
diff --git a/apache-spark-standalone.service b/apache-spark-standalone.service
deleted file mode 100644
index 3f81c9691450..000000000000
--- a/apache-spark-standalone.service
+++ /dev/null
@@ -1,14 +0,0 @@
-[Unit]
-Description=Apache Spark Standalone Local Cluster
-After=network.target
-
-[Service]
-User=apache-spark
-Group=apache-spark
-Environment=SPARK_LOG_DIR=/var/lib/apache-spark/logs
-PIDFile=/var/lib/apache-spark/spark-daemon.pid
-ExecStart=/usr/share/apache-spark/sbin/start-all.sh
-ExecStop=/usr/share/apache-spark/sbin/stop-all.sh
-
-[Install]
-WantedBy=multi-user.target
diff --git a/apache-spark.install b/apache-spark.install
index 9ffbbf29abe8..7aa03480833c 100644
--- a/apache-spark.install
+++ b/apache-spark.install
@@ -2,10 +2,10 @@ post_install() {
groupadd -r -f apache-spark
useradd -r -g apache-spark -s /usr/bin/nologin -d /var/lib/apache-spark apache-spark || true
- [[ ! -d /var/lib/apache-spark ]] &&
- install -d /var/lib/apache-spark
+ [[ ! -d /var/lib/apache-spark/work ]] &&
+ install -d /var/lib/apache-spark/work
- chown -R apache-spark:apache-spark /var/lib/apache-spark
+ chown -R apache-spark:apache-spark /var/{lib,log}/apache-spark
}
post_remove() {
diff --git a/run-master.sh b/run-master.sh
new file mode 100755
index 000000000000..a60ca791adc1
--- /dev/null
+++ b/run-master.sh
@@ -0,0 +1,81 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Starts the master on the machine this script is executed on.
+
+if [ -z "${SPARK_HOME}" ]; then
+ export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+
+# NOTE: This exact class name is matched downstream by SparkSubmit.
+# Any changes need to be reflected there.
+CLASS="org.apache.spark.deploy.master.Master"
+
+if [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
+ echo "Usage: ./sbin/run-master.sh [options]"
+ pattern="Usage:"
+ pattern+="\|Using Spark's default log4j profile:"
+ pattern+="\|Registered signal handlers for"
+
+ "${SPARK_HOME}"/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2
+ exit 1
+fi
+
+ORIGINAL_ARGS="$@"
+
+START_TACHYON=false
+
+while (( "$#" )); do
+case $1 in
+ --with-tachyon)
+ if [ ! -e "${SPARK_HOME}"/tachyon/bin/tachyon ]; then
+ echo "Error: --with-tachyon specified, but tachyon not found."
+ exit -1
+ fi
+ START_TACHYON=true
+ ;;
+ esac
+shift
+done
+
+. "${SPARK_HOME}/sbin/spark-config.sh"
+
+. "${SPARK_HOME}/bin/load-spark-env.sh"
+
+if [ "$SPARK_MASTER_PORT" = "" ]; then
+ SPARK_MASTER_PORT=7077
+fi
+
+if [ "$SPARK_MASTER_IP" = "" ]; then
+ SPARK_MASTER_IP=`hostname`
+fi
+
+if [ "$SPARK_MASTER_WEBUI_PORT" = "" ]; then
+ SPARK_MASTER_WEBUI_PORT=8080
+fi
+
+"${SPARK_HOME}/sbin"/spark-daemon-run.sh start $CLASS 1 \
+ --ip $SPARK_MASTER_IP --port $SPARK_MASTER_PORT --webui-port $SPARK_MASTER_WEBUI_PORT \
+ $ORIGINAL_ARGS
+
+if [ "$START_TACHYON" == "true" ]; then
+ "${SPARK_HOME}"/tachyon/bin/tachyon bootstrap-conf $SPARK_MASTER_IP
+ "${SPARK_HOME}"/tachyon/bin/tachyon format -s
+ "${SPARK_HOME}"/tachyon/bin/tachyon-start.sh master
+fi
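run-master.sh fills in SPARK_MASTER_IP, SPARK_MASTER_PORT and SPARK_MASTER_WEBUI_PORT before delegating to spark-daemon-run.sh, so all three can be overridden from the environment. A sketch, with an illustrative address:

    # bind explicitly instead of the `hostname`/7077/8080 defaults
    SPARK_MASTER_IP=192.168.1.10 SPARK_MASTER_WEBUI_PORT=8090 \
        /opt/apache-spark/sbin/run-master.sh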
diff --git a/run-slave.sh b/run-slave.sh
new file mode 100755
index 000000000000..1f92aa3bee3e
--- /dev/null
+++ b/run-slave.sh
@@ -0,0 +1,91 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Starts a slave on the machine this script is executed on.
+#
+# Environment Variables
+#
+# SPARK_WORKER_INSTANCES The number of worker instances to run on this
+# slave. Default is 1.
+# SPARK_WORKER_PORT The base port number for the first worker. If set,
+# subsequent workers will increment this number. If
+# unset, Spark will find a valid port number, but
+# with no guarantee of a predictable pattern.
+# SPARK_WORKER_WEBUI_PORT The base port for the web interface of the first
+# worker. Subsequent workers will increment this
+# number. Default is 8081.
+
+if [ -z "${SPARK_HOME}" ]; then
+ export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+
+# NOTE: This exact class name is matched downstream by SparkSubmit.
+# Any changes need to be reflected there.
+CLASS="org.apache.spark.deploy.worker.Worker"
+
+if [[ $# -lt 1 ]] || [[ "$@" = *--help ]] || [[ "$@" = *-h ]]; then
+ echo "Usage: ./sbin/run-slave.sh [options] <master>"
+ pattern="Usage:"
+ pattern+="\|Using Spark's default log4j profile:"
+ pattern+="\|Registered signal handlers for"
+
+ "${SPARK_HOME}"/bin/spark-class $CLASS --help 2>&1 | grep -v "$pattern" 1>&2
+ exit 1
+fi
+
+. "${SPARK_HOME}/sbin/spark-config.sh"
+
+. "${SPARK_HOME}/bin/load-spark-env.sh"
+
+# First argument should be the master; we need to store it aside because we may
+# need to insert arguments between it and the other arguments
+MASTER=$1
+shift
+
+# Determine desired worker port
+if [ "$SPARK_WORKER_WEBUI_PORT" = "" ]; then
+ SPARK_WORKER_WEBUI_PORT=8081
+fi
+
+# Start up the appropriate number of workers on this machine.
+# quick local function to start a worker
+function start_instance {
+ WORKER_NUM=$1
+ shift
+
+ if [ "$SPARK_WORKER_PORT" = "" ]; then
+ PORT_FLAG=
+ PORT_NUM=
+ else
+ PORT_FLAG="--port"
+ PORT_NUM=$(( $SPARK_WORKER_PORT + $WORKER_NUM - 1 ))
+ fi
+ WEBUI_PORT=$(( $SPARK_WORKER_WEBUI_PORT + $WORKER_NUM - 1 ))
+
+ "${SPARK_HOME}/sbin"/spark-daemon-run.sh start $CLASS $WORKER_NUM \
+ --webui-port "$WEBUI_PORT" $PORT_FLAG $PORT_NUM $MASTER "$@"
+}
+
+if [ "$SPARK_WORKER_INSTANCES" = "" ]; then
+ start_instance 1 "$@"
+else
+ for ((i=0; i<$SPARK_WORKER_INSTANCES; i++)); do
+ start_instance $(( 1 + $i )) "$@"
+ done
+fi
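The start_instance loop above offsets both the worker port and the web UI port by the instance number, so multiple workers on one host do not collide. For example (master URL assumed to be the local default):

    # two workers: ports 7100/7101, web UIs 8081/8082
    SPARK_WORKER_INSTANCES=2 SPARK_WORKER_PORT=7100 \
        /opt/apache-spark/sbin/run-slave.sh spark://127.0.0.1:7077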
diff --git a/spark-daemon-run.sh b/spark-daemon-run.sh
new file mode 100755
index 000000000000..34e3a80fa37a
--- /dev/null
+++ b/spark-daemon-run.sh
@@ -0,0 +1,139 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Runs a Spark daemon in the foreground.
+#
+# Environment Variables
+#
+# SPARK_CONF_DIR Alternate conf dir. Default is ${SPARK_HOME}/conf.
+# SPARK_LOG_DIR Where log files are stored. ${SPARK_HOME}/logs by default.
+# SPARK_MASTER host:path where spark code should be rsync'd from
+# SPARK_IDENT_STRING A string representing this instance of spark. $USER by default
+# SPARK_NICENESS The scheduling priority for daemons. Defaults to 0.
+##
+
+usage="Usage: spark-daemon-run.sh [--config <conf-dir>] (start|submit) <spark-command> <spark-instance-number> <args...>"
+
+# if no args specified, show usage
+if [ $# -le 1 ]; then
+ echo $usage
+ exit 1
+fi
+
+if [ -z "${SPARK_HOME}" ]; then
+ export SPARK_HOME="$(cd "`dirname "$0"`"/..; pwd)"
+fi
+
+. "${SPARK_HOME}/sbin/spark-config.sh"
+
+# get arguments
+
+# Check if --config is passed as an argument. It is an optional parameter.
+# Exit if the argument is not a directory.
+
+if [ "$1" == "--config" ]
+then
+ shift
+ conf_dir="$1"
+ if [ ! -d "$conf_dir" ]
+ then
+ echo "ERROR : $conf_dir is not a directory"
+ echo $usage
+ exit 1
+ else
+ export SPARK_CONF_DIR="$conf_dir"
+ fi
+ shift
+fi
+
+mode=$1
+shift
+command=$1
+shift
+instance=$1
+shift
+
+spark_rotate_log ()
+{
+ log=$1;
+ num=5;
+ if [ -n "$2" ]; then
+ num=$2
+ fi
+ if [ -f "$log" ]; then # rotate logs
+ while [ $num -gt 1 ]; do
+ prev=`expr $num - 1`
+ [ -f "$log.$prev" ] && mv "$log.$prev" "$log.$num"
+ num=$prev
+ done
+ mv "$log" "$log.$num";
+ fi
+}
+
+. "${SPARK_HOME}/bin/load-spark-env.sh"
+
+if [ "$SPARK_IDENT_STRING" = "" ]; then
+ export SPARK_IDENT_STRING="$USER"
+fi
+
+
+
+# get log directory
+if [ "$SPARK_LOG_DIR" = "" ]; then
+ export SPARK_LOG_DIR="${SPARK_HOME}/logs"
+fi
+mkdir -p "$SPARK_LOG_DIR"
+touch "$SPARK_LOG_DIR"/.spark_test > /dev/null 2>&1
+TEST_LOG_DIR=$?
+if [ "${TEST_LOG_DIR}" = "0" ]; then
+ rm -f "$SPARK_LOG_DIR"/.spark_test
+else
+ chown "$SPARK_IDENT_STRING" "$SPARK_LOG_DIR"
+fi
+
+# some variables
+log="$SPARK_LOG_DIR/spark-$SPARK_IDENT_STRING-$command-$instance-$HOSTNAME.out"
+
+# Set default scheduling priority
+if [ "$SPARK_NICENESS" = "" ]; then
+ export SPARK_NICENESS=0
+fi
+
+if [ "$SPARK_MASTER" != "" ]; then
+ echo rsync from "$SPARK_MASTER"
+ rsync -a -e ssh --delete --exclude=.svn --exclude='logs/*' --exclude='contrib/hod/logs/*' "$SPARK_MASTER/" "${SPARK_HOME}"
+fi
+
+spark_rotate_log "$log"
+echo "running $command, logging to $log"
+
+case "$mode" in
+ (start)
+ exec nice -n "$SPARK_NICENESS" "${SPARK_HOME}"/bin/spark-class $command "$@" >> "$log" 2>&1 < /dev/null
+ ;;
+
+ (submit)
+ exec nice -n "$SPARK_NICENESS" "${SPARK_HOME}"/bin/spark-submit --class $command "$@" >> "$log" 2>&1 < /dev/null
+ ;;
+
+ (*)
+ echo "unknown mode: $mode"
+ exit 1
+ ;;
+esac
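Driven directly, the runner expects the mode, the daemon class and an instance number ahead of the daemon's own flags, mirroring the call run-master.sh makes. A sketch:

    # run a master in the foreground, output appended to the rotated log
    /opt/apache-spark/sbin/spark-daemon-run.sh start \
        org.apache.spark.deploy.master.Master 1 \
        --ip 127.0.0.1 --port 7077 --webui-port 8080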
diff --git a/spark-env.sh b/spark-env.sh
index fe83e9da2e9f..b11344f9722a 100644
--- a/spark-env.sh
+++ b/spark-env.sh
@@ -1,4 +1,6 @@
#!/usr/bin/env bash
+export JAVA_HOME=/usr/lib/jvm/default-runtime
+export SPARK_DIST_CLASSPATH=$(hadoop classpath)
SPARK_MASTER_IP=127.0.0.1
SPARK_LOCAL_IP=127.0.0.1
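Since the package now ships the without-hadoop build, spark-env.sh must hand Spark the Hadoop jars at launch. A quick sanity check (output depends on the installed hadoop package):

    # prints the classpath entries that SPARK_DIST_CLASSPATH will carry
    hadoop classpath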