From 940db2b19ef7d83e64054e445d7e827f14f3430a Mon Sep 17 00:00:00 2001 From: clakech Date: Thu, 6 Aug 2015 16:29:42 +0200 Subject: [PATCH] config spark-shell to use local ip as driver host By default, the shell communicates its internal hostname to the workers, so when a worker tries to communicate with the shell, the worker cannot resolve that hostname because it is internal. Using the shell's local IP as the driver host solves this problem. See the Networking section of the Spark configuration documentation: http://spark.apache.org/docs/latest/configuration.html#networking + fix small typo: rename start-worker to start-worker.sh --- Dockerfile | 3 +-- scripts/remove_alias.sh | 4 ---- scripts/spark-shell.sh | 3 +-- scripts/{start-worker => start-worker.sh} | 0 4 files changed, 2 insertions(+), 8 deletions(-) delete mode 100755 scripts/remove_alias.sh rename scripts/{start-worker => start-worker.sh} (100%) diff --git a/Dockerfile b/Dockerfile index 8c72111..fc53f30 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,10 +13,9 @@ RUN apt-get -y install curl RUN curl -s http://d3kbcqa49mib13.cloudfront.net/spark-1.3.0-bin-hadoop2.4.tgz | tar -xz -C /usr/local/ RUN cd /usr/local && ln -s spark-1.3.0-bin-hadoop2.4 spark ADD scripts/start-master.sh /start-master.sh -ADD scripts/start-worker /start-worker.sh +ADD scripts/start-worker.sh /start-worker.sh ADD scripts/spark-shell.sh /spark-shell.sh ADD scripts/spark-defaults.conf /spark-defaults.conf -ADD scripts/remove_alias.sh /remove_alias.sh ENV SPARK_HOME /usr/local/spark ENV SPARK_MASTER_OPTS="-Dspark.driver.port=7001 -Dspark.fileserver.port=7002 -Dspark.broadcast.port=7003 -Dspark.replClassServer.port=7004 -Dspark.blockManager.port=7005 -Dspark.executor.port=7006 -Dspark.ui.port=4040 -Dspark.broadcast.factory=org.apache.spark.broadcast.HttpBroadcastFactory" diff --git a/scripts/remove_alias.sh b/scripts/remove_alias.sh deleted file mode 100755 index 78fe0ca..0000000 --- a/scripts/remove_alias.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/usr/bin/env bash -sed '1d' 
/etc/hosts > tmpHosts -cat tmpHosts > /etc/hosts -rm tmpHosts diff --git a/scripts/spark-shell.sh b/scripts/spark-shell.sh index 86486e3..b4d778f 100755 --- a/scripts/spark-shell.sh +++ b/scripts/spark-shell.sh @@ -1,9 +1,8 @@ #!/usr/bin/env bash export SPARK_LOCAL_IP=`awk 'NR==1 {print $1}' /etc/hosts` -/remove_alias.sh # problems with hostname alias, see https://issues.apache.org/jira/browse/SPARK-6680 cd /usr/local/spark ./bin/spark-shell \ --master spark://${SPARK_MASTER_PORT_7077_TCP_ADDR}:${SPARK_MASTER_ENV_SPARK_MASTER_PORT} \ - -i ${SPARK_LOCAL_IP} \ + --conf spark.driver.host=${SPARK_LOCAL_IP} \ --properties-file /spark-defaults.conf \ "$@" diff --git a/scripts/start-worker b/scripts/start-worker.sh similarity index 100% rename from scripts/start-worker rename to scripts/start-worker.sh