diff --git a/ami-list-ubuntu/ami-list-ubuntu-amd64 b/ami-list-ubuntu/ami-list-ubuntu-amd64 new file mode 100644 index 00000000..ef760f62 --- /dev/null +++ b/ami-list-ubuntu/ami-list-ubuntu-amd64 @@ -0,0 +1,88 @@ +ap-northeast-1,trusty,14.04 LTS,amd64,hvm:ebs,20160809.1,ami-03d01062,hvm +ap-northeast-1,trusty,14.04 LTS,amd64,hvm:ebs-io1,20160809.1,ami-90d111f1,hvm +ap-northeast-1,trusty,14.04 LTS,amd64,hvm:ebs-ssd,20160809.1,ami-49d31328,hvm +ap-northeast-1,trusty,14.04 LTS,amd64,hvm:instance-store,20160809.1,ami-55d21234,hvm +ap-northeast-1,trusty,14.04 LTS,amd64,ebs,20160809.1,ami-47d41426,aki-176bf516 +ap-northeast-1,trusty,14.04 LTS,amd64,ebs-io1,20160809.1,ami-97d111f6,aki-176bf516 +ap-northeast-1,trusty,14.04 LTS,amd64,ebs-ssd,20160809.1,ami-24d11145,aki-176bf516 +ap-northeast-1,trusty,14.04 LTS,amd64,instance-store,20160809.1,ami-55d01034,aki-176bf516 +ap-southeast-1,trusty,14.04 LTS,amd64,hvm:ebs,20160809.1,ami-b84799db,hvm +ap-southeast-1,trusty,14.04 LTS,amd64,hvm:ebs-io1,20160809.1,ami-f0459b93,hvm +ap-southeast-1,trusty,14.04 LTS,amd64,hvm:ebs-ssd,20160809.1,ami-5e429c3d,hvm +ap-southeast-1,trusty,14.04 LTS,amd64,hvm:instance-store,20160809.1,ami-41409e22,hvm +ap-southeast-1,trusty,14.04 LTS,amd64,ebs,20160809.1,ami-af4997cc,aki-503e7402 +ap-southeast-1,trusty,14.04 LTS,amd64,ebs-io1,20160809.1,ami-af449acc,aki-503e7402 +ap-southeast-1,trusty,14.04 LTS,amd64,ebs-ssd,20160809.1,ami-a2449ac1,aki-503e7402 +ap-southeast-1,trusty,14.04 LTS,amd64,instance-store,20160809.1,ami-dc459bbf,aki-503e7402 +ap-southeast-2,trusty,14.04 LTS,amd64,hvm:ebs,20160809.1,ami-47f1c524,hvm +ap-southeast-2,trusty,14.04 LTS,amd64,hvm:ebs-io1,20160809.1,ami-def2c6bd,hvm +ap-southeast-2,trusty,14.04 LTS,amd64,hvm:ebs-ssd,20160809.1,ami-25f3c746,hvm +ap-southeast-2,trusty,14.04 LTS,amd64,hvm:instance-store,20160809.1,ami-41f1c522,hvm +ap-southeast-2,trusty,14.04 LTS,amd64,ebs,20160809.1,ami-0cf0c46f,aki-c362fff9 +ap-southeast-2,trusty,14.04 LTS,amd64,ebs-io1,20160809.1,ami-1df1c57e,aki-c362fff9 +ap-southeast-2,trusty,14.04 LTS,amd64,ebs-ssd,20160809.1,ami-81f3c7e2,aki-c362fff9 +ap-southeast-2,trusty,14.04 LTS,amd64,instance-store,20160809.1,ami-fef2c69d,aki-c362fff9 +cn-north-1,trusty,14.04 LTS,amd64,hvm:ebs,20160714,ami-67af7a0a,hvm +cn-north-1,trusty,14.04 LTS,amd64,hvm:ebs-io1,20160714,ami-bfad78d2,hvm +cn-north-1,trusty,14.04 LTS,amd64,hvm:ebs-ssd,20160714,ami-bead78d3,hvm +cn-north-1,trusty,14.04 LTS,amd64,hvm:instance-store,20160714,ami-02af7a6f,hvm +cn-north-1,trusty,14.04 LTS,amd64,ebs,20160714,ami-58ae7b35,aki-9e8f1da7 +cn-north-1,trusty,14.04 LTS,amd64,ebs-io1,20160714,ami-f2ad789f,aki-9e8f1da7 +cn-north-1,trusty,14.04 LTS,amd64,ebs-ssd,20160714,ami-ccae7ba1,aki-9e8f1da7 +cn-north-1,trusty,14.04 LTS,amd64,instance-store,20160714,ami-79ae7b14,aki-9e8f1da7 +eu-central-1,trusty,14.04 LTS,amd64,hvm:ebs,20160809.1,ami-d3cd3bbc,hvm +eu-central-1,trusty,14.04 LTS,amd64,hvm:ebs-io1,20160809.1,ami-51cd3b3e,hvm +eu-central-1,trusty,14.04 LTS,amd64,hvm:ebs-ssd,20160809.1,ami-b1cf39de,hvm +eu-central-1,trusty,14.04 LTS,amd64,hvm:instance-store,20160809.1,ami-6acb3d05,hvm +eu-central-1,trusty,14.04 LTS,amd64,ebs,20160809.1,ami-d1cd3bbe,aki-184c7a05 +eu-central-1,trusty,14.04 LTS,amd64,ebs-io1,20160809.1,ami-b6cf39d9,aki-184c7a05 +eu-central-1,trusty,14.04 LTS,amd64,ebs-ssd,20160809.1,ami-09cf3966,aki-184c7a05 +eu-central-1,trusty,14.04 LTS,amd64,instance-store,20160809.1,ami-d3cf39bc,aki-184c7a05 +eu-west-1,trusty,14.04 LTS,amd64,hvm:ebs,20160809.1,ami-a7412ad4,hvm +eu-west-1,trusty,14.04 
LTS,amd64,hvm:ebs-io1,20160809.1,ami-5a452e29,hvm +eu-west-1,trusty,14.04 LTS,amd64,hvm:ebs-ssd,20160809.1,ami-55452e26,hvm +eu-west-1,trusty,14.04 LTS,amd64,hvm:instance-store,20160809.1,ami-5d42292e,hvm +eu-west-1,trusty,14.04 LTS,amd64,ebs,20160809.1,ami-e4452e97,aki-52a34525 +eu-west-1,trusty,14.04 LTS,amd64,ebs-io1,20160809.1,ami-3f472c4c,aki-52a34525 +eu-west-1,trusty,14.04 LTS,amd64,ebs-ssd,20160809.1,ami-5b452e28,aki-52a34525 +eu-west-1,trusty,14.04 LTS,amd64,instance-store,20160809.1,ami-07492274,aki-52a34525 +sa-east-1,trusty,14.04 LTS,amd64,hvm:ebs,20160809.1,ami-e09d0b8c,hvm +sa-east-1,trusty,14.04 LTS,amd64,hvm:ebs-io1,20160809.1,ami-f89d0b94,hvm +sa-east-1,trusty,14.04 LTS,amd64,hvm:ebs-ssd,20160809.1,ami-97980efb,hvm +sa-east-1,trusty,14.04 LTS,amd64,hvm:instance-store,20160809.1,ami-1a980e76,hvm +sa-east-1,trusty,14.04 LTS,amd64,ebs,20160809.1,ami-16980e7a,aki-5553f448 +sa-east-1,trusty,14.04 LTS,amd64,ebs-io1,20160809.1,ami-8b9a0ce7,aki-5553f448 +sa-east-1,trusty,14.04 LTS,amd64,ebs-ssd,20160809.1,ami-98980ef4,aki-5553f448 +sa-east-1,trusty,14.04 LTS,amd64,instance-store,20160809.1,ami-6c9f0900,aki-5553f448 +us-east-1,trusty,14.04 LTS,amd64,hvm:ebs,20160809.1,ami-d90d92ce,hvm +us-east-1,trusty,14.04 LTS,amd64,hvm:ebs-io1,20160809.1,ami-1b0d920c,hvm +us-east-1,trusty,14.04 LTS,amd64,hvm:ebs-ssd,20160809.1,ami-8e0b9499,hvm +us-east-1,trusty,14.04 LTS,amd64,hvm:instance-store,20160809.1,ami-08128d1f,hvm +us-east-1,trusty,14.04 LTS,amd64,ebs,20160809.1,ami-c70f90d0,aki-919dcaf8 +us-east-1,trusty,14.04 LTS,amd64,ebs-io1,20160809.1,ami-510c9346,aki-919dcaf8 +us-east-1,trusty,14.04 LTS,amd64,ebs-ssd,20160809.1,ami-6808977f,aki-919dcaf8 +us-east-1,trusty,14.04 LTS,amd64,instance-store,20160809.1,ami-b266f9a5,aki-919dcaf8 +us-gov-west-1,trusty,14.04 LTS,amd64,hvm:ebs,20160809.1,ami-3508b654,hvm +us-gov-west-1,trusty,14.04 LTS,amd64,hvm:ebs-io1,20160809.1,ami-2e0bb54f,hvm +us-gov-west-1,trusty,14.04 LTS,amd64,hvm:ebs-ssd,20160809.1,ami-2a08b64b,hvm +us-gov-west-1,trusty,14.04 LTS,amd64,hvm:instance-store,20160809.1,ami-2809b749,hvm +us-gov-west-1,trusty,14.04 LTS,amd64,ebs,20160809.1,ami-600cb201,aki-1de98d3e +us-gov-west-1,trusty,14.04 LTS,amd64,ebs-io1,20160809.1,ami-780fb119,aki-1de98d3e +us-gov-west-1,trusty,14.04 LTS,amd64,ebs-ssd,20160809.1,ami-ab0bb5ca,aki-1de98d3e +us-gov-west-1,trusty,14.04 LTS,amd64,instance-store,20160809.1,ami-2b0bb54a,aki-1de98d3e +us-west-1,trusty,14.04 LTS,amd64,hvm:ebs,20160809.1,ami-27743747,hvm +us-west-1,trusty,14.04 LTS,amd64,hvm:ebs-io1,20160809.1,ami-577a3937,hvm +us-west-1,trusty,14.04 LTS,amd64,hvm:ebs-ssd,20160809.1,ami-547b3834,hvm +us-west-1,trusty,14.04 LTS,amd64,hvm:instance-store,20160809.1,ami-fd74379d,hvm +us-west-1,trusty,14.04 LTS,amd64,ebs,20160809.1,ami-d47a39b4,aki-880531cd +us-west-1,trusty,14.04 LTS,amd64,ebs-io1,20160809.1,ami-11753671,aki-880531cd +us-west-1,trusty,14.04 LTS,amd64,ebs-ssd,20160809.1,ami-31753651,aki-880531cd +us-west-1,trusty,14.04 LTS,amd64,instance-store,20160809.1,ami-33773453,aki-880531cd +us-west-2,trusty,14.04 LTS,amd64,hvm:ebs,20160809.1,ami-20be7540,hvm +us-west-2,trusty,14.04 LTS,amd64,hvm:ebs-io1,20160809.1,ami-dcb57ebc,hvm +us-west-2,trusty,14.04 LTS,amd64,hvm:ebs-ssd,20160809.1,ami-70b67d10,hvm +us-west-2,trusty,14.04 LTS,amd64,hvm:instance-store,20160809.1,ami-a9b972c9,hvm +us-west-2,trusty,14.04 LTS,amd64,ebs,20160809.1,ami-abb07bcb,aki-fc8f11cc +us-west-2,trusty,14.04 LTS,amd64,ebs-io1,20160809.1,ami-88b07be8,aki-fc8f11cc +us-west-2,trusty,14.04 
LTS,amd64,ebs-ssd,20160809.1,ami-73b67d13,aki-fc8f11cc +us-west-2,trusty,14.04 LTS,amd64,instance-store,20160809.1,ami-a3bf74c3,aki-fc8f11cc diff --git a/copy-dir.sh b/copy-dir.sh index e1d2bd07..54e176c1 100755 --- a/copy-dir.sh +++ b/copy-dir.sh @@ -37,7 +37,7 @@ DIR=`readlink -f "$1"` DIR=`echo "$DIR"|sed 's@/$@@'` DEST=`dirname "$DIR"` -SLAVES=`cat /root/spark-ec2/slaves` +SLAVES=`cat ~/spark-ec2/slaves` SSH_OPTS="-o StrictHostKeyChecking=no -o ConnectTimeout=5" diff --git a/deploy.generic/root/spark-ec2/ec2-variables.sh b/deploy.generic/root/spark-ec2/ec2-variables.sh old mode 100644 new mode 100755 diff --git a/deploy.ubuntu/home/ubuntu/spark-ec2/ec2-variables.sh b/deploy.ubuntu/home/ubuntu/spark-ec2/ec2-variables.sh new file mode 100755 index 00000000..4f3e8da8 --- /dev/null +++ b/deploy.ubuntu/home/ubuntu/spark-ec2/ec2-variables.sh @@ -0,0 +1,34 @@ +#!/usr/bin/env bash + +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# These variables are automatically filled in by the spark-ec2 script. +export MASTERS="{{master_list}}" +export SLAVES="{{slave_list}}" +export HDFS_DATA_DIRS="{{hdfs_data_dirs}}" +export MAPRED_LOCAL_DIRS="{{mapred_local_dirs}}" +export SPARK_LOCAL_DIRS="{{spark_local_dirs}}" +export MODULES="{{modules}}" +export SPARK_VERSION="{{spark_version}}" +export TACHYON_VERSION="{{tachyon_version}}" +export HADOOP_MAJOR_VERSION="{{hadoop_major_version}}" +export SWAP_MB="{{swap}}" +export SPARK_WORKER_INSTANCES="{{spark_worker_instances}}" +export SPARK_MASTER_OPTS="{{spark_master_opts}}" +export AWS_ACCESS_KEY_ID="{{aws_access_key_id}}" +export AWS_SECRET_ACCESS_KEY="{{aws_secret_access_key}}" diff --git a/deploy_templates.py b/deploy_templates.py index 895e55a4..9745971a 100755 --- a/deploy_templates.py +++ b/deploy_templates.py @@ -17,7 +17,7 @@ master_ram_kb = int( os.popen(mem_command).read().strip()) # This is the master's memory. 
Try to find slave's memory as well -first_slave = os.popen("cat /root/spark-ec2/slaves | head -1").read().strip() +first_slave = os.popen("cat ~/spark-ec2/slaves | head -1").read().strip() slave_mem_command = "ssh -t -o StrictHostKeyChecking=no %s %s" %\ (first_slave, mem_command) @@ -80,11 +80,17 @@ "aws_secret_access_key": os.getenv("AWS_SECRET_ACCESS_KEY"), } -template_dir="/root/spark-ec2/templates" +deploy_root_dir = os.getenv("DEPLOY_ROOT_DIR") + +template_dir=deploy_root_dir+"/spark-ec2/templates" +tmp_template_dir=os.getenv("TMP_TEMPLATE_DIR") for path, dirs, files in os.walk(template_dir): if path.find(".svn") == -1: dest_dir = os.path.join('/', path[len(template_dir):]) + if dest_dir.startswith("/root"): + dest_dir = deploy_root_dir + dest_dir[5:] + dest_dir = "/tmp/templates/" + dest_dir if not os.path.exists(dest_dir): os.makedirs(dest_dir) for filename in files: @@ -92,7 +98,6 @@ dest_file = os.path.join(dest_dir, filename) with open(os.path.join(path, filename)) as src: with open(dest_file, "w") as dest: - print("Configuring " + dest_file) text = src.read() for key in template_vars: text = text.replace("{{" + key + "}}", template_vars[key] or '') diff --git a/ephemeral-hdfs/init.sh b/ephemeral-hdfs/init.sh index 0e18bca8..62d756d9 100755 --- a/ephemeral-hdfs/init.sh +++ b/ephemeral-hdfs/init.sh @@ -1,6 +1,6 @@ #!/bin/bash -pushd /root > /dev/null +pushd ~ > /dev/null if [ -d "ephemeral-hdfs" ]; then echo "Ephemeral HDFS seems to be installed. Exiting." @@ -14,8 +14,8 @@ case "$HADOOP_MAJOR_VERSION" in tar xvzf hadoop-1.0.4.tar.gz > /tmp/spark-ec2_hadoop.log rm hadoop-*.tar.gz mv hadoop-1.0.4/ ephemeral-hdfs/ - sed -i 's/-jvm server/-server/g' /root/ephemeral-hdfs/bin/hadoop - cp /root/hadoop-native/* /root/ephemeral-hdfs/lib/native/ + sed -i 's/-jvm server/-server/g' ~/ephemeral-hdfs/bin/hadoop + cp ~/hadoop-native/* ~/ephemeral-hdfs/lib/native/ ;; 2) wget http://s3.amazonaws.com/spark-related-packages/hadoop-2.0.0-cdh4.2.0.tar.gz @@ -25,9 +25,9 @@ case "$HADOOP_MAJOR_VERSION" in mv hadoop-2.0.0-cdh4.2.0/ ephemeral-hdfs/ # Have single conf dir - rm -rf /root/ephemeral-hdfs/etc/hadoop/ - ln -s /root/ephemeral-hdfs/conf /root/ephemeral-hdfs/etc/hadoop - cp /root/hadoop-native/* /root/ephemeral-hdfs/lib/native/ + rm -rf ~/ephemeral-hdfs/etc/hadoop/ + ln -s ~/ephemeral-hdfs/conf ~/ephemeral-hdfs/etc/hadoop + cp ~/hadoop-native/* ~/ephemeral-hdfs/lib/native/ ;; yarn) wget http://s3.amazonaws.com/spark-related-packages/hadoop-2.4.0.tar.gz @@ -37,14 +37,14 @@ case "$HADOOP_MAJOR_VERSION" in mv hadoop-2.4.0/ ephemeral-hdfs/ # Have single conf dir - rm -rf /root/ephemeral-hdfs/etc/hadoop/ - ln -s /root/ephemeral-hdfs/conf /root/ephemeral-hdfs/etc/hadoop + rm -rf ~/ephemeral-hdfs/etc/hadoop/ + ln -s ~/ephemeral-hdfs/conf ~/ephemeral-hdfs/etc/hadoop ;; *) echo "ERROR: Unknown Hadoop version" return 1 esac -/root/spark-ec2/copy-dir /root/ephemeral-hdfs +~/spark-ec2/copy-dir ~/ephemeral-hdfs popd > /dev/null diff --git a/ephemeral-hdfs/setup-slave.sh b/ephemeral-hdfs/setup-slave.sh index a85c4df7..f91a86a0 100755 --- a/ephemeral-hdfs/setup-slave.sh +++ b/ephemeral-hdfs/setup-slave.sh @@ -1,5 +1,15 @@ #!/bin/bash +#learn the linux distribution +DISTRIB_ID=Centos +if [[ -e /etc/lsb-release ]]; then source /etc/lsb-release; fi +echo "DISTRIB_ID=$DISTRIB_ID" + +if [[ $DISTRIB_ID = "Ubuntu" ]]; then + [[ ! 
-e /var/hadoop ]] && sudo mkdir /var/hadoop + sudo chmod 777 /var/hadoop +fi + # Setup ephemeral-hdfs mkdir -p /mnt/ephemeral-hdfs/logs mkdir -p /mnt/hadoop-logs @@ -24,3 +34,4 @@ create_hadoop_dirs /mnt create_hadoop_dirs /mnt2 create_hadoop_dirs /mnt3 create_hadoop_dirs /mnt4 + diff --git a/ephemeral-hdfs/setup.sh b/ephemeral-hdfs/setup.sh index 1c171056..5cdcb2d8 100755 --- a/ephemeral-hdfs/setup.sh +++ b/ephemeral-hdfs/setup.sh @@ -1,21 +1,27 @@ #!/bin/bash -EPHEMERAL_HDFS=/root/ephemeral-hdfs +#learn the linux distribution +DISTRIB_ID=Centos +if [[ -e /etc/lsb-release ]]; then source /etc/lsb-release; fi +echo "DISTRIB_ID=$DISTRIB_ID" + +EPHEMERAL_HDFS=~/ephemeral-hdfs +USER=`whoami` # Set hdfs url to make it easier HDFS_URL="hdfs://$PUBLIC_DNS:9000" echo "export HDFS_URL=$HDFS_URL" >> ~/.bash_profile -pushd /root/spark-ec2/ephemeral-hdfs > /dev/null +pushd ~/spark-ec2/ephemeral-hdfs > /dev/null source ./setup-slave.sh for node in $SLAVES $OTHER_MASTERS; do echo $node - ssh -t -t $SSH_OPTS root@$node "/root/spark-ec2/ephemeral-hdfs/setup-slave.sh" & sleep 0.3 + ssh -t -t $SSH_OPTS $USER@$node "~/spark-ec2/ephemeral-hdfs/setup-slave.sh" & sleep 0.3 done wait -/root/spark-ec2/copy-dir $EPHEMERAL_HDFS/conf +~/spark-ec2/copy-dir $EPHEMERAL_HDFS/conf NAMENODE_DIR=/mnt/ephemeral-hdfs/dfs/name @@ -31,12 +37,24 @@ echo "Starting ephemeral HDFS..." # This is different depending on version. case "$HADOOP_MAJOR_VERSION" in 1) + if [[ $DISTRIB_ID = "Ubuntu" ]]; then + echo "ERROR: Unsupported hadoop version on Ubuntu" + return -1 + fi $EPHEMERAL_HDFS/bin/start-dfs.sh ;; 2) + if [[ $DISTRIB_ID = "Ubuntu" ]]; then + [[ ! -e /var/hadoop ]] && sudo mkdir /var/hadoop + sudo chmod 777 /var/hadoop + fi $EPHEMERAL_HDFS/sbin/start-dfs.sh ;; yarn) + if [[ $DISTRIB_ID = "Ubuntu" ]]; then + [[ ! -e /var/hadoop ]] && sudo mkdir /var/hadoop + sudo chmod 777 /var/hadoop + fi $EPHEMERAL_HDFS/sbin/start-dfs.sh echo "Starting YARN" $EPHEMERAL_HDFS/sbin/start-yarn.sh diff --git a/ganglia/init.sh b/ganglia/init.sh old mode 100644 new mode 100755 index d120a236..7b5fdf72 --- a/ganglia/init.sh +++ b/ganglia/init.sh @@ -1,26 +1,50 @@ -#!/bin/bash -# NOTE: Remove all rrds which might be around from an earlier run -rm -rf /var/lib/ganglia/rrds/* -rm -rf /mnt/ganglia/rrds/* -# Make sure rrd storage directory has right permissions -mkdir -p /mnt/ganglia/rrds -chown -R nobody:nobody /mnt/ganglia/rrds +USER=`whoami` -# Install ganglia -# TODO: Remove this once the AMI has ganglia by default +#learn the linux distribution +DISTRIB_ID=Centos +if [[ -e /etc/lsb-release ]]; then source /etc/lsb-release; fi +echo "DISTRIB_ID=$DISTRIB_ID" -GANGLIA_PACKAGES="ganglia ganglia-web ganglia-gmond ganglia-gmetad" +if [[ $DISTRIB_ID = "Centos" ]]; then + + # NOTE: Remove all rrds which might be around from an earlier run + sudo rm -rf /var/lib/ganglia/rrds/* + sudo rm -rf /mnt/ganglia/rrds/* + + # Make sure rrd storage directory has right permissions + mkdir -p /mnt/ganglia/rrds + chown -R nobody:nobody /mnt/ganglia/rrds + + # Install ganglia + # TODO: Remove this once the AMI has ganglia by default + + + GANGLIA_PACKAGES="ganglia ganglia-web ganglia-gmond ganglia-gmetad" + if ! rpm --quiet -q $GANGLIA_PACKAGES; then + yum install -q -y $GANGLIA_PACKAGES; + fi + for node in $SLAVES $OTHER_MASTERS; do + ssh -t -t $SSH_OPTS root@$node "if ! 
rpm --quiet -q $GANGLIA_PACKAGES; then yum install -q -y $GANGLIA_PACKAGES; fi" & sleep 0.3 + done + wait + + # Post-package installation : Symlink /var/lib/ganglia/rrds to /mnt/ganglia/rrds + if [[ -d /var/lib/ganglia/rrds ]]; then sudo rmdir /var/lib/ganglia/rrds; fi + sudo ln -s /mnt/ganglia/rrds /var/lib/ganglia/rrds + +elif [[ $DISTRIB_ID = "Ubuntu" ]]; then + echo "WARNING: Skipping ganglia on ubuntu..." + #GANGLIA_PACKAGES="ganglia-webfrontend ganglia-monitor gmetad" + #sudo apt-get install -y $GANGLIA_PACKAGES + #for node in $SLAVES $OTHER_MASTERS; do + # ssh -t -t $SSH_OPTS $USER@$node "sudo apt-get install -y $GANGLIA_PACKAGES; sudo dpkg --configure -a" & sleep 0.3 + #done + #wait + + ## Post-package installation : Symlink /var/lib/ganglia/rrds to /mnt/ganglia/rrds + #if [[ -d /var/lib/ganglia/rrds ]]; then sudo rmdir /var/lib/ganglia/rrds; fi + #sudo ln -s /mnt/ganglia/rrds /var/lib/ganglia/rrds -if ! rpm --quiet -q $GANGLIA_PACKAGES; then - yum install -q -y $GANGLIA_PACKAGES; fi -for node in $SLAVES $OTHER_MASTERS; do - ssh -t -t $SSH_OPTS root@$node "if ! rpm --quiet -q $GANGLIA_PACKAGES; then yum install -q -y $GANGLIA_PACKAGES; fi" & sleep 0.3 -done -wait - -# Post-package installation : Symlink /var/lib/ganglia/rrds to /mnt/ganglia/rrds -rmdir /var/lib/ganglia/rrds -ln -s /mnt/ganglia/rrds /var/lib/ganglia/rrds diff --git a/ganglia/setup.sh b/ganglia/setup.sh old mode 100644 new mode 100755 index 8719d528..738d6dee --- a/ganglia/setup.sh +++ b/ganglia/setup.sh @@ -1,20 +1,33 @@ #!/bin/bash -/root/spark-ec2/copy-dir /etc/ganglia/ +USER=`whoami` -# Start gmond everywhere -/etc/init.d/gmond restart +#learn the linux distribution +DISTRIB_ID=Centos +if [[ -e /etc/lsb-release ]]; then source /etc/lsb-release; fi +echo "DISTRIB_ID=$DISTRIB_ID" -for node in $SLAVES $OTHER_MASTERS; do - ssh -t -t $SSH_OPTS root@$node "/etc/init.d/gmond restart" -done +if [[ $DISTRIB_ID = "Centos" ]]; then -# gmeta needs rrds to be owned by nobody -chown -R nobody /var/lib/ganglia/rrds -# cluster-wide aggregates only show up with this. TODO: Fix this cleanly ? -ln -s /usr/share/ganglia/conf/default.json /var/lib/ganglia/conf/ + /root/spark-ec2/copy-dir /etc/ganglia/ -/etc/init.d/gmetad restart + # Start gmond everywhere + /etc/init.d/gmond restart -# Start http server to serve ganglia -/etc/init.d/httpd restart + for node in $SLAVES $OTHER_MASTERS; do + ssh -t -t $SSH_OPTS root@$node "/etc/init.d/gmond restart" + done + + # gmeta needs rrds to be owned by nobody + chown -R nobody /var/lib/ganglia/rrds + # cluster-wide aggregates only show up with this. TODO: Fix this cleanly ? + ln -s /usr/share/ganglia/conf/default.json /var/lib/ganglia/conf/ + + /etc/init.d/gmetad restart + + # Start http server to serve ganglia + /etc/init.d/httpd restart + +elif [[ $DISTRIB_ID = "Ubuntu" ]]; then + echo "WARNING: Skipping ganglia on ubuntu..." 
+fi diff --git a/install-java-on-ubuntu.sh b/install-java-on-ubuntu.sh new file mode 100755 index 00000000..f48df69f --- /dev/null +++ b/install-java-on-ubuntu.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +if [[ "x$JAVA_HOME" == "x" ]]; then + #also install java on the first master since stock ubuntu ami does not + #come with java pre-installed; also install git + sudo apt-get update -q + sudo apt-get install -y -q openjdk-7-jdk + sudo sh -c 'echo "export JAVA_HOME=/usr/lib/jvm/java-7-openjdk-amd64" >> /etc/environment' + source /etc/environment +fi diff --git a/mapreduce/init.sh b/mapreduce/init.sh index 2e952799..90dda056 100755 --- a/mapreduce/init.sh +++ b/mapreduce/init.sh @@ -1,6 +1,6 @@ #!/bin/bash -pushd /root > /dev/null +pushd ~ > /dev/null case "$HADOOP_MAJOR_VERSION" in 1) echo "Nothing to initialize for MapReduce in Hadoop 1" @@ -19,5 +19,5 @@ case "$HADOOP_MAJOR_VERSION" in echo "ERROR: Unknown Hadoop version" return -1 esac -/root/spark-ec2/copy-dir /root/mapreduce +~/spark-ec2/copy-dir ~/mapreduce popd > /dev/null diff --git a/mapreduce/setup.sh b/mapreduce/setup.sh index 4f71e0c7..4625c294 100755 --- a/mapreduce/setup.sh +++ b/mapreduce/setup.sh @@ -1,11 +1,12 @@ #!/bin/bash -MAPREDUCE=/root/mapreduce +MAPREDUCE=~/mapreduce +USER=`whoami` mkdir -p /mnt/mapreduce/logs for node in $SLAVES $OTHER_MASTERS; do - ssh -t $SSH_OPTS root@$node "mkdir -p /mnt/mapreduce/logs && chown hadoop:hadoop /mnt/mapreduce/logs && chown hadoop:hadoop /mnt/mapreduce" & sleep 0.3 + ssh -t $SSH_OPTS $USER@$node "mkdir -p /mnt/mapreduce/logs && chown hadoop:hadoop /mnt/mapreduce/logs && chown hadoop:hadoop /mnt/mapreduce" & sleep 0.3 done wait chown hadoop:hadoop /mnt/mapreduce -R -/root/spark-ec2/copy-dir $MAPREDUCE/conf +~/spark-ec2/copy-dir $MAPREDUCE/conf diff --git a/persistent-hdfs/init.sh b/persistent-hdfs/init.sh index 735cebcc..14d901ff 100755 --- a/persistent-hdfs/init.sh +++ b/persistent-hdfs/init.sh @@ -1,6 +1,6 @@ #!/bin/bash -pushd /root > /dev/null +pushd ~ > /dev/null if [ -d "persistent-hdfs" ]; then echo "Persistent HDFS seems to be installed. Exiting." 
@@ -14,7 +14,7 @@ case "$HADOOP_MAJOR_VERSION" in tar xvzf hadoop-1.0.4.tar.gz > /tmp/spark-ec2_hadoop.log rm hadoop-*.tar.gz mv hadoop-1.0.4/ persistent-hdfs/ - cp /root/hadoop-native/* /root/persistent-hdfs/lib/native/ + cp ~/hadoop-native/* ~/persistent-hdfs/lib/native/ ;; 2) wget http://s3.amazonaws.com/spark-related-packages/hadoop-2.0.0-cdh4.2.0.tar.gz @@ -24,9 +24,9 @@ case "$HADOOP_MAJOR_VERSION" in mv hadoop-2.0.0-cdh4.2.0/ persistent-hdfs/ # Have single conf dir - rm -rf /root/persistent-hdfs/etc/hadoop/ - ln -s /root/persistent-hdfs/conf /root/persistent-hdfs/etc/hadoop - cp /root/hadoop-native/* /root/persistent-hdfs/lib/native/ + rm -rf ~/persistent-hdfs/etc/hadoop/ + ln -s ~/persistent-hdfs/conf ~/persistent-hdfs/etc/hadoop + cp ~/hadoop-native/* ~/persistent-hdfs/lib/native/ ;; yarn) wget http://s3.amazonaws.com/spark-related-packages/hadoop-2.4.0.tar.gz @@ -36,14 +36,14 @@ case "$HADOOP_MAJOR_VERSION" in mv hadoop-2.4.0/ persistent-hdfs/ # Have single conf dir - rm -rf /root/persistent-hdfs/etc/hadoop/ - ln -s /root/persistent-hdfs/conf /root/persistent-hdfs/etc/hadoop + rm -rf ~/persistent-hdfs/etc/hadoop/ + ln -s ~/persistent-hdfs/conf ~/persistent-hdfs/etc/hadoop ;; *) echo "ERROR: Unknown Hadoop version" return 1 esac -/root/spark-ec2/copy-dir /root/persistent-hdfs +~/spark-ec2/copy-dir ~/persistent-hdfs popd > /dev/null diff --git a/persistent-hdfs/setup.sh b/persistent-hdfs/setup.sh index d1713e12..fa7b1bb9 100755 --- a/persistent-hdfs/setup.sh +++ b/persistent-hdfs/setup.sh @@ -1,22 +1,32 @@ #!/bin/bash -PERSISTENT_HDFS=/root/persistent-hdfs +PERSISTENT_HDFS=~/persistent-hdfs +USER=`whoami` -pushd /root/spark-ec2/persistent-hdfs > /dev/null -source ./setup-slave.sh +DISTRIB_ID=Centos +if [[ -e /etc/lsb-release ]]; then source /etc/lsb-release; fi +echo "DISTRIB_ID=$DISTRIB_ID" -for node in $SLAVES $OTHER_MASTERS; do - ssh -t $SSH_OPTS root@$node "/root/spark-ec2/persistent-hdfs/setup-slave.sh" & sleep 0.3 -done -wait +if [[ $DISTRIB_ID = "Centos" ]]; then + pushd ~/spark-ec2/persistent-hdfs > /dev/null + source ./setup-slave.sh -/root/spark-ec2/copy-dir $PERSISTENT_HDFS/conf + for node in $SLAVES $OTHER_MASTERS; do + ssh -t $SSH_OPTS $USER@$node "~/spark-ec2/persistent-hdfs/setup-slave.sh" & sleep 0.3 + done + wait -if [[ ! -e /vol/persistent-hdfs/dfs/name ]] ; then - echo "Formatting persistent HDFS namenode..." - $PERSISTENT_HDFS/bin/hadoop namenode -format -fi + ~/spark-ec2/copy-dir $PERSISTENT_HDFS/conf + + if [[ ! -e /vol/persistent-hdfs/dfs/name ]] ; then + echo "Formatting persistent HDFS namenode..." + $PERSISTENT_HDFS/bin/hadoop namenode -format + fi -echo "Persistent HDFS installed, won't start by default..." + echo "Persistent HDFS installed, won't start by default..." -popd > /dev/null + popd > /dev/null + +else + echo "Skipping persistent hdfs setup on $DISTRIB_ID" +fi diff --git a/resolve-hostname.sh b/resolve-hostname.sh index 1fe6deea..881954d8 100755 --- a/resolve-hostname.sh +++ b/resolve-hostname.sh @@ -21,7 +21,7 @@ PRIVATE_IP=`wget -q -O - http://169.254.169.254/latest/meta-data/local-ipv4` # do changes only if short hostname does not resolve ping -c 1 -q "${SHORT_HOSTNAME}" > /dev/null 2>&1 if [ $? 
-ne 0 ]; then -    echo -e "\n# fixed by resolve-hostname.sh \n${PRIVATE_IP} ${SHORT_HOSTNAME}\n" >> /etc/hosts +    echo -e "\n# fixed by resolve-hostname.sh \n${PRIVATE_IP} ${SHORT_HOSTNAME}\n" | sudo tee -a /etc/hosts > /dev/null # let's make sure that it got fixed ping -c 1 -q "${SHORT_HOSTNAME}" > /dev/null 2>&1 diff --git a/rstudio/init.sh b/rstudio/init.sh old mode 100644 new mode 100755 index fd18d6be..184f2cc9 --- a/rstudio/init.sh +++ b/rstudio/init.sh @@ -1,17 +1,29 @@ #!/usr/bin/env bash -# download rstudio -wget http://download2.rstudio.org/rstudio-server-rhel-0.99.446-x86_64.rpm -sudo yum install --nogpgcheck -y rstudio-server-rhel-0.99.446-x86_64.rpm +#learn the linux distribution +DISTRIB_ID=Centos +if [[ -e /etc/lsb-release ]]; then source /etc/lsb-release; fi +echo "DISTRIB_ID=$DISTRIB_ID" -# restart rstudio -rstudio-server restart +if [[ $DISTRIB_ID = "Centos" ]]; then -# add user for rstudio, user needs to supply password later on -adduser rstudio + # download rstudio + wget http://download2.rstudio.org/rstudio-server-rhel-0.99.446-x86_64.rpm + sudo yum install --nogpgcheck -y rstudio-server-rhel-0.99.446-x86_64.rpm + + # restart rstudio + rstudio-server restart + + # add user for rstudio, user needs to supply password later on + adduser rstudio + + # create a Rscript that connects to Spark, to help starting user + cp /root/spark-ec2/rstudio/startSpark.R /home/rstudio + +elif [[ $DISTRIB_ID = "Ubuntu" ]]; then + echo "WARNING: Skipping rstudio installation on Ubuntu" +fi -# create a Rscript that connects to Spark, to help starting user -cp /root/spark-ec2/rstudio/startSpark.R /home/rstudio # make sure that the temp dirs exist and can be written to by any user # otherwise this will create a conflict for the rstudio user @@ -23,7 +35,7 @@ function create_temp_dirs { chmod a+w $location } -create_temp_dirs /mnt/spark -create_temp_dirs /mnt2/spark -create_temp_dirs /mnt3/spark -create_temp_dirs /mnt4/spark +if [[ -d /mnt ]]; then create_temp_dirs /mnt/spark; fi +if [[ -d /mnt2 ]]; then create_temp_dirs /mnt2/spark; fi +if [[ -d /mnt3 ]]; then create_temp_dirs /mnt3/spark; fi +if [[ -d /mnt4 ]]; then create_temp_dirs /mnt4/spark; fi diff --git a/scala/init.sh b/scala/init.sh index 73a299f5..39a1f16b 100755 --- a/scala/init.sh +++ b/scala/init.sh @@ -1,6 +1,6 @@ #!/bin/bash -pushd /root > /dev/null +pushd ~ > /dev/null if [ -d "scala" ]; then echo "Scala seems to be installed. Exiting." diff --git a/scala/setup.sh b/scala/setup.sh index 6aa5d27b..7cb3b3d1 100755 --- a/scala/setup.sh +++ b/scala/setup.sh @@ -1,3 +1,3 @@ #!/bin/bash -/root/spark-ec2/copy-dir /root/scala +~/spark-ec2/copy-dir ~/scala diff --git a/setup-slave.sh b/setup-slave.sh index 76372d9a..e9d5a822 100755 --- a/setup-slave.sh +++ b/setup-slave.sh @@ -1,56 +1,86 @@ #!/bin/bash +#learn the current user +USER=`whoami` + +#learn the linux distribution +DISTRIB_ID=Centos +if [[ -e /etc/lsb-release ]]; then source /etc/lsb-release; fi +echo "DISTRIB_ID=$DISTRIB_ID" + +if [[ $DISTRIB_ID = "Ubuntu" ]]; then + sudo apt-get install -y -q git +fi + # Disable Transparent Huge Pages (THP) # THP can result in system thrashing (high sys usage) due to frequent defrags of memory. # Most systems recommends turning THP off.
if [[ -e /sys/kernel/mm/transparent_hugepage/enabled ]]; then - echo never > /sys/kernel/mm/transparent_hugepage/enabled + sudo sh -c 'echo never > /sys/kernel/mm/transparent_hugepage/enabled' fi # Make sure we are in the spark-ec2 directory -pushd /root/spark-ec2 > /dev/null +pushd ~/spark-ec2 > /dev/null + +if [[ $DISTRIB_ID = "Ubuntu" ]]; then + ./install-java-on-ubuntu.sh + source /etc/environment +fi source ec2-variables.sh # Set hostname based on EC2 private DNS name, so that it is set correctly # even if the instance is restarted with a different private DNS name PRIVATE_DNS=`wget -q -O - http://169.254.169.254/latest/meta-data/local-hostname` -hostname $PRIVATE_DNS -echo $PRIVATE_DNS > /etc/hostname +sudo hostname $PRIVATE_DNS +sudo sh -c "echo $PRIVATE_DNS > /etc/hostname" HOSTNAME=$PRIVATE_DNS # Fix the bash built-in hostname variable too echo "checking/fixing resolution of hostname" -bash /root/spark-ec2/resolve-hostname.sh +bash ~/spark-ec2/resolve-hostname.sh # Work around for R3 or I2 instances without pre-formatted ext3 disks instance_type=$(curl http://169.254.169.254/latest/meta-data/instance-type 2> /dev/null) echo "Setting up slave on `hostname`... of type $instance_type" +create_ephemeral_blkdev_links() { + device_letter=$1 + devx=/dev/xvd${device_letter} + devs=/dev/sd${device_letter} + if [[ -e $devx && ! -e $devs ]]; then sudo ln -s $devx $devs; fi +} +if [[ $DISTRIB_ID = "Ubuntu" ]]; then + create_ephemeral_blkdev_links b + create_ephemeral_blkdev_links c + create_ephemeral_blkdev_links d +fi + if [[ $instance_type == r3* || $instance_type == i2* || $instance_type == hi1* ]]; then # Format & mount using ext4, which has the best performance among ext3, ext4, and xfs based # on our shuffle heavy benchmark EXT4_MOUNT_OPTS="defaults,noatime,nodiratime" - rm -rf /mnt* - mkdir /mnt + sudo rm -rf /mnt* + sudo mkdir /mnt # To turn TRIM support on, uncomment the following line. #echo '/dev/sdb /mnt ext4 defaults,noatime,nodiratime,discard 0 0' >> /etc/fstab - mkfs.ext4 -E lazy_itable_init=0,lazy_journal_init=0 /dev/sdb - mount -o $EXT4_MOUNT_OPTS /dev/sdb /mnt + sudo mkfs.ext4 -E lazy_itable_init=0,lazy_journal_init=0 /dev/sdb + sudo mount -o $EXT4_MOUNT_OPTS /dev/sdb /mnt + echo "Changing slave /mnt mode to 777" if [[ $instance_type == "r3.8xlarge" || $instance_type == "hi1.4xlarge" ]]; then - mkdir /mnt2 + sudo mkdir /mnt2 # To turn TRIM support on, uncomment the following line. #echo '/dev/sdc /mnt2 ext4 defaults,noatime,nodiratime,discard 0 0' >> /etc/fstab if [[ $instance_type == "r3.8xlarge" ]]; then - mkfs.ext4 -E lazy_itable_init=0,lazy_journal_init=0 /dev/sdc - mount -o $EXT4_MOUNT_OPTS /dev/sdc /mnt2 + sudo mkfs.ext4 -E lazy_itable_init=0,lazy_journal_init=0 /dev/sdc + sudo mount -o $EXT4_MOUNT_OPTS /dev/sdc /mnt2 fi # To turn TRIM support on, uncomment the following line. #echo '/dev/sdf /mnt2 ext4 defaults,noatime,nodiratime,discard 0 0' >> /etc/fstab if [[ $instance_type == "hi1.4xlarge" ]]; then - mkfs.ext4 -E lazy_itable_init=0,lazy_journal_init=0 /dev/sdf - mount -o $EXT4_MOUNT_OPTS /dev/sdf /mnt2 + sudo mkfs.ext4 -E lazy_itable_init=0,lazy_journal_init=0 /dev/sdf + sudo mount -o $EXT4_MOUNT_OPTS /dev/sdf /mnt2 fi fi fi @@ -59,29 +89,33 @@ fi # are ext3, but we use xfs for EBS volumes to format them faster) XFS_MOUNT_OPTS="defaults,noatime,nodiratime,allocsize=8m" -function setup_ebs_volume { +setup_ebs_volume() { device=$1 mount_point=$2 if [[ -e $device ]]; then # Check if device is already formatted if ! 
blkid $device; then -      mkdir $mount_point -      yum install -q -y xfsprogs -      if mkfs.xfs -q $device; then -        mount -o $XFS_MOUNT_OPTS $device $mount_point -        chmod -R a+w $mount_point +      sudo mkdir $mount_point +            if [[ $DISTRIB_ID = "Centos" ]]; then +              yum install -q -y xfsprogs +            elif [[ $DISTRIB_ID = "Ubuntu" ]]; then +              sudo apt-get install -y -q xfsprogs +            fi +      if sudo mkfs.xfs -q $device; then +        sudo mount -o $XFS_MOUNT_OPTS $device $mount_point +        sudo chmod -R a+w $mount_point else # mkfs.xfs is not installed on this machine or has failed; # delete /vol so that the user doesn't think we successfully # mounted the EBS volume -        rmdir $mount_point +        sudo rmdir $mount_point fi else # EBS volume is already formatted. Mount it if its not mounted yet. if ! grep -qs '$mount_point' /proc/mounts; then -      mkdir $mount_point -      mount -o $XFS_MOUNT_OPTS $device $mount_point -      chmod -R a+w $mount_point +      sudo mkdir $mount_point +      sudo mount -o $XFS_MOUNT_OPTS $device $mount_point +      sudo chmod -R a+w $mount_point fi fi fi @@ -100,35 +134,47 @@ setup_ebs_volume /dev/sdz /vol7 # Alias vol to vol3 for backward compatibility: the old spark-ec2 script supports only attaching # one EBS volume at /dev/sdv. if [[ -e /vol3 && ! -e /vol ]]; then -  ln -s /vol3 /vol +  sudo ln -s /vol3 /vol fi # Make data dirs writable by non-root users, such as CDH's hadoop user -chmod -R a+w /mnt* +sudo chmod -R a+w /mnt* # Remove ~/.ssh/known_hosts because it gets polluted as you start/stop many # clusters (new machines tend to come up under old hostnames) -rm -f /root/.ssh/known_hosts +rm -f ~/.ssh/known_hosts # Create swap space on /mnt -/root/spark-ec2/create-swap.sh $SWAP_MB +sudo ~/spark-ec2/create-swap.sh $SWAP_MB # Allow memory to be over committed. Helps in pyspark where we fork -echo 1 > /proc/sys/vm/overcommit_memory +sudo sh -c 'echo 1 > /proc/sys/vm/overcommit_memory' # Add github to known hosts to get git@github.com clone to work # TODO(shivaram): Avoid duplicate entries ? -cat /root/spark-ec2/github.hostkey >> /root/.ssh/known_hosts +cat ~/spark-ec2/github.hostkey >> ~/.ssh/known_hosts # Create /usr/bin/realpath which is used by R to find Java installations # NOTE: /usr/bin/realpath is missing in CentOS AMIs.
See # http://superuser.com/questions/771104-usr-bin-realpath-not-found-in-centos-6-5 -echo '#!/bin/bash' > /usr/bin/realpath -echo 'readlink -e "$@"' >> /usr/bin/realpath -chmod a+x /usr/bin/realpath +if [[ $DISTRIB_ID = "Centos" ]]; then + echo '#!/bin/bash' > /usr/bin/realpath + echo 'readlink -e "$@"' >> /usr/bin/realpath + chmod a+x /usr/bin/realpath +elif [[ $DISTRIB_ID = "Ubuntu" ]]; then + sudo apt-get install -y -q realpath +fi + +if [[ $DISTRIB_ID = "Ubuntu" ]]; then + [[ -d /mnt ]] && sudo chmod 777 /mnt + [[ -d /mnt2 ]] && sudo chmod 777 /mnt2 + [[ -d /mnt3 ]] && sudo chmod 777 /mnt3 + [[ -d /mnt4 ]] && sudo chmod 777 /mnt4 +fi + popd > /dev/null # this is to set the ulimit for root and other users -echo '* soft nofile 1000000' >> /etc/security/limits.conf -echo '* hard nofile 1000000' >> /etc/security/limits.conf \ No newline at end of file +sudo sh -c "echo '* soft nofile 1000000' >> /etc/security/limits.conf" +sudo sh -c "echo '* hard nofile 1000000' >> /etc/security/limits.conf" diff --git a/setup.sh b/setup.sh index 5a3beea2..c97cacfa 100755 --- a/setup.sh +++ b/setup.sh @@ -1,6 +1,19 @@ #!/bin/bash -sudo yum install -y -q pssh +#learn the current user +USER=`whoami` + +#learn the linux distribution +DISTRIB_ID=Centos +if [[ -e /etc/lsb-release ]]; then source /etc/lsb-release; fi +echo "DISTRIB_ID=$DISTRIB_ID" + +if [[ $DISTRIB_ID = "Centos" ]]; then + sudo yum install -y -q pssh +elif [[ $DISTRIB_ID = "Ubuntu" ]]; then + sudo apt-get install -y -q pssh + sudo apt-get install -y -q git +fi # usage: echo_time_diff name start_time end_time echo_time_diff () { @@ -11,20 +24,24 @@ echo_time_diff () { } # Make sure we are in the spark-ec2 directory -pushd /root/spark-ec2 > /dev/null +pushd ~/spark-ec2 > /dev/null + +if [[ $DISTRIB_ID = "Ubuntu" ]]; then + ./install-java-on-ubuntu.sh + source /etc/environment +fi # Load the environment variables specific to this AMI
-e $devs ]]; then sudo ln -s $devx $devs; fi +} +if [[ $DISTRIB_ID = "Ubuntu" ]]; then + create_ephemeral_blkdev_links b + create_ephemeral_blkdev_links c + create_ephemeral_blkdev_links d +fi + +if [[ $DISTRIB_ID = "Ubuntu" ]]; then + [[ -d /mnt ]] && sudo chmod 777 /mnt + [[ -d /mnt2 ]] && sudo chmod 777 /mnt2 + [[ -d /mnt3 ]] && sudo chmod 777 /mnt3 + [[ -d /mnt4 ]] && sudo chmod 777 /mnt4 +fi echo "Running setup-slave on all cluster nodes to mount filesystems, etc..." setup_slave_start_time="$(date +'%s')" -pssh --inline \ +if [[ $DISTRIB_ID = "Centos" ]]; then + pssh --inline \ + --host "$MASTERS $SLAVES" \ + --user $USER \ + --extra-args "-t -t $SSH_OPTS" \ + --timeout 0 \ + "spark-ec2/setup-slave.sh" +elif [[ $DISTRIB_ID = "Ubuntu" ]]; then + parallel-ssh --inline \ --host "$MASTERS $SLAVES" \ - --user root \ + --user $USER \ --extra-args "-t -t $SSH_OPTS" \ --timeout 0 \ "spark-ec2/setup-slave.sh" +fi + setup_slave_end_time="$(date +'%s')" echo_time_diff "setup-slave" "$setup_slave_start_time" "$setup_slave_end_time" + # Always include 'scala' module if it's not defined as a work around # for older versions of the scripts. if [[ ! $MODULES =~ *scala* ]]; then MODULES=$(printf "%s\n%s\n" "scala" $MODULES) fi + # Install / Init module for module in $MODULES; do echo "Initializing $module" @@ -90,18 +138,32 @@ for module in $MODULES; do fi module_init_end_time="$(date +'%s')" echo_time_diff "$module init" "$module_init_start_time" "$module_init_end_time" - cd /root/spark-ec2 # guard against init.sh changing the cwd + cd ~/spark-ec2 # guard against init.sh changing the cwd done + # Deploy templates # TODO: Move configuring templates to a per-module ? echo "Creating local config files..." + +export DEPLOY_ROOT_DIR=~ +export TMP_TEMPLATE_DIR="/tmp/templates/" +mkdir $TMP_TEMPLATE_DIR + ./deploy_templates.py +find $TMP_TEMPLATE_DIR -type f > conflist +for f in `cat conflist`; do + outf=`echo $f | sed "s/\/tmp\/templates//"` + dir=`dirname $outf` + [[ ! -e $dir ]] && sudo mkdir -p $dir + sudo mv $f $outf +done + # Copy spark conf by default echo "Deploying Spark config files..." -chmod u+x /root/spark/conf/spark-env.sh -/root/spark-ec2/copy-dir /root/spark/conf +chmod u+x ~/spark/conf/spark-env.sh +~/spark-ec2/copy-dir ~/spark/conf # Setup each module for module in $MODULES; do @@ -111,7 +173,7 @@ for module in $MODULES; do sleep 0.1 module_setup_end_time="$(date +'%s')" echo_time_diff "$module setup" "$module_setup_start_time" "$module_setup_end_time" - cd /root/spark-ec2 # guard against setup.sh changing the cwd + cd ~/spark-ec2 # guard against setup.sh changing the cwd done popd > /dev/null diff --git a/spark-standalone/setup.sh b/spark-standalone/setup.sh index e9c04c1f..4a252718 100755 --- a/spark-standalone/setup.sh +++ b/spark-standalone/setup.sh @@ -1,33 +1,31 @@ #!/bin/bash -BIN_FOLDER="/root/spark/sbin" - if [[ "0.7.3 0.8.0 0.8.1" =~ $SPARK_VERSION ]]; then - BIN_FOLDER="/root/spark/bin" + BIN_FOLDER="~/spark/bin" fi # Copy the slaves to spark conf -cp /root/spark-ec2/slaves /root/spark/conf/ -/root/spark-ec2/copy-dir /root/spark/conf +cp ~/spark-ec2/slaves ~/spark/conf/ +~/spark-ec2/copy-dir ~/spark/conf # Set cluster-url to standalone master -echo "spark://""`cat /root/spark-ec2/masters`"":7077" > /root/spark-ec2/cluster-url -/root/spark-ec2/copy-dir /root/spark-ec2 +echo "spark://""`cat ~/spark-ec2/masters`"":7077" > ~/spark-ec2/cluster-url +~/spark-ec2/copy-dir ~/spark-ec2 # The Spark master seems to take time to start and workers crash if # they start before the master. 
So start the master first, sleep and then start # workers. # Stop anything that is running -$BIN_FOLDER/stop-all.sh +~/spark/sbin/stop-all.sh sleep 2 # Start Master -$BIN_FOLDER/start-master.sh +~/spark/sbin/start-master.sh # Pause sleep 20 # Start Workers -$BIN_FOLDER/start-slaves.sh +~/spark/sbin/start-slaves.sh diff --git a/spark/init.sh b/spark/init.sh index 71fbc7bf..6497b868 100755 --- a/spark/init.sh +++ b/spark/init.sh @@ -1,6 +1,6 @@ #!/bin/bash -pushd /root > /dev/null +pushd ~ > /dev/null if [ -d "spark" ]; then echo "Spark seems to be installed. Exiting." diff --git a/spark/setup.sh b/spark/setup.sh index 9035d438..8e70f710 100755 --- a/spark/setup.sh +++ b/spark/setup.sh @@ -1,3 +1,3 @@ #!/bin/bash -/root/spark-ec2/copy-dir /root/spark +~/spark-ec2/copy-dir ~/spark diff --git a/spark_ec2.py b/spark_ec2.py old mode 100644 new mode 100755 index 28d72f43..6120bf09 --- a/spark_ec2.py +++ b/spark_ec2.py @@ -42,6 +42,7 @@ from datetime import datetime from optparse import OptionParser from sys import stderr +from collections import namedtuple if sys.version < "3": from urllib2 import urlopen, Request, HTTPError @@ -177,7 +178,7 @@ def parse_args(): prog="spark-ec2", version="%prog {v}".format(v=SPARK_EC2_VERSION), usage="%prog [options] \n\n" - + " can be: launch, destroy, login, stop, start, get-master, reboot-slaves") + + " can be: create, launch, destroy, login, stop, start, get-master, reboot-slaves") parser.add_option( "-s", "--slaves", type="int", default=1, @@ -210,6 +211,11 @@ def parse_args(): help="Availability zone to launch instances in, or 'all' to spread " + "slaves across multiple (an additional $0.01/Gb for bandwidth" + "between zones applies) (default: a single zone chosen at random)") + parser.add_option( + "--ami-type", default="spark", + help="Type of ami to use (default: %default). " + + "Valid options are %default and ubuntu. 
" + + "If you specify an ami, the ami-type option will be ignored.") parser.add_option( "-a", "--ami", help="Amazon Machine Image ID to use") @@ -331,6 +337,12 @@ def parse_args(): parser.add_option( "--instance-profile-name", default=None, help="IAM profile name to launch instances under") + parser.add_option( + "--elastic-ip", default=None, + help="Elastic IP to associate with the master") + parser.add_option( + "--no-setup", action="store_true", default=False, + help="Do not run the usual setup commands") (opts, args) = parser.parse_args() if len(args) != 2: @@ -466,7 +478,14 @@ def get_spark_ami(opts): r=opts.spark_ec2_git_repo.replace("https://github.com", "https://raw.github.com", 1), b=opts.spark_ec2_git_branch) - ami_path = "%s/%s/%s" % (ami_prefix, opts.region, instance_type) + if opts.ami_type == "spark": + ami_path = "%s/%s/%s" % (ami_prefix, opts.region, instance_type) + elif opts.ami_type == "ubuntu": + ami_path = "%s-ubuntu/ami-list-ubuntu-amd64" % ami_prefix + else: + print("Bad ami type") + sys.exit(1) + reader = codecs.getreader("ascii") try: ami = reader(urlopen(ami_path)).read().strip() @@ -474,6 +493,26 @@ def get_spark_ami(opts): print("Could not resolve AMI at: " + ami_path, file=stderr) sys.exit(1) + if opts.ami_type == "ubuntu": + storage_type = "ebs" #"", "ebs", "ebs-io1", "ebs-ssd" + AMItup = namedtuple('AMI', ['region','release','version','arch','hwtype','date','ami','hyper']) + amil = ami.split('\n') + amill = [l.split(',') for l in amil] + amilt = [AMItup._make(l) for l in amill] + if instance_type == 'hvm': + hwtype = 'hvm:' + storage_type + amilt = [t for t in amilt if t.region == opts.region and t.hyper == 'hvm' and t.hwtype == hwtype] + else: + hwtype = storage_type + amilt = [t for t in amilt if t.region == opts.region and t.hyper != 'hvm' and t.hwtype == hwtype] + if len(amilt) != 1: + print("%d AMIs found, needed exactly one" % len(amilt)) + for t in amilt: + print(t) + sys.exit(1) + ami = amilt[0].ami + + print("Spark AMI: " + ami) return ami @@ -483,6 +522,7 @@ def get_spark_ami(opts): # Returns a tuple of EC2 reservation objects for the master and slaves # Fails if there already instances running in the cluster's groups. def launch_cluster(conn, opts, cluster_name): + if opts.identity_file is None: print("ERROR: Must provide an identity file (-i) for ssh connections.", file=stderr) sys.exit(1) @@ -599,6 +639,16 @@ def launch_cluster(conn, opts, cluster_name): device.delete_on_termination = True block_map["/dev/sd" + chr(ord('s') + i)] = device + # for vanilla ubuntu AMIs, list out ephemeral block devices so they + # attached to be mounted on /mnt* + if opts.user == "ubuntu": + for i in range(get_num_disks(opts.instance_type)): + dev = BlockDeviceType() + dev.ephemeral_name = 'ephemeral%d' % i + # The first ephemeral drive is /dev/sdb. + name = '/dev/sd' + string.ascii_letters[i + 1] + block_map[name] = dev + # AWS ignores the AMI-specified block device mapping for M3 (see SPARK-3342). 
if opts.instance_type.startswith('m3.'): for i in range(get_num_disks(opts.instance_type)): @@ -810,6 +860,11 @@ def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key): print(slave_address) ssh_write(slave_address, opts, ['tar', 'x'], dot_ssh_tar) + #install git on vanilla ubuntu ami + if opts.user == "ubuntu": + git_install = "sudo apt-get install -y -q git" + ssh(master, opts, git_install) + modules = ['spark', 'ephemeral-hdfs', 'persistent-hdfs', 'mapreduce', 'spark-standalone', 'tachyon', 'rstudio'] @@ -836,10 +891,17 @@ def setup_cluster(conn, master_nodes, slave_nodes, opts, deploy_ssh_key): b=opts.spark_ec2_git_branch) ) + print("Deploying files to master...") + if opts.user == "root": + root_dir=SPARK_EC2_DIR + "/" + "deploy.generic" + elif opts.user == "ubuntu": + root_dir=SPARK_EC2_DIR + "/" + "deploy.ubuntu" + else: + root_dir=SPARK_EC2_DIR + "/" + "deploy.generic" deploy_files( conn=conn, - root_dir=SPARK_EC2_DIR + "/" + "deploy.generic", + root_dir=root_dir, opts=opts, master_nodes=master_nodes, slave_nodes=slave_nodes, @@ -1346,20 +1408,22 @@ def real_main(): opts.zone = random.choice(conn.get_all_zones()).name if action == "launch": - if opts.slaves <= 0: - print("ERROR: You have to start at least 1 slave", file=sys.stderr) - sys.exit(1) - if opts.resume: - (master_nodes, slave_nodes) = get_existing_cluster(conn, opts, cluster_name) - else: - (master_nodes, slave_nodes) = launch_cluster(conn, opts, cluster_name) - wait_for_cluster_state( - conn=conn, - opts=opts, - cluster_instances=(master_nodes + slave_nodes), - cluster_state='ssh-ready' - ) - setup_cluster(conn, master_nodes, slave_nodes, opts, True) + if opts.slaves <= 0: + print("ERROR: You have to start at least 1 slave", file=sys.stderr) + sys.exit(1) + if opts.resume: + (master_nodes, slave_nodes) = get_existing_cluster(conn, opts, cluster_name) + else: + (master_nodes, slave_nodes) = launch_cluster(conn, opts, cluster_name) + wait_for_cluster_state( + conn=conn, + opts=opts, + cluster_instances=(master_nodes + slave_nodes), + cluster_state='ssh-ready' + ) + + if not opts.no_setup: + setup_cluster(conn, master_nodes, slave_nodes, opts, True) elif action == "destroy": (master_nodes, slave_nodes) = get_existing_cluster( @@ -1488,6 +1552,9 @@ def real_main(): inst.stop() elif action == "start": + if opts.elastic_ip: + pdn = "ec2-" + "-".join(opts.elastic_ip.split(".")) + "." 
+ opts.region + ".compute.amazonaws.com" + print("will set master's public dns name to %s" % pdn) (master_nodes, slave_nodes) = get_existing_cluster(conn, opts, cluster_name) print("Starting slaves...") for inst in slave_nodes: @@ -1503,6 +1570,29 @@ def real_main(): cluster_instances=(master_nodes + slave_nodes), cluster_state='ssh-ready' ) + if opts.elastic_ip: + print("setting master's public dns name to %s" % pdn) + conn.associate_address(master_nodes[0].id, opts.elastic_ip) + master_nodes[0].ip_address=opts.elastic_ip + master_nodes[0].public_dns_name=pdn + + print("Attachment made") + + # Determine types of running instances + existing_master_type = master_nodes[0].instance_type + existing_slave_type = slave_nodes[0].instance_type + # Setting opts.master_instance_type to the empty string indicates we + # have the same instance type for the master and the slaves + if existing_master_type == existing_slave_type: + existing_master_type = "" + opts.master_instance_type = existing_master_type + opts.instance_type = existing_slave_type + + if not opts.no_setup: + setup_cluster(conn, master_nodes, slave_nodes, opts, False) + + elif action == "setup": + (master_nodes, slave_nodes) = get_existing_cluster(conn, opts, cluster_name) # Determine types of running instances existing_master_type = master_nodes[0].instance_type @@ -1514,7 +1604,7 @@ def real_main(): opts.master_instance_type = existing_master_type opts.instance_type = existing_slave_type - setup_cluster(conn, master_nodes, slave_nodes, opts, False) + setup_cluster(conn, master_nodes, slave_nodes, opts, True) else: print("Invalid action: %s" % action, file=stderr) diff --git a/tachyon/init.sh b/tachyon/init.sh index d5f1e481..ab71566d 100755 --- a/tachyon/init.sh +++ b/tachyon/init.sh @@ -1,6 +1,6 @@ #!/bin/bash -pushd /root > /dev/null +pushd ~ > /dev/null if [ -d "tachyon" ]; then echo "Tachyon seems to be installed. Exiting." diff --git a/tachyon/setup.sh b/tachyon/setup.sh index 8d946abc..88708cbb 100755 --- a/tachyon/setup.sh +++ b/tachyon/setup.sh @@ -1,9 +1,9 @@ #!/bin/bash -/root/spark-ec2/copy-dir /root/tachyon +~/spark-ec2/copy-dir ~/tachyon -/root/tachyon/bin/tachyon format +~/tachyon/bin/tachyon format sleep 1 -/root/tachyon/bin/tachyon-start.sh all Mount +~/tachyon/bin/tachyon-start.sh all Mount diff --git a/templates/root/ephemeral-hdfs/conf/hadoop-env.sh b/templates/root/ephemeral-hdfs/conf/hadoop-env.sh index f4e5d7e9..30ddbac2 100755 --- a/templates/root/ephemeral-hdfs/conf/hadoop-env.sh +++ b/templates/root/ephemeral-hdfs/conf/hadoop-env.sh @@ -11,8 +11,8 @@ export JAVA_HOME={{java_home}} # Extra Java CLASSPATH elements. Optional. # export HADOOP_CLASSPATH= -export HADOOP_HOME="/root/ephemeral-hdfs" -export HADOOP_MAPREDUCE_HOME="/root/mapreduce" +export HADOOP_HOME="~/ephemeral-hdfs" +export HADOOP_MAPREDUCE_HOME="~/mapreduce" # The maximum amount of heap to use, in MB. Default is 1000. 
export HADOOP_HEAPSIZE=1000 @@ -60,10 +60,14 @@ export HADOOP_PID_DIR=/var/hadoop/ephemeral-hdfs/pids # export HADOOP_NICENESS=10 # Set hadoop user for CDH (which doesn't allow running as root) -export HADOOP_NAMENODE_USER=hadoop -export HADOOP_DATANODE_USER=hadoop -export HADOOP_SECONDARYNAMENODE_USER=hadoop -export HADOOP_JOBTRACKER_USER=hadoop -export HADOOP_TASKTRACKER_USER=hadoop +USER=`whoami` +if [[ $USER == "root" ]]; then + USER=hadoop +fi +export HADOOP_NAMENODE_USER=$USER +export HADOOP_DATANODE_USER=$USER +export HADOOP_SECONDARYNAMENODE_USER=$USER +export HADOOP_JOBTRACKER_USER=$USER +export HADOOP_TASKTRACKER_USER=$USER ulimit -n 16000 diff --git a/templates/root/ephemeral-hdfs/conf/yarn-env.sh b/templates/root/ephemeral-hdfs/conf/yarn-env.sh old mode 100644 new mode 100755 index 77e62194..cac6bc9c --- a/templates/root/ephemeral-hdfs/conf/yarn-env.sh +++ b/templates/root/ephemeral-hdfs/conf/yarn-env.sh @@ -18,7 +18,7 @@ export HADOOP_YARN_USER=${HADOOP_YARN_USER:-yarn} # resolve links - $0 may be a softlink #export YARN_CONF_DIR="${YARN_CONF_DIR:-$HADOOP_YARN_HOME/conf}" -export YARN_CONF_DIR="/root/ephemeral-hdfs/conf" +export YARN_CONF_DIR="~/ephemeral-hdfs/conf" # some Java parameters # export JAVA_HOME=/home/y/libexec/jdk1.6.0/ diff --git a/templates/root/spark/conf/spark-defaults.conf b/templates/root/spark/conf/spark-defaults.conf index c63994a5..2387bfd1 100644 --- a/templates/root/spark/conf/spark-defaults.conf +++ b/templates/root/spark/conf/spark-defaults.conf @@ -1,6 +1,6 @@ spark.executor.memory {{spark_worker_mem}} -spark.executor.extraLibraryPath /root/ephemeral-hdfs/lib/native/ -spark.executor.extraClassPath /root/ephemeral-hdfs/conf +spark.executor.extraLibraryPath ~/ephemeral-hdfs/lib/native/ +spark.executor.extraClassPath ~/ephemeral-hdfs/conf # for spark version < 1.4.0 spark.tachyonStore.url tachyon://{{active_master}}:19998 diff --git a/templates/root/spark/conf/spark-env.sh b/templates/root/spark/conf/spark-env.sh index aa4490b8..dc40d612 100755 --- a/templates/root/spark/conf/spark-env.sh +++ b/templates/root/spark/conf/spark-env.sh @@ -9,12 +9,12 @@ if [ -n "{{spark_worker_instances}}" ]; then fi export SPARK_WORKER_CORES={{spark_worker_cores}} -export HADOOP_HOME="/root/ephemeral-hdfs" +export HADOOP_HOME="~/ephemeral-hdfs" export SPARK_MASTER_IP={{active_master}} -export MASTER=`cat /root/spark-ec2/cluster-url` +export MASTER=`cat ~/spark-ec2/cluster-url` -export SPARK_SUBMIT_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:/root/ephemeral-hdfs/lib/native/" -export SPARK_SUBMIT_CLASSPATH="$SPARK_CLASSPATH:$SPARK_SUBMIT_CLASSPATH:/root/ephemeral-hdfs/conf" +export SPARK_SUBMIT_LIBRARY_PATH="$SPARK_SUBMIT_LIBRARY_PATH:~/ephemeral-hdfs/lib/native/" +export SPARK_SUBMIT_CLASSPATH="$SPARK_CLASSPATH:$SPARK_SUBMIT_CLASSPATH:~/ephemeral-hdfs/conf" # Bind Spark's web UIs to this machine's public EC2 hostname otherwise fallback to private IP: export SPARK_PUBLIC_DNS=` @@ -22,10 +22,10 @@ wget -q -O - http://169.254.169.254/latest/meta-data/public-hostname ||\ wget -q -O - http://169.254.169.254/latest/meta-data/local-ipv4` # Used for YARN model -export YARN_CONF_DIR="/root/ephemeral-hdfs/conf" +export YARN_CONF_DIR="~/ephemeral-hdfs/conf" # Set a high ulimit for large shuffles, only root can do this if [ $(id -u) == "0" ] then -    ulimit -n 1000000 +    ulimit -n 1000000 fi diff --git a/templates/root/tachyon/conf/tachyon-env.sh b/templates/root/tachyon/conf/tachyon-env.sh old mode 100644 new mode 100755
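Usage note (outside the patch): with the changes above, an Ubuntu-based cluster would presumably be launched by combining the new --ami-type option with the existing --user option, so that spark-ec2 logs in as the "ubuntu" user and resolves the AMI from ami-list-ubuntu-amd64. A hypothetical invocation, in which the key pair name "mykey", the key file path, the slave count, and the cluster name are placeholders:

    ./spark_ec2.py -k mykey -i ~/mykey.pem -s 2 --ami-type=ubuntu --user=ubuntu launch ubuntu-cluster

The new "setup" action and the --no-setup flag added in spark_ec2.py can then be used to re-run or skip the configuration phase on an already-running cluster.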