diff --git a/.gitignore b/.gitignore index be2afa23..605d3c7b 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,9 @@ courselabs/* # config config.py +local +courselabs/* + # Virtualenv .Python bin diff --git a/Dockerfile b/Dockerfile index f2f3c4eb..9892b92e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ # Start with empty ubuntu machine -FROM ubuntu:15.04 +FROM ubuntu:18.04 MAINTAINER Autolab Development Team "autolab-dev@andrew.cmu.edu" @@ -15,9 +15,8 @@ WORKDIR /opt/TangoService/Tango RUN mkdir volumes WORKDIR /opt - # Install dependancies -RUN apt-get update && apt-get install -y \ +RUN apt-get update --fix-missing && DEBIAN_FRONTEND=nointeractive apt-get install -y \ nginx \ curl \ git \ @@ -68,7 +67,6 @@ RUN cp /opt/TangoService/Tango/deployment/config/redis.conf /etc/redis.conf # Reload new config scripts CMD ["/usr/bin/supervisord", "-c", "/etc/supervisor/supervisord.conf"] - # TODO: # volumes dir in root dir, supervisor only starts after calling start once , nginx also needs to be started # Different log numbers for two different tangos diff --git a/clients/README.md b/clients/README.md index 6c59ac14..bacd36b1 100644 --- a/clients/README.md +++ b/clients/README.md @@ -1,7 +1,7 @@ This directory contains the Tango client program and some example jobs that are useful for testing. -tango-cli.py - Tango client program +tango-rest.py - Tango client program Example jobs: job1 - simple hello job diff --git a/clients/tango-cli.py b/clients/tango-rest.py similarity index 99% rename from clients/tango-cli.py rename to clients/tango-rest.py index ab345164..4924d9d0 100755 --- a/clients/tango-cli.py +++ b/clients/tango-rest.py @@ -1,7 +1,7 @@ #!/usr/local/bin/python # # -# tango-cli.py - Command line client for the RESTful Tango. +# tango-rest.py - Command line client for the RESTful Tango. # import os diff --git a/config.template.py b/config.template.py index c7a92007..527869a5 100644 --- a/config.template.py +++ b/config.template.py @@ -77,7 +77,9 @@ class Config: # Default value of '*'' points this path to /path/to/Tango/volumes/ DOCKER_VOLUME_PATH = '*' DOCKER_HOST_USER = '' - + DOCKER_DISTRIBUTED_HOSTS = ['host.example.domain','host2.example.domain','host3.example.domain'] + USE_SSH_MASTER = True + # Maximum size for input files in bytes MAX_INPUT_FILE_SIZE = 250 * 1024 * 1024 # 250MB @@ -86,6 +88,8 @@ class Config: # VM ulimit values VM_ULIMIT_FILE_SIZE = 100 * 1024 * 1024 + #(Ram specified in MB) + VM_ULIMIT_USER_RAM = 512 VM_ULIMIT_USER_PROC = 100 # How many times to reschedule a failed job @@ -127,7 +131,7 @@ class Config: ###### # Part 4: Settings for shared memory # - USE_REDIS = True + USE_REDIS = False REDIS_HOSTNAME = "127.0.0.1" REDIS_PORT = 6379 diff --git a/hosts b/hosts deleted file mode 100644 index 265c7c67..00000000 --- a/hosts +++ /dev/null @@ -1,2 +0,0 @@ -54.186.238.205 -54.68.89.235 \ No newline at end of file diff --git a/jobManager.py b/jobManager.py index 7ec31aee..79ede625 100644 --- a/jobManager.py +++ b/jobManager.py @@ -65,44 +65,44 @@ def __manage(self): if not job.accessKey and Config.REUSE_VMS: id, vm = self.jobQueue.getNextPendingJobReuse(id) job = self.jobQueue.get(id) - - try: - # Mark the job assigned - self.jobQueue.assignJob(job.id) - # if the job has specified an account - # create an VM on the account and run on that instance - if job.accessKeyId: - from vmms.ec2SSH import Ec2SSH - vmms = Ec2SSH(job.accessKeyId, job.accessKey) - newVM = copy.deepcopy(job.vm) - newVM.id = self._getNextID() - preVM = vmms.initializeVM(newVM) - else: - # Try to find a vm on the free list and allocate it to - # the worker if successful. - if Config.REUSE_VMS: - preVM = vm + if job: + try: + # Mark the job assigned + self.jobQueue.assignJob(job.id) + # if the job has specified an account + # create an VM on the account and run on that instance + if job.accessKeyId: + from vmms.ec2SSH import Ec2SSH + vmms = Ec2SSH(job.accessKeyId, job.accessKey) + newVM = copy.deepcopy(job.vm) + newVM.id = self._getNextID() + preVM = vmms.initializeVM(newVM) else: - preVM = self.preallocator.allocVM(job.vm.name) - vmms = self.vmms[job.vm.vmms] # Create new vmms object - - # Now dispatch the job to a worker - self.log.info("Dispatched job %s:%d to %s [try %d]" % - (job.name, job.id, preVM.name, job.retries)) - job.appendTrace( - "%s|Dispatched job %s:%d [try %d]" % - (datetime.utcnow().ctime(), job.name, job.id, job.retries)) - - Worker( - job, - vmms, - self.jobQueue, - self.preallocator, - preVM - ).start() - - except Exception as err: - self.jobQueue.makeDead(job.id, str(err)) + # Try to find a vm on the free list and allocate it to + # the worker if successful. + if Config.REUSE_VMS: + preVM = vm + else: + preVM = self.preallocator.allocVM(job.vm.name) + vmms = self.vmms[job.vm.vmms] # Create new vmms object + + # Now dispatch the job to a worker + self.log.info("Dispatched job %s:%d to %s [try %d]" % + (job.name, job.id, preVM.name, job.retries)) + job.appendTrace( + "%s|Dispatched job %s:%d [try %d]" % + (datetime.utcnow().ctime(), job.name, job.id, job.retries)) + + Worker( + job, + vmms, + self.jobQueue, + self.preallocator, + preVM + ).start() + + except Exception as err: + self.jobQueue.makeDead(job.id, str(err)) # Sleep for a bit and then check again time.sleep(Config.DISPATCH_PERIOD) diff --git a/vmms/Dockerfile b/vmms/Dockerfile index 649e9dea..c8e399b8 100644 --- a/vmms/Dockerfile +++ b/vmms/Dockerfile @@ -1,12 +1,59 @@ # Autolab - autograding docker image -FROM ubuntu:14.04 -MAINTAINER Mihir Pandya +FROM ubuntu:18.04 +MAINTAINER David Dobmeier -RUN apt-get update --fix-missing +#C++ Setup +RUN apt-get update RUN apt-get install -y gcc RUN apt-get install -y make RUN apt-get install -y build-essential +RUN apt-get install -y libcunit1-dev libcunit1-doc libcunit1 + +#Python Setup +RUN apt-get update --fix-missing && \ + DEBIAN_FRONTEND=nointeractive apt-get install -y \ + python3 python3-numpy python3-nose python3-pandas \ + python python-numpy python-nose python-pandas \ + pep8 python-pip python3-pip python-wheel \ + python-sphinx && \ + pip install --upgrade setuptools + +#Java Setup +RUN apt-get update --fix-missing +RUN apt-get install -y default-jdk + +#Valgrind Setup +RUN apt-get update +RUN apt-get install -y valgrind + +#SML Setup +RUN mkdir -p /usr/local/bin/sml +WORKDIR /usr/local/bin/sml +RUN apt-get install -y gcc-multilib g++-multilib lib32z1 lib32ncurses5 wget +RUN wget http://www.smlnj.org/dist/working/110.78/config.tgz +RUN tar -xzvf config.tgz +RUN config/install.sh +RUN ln -s /usr/local/bin/sml/bin/sml /usr/local/sbin/sml +RUN ln -s /usr/local/bin/sml/bin/ml-lex /usr/local/sbin/ml-lex +RUN ln -s /usr/local/bin/sml/bin/ml-yacc /usr/local/sbin/ml-yacc + +#Flex setup +RUN apt-get install -y flex + +#Bison Setup +RUN apt-get install -y bison + +#Utility setup +RUN apt-get install -y unzip + +#NodeJS setup +RUN apt-get update --fix-missing +RUN apt-get install -y nodejs +RUN apt-get install -y npm + +#OCaml Setup +RUN apt-get install -y ocaml # Install autodriver WORKDIR /home @@ -16,6 +63,7 @@ RUN mkdir autolab autograde output RUN chown autolab:autolab autolab RUN chown autolab:autolab output RUN chown autograde:autograde autograde +RUN apt-get update && apt-get install -y sudo RUN apt-get install -y git RUN git clone https://github.com/autolab/Tango.git WORKDIR Tango/autodriver @@ -25,10 +73,12 @@ RUN chmod +s /usr/bin/autodriver # Clean up WORKDIR /home -RUN apt-get remove -y git RUN apt-get -y autoremove RUN rm -rf Tango/ # Check installation RUN ls -l /home RUN which autodriver +RUN which javac +RUN which sml +RUN g++ --version diff --git a/vmms/Dockerfile_CSE191 b/vmms/Dockerfile_CSE191 new file mode 100644 index 00000000..d943539b --- /dev/null +++ b/vmms/Dockerfile_CSE191 @@ -0,0 +1,62 @@ +# Autolab - autograding docker image + +FROM ubuntu:18.04 +MAINTAINER Matthew Knepley + +#C Setup +RUN apt-get update +RUN apt-get install -y gcc +RUN apt-get install -y make +RUN apt-get install -y build-essential +RUN apt-get install -y libcunit1-dev libcunit1-doc libcunit1 + +#Python Setup +RUN apt-get update --fix-missing && \ + DEBIAN_FRONTEND=nointeractive apt-get install -y \ + python3 python3-numpy python3-nose python3-pandas \ + python python-numpy python-nose python-pandas \ + pep8 python-pip python3-pip python-wheel \ + python-sphinx && \ + pip install --upgrade setuptools + +#Valgrind Setup +RUN apt-get update +RUN apt-get install -y valgrind +#OPAM Setup +RUN apt-get install -y m4 +RUN apt-get install -y pkg-config +RUN apt-get install -y opam + +#Utility setup +RUN apt-get install -y unzip + +# Install autodriver +WORKDIR /home +RUN useradd autolab +RUN useradd autograde +RUN mkdir autolab autograde output +RUN chown autolab:autolab autolab +RUN chown autolab:autolab output +RUN chown autograde:autograde autograde +RUN apt-get update && apt-get install -y sudo +RUN apt-get install -y git +RUN git clone https://github.com/autolab/Tango.git +WORKDIR Tango/autodriver +RUN make clean && make +RUN cp autodriver /usr/bin/autodriver +RUN chmod +s /usr/bin/autodriver + +# Install Coq +RUN su autograde -c "opam init && opam install coq" +RUN echo "PATH=${PATH}:/home/autograde/.opam/system/bin" >> /home/autograde/.profile + +# Clean up +WORKDIR /home +RUN apt-get -y autoremove +RUN rm -rf Tango/ + +# Check installation +RUN ls -l /home +RUN which autodriver +RUN su autograde -c "/bin/bash -c 'source ~/.profile && which coqc'" +RUN gcc --version diff --git a/vmms/Dockerfile_CSE421 b/vmms/Dockerfile_CSE421 new file mode 100644 index 00000000..72518e5f --- /dev/null +++ b/vmms/Dockerfile_CSE421 @@ -0,0 +1,64 @@ +FROM ubuntu:16.04 +MAINTAINER Farshad Ghanei +#prerequisites +RUN apt-get update --fix-missing +RUN apt-get update && apt-get install -y apt-utils +RUN apt-get install -y gcc make build-essential libcunit1-dev libcunit1-doc libcunit1 wget python qemu xorg-dev libncurses5-dev gdb git +# Install autodriver +WORKDIR /home +RUN useradd autolab +RUN useradd autograde +RUN mkdir autolab autograde output +RUN chown autolab:autolab autolab +RUN chown autolab:autolab output +RUN chown autograde:autograde autograde +RUN apt-get update && apt-get install -y sudo +RUN apt-get install -y git +RUN git clone https://github.com/autolab/Tango.git +WORKDIR Tango/autodriver +RUN make clean && make +RUN cp autodriver /usr/bin/autodriver +RUN chmod +s /usr/bin/autodriver +############################################### +# configuraion and setup for bochs and pintos # +############################################### +ENV PINTOSDIR /home/autograde/pintos_base +ENV DSTDIR /usr/local +ENV SRCDIR $PINTOSDIR/sources +RUN mkdir -p $SRCDIR +RUN mkdir -p $PINTOSDIR +RUN mkdir -p $DSTDIR/bin +ENV BXSHARE $DSTDIR/share/bochs +ENV PATH="${DSTDIR}/bin:${PATH}" + +# These files may be put on a local server, but keep the folder structure +WORKDIR $SRCDIR/ +RUN git clone git://pintos-os.org/pintos-anon +RUN mv pintos-anon/* $PINTOSDIR +#RUN wget http://web.stanford.edu/class/cs140/projects/pintos/pintos.tar.gz +#RUN tar -xzf pintos.tar.gz +#RUN mv pintos/* $PINTOSDIR + +WORKDIR $SRCDIR/ +RUN wget http://www.oldlinux.org/Linux.old/bochs/Bochs/bochs-2.2.6/bochs-2.2.6.tar.gz +#RUN wget http://web.stanford.edu/class/cs140/projects/pintos/bochs-2.2.6.tar.gz + +WORKDIR $PINTOSDIR/src/misc/ +RUN ./bochs-2.2.6-build.sh + +WORKDIR $PINTOSDIR/src/utils/ +RUN sed -i "5i GDBMACROS=$PINTOSDIR/src/misc/gdb-macros" pintos-gdb +RUN make +RUN cp backtrace pintos pintos-gdb pintos-mkdisk Pintos.pm pintos-set-cmdline squish-pty squish-unix $DSTDIR/bin + +RUN chown -R autograde:autograde /home/autograde +RUN chown -R autograde:autograde $BXSHARE +RUN chown -R autograde:autograde $DSTDIR/bin +# Clean up +WORKDIR /home +RUN apt-get -y autoremove +RUN rm -rf Tango/ +# Check installation +RUN ls -l /home +RUN which autodriver + diff --git a/vmms/Dockerfile_CSE468 b/vmms/Dockerfile_CSE468 new file mode 100644 index 00000000..d491c01b --- /dev/null +++ b/vmms/Dockerfile_CSE468 @@ -0,0 +1,105 @@ +FROM ubuntu:bionic +MAINTAINER Zijian an + +#C++ Setup +RUN apt-get update +RUN apt-get install -y gcc +RUN apt-get install -y make +RUN apt-get install -y build-essential +RUN apt-get install -y libcunit1-dev libcunit1-doc libcunit1 + +# install packages +RUN apt-get update && apt-get install -q -y --no-install-recommends \ + dirmngr \ + gnupg2 \ + build-essential \ + && rm -rf /var/lib/apt/lists/* +# setup keys +RUN apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys C1CF6E31E6BADE8868B172B4F42ED6FBAB17C654 + +# setup sources.list +RUN echo "deb http://packages.ros.org/ros/ubuntu bionic main" > /etc/apt/sources.list.d/ros1-latest.list + +# setup environment +ENV LANG C.UTF-8 +ENV LC_ALL C.UTF-8 + +ENV ROS_DISTRO melodic + +# install ros packages +RUN apt-get update && apt-get install -y --no-install-recommends \ + ros-melodic-desktop-full \ + && rm -rf /var/lib/apt/lists/* +#Python Setup +RUN apt-get update --fix-missing && \ + DEBIAN_FRONTEND=nointeractive apt-get install -y \ + python3 python3-numpy python3-nose python3-pandas \ + python python-numpy python-nose python-pandas \ + pep8 python-pip python3-pip python-wheel \ + python-sphinx && \ + pip install --upgrade setuptools + +#Java Setup +RUN apt-get update --fix-missing +RUN apt-get install -y default-jdk + +#Valgrind Setup +RUN apt-get update +RUN apt-get install -y valgrind + +#SML Setup +RUN mkdir -p /usr/local/bin/sml +WORKDIR /usr/local/bin/sml +RUN apt-get install -y gcc-multilib g++-multilib lib32z1 lib32ncurses5 wget +RUN wget http://www.smlnj.org/dist/working/110.78/config.tgz +RUN tar -xzvf config.tgz +RUN config/install.sh +RUN ln -s /usr/local/bin/sml/bin/sml /usr/local/sbin/sml +RUN ln -s /usr/local/bin/sml/bin/ml-lex /usr/local/sbin/ml-lex +RUN ln -s /usr/local/bin/sml/bin/ml-yacc /usr/local/sbin/ml-yacc + +#Flex setup +RUN apt-get install -y flex + +#Bison Setup +RUN apt-get install -y bison + +#Utility setup +RUN apt-get install -y unzip + +#NodeJS setup +RUN apt-get update --fix-missing +RUN apt-get install -y nodejs +RUN apt-get install -y npm + +#OCaml Setup +RUN apt-get install -y ocaml + +# Install autodriver +WORKDIR /home +RUN useradd autolab +RUN useradd autograde +RUN mkdir autolab autograde output +RUN chown autolab:autolab autolab +RUN chown autolab:autolab output +RUN chown autograde:autograde autograde +RUN apt-get update && apt-get install -y sudo +RUN apt-get install -y git +RUN git clone https://github.com/autolab/Tango.git +WORKDIR Tango/autodriver +RUN make clean && make +RUN cp autodriver /usr/bin/autodriver +RUN chmod +s /usr/bin/autodriver + +# Clean up +WORKDIR /home +RUN apt-get -y autoremove +RUN rm -rf Tango/ + +# Check installation +RUN ls -l /home +RUN which autodriver +RUN which javac +RUN which sml +RUN g++ --version + diff --git a/vmms/Dockerfile_SE b/vmms/Dockerfile_SE new file mode 100644 index 00000000..487d7e73 --- /dev/null +++ b/vmms/Dockerfile_SE @@ -0,0 +1,99 @@ +# Autolab - autograding docker image + +FROM ubuntu:18.04 +MAINTAINER David Dobmeier + +#C++ Setup +RUN apt-get update +RUN apt-get install -y gcc +RUN apt-get install -y make +RUN apt-get install -y build-essential +RUN apt-get install -y libcunit1-dev libcunit1-doc libcunit1 + +#Python Setup +RUN apt-get update --fix-missing && \ + DEBIAN_FRONTEND=nointeractive apt-get install -y \ + python3 python3-numpy python3-nose python3-pandas \ + python python-numpy python-nose python-pandas \ + pep8 python-pip python3-pip python-wheel \ + python-sphinx && \ + pip install --upgrade setuptools + +#Java Setup +RUN apt-get update --fix-missing +RUN apt-get install -y default-jdk + +#Valgrind Setup +RUN apt-get update +RUN apt-get install -y valgrind + +#SML Setup +RUN mkdir -p /usr/local/bin/sml +WORKDIR /usr/local/bin/sml +RUN apt-get install -y gcc-multilib g++-multilib lib32z1 lib32ncurses5 wget +RUN wget http://www.smlnj.org/dist/working/110.78/config.tgz +RUN tar -xzvf config.tgz +RUN config/install.sh +RUN ln -s /usr/local/bin/sml/bin/sml /usr/local/sbin/sml +RUN ln -s /usr/local/bin/sml/bin/ml-lex /usr/local/sbin/ml-lex +RUN ln -s /usr/local/bin/sml/bin/ml-yacc /usr/local/sbin/ml-yacc + +#Flex setup +RUN apt-get install -y flex + +#Bison Setup +RUN apt-get install -y bison + +#Utility setup +RUN apt-get install -y unzip + +#NodeJS setup +RUN apt-get update --fix-missing +RUN apt-get install -y nodejs +RUN apt-get install -y npm + +#OCaml Setup +RUN apt-get install -y ocaml + +#Scala setup +RUN apt-get remove scala-library scala +RUN wget http://scala-lang.org/files/archive/scala-2.12.6.deb +RUN dpkg -i scala-2.12.6.deb +RUN apt-get update +RUN apt-get install -y scala + +RUN echo "deb https://dl.bintray.com/sbt/debian /" | tee -a /etc/apt/sources.list.d/sbt.list +RUN apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 2EE0EA64E40A89B84B2DF73499E82A75642AC823 +RUN apt-get update +RUN apt-get install -y sbt + +#Maven Setup +RUN apt-get install -y maven + +# Install autodriver +WORKDIR /home +RUN useradd autolab +RUN useradd autograde +RUN mkdir autolab autograde output +RUN chown autolab:autolab autolab +RUN chown autolab:autolab output +RUN chown autograde:autograde autograde +RUN apt-get update && apt-get install -y sudo +RUN apt-get install -y git +RUN git clone https://github.com/autolab/Tango.git +WORKDIR Tango/autodriver +RUN make clean && make +RUN cp autodriver /usr/bin/autodriver +RUN chmod +s /usr/bin/autodriver + +# Clean up +WORKDIR /home +RUN apt-get -y autoremove +RUN rm -rf Tango/ + +# Check installation +RUN ls -l /home +RUN which autodriver +RUN which javac +RUN which sml +RUN g++ --version diff --git a/vmms/distDocker.py b/vmms/distDocker.py index c5726176..ed5bd21d 100644 --- a/vmms/distDocker.py +++ b/vmms/distDocker.py @@ -1,16 +1,14 @@ # # distDocker.py # -# Implements the Tango VMMS interface to run Tango jobs in +# Implements the Tango VMMS interface to run Tango jobs in # docker containers on a list of host machines. This list of # host machines must be able to run docker and be accessible # by SSH. The IP address of the host machine is stored in the # `domain_name` attribtue of TangoMachine. # -import random, subprocess, re, time, logging, threading, os, sys, shutil -import tempfile -import socket +import random, subprocess, re, time, logging, threading, os, sys, shutil, socket, tempfile import config from tangoObjects import TangoMachine @@ -18,7 +16,7 @@ def timeout(command, time_out=1): """ timeout - Run a unix command with a timeout. Return -1 on timeout, otherwise return the return value from the command, which is typically 0 for success, 1-255 for failure. - """ + """ # Launch the command p = subprocess.Popen(command, @@ -33,6 +31,7 @@ def timeout(command, time_out=1): # Determine why the while loop terminated if p.poll() is None: + try: os.kill(p.pid, 9) except OSError: @@ -47,8 +46,8 @@ def timeoutWithReturnStatus(command, time_out, returnValue = 0): until the expected value is returned by the command; On timeout, return last error code obtained from the command. """ - p = subprocess.Popen(command, - stdout=open("/dev/null", 'w'), + p = subprocess.Popen(command, + stdout=open("/dev/null", 'w'), stderr=subprocess.STDOUT) t = 0.0 while (t < time_out): @@ -68,10 +67,10 @@ class DistDocker: _SSH_FLAGS = ["-q", "-o", "BatchMode=yes" ] _SSH_AUTH_FLAGS = [ "-i", os.path.join(os.path.dirname(__file__), "id_rsa"), - "-o", "StrictHostKeyChecking=no", - "-o", "GSSAPIAuthentication=no"] + "-o", "StrictHostKeyChecking=no", + "-o", "GSSAPIAuthentication=no"] _SSH_MASTER_FLAGS = ["-o", "ControlMaster=yes", - "-o", "ControlPersist=600"] + "-o", "ControlPersist=600"] _SSH_MASTER_CHECK_FLAG = ["-O", "check"] _SSH_MASTER_EXIT_FLAG = ["-O", "exit"] HOSTS_FILE = 'hosts' @@ -82,9 +81,11 @@ def __init__(self): """ try: self.log = logging.getLogger("DistDocker") - self.hostDNSPoolname=config.Config.HOST_ALIAS - self.hostUser = "ubuntu" - + self.hosts = config.Config.DOCKER_DISTRIBUTED_HOSTS + self.log.info("Current host machines: %s" % self.hosts) + self.hostIdx = 0 + self.hostLock = threading.Lock() + self.hostUser = "developer" if len(config.Config.DOCKER_HOST_USER) > 0: self.hostUser = config.Config.DOCKER_HOST_USER @@ -114,9 +115,18 @@ def getVolumePath(self, instanceName): # VMMS API functions # def initializeVM(self, vm): - """ initializeVM - Assign a host machine for this container to + """ initializeVM - Assign a host machine for this container to run on. """ + self.hostLock.acquire() + host = self.hosts[self.hostIdx] + self.hostIdx = self.hostIdx + 1 + if self.hostIdx >= len(self.hosts): + self.hostIdx = 0 + self.hostLock.release() + + vm.domain_name = host + self.log.info("Assigned host %s to VM %s." % (host, vm.name)) return vm def waitVM(self, vm, max_secs): @@ -126,20 +136,10 @@ def waitVM(self, vm, max_secs): start_time = time.time() vm.ssh_control_dir = tempfile.mkdtemp(prefix="tango-docker-ssh") vm.ssh_flags = ['-o', 'ControlPath=' + os.path.join(vm.ssh_control_dir, "control")] - vm.use_ssh_master = True + vm.use_ssh_master = config.Config.USE_SSH_MASTER # Wait for SSH to work before declaring that the VM is ready while (True): - try: - addr=socket.gethostbyname(self.hostDNSPoolname) - host=socket.gethostbyaddr(addr)[0] - except EnvironmentError: - self.log.exception("DNS lookup failed while setting up vm %s." % (vm.name)) - return -1 - - vm.domain_name = host - self.log.info("(Re)assigned host %s to VM %s." % (host, vm.name)) - elapsed_secs = time.time() - start_time # Give up if the elapsed time exceeds the allowable time @@ -151,11 +151,18 @@ def waitVM(self, vm, max_secs): # If the call to ssh returns timeout (-1) or ssh error # (255), then success. Otherwise, keep trying until we run # out of time. - ret = timeout(["ssh"] + DistDocker._SSH_FLAGS + vm.ssh_flags + - DistDocker._SSH_AUTH_FLAGS + - DistDocker._SSH_MASTER_FLAGS + - ["%s@%s" % (self.hostUser, vm.domain_name), - "(:)"], max_secs - elapsed_secs) + if vm.use_ssh_master: + ret = timeout(["ssh"] + DistDocker._SSH_FLAGS + vm.ssh_flags + + DistDocker._SSH_AUTH_FLAGS + + DistDocker._SSH_MASTER_FLAGS + + ["%s@%s" % (self.hostUser, vm.domain_name), + "(:)"], max_secs - elapsed_secs) + else: + ret = timeout(["ssh"] + DistDocker._SSH_FLAGS + vm.ssh_flags + + DistDocker._SSH_AUTH_FLAGS + + ["%s@%s" % (self.hostUser, vm.domain_name), + "(:)"], max_secs - elapsed_secs) + self.log.debug("VM %s: ssh returned with %d" % (vm.domain_name, ret)) if (ret != -1) and (ret != 255): @@ -172,16 +179,21 @@ def copyIn(self, vm, inputFiles): instanceName = self.instanceName(vm.id, vm.image) volumePath = self.getVolumePath(instanceName) - if vm.use_ssh_master: - ret = timeout(["ssh"] + DistDocker._SSH_FLAGS + vm.ssh_flags + - DistDocker._SSH_MASTER_CHECK_FLAG + - ["%s@%s" % (self.hostUser, vm.domain_name)]) - if ret != 0: - self.log.debug("Lost persistent SSH connection") - return ret + if not hasattr(vm, 'ssh_flags'): + vm.ssh_control_dir = tempfile.mkdtemp(prefix="tango-docker-ssh") + vm.ssh_flags = ['-o', 'ControlPath=' + os.path.join(vm.ssh_control_dir, "control")] + + if hasattr(vm, 'use_ssh_master') and vm.use_ssh_master: + ret = timeout(["ssh"] + DistDocker._SSH_FLAGS + vm.ssh_flags + + DistDocker._SSH_MASTER_CHECK_FLAG + + ["%s@%s" % (self.hostUser, vm.domain_name)]) + if ret != 0: + self.log.debug("Lost persistent SSH connection") + return ret # Create a fresh volume ret = timeout(["ssh"] + DistDocker._SSH_FLAGS + vm.ssh_flags + + DistDocker._SSH_AUTH_FLAGS + ["%s@%s" % (self.hostUser, vm.domain_name), "(rm -rf %s; mkdir %s)" % (volumePath, volumePath)], config.Config.COPYIN_TIMEOUT) @@ -189,14 +201,15 @@ def copyIn(self, vm, inputFiles): self.log.debug("Volume directory created on VM.") else: return ret - + for file in inputFiles: ret = timeout(["scp"] + DistDocker._SSH_FLAGS + vm.ssh_flags + - [file.localFile] + ["%s@%s:%s/%s" % \ + DistDocker._SSH_AUTH_FLAGS + [file.localFile] + + ["%s@%s:%s/%s" % \ (self.hostUser, vm.domain_name, volumePath, file.destFile)], config.Config.COPYIN_TIMEOUT) if ret == 0: - self.log.debug('Copied in file %s to %s' % + self.log.debug('Copied in file %s to %s' % (file.localFile, volumePath + file.destFile)) else: self.log.error( @@ -216,7 +229,7 @@ def runJob(self, vm, runTimeout, maxOutputFileSize): instanceName = self.instanceName(vm.id, vm.image) volumePath = self.getVolumePath(instanceName) - if vm.use_ssh_master: + if hasattr(vm, 'use_ssh_master') and vm.use_ssh_master: ret = timeout(["ssh"] + DistDocker._SSH_FLAGS + vm.ssh_flags + DistDocker._SSH_MASTER_CHECK_FLAG + ["%s@%s" % (self.hostUser, vm.domain_name)]) @@ -224,8 +237,8 @@ def runJob(self, vm, runTimeout, maxOutputFileSize): self.log.debug("Lost persistent SSH connection") return ret - autodriverCmd = 'autodriver -u %d -f %d -t %d -o %d autolab &> output/feedback' % \ - (config.Config.VM_ULIMIT_USER_PROC, + autodriverCmd = 'autodriver -u %d -f %d -t %d -o %d autolab > output/feedback 2>&1' % \ + (config.Config.VM_ULIMIT_USER_PROC, config.Config.VM_ULIMIT_FILE_SIZE, runTimeout, config.Config.MAX_OUTPUT_FILE_SIZE) @@ -235,12 +248,13 @@ def runJob(self, vm, runTimeout, maxOutputFileSize): setupCmd = 'cp -r mount/* autolab/; su autolab -c "%s"; \ cp output/feedback mount/feedback' % autodriverCmd - args = "(docker run --name %s -v %s:/home/mount %s sh -c '%s')" % \ - (instanceName, volumePath, vm.image, setupCmd) + args = "(docker run --name %s -v %s:/home/mount -m %dM %s sh -c '%s')" % \ + (instanceName, volumePath, config.Config.VM_ULIMIT_USER_RAM, vm.image, setupCmd) self.log.debug('Running job: %s' % args) ret = timeout(["ssh"] + DistDocker._SSH_FLAGS + vm.ssh_flags + + DistDocker._SSH_AUTH_FLAGS + ["%s@%s" % (self.hostUser, vm.domain_name), args], runTimeout * 2) @@ -257,7 +271,11 @@ def copyOut(self, vm, destFile): instanceName = self.instanceName(vm.id, vm.image) volumePath = self.getVolumePath(instanceName) - if vm.use_ssh_master: + if not hasattr(vm, 'ssh_flags'): + vm.ssh_control_dir = tempfile.mkdtemp(prefix="tango-docker-ssh") + vm.ssh_flags = ['-o', 'ControlPath=' + os.path.join(vm.ssh_control_dir, "control")] + + if hasattr(vm, 'use_ssh_master') and vm.use_ssh_master: ret = timeout(["ssh"] + DistDocker._SSH_FLAGS + vm.ssh_flags + DistDocker._SSH_MASTER_CHECK_FLAG + ["%s@%s" % (self.hostUser, vm.domain_name)]) @@ -265,12 +283,14 @@ def copyOut(self, vm, destFile): self.log.debug("Lost persistent SSH connection") return ret + ret = timeout(["scp"] + DistDocker._SSH_FLAGS + vm.ssh_flags + - ["%s@%s:%s" % - (self.hostUser, vm.domain_name, volumePath + 'feedback'), + DistDocker._SSH_AUTH_FLAGS + + ["%s@%s:%s" % + (self.hostUser, vm.domain_name, volumePath + 'feedback'), destFile], config.Config.COPYOUT_TIMEOUT) - + self.log.debug('Copied feedback file to %s' % destFile) self.destroyVM(vm) @@ -279,9 +299,15 @@ def copyOut(self, vm, destFile): def destroyVM(self, vm): """ destroyVM - Delete the docker container. """ + instanceName = self.instanceName(vm.id, vm.image) volumePath = self.getVolumePath(instanceName) - if vm.use_ssh_master: + + if not hasattr(vm, 'ssh_flags'): + vm.ssh_control_dir = tempfile.mkdtemp(prefix="tango-docker-ssh") + vm.ssh_flags = ['-o', 'ControlPath=' + os.path.join(vm.ssh_control_dir, "control")] + + if hasattr(vm, 'use_ssh_master') and vm.use_ssh_master: ret = timeout(["ssh"] + DistDocker._SSH_FLAGS + vm.ssh_flags + DistDocker._SSH_MASTER_CHECK_FLAG + ["%s@%s" % (self.hostUser, vm.domain_name)]) @@ -295,15 +321,17 @@ def destroyVM(self, vm): # Return status does not matter. args = '(docker rm -f %s)' % (instanceName) timeout(["ssh"] + DistDocker._SSH_FLAGS + vm.ssh_flags + + DistDocker._SSH_AUTH_FLAGS + ["%s@%s" % (self.hostUser, vm.domain_name), args], config.Config.DOCKER_RM_TIMEOUT) # Destroy corresponding volume if it exists. timeout(["ssh"] + DistDocker._SSH_FLAGS + vm.ssh_flags + + DistDocker._SSH_AUTH_FLAGS + ["%s@%s" % (self.hostUser, vm.domain_name), "(rm -rf %s)" % (volumePath)], config.Config.DOCKER_RM_TIMEOUT) self.log.debug('Deleted volume %s' % instanceName) - if vm.use_ssh_master: + if hasattr(vm, 'use_ssh_master') and vm.use_ssh_master: timeout(["ssh"] + DistDocker._SSH_FLAGS + vm.ssh_flags + DistDocker._SSH_MASTER_EXIT_FLAG + ["%s@%s" % (self.hostUser, vm.domain_name)]) @@ -327,12 +355,9 @@ def getVMs(self): """ getVMs - Get all volumes of docker containers """ machines = [] - try: - hosts=socket.gethostbyname_ex(self.hostDNSPoolname)[2] - except EnvironmentError: - return machines volumePath = self.getVolumePath('') - for host in hosts: + + for host in self.hosts: volumes = subprocess.check_output(["ssh"] + DistDocker._SSH_FLAGS + DistDocker._SSH_AUTH_FLAGS + ["%s@%s" % (self.hostUser, host), @@ -343,11 +368,12 @@ def getVMs(self): machine.vmms = 'distDocker' machine.name = volume machine.domain_name = host - machine.ssh_flags = DistDocker._SSH_AUTH_FLAGS - machine.use_ssh_master = False volume_l = volume.split('-') machine.id = volume_l[1] machine.image = volume_l[2] + machine.ssh_control_dir = tempfile.mkdtemp(prefix="tango-docker-ssh") + machine.ssh_flags = ['-o', 'ControlPath=' + os.path.join(machine.ssh_control_dir, "control")] + machine.use_ssh_master = config.Config.USE_SSH_MASTER machines.append(machine) return machines @@ -360,17 +386,13 @@ def existsVM(self, vm): return (vm.name in vmnames) def getImages(self): - """ getImages - Executes `docker images` on every host and - returns a list of images that can be used to boot a docker - container with. This function is a lot of parsing and so + """ getImages - Executes `docker images` on every host and + returns a list of images that can be used to boot a docker + container with. This function is a lot of parsing and so can break easily. """ result = set() - try: - hosts=socket.gethostbyname_ex(self.hostDNSPoolname)[2] - except EnvironmentError: - return result - for host in hosts: + for host in self.hosts: o = subprocess.check_output(["ssh"] + DistDocker._SSH_FLAGS + DistDocker._SSH_AUTH_FLAGS + ["%s@%s" % (self.hostUser, host), @@ -381,6 +403,6 @@ def getImages(self): o_l.pop() for row in o_l: row_l = row.split(' ') - result.add(re.sub(r".*/([^/]*)", r"\1", row_l[0])) + result.add(row_l[0]) return list(result) diff --git a/vmms/localDocker.py b/vmms/localDocker.py index 45b54145..4999e939 100644 --- a/vmms/localDocker.py +++ b/vmms/localDocker.py @@ -135,6 +135,7 @@ def runJob(self, vm, runTimeout, maxOutputFileSize): volumePath = self.getVolumePath(instanceName) args = ['docker', 'run', '--name', instanceName, '-v'] args = args + ['%s:%s' % (volumePath, '/home/mount')] + args = args + ['-m%dM' % (config.Config.VM_ULIMIT_USER_RAM)] args = args + [vm.image] args = args + ['sh', '-c'] diff --git a/wrapdocker b/wrapdocker index edae791c..bebbcd57 100644 --- a/wrapdocker +++ b/wrapdocker @@ -90,14 +90,14 @@ rm -rf /var/run/docker.pid # otherwise, spawn a shell as well if [ "$PORT" ] then - exec docker daemon -H 0.0.0.0:$PORT -H unix:///var/run/docker.sock \ + exec dockerd -H 0.0.0.0:$PORT -H unix:///var/run/docker.sock \ $DOCKER_DAEMON_ARGS else if [ "$LOG" == "file" ] then - docker daemon $DOCKER_DAEMON_ARGS &>/var/log/docker.log & + dockerd $DOCKER_DAEMON_ARGS &>/var/log/docker.log & else - docker daemon $DOCKER_DAEMON_ARGS & + dockerd $DOCKER_DAEMON_ARGS & fi (( timeout = 60 + SECONDS )) until docker info >/dev/null 2>&1