diff --git a/.travis.yml b/.travis.yml
index f0c9aa80df..920326c4bf 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -13,6 +13,7 @@ services:
 script:
   - docker build . -f images/spark-base/Dockerfile -t cortexlabs/spark-base:latest
   - docker build . -f images/tf-base/Dockerfile -t cortexlabs/tf-base:latest
+  - docker build . -f images/tf-base-gpu/Dockerfile -t cortexlabs/tf-base-gpu:latest

   - ./build/images.sh images/operator operator
   - ./build/images.sh images/spark spark
@@ -20,6 +21,8 @@ script:
   - ./build/images.sh images/tf-train tf-train
   - ./build/images.sh images/tf-serve tf-serve
   - ./build/images.sh images/tf-api tf-api
+  - ./build/images.sh images/tf-serve-gpu tf-serve-gpu
+  - ./build/images.sh images/tf-train-gpu tf-train-gpu
   - ./build/images.sh images/nginx-controller nginx-controller
   - ./build/images.sh images/nginx-backend nginx-backend
   - ./build/images.sh images/fluentd fluentd
diff --git a/cortex.sh b/cortex.sh
index 823aefeaa4..46dd19f169 100755
--- a/cortex.sh
+++ b/cortex.sh
@@ -147,6 +147,8 @@ export CORTEX_IMAGE_TF_SERVE="${CORTEX_IMAGE_TF_SERVE:-cortexlabs/tf-serve:$CORT
 export CORTEX_IMAGE_TF_TRAIN="${CORTEX_IMAGE_TF_TRAIN:-cortexlabs/tf-train:$CORTEX_VERSION_STABLE}"
 export CORTEX_IMAGE_TF_API="${CORTEX_IMAGE_TF_API:-cortexlabs/tf-api:$CORTEX_VERSION_STABLE}"
 export CORTEX_IMAGE_PYTHON_PACKAGER="${CORTEX_IMAGE_PYTHON_PACKAGER:-cortexlabs/python-packager:$CORTEX_VERSION_STABLE}"
+export CORTEX_IMAGE_TF_SERVE_GPU="${CORTEX_IMAGE_TF_SERVE_GPU:-cortexlabs/tf-serve-gpu:$CORTEX_VERSION_STABLE}"
+export CORTEX_IMAGE_TF_TRAIN_GPU="${CORTEX_IMAGE_TF_TRAIN_GPU:-cortexlabs/tf-train-gpu:$CORTEX_VERSION_STABLE}"

 export AWS_ACCESS_KEY_ID="${AWS_ACCESS_KEY_ID:-""}"
 export AWS_SECRET_ACCESS_KEY="${AWS_SECRET_ACCESS_KEY:-""}"
@@ -291,6 +293,8 @@ function setup_configmap() {
     --from-literal='IMAGE_TF_SERVE'=$CORTEX_IMAGE_TF_SERVE \
     --from-literal='IMAGE_TF_API'=$CORTEX_IMAGE_TF_API \
     --from-literal='IMAGE_PYTHON_PACKAGER'=$CORTEX_IMAGE_PYTHON_PACKAGER \
+    --from-literal='IMAGE_TF_TRAIN_GPU'=$CORTEX_IMAGE_TF_TRAIN_GPU \
+    --from-literal='IMAGE_TF_SERVE_GPU'=$CORTEX_IMAGE_TF_SERVE_GPU \
     -o yaml --dry-run | kubectl apply -f - >/dev/null
 }
diff --git a/dev/eks.sh b/dev/eks.sh
index d8aee8becd..eea9352ca1 100755
--- a/dev/eks.sh
+++ b/dev/eks.sh
@@ -26,7 +26,12 @@ function eks_set_cluster() {
 }

 if [ "$1" = "start" ]; then
-  eksctl create cluster --version=1.11 --name=$K8S_NAME --region $K8S_REGION --nodes=$K8S_NODE_COUNT --node-type=$K8S_NODE_INSTANCE_TYPE
+  eksctl create cluster --version=1.11 --name=$K8S_NAME --region $K8S_REGION --nodes-max $K8S_NODES_MAX_COUNT --nodes-min $K8S_NODES_MIN_COUNT --node-type=$K8S_NODE_INSTANCE_TYPE
+  if [ $K8S_GPU_NODES_MIN_COUNT -gt 0 ] || [ $K8S_GPU_NODES_MAX_COUNT -gt 0 ]; then
+    eksctl create nodegroup --version=1.11 --cluster=$K8S_NAME --nodes-max=$K8S_GPU_NODES_MAX_COUNT --nodes-min=$K8S_GPU_NODES_MIN_COUNT --node-type=$K8S_GPU_NODE_INSTANCE_TYPE --node-ami=$K8S_GPU_NODE_AMI
+    echo "Once the GPU nodegroup joins the cluster, run:"
+    echo "kubectl apply -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v1.11/nvidia-device-plugin.yml"
+  fi
   eks_set_cluster

 elif [ "$1" = "update" ]; then
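Note on the eks.sh change: the NVIDIA device plugin DaemonSet (applied via the `kubectl apply` command echoed above) is what registers GPUs with the kubelet. A quick sanity check after the nodegroup joins — a sketch using standard kubectl, assuming the device plugin is running:

```bash
# Non-zero GPU counts confirm the NVIDIA device plugin has registered
# each node's GPUs as the nvidia.com/gpu extended resource.
kubectl get nodes -o custom-columns=NAME:.metadata.name,GPU:".status.allocatable.nvidia\.com/gpu"
```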
diff --git a/dev/kops.sh b/dev/kops.sh
index d92ebe012a..44e6eeda9d 100755
--- a/dev/kops.sh
+++ b/dev/kops.sh
@@ -131,8 +131,8 @@ spec:
   image: kope.io/k8s-1.11-debian-stretch-amd64-hvm-ebs-2018-08-17
   machineType: ${K8S_NODE_INSTANCE_TYPE}
   rootVolumeSize: ${K8S_NODE_VOLUME_SIZE}
-  maxSize: ${K8S_NODE_COUNT}
-  minSize: ${K8S_NODE_COUNT}
+  maxSize: ${K8S_NODES_MAX_COUNT}
+  minSize: ${K8S_NODES_MIN_COUNT}
   nodeLabels:
     kops.k8s.io/instancegroup: nodes
   role: Node
diff --git a/dev/registry.sh b/dev/registry.sh
index e2a853e47a..468ee09d67 100755
--- a/dev/registry.sh
+++ b/dev/registry.sh
@@ -47,6 +47,8 @@ function create_registry() {
   aws ecr create-repository --repository-name=cortexlabs/tf-train --region=$REGISTRY_REGION || true
   aws ecr create-repository --repository-name=cortexlabs/tf-api --region=$REGISTRY_REGION || true
   aws ecr create-repository --repository-name=cortexlabs/python-packager --region=$REGISTRY_REGION || true
+  aws ecr create-repository --repository-name=cortexlabs/tf-train-gpu --region=$REGISTRY_REGION || true
+  aws ecr create-repository --repository-name=cortexlabs/tf-serve-gpu --region=$REGISTRY_REGION || true
 }

 ### HELPERS ###
@@ -115,6 +117,7 @@ elif [ "$cmd" = "update" ]; then
     cache_builder $ROOT/images/spark-base spark-base
     build_base $ROOT/images/spark-base spark-base
     build_base $ROOT/images/tf-base tf-base
+    build_base $ROOT/images/tf-base-gpu tf-base-gpu

     cache_builder $ROOT/images/operator operator
     build_and_push $ROOT/images/operator operator latest
@@ -128,11 +131,13 @@ elif [ "$cmd" = "update" ]; then
     build_and_push $ROOT/images/argo-controller argo-controller latest
     build_and_push $ROOT/images/argo-executor argo-executor latest
     build_and_push $ROOT/images/tf-serve tf-serve latest
+    build_and_push $ROOT/images/tf-serve-gpu tf-serve-gpu latest
     build_and_push $ROOT/images/python-packager python-packager latest
   fi

   build_and_push $ROOT/images/spark spark latest
   build_and_push $ROOT/images/tf-train tf-train latest
+  build_and_push $ROOT/images/tf-train-gpu tf-train-gpu latest
   build_and_push $ROOT/images/tf-api tf-api latest

   cleanup
diff --git a/docs/applications/advanced/compute.md b/docs/applications/advanced/compute.md
index f25f840c01..56029af150 100644
--- a/docs/applications/advanced/compute.md
+++ b/docs/applications/advanced/compute.md
@@ -10,6 +10,7 @@ For example:
   compute:
     cpu: "2"
     mem: "1Gi"
+    gpu: 1
 ```

 CPU and memory requests in Cortex correspond to compute resource requests in Kubernetes. In the example above, the training job will only be scheduled once 2 CPUs and 1Gi of memory are available, and the job will be guaranteed to have access to those resources throughout its execution. In some cases, a Cortex compute resource request can be (or may default to) `Null`.
@@ -21,3 +22,9 @@ One unit of CPU corresponds to one virtual CPU on AWS. Fractional requests are a
 ## Memory

 One unit of memory is one byte. Memory can be expressed as an integer or by using one of these suffixes: `K`, `M`, `G`, `T` (or their power-of-two counterparts: `Ki`, `Mi`, `Gi`, `Ti`). For example, the following values represent roughly the same memory: `128974848`, `129e6`, `129M`, `123Mi`.
+
+## GPU
+One unit of GPU corresponds to one GPU on AWS. Fractional requests are not allowed. See the AWS documentation on [adding GPU-enabled nodes on EKS](https://docs.aws.amazon.com/en_ca/eks/latest/userguide/gpu-ami.html).
+
+## GPU Support
+We recommend using GPU compute requests on API resources only if your cluster has enough GPU nodes to cover the GPU requests of model training plus APIs (ideally with an autoscaler). Otherwise, because rolling updates are zero-downtime, GPUs will remain allocated to API replicas from the previous deployment, and model training may be left without sufficient GPU resources.
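To make the new `gpu` field concrete, here is a hypothetical configuration fragment requesting GPUs for both training and serving (the resource names are invented; the `compute` fields are the ones documented in this diff):

```yaml
- kind: model
  name: classifier  # hypothetical model name
  compute:
    cpu: "2"
    mem: "4Gi"
    gpu: 1  # train on the GPU image, on a GPU node

- kind: api
  name: classifier-api  # hypothetical API name
  model_name: classifier
  compute:
    replicas: 2
    gpu: 1  # each replica requests one GPU
```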
diff --git a/docs/applications/resources/apis.md b/docs/applications/resources/apis.md
index aca008f1a1..aa2fb07183 100644
--- a/docs/applications/resources/apis.md
+++ b/docs/applications/resources/apis.md
@@ -12,6 +12,7 @@ Serve models at scale and use them to build smarter applications.
     replicas: <int>  # number of replicas to launch (default: 1)
     cpu: <string>  # CPU request (default: Null)
     mem: <string>  # memory request (default: Null)
+    gpu: <string>  # GPU request (default: Null)
   tags:
     <string>: <scalar>  # arbitrary key/value pairs to attach to the resource (optional)
   ...
diff --git a/docs/applications/resources/models.md b/docs/applications/resources/models.md
index 1d8b5c47bb..9f7773b9a3 100644
--- a/docs/applications/resources/models.md
+++ b/docs/applications/resources/models.md
@@ -44,6 +44,7 @@ Train custom TensorFlow models at scale.
   compute:
     cpu: <string>  # CPU request (default: Null)
     mem: <string>  # memory request (default: Null)
+    gpu: <string>  # GPU request (default: Null)

   tags:
     <string>: <scalar>  # arbitrary key/value pairs to attach to the resource (optional)
diff --git a/docs/applications/resources/statuses.md b/docs/applications/resources/statuses.md
index 3c6c02be63..49b1fa65c6 100644
--- a/docs/applications/resources/statuses.md
+++ b/docs/applications/resources/statuses.md
@@ -12,7 +12,7 @@
 | terminated | Resource was terminated |
 | upstream error | Resource was not created due to an error in one of its dependencies |
 | upstream termination | Resource was not created because one of its dependencies was terminated |
-| compute unavailable | Resource's workload could not start due to insufficient memory or CPU in the cluster |
+| compute unavailable | Resource's workload could not start due to insufficient memory, CPU, or GPU in the cluster |

 ## API statuses

@@ -29,4 +29,4 @@
 | update skipped | API was not updated due to an error in another resource; a previous version of this API is ready |
 | upstream error | API was not created due to an error in one of its dependencies; a previous version of this API may be ready |
 | upstream termination | API was not created because one of its dependencies was terminated; a previous version of this API may be ready |
-| compute unavailable | API could not start due to insufficient memory or CPU in the cluster; some replicas may be ready |
+| compute unavailable | API could not start due to insufficient memory, CPU, or GPU in the cluster; some replicas may be ready |
diff --git a/images/python-packager/Dockerfile b/images/python-packager/Dockerfile
index 6ae534717a..4122a81d71 100644
--- a/images/python-packager/Dockerfile
+++ b/images/python-packager/Dockerfile
@@ -1,4 +1,4 @@
-FROM ubuntu:18.04
+FROM ubuntu:16.04

 RUN apt-get update -qq && apt-get install -y -q \
     python3 \
diff --git a/images/spark-base/Dockerfile b/images/spark-base/Dockerfile
index eebd98e9c2..352702b771 100644
--- a/images/spark-base/Dockerfile
+++ b/images/spark-base/Dockerfile
@@ -1,4 +1,4 @@
-FROM ubuntu:18.04 as builder
+FROM ubuntu:16.04 as builder

 RUN apt-get update -qq && apt-get install -y -q \
     git \
@@ -47,7 +47,7 @@ RUN wget -q -P $SPARK_HOME/jars/ http://central.maven.org/maven2/com/amazonaws/a

 COPY images/spark-base/conf/* $SPARK_HOME/conf/

-FROM ubuntu:18.04
+FROM ubuntu:16.04

 ENV JAVA_HOME="/usr/lib/jvm/java-8-openjdk-amd64"
 ENV HADOOP_HOME="/opt/hadoop"
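The Dockerfiles below pin the CPU and GPU base images to the upstream `tensorflow/tensorflow` 1.12.0 images. A local build-and-smoke-test sketch (the build command mirrors the Travis step above; the run command assumes nvidia-docker is installed on a GPU host):

```bash
# Build the GPU base image from the repo root
docker build . -f images/tf-base-gpu/Dockerfile -t cortexlabs/tf-base-gpu:latest

# Smoke test: confirm TensorFlow 1.12 can see the GPU
docker run --rm --runtime=nvidia cortexlabs/tf-base-gpu:latest \
  python3 -c "import tensorflow as tf; print(tf.test.is_gpu_available())"
```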
diff --git a/images/tf-base-gpu/Dockerfile b/images/tf-base-gpu/Dockerfile
new file mode 100644
index 0000000000..d696be771a
--- /dev/null
+++ b/images/tf-base-gpu/Dockerfile
@@ -0,0 +1,5 @@
+FROM tensorflow/tensorflow:1.12.0-gpu-py3
+
+RUN apt-get update -qq && apt-get install -y -q \
+    zlib1g-dev \
+    && apt-get clean -qq && rm -rf /var/lib/apt/lists/*
diff --git a/images/tf-base/Dockerfile b/images/tf-base/Dockerfile
index 359b3514f2..4904c1ad82 100644
--- a/images/tf-base/Dockerfile
+++ b/images/tf-base/Dockerfile
@@ -1,21 +1,6 @@
-FROM ubuntu:18.04
-
-ARG TF_VERSION="1.12.0"
+FROM tensorflow/tensorflow:1.12.0-py3

 RUN apt-get update -qq && apt-get install -y -q \
-    build-essential \
-    curl \
-    libfreetype6-dev \
-    libpng-dev \
-    libzmq3-dev \
-    pkg-config \
-    python3 \
-    python3-dev \
-    python3-pip \
-    rsync \
-    software-properties-common \
-    unzip \
     zlib1g-dev \
     && apt-get clean -qq && rm -rf /var/lib/apt/lists/*
-
-RUN pip3 install https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-${TF_VERSION}-cp36-cp36m-linux_x86_64.whl && rm -rf /root/.cache/pip*
diff --git a/images/tf-serve-gpu/Dockerfile b/images/tf-serve-gpu/Dockerfile
new file mode 100644
index 0000000000..de25316a76
--- /dev/null
+++ b/images/tf-serve-gpu/Dockerfile
@@ -0,0 +1,8 @@
+FROM cortexlabs/tf-base-gpu
+
+ARG TF_VERSION="1.12.0"
+
+RUN curl -o tensorflow-model-server.deb http://storage.googleapis.com/tensorflow-serving-apt/pool/tensorflow-model-server-${TF_VERSION}/t/tensorflow-model-server/tensorflow-model-server_${TF_VERSION}_all.deb
+RUN dpkg -i tensorflow-model-server.deb
+
+ENTRYPOINT ["tensorflow_model_server"]
diff --git a/images/tf-serve/Dockerfile b/images/tf-serve/Dockerfile
index a6e160cd50..7eedaaad5b 100644
--- a/images/tf-serve/Dockerfile
+++ b/images/tf-serve/Dockerfile
@@ -1,25 +1,9 @@
-FROM ubuntu:18.04
+FROM cortexlabs/tf-base

 ARG TF_VERSION="1.12.0"

 RUN apt-get update -qq && apt-get install -y -q \
-    automake \
-    build-essential \
     curl \
-    libcurl3-dev \
-    git \
-    libtool \
-    libfreetype6-dev \
-    libpng-dev \
-    libzmq3-dev \
-    pkg-config \
-    python3-dev \
-    python3-numpy \
-    python3-pip \
-    software-properties-common \
-    swig \
-    zip \
-    zlib1g-dev \
     && apt-get clean -qq && rm -rf /var/lib/apt/lists/*

 RUN curl -o tensorflow-model-server.deb http://storage.googleapis.com/tensorflow-serving-apt/pool/tensorflow-model-server-${TF_VERSION}/t/tensorflow-model-server/tensorflow-model-server_${TF_VERSION}_all.deb
diff --git a/images/tf-train-gpu/Dockerfile b/images/tf-train-gpu/Dockerfile
new file mode 100644
index 0000000000..3dd7f65457
--- /dev/null
+++ b/images/tf-train-gpu/Dockerfile
@@ -0,0 +1,15 @@
+FROM cortexlabs/tf-base-gpu
+
+ENV PYTHONPATH="/src:${PYTHONPATH}"
+
+COPY pkg/workloads/lib/requirements.txt /src/lib/requirements.txt
+COPY pkg/workloads/tf_train/requirements.txt /src/tf_train/requirements.txt
+RUN pip3 install -r /src/lib/requirements.txt && \
+    pip3 install -r /src/tf_train/requirements.txt && \
+    rm -rf /root/.cache/pip*
+
+COPY pkg/workloads/consts.py /src/
+COPY pkg/workloads/lib /src/lib
+COPY pkg/workloads/tf_train /src/tf_train
+
+ENTRYPOINT ["/usr/bin/python3", "/src/tf_train/train.py"]
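In the Go changes below, note the asymmetry: `TFCompute.GPU` is an `*int64` (nil means not requested, and any value must be greater than 0), while `APICompute.GPU` is a plain `int64` defaulting to 0. A minimal sketch of what that buys (simplified stand-in types, not the real structs):

```go
package main

import "fmt"

// Simplified stand-ins for the structs in compute.go.
type TFCompute struct{ GPU *int64 } // nil = GPU not requested
type APICompute struct{ GPU int64 } // 0 = GPU not requested

func main() {
	var training TFCompute        // gpu omitted in config -> nil
	serving := APICompute{GPU: 1} // gpu: 1 in config

	// The operator branches on these values to pick the CPU or GPU image.
	fmt.Println("train on GPU image:", training.GPU != nil) // false
	fmt.Println("serve on GPU image:", serving.GPU > 0)     // true
}
```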
diff --git a/pkg/api/userconfig/compute.go b/pkg/api/userconfig/compute.go
index a27da3a440..52ff73af13 100644
--- a/pkg/api/userconfig/compute.go
+++ b/pkg/api/userconfig/compute.go
@@ -142,6 +142,7 @@ func (sparkCompute *SparkCompute) ID() string {
 type TFCompute struct {
     CPU *Quantity `json:"cpu" yaml:"cpu"`
     Mem *Quantity `json:"mem" yaml:"mem"`
+    GPU *int64    `json:"gpu" yaml:"gpu"`
 }

 var tfComputeFieldValidation = &cr.StructFieldValidation{
@@ -166,6 +167,13 @@ var tfComputeFieldValidation = &cr.StructFieldValidation{
                 Min: k8sresource.MustParse("0"),
             }),
         },
+        &cr.StructFieldValidation{
+            StructField: "GPU",
+            Int64PtrValidation: &cr.Int64PtrValidation{
+                Default:     nil,
+                GreaterThan: util.Int64Ptr(0),
+            },
+        },
     },
   },
 }
@@ -181,6 +189,7 @@ type APICompute struct {
     Replicas int32     `json:"replicas" yaml:"replicas"`
     CPU      *Quantity `json:"cpu" yaml:"cpu"`
     Mem      *Quantity `json:"mem" yaml:"mem"`
+    GPU      int64     `json:"gpu" yaml:"gpu"`
 }

 var apiComputeFieldValidation = &cr.StructFieldValidation{
@@ -212,6 +221,13 @@ var apiComputeFieldValidation = &cr.StructFieldValidation{
                 Min: k8sresource.MustParse("0"),
             }),
         },
+        &cr.StructFieldValidation{
+            StructField: "GPU",
+            Int64Validation: &cr.Int64Validation{
+                Default:              0,
+                GreaterThanOrEqualTo: util.Int64Ptr(0),
+            },
+        },
     },
   },
 }
@@ -221,6 +237,7 @@ func (apiCompute *APICompute) ID() string {
     buf.WriteString(s.Int32(apiCompute.Replicas))
     buf.WriteString(QuantityPtrID(apiCompute.CPU))
     buf.WriteString(QuantityPtrID(apiCompute.Mem))
+    buf.WriteString(s.Int64(apiCompute.GPU))
     return util.HashBytes(buf.Bytes())
 }
@@ -228,6 +245,7 @@ func (apiCompute *APICompute) IDWithoutReplicas() string {
     var buf bytes.Buffer
     buf.WriteString(QuantityPtrID(apiCompute.CPU))
     buf.WriteString(QuantityPtrID(apiCompute.Mem))
+    buf.WriteString(s.Int64(apiCompute.GPU))
     return util.HashBytes(buf.Bytes())
 }
@@ -284,6 +302,11 @@ func MaxTFCompute(tfComputes ...*TFCompute) *TFCompute {
                 aggregated.Mem = tfCompute.Mem
             }
         }
+        if tfCompute.GPU != nil {
+            if aggregated.GPU == nil || *tfCompute.GPU > *aggregated.GPU {
+                aggregated.GPU = tfCompute.GPU
+            }
+        }
     }

     return &aggregated
@@ -299,5 +322,10 @@ func (apiCompute *APICompute) Equal(apiCompute2 APICompute) bool {
     if !QuantityPtrsEqual(apiCompute.Mem, apiCompute2.Mem) {
         return false
     }
+
+    if apiCompute.GPU != apiCompute2.GPU {
+        return false
+    }
+
     return true
 }
diff --git a/pkg/operator/cortexconfig/cortex_config.go b/pkg/operator/cortexconfig/cortex_config.go
index 05a7276dc8..e423e283bc 100644
--- a/pkg/operator/cortexconfig/cortex_config.go
+++ b/pkg/operator/cortexconfig/cortex_config.go
@@ -34,6 +34,8 @@ var (
     TFServeImage        string
     TFAPIImage          string
     PythonPackagerImage string
+    TFTrainImageGPU     string
+    TFServeImageGPU     string
 )

 func init() {
@@ -47,6 +49,8 @@ func init() {
     TFServeImage = getStr("IMAGE_TF_SERVE")
     TFAPIImage = getStr("IMAGE_TF_API")
     PythonPackagerImage = getStr("IMAGE_PYTHON_PACKAGER")
+    TFTrainImageGPU = getStr("IMAGE_TF_TRAIN_GPU")
+    TFServeImageGPU = getStr("IMAGE_TF_SERVE_GPU")
 }

 //
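Both workload builders below follow the same pattern: swap in the GPU image and attach `nvidia.com/gpu` to the container's resources. One Kubernetes detail worth knowing: extended resources such as `nvidia.com/gpu` must be whole integers and must appear in `limits` (with `requests` equal to `limits` when both are set), which is why the diffs add a separate limits list. A condensed sketch of the pattern (not the exact operator code):

```go
package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	k8sresource "k8s.io/apimachinery/pkg/api/resource"
)

// gpuResources mirrors how api.go and training_job.go attach GPUs:
// the same integer quantity goes into both requests and limits.
func gpuResources(gpu int64) corev1.ResourceRequirements {
	requests := corev1.ResourceList{}
	limits := corev1.ResourceList{}
	if gpu > 0 {
		q := *k8sresource.NewQuantity(gpu, k8sresource.DecimalSI)
		requests["nvidia.com/gpu"] = q
		limits["nvidia.com/gpu"] = q
	}
	return corev1.ResourceRequirements{Requests: requests, Limits: limits}
}

func main() {
	fmt.Println(gpuResources(1))
}
```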
diff --git a/pkg/operator/workloads/api.go b/pkg/operator/workloads/api.go
index f4afeb606b..26f66e6284 100644
--- a/pkg/operator/workloads/api.go
+++ b/pkg/operator/workloads/api.go
@@ -21,6 +21,7 @@ import (
     appsv1b1 "k8s.io/api/apps/v1beta1"
     corev1 "k8s.io/api/core/v1"
+    k8sresource "k8s.io/apimachinery/pkg/api/resource"

     "github.com/cortexlabs/cortex/pkg/api/context"
     s "github.com/cortexlabs/cortex/pkg/api/strings"
@@ -47,6 +48,7 @@ func apiSpec(

     transformResourceList := corev1.ResourceList{}
     tfServingResourceList := corev1.ResourceList{}
+    tfServingLimitsList := corev1.ResourceList{}

     if apiCompute.CPU != nil {
         q1, q2 := apiCompute.CPU.SplitInTwo()
@@ -59,6 +61,13 @@
         tfServingResourceList[corev1.ResourceMemory] = *q2
     }

+    servingImage := cc.TFServeImage
+    if apiCompute.GPU > 0 {
+        servingImage = cc.TFServeImageGPU
+        tfServingResourceList["nvidia.com/gpu"] = *k8sresource.NewQuantity(apiCompute.GPU, k8sresource.DecimalSI)
+        tfServingLimitsList["nvidia.com/gpu"] = *k8sresource.NewQuantity(apiCompute.GPU, k8sresource.DecimalSI)
+    }
+
     return k8s.Deployment(&k8s.DeploymentSpec{
         Name:     internalAPIName(apiName, ctx.App.Name),
         Replicas: ctx.APIs[apiName].Compute.Replicas,
@@ -106,7 +115,7 @@ func apiSpec(
                 },
                 {
                     Name:            tfServingContainerName,
-                    Image:           cc.TFServeImage,
+                    Image:           servingImage,
                     ImagePullPolicy: "Always",
                     Args: []string{
                         "--port=" + tfServingPortStr,
@@ -116,6 +125,7 @@
                     VolumeMounts: k8s.DefaultVolumeMounts(),
                     Resources: corev1.ResourceRequirements{
                         Requests: tfServingResourceList,
+                        Limits:   tfServingLimitsList,
                     },
                 },
             },
@@ -300,18 +310,20 @@ func APIDeploymentCompute(deployment *appsv1b1.Deployment) userconfig.APICompute
         replicas = *deployment.Spec.Replicas
     }

-    cpu, mem := APIPodCompute(deployment.Spec.Template.Spec.Containers)
+    cpu, mem, gpu := APIPodCompute(deployment.Spec.Template.Spec.Containers)

     return userconfig.APICompute{
         Replicas: replicas,
         CPU:      cpu,
         Mem:      mem,
+        GPU:      gpu,
     }
 }

-func APIPodCompute(containers []corev1.Container) (*userconfig.Quantity, *userconfig.Quantity) {
-    var totalCPU *userconfig.Quantity = nil
-    var totalMem *userconfig.Quantity = nil
+func APIPodCompute(containers []corev1.Container) (*userconfig.Quantity, *userconfig.Quantity, int64) {
+    var totalCPU *userconfig.Quantity
+    var totalMem *userconfig.Quantity
+    var totalGPU int64

     for _, container := range containers {
         if container.Name != apiContainerName && container.Name != tfServingContainerName {
@@ -335,7 +347,13 @@ func APIPodCompute(containers []corev1.Container) (*userconfig.Quantity, *userco
             }
             totalMem.Add(mem)
         }
+        if gpu, ok := requests["nvidia.com/gpu"]; ok {
+            gpuVal, ok := gpu.AsInt64()
+            if ok {
+                totalGPU += gpuVal
+            }
+        }
     }

-    return totalCPU, totalMem
+    return totalCPU, totalMem, totalGPU
 }
diff --git a/pkg/operator/workloads/api_status.go b/pkg/operator/workloads/api_status.go
index a30d804387..ae4bf74389 100644
--- a/pkg/operator/workloads/api_status.go
+++ b/pkg/operator/workloads/api_status.go
@@ -117,10 +117,11 @@ func getReplicaCountsMap(podList []corev1.Pod, ctx *context.Context) map[string]
     for _, pod := range podList {
         resourceID := pod.Labels["resourceID"]
-        cpu, mem := APIPodCompute(pod.Spec.Containers)
+        cpu, mem, gpu := APIPodCompute(pod.Spec.Containers)
         podAPICompute := userconfig.APICompute{
             CPU: cpu,
             Mem: mem,
+            GPU: gpu,
         }
         podAPIComputeID := podAPICompute.IDWithoutReplicas()
         podStatus := k8s.GetPodStatus(&pod)
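`APIPodCompute` above sums GPU requests across containers via `Quantity.AsInt64`, which reports whether the quantity fits in an int64 (GPU quantities always do, since fractional GPUs are not allowed). An isolated illustration with assumed per-container values:

```go
package main

import (
	"fmt"

	k8sresource "k8s.io/apimachinery/pkg/api/resource"
)

func main() {
	var totalGPU int64
	for _, s := range []string{"1", "2"} { // assumed per-container GPU requests
		q := k8sresource.MustParse(s)
		if v, ok := q.AsInt64(); ok {
			totalGPU += v
		}
	}
	fmt.Println("total GPUs:", totalGPU) // total GPUs: 3
}
```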
diff --git a/pkg/operator/workloads/training_job.go b/pkg/operator/workloads/training_job.go
index 17b0d7e5c3..a1e2df5b51 100644
--- a/pkg/operator/workloads/training_job.go
+++ b/pkg/operator/workloads/training_job.go
@@ -19,6 +19,7 @@ package workloads
 import (
     batchv1 "k8s.io/api/batch/v1"
     corev1 "k8s.io/api/core/v1"
+    k8sresource "k8s.io/apimachinery/pkg/api/resource"

     "github.com/cortexlabs/cortex/pkg/api/context"
     "github.com/cortexlabs/cortex/pkg/api/userconfig"
@@ -38,6 +39,7 @@ func trainingJobSpec(
 ) *batchv1.Job {

     resourceList := corev1.ResourceList{}
+    limitsList := corev1.ResourceList{}
     if tfCompute.CPU != nil {
         resourceList[corev1.ResourceCPU] = tfCompute.CPU.Quantity
     }
@@ -45,6 +47,13 @@
         resourceList[corev1.ResourceMemory] = tfCompute.Mem.Quantity
     }

+    trainImage := cc.TFTrainImage
+    if tfCompute.GPU != nil {
+        trainImage = cc.TFTrainImageGPU
+        resourceList["nvidia.com/gpu"] = *k8sresource.NewQuantity(*tfCompute.GPU, k8sresource.DecimalSI)
+        limitsList["nvidia.com/gpu"] = *k8sresource.NewQuantity(*tfCompute.GPU, k8sresource.DecimalSI)
+    }
+
     spec := k8s.Job(&k8s.JobSpec{
         Name: workloadID,
         Labels: map[string]string{
@@ -64,7 +73,7 @@
             Containers: []corev1.Container{
                 {
                     Name:            "train",
-                    Image:           cc.TFTrainImage,
+                    Image:           trainImage,
                     ImagePullPolicy: "Always",
                     Args: []string{
                         "--workload-id=" + workloadID,
@@ -76,6 +85,7 @@
                     VolumeMounts: k8s.DefaultVolumeMounts(),
                     Resources: corev1.ResourceRequirements{
                         Requests: resourceList,
+                        Limits:   limitsList,
                     },
                 },
             },
diff --git a/pkg/workloads/lib/aws.py b/pkg/workloads/lib/aws.py
index 69c5102b88..7a6f5f0d24 100644
--- a/pkg/workloads/lib/aws.py
+++ b/pkg/workloads/lib/aws.py
@@ -151,7 +151,7 @@ def upload_json_to_s3(obj, key, bucket, client_config={}):

 def read_json_from_s3(key, bucket, allow_missing=True, client_config={}):
     obj = read_bytes_from_s3(key, bucket, allow_missing, client_config)
     if obj is None:
         return None
-    return json.loads(obj)
+    return json.loads(obj.decode("utf-8"))
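The aws.py change exists because the base images moved to Python 3.5 (ubuntu:16.04), where `json.loads` rejects `bytes`; decoding after the `None` check also keeps `allow_missing=True` working when the key is absent. A standalone sketch of the behavior (hypothetical helper name):

```python
import json

def parse_s3_json(raw):
    # raw is None when the key is missing and allow_missing=True
    if raw is None:
        return None
    # Python 3.5's json.loads requires str, so decode the S3 bytes first
    return json.loads(raw.decode("utf-8"))

print(parse_s3_json(b'{"gpu": 1}'))  # {'gpu': 1}
print(parse_s3_json(None))           # None
```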