From de5fa6ebfe39d7e5cf636468f37068951b8d490f Mon Sep 17 00:00:00 2001 From: vishal Date: Wed, 26 Jun 2019 08:59:39 -0400 Subject: [PATCH 01/24] Add onnx support --- cli/cmd/predict.go | 37 +---- cortex-installer.sh | 2 + dev/registry.sh | 2 + examples/iris/cortex.yaml | 11 ++ examples/iris/inference.py | 27 ++++ examples/iris/requirements.txt | 1 + go.mod | 5 + go.sum | 10 ++ images/onnx-serve/Dockerfile | 39 ++++++ pkg/consts/consts.go | 39 +++--- pkg/operator/api/context/apis.go | 5 +- pkg/operator/api/context/context.go | 2 +- pkg/operator/api/context/dependencies.go | 18 ++- pkg/operator/api/userconfig/apis.go | 29 +++- pkg/operator/api/userconfig/config_key.go | 8 +- pkg/operator/api/userconfig/model_type.go | 78 +++++++++++ pkg/operator/config/config.go | 16 ++- pkg/operator/context/apis.go | 60 +++++++- pkg/operator/context/context.go | 2 +- pkg/operator/endpoints/shared.go | 1 + pkg/operator/workloads/api.go | 98 ++++++++++++- pkg/operator/workloads/workflow.go | 14 +- pkg/workloads/lib/context.py | 15 ++ pkg/workloads/onnx_serve/api.py | 163 ++++++++++++++++++++++ pkg/workloads/onnx_serve/requirements.txt | 5 + 25 files changed, 605 insertions(+), 82 deletions(-) create mode 100644 examples/iris/inference.py create mode 100644 examples/iris/requirements.txt create mode 100644 images/onnx-serve/Dockerfile create mode 100644 pkg/operator/api/userconfig/model_type.go create mode 100644 pkg/workloads/onnx_serve/api.py create mode 100644 pkg/workloads/onnx_serve/requirements.txt diff --git a/cli/cmd/predict.go b/cli/cmd/predict.go index f10142cf9e..a104aef179 100644 --- a/cli/cmd/predict.go +++ b/cli/cmd/predict.go @@ -24,11 +24,9 @@ import ( "github.com/spf13/cobra" - "github.com/cortexlabs/cortex/pkg/lib/cast" "github.com/cortexlabs/cortex/pkg/lib/errors" "github.com/cortexlabs/cortex/pkg/lib/files" "github.com/cortexlabs/cortex/pkg/lib/json" - s "github.com/cortexlabs/cortex/pkg/lib/strings" libtime "github.com/cortexlabs/cortex/pkg/lib/time" "github.com/cortexlabs/cortex/pkg/lib/urls" "github.com/cortexlabs/cortex/pkg/operator/api/resource" @@ -43,15 +41,8 @@ func init() { } type PredictResponse struct { - ResourceID string `json:"resource_id"` - Predictions []Prediction `json:"predictions"` -} - -type Prediction struct { - Prediction interface{} `json:"prediction"` - PredictionReversed interface{} `json:"prediction_reversed"` - TransformedSample interface{} `json:"transformed_sample"` - Response interface{} `json:"response"` + ResourceID string `json:"resource_id"` + Predictions []interface{} `json:"predictions"` } var predictCmd = &cobra.Command{ @@ -79,6 +70,7 @@ var predictCmd = &cobra.Command{ apiPath := apiGroupStatus.ActiveStatus.Path apiURL := urls.Join(resourcesRes.APIsBaseURL, apiPath) predictResponse, err := makePredictRequest(apiURL, samplesJSONPath) + if err != nil { if strings.Contains(err.Error(), "503 Service Temporarily Unavailable") || strings.Contains(err.Error(), "502 Bad Gateway") { errors.Exit(ErrorAPINotReady(apiName, resource.StatusUpdating.Message())) @@ -109,27 +101,12 @@ var predictCmd = &cobra.Command{ } for _, prediction := range predictResponse.Predictions { - if prediction.Prediction == nil { - prettyResp, err := json.Pretty(prediction.Response) - if err != nil { - errors.Exit(err) - } - - fmt.Println(prettyResp) - continue - } - - value := prediction.Prediction - if prediction.PredictionReversed != nil { - value = prediction.PredictionReversed + prettyResp, err := json.Pretty(prediction) + if err != nil { + errors.Exit(err) } - if 
cast.IsFloatType(value) { - casted, _ := cast.InterfaceToFloat64(value) - fmt.Println(s.Round(casted, 2, true)) - } else { - fmt.Println(s.UserStrStripped(value)) - } + fmt.Println(prettyResp) } }, } diff --git a/cortex-installer.sh b/cortex-installer.sh index 630e2b0b91..137eee4007 100755 --- a/cortex-installer.sh +++ b/cortex-installer.sh @@ -138,6 +138,7 @@ export CORTEX_IMAGE_TF_API="${CORTEX_IMAGE_TF_API:-cortexlabs/tf-api:$CORTEX_VER export CORTEX_IMAGE_PYTHON_PACKAGER="${CORTEX_IMAGE_PYTHON_PACKAGER:-cortexlabs/python-packager:$CORTEX_VERSION_STABLE}" export CORTEX_IMAGE_TF_SERVE_GPU="${CORTEX_IMAGE_TF_SERVE_GPU:-cortexlabs/tf-serve-gpu:$CORTEX_VERSION_STABLE}" export CORTEX_IMAGE_TF_TRAIN_GPU="${CORTEX_IMAGE_TF_TRAIN_GPU:-cortexlabs/tf-train-gpu:$CORTEX_VERSION_STABLE}" +export CORTEX_IMAGE_ONNX_SERVE="${CORTEX_IMAGE_ONNX_SERVE:-cortexlabs/onnx-serve:$CORTEX_VERSION_STABLE}" export AWS_ACCESS_KEY_ID="${AWS_ACCESS_KEY_ID:-""}" export AWS_SECRET_ACCESS_KEY="${AWS_SECRET_ACCESS_KEY:-""}" @@ -308,6 +309,7 @@ function setup_configmap() { --from-literal='IMAGE_PYTHON_PACKAGER'=$CORTEX_IMAGE_PYTHON_PACKAGER \ --from-literal='IMAGE_TF_TRAIN_GPU'=$CORTEX_IMAGE_TF_TRAIN_GPU \ --from-literal='IMAGE_TF_SERVE_GPU'=$CORTEX_IMAGE_TF_SERVE_GPU \ + --from-literal='IMAGE_ONNX_SERVE'=$CORTEX_IMAGE_ONNX_SERVE \ --from-literal='ENABLE_TELEMETRY'=$CORTEX_ENABLE_TELEMETRY \ -o yaml --dry-run | kubectl apply -f - >/dev/null } diff --git a/dev/registry.sh b/dev/registry.sh index 468ee09d67..9d6de330c8 100755 --- a/dev/registry.sh +++ b/dev/registry.sh @@ -49,6 +49,7 @@ function create_registry() { aws ecr create-repository --repository-name=cortexlabs/python-packager --region=$REGISTRY_REGION || true aws ecr create-repository --repository-name=cortexlabs/tf-train-gpu --region=$REGISTRY_REGION || true aws ecr create-repository --repository-name=cortexlabs/tf-serve-gpu --region=$REGISTRY_REGION || true + aws ecr create-repository --repository-name=cortexlabs/onnx-serve --region=$REGISTRY_REGION || true } ### HELPERS ### @@ -139,6 +140,7 @@ elif [ "$cmd" = "update" ]; then build_and_push $ROOT/images/tf-train tf-train latest build_and_push $ROOT/images/tf-train-gpu tf-train-gpu latest build_and_push $ROOT/images/tf-api tf-api latest + build_and_push $ROOT/images/onnx-serve onnx-serve latest cleanup fi diff --git a/examples/iris/cortex.yaml b/examples/iris/cortex.yaml index 11dbfcd67e..11f0224991 100644 --- a/examples/iris/cortex.yaml +++ b/examples/iris/cortex.yaml @@ -3,8 +3,19 @@ - kind: api name: iris-type + model_type: tensorflow external_model: path: s3://cortex-examples/iris-tensorflow.zip region: us-west-2 compute: replicas: 3 + +- kind: api + name: iris + model_type: onnx + inference_processor_path: inference.py + external_model: + path: s3://data-vishal/iris.onnx + region: us-west-2 + compute: + replicas: 1 diff --git a/examples/iris/inference.py b/examples/iris/inference.py new file mode 100644 index 0000000000..b971895664 --- /dev/null +++ b/examples/iris/inference.py @@ -0,0 +1,27 @@ +import numpy + + +def preprocess(sample, input_metadata): + return { + input_metadata[0].name: numpy.asarray( + [ + [ + sample["sepal_length"], + sample["sepal_width"], + sample["petal_length"], + sample["petal_width"], + ] + ], + dtype=numpy.float32, + ) + } + + +def postprocess(prediction, output_metadata): + iris_labels = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"] + predicted_class_id = int(numpy.argmax(prediction[0][0])) + return { + "class_label": iris_labels[predicted_class_id], + "class_index": 
predicted_class_id, + "probabilites": prediction[0][0].tolist(), + } diff --git a/examples/iris/requirements.txt b/examples/iris/requirements.txt new file mode 100644 index 0000000000..1c122fe8fd --- /dev/null +++ b/examples/iris/requirements.txt @@ -0,0 +1 @@ +numpy==1.16.4 diff --git a/go.mod b/go.mod index 173cd26e9d..608c59901e 100644 --- a/go.mod +++ b/go.mod @@ -23,6 +23,10 @@ require ( github.com/aws/aws-sdk-go v1.20.7 github.com/cortexlabs/yaml v0.0.0-20190624201412-7f31702857b6 github.com/davecgh/go-spew v1.1.1 + github.com/docker/distribution v2.7.1+incompatible // indirect + github.com/docker/docker v1.13.1 // indirect + github.com/docker/go-connections v0.4.0 // indirect + github.com/docker/go-units v0.4.0 // indirect github.com/emicklei/go-restful v2.9.6+incompatible // indirect github.com/go-openapi/spec v0.19.2 // indirect github.com/gogo/protobuf v1.2.1 // indirect @@ -36,6 +40,7 @@ require ( github.com/json-iterator/go v1.1.6 // indirect github.com/mitchellh/go-homedir v1.1.0 github.com/modern-go/reflect2 v1.0.1 // indirect + github.com/opencontainers/go-digest v1.0.0-rc1 // indirect github.com/peterbourgon/diskv v2.0.1+incompatible // indirect github.com/pkg/errors v0.8.1 github.com/spf13/cobra v0.0.5 diff --git a/go.sum b/go.sum index 06828f8998..469e838551 100644 --- a/go.sum +++ b/go.sum @@ -25,6 +25,14 @@ github.com/davecgh/go-spew v0.0.0-20151105211317-5215b55f46b2/go.mod h1:J7Y8YcW2 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/docker/distribution v2.7.1+incompatible h1:a5mlkVzth6W5A4fOsS3D2EO5BUmsJpcB+cRlLU7cSug= +github.com/docker/distribution v2.7.1+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= +github.com/docker/docker v1.13.1 h1:IkZjBSIc8hBjLpqeAbeE5mca5mNgeatLHBy3GO78BWo= +github.com/docker/docker v1.13.1/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= +github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ= +github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec= +github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw= +github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/emicklei/go-restful v0.0.0-20170410110728-ff4f55a20633/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= github.com/emicklei/go-restful v2.9.6+incompatible h1:tfrHha8zJ01ywiOEC1miGY8st1/igzWB8OmvPgoYX7w= github.com/emicklei/go-restful v2.9.6+incompatible/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= @@ -94,6 +102,8 @@ github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3Rllmb github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/onsi/ginkgo v0.0.0-20170829012221-11459a886d9c/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= +github.com/opencontainers/go-digest v1.0.0-rc1 h1:WzifXhOVOEOuFYOJAW6aQqW0TooG2iki3E3Ii+WN7gQ= +github.com/opencontainers/go-digest v1.0.0-rc1/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/peterbourgon/diskv v2.0.1+incompatible 
h1:UBdAOUP5p4RWqPBg048CAvpKN+vxiaj6gdUUzhl4XmI= github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= diff --git a/images/onnx-serve/Dockerfile b/images/onnx-serve/Dockerfile new file mode 100644 index 0000000000..4acabf1614 --- /dev/null +++ b/images/onnx-serve/Dockerfile @@ -0,0 +1,39 @@ +FROM ubuntu:16.04 + +RUN apt-get update -qq && apt-get install -y -q \ + python3 \ + python3-dev \ + python3-pip \ + && apt-get clean -qq && rm -rf /var/lib/apt/lists/* && \ + pip3 install --upgrade \ + pip \ + setuptools \ + && rm -rf /root/.cache/pip* + +RUN apt-get update -qq && apt-get install -y -q \ + build-essential \ + curl \ + libfreetype6-dev \ + libpng-dev \ + libzmq3-dev \ + pkg-config \ + rsync \ + software-properties-common \ + unzip \ + zlib1g-dev \ + && apt-get clean -qq && rm -rf /var/lib/apt/lists/* + + +ENV PYTHONPATH="/src:${PYTHONPATH}" + +COPY pkg/workloads/lib/requirements.txt /src/lib/requirements.txt +COPY pkg/workloads/onnx_serve/requirements.txt /src/onnx_serve/requirements.txt +RUN pip3 install -r /src/lib/requirements.txt && \ + pip3 install -r /src/onnx_serve/requirements.txt && \ + rm -rf /root/.cache/pip* + +COPY pkg/workloads/consts.py /src/ +COPY pkg/workloads/lib /src/lib +COPY pkg/workloads/onnx_serve /src/onnx_serve + +ENTRYPOINT ["/usr/bin/python3", "/src/onnx_serve/api.py"] diff --git a/pkg/consts/consts.go b/pkg/consts/consts.go index 8b92580411..85af9612f3 100644 --- a/pkg/consts/consts.go +++ b/pkg/consts/consts.go @@ -37,25 +37,26 @@ var ( RequirementsTxt = "requirements.txt" PackageDir = "packages" - AppsDir = "apps" - APIsDir = "apis" - DataDir = "data" - RawDataDir = "data_raw" - TrainingDataDir = "data_training" - AggregatorsDir = "aggregators" - AggregatesDir = "aggregates" - TransformersDir = "transformers" - EstimatorsDir = "estimators" - PythonPackagesDir = "python_packages" - ModelsDir = "models" - ConstantsDir = "constants" - ContextsDir = "contexts" - ResourceStatusesDir = "resource_statuses" - WorkloadSpecsDir = "workload_specs" - LogPrefixesDir = "log_prefixes" - RawColumnsDir = "raw_columns" - TransformedColumnsDir = "transformed_columns" - MetadataDir = "metadata" + AppsDir = "apps" + APIsDir = "apis" + DataDir = "data" + RawDataDir = "data_raw" + TrainingDataDir = "data_training" + AggregatorsDir = "aggregators" + AggregatesDir = "aggregates" + TransformersDir = "transformers" + EstimatorsDir = "estimators" + PythonPackagesDir = "python_packages" + InferenceProcessorsDir = "inference_processors_dir" + ModelsDir = "models" + ConstantsDir = "constants" + ContextsDir = "contexts" + ResourceStatusesDir = "resource_statuses" + WorkloadSpecsDir = "workload_specs" + LogPrefixesDir = "log_prefixes" + RawColumnsDir = "raw_columns" + TransformedColumnsDir = "transformed_columns" + MetadataDir = "metadata" TelemetryURL = "https://telemetry.cortexlabs.dev" ) diff --git a/pkg/operator/api/context/apis.go b/pkg/operator/api/context/apis.go index 0a6dd7d2e1..0cc3b33e23 100644 --- a/pkg/operator/api/context/apis.go +++ b/pkg/operator/api/context/apis.go @@ -25,8 +25,9 @@ type APIs map[string]*API type API struct { *userconfig.API *ComputedResourceFields - Path string `json:"path"` - ModelName string `json:"model_name"` // This removes the @ from userconfig.API.Model, or sets it to userconfig.API.ModelPath if it's external + Path string `json:"path"` + ModelName string `json:"model_name"` // This removes the @ from userconfig.API.Model, or sets it to userconfig.API.ModelPath if it's external + 
InferenceProcessorImplKey *string `json:"inference_processor_impl_key"` } func APIPath(apiName string, appName string) string { diff --git a/pkg/operator/api/context/context.go b/pkg/operator/api/context/context.go index 533d888093..1ee8131f13 100644 --- a/pkg/operator/api/context/context.go +++ b/pkg/operator/api/context/context.go @@ -206,7 +206,7 @@ func (ctx *Context) PopulateWorkloadIDs(resourceWorkloadIDs map[string]string) { func (ctx *Context) CheckAllWorkloadIDsPopulated() error { for _, res := range ctx.ComputedResources() { if res.GetWorkloadID() == "" { - return errors.New(ctx.App.Name, "resource", res.GetID(), "workload ID is missing") // unexpected + return errors.New(ctx.App.Name, "resource", res.GetID(), "workload ID is missing", res.GetName()) // unexpected } } return nil diff --git a/pkg/operator/api/context/dependencies.go b/pkg/operator/api/context/dependencies.go index 009108aba0..d1d2f37ee7 100644 --- a/pkg/operator/api/context/dependencies.go +++ b/pkg/operator/api/context/dependencies.go @@ -22,6 +22,7 @@ import ( "github.com/cortexlabs/yaml" "github.com/cortexlabs/cortex/pkg/lib/cast" + "github.com/cortexlabs/cortex/pkg/lib/debug" "github.com/cortexlabs/cortex/pkg/lib/errors" "github.com/cortexlabs/cortex/pkg/lib/sets/strset" "github.com/cortexlabs/cortex/pkg/operator/api/resource" @@ -35,6 +36,7 @@ func (ctx *Context) AllComputedResourceDependencies(resourceID string) strset.Se func (ctx *Context) allComputedResourceDependenciesHelper(resourceID string, allDependencies strset.Set) { subDependencies := ctx.DirectComputedResourceDependencies(resourceID) + debug.Pp(subDependencies) subDependencies.Subtract(allDependencies) allDependencies.Merge(subDependencies) @@ -72,6 +74,7 @@ func (ctx *Context) DirectComputedResourceDependencies(resourceID string) strset return ctx.trainingDatasetDependencies(model) } } + debug.Pp(ctx.APIs) for _, api := range ctx.APIs { if api.ID == resourceID { return ctx.apiDependencies(api) @@ -151,11 +154,18 @@ func (ctx *Context) modelDependencies(model *Model) strset.Set { } func (ctx *Context) apiDependencies(api *API) strset.Set { - if api.Model == nil { - return strset.New() + dependencies := strset.New() + if api.InferenceProcessorPath != nil { + debug.Pp(api) + for _, pythonPackage := range ctx.PythonPackages { + dependencies.Add(pythonPackage.GetID()) + } } - model := ctx.Models[api.ModelName] - return strset.New(model.ID) + if api.Model != nil { + model := ctx.Models[api.ModelName] + dependencies.Add(model.ID) + } + return dependencies } func (ctx *Context) ExtractCortexResources( diff --git a/pkg/operator/api/userconfig/apis.go b/pkg/operator/api/userconfig/apis.go index b30170042f..c886921501 100644 --- a/pkg/operator/api/userconfig/apis.go +++ b/pkg/operator/api/userconfig/apis.go @@ -33,10 +33,12 @@ type APIs []*API type API struct { ResourceFields - Model *string `json:"model" yaml:"model"` - ExternalModel *ExternalModel `json:"external_model" yaml:"external_model"` - Compute *APICompute `json:"compute" yaml:"compute"` - Tags Tags `json:"tags" yaml:"tags"` + ModelType ModelType `json:"model_type" yaml:"model_type"` + Model *string `json:"model" yaml:"model"` + InferenceProcessorPath *string `json:"inference_processor_path" yaml:"inference_processor_path"` + ExternalModel *ExternalModel `json:"external_model" yaml:"external_model"` + Compute *APICompute `json:"compute" yaml:"compute"` + Tags Tags `json:"tags" yaml:"tags"` } var apiValidation = &cr.StructValidation{ @@ -54,6 +56,19 @@ var apiValidation = &cr.StructValidation{ 
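				// the model string references another Cortex resource using the @ syntax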
RequireCortexResources: true, }, }, + { + StructField: "InferenceProcessorPath", + StringPtrValidation: &cr.StringPtrValidation{}, + }, + { + StructField: "ModelType", + StringValidation: &cr.StringValidation{ + AllowedValues: ModelTypeStrings(), + }, + Parser: func(str string) (interface{}, error) { + return ModelTypeFromString(str), nil + }, + }, { StructField: "ExternalModel", StructValidation: externalModelFieldValidation, @@ -70,6 +85,12 @@ func (api *API) UserConfigStr() string { if api.Model != nil { sb.WriteString(fmt.Sprintf("%s: %s\n", ModelKey, yaml.UnescapeAtSymbol(*api.Model))) } + if api.ModelType != UnknownModelType { + sb.WriteString(fmt.Sprintf("%s: %s\n", ModelTypeKey, api.ModelType.String())) + } + // if api.ServingFunction != nil { + // sb.WriteString(fmt.Sprintf("%s: %s\n", ServingFunctionKey, *api.ServingFunction)) + // } if api.ExternalModel != nil { sb.WriteString(fmt.Sprintf("%s:\n", ExternalModelKey)) sb.WriteString(s.Indent(api.ExternalModel.UserConfigStr(), " ")) diff --git a/pkg/operator/api/userconfig/config_key.go b/pkg/operator/api/userconfig/config_key.go index d78908ca93..458d064a1e 100644 --- a/pkg/operator/api/userconfig/config_key.go +++ b/pkg/operator/api/userconfig/config_key.go @@ -93,9 +93,11 @@ const ( ThrottleSecsKey = "throttle_secs" // API - ModelKey = "model" - ModelNameKey = "model_name" - ExternalModelKey = "external_model" + ModelKey = "model" + ModelTypeKey = "model_key" + ServingFunctionKey = "serving_fn" + ModelNameKey = "model_name" + ExternalModelKey = "external_model" // compute ComputeKey = "compute" diff --git a/pkg/operator/api/userconfig/model_type.go b/pkg/operator/api/userconfig/model_type.go new file mode 100644 index 0000000000..117ef5b699 --- /dev/null +++ b/pkg/operator/api/userconfig/model_type.go @@ -0,0 +1,78 @@ +/* +Copyright 2019 Cortex Labs, Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package userconfig + +type ModelType int + +const ( + UnknownModelType ModelType = iota + TensorFlowModelType + ONNXModelType +) + +var modelDataTypes = []string{ + "unknown", + "tensorflow", + "onnx", +} + +func ModelTypeFromString(s string) ModelType { + for i := 0; i < len(modelDataTypes); i++ { + if s == modelDataTypes[i] { + return ModelType(i) + } + } + return UnknownModelType +} + +func ModelTypeStrings() []string { + return modelDataTypes[1:] +} + +func (t ModelType) String() string { + return modelDataTypes[t] +} + +// MarshalText satisfies TextMarshaler +func (t ModelType) MarshalText() ([]byte, error) { + return []byte(t.String()), nil +} + +// UnmarshalText satisfies TextUnmarshaler +func (t *ModelType) UnmarshalText(text []byte) error { + enum := string(text) + for i := 0; i < len(modelDataTypes); i++ { + if enum == modelDataTypes[i] { + *t = ModelType(i) + return nil + } + } + + *t = UnknownModelType + return nil +} + +// UnmarshalBinary satisfies BinaryUnmarshaler +// Needed for msgpack +func (t *ModelType) UnmarshalBinary(data []byte) error { + return t.UnmarshalText(data) +} + +// MarshalBinary satisfies BinaryMarshaler +func (t ModelType) MarshalBinary() ([]byte, error) { + return []byte(t.String()), nil +} diff --git a/pkg/operator/config/config.go b/pkg/operator/config/config.go index b36bd6b12d..c6bcc64a7a 100644 --- a/pkg/operator/config/config.go +++ b/pkg/operator/config/config.go @@ -53,9 +53,11 @@ type CortexConfig struct { PythonPackagerImage string `json:"python_packager_image"` TFTrainImageGPU string `json:"tf_train_image_gpu"` TFServeImageGPU string `json:"tf_serve_image_gpu"` - TelemetryURL string `json:"telemetry_url"` - EnableTelemetry bool `json:"enable_telemetry"` - OperatorInCluster bool `json:"operator_in_cluster"` + ONNXServeImage string `json:"onnx_serve_image"` + + TelemetryURL string `json:"telemetry_url"` + EnableTelemetry bool `json:"enable_telemetry"` + OperatorInCluster bool `json:"operator_in_cluster"` } func Init() error { @@ -73,9 +75,11 @@ func Init() error { PythonPackagerImage: getStr("IMAGE_PYTHON_PACKAGER"), TFTrainImageGPU: getStr("IMAGE_TF_TRAIN_GPU"), TFServeImageGPU: getStr("IMAGE_TF_SERVE_GPU"), - TelemetryURL: configreader.MustStringFromEnv("CONST_TELEMETRY_URL", &configreader.StringValidation{Required: false, Default: consts.TelemetryURL}), - EnableTelemetry: getBool("ENABLE_TELEMETRY"), - OperatorInCluster: configreader.MustBoolFromEnv("CONST_OPERATOR_IN_CLUSTER", &configreader.BoolValidation{Default: true}), + ONNXServeImage: getStr("IMAGE_ONNX_SERVE"), + + TelemetryURL: configreader.MustStringFromEnv("CONST_TELEMETRY_URL", &configreader.StringValidation{Required: false, Default: consts.TelemetryURL}), + EnableTelemetry: getBool("ENABLE_TELEMETRY"), + OperatorInCluster: configreader.MustBoolFromEnv("CONST_OPERATOR_IN_CLUSTER", &configreader.BoolValidation{Default: true}), } Cortex.ID = hash.String(Cortex.Bucket + Cortex.Region + Cortex.LogGroup) diff --git a/pkg/operator/context/apis.go b/pkg/operator/context/apis.go index dbc7322bf4..11b2578217 100644 --- a/pkg/operator/context/apis.go +++ b/pkg/operator/context/apis.go @@ -18,18 +18,26 @@ package context import ( "bytes" + "path/filepath" + "github.com/cortexlabs/cortex/pkg/consts" "github.com/cortexlabs/cortex/pkg/lib/errors" "github.com/cortexlabs/cortex/pkg/lib/hash" + "github.com/cortexlabs/cortex/pkg/lib/sets/strset" "github.com/cortexlabs/cortex/pkg/operator/api/context" "github.com/cortexlabs/cortex/pkg/operator/api/resource" 
"github.com/cortexlabs/cortex/pkg/operator/api/userconfig" + "github.com/cortexlabs/cortex/pkg/operator/config" "github.com/cortexlabs/yaml" ) +var uploadedInferenceProcessors = strset.New() + func getAPIs(config *userconfig.Config, models context.Models, datasetVersion string, + impls map[string][]byte, + pythonPackages context.PythonPackages, ) (context.APIs, error) { apis := context.APIs{} @@ -37,7 +45,25 @@ func getAPIs(config *userconfig.Config, var buf bytes.Buffer var modelName string + var inferenceProcessorImplKey *string buf.WriteString(apiConfig.Name) + buf.WriteString(apiConfig.ModelType.String()) + + for _, pythonPackage := range pythonPackages { + buf.WriteString(pythonPackage.GetID()) + } + + if apiConfig.InferenceProcessorPath != nil { + impl, ok := impls[*apiConfig.InferenceProcessorPath] + if !ok { + return nil, errors.Wrap(userconfig.ErrorImplDoesNotExist(*apiConfig.InferenceProcessorPath), userconfig.Identify(apiConfig)) + } + implID := hash.Bytes(impl) + buf.WriteString(implID) + + key := filepath.Join(consts.InferenceProcessorsDir, implID) + inferenceProcessorImplKey = &key + } if apiConfig.Model != nil { modelName, _ = yaml.ExtractAtSymbolText(*apiConfig.Model) @@ -64,10 +90,38 @@ func getAPIs(config *userconfig.Config, ResourceType: resource.APIType, }, }, - API: apiConfig, - Path: context.APIPath(apiConfig.Name, config.App.Name), - ModelName: modelName, + API: apiConfig, + Path: context.APIPath(apiConfig.Name, config.App.Name), + ModelName: modelName, + InferenceProcessorImplKey: inferenceProcessorImplKey, + } + + if apiConfig.InferenceProcessorPath != nil { + uploadInferenceProcessor(apis[apiConfig.Name], impls[*apiConfig.InferenceProcessorPath]) } } return apis, nil } + +func uploadInferenceProcessor(api *context.API, impl []byte) error { + implID := hash.Bytes(impl) + + if uploadedInferenceProcessors.Has(implID) { + return nil + } + + isUploaded, err := config.AWS.IsS3File(*api.InferenceProcessorImplKey) + if err != nil { + return errors.Wrap(err, userconfig.Identify(api), "upload") + } + + if !isUploaded { + err = config.AWS.UploadBytesToS3(impl, *api.InferenceProcessorImplKey) + if err != nil { + return errors.Wrap(err, userconfig.Identify(api), "upload") + } + } + + uploadedInferenceProcessors.Add(implID) + return nil +} diff --git a/pkg/operator/context/context.go b/pkg/operator/context/context.go index 9742c715da..bf931f99e3 100644 --- a/pkg/operator/context/context.go +++ b/pkg/operator/context/context.go @@ -231,7 +231,7 @@ func New( } ctx.Models = models - apis, err := getAPIs(userconf, ctx.Models, ctx.DatasetVersion) + apis, err := getAPIs(userconf, ctx.Models, ctx.DatasetVersion, files, pythonPackages) if err != nil { return nil, err } diff --git a/pkg/operator/endpoints/shared.go b/pkg/operator/endpoints/shared.go index d276e02ccb..ea689f8045 100644 --- a/pkg/operator/endpoints/shared.go +++ b/pkg/operator/endpoints/shared.go @@ -53,6 +53,7 @@ func RespondError(w http.ResponseWriter, err error, strs ...string) { func RespondErrorCode(w http.ResponseWriter, code int, err error, strs ...string) { err = errors.Wrap(err, strs...) 
errors.PrintError(err) + errors.PrintStacktrace(err) w.WriteHeader(code) response := schema.ErrorResponse{ diff --git a/pkg/operator/workloads/api.go b/pkg/operator/workloads/api.go index 10d26ecf46..250bff71d6 100644 --- a/pkg/operator/workloads/api.go +++ b/pkg/operator/workloads/api.go @@ -22,6 +22,7 @@ import ( appsv1b1 "k8s.io/api/apps/v1beta1" corev1 "k8s.io/api/core/v1" k8sresource "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" intstr "k8s.io/apimachinery/pkg/util/intstr" "github.com/cortexlabs/cortex/pkg/consts" @@ -38,13 +39,12 @@ const ( tfServingContainerName = "serve" ) -func apiSpec( +func tfAPISpec( ctx *context.Context, apiName string, workloadID string, apiCompute *userconfig.APICompute, ) *appsv1b1.Deployment { - transformResourceList := corev1.ResourceList{} tfServingResourceList := corev1.ResourceList{} tfServingLimitsList := corev1.ResourceList{} @@ -165,6 +165,89 @@ func apiSpec( }) } +func onnxAPISpec( + ctx *context.Context, + apiName string, + workloadID string, + apiCompute *userconfig.APICompute, +) *appsv1b1.Deployment { + transformResourceList := corev1.ResourceList{} + if apiCompute.CPU != nil { + transformResourceList[corev1.ResourceCPU] = apiCompute.CPU.Quantity + } + + if apiCompute.Mem != nil { + transformResourceList[corev1.ResourceMemory] = apiCompute.Mem.Quantity + } + + return k8s.Deployment(&k8s.DeploymentSpec{ + Name: internalAPIName(apiName, ctx.App.Name), + Replicas: ctx.APIs[apiName].Compute.Replicas, + Labels: map[string]string{ + "appName": ctx.App.Name, + "workloadType": WorkloadTypeAPI, + "apiName": apiName, + "resourceID": ctx.APIs[apiName].ID, + "workloadID": workloadID, + }, + Selector: map[string]string{ + "appName": ctx.App.Name, + "workloadType": WorkloadTypeAPI, + "apiName": apiName, + }, + PodSpec: k8s.PodSpec{ + Labels: map[string]string{ + "appName": ctx.App.Name, + "workloadType": WorkloadTypeAPI, + "apiName": apiName, + "resourceID": ctx.APIs[apiName].ID, + "workloadID": workloadID, + "userFacing": "true", + }, + K8sPodSpec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: apiContainerName, + Image: config.Cortex.ONNXServeImage, + ImagePullPolicy: "Always", + Args: []string{ + "--workload-id=" + workloadID, + "--port=" + defaultPortStr, + "--context=" + config.AWS.S3Path(ctx.Key), + "--api=" + ctx.APIs[apiName].ID, + "--model-dir=" + path.Join(consts.EmptyDirMountPath, "model"), + "--cache-dir=" + consts.ContextCacheDir, + }, + Env: k8s.AWSCredentials(), + VolumeMounts: k8s.DefaultVolumeMounts(), + ReadinessProbe: &corev1.Probe{ + InitialDelaySeconds: 5, + TimeoutSeconds: 5, + PeriodSeconds: 5, + SuccessThreshold: 1, + FailureThreshold: 2, + Handler: corev1.Handler{ + HTTPGet: &corev1.HTTPGetAction{ + Path: "/healthz", + Port: intstr.IntOrString{ + IntVal: defaultPortInt32, + }, + }, + }, + }, + Resources: corev1.ResourceRequirements{ + Requests: transformResourceList, + }, + }, + }, + Volumes: k8s.DefaultVolumes(), + ServiceAccountName: "default", + }, + }, + Namespace: config.Cortex.Namespace, + }) +} + func ingressSpec(ctx *context.Context, apiName string) *k8s.IngressSpec { return &k8s.IngressSpec{ Name: internalAPIName(apiName, ctx.App.Name), @@ -219,10 +302,19 @@ func apiWorkloadSpecs(ctx *context.Context) ([]*WorkloadSpec, error) { workloadID = deployment.Labels["workloadID"] // Reuse workloadID if just modifying compute } + var spec metav1.Object + + if api.ModelType == userconfig.TensorFlowModelType { + spec = tfAPISpec(ctx, apiName, workloadID, api.Compute) + } + + if 
api.ModelType == userconfig.ONNXModelType { + spec = onnxAPISpec(ctx, apiName, workloadID, api.Compute) + } workloadSpecs = append(workloadSpecs, &WorkloadSpec{ WorkloadID: workloadID, ResourceIDs: strset.New(api.ID), - Spec: apiSpec(ctx, apiName, workloadID, api.Compute), + Spec: spec, K8sAction: "apply", SuccessCondition: k8s.DeploymentSuccessConditionAll, WorkloadType: WorkloadTypeAPI, diff --git a/pkg/operator/workloads/workflow.go b/pkg/operator/workloads/workflow.go index 17c623b5a1..6d10f0cc66 100644 --- a/pkg/operator/workloads/workflow.go +++ b/pkg/operator/workloads/workflow.go @@ -65,13 +65,13 @@ func Create(ctx *context.Context) (*awfv1.Workflow, error) { var allSpecs []*WorkloadSpec - if ctx.Environment != nil { - pythonPackageJobSpecs, err := pythonPackageWorkloadSpecs(ctx) - if err != nil { - return nil, err - } - allSpecs = append(allSpecs, pythonPackageJobSpecs...) + pythonPackageJobSpecs, err := pythonPackageWorkloadSpecs(ctx) + if err != nil { + return nil, err + } + allSpecs = append(allSpecs, pythonPackageJobSpecs...) + if ctx.Environment != nil { dataJobSpecs, err := dataWorkloadSpecs(ctx) if err != nil { return nil, err @@ -102,6 +102,8 @@ func Create(ctx *context.Context) (*awfv1.Workflow, error) { for _, spec := range allSpecs { var dependencyWorkloadIDs []string for resourceID := range spec.ResourceIDs { + fmt.Println(spec.ResourceIDs) + fmt.Println(spec.Spec.GetName()) for dependencyResourceID := range ctx.AllComputedResourceDependencies(resourceID) { workloadID := resourceWorkloadIDs[dependencyResourceID] if workloadID != "" && workloadID != spec.WorkloadID { diff --git a/pkg/workloads/lib/context.py b/pkg/workloads/lib/context.py index 7e8bad3ece..bcac3ddbb0 100644 --- a/pkg/workloads/lib/context.py +++ b/pkg/workloads/lib/context.py @@ -270,6 +270,21 @@ def get_estimator_impl(self, model_name): self._estimator_impls[estimator_name] = (impl, impl_path) return (impl, impl_path) + def get_inference_processor_impl(self, api_name): + api = self.apis[api_name] + + module_prefix = "inference_processor" + + try: + impl, impl_path = self.load_module( + module_prefix, api["name"], api["inference_processor_impl_key"] + ) + except CortexException as e: + e.wrap("api " + api_name, "inference_processor") + raise + + return (impl, impl_path) + # Mode must be "training" or "evaluation" def get_training_data_parts(self, model_name, mode, part_prefix="part"): training_dataset = self.models[model_name]["dataset"] diff --git a/pkg/workloads/onnx_serve/api.py b/pkg/workloads/onnx_serve/api.py new file mode 100644 index 0000000000..b8a36119f0 --- /dev/null +++ b/pkg/workloads/onnx_serve/api.py @@ -0,0 +1,163 @@ +# Copyright 2019 Cortex Labs, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
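+#
+# Serves an ONNX model over HTTP: the model is loaded with onnxruntime, each
+# sample is run through the user-provided preprocess/postprocess functions,
+# and the predictions are returned as JSON (a Flask app served by waitress).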
+ +import sys +import os +import json +import argparse +import traceback +import time +from flask import Flask, request, jsonify +from flask_api import status +from waitress import serve +import onnxruntime as rt +from lib.storage import S3 +import numpy + +import consts +from lib import util, package, Context +from lib.log import get_logger +from lib.exceptions import CortexException, UserRuntimeException, UserException + +logger = get_logger() +logger.propagate = False # prevent double logging (flask modifies root logger) + +app = Flask(__name__) + +local_cache = { + "ctx": None, + "api": None, + "sess": None, + "model_inputs": None, + "model_outputs": None, + "inference_processor": None, +} + + +def prediction_failed(sample, reason=None): + message = "prediction failed for sample: {}".format(json.dumps(sample)) + if reason: + message += " ({})".format(reason) + + logger.error(message) + return message, status.HTTP_406_NOT_ACCEPTABLE + + +@app.route("/healthz", methods=["GET"]) +def health(): + return jsonify({"ok": True}) + + +@app.route("//", methods=["POST"]) +def predict(app_name, api_name): + try: + payload = request.get_json() + except Exception as e: + return "Malformed JSON", status.HTTP_400_BAD_REQUEST + + sess = local_cache["sess"] + api = local_cache["api"] + inference_processor = local_cache["inference_processor"] + model_inputs = local_cache["model_inputs"] + model_outputs = local_cache["model_outputs"] + + response = {} + + if not util.is_dict(payload) or "samples" not in payload: + util.log_pretty(payload, logging_func=logger.error) + return prediction_failed(payload, "top level `samples` key not found in request") + + logger.info("Predicting " + util.pluralize(len(payload["samples"]), "sample", "samples")) + + predictions = [] + samples = payload["samples"] + if not util.is_list(samples): + util.log_pretty(samples, logging_func=logger.error) + return prediction_failed( + payload, "expected the value of key `samples` to be a list of json objects" + ) + + for i, sample in enumerate(payload["samples"]): + util.log_indent("sample {}".format(i + 1), 2) + try: + util.log_indent("Raw sample:", indent=4) + util.log_pretty(sample, indent=6) + inference_input = inference_processor.preprocess(sample, model_inputs) + labels = [output_node.name for output_node in model_outputs] + inference = sess.run(labels, inference_input) + result = inference_processor.postprocess(inference, model_outputs) + util.log_indent("Prediction:", indent=4) + util.log_pretty(result, indent=6) + prediction = {"prediction": result} + except CortexException as e: + e.wrap("error", "sample {}".format(i + 1)) + logger.error(str(e)) + logger.exception("An error occurred, see `cx logs api {}` for more details.".format(1)) + return prediction_failed(sample, str(e)) + except Exception as e: + logger.exception("An error occurred, see `cx logs api {}` for more details.".format(2)) + return prediction_failed(sample, str(e)) + + predictions.append(result) + + response["predictions"] = predictions + response["resource_id"] = api["id"] + + return jsonify(response) + + +def start(args): + logger.info(args) + ctx = Context(s3_path=args.context, cache_dir=args.cache_dir, workload_id=args.workload_id) + package.install_packages(ctx.python_packages, ctx.storage) + api = ctx.apis_id_map[args.api] + + local_cache["api"] = api + local_cache["ctx"] = ctx + local_cache["inference_processor"], _ = ctx.get_inference_processor_impl(api["name"]) + + logger.info(ctx) + model_cache_path = os.path.join(args.model_dir, args.api) + if not 
os.path.exists(model_cache_path): + ctx.storage.download_file_external(api["external_model"]["path"], model_cache_path) + + sess = rt.InferenceSession(model_cache_path) + local_cache["sess"] = sess + local_cache["model_inputs"] = sess.get_inputs() + local_cache["model_outputs"] = sess.get_outputs() + serve(app, listen="*:{}".format(args.port)) + logger.info("Serving model") + + +def main(): + parser = argparse.ArgumentParser() + na = parser.add_argument_group("required named arguments") + na.add_argument("--workload-id", required=True, help="Workload ID") + na.add_argument("--port", type=int, required=True, help="Port (on localhost) to use") + na.add_argument( + "--context", + required=True, + help="S3 path to context (e.g. s3://bucket/path/to/context.json)", + ) + na.add_argument("--api", required=True, help="Resource id of api to serve") + na.add_argument("--model-dir", required=True, help="Directory to download the model to") + na.add_argument("--cache-dir", required=True, help="Local path for the context cache") + parser.set_defaults(func=start) + + args = parser.parse_args() + args.func(args) + + +if __name__ == "__main__": + main() diff --git a/pkg/workloads/onnx_serve/requirements.txt b/pkg/workloads/onnx_serve/requirements.txt new file mode 100644 index 0000000000..4712d29649 --- /dev/null +++ b/pkg/workloads/onnx_serve/requirements.txt @@ -0,0 +1,5 @@ +flask==1.0.2 +flask-api==1.1 +waitress==1.2.1 +onnxruntime==0.4.0 +numpy>=1.15.0 From 48d156254ededf9eaba74c18caa15d5481534497 Mon Sep 17 00:00:00 2001 From: vishal Date: Wed, 26 Jun 2019 19:21:09 +0000 Subject: [PATCH 02/24] Add model examples to examples directory --- examples/iris/cortex.yaml | 28 ++++++++-- examples/iris/inference.py | 17 +++--- examples/iris/inference_pytorch.py | 28 ++++++++++ examples/models/iris_pytorch.py | 75 ++++++++++++++++++++++++++ examples/models/iris_sklearn_logreg.py | 28 ++++++++++ examples/models/iris_xgboost.py | 27 ++++++++++ pkg/workloads/tf_api/api.py | 6 ++- 7 files changed, 193 insertions(+), 16 deletions(-) create mode 100644 examples/iris/inference_pytorch.py create mode 100644 examples/models/iris_pytorch.py create mode 100644 examples/models/iris_sklearn_logreg.py create mode 100644 examples/models/iris_xgboost.py diff --git a/examples/iris/cortex.yaml b/examples/iris/cortex.yaml index 11f0224991..245b440c8e 100644 --- a/examples/iris/cortex.yaml +++ b/examples/iris/cortex.yaml @@ -2,20 +2,40 @@ name: iris - kind: api - name: iris-type + name: iris-tf-nn model_type: tensorflow external_model: path: s3://cortex-examples/iris-tensorflow.zip region: us-west-2 compute: - replicas: 3 + replicas: 1 - kind: api - name: iris + name: iris-pytorch-nn + model_type: onnx + inference_processor_path: inference_pytorch.py + external_model: + path: s3://data-vishal/iris_pytorch.onnx + region: us-west-2 + compute: + replicas: 1 + +- kind: api + name: iris-xgb-classifier + model_type: onnx + inference_processor_path: inference.py + external_model: + path: s3://data-vishal/iris_xgb.onnx + region: us-west-2 + compute: + replicas: 1 + +- kind: api + name: iris-sklearn-logistic-regression model_type: onnx inference_processor_path: inference.py external_model: - path: s3://data-vishal/iris.onnx + path: s3://data-vishal/iris_sklearn_logreg.onnx region: us-west-2 compute: replicas: 1 diff --git a/examples/iris/inference.py b/examples/iris/inference.py index b971895664..36fcb0a7b5 100644 --- a/examples/iris/inference.py +++ b/examples/iris/inference.py @@ -1,9 +1,11 @@ -import numpy +import numpy as np + 
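+# Label order must match the integer class encoding the model was trained
+# with (sklearn's load_iris: 0 = Iris-setosa, 1 = Iris-versicolor,
+# 2 = Iris-virginica).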
+iris_labels = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"] def preprocess(sample, input_metadata): return { - input_metadata[0].name: numpy.asarray( + input_metadata[0].name: np.asarray( [ [ sample["sepal_length"], @@ -12,16 +14,11 @@ def preprocess(sample, input_metadata): sample["petal_width"], ] ], - dtype=numpy.float32, + dtype=np.float32, ) } def postprocess(prediction, output_metadata): - iris_labels = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"] - predicted_class_id = int(numpy.argmax(prediction[0][0])) - return { - "class_label": iris_labels[predicted_class_id], - "class_index": predicted_class_id, - "probabilites": prediction[0][0].tolist(), - } + predicted_class_id = int(prediction[0][0]) + return {"class_label": iris_labels[predicted_class_id], "class_index": predicted_class_id} diff --git a/examples/iris/inference_pytorch.py b/examples/iris/inference_pytorch.py new file mode 100644 index 0000000000..c00e4c8e11 --- /dev/null +++ b/examples/iris/inference_pytorch.py @@ -0,0 +1,28 @@ +import numpy as np + +iris_labels = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"] + + +def preprocess(sample, input_metadata): + return { + input_metadata[0].name: np.asarray( + [ + [ + sample["sepal_length"], + sample["sepal_width"], + sample["petal_length"], + sample["petal_width"], + ] + ], + dtype=np.float32, + ) + } + + +def postprocess(prediction, output_metadata): + predicted_class_id = int(np.argmax(prediction[0][0])) + return { + "class_label": iris_labels[predicted_class_id], + "class_index": predicted_class_id, + "probabilites": prediction[0][0].tolist(), + } diff --git a/examples/models/iris_pytorch.py b/examples/models/iris_pytorch.py new file mode 100644 index 0000000000..db6effdbf7 --- /dev/null +++ b/examples/models/iris_pytorch.py @@ -0,0 +1,75 @@ +""" +Requirements.txt + +scikit-learn +torch +""" +from sklearn.model_selection import train_test_split +from sklearn.metrics import accuracy_score, precision_score, recall_score +from sklearn.datasets import load_iris +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.autograd import Variable + +iris = load_iris() +X, y = iris.data, iris.target + + +class Net(nn.Module): + # define nn + def __init__(self): + super(Net, self).__init__() + self.fc1 = nn.Linear(4, 100) + self.fc2 = nn.Linear(100, 100) + self.fc3 = nn.Linear(100, 3) + self.softmax = nn.Softmax(dim=1) + + def forward(self, X): + X = F.relu(self.fc1(X)) + X = self.fc2(X) + X = self.fc3(X) + X = self.softmax(X) + + return X + + +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42) + +# wrap up with Variable in pytorch +train_X = Variable(torch.Tensor(X_train).float()) +test_X = Variable(torch.Tensor(X_test).float()) +train_y = Variable(torch.Tensor(y_train).long()) +test_y = Variable(torch.Tensor(y_test).long()) + +net = Net() + +criterion = nn.CrossEntropyLoss() # cross entropy loss + +optimizer = torch.optim.SGD(net.parameters(), lr=0.01) + +for epoch in range(1000): + optimizer.zero_grad() + out = net(train_X) + loss = criterion(out, train_y) + loss.backward() + optimizer.step() + + if epoch % 100 == 0: + print("number of epoch {} loss {}".format(epoch, loss)) + +predict_out = net(test_X) +_, predict_y = torch.max(predict_out, 1) + +print("prediction accuracy {}".format(accuracy_score(test_y.data, predict_y.data))) + +dummy_input = torch.randn(1, 4) + +torch.onnx.export( + net, + dummy_input, + "iris_pytorch.onnx", + verbose=True, + input_names=["input"], + output_names=["species"], +) 
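Note: before pointing an API at an exported model, it can be sanity-checked locally by loading it back with onnxruntime and running one sample through it. A minimal sketch, assuming iris_pytorch.onnx was just written to the working directory and onnxruntime is installed:

    import numpy as np
    import onnxruntime as rt

    sess = rt.InferenceSession("iris_pytorch.onnx")
    input_name = sess.get_inputs()[0].name    # "input", as named in the export call
    output_name = sess.get_outputs()[0].name  # "species"

    sample = np.asarray([[5.1, 3.5, 1.4, 0.2]], dtype=np.float32)
    probs = sess.run([output_name], {input_name: sample})[0]
    print("predicted class:", int(np.argmax(probs[0])))

This mirrors what onnx_serve/api.py does per request; get_inputs() and get_outputs() return the same metadata that is passed to the pre/post processing functions.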
diff --git a/examples/models/iris_sklearn_logreg.py b/examples/models/iris_sklearn_logreg.py new file mode 100644 index 0000000000..84caf38a77 --- /dev/null +++ b/examples/models/iris_sklearn_logreg.py @@ -0,0 +1,28 @@ +""" +Requirements.txt + +onnxmltools +pandas +scikit-learn +skl2onnx +""" +import numpy as np +from sklearn.datasets import load_iris +from sklearn.model_selection import train_test_split +from onnxconverter_common.data_types import FloatTensorType +from sklearn.linear_model import LogisticRegression +from onnxmltools import convert_sklearn +import onnxruntime as rt + +iris = load_iris() +X, y = iris.data, iris.target +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42) + +lr = LogisticRegression(solver="lbfgs", multi_class="multinomial") +lr.fit(X_train, y_train) + +print("Test data accuracy of the logistic regressor is {:.2f}".format(lr.score(X_test, y_test))) + +onnx_model = convert_sklearn(lr, initial_types=[("input", FloatTensorType([1, 4]))]) +with open("iris_sklearn_logreg.onnx", "wb") as f: + f.write(onnx_model.SerializeToString()) diff --git a/examples/models/iris_xgboost.py b/examples/models/iris_xgboost.py new file mode 100644 index 0000000000..e8314521ca --- /dev/null +++ b/examples/models/iris_xgboost.py @@ -0,0 +1,27 @@ +""" +Requirements.txt + +onnxmltools +scikit-learn +xgboost +""" +import numpy as np +import xgboost as xgb +from sklearn.datasets import load_iris +from sklearn.model_selection import train_test_split +from onnxmltools.convert import convert_xgboost +from onnxconverter_common.data_types import FloatTensorType +import onnxruntime as rt + +iris = load_iris() +X, y = iris.data, iris.target +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42) + +xgb_clf = xgb.XGBClassifier() +xgb_clf = xgb_clf.fit(X_train, y_train) + +print("Test data accuracy of the xgb classifier is {:.2f}".format(xgb_clf.score(X_test, y_test))) + +onnx_model = convert_xgboost(xgb_clf, initial_types=[("input", FloatTensorType([1, 4]))]) +with open("iris_xgb.onnx", "wb") as f: + f.write(onnx_model.SerializeToString()) diff --git a/pkg/workloads/tf_api/api.py b/pkg/workloads/tf_api/api.py index 766db1ee8e..816138c702 100644 --- a/pkg/workloads/tf_api/api.py +++ b/pkg/workloads/tf_api/api.py @@ -369,13 +369,15 @@ def predict(deployment_name, api_name): def start(args): ctx = Context(s3_path=args.context, cache_dir=args.cache_dir, workload_id=args.workload_id) - package.install_packages(ctx.python_packages, ctx.storage) api = ctx.apis_id_map[args.api] - local_cache["api"] = api local_cache["ctx"] = ctx + print(api) + if api.get("inference_processor_path") is not None or api.get("external_model") is None: + package.install_packages(ctx.python_packages, ctx.storage) + if api.get("external_model") is None: model = ctx.models[api["model_name"]] estimator = ctx.estimators[model["estimator"]] From 0da0a8f0f5154c6449cc84d3f09f4714d2ecd17e Mon Sep 17 00:00:00 2001 From: vishal Date: Tue, 2 Jul 2019 21:04:11 +0000 Subject: [PATCH 03/24] Rename to request handler --- examples/iris/cortex.yaml | 6 ++-- examples/iris/inference.py | 4 +-- examples/iris/inference_pytorch.py | 4 +-- pkg/consts/consts.go | 40 ++++++++++----------- pkg/operator/api/context/apis.go | 2 +- pkg/operator/api/context/dependencies.go | 2 +- pkg/operator/api/userconfig/apis.go | 20 +++++------ pkg/operator/api/userconfig/config_key.go | 4 +-- pkg/operator/context/apis.go | 42 +++++++++++------------ pkg/workloads/lib/context.py | 8 ++--- 
pkg/workloads/onnx_serve/api.py | 10 +++--- pkg/workloads/tf_api/api.py | 2 +- 12 files changed, 72 insertions(+), 72 deletions(-) diff --git a/examples/iris/cortex.yaml b/examples/iris/cortex.yaml index 245b440c8e..2b5947fe6a 100644 --- a/examples/iris/cortex.yaml +++ b/examples/iris/cortex.yaml @@ -13,7 +13,7 @@ - kind: api name: iris-pytorch-nn model_type: onnx - inference_processor_path: inference_pytorch.py + request_handler_path: inference_pytorch.py external_model: path: s3://data-vishal/iris_pytorch.onnx region: us-west-2 @@ -23,7 +23,7 @@ - kind: api name: iris-xgb-classifier model_type: onnx - inference_processor_path: inference.py + request_handler_path: inference.py external_model: path: s3://data-vishal/iris_xgb.onnx region: us-west-2 @@ -33,7 +33,7 @@ - kind: api name: iris-sklearn-logistic-regression model_type: onnx - inference_processor_path: inference.py + request_handler_path: inference.py external_model: path: s3://data-vishal/iris_sklearn_logreg.onnx region: us-west-2 diff --git a/examples/iris/inference.py b/examples/iris/inference.py index 36fcb0a7b5..b3c5b34891 100644 --- a/examples/iris/inference.py +++ b/examples/iris/inference.py @@ -3,7 +3,7 @@ iris_labels = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"] -def preprocess(sample, input_metadata): +def preinference(sample, input_metadata): return { input_metadata[0].name: np.asarray( [ @@ -19,6 +19,6 @@ def preprocess(sample, input_metadata): } -def postprocess(prediction, output_metadata): +def postinference(prediction, output_metadata): predicted_class_id = int(prediction[0][0]) return {"class_label": iris_labels[predicted_class_id], "class_index": predicted_class_id} diff --git a/examples/iris/inference_pytorch.py b/examples/iris/inference_pytorch.py index c00e4c8e11..76fea1ec08 100644 --- a/examples/iris/inference_pytorch.py +++ b/examples/iris/inference_pytorch.py @@ -3,7 +3,7 @@ iris_labels = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"] -def preprocess(sample, input_metadata): +def preinference(sample, input_metadata): return { input_metadata[0].name: np.asarray( [ @@ -19,7 +19,7 @@ def preprocess(sample, input_metadata): } -def postprocess(prediction, output_metadata): +def postinference(prediction, output_metadata): predicted_class_id = int(np.argmax(prediction[0][0])) return { "class_label": iris_labels[predicted_class_id], diff --git a/pkg/consts/consts.go b/pkg/consts/consts.go index 85af9612f3..5e0b0ffc96 100644 --- a/pkg/consts/consts.go +++ b/pkg/consts/consts.go @@ -37,26 +37,26 @@ var ( RequirementsTxt = "requirements.txt" PackageDir = "packages" - AppsDir = "apps" - APIsDir = "apis" - DataDir = "data" - RawDataDir = "data_raw" - TrainingDataDir = "data_training" - AggregatorsDir = "aggregators" - AggregatesDir = "aggregates" - TransformersDir = "transformers" - EstimatorsDir = "estimators" - PythonPackagesDir = "python_packages" - InferenceProcessorsDir = "inference_processors_dir" - ModelsDir = "models" - ConstantsDir = "constants" - ContextsDir = "contexts" - ResourceStatusesDir = "resource_statuses" - WorkloadSpecsDir = "workload_specs" - LogPrefixesDir = "log_prefixes" - RawColumnsDir = "raw_columns" - TransformedColumnsDir = "transformed_columns" - MetadataDir = "metadata" + AppsDir = "apps" + APIsDir = "apis" + DataDir = "data" + RawDataDir = "data_raw" + TrainingDataDir = "data_training" + AggregatorsDir = "aggregators" + AggregatesDir = "aggregates" + TransformersDir = "transformers" + EstimatorsDir = "estimators" + PythonPackagesDir = "python_packages" + RequestHandlersDir = 
"request_handlers_dir" + ModelsDir = "models" + ConstantsDir = "constants" + ContextsDir = "contexts" + ResourceStatusesDir = "resource_statuses" + WorkloadSpecsDir = "workload_specs" + LogPrefixesDir = "log_prefixes" + RawColumnsDir = "raw_columns" + TransformedColumnsDir = "transformed_columns" + MetadataDir = "metadata" TelemetryURL = "https://telemetry.cortexlabs.dev" ) diff --git a/pkg/operator/api/context/apis.go b/pkg/operator/api/context/apis.go index 0cc3b33e23..76ee6541ad 100644 --- a/pkg/operator/api/context/apis.go +++ b/pkg/operator/api/context/apis.go @@ -27,7 +27,7 @@ type API struct { *ComputedResourceFields Path string `json:"path"` ModelName string `json:"model_name"` // This removes the @ from userconfig.API.Model, or sets it to userconfig.API.ModelPath if it's external - InferenceProcessorImplKey *string `json:"inference_processor_impl_key"` + RequestHandlerImplKey *string `json:"request_handler_impl_key"` } func APIPath(apiName string, appName string) string { diff --git a/pkg/operator/api/context/dependencies.go b/pkg/operator/api/context/dependencies.go index d1d2f37ee7..56afd6036f 100644 --- a/pkg/operator/api/context/dependencies.go +++ b/pkg/operator/api/context/dependencies.go @@ -155,7 +155,7 @@ func (ctx *Context) modelDependencies(model *Model) strset.Set { func (ctx *Context) apiDependencies(api *API) strset.Set { dependencies := strset.New() - if api.InferenceProcessorPath != nil { + if api.RequestHandlerPath != nil { debug.Pp(api) for _, pythonPackage := range ctx.PythonPackages { dependencies.Add(pythonPackage.GetID()) diff --git a/pkg/operator/api/userconfig/apis.go b/pkg/operator/api/userconfig/apis.go index c886921501..1366602199 100644 --- a/pkg/operator/api/userconfig/apis.go +++ b/pkg/operator/api/userconfig/apis.go @@ -33,12 +33,12 @@ type APIs []*API type API struct { ResourceFields - ModelType ModelType `json:"model_type" yaml:"model_type"` - Model *string `json:"model" yaml:"model"` - InferenceProcessorPath *string `json:"inference_processor_path" yaml:"inference_processor_path"` - ExternalModel *ExternalModel `json:"external_model" yaml:"external_model"` - Compute *APICompute `json:"compute" yaml:"compute"` - Tags Tags `json:"tags" yaml:"tags"` + ModelType ModelType `json:"model_type" yaml:"model_type"` + Model *string `json:"model" yaml:"model"` + RequestHandlerPath *string `json:"request_handler_path" yaml:"request_handler_path"` + ExternalModel *ExternalModel `json:"external_model" yaml:"external_model"` + Compute *APICompute `json:"compute" yaml:"compute"` + Tags Tags `json:"tags" yaml:"tags"` } var apiValidation = &cr.StructValidation{ @@ -57,7 +57,7 @@ var apiValidation = &cr.StructValidation{ }, }, { - StructField: "InferenceProcessorPath", + StructField: "RequestHandlerPath", StringPtrValidation: &cr.StringPtrValidation{}, }, { @@ -88,9 +88,9 @@ func (api *API) UserConfigStr() string { if api.ModelType != UnknownModelType { sb.WriteString(fmt.Sprintf("%s: %s\n", ModelTypeKey, api.ModelType.String())) } - // if api.ServingFunction != nil { - // sb.WriteString(fmt.Sprintf("%s: %s\n", ServingFunctionKey, *api.ServingFunction)) - // } + if api.ServingFunction != nil { + sb.WriteString(fmt.Sprintf("%s: %s\n", RequestHandlerPathKey, *api.RequestHandlerPath)) + } if api.ExternalModel != nil { sb.WriteString(fmt.Sprintf("%s:\n", ExternalModelKey)) sb.WriteString(s.Indent(api.ExternalModel.UserConfigStr(), " ")) diff --git a/pkg/operator/api/userconfig/config_key.go b/pkg/operator/api/userconfig/config_key.go index 458d064a1e..215fbf6a24 100644 
--- a/pkg/operator/api/userconfig/config_key.go +++ b/pkg/operator/api/userconfig/config_key.go @@ -94,8 +94,8 @@ const ( // API ModelKey = "model" - ModelTypeKey = "model_key" - ServingFunctionKey = "serving_fn" + ModelTypeKey = "model_type" + RequestHandlerPathKey = "request_handler_path" ModelNameKey = "model_name" ExternalModelKey = "external_model" diff --git a/pkg/operator/context/apis.go b/pkg/operator/context/apis.go index 11b2578217..56c37a11f5 100644 --- a/pkg/operator/context/apis.go +++ b/pkg/operator/context/apis.go @@ -31,7 +31,7 @@ import ( "github.com/cortexlabs/yaml" ) -var uploadedInferenceProcessors = strset.New() +var uploadedRequestHandlers = strset.New() func getAPIs(config *userconfig.Config, models context.Models, @@ -45,24 +45,24 @@ func getAPIs(config *userconfig.Config, var buf bytes.Buffer var modelName string - var inferenceProcessorImplKey *string + var requestHandlerImplKey *string buf.WriteString(apiConfig.Name) buf.WriteString(apiConfig.ModelType.String()) - for _, pythonPackage := range pythonPackages { - buf.WriteString(pythonPackage.GetID()) - } + if apiConfig.RequestHandlerPath != nil { + for _, pythonPackage := range pythonPackages { + buf.WriteString(pythonPackage.GetID()) + } - if apiConfig.InferenceProcessorPath != nil { - impl, ok := impls[*apiConfig.InferenceProcessorPath] + impl, ok := impls[*apiConfig.RequestHandlerPath] if !ok { - return nil, errors.Wrap(userconfig.ErrorImplDoesNotExist(*apiConfig.InferenceProcessorPath), userconfig.Identify(apiConfig)) + return nil, errors.Wrap(userconfig.ErrorImplDoesNotExist(*apiConfig.RequestHandlerPath), userconfig.Identify(apiConfig)) } implID := hash.Bytes(impl) buf.WriteString(implID) - key := filepath.Join(consts.InferenceProcessorsDir, implID) - inferenceProcessorImplKey = &key + key := filepath.Join(consts.RequestHandlersDir, implID) + requestHandlerImplKey = &key } if apiConfig.Model != nil { @@ -90,38 +90,38 @@ func getAPIs(config *userconfig.Config, ResourceType: resource.APIType, }, }, - API: apiConfig, - Path: context.APIPath(apiConfig.Name, config.App.Name), - ModelName: modelName, - InferenceProcessorImplKey: inferenceProcessorImplKey, + API: apiConfig, + Path: context.APIPath(apiConfig.Name, config.App.Name), + ModelName: modelName, + RequestHandlerImplKey: requestHandlerImplKey, } - if apiConfig.InferenceProcessorPath != nil { - uploadInferenceProcessor(apis[apiConfig.Name], impls[*apiConfig.InferenceProcessorPath]) + if apiConfig.RequestHandlerPath != nil { + uploadRequestHandlers(apis[apiConfig.Name], impls[*apiConfig.RequestHandlerPath]) } } return apis, nil } -func uploadInferenceProcessor(api *context.API, impl []byte) error { +func uploadRequestHandlers(api *context.API, impl []byte) error { implID := hash.Bytes(impl) - if uploadedInferenceProcessors.Has(implID) { + if uploadedRequestHandlers.Has(implID) { return nil } - isUploaded, err := config.AWS.IsS3File(*api.InferenceProcessorImplKey) + isUploaded, err := config.AWS.IsS3File(*api.RequestHandlerImplKey) if err != nil { return errors.Wrap(err, userconfig.Identify(api), "upload") } if !isUploaded { - err = config.AWS.UploadBytesToS3(impl, *api.InferenceProcessorImplKey) + err = config.AWS.UploadBytesToS3(impl, *api.RequestHandlerImplKey) if err != nil { return errors.Wrap(err, userconfig.Identify(api), "upload") } } - uploadedInferenceProcessors.Add(implID) + uploadedRequestHandlers.Add(implID) return nil } diff --git a/pkg/workloads/lib/context.py b/pkg/workloads/lib/context.py index bcac3ddbb0..253e563f84 100644 --- 
a/pkg/workloads/lib/context.py +++ b/pkg/workloads/lib/context.py @@ -270,17 +270,17 @@ def get_estimator_impl(self, model_name): self._estimator_impls[estimator_name] = (impl, impl_path) return (impl, impl_path) - def get_inference_processor_impl(self, api_name): + def get_request_handler_impl(self, api_name): api = self.apis[api_name] - module_prefix = "inference_processor" + module_prefix = "request_handler" try: impl, impl_path = self.load_module( - module_prefix, api["name"], api["inference_processor_impl_key"] + module_prefix, api["name"], api["request_handler_impl_key"] ) except CortexException as e: - e.wrap("api " + api_name, "inference_processor") + e.wrap("api " + api_name, "request_handler") raise return (impl, impl_path) diff --git a/pkg/workloads/onnx_serve/api.py b/pkg/workloads/onnx_serve/api.py index b8a36119f0..43cf65850d 100644 --- a/pkg/workloads/onnx_serve/api.py +++ b/pkg/workloads/onnx_serve/api.py @@ -41,7 +41,7 @@ "sess": None, "model_inputs": None, "model_outputs": None, - "inference_processor": None, + "request_handler": None, } @@ -68,7 +68,7 @@ def predict(app_name, api_name): sess = local_cache["sess"] api = local_cache["api"] - inference_processor = local_cache["inference_processor"] + request_handler = local_cache["request_handler"] model_inputs = local_cache["model_inputs"] model_outputs = local_cache["model_outputs"] @@ -93,10 +93,10 @@ def predict(app_name, api_name): try: util.log_indent("Raw sample:", indent=4) util.log_pretty(sample, indent=6) - inference_input = inference_processor.preprocess(sample, model_inputs) + inference_input = request_handler.preinference(sample, model_inputs) labels = [output_node.name for output_node in model_outputs] inference = sess.run(labels, inference_input) - result = inference_processor.postprocess(inference, model_outputs) + result = request_handler.postinference(inference, model_outputs) util.log_indent("Prediction:", indent=4) util.log_pretty(result, indent=6) prediction = {"prediction": result} @@ -125,7 +125,7 @@ def start(args): local_cache["api"] = api local_cache["ctx"] = ctx - local_cache["inference_processor"], _ = ctx.get_inference_processor_impl(api["name"]) + local_cache["request_handler"], _ = ctx.get_request_handler_impl(api["name"]) logger.info(ctx) model_cache_path = os.path.join(args.model_dir, args.api) diff --git a/pkg/workloads/tf_api/api.py b/pkg/workloads/tf_api/api.py index 816138c702..4b26dd3384 100644 --- a/pkg/workloads/tf_api/api.py +++ b/pkg/workloads/tf_api/api.py @@ -375,7 +375,7 @@ def start(args): local_cache["ctx"] = ctx print(api) - if api.get("inference_processor_path") is not None or api.get("external_model") is None: + if api.get("request_handler_path") is not None or api.get("external_model") is None: package.install_packages(ctx.python_packages, ctx.storage) if api.get("external_model") is None: From aca81f760ae33edae7c14bf50b0951c977307690 Mon Sep 17 00:00:00 2001 From: vishal Date: Wed, 3 Jul 2019 17:43:18 +0000 Subject: [PATCH 04/24] Optional pre/post --- pkg/workloads/onnx_serve/api.py | 5 +++-- pkg/workloads/tf_api/api.py | 6 ++++++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/pkg/workloads/onnx_serve/api.py b/pkg/workloads/onnx_serve/api.py index 43cf65850d..a898262b6f 100644 --- a/pkg/workloads/onnx_serve/api.py +++ b/pkg/workloads/onnx_serve/api.py @@ -125,12 +125,13 @@ def start(args): local_cache["api"] = api local_cache["ctx"] = ctx - local_cache["request_handler"], _ = ctx.get_request_handler_impl(api["name"]) + if api.get("request_handler_impl_key") 
is not None: + local_cache["request_handler"], _ = ctx.get_request_handler_impl(api["name"]) logger.info(ctx) model_cache_path = os.path.join(args.model_dir, args.api) if not os.path.exists(model_cache_path): - ctx.storage.download_file_external(api["external_model"]["path"], model_cache_path) + ctx.storage.download_file_external(api["model"], model_cache_path) sess = rt.InferenceSession(model_cache_path) local_cache["sess"] = sess diff --git a/pkg/workloads/tf_api/api.py b/pkg/workloads/tf_api/api.py index 0b3638ec3e..2c53a2129b 100644 --- a/pkg/workloads/tf_api/api.py +++ b/pkg/workloads/tf_api/api.py @@ -244,6 +244,9 @@ def parse_response_proto_raw(response_proto): def run_predict(sample): + preprocessed_sample = local_cache["request_handler"].preinference( + sample, local_cache["metadata"]["signatureDef"] + ) if util.is_resource_ref(local_cache["api"]["model"]): transformed_sample = transform_sample(sample) prediction_request = create_prediction_request(transformed_sample) @@ -387,6 +390,9 @@ def start(args): local_cache["api"] = api local_cache["ctx"] = ctx + if api.get("request_handler_impl_key") is not None: + local_cache["request_handler"], _ = ctx.get_request_handler_impl(api["name"]) + if not util.is_resource_ref(api["model"]): if api.get("request_handler_path") is not None: package.install_packages(ctx.python_packages, ctx.storage) From c07f3cef89a1cfe50f7ddade3b311e644e44e5f9 Mon Sep 17 00:00:00 2001 From: vishal Date: Thu, 4 Jul 2019 01:13:01 +0000 Subject: [PATCH 05/24] Optional pre and post processing --- dev/registry.sh | 3 - examples/iris/cortex.yaml | 31 +++++--- examples/iris/inference.py | 23 +++--- examples/iris/inference_pytorch.py | 25 +++--- examples/iris/requirements.txt | 2 +- pkg/operator/api/context/dependencies.go | 2 +- pkg/operator/api/userconfig/apis.go | 16 ++-- pkg/operator/api/userconfig/config_key.go | 2 +- pkg/operator/context/apis.go | 10 +-- pkg/operator/workloads/workflow.go | 2 - pkg/workloads/lib/context.py | 6 ++ pkg/workloads/lib/util.py | 7 ++ pkg/workloads/onnx_serve/api.py | 97 +++++++++++++++++++---- pkg/workloads/tf_api/api.py | 20 +++-- 14 files changed, 164 insertions(+), 82 deletions(-) diff --git a/dev/registry.sh b/dev/registry.sh index c9528fdbcd..5b7976d96a 100755 --- a/dev/registry.sh +++ b/dev/registry.sh @@ -143,9 +143,6 @@ elif [ "$cmd" = "update" ]; then build_and_push $ROOT/images/metrics-server metrics-server latest fi - build_and_push $ROOT/images/spark spark latest - build_and_push $ROOT/images/tf-train tf-train latest - build_and_push $ROOT/images/tf-train-gpu tf-train-gpu latest build_and_push $ROOT/images/tf-api tf-api latest build_and_push $ROOT/images/onnx-serve onnx-serve latest diff --git a/examples/iris/cortex.yaml b/examples/iris/cortex.yaml index 407540a838..98eacbbad7 100644 --- a/examples/iris/cortex.yaml +++ b/examples/iris/cortex.yaml @@ -1,25 +1,32 @@ - kind: deployment name: iris -- kind: api - name: iris-tf-nn - model_type: tensorflow - model: s3://cortex-examples/iris-tensorflow.zip +# - kind: api +# name: iris-tf-nn +# model_type: tensorflow +# model: s3://cortex-examples/iris-tensorflow.zip +# compute: +# max_replicas: 1 -- kind: api - name: iris-pytorch-nn - model_type: onnx - request_handler_path: inference_pytorch.py - model: s3://data-vishal/iris_pytorch.onnx +# - kind: api +# name: iris-pytorch-nn +# model_type: onnx +# request_handler: inference_pytorch.py +# model: s3://data-vishal/iris_pytorch.onnx +# compute: +# max_replicas: 1 - kind: api -name: iris-xgb-classifier + name: iris-xgb-classifier 
model_type: onnx - request_handler_path: inference.py model: s3://data-vishal/iris_xgb.onnx + compute: + max_replicas: 1 - kind: api name: iris-sklearn-logistic-regression model_type: onnx - request_handler_path: inference.py +# request_handler: inference.py model: s3://data-vishal/iris_sklearn_logreg.onnx + compute: + max_replicas: 1 diff --git a/examples/iris/inference.py b/examples/iris/inference.py index b3c5b34891..58b000bbdc 100644 --- a/examples/iris/inference.py +++ b/examples/iris/inference.py @@ -3,22 +3,17 @@ iris_labels = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"] -def preinference(sample, input_metadata): +def preinference(request, metadata): return { - input_metadata[0].name: np.asarray( - [ - [ - sample["sepal_length"], - sample["sepal_width"], - sample["petal_length"], - sample["petal_width"], - ] - ], - dtype=np.float32, - ) + "input": [ + request["sepal_length"], + request["sepal_width"], + request["petal_length"], + request["petal_width"], + ] } -def postinference(prediction, output_metadata): - predicted_class_id = int(prediction[0][0]) +def postinference(response, metadata): + predicted_class_id = response[0][0] return {"class_label": iris_labels[predicted_class_id], "class_index": predicted_class_id} diff --git a/examples/iris/inference_pytorch.py b/examples/iris/inference_pytorch.py index 76fea1ec08..fc19e6f4a7 100644 --- a/examples/iris/inference_pytorch.py +++ b/examples/iris/inference_pytorch.py @@ -3,26 +3,21 @@ iris_labels = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"] -def preinference(sample, input_metadata): +def preinference(request, metadata): return { - input_metadata[0].name: np.asarray( - [ - [ - sample["sepal_length"], - sample["sepal_width"], - sample["petal_length"], - sample["petal_width"], - ] - ], - dtype=np.float32, - ) + metadata[0].name: [ + request["sepal_length"], + request["sepal_width"], + request["petal_length"], + request["petal_width"], + ] } -def postinference(prediction, output_metadata): - predicted_class_id = int(np.argmax(prediction[0][0])) +def postinference(response, metadata): + predicted_class_id = int(np.argmax(response[0][0])) return { "class_label": iris_labels[predicted_class_id], "class_index": predicted_class_id, - "probabilites": prediction[0][0].tolist(), + "probabilites": response[0][0], } diff --git a/examples/iris/requirements.txt b/examples/iris/requirements.txt index 1c122fe8fd..f46aa907fa 100644 --- a/examples/iris/requirements.txt +++ b/examples/iris/requirements.txt @@ -1 +1 @@ -numpy==1.16.4 +numpy>1.16.3 diff --git a/pkg/operator/api/context/dependencies.go b/pkg/operator/api/context/dependencies.go index e766f50545..4252ffc13d 100644 --- a/pkg/operator/api/context/dependencies.go +++ b/pkg/operator/api/context/dependencies.go @@ -162,7 +162,7 @@ func (ctx *Context) apiDependencies(api *API) strset.Set { dependencies.Add(model.ID) } - if api.RequestHandlerPath != nil { + if api.Requesthandler != nil { for _, pythonPackage := range ctx.PythonPackages { dependencies.Add(pythonPackage.GetID()) } diff --git a/pkg/operator/api/userconfig/apis.go b/pkg/operator/api/userconfig/apis.go index c4158d1722..48abaaef50 100644 --- a/pkg/operator/api/userconfig/apis.go +++ b/pkg/operator/api/userconfig/apis.go @@ -33,11 +33,11 @@ type APIs []*API type API struct { ResourceFields - Model string `json:"model" yaml:"model"` - ModelType ModelType `json:"model_type" yaml:"model_type"` - RequestHandlerPath *string `json:"request_handler_path" yaml:"request_handler_path"` - Compute *APICompute `json:"compute" 
yaml:"compute"` - Tags Tags `json:"tags" yaml:"tags"` + Model string `json:"model" yaml:"model"` + ModelType ModelType `json:"model_type" yaml:"model_type"` + Requesthandler *string `json:"request_handler" yaml:"request_handler"` + Compute *APICompute `json:"compute" yaml:"compute"` + Tags Tags `json:"tags" yaml:"tags"` } var apiValidation = &cr.StructValidation{ @@ -57,7 +57,7 @@ var apiValidation = &cr.StructValidation{ }, }, { - StructField: "RequestHandlerPath", + StructField: "Requesthandler", StringPtrValidation: &cr.StringPtrValidation{}, }, { @@ -83,8 +83,8 @@ func (api *API) UserConfigStr() string { if api.ModelType != UnknownModelType { sb.WriteString(fmt.Sprintf("%s: %s\n", ModelTypeKey, api.ModelType.String())) } - if api.RequestHandlerPath != nil { - sb.WriteString(fmt.Sprintf("%s: %s\n", RequestHandlerPathKey, *api.RequestHandlerPath)) + if api.Requesthandler != nil { + sb.WriteString(fmt.Sprintf("%s: %s\n", RequesthandlerKey, *api.Requesthandler)) } if api.Compute != nil { sb.WriteString(fmt.Sprintf("%s:\n", ComputeKey)) diff --git a/pkg/operator/api/userconfig/config_key.go b/pkg/operator/api/userconfig/config_key.go index ece7bd8c8a..533793a440 100644 --- a/pkg/operator/api/userconfig/config_key.go +++ b/pkg/operator/api/userconfig/config_key.go @@ -93,7 +93,7 @@ const ( // API ModelKey = "model" ModelTypeKey = "model_type" - RequestHandlerPathKey = "request_handler_path" + RequesthandlerKey = "request_handler" // compute ComputeKey = "compute" diff --git a/pkg/operator/context/apis.go b/pkg/operator/context/apis.go index 20fa7601b1..a08dc5922d 100644 --- a/pkg/operator/context/apis.go +++ b/pkg/operator/context/apis.go @@ -48,14 +48,14 @@ func getAPIs(config *userconfig.Config, buf.WriteString(apiConfig.Name) buf.WriteString(apiConfig.ModelType.String()) - if apiConfig.RequestHandlerPath != nil { + if apiConfig.Requesthandler != nil { for _, pythonPackage := range pythonPackages { buf.WriteString(pythonPackage.GetID()) } - impl, ok := impls[*apiConfig.RequestHandlerPath] + impl, ok := impls[*apiConfig.Requesthandler] if !ok { - return nil, errors.Wrap(userconfig.ErrorImplDoesNotExist(*apiConfig.RequestHandlerPath), userconfig.Identify(apiConfig)) + return nil, errors.Wrap(userconfig.ErrorImplDoesNotExist(*apiConfig.Requesthandler), userconfig.Identify(apiConfig)) } implID := hash.Bytes(impl) buf.WriteString(implID) @@ -90,8 +90,8 @@ func getAPIs(config *userconfig.Config, RequestHandlerImplKey: requestHandlerImplKey, } - if apiConfig.RequestHandlerPath != nil { - uploadRequestHandlers(apis[apiConfig.Name], impls[*apiConfig.RequestHandlerPath]) + if apiConfig.Requesthandler != nil { + uploadRequestHandlers(apis[apiConfig.Name], impls[*apiConfig.Requesthandler]) } } return apis, nil diff --git a/pkg/operator/workloads/workflow.go b/pkg/operator/workloads/workflow.go index 6ae5f456da..526d71ee4f 100644 --- a/pkg/operator/workloads/workflow.go +++ b/pkg/operator/workloads/workflow.go @@ -104,8 +104,6 @@ func Create(ctx *context.Context) (*awfv1.Workflow, error) { for _, spec := range allSpecs { var dependencyWorkloadIDs []string for resourceID := range spec.ResourceIDs { - fmt.Println(spec.ResourceIDs) - fmt.Println(spec.Spec.GetName()) for dependencyResourceID := range ctx.AllComputedResourceDependencies(resourceID) { workloadID := resourceWorkloadIDs[dependencyResourceID] if workloadID != "" && workloadID != spec.WorkloadID { diff --git a/pkg/workloads/lib/context.py b/pkg/workloads/lib/context.py index ed4b125068..3272b82b5a 100644 --- a/pkg/workloads/lib/context.py +++ 
b/pkg/workloads/lib/context.py @@ -677,6 +677,12 @@ def cast_compound_type(value, type_str): ] } +TRANSFORMER_IMPL_VALIDATION = { + "optional": [ + {"name": "preinference", "args": ["request", "metadata"]}, + {"name": "postinference", "args": ["response", "metadata"]}, + ] +} def _validate_impl(impl, impl_req): for optional_func in impl_req.get("optional", []): diff --git a/pkg/workloads/lib/util.py b/pkg/workloads/lib/util.py index 3ee7ba2266..583797ca1f 100644 --- a/pkg/workloads/lib/util.py +++ b/pkg/workloads/lib/util.py @@ -929,3 +929,10 @@ def extract_resource_refs(input): return resources return set() + +def has_function(impl, fn_name): + fn = getattr(impl, fn_name, None) + if fn is None: + return False + + return callable(fn) \ No newline at end of file diff --git a/pkg/workloads/onnx_serve/api.py b/pkg/workloads/onnx_serve/api.py index a898262b6f..b9e636877c 100644 --- a/pkg/workloads/onnx_serve/api.py +++ b/pkg/workloads/onnx_serve/api.py @@ -23,7 +23,7 @@ from waitress import serve import onnxruntime as rt from lib.storage import S3 -import numpy +import numpy as np import consts from lib import util, package, Context @@ -35,12 +35,27 @@ app = Flask(__name__) +onnx_to_np = { + "tensor(float16)": "float16", + "tensor(float)": "float32", + "tensor(double)": "float64", + "tensor(int32)": "int32", + "tensor(int8)": "int8", + "tensor(uint8)": "uint8", + "tensor(int16)": "int16", + "tensor(uint16)": "uint16", + "tensor(int64)": "int64", + "tensor(uint64)": "uint64", + "tensor(bool)": "bool", + "tensor(string)": "string", +} + local_cache = { "ctx": None, "api": None, "sess": None, - "model_inputs": None, - "model_outputs": None, + "input_metadata": None, + "output_metadata": None, "request_handler": None, } @@ -59,6 +74,47 @@ def health(): return jsonify({"ok": True}) +def transform_to_numpy(input_pyobj, input_metadata): + target_dtype = onnx_to_np[input_metadata.type] + target_shape = input_metadata.shape + + if type(input_pyobj) is not np.ndarray: + np_arr = np.array(input_pyobj, dtype=target_dtype) + else: + np_arr = input_pyobj + np_arr = np_arr.reshape(tuple(target_shape)) + return np_arr + + +def convert_to_onnx_input(sample, input_metadata_list): + sess = local_cache["sess"] + + input_dict = {} + if len(input_metadata_list) == 1: + input_metadata = input_metadata_list[0] + if util.is_dict(sample): + if sample.get(input_metadata.name) is None: + raise ValueError("sample should be a dict containing key: " + input_metadata.name) + input_dict[input_metadata.name] = transform_to_numpy( + sample[input_metadata.name], input_metadata + ) + else: + input_dict[input_metadata.name] = transform_to_numpy(sample, input_metadata) + else: + for input_metadata in input_metadata_list: + if not util.is_dict(sample): + expected_keys = [metadata.name for metadata in input_metadata_list] + raise ValueError( + "sample should be a dict containing keys: " + ", ".join(expected_keys) + ) + + if sample.get(input_metadata.name) is None: + raise ValueError("sample should be a dict containing key: " + input_metadata.name) + + input_dict[input_metadata.name] = transform_to_numpy(sample[input_metadata.name], input_metadata) + return input_dict + + @app.route("/<app_name>/<api_name>", methods=["POST"]) def predict(app_name, api_name): try: @@ -68,9 +124,9 @@ def predict(app_name, api_name): sess = local_cache["sess"] api = local_cache["api"] - request_handler = local_cache["request_handler"] + request_handler = local_cache.get("request_handler") +
input_metadata = local_cache["input_metadata"] + output_metadata = local_cache["output_metadata"] response = {} @@ -93,10 +149,23 @@ def predict(app_name, api_name): try: util.log_indent("Raw sample:", indent=4) util.log_pretty(sample, indent=6) - inference_input = request_handler.preinference(sample, input_metadata) - labels = [output_node.name for output_node in model_outputs] - inference = sess.run(labels, inference_input) - result = request_handler.postinference(inference, model_outputs) + + if request_handler is not None and util.has_function(request_handler, "preinference"): + sample = request_handler.preinference(sample, input_metadata) + + inference_input = convert_to_onnx_input(sample, input_metadata) + logger.info(inference_input) + model_outputs = sess.run([], inference_input) + logger.info(model_outputs) + result = [] + for model_output in model_outputs: + if type(model_output) is np.ndarray: + result.append(model_output.tolist()) + else: + result.append(model_output) + + if request_handler is not None and util.has_function(request_handler, "postinference"): + result = request_handler.postinference(result, output_metadata) util.log_indent("Prediction:", indent=4) util.log_pretty(result, indent=6) prediction = {"prediction": result} @@ -109,7 +178,7 @@ def predict(app_name, api_name): logger.exception("An error occurred, see `cx logs api {}` for more details.".format(api["name"])) return prediction_failed(sample, str(e)) - predictions.append(result) + predictions.append(prediction) response["predictions"] = predictions response["resource_id"] = api["id"] @@ -120,12 +189,12 @@ def predict(app_name, api_name): def start(args): logger.info(args) ctx = Context(s3_path=args.context, cache_dir=args.cache_dir, workload_id=args.workload_id) - package.install_packages(ctx.python_packages, ctx.storage) api = ctx.apis_id_map[args.api] local_cache["api"] = api local_cache["ctx"] = ctx + if api.get("request_handler_impl_key") is not None: + package.install_packages(ctx.python_packages, ctx.storage) local_cache["request_handler"], _ = ctx.get_request_handler_impl(api["name"]) logger.info(ctx) @@ -135,8 +204,8 @@ def start(args): sess = rt.InferenceSession(model_cache_path) local_cache["sess"] = sess - local_cache["model_inputs"] = sess.get_inputs() - local_cache["model_outputs"] = sess.get_outputs() + local_cache["input_metadata"] = sess.get_inputs() + local_cache["output_metadata"] = sess.get_outputs() serve(app, listen="*:{}".format(args.port)) logger.info("Serving model") diff --git a/pkg/workloads/tf_api/api.py b/pkg/workloads/tf_api/api.py index 2c53a2129b..cf4c52493f 100644 --- a/pkg/workloads/tf_api/api.py +++ b/pkg/workloads/tf_api/api.py @@ -244,11 +244,16 @@ def parse_response_proto_raw(response_proto): def run_predict(sample): - preprocessed_sample = local_cache["request_handler"].preinference( - sample, local_cache["metadata"]["signatureDef"] - ) + request_handler = local_cache.get("request_handler") + + preprocessed_sample = sample + if request_handler is not None and util.has_function(request_handler, "preinference"): + preprocessed_sample = request_handler.preinference( + sample, local_cache["metadata"]["signatureDef"] + ) + if util.is_resource_ref(local_cache["api"]["model"]): - transformed_sample = transform_sample(sample) + transformed_sample = transform_sample(preprocessed_sample) prediction_request = create_prediction_request(transformed_sample) response_proto = local_cache["stub"].Predict(prediction_request, timeout=10.0) result = parse_response_proto(response_proto) @@ -263,7 +268,7
@@ def run_predict(sample): result["transformed_sample"] = transformed_sample else: - prediction_request = create_raw_prediction_request(sample) + prediction_request = create_raw_prediction_request(preprocessed_sample) response_proto = local_cache["stub"].Predict(prediction_request, timeout=10.0) result = parse_response_proto_raw(response_proto) util.log_indent("Sample:", indent=4) @@ -271,6 +276,9 @@ def run_predict(sample): util.log_indent("Prediction:", indent=4) util.log_pretty(result, indent=6) + if request_handler is not None and util.has_function(request_handler, "postinference"): + result = request_handler.postinference(result, local_cache["metadata"]["signatureDef"]) + return result @@ -394,7 +402,7 @@ def start(args): local_cache["request_handler"], _ = ctx.get_request_handler_impl(api["name"]) if not util.is_resource_ref(api["model"]): - if api.get("request_handler_path") is not None: + if api.get("request_handler") is not None: package.install_packages(ctx.python_packages, ctx.storage) if not os.path.isdir(args.model_dir): ctx.storage.download_and_unzip_external(api["model"], args.model_dir) From e8b12f437481d8e6e8f7a082ff6906a621944de1 Mon Sep 17 00:00:00 2001 From: vishal Date: Thu, 4 Jul 2019 01:59:09 +0000 Subject: [PATCH 06/24] Remove unnecessary config --- examples/iris/cortex.yaml | 28 +++++++++++------------ examples/iris/irises_flat.json | 10 ++++++++ examples/iris/requirements.txt | 2 +- go.mod | 5 ---- go.sum | 10 -------- pkg/operator/api/context/dependencies.go | 4 +--- pkg/operator/api/userconfig/config_key.go | 4 ++-- pkg/operator/endpoints/shared.go | 1 - pkg/workloads/tf_api/api.py | 8 +++---- 9 files changed, 32 insertions(+), 40 deletions(-) create mode 100644 examples/iris/irises_flat.json diff --git a/examples/iris/cortex.yaml b/examples/iris/cortex.yaml index 98eacbbad7..78ee7f1ead 100644 --- a/examples/iris/cortex.yaml +++ b/examples/iris/cortex.yaml @@ -1,20 +1,20 @@ - kind: deployment name: iris -# - kind: api -# name: iris-tf-nn -# model_type: tensorflow -# model: s3://cortex-examples/iris-tensorflow.zip -# compute: -# max_replicas: 1 +- kind: api + name: iris-tf-nn + model_type: tensorflow + model: s3://cortex-examples/iris-tensorflow.zip + compute: + max_replicas: 1 -# - kind: api -# name: iris-pytorch-nn -# model_type: onnx -# request_handler: inference_pytorch.py -# model: s3://data-vishal/iris_pytorch.onnx -# compute: -# max_replicas: 1 +- kind: api + name: iris-pytorch-nn + model_type: onnx + request_handler: inference_pytorch.py + model: s3://data-vishal/iris_pytorch.onnx + compute: + max_replicas: 1 - kind: api name: iris-xgb-classifier @@ -26,7 +26,7 @@ - kind: api name: iris-sklearn-logistic-regression model_type: onnx -# request_handler: inference.py + request_handler: inference.py model: s3://data-vishal/iris_sklearn_logreg.onnx compute: max_replicas: 1 diff --git a/examples/iris/irises_flat.json b/examples/iris/irises_flat.json new file mode 100644 index 0000000000..a43e0234e2 --- /dev/null +++ b/examples/iris/irises_flat.json @@ -0,0 +1,10 @@ +{ + "samples": [ + [ + 5.9, + 3.0, + 5.1, + 1.8 + ] + ] +} diff --git a/examples/iris/requirements.txt b/examples/iris/requirements.txt index f46aa907fa..1c122fe8fd 100644 --- a/examples/iris/requirements.txt +++ b/examples/iris/requirements.txt @@ -1 +1 @@ -numpy>1.16.3 +numpy==1.16.4 diff --git a/go.mod b/go.mod index f6c364bd87..eac3486535 100644 --- a/go.mod +++ b/go.mod @@ -23,10 +23,6 @@ require ( github.com/aws/aws-sdk-go v1.20.12 github.com/cortexlabs/yaml v0.0.0-20190626164117-202ab3a3d475 
github.com/davecgh/go-spew v1.1.1 - github.com/docker/distribution v2.7.1+incompatible // indirect - github.com/docker/docker v1.13.1 // indirect - github.com/docker/go-connections v0.4.0 // indirect - github.com/docker/go-units v0.4.0 // indirect github.com/emicklei/go-restful v2.9.6+incompatible // indirect github.com/ghodss/yaml v1.0.0 github.com/go-openapi/spec v0.19.2 // indirect @@ -41,7 +37,6 @@ require ( github.com/json-iterator/go v1.1.6 // indirect github.com/mitchellh/go-homedir v1.1.0 github.com/modern-go/reflect2 v1.0.1 // indirect - github.com/opencontainers/go-digest v1.0.0-rc1 // indirect github.com/peterbourgon/diskv v2.0.1+incompatible // indirect github.com/pkg/errors v0.8.1 github.com/spf13/cobra v0.0.5 diff --git a/go.sum b/go.sum index aff9ac51c4..fb2c8abb42 100644 --- a/go.sum +++ b/go.sum @@ -25,14 +25,6 @@ github.com/davecgh/go-spew v0.0.0-20151105211317-5215b55f46b2/go.mod h1:J7Y8YcW2 github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/docker/distribution v2.7.1+incompatible h1:a5mlkVzth6W5A4fOsS3D2EO5BUmsJpcB+cRlLU7cSug= -github.com/docker/distribution v2.7.1+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= -github.com/docker/docker v1.13.1 h1:IkZjBSIc8hBjLpqeAbeE5mca5mNgeatLHBy3GO78BWo= -github.com/docker/docker v1.13.1/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= -github.com/docker/go-connections v0.4.0 h1:El9xVISelRB7BuFusrZozjnkIM5YnzCViNKohAFqRJQ= -github.com/docker/go-connections v0.4.0/go.mod h1:Gbd7IOopHjR8Iph03tsViu4nIes5XhDvyHbTtUxmeec= -github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw= -github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/emicklei/go-restful v0.0.0-20170410110728-ff4f55a20633/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= github.com/emicklei/go-restful v2.9.6+incompatible h1:tfrHha8zJ01ywiOEC1miGY8st1/igzWB8OmvPgoYX7w= github.com/emicklei/go-restful v2.9.6+incompatible/go.mod h1:otzb+WCGbkyDHkqmQmT5YD2WR4BBwUdeQoFo8l/7tVs= @@ -104,8 +96,6 @@ github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3Rllmb github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/onsi/ginkgo v0.0.0-20170829012221-11459a886d9c/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/gomega v0.0.0-20170829124025-dcabb60a477c/go.mod h1:C1qb7wdrVGGVU+Z6iS04AVkA3Q65CEZX59MT0QO5uiA= -github.com/opencontainers/go-digest v1.0.0-rc1 h1:WzifXhOVOEOuFYOJAW6aQqW0TooG2iki3E3Ii+WN7gQ= -github.com/opencontainers/go-digest v1.0.0-rc1/go.mod h1:cMLVZDEM3+U2I4VmLI6N8jQYUd2OVphdqWwCJHrFt2s= github.com/pelletier/go-toml v1.2.0/go.mod h1:5z9KED0ma1S8pY6P1sdut58dfprrGBbd/94hg7ilaic= github.com/peterbourgon/diskv v2.0.1+incompatible h1:UBdAOUP5p4RWqPBg048CAvpKN+vxiaj6gdUUzhl4XmI= github.com/peterbourgon/diskv v2.0.1+incompatible/go.mod h1:uqqh8zWWbv1HBMNONnaR/tNboyR3/BZd58JJSHlUSCU= diff --git a/pkg/operator/api/context/dependencies.go b/pkg/operator/api/context/dependencies.go index 4252ffc13d..774dbaeea4 100644 --- a/pkg/operator/api/context/dependencies.go +++ b/pkg/operator/api/context/dependencies.go @@ -22,7 +22,6 @@ import ( "github.com/cortexlabs/yaml" "github.com/cortexlabs/cortex/pkg/lib/cast" - 
"github.com/cortexlabs/cortex/pkg/lib/debug" "github.com/cortexlabs/cortex/pkg/lib/errors" "github.com/cortexlabs/cortex/pkg/lib/sets/strset" "github.com/cortexlabs/cortex/pkg/operator/api/resource" @@ -36,7 +35,6 @@ func (ctx *Context) AllComputedResourceDependencies(resourceID string) strset.Se func (ctx *Context) allComputedResourceDependenciesHelper(resourceID string, allDependencies strset.Set) { subDependencies := ctx.DirectComputedResourceDependencies(resourceID) - debug.Pp(subDependencies) subDependencies.Subtract(allDependencies) allDependencies.Merge(subDependencies) @@ -74,7 +72,7 @@ func (ctx *Context) DirectComputedResourceDependencies(resourceID string) strset return ctx.trainingDatasetDependencies(model) } } - debug.Pp(ctx.APIs) + for _, api := range ctx.APIs { if api.ID == resourceID { return ctx.apiDependencies(api) diff --git a/pkg/operator/api/userconfig/config_key.go b/pkg/operator/api/userconfig/config_key.go index 533793a440..693c152ae4 100644 --- a/pkg/operator/api/userconfig/config_key.go +++ b/pkg/operator/api/userconfig/config_key.go @@ -91,8 +91,8 @@ const ( ThrottleSecsKey = "throttle_secs" // API - ModelKey = "model" - ModelTypeKey = "model_type" + ModelKey = "model" + ModelTypeKey = "model_type" RequesthandlerKey = "request_handler" // compute diff --git a/pkg/operator/endpoints/shared.go b/pkg/operator/endpoints/shared.go index ea689f8045..d276e02ccb 100644 --- a/pkg/operator/endpoints/shared.go +++ b/pkg/operator/endpoints/shared.go @@ -53,7 +53,6 @@ func RespondError(w http.ResponseWriter, err error, strs ...string) { func RespondErrorCode(w http.ResponseWriter, code int, err error, strs ...string) { err = errors.Wrap(err, strs...) errors.PrintError(err) - errors.PrintStacktrace(err) w.WriteHeader(code) response := schema.ErrorResponse{ diff --git a/pkg/workloads/tf_api/api.py b/pkg/workloads/tf_api/api.py index cf4c52493f..dcb64d4efd 100644 --- a/pkg/workloads/tf_api/api.py +++ b/pkg/workloads/tf_api/api.py @@ -246,14 +246,14 @@ def parse_response_proto_raw(response_proto): def run_predict(sample): request_handler = local_cache.get("request_handler") - preprocessed_sample = sample + prepared_sample = sample if request_handler is not None and util.has_function(request_handler, "preinference"): - preprocessed_sample = request_handler.preinference( + prepared_sample = request_handler.preinference( sample, local_cache["metadata"]["signatureDef"] ) if util.is_resource_ref(local_cache["api"]["model"]): - transformed_sample = transform_sample(preprocessed_sample) + transformed_sample = transform_sample(prepared_sample) prediction_request = create_prediction_request(transformed_sample) response_proto = local_cache["stub"].Predict(prediction_request, timeout=10.0) result = parse_response_proto(response_proto) @@ -268,7 +268,7 @@ def run_predict(sample): result["transformed_sample"] = transformed_sample else: - prediction_request = create_raw_prediction_request(preprocessed_sample) + prediction_request = create_raw_prediction_request(prepared_sample) response_proto = local_cache["stub"].Predict(prediction_request, timeout=10.0) result = parse_response_proto_raw(response_proto) util.log_indent("Sample:", indent=4) From 05e3b1aa21b6e0c384fbbe2589c64dcef2e213c6 Mon Sep 17 00:00:00 2001 From: vishal Date: Thu, 4 Jul 2019 02:25:48 +0000 Subject: [PATCH 07/24] Fix linting --- pkg/workloads/lib/context.py | 1 + pkg/workloads/lib/util.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/workloads/lib/context.py b/pkg/workloads/lib/context.py index 
3272b82b5a..4216ae7989 100644 --- a/pkg/workloads/lib/context.py +++ b/pkg/workloads/lib/context.py @@ -684,6 +684,7 @@ def cast_compound_type(value, type_str): ] } + def _validate_impl(impl, impl_req): for optional_func in impl_req.get("optional", []): _validate_optional_fn_args(impl, optional_func["name"], optional_func["args"]) diff --git a/pkg/workloads/lib/util.py b/pkg/workloads/lib/util.py index 583797ca1f..b4b8f3094d 100644 --- a/pkg/workloads/lib/util.py +++ b/pkg/workloads/lib/util.py @@ -930,9 +930,10 @@ def extract_resource_refs(input): return set() + def has_function(impl, fn_name): fn = getattr(impl, fn_name, None) if fn is None: return False - return callable(fn) \ No newline at end of file + return callable(fn) From 143e27c47b402483a671c424c6634233b4c0a6ff Mon Sep 17 00:00:00 2001 From: vishal Date: Thu, 4 Jul 2019 02:41:31 +0000 Subject: [PATCH 08/24] Remove unnecessary logs --- pkg/workloads/onnx_serve/api.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pkg/workloads/onnx_serve/api.py b/pkg/workloads/onnx_serve/api.py index b9e636877c..a3c9fccd8a 100644 --- a/pkg/workloads/onnx_serve/api.py +++ b/pkg/workloads/onnx_serve/api.py @@ -154,9 +154,7 @@ def predict(app_name, api_name): sample = request_handler.preinference(sample, input_metadata) inference_input = convert_to_onnx_input(sample, input_metadata) - logger.info(inference_input) model_outputs = sess.run([], inference_input) - logger.info(model_outputs) result = [] for model_output in model_outputs: if type(model_output) is np.ndarray: @@ -187,7 +185,6 @@ def predict(app_name, api_name): def start(args): - logger.info(args) ctx = Context(s3_path=args.context, cache_dir=args.cache_dir, workload_id=args.workload_id) api = ctx.apis_id_map[args.api] @@ -197,7 +194,6 @@ def start(args): package.install_packages(ctx.python_packages, ctx.storage) local_cache["request_handler"], _ = ctx.get_request_handler_impl(api["name"]) - logger.info(ctx) model_cache_path = os.path.join(args.model_dir, args.api) if not os.path.exists(model_cache_path): ctx.storage.download_file_external(api["model"], model_cache_path) From b2838e31cbac22ea0e6004f285874ebecffb3982 Mon Sep 17 00:00:00 2001 From: vishal Date: Thu, 4 Jul 2019 12:03:54 +0000 Subject: [PATCH 09/24] Move models to cortex-examples bucket --- examples/iris/cortex.yaml | 15 +++++++++---- examples/models/iris_keras.py | 38 +++++++++++++++++++++++++++++++++ pkg/workloads/onnx_serve/api.py | 6 +++++- 3 files changed, 54 insertions(+), 5 deletions(-) create mode 100644 examples/models/iris_keras.py diff --git a/examples/iris/cortex.yaml b/examples/iris/cortex.yaml index 78ee7f1ead..807454d152 100644 --- a/examples/iris/cortex.yaml +++ b/examples/iris/cortex.yaml @@ -11,15 +11,15 @@ - kind: api name: iris-pytorch-nn model_type: onnx - request_handler: inference_pytorch.py - model: s3://data-vishal/iris_pytorch.onnx + request_handler: inference_pytorch2.py + model: s3://cortex-examples/iris_pytorch.onnx compute: max_replicas: 1 - kind: api name: iris-xgb-classifier model_type: onnx - model: s3://data-vishal/iris_xgb.onnx + model: s3://cortex-examples/iris_xgb.onnx compute: max_replicas: 1 @@ -27,6 +27,13 @@ name: iris-sklearn-logistic-regression model_type: onnx request_handler: inference.py - model: s3://data-vishal/iris_sklearn_logreg.onnx + model: s3://cortex-examples/iris_sklearn_logreg.onnx + compute: + max_replicas: 1 + +- kind: api + name: iris-keras + model_type: onnx + model: s3://data-vishal/iris_keras.onnx compute: max_replicas: 1 diff --git 
a/examples/models/iris_keras.py b/examples/models/iris_keras.py new file mode 100644 index 0000000000..467ccb79f5 --- /dev/null +++ b/examples/models/iris_keras.py @@ -0,0 +1,38 @@ +""" +Requirements.txt + +scikit-learn +keras +keras2onnx +tensorflow +onnxruntime +""" +import numpy as np +from sklearn.datasets import load_iris +from sklearn.model_selection import train_test_split +from keras.models import Sequential +from keras.layers import Dense +from keras.utils import np_utils +import onnxruntime as rt +import keras2onnx + +iris = load_iris() +X, y = iris.data, iris.target +y = np_utils.to_categorical(y) +X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + +model = Sequential(name="iris") +model.add(Dense(30, input_dim=4, activation="relu", name="input")) +model.add(Dense(3, activation="softmax", name="last")) + +model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"]) + +model.fit(X_train, y_train, epochs=100) + +scores = model.evaluate(X_test, y_test) +print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100)) + +onnx_model = keras2onnx.convert_keras(model) + +with open("iris_keras.onnx", "wb") as f: + f.write(onnx_model.SerializeToString()) diff --git a/pkg/workloads/onnx_serve/api.py b/pkg/workloads/onnx_serve/api.py index a3c9fccd8a..0a0fbb7456 100644 --- a/pkg/workloads/onnx_serve/api.py +++ b/pkg/workloads/onnx_serve/api.py @@ -78,11 +78,15 @@ def transform_to_numpy(input_pyobj, input_metadata): target_dtype = onnx_to_np[input_metadata.type] target_shape = input_metadata.shape + for idx, dim in enumerate(target_shape): + if dim is None: + target_shape[idx] = 1 + if type(input_pyobj) is not np.ndarray: np_arr = np.array(input_pyobj, dtype=target_dtype) else: np_arr = input_pyobj - np_arr = np_arr.reshape(tuple(target_shape)) + np_arr = np_arr.reshape(target_shape) return np_arr From 3ef4eb8214ee589ea75aaf6ae8f092e682aa35cc Mon Sep 17 00:00:00 2001 From: vishal Date: Thu, 4 Jul 2019 18:26:01 +0000 Subject: [PATCH 10/24] Add docs and respond to PR comments --- Makefile | 2 + cli/cmd/predict.go | 244 +++++++++++++++++++++- dev/registry.sh | 4 +- docs/apis/apis.md | 41 ++++ docs/apis/packaging-models.md | 34 +++ docs/apis/request_handler.md | 3 + docs/cluster/config.md | 1 + docs/cluster/development.md | 1 + examples/iris/cortex.yaml | 53 +++-- examples/models/iris_keras.py | 4 +- examples/models/iris_pytorch.py | 16 +- examples/models/iris_sklearn_logreg.py | 13 +- examples/models/iris_xgboost.py | 10 +- pkg/consts/consts.go | 2 +- pkg/operator/api/context/context.go | 4 +- pkg/operator/api/context/dependencies.go | 2 +- pkg/operator/api/userconfig/apis.go | 9 +- pkg/operator/api/userconfig/config_key.go | 2 +- pkg/operator/api/userconfig/errors.go | 9 + pkg/operator/context/apis.go | 38 ++-- pkg/operator/workloads/api.go | 18 +- pkg/workloads/lib/context.py | 8 +- pkg/workloads/onnx_serve/api.py | 8 +- 23 files changed, 428 insertions(+), 98 deletions(-) create mode 100644 docs/apis/request_handler.md diff --git a/Makefile b/Makefile index 324b3e0bea..3a693d26ab 100644 --- a/Makefile +++ b/Makefile @@ -128,6 +128,7 @@ ci-build-images: @./build/build-image.sh images/tf-serve tf-serve @./build/build-image.sh images/tf-serve-gpu tf-serve-gpu @./build/build-image.sh images/tf-api tf-api + @./build/build-image.sh images/onnx-serve onnx-serve @./build/build-image.sh images/operator operator @./build/build-image.sh images/fluentd fluentd @./build/build-image.sh images/nginx-controller nginx-controller @@ -147,6 
+148,7 @@ ci-push-images: @./build/push-image.sh tf-serve @./build/push-image.sh tf-serve-gpu @./build/push-image.sh tf-api + @./build/push-image.sh onnx-serve @./build/push-image.sh operator @./build/push-image.sh fluentd @./build/push-image.sh nginx-controller diff --git a/cli/cmd/predict.go b/cli/cmd/predict.go index a104aef179..0de4c5c98f 100644 --- a/cli/cmd/predict.go +++ b/cli/cmd/predict.go @@ -23,6 +23,7 @@ import ( "strings" "github.com/spf13/cobra" + "github.com/cortexlabs/yaml" "github.com/cortexlabs/cortex/pkg/lib/errors" "github.com/cortexlabs/cortex/pkg/lib/files" @@ -45,6 +46,19 @@ type PredictResponse struct { Predictions []interface{} `json:"predictions"` } +type Prediction struct { + Prediction interface{} `json:"prediction"` + PredictionReversed interface{} `json:"prediction_reversed"` + TransformedSample interface{} `json:"transformed_sample"` + Response interface{} `json:"response"` +} + + +// type PredictResponseExternalModel struct { +// ResourceID string `json:"resource_id"` +// Predictions []interface{} `json:"predictions"` +// } + var predictCmd = &cobra.Command{ Use: "predict API_NAME SAMPLES_FILE", Short: "make predictions", @@ -70,10 +84,9 @@ var predictCmd = &cobra.Command{ apiPath := apiGroupStatus.ActiveStatus.Path apiURL := urls.Join(resourcesRes.APIsBaseURL, apiPath) predictResponse, err := makePredictRequest(apiURL, samplesJSONPath) - if err != nil { if strings.Contains(err.Error(), "503 Service Temporarily Unavailable") || strings.Contains(err.Error(), "502 Bad Gateway") { - errors.Exit(ErrorAPINotReady(apiName, resource.StatusUpdating.Message())) + errors.Exit(ErrorAPINotReady(apiName, resource.StatusCreating.Message())) } errors.Exit(err) } @@ -90,6 +103,7 @@ var predictCmd = &cobra.Command{ apiID := predictResponse.ResourceID api := resourcesRes.APIStatuses[apiID] + _, isExternalModel := yaml.ExtractAtSymbolText(resourcesRes.Context.APIs[apiName].Model) apiStart := libtime.LocalTimestampHuman(api.Start) fmt.Println("\n" + apiName + " was last updated on " + apiStart + "\n") @@ -101,12 +115,38 @@ var predictCmd = &cobra.Command{ } for _, prediction := range predictResponse.Predictions { - prettyResp, err := json.Pretty(prediction) - if err != nil { - errors.Exit(err) + if isExternalModel { + prettyResp, err := json.Pretty(prediction) + if err != nil { + errors.Exit(err) + } + + fmt.Println(prettyResp) + continue } - fmt.Println(prettyResp) + parsedPrediction := prediction.(Prediction) + if parsedPrediction.Prediction == nil { + prettyResp, err := json.Pretty(parsedPrediction.Response) + if err != nil { + errors.Exit(err) + } + + fmt.Println(prettyResp) + continue + } + + value := parsedPrediction.Prediction + if parsedPrediction.PredictionReversed != nil { + value = parsedPrediction.PredictionReversed + } + + if cast.IsFloatType(value) { + casted, _ := cast.InterfaceToFloat64(value) + fmt.Println(s.Round(casted, 2, true)) + } else { + fmt.Println(s.UserStrStripped(value)) + } } }, } @@ -136,3 +176,195 @@ func makePredictRequest(apiURL string, samplesJSONPath string) (*PredictResponse return &predictResponse, nil } + + + +// var predictCmd = &cobra.Command{ +// Use: "predict API_NAME SAMPLES_FILE", +// Short: "make predictions", +// Long: "Make predictions.", +// Args: cobra.ExactArgs(2), +// Run: func(cmd *cobra.Command, args []string) { +// apiName := args[0] +// samplesJSONPath := args[1] + +// resourcesRes, err := getResourcesResponse() +// if err != nil { +// errors.Exit(err) +// } + +// apiGroupStatus := 
resourcesRes.APIGroupStatuses[apiName] +// if apiGroupStatus == nil { +// errors.Exit(ErrorAPINotFound(apiName)) +// } +// if apiGroupStatus.ActiveStatus == nil { +// errors.Exit(ErrorAPINotReady(apiName, apiGroupStatus.Message())) +// } + +// api := resourcesRes.Context.APIs[apiName] +// apiPath := apiGroupStatus.ActiveStatus.Path +// apiURL := urls.Join(resourcesRes.APIsBaseURL, apiPath) + +// if _, ok := yaml.ExtractAtSymbolText(api.Model); ok { +// // e2e +// predict(apiURL, samplesJSONPath) +// } else { +// predictExternalModel(apiURL, samplesJSONPath) +// } +// }, +// } + +// func predict(apiURL string, samplesJSONPath string) { +// httpResponse := makePredictRequest(apiURL, samplesJSONPath) + +// var predictResponse PredictResponse +// err := json.DecodeWithNumber(httpResponse, &predictResponse) +// if err != nil { +// return nil, errors.Wrap(err, "prediction response") +// } + +// if predictPrintJSON { +// prettyResp, err := json.Pretty(predictResponse) +// if err != nil { +// errors.Exit(err) +// } + +// fmt.Println(prettyResp) +// return +// } + +// apiID := predictResponse.ResourceID +// api := resourcesRes.APIStatuses[apiID] + +// apiStart := libtime.LocalTimestampHuman(api.Start) +// fmt.Println("\n" + apiName + " was last updated on " + apiStart + "\n") + +// if len(predictResponse.Predictions) == 1 { +// fmt.Println("Prediction:") +// } else { +// fmt.Println("Predictions:") +// } + +// for _, prediction := range predictResponse.Predictions { +// if prediction.Prediction == nil { +// prettyResp, err := json.Pretty(prediction.Response) +// if err != nil { +// errors.Exit(err) +// } + +// fmt.Println(prettyResp) +// continue +// } + +// value := prediction.Prediction +// if prediction.PredictionReversed != nil { +// value = prediction.PredictionReversed +// } + +// if cast.IsFloatType(value) { +// casted, _ := cast.InterfaceToFloat64(value) +// fmt.Println(s.Round(casted, 2, true)) +// } else { +// fmt.Println(s.UserStrStripped(value)) +// } +// } +// } + +// func predictExternalModel(apiURL string, samplesJSONPath string) { +// httpResponse := makePredictRequest(apiURL, samplesJSONPath) + +// var predictResponse PredictResponseExternalModel +// err = json.DecodeWithNumber(httpResponse, &predictResponse) +// if err != nil { +// return nil, errors.Wrap(err, "prediction response") +// } + +// if predictPrintJSON { +// prettyResp, err := json.Pretty(predictResponse) +// if err != nil { +// errors.Exit(err) +// } + +// fmt.Println(prettyResp) +// return +// } + +// apiID := predictResponse.ResourceID +// api := 
resourcesRes.APIStatuses[apiID] + +// apiStart := libtime.LocalTimestampHuman(api.Start) +// fmt.Println("\n" + apiName + " was last updated on " + apiStart + "\n") + +// if len(predictResponse.Predictions) == 1 { +// fmt.Println("Prediction:") +// } else { +// fmt.Println("Predictions:") +// } + +// for _, prediction := range predictResponse.Predictions { +// prettyResp, err := json.Pretty(prediction) +// if err != nil { +// errors.Exit(err) +// } + +// fmt.Println(prettyResp) +// } +// } + +// func makePredictRequest(apiURL string, samplesJSONPath string) []byte { +// samplesBytes, err := files.ReadFileBytes(samplesJSONPath) +// if err != nil { +// errors.Exit(err) +// } +// payload := bytes.NewBuffer(samplesBytes) +// req, err := http.NewRequest("POST", apiURL, payload) +// if err != nil { +// return nil, errors.Exit(err, errStrCantMakeRequest) +// } + +// req.Header.Set("Content-Type", "application/json") +// httpResponse, err := makeRequest(req) +// if err != nil { +// if strings.Contains(err.Error(), "503 Service Temporarily Unavailable") || strings.Contains(err.Error(), "502 Bad Gateway") { +// errors.Exit(ErrorAPINotReady(apiName, resource.StatusUpdating.Message())) +// } +// errors.Exit(err) +// } + +// return &predictResponse +// } diff --git a/dev/registry.sh b/dev/registry.sh index 5b7976d96a..8198eb6cd5 100755 --- a/dev/registry.sh +++ b/dev/registry.sh @@ -130,7 +130,9 @@ elif [ "$cmd" = "update" ]; then cache_builder $ROOT/images/spark-operator spark-operator build_and_push $ROOT/images/spark-operator spark-operator latest - + build_and_push $ROOT/images/spark spark latest + build_and_push $ROOT/images/tf-train tf-train latest + build_and_push $ROOT/images/tf-train-gpu tf-train-gpu latest build_and_push $ROOT/images/nginx-controller nginx-controller latest build_and_push $ROOT/images/nginx-backend nginx-backend latest build_and_push $ROOT/images/fluentd fluentd latest diff --git a/docs/apis/apis.md b/docs/apis/apis.md index 4b6a27ec20..ecb79f5ae4 100644 --- a/docs/apis/apis.md +++ b/docs/apis/apis.md @@ -8,6 +8,8 @@ Serve models at scale and use them to build smarter applications. - kind: api name: # API name (required) model: # path to a zipped model dir (e.g. s3://my-bucket/model.zip) + model_type: # framework of the model, currently supports tensorflow and onnx + request_handler: # path to the implementation file, relative to the cortex root compute: min_replicas: # minimum number of replicas (default: 1) max_replicas: # maximum number of replicas (default: 100) @@ -32,6 +34,45 @@ See [packaging models](packaging-models.md) for how to create the zipped model. cpu: "1" ``` +## Custom Request Handlers + +API endpoints can be customized by providing a request handler. Request handlers can be used to prepare request payloads before they are passed to the model for inference, and to modify model predictions before they are served. + +```python +def preinference(sample, metadata): + """Prepare a sample before it is passed into the model. + + Args: + sample: A single sample in the request payload, converted from JSON to a Python object. + + metadata: Describes the expected shape and type of inputs to the model. 
+ If the API model_type is tensorflow, the object is a map + https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/meta_graph.proto + If the API model_type is onnx, the object is a list of [onnxruntime.NodeArg] + https://microsoft.github.io/onnxruntime/api_summary.html#onnxruntime.NodeArg + + Returns: + If the model has only one input, return a Python list or numpy array of the expected type and shape. If the model has more than one input, return a dictionary mapping input names to Python lists or numpy arrays of the expected type and shape. + """ + pass + +def postinference(prediction, metadata): + """Modify a prediction from the model before it is added to the response payload. + + Args: + prediction: The prediction returned by the model + + metadata: Describes the output shape and type of outputs from the model. + If the API model_type is tensorflow, the object is a map + https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/meta_graph.proto + If the API model_type is onnx, the object is a list of [onnxruntime.NodeArg] + https://microsoft.github.io/onnxruntime/api_summary.html#onnxruntime.NodeArg + + Returns: + A Python object that can be marshalled to JSON. + """ +``` + ## Integration APIs can be integrated into other applications or services via their JSON endpoints. The endpoint for any API has the following format: {apis_endpoint}/{deployment_name}/{api_name}. diff --git a/docs/apis/packaging-models.md b/docs/apis/packaging-models.md index 4837829420..04e93fddd3 100644 --- a/docs/apis/packaging-models.md +++ b/docs/apis/packaging-models.md @@ -22,5 +22,39 @@ Specify `model` in an API, e.g. ```yaml - kind: api name: my-api + model_type: tensorflow model: s3://my-bucket/model.zip ``` + +## ONNX + +Convert your model to the ONNX format. + +```python +# Convert PyTorch model to ONNX +dummy_input = torch.randn(1, 4) + +torch.onnx.export( + model, dummy_input, "iris_pytorch.onnx", input_names=["input"], output_names=["species"] +) +``` + +See the following examples of converting models from common ML frameworks to ONNX: + +* [PyTorch](https://github.com/cortexlabs/cortex/blob/master/examples/models/iris_pytorch.py) +* [scikit-learn](https://github.com/cortexlabs/cortex/blob/master/examples/models/iris_sklearn_logreg.py) +* [XGBoost](https://github.com/cortexlabs/cortex/blob/master/examples/models/iris_xgboost.py) +* [Keras](https://github.com/cortexlabs/cortex/blob/master/examples/models/iris_keras.py) + +```text +$ aws s3 cp model.onnx s3://my-bucket/model.onnx +``` +
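+Before uploading, you can optionally sanity-check the exported file with onnxruntime (a minimal sketch; it assumes `onnxruntime` is installed locally and reuses the `iris_xgb.onnx` file and the `FloatTensorType([1, 4])` input from the conversion scripts above):
+
+```python
+import numpy as np
+import onnxruntime as rt
+
+# load the exported model and look up the name of its single input node
+sess = rt.InferenceSession("iris_xgb.onnx")
+input_name = sess.get_inputs()[0].name
+
+# one iris sample, shaped to match FloatTensorType([1, 4])
+sample = np.array([[5.9, 3.0, 5.1, 1.8]], dtype=np.float32)
+
+# passing None as the output list runs all of the model's outputs
+print(sess.run(None, {input_name: sample}))
+```
+
+Specify `model` in an API, e.g. 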
+ +```yaml +- kind: api + name: my-api + model_type: onnx + model: s3://my-bucket/model.onnx +``` diff --git a/docs/apis/request_handler.md b/docs/apis/request_handler.md new file mode 100644 index 0000000000..de45fab69f --- /dev/null +++ b/docs/apis/request_handler.md @@ -0,0 +1,3 @@ +# Request Handlers + +Request handlers diff --git a/docs/cluster/config.md b/docs/cluster/config.md index 68f1e61c56..facd12bd90 100644 --- a/docs/cluster/config.md +++ b/docs/cluster/config.md @@ -50,6 +50,7 @@ export CORTEX_IMAGE_TF_TRAIN="cortexlabs/tf-train:master" export CORTEX_IMAGE_TF_API="cortexlabs/tf-api:master" export CORTEX_IMAGE_TF_TRAIN_GPU="cortexlabs/tf-train-gpu:master" export CORTEX_IMAGE_TF_SERVE_GPU="cortexlabs/tf-serve-gpu:master" +export CORTEX_IMAGE_ONNX_SERVE="cortexlabs/onnx-serve:master" export CORTEX_IMAGE_PYTHON_PACKAGER="cortexlabs/python-packager:master" export CORTEX_IMAGE_CLUSTER_AUTOSCALER="cortexlabs/cluster-autoscaler:master" export CORTEX_IMAGE_METRICS_SERVER="cortexlabs/metrics-server:master" diff --git a/docs/cluster/development.md b/docs/cluster/development.md index 4d7b4a212e..47a976a243 100644 --- a/docs/cluster/development.md +++ b/docs/cluster/development.md @@ -76,6 +76,7 @@ export CORTEX_IMAGE_ARGO_EXECUTOR="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cort export CORTEX_IMAGE_FLUENTD="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/fluentd:latest" export CORTEX_IMAGE_NGINX_BACKEND="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/nginx-backend:latest" export CORTEX_IMAGE_NGINX_CONTROLLER="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/nginx-controller:latest" +export CORTEX_IMAGE_ONNX_SERVE="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/onnx-serve:latest" export CORTEX_IMAGE_OPERATOR="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/operator:latest" export CORTEX_IMAGE_SPARK="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/spark:latest" export CORTEX_IMAGE_SPARK_OPERATOR="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/spark-operator:latest" diff --git a/examples/iris/cortex.yaml b/examples/iris/cortex.yaml index 807454d152..326ea7311a 100644 --- a/examples/iris/cortex.yaml +++ b/examples/iris/cortex.yaml @@ -3,37 +3,36 @@ - kind: api name: iris-tf-nn - model_type: tensorflow model: s3://cortex-examples/iris-tensorflow.zip compute: max_replicas: 1 -- kind: api - name: iris-pytorch-nn - model_type: onnx - request_handler: inference_pytorch2.py - model: s3://cortex-examples/iris_pytorch.onnx - compute: - max_replicas: 1 +# - kind: api +# name: iris-pytorch-nn +# model_type: onnx +# request_handler: inference_pytorch2.py +# model: s3://cortex-examples/iris_pytorch.onnx +# compute: +# max_replicas: 1 -- kind: api - name: iris-xgb-classifier - model_type: onnx - model: s3://cortex-examples/iris_xgb.onnx - compute: - max_replicas: 1 +# - kind: api +# name: iris-xgb-classifier +# model_type: onnx +# model: s3://cortex-examples/iris_xgb.onnx +# compute: +# max_replicas: 1 -- kind: api - name: iris-sklearn-logistic-regression - model_type: onnx - request_handler: inference.py - model: s3://cortex-examples/iris_sklearn_logreg.onnx - compute: - max_replicas: 1 +# - kind: api +# name: iris-sklearn-logistic-regression +# model_type: onnx +# request_handler: inference.py +# model: s3://cortex-examples/iris_sklearn_logreg.onnx +# compute: +# max_replicas: 1 -- kind: api - name: iris-keras - model_type: onnx - model: s3://data-vishal/iris_keras.onnx - compute: - max_replicas: 1 +# - kind: api +# name: iris-keras +# model_type: onnx +# model: 
s3://data-vishal/iris_keras.onnx +# compute: +# max_replicas: 1 diff --git a/examples/models/iris_keras.py b/examples/models/iris_keras.py index 467ccb79f5..77ee8682c2 100644 --- a/examples/models/iris_keras.py +++ b/examples/models/iris_keras.py @@ -5,7 +5,6 @@ keras keras2onnx tensorflow -onnxruntime """ import numpy as np from sklearn.datasets import load_iris @@ -13,7 +12,6 @@ from keras.models import Sequential from keras.layers import Dense from keras.utils import np_utils -import onnxruntime as rt import keras2onnx iris = load_iris() @@ -32,7 +30,7 @@ scores = model.evaluate(X_test, y_test) print("\n%s: %.2f%%" % (model.metrics_names[1], scores[1] * 100)) +# Convert to ONNX model format onnx_model = keras2onnx.convert_keras(model) - with open("iris_keras.onnx", "wb") as f: f.write(onnx_model.SerializeToString()) diff --git a/examples/models/iris_pytorch.py b/examples/models/iris_pytorch.py index db6effdbf7..2dea1dbc14 100644 --- a/examples/models/iris_pytorch.py +++ b/examples/models/iris_pytorch.py @@ -42,15 +42,15 @@ def forward(self, X): train_y = Variable(torch.Tensor(y_train).long()) test_y = Variable(torch.Tensor(y_test).long()) -net = Net() +model = Net() criterion = nn.CrossEntropyLoss() # cross entropy loss -optimizer = torch.optim.SGD(net.parameters(), lr=0.01) +optimizer = torch.optim.SGD(model.parameters(), lr=0.01) for epoch in range(1000): optimizer.zero_grad() - out = net(train_X) + out = model(train_X) loss = criterion(out, train_y) loss.backward() optimizer.step() @@ -58,18 +58,14 @@ def forward(self, X): if epoch % 100 == 0: print("number of epoch {} loss {}".format(epoch, loss)) -predict_out = net(test_X) +predict_out = model(test_X) _, predict_y = torch.max(predict_out, 1) print("prediction accuracy {}".format(accuracy_score(test_y.data, predict_y.data))) +# Convert to ONNX model format dummy_input = torch.randn(1, 4) torch.onnx.export( - net, - dummy_input, - "iris_pytorch.onnx", - verbose=True, - input_names=["input"], - output_names=["species"], + model, dummy_input, "iris_pytorch.onnx", input_names=["input"], output_names=["species"] ) diff --git a/examples/models/iris_sklearn_logreg.py b/examples/models/iris_sklearn_logreg.py index 84caf38a77..57aad00eb8 100644 --- a/examples/models/iris_sklearn_logreg.py +++ b/examples/models/iris_sklearn_logreg.py @@ -9,20 +9,21 @@ import numpy as np from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split -from onnxconverter_common.data_types import FloatTensorType from sklearn.linear_model import LogisticRegression +from onnxconverter_common.data_types import FloatTensorType from onnxmltools import convert_sklearn -import onnxruntime as rt iris = load_iris() X, y = iris.data, iris.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42) -lr = LogisticRegression(solver="lbfgs", multi_class="multinomial") -lr.fit(X_train, y_train) +logreg_model = LogisticRegression(solver="lbfgs", multi_class="multinomial") +logreg_model.fit(X_train, y_train) + +print("Test data accuracy: {:.2f}".format(logreg_model.score(X_test, y_test))) -print("Test data accuracy of the logistic regressor is {:.2f}".format(lr.score(X_test, y_test))) -onnx_model = convert_sklearn(lr, initial_types=[("input", FloatTensorType([1, 4]))]) +# Convert to ONNX model format +onnx_model = convert_sklearn(logreg_model, initial_types=[("input", FloatTensorType([1, 4]))]) with open("iris_sklearn_logreg.onnx", "wb") as f: f.write(onnx_model.SerializeToString()) diff --git 
a/examples/models/iris_xgboost.py b/examples/models/iris_xgboost.py index e8314521ca..404be19596 100644 --- a/examples/models/iris_xgboost.py +++ b/examples/models/iris_xgboost.py @@ -11,17 +11,17 @@ from sklearn.model_selection import train_test_split from onnxmltools.convert import convert_xgboost from onnxconverter_common.data_types import FloatTensorType -import onnxruntime as rt iris = load_iris() X, y = iris.data, iris.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42) -xgb_clf = xgb.XGBClassifier() -xgb_clf = xgb_clf.fit(X_train, y_train) +xgb_model = xgb.XGBClassifier() +xgb_model = xgb_model.fit(X_train, y_train) -print("Test data accuracy of the xgb classifier is {:.2f}".format(xgb_clf.score(X_test, y_test))) +print("Test data accuracy of the xgb classifier is {:.2f}".format(xgb_model.score(X_test, y_test))) -onnx_model = convert_xgboost(xgb_clf, initial_types=[("input", FloatTensorType([1, 4]))]) +# Convert to ONNX model format +onnx_model = convert_xgboost(xgb_model, initial_types=[("input", FloatTensorType([1, 4]))]) with open("iris_xgb.onnx", "wb") as f: f.write(onnx_model.SerializeToString()) diff --git a/pkg/consts/consts.go b/pkg/consts/consts.go index 5e0b0ffc96..d8295de37d 100644 --- a/pkg/consts/consts.go +++ b/pkg/consts/consts.go @@ -47,7 +47,7 @@ var ( TransformersDir = "transformers" EstimatorsDir = "estimators" PythonPackagesDir = "python_packages" - RequestHandlersDir = "request_handlers_dir" + RequestHandlersDir = "request_handlers" ModelsDir = "models" ConstantsDir = "constants" ContextsDir = "contexts" diff --git a/pkg/operator/api/context/context.go b/pkg/operator/api/context/context.go index 1ee8131f13..7dafe70b25 100644 --- a/pkg/operator/api/context/context.go +++ b/pkg/operator/api/context/context.go @@ -17,6 +17,8 @@ limitations under the License. 
package context import ( + "fmt" + "github.com/cortexlabs/cortex/pkg/lib/errors" "github.com/cortexlabs/cortex/pkg/lib/sets/strset" "github.com/cortexlabs/cortex/pkg/operator/api/resource" @@ -206,7 +208,7 @@ func (ctx *Context) PopulateWorkloadIDs(resourceWorkloadIDs map[string]string) { func (ctx *Context) CheckAllWorkloadIDsPopulated() error { for _, res := range ctx.ComputedResources() { if res.GetWorkloadID() == "" { - return errors.New(ctx.App.Name, "resource", res.GetID(), "workload ID is missing", res.GetName()) // unexpected + return errors.New(ctx.App.Name, "workload ID missing", fmt.Sprintf("%s (ID: %s)", res.GetName(), res.GetID())) // unexpected } } return nil diff --git a/pkg/operator/api/context/dependencies.go b/pkg/operator/api/context/dependencies.go index 774dbaeea4..af10369d79 100644 --- a/pkg/operator/api/context/dependencies.go +++ b/pkg/operator/api/context/dependencies.go @@ -160,7 +160,7 @@ func (ctx *Context) apiDependencies(api *API) strset.Set { dependencies.Add(model.ID) } - if api.Requesthandler != nil { + if api.RequestHandler != nil { for _, pythonPackage := range ctx.PythonPackages { dependencies.Add(pythonPackage.GetID()) } diff --git a/pkg/operator/api/userconfig/apis.go b/pkg/operator/api/userconfig/apis.go index 48abaaef50..dfe5d47ee3 100644 --- a/pkg/operator/api/userconfig/apis.go +++ b/pkg/operator/api/userconfig/apis.go @@ -35,7 +35,7 @@ type API struct { ResourceFields Model string `json:"model" yaml:"model"` ModelType ModelType `json:"model_type" yaml:"model_type"` - Requesthandler *string `json:"request_handler" yaml:"request_handler"` + RequestHandler *string `json:"request_handler" yaml:"request_handler"` Compute *APICompute `json:"compute" yaml:"compute"` Tags Tags `json:"tags" yaml:"tags"` } @@ -57,12 +57,13 @@ var apiValidation = &cr.StructValidation{ }, }, { - StructField: "Requesthandler", + StructField: "RequestHandler", StringPtrValidation: &cr.StringPtrValidation{}, }, { StructField: "ModelType", StringValidation: &cr.StringValidation{ + Required: true, AllowedValues: ModelTypeStrings(), }, Parser: func(str string) (interface{}, error) { @@ -83,8 +84,8 @@ func (api *API) UserConfigStr() string { if api.ModelType != UnknownModelType { sb.WriteString(fmt.Sprintf("%s: %s\n", ModelTypeKey, api.ModelType.String())) } - if api.Requesthandler != nil { - sb.WriteString(fmt.Sprintf("%s: %s\n", RequesthandlerKey, *api.Requesthandler)) + if api.RequestHandler != nil { + sb.WriteString(fmt.Sprintf("%s: %s\n", RequestHandlerKey, *api.RequestHandler)) } if api.Compute != nil { sb.WriteString(fmt.Sprintf("%s:\n", ComputeKey)) diff --git a/pkg/operator/api/userconfig/config_key.go b/pkg/operator/api/userconfig/config_key.go index 693c152ae4..b413d1b970 100644 --- a/pkg/operator/api/userconfig/config_key.go +++ b/pkg/operator/api/userconfig/config_key.go @@ -93,7 +93,7 @@ const ( // API ModelKey = "model" ModelTypeKey = "model_type" - RequesthandlerKey = "request_handler" + RequestHandlerKey = "request_handler" // compute ComputeKey = "compute" diff --git a/pkg/operator/api/userconfig/errors.go b/pkg/operator/api/userconfig/errors.go index a0cfd9a6d6..bea40b901e 100644 --- a/pkg/operator/api/userconfig/errors.go +++ b/pkg/operator/api/userconfig/errors.go @@ -79,6 +79,7 @@ const ( ErrImplDoesNotExist ErrInvalidS3PathOrResourceReference ErrExternalNotFound + ErrUnknownModelType ) var errorKinds = []string{ @@ -131,6 +132,7 @@ var errorKinds = []string{ "err_impl_does_not_exist", "err_invalid_s3_path_or_resource_reference", "err_external_not_found", + 
"err_unknown_model_type", } var _ = [1]int{}[int(ErrExternalNotFound)-(len(errorKinds)-1)] // Ensure list length matches @@ -604,3 +606,10 @@ func ErrorInvalidS3PathOrResourceReference(provided string) error { message: s3ErrMsg + ", and is not a cortex resource reference (which starts with @)", } } + +func ErrorUnknownModelType() error { + return Error{ + Kind: ErrUnknownModelType, + message: "Unknown model type encountered", + } +} diff --git a/pkg/operator/context/apis.go b/pkg/operator/context/apis.go index a08dc5922d..4ae5f1a0b8 100644 --- a/pkg/operator/context/apis.go +++ b/pkg/operator/context/apis.go @@ -23,6 +23,7 @@ import ( "github.com/cortexlabs/cortex/pkg/consts" "github.com/cortexlabs/cortex/pkg/lib/errors" "github.com/cortexlabs/cortex/pkg/lib/hash" + "github.com/cortexlabs/cortex/pkg/lib/pointer" "github.com/cortexlabs/cortex/pkg/lib/sets/strset" "github.com/cortexlabs/cortex/pkg/operator/api/context" "github.com/cortexlabs/cortex/pkg/operator/api/resource" @@ -48,20 +49,25 @@ func getAPIs(config *userconfig.Config, buf.WriteString(apiConfig.Name) buf.WriteString(apiConfig.ModelType.String()) - if apiConfig.Requesthandler != nil { + if apiConfig.RequestHandler != nil { for _, pythonPackage := range pythonPackages { buf.WriteString(pythonPackage.GetID()) } - impl, ok := impls[*apiConfig.Requesthandler] + impl, ok := impls[*apiConfig.RequestHandler] if !ok { - return nil, errors.Wrap(userconfig.ErrorImplDoesNotExist(*apiConfig.Requesthandler), userconfig.Identify(apiConfig)) + return nil, errors.Wrap(userconfig.ErrorImplDoesNotExist(*apiConfig.RequestHandler), userconfig.Identify(apiConfig), userconfig.RequestHandlerKey) } implID := hash.Bytes(impl) buf.WriteString(implID) - key := filepath.Join(consts.RequestHandlersDir, implID) - requestHandlerImplKey = &key + requestHandlerImplKey = pointer.String(filepath.Join(consts.RequestHandlersDir, implID)) + + err := uploadRequestHandler(*requestHandlerImplKey, impls[*apiConfig.RequestHandler]) + + if err != nil { + return nil, errors.Wrap(err, userconfig.Identify(apiConfig), "upload") + } } if yaml.StartsWithEscapedAtSymbol(apiConfig.Model) { @@ -89,33 +95,23 @@ func getAPIs(config *userconfig.Config, Path: context.APIPath(apiConfig.Name, config.App.Name), RequestHandlerImplKey: requestHandlerImplKey, } - - if apiConfig.Requesthandler != nil { - uploadRequestHandlers(apis[apiConfig.Name], impls[*apiConfig.Requesthandler]) - } } return apis, nil } -func uploadRequestHandlers(api *context.API, impl []byte) error { - implID := hash.Bytes(impl) - - if uploadedRequestHandlers.Has(implID) { - return nil - } - - isUploaded, err := config.AWS.IsS3File(*api.RequestHandlerImplKey) +func uploadRequestHandler(implKey string, impl []byte) error { + isUploaded, err := config.AWS.IsS3File(implKey) if err != nil { - return errors.Wrap(err, userconfig.Identify(api), "upload") + return err } if !isUploaded { - err = config.AWS.UploadBytesToS3(impl, *api.RequestHandlerImplKey) + err = config.AWS.UploadBytesToS3(impl, implKey) if err != nil { - return errors.Wrap(err, userconfig.Identify(api), "upload") + return err } } - uploadedRequestHandlers.Add(implID) + uploadedRequestHandlers.Add(implKey) return nil } diff --git a/pkg/operator/workloads/api.go b/pkg/operator/workloads/api.go index f0421b7f6f..e2d218fd57 100644 --- a/pkg/operator/workloads/api.go +++ b/pkg/operator/workloads/api.go @@ -171,11 +171,11 @@ func onnxAPISpec( workloadID string, desiredReplicas int32, ) *appsv1b1.Deployment { - transformResourceList := corev1.ResourceList{} - 
transformResourceList[corev1.ResourceCPU] = api.Compute.CPU.Quantity + resourceList := corev1.ResourceList{} + resourceList[corev1.ResourceCPU] = api.Compute.CPU.Quantity if api.Compute.Mem != nil { - transformResourceList[corev1.ResourceMemory] = api.Compute.Mem.Quantity + resourceList[corev1.ResourceMemory] = api.Compute.Mem.Quantity } return k8s.Deployment(&k8s.DeploymentSpec{ @@ -234,7 +234,7 @@ func onnxAPISpec( }, }, Resources: corev1.ResourceRequirements{ - Requests: transformResourceList, + Requests: resourceList, }, }, }, @@ -335,13 +335,15 @@ func apiWorkloadSpecs(ctx *context.Context) ([]*WorkloadSpec, error) { var spec metav1.Object - if api.ModelType == userconfig.TensorFlowModelType { + switch api.ModelType { + case userconfig.TensorFlowModelType: spec = tfAPISpec(ctx, api, workloadID, desiredReplicas) - } - - if api.ModelType == userconfig.ONNXModelType { + case userconfig.ONNXModelType: spec = onnxAPISpec(ctx, api, workloadID, desiredReplicas) + default: + return nil, userconfig.ErrorUnknownModelType() } + workloadSpecs = append(workloadSpecs, &WorkloadSpec{ WorkloadID: workloadID, ResourceIDs: strset.New(api.ID), diff --git a/pkg/workloads/lib/context.py b/pkg/workloads/lib/context.py index 4216ae7989..2d2223c8b9 100644 --- a/pkg/workloads/lib/context.py +++ b/pkg/workloads/lib/context.py @@ -282,6 +282,12 @@ def get_request_handler_impl(self, api_name): except CortexException as e: e.wrap("api " + api_name, "request_handler") raise + + try: + _validate_impl(impl, REQUEST_HANDLER_IMPL_VALIDATION) + except CortexException as e: + e.wrap("api " + api_name, "request_handler " + api["request_handler"]) + raise return (impl, impl_path) @@ -677,7 +683,7 @@ def cast_compound_type(value, type_str): ] } -TRANSFORMER_IMPL_VALIDATION = { +REQUEST_HANDLER_IMPL_VALIDATION = { "optional": [ {"name": "preinference", "args": ["request", "metadata"]}, {"name": "postinference", "args": ["response", "metadata"]}, diff --git a/pkg/workloads/onnx_serve/api.py b/pkg/workloads/onnx_serve/api.py index 0a0fbb7456..85a77860d2 100644 --- a/pkg/workloads/onnx_serve/api.py +++ b/pkg/workloads/onnx_serve/api.py @@ -174,10 +174,14 @@ def predict(app_name, api_name): except CortexException as e: e.wrap("error", "sample {}".format(i + 1)) logger.error(str(e)) - logger.exception("An error occurred, see `cx logs api {}` for more details.".format(1)) + logger.exception( + "An error occurred, see `cx logs -v api {}` for more details.".format(api["name"]) + ) return prediction_failed(sample, str(e)) except Exception as e: - logger.exception("An error occurred, see `cx logs api {}` for more details.".format(2)) + logger.exception( + "An error occurred, see `cx logs -v api {}` for more details.".format(api["name"]) + ) return prediction_failed(sample, str(e)) predictions.append(prediction) From b9261a1da72bf305a21065f31dd38b2fd9174686 Mon Sep 17 00:00:00 2001 From: vishal Date: Thu, 4 Jul 2019 19:00:44 +0000 Subject: [PATCH 11/24] Fix predictions --- cli/cmd/predict.go | 25 +++++++++---- examples/iris/cortex.yaml | 53 ++++++++++++++------------- pkg/operator/api/userconfig/errors.go | 9 ----- pkg/operator/workloads/api.go | 2 +- pkg/workloads/tf_api/api.py | 6 +-- 5 files changed, 47 insertions(+), 48 deletions(-) diff --git a/cli/cmd/predict.go b/cli/cmd/predict.go index 0de4c5c98f..0a1f03333f 100644 --- a/cli/cmd/predict.go +++ b/cli/cmd/predict.go @@ -22,12 +22,14 @@ import ( "net/http" "strings" - "github.com/spf13/cobra" "github.com/cortexlabs/yaml" + "github.com/spf13/cobra" + 
"github.com/cortexlabs/cortex/pkg/lib/cast" "github.com/cortexlabs/cortex/pkg/lib/errors" "github.com/cortexlabs/cortex/pkg/lib/files" "github.com/cortexlabs/cortex/pkg/lib/json" + s "github.com/cortexlabs/cortex/pkg/lib/strings" libtime "github.com/cortexlabs/cortex/pkg/lib/time" "github.com/cortexlabs/cortex/pkg/lib/urls" "github.com/cortexlabs/cortex/pkg/operator/api/resource" @@ -53,7 +55,6 @@ type Prediction struct { Response interface{} `json:"response"` } - // type PredictResponseExternalModel struct { // ResourceID string `json:"resource_id"` // Predictions []interface{} `json:"predictions"` @@ -103,7 +104,7 @@ var predictCmd = &cobra.Command{ apiID := predictResponse.ResourceID api := resourcesRes.APIStatuses[apiID] - _, isExternalModel := yaml.ExtractAtSymbolText(resourcesRes.Context.APIs[apiName].model) + _, isModelReference := yaml.ExtractAtSymbolText(resourcesRes.Context.APIs[apiName].Model) apiStart := libtime.LocalTimestampHuman(api.Start) fmt.Println("\n" + apiName + " was last updated on " + apiStart + "\n") @@ -115,17 +116,27 @@ var predictCmd = &cobra.Command{ } for _, prediction := range predictResponse.Predictions { - if isExternalModel { + if !isModelReference { prettyResp, err := json.Pretty(prediction) if err != nil { errors.Exit(err) } - + fmt.Println(prettyResp) continue } - parsedPrediction = prediction.(Prediction) + predictionBytes, err := json.Marshal(prediction) + if err != nil { + errors.Exit(err) + } + + var parsedPrediction Prediction + err = json.DecodeWithNumber(predictionBytes, &parsedPrediction) + if err != nil { + errors.Exit(err, "prediction response") + } + if parsedPrediction.Prediction == nil { prettyResp, err := json.Pretty(parsedPrediction.Response) if err != nil { @@ -177,8 +188,6 @@ func makePredictRequest(apiURL string, samplesJSONPath string) (*PredictResponse return &predictResponse, nil } - - // var predictCmd = &cobra.Command{ // Use: "predict API_NAME SAMPLES_FILE", // Short: "make predictions", diff --git a/examples/iris/cortex.yaml b/examples/iris/cortex.yaml index 326ea7311a..25d31b420e 100644 --- a/examples/iris/cortex.yaml +++ b/examples/iris/cortex.yaml @@ -3,36 +3,37 @@ - kind: api name: iris-tf-nn + model_type: tensorflow model: s3://cortex-examples/iris-tensorflow.zip compute: max_replicas: 1 -# - kind: api -# name: iris-pytorch-nn -# model_type: onnx -# request_handler: inference_pytorch2.py -# model: s3://cortex-examples/iris_pytorch.onnx -# compute: -# max_replicas: 1 +- kind: api + name: iris-pytorch-nn + model_type: onnx + request_handler: inference_pytorch.py + model: s3://cortex-examples/iris_pytorch.onnx + compute: + max_replicas: 1 -# - kind: api -# name: iris-xgb-classifier -# model_type: onnx -# model: s3://cortex-examples/iris_xgb.onnx -# compute: -# max_replicas: 1 +- kind: api + name: iris-xgb-classifier + model_type: onnx + model: s3://cortex-examples/iris_xgb.onnx + compute: + max_replicas: 1 -# - kind: api -# name: iris-sklearn-logistic-regression -# model_type: onnx -# request_handler: inference.py -# model: s3://cortex-examples/iris_sklearn_logreg.onnx -# compute: -# max_replicas: 1 +- kind: api + name: iris-sklearn-logistic-regression + model_type: onnx + request_handler: inference.py + model: s3://cortex-examples/iris_sklearn_logreg.onnx + compute: + max_replicas: 1 -# - kind: api -# name: iris-keras -# model_type: onnx -# model: s3://data-vishal/iris_keras.onnx -# compute: -# max_replicas: 1 +- kind: api + name: iris-keras + model_type: onnx + model: s3://data-vishal/iris_keras.onnx + compute: + 
+    max_replicas: 1
diff --git a/pkg/operator/api/userconfig/errors.go b/pkg/operator/api/userconfig/errors.go
index bea40b901e..a0cfd9a6d6 100644
--- a/pkg/operator/api/userconfig/errors.go
+++ b/pkg/operator/api/userconfig/errors.go
@@ -79,7 +79,6 @@ const (
 	ErrImplDoesNotExist
 	ErrInvalidS3PathOrResourceReference
 	ErrExternalNotFound
-	ErrUnknownModelType
 )
 
 var errorKinds = []string{
@@ -132,7 +131,6 @@ var errorKinds = []string{
 	"err_impl_does_not_exist",
 	"err_invalid_s3_path_or_resource_reference",
 	"err_external_not_found",
-	"err_unknown_model_type",
 }
 
 var _ = [1]int{}[int(ErrExternalNotFound)-(len(errorKinds)-1)] // Ensure list length matches
@@ -606,10 +604,3 @@ func ErrorInvalidS3PathOrResourceReference(provided string) error {
 		message: s3ErrMsg + ", and is not a cortex resource reference (which starts with @)",
 	}
 }
-
-func ErrorUnknownModelType() error {
-	return Error{
-		Kind:    ErrUnknownModelType,
-		message: "Unknown model type encountered",
-	}
-}
diff --git a/pkg/operator/workloads/api.go b/pkg/operator/workloads/api.go
index e5fc5cb929..49bd59a9c5 100644
--- a/pkg/operator/workloads/api.go
+++ b/pkg/operator/workloads/api.go
@@ -341,7 +341,7 @@ func apiWorkloadSpecs(ctx *context.Context) ([]*WorkloadSpec, error) {
 		case userconfig.ONNXModelType:
 			spec = onnxAPISpec(ctx, api, workloadID, desiredReplicas)
 		default:
-			return nil, userconfig.ErrorUnknownModelType()
+			return nil, errors.New(api.Name, "unknown model type encountered") // unexpected
 		}
 
 		workloadSpecs = append(workloadSpecs, &WorkloadSpec{
diff --git a/pkg/workloads/tf_api/api.py b/pkg/workloads/tf_api/api.py
index dcb64d4efd..66ac5d0ae5 100644
--- a/pkg/workloads/tf_api/api.py
+++ b/pkg/workloads/tf_api/api.py
@@ -248,9 +248,7 @@ def run_predict(sample):
     prepared_sample = sample
 
     if request_handler is not None and util.has_function(request_handler, "preinference"):
-        prepared_sample = request_handler.preinference(
-            sample, local_cache["metadata"]["signatureDef"]
-        )
+        prepared_sample = request_handler.preinference(sample, local_cache["metadata"])
 
     if util.is_resource_ref(local_cache["api"]["model"]):
         transformed_sample = transform_sample(prepared_sample)
@@ -277,7 +275,7 @@ def run_predict(sample):
         util.log_pretty(result, indent=6)
 
     if request_handler is not None and util.has_function(request_handler, "postinference"):
-        result = request_handler.postinference(result, local_cache["metadata"]["signatureDef"])
+        result = request_handler.postinference(result, local_cache["metadata"])
 
     return result
 

From 905c2d420ed51673003084e2886b7398ea8a4cc8 Mon Sep 17 00:00:00 2001
From: vishal
Date: Thu, 4 Jul 2019 19:19:56 +0000
Subject: [PATCH 12/24] Add request handler docs round 1

---
 docs/apis/apis.md             | 36 +----------------
 docs/apis/packaging-models.md |  2 +-
 docs/apis/request-handlers.md | 76 +++++++++++++++++++++++++++++++++++
 docs/apis/request_handler.md  |  3 --
 pkg/workloads/lib/context.py  |  2 +-
 5 files changed, 80 insertions(+), 39 deletions(-)
 create mode 100644 docs/apis/request-handlers.md
 delete mode 100644 docs/apis/request_handler.md

diff --git a/docs/apis/apis.md b/docs/apis/apis.md
index ecb79f5ae4..4d03baf7d9 100644
--- a/docs/apis/apis.md
+++ b/docs/apis/apis.md
@@ -28,6 +28,7 @@ See [packaging models](packaging-models.md) for how to create the zipped model.
 - kind: api
   name: my-api
   model: s3://my-bucket/my-model.zip
+  request_handler: inference.py
   compute:
     min_replicas: 5
     max_replicas: 20
@@ -38,40 +39,7 @@
API endpoints can be customized by providing a request handler. Request handlers can be used to prepare request payloads before being passed to model for inference and modify model predictions before they are served. -```python -def preinference(sample, metadata): - """Prepare a sample before it is passed into the model. - - Args: - sample: A single sample in the request payload converted from JSON to Python object. - - metadata: Describes the expected shape and type of inputs to the model. - If API model_type is tensorflow the object is a map - https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/meta_graph.proto - If API model_type is onnx the object is a list of [onnxruntime.NodeArg] - https://microsoft.github.io/onnxruntime/api_summary.html#onnxruntime.NodeArg - - Returns: - If model only has one 1 input, return a python list or numpy array of expected type and shape. If model has more than 1 input, return a dictionary mapping input names to python list or numpy array of expected type and shape. - """ - pass - -def postinference(prediction, metadata): - """Modify prediction from model before adding it to response payload. - - Args: - sample: A single sample in the request payload converted from JSON to Python object - - metadata: Describes the output shape and type of outputs from the model. - If API model_type is tensorflow the object is a map - https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/meta_graph.proto - If API model_type is onnx the object is a list of [onnxruntime.NodeArg] - https://microsoft.github.io/onnxruntime/api_summary.html#onnxruntime.NodeArg - - Returns: - Python object that can be marshalled to JSON. - """ -``` +See [request handlers](request-handlers.md) for a detailed guide. ## Integration diff --git a/docs/apis/packaging-models.md b/docs/apis/packaging-models.md index 04e93fddd3..f23c6ccd8f 100644 --- a/docs/apis/packaging-models.md +++ b/docs/apis/packaging-models.md @@ -28,7 +28,7 @@ Specify `model` in an API, e.g. ## ONNX -Convert your model to ONNX model format. +Convert your model to ONNX model format. ```Python # Convert PyTorch model to ONNX diff --git a/docs/apis/request-handlers.md b/docs/apis/request-handlers.md new file mode 100644 index 0000000000..2a4a51d97b --- /dev/null +++ b/docs/apis/request-handlers.md @@ -0,0 +1,76 @@ +# Request Handlers + +## Implementation + +```python +def preinference(sample, metadata): + """Prepare a sample before it is passed into the model. + + Args: + sample: A single sample in the request payload converted from JSON to Python object. + + metadata: Describes the expected shape and type of inputs to the model. + If API model_type is tensorflow the object is a map + https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/meta_graph.proto + If API model_type is onnx the object is a list of [onnxruntime.NodeArg] + https://microsoft.github.io/onnxruntime/api_summary.html#onnxruntime.NodeArg + + Returns: + If model only has one 1 input, return a python list or numpy array of expected type and shape. If model has more than 1 input, return a dictionary mapping input names to python list or numpy array of expected type and shape. + """ + pass + +def postinference(prediction, metadata): + """Modify prediction from model before adding it to response payload. + + Args: + sample: A single sample in the request payload converted from JSON to Python object + + metadata: Describes the output shape and type of outputs from the model. 
+ If API model_type is tensorflow the object is a map + https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/meta_graph.proto + If API model_type is onnx the object is a list of [onnxruntime.NodeArg] + https://microsoft.github.io/onnxruntime/api_summary.html#onnxruntime.NodeArg + + Returns: + Python object that can be marshalled to JSON. + """ +``` + +## Example + +```python +import numpy as np + +iris_labels = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"] + +def preinference(request, metadata): + return { + metadata[0].name : [ + request["sepal_length"], + request["sepal_width"], + request["petal_length"], + request["petal_width"], + ] + } + + +def postinference(response, metadata): + predicted_class_id = response[0][0] + return {"class_label": iris_labels[predicted_class_id], "class_index": predicted_class_id} + +``` + +## Pre-installed Packages + +The following packages have been pre-installed and can be used in your implementations: + +```text +boto3==1.9.78 +msgpack==0.6.1 +numpy>=1.13.3,<2 +requirements-parser==0.2.0 +packaging==19.0.0 +``` + +You can install additional PyPI packages and import your own Python packages. See [Python Packages](../advanced/python-packages.md) for more details. diff --git a/docs/apis/request_handler.md b/docs/apis/request_handler.md deleted file mode 100644 index de45fab69f..0000000000 --- a/docs/apis/request_handler.md +++ /dev/null @@ -1,3 +0,0 @@ -# Request Handlers - -Request handlers diff --git a/pkg/workloads/lib/context.py b/pkg/workloads/lib/context.py index 2d2223c8b9..aa9c889118 100644 --- a/pkg/workloads/lib/context.py +++ b/pkg/workloads/lib/context.py @@ -282,7 +282,7 @@ def get_request_handler_impl(self, api_name): except CortexException as e: e.wrap("api " + api_name, "request_handler") raise - + try: _validate_impl(impl, REQUEST_HANDLER_IMPL_VALIDATION) except CortexException as e: From c7e3b37acf9d7d6aa0167893dc33ca2f58b8a595 Mon Sep 17 00:00:00 2001 From: vishal Date: Thu, 4 Jul 2019 19:20:56 +0000 Subject: [PATCH 13/24] Remove commented code --- cli/cmd/predict.go | 190 --------------------------------------------- 1 file changed, 190 deletions(-) diff --git a/cli/cmd/predict.go b/cli/cmd/predict.go index 0a1f03333f..4f00bc029d 100644 --- a/cli/cmd/predict.go +++ b/cli/cmd/predict.go @@ -187,193 +187,3 @@ func makePredictRequest(apiURL string, samplesJSONPath string) (*PredictResponse return &predictResponse, nil } - -// var predictCmd = &cobra.Command{ -// Use: "predict API_NAME SAMPLES_FILE", -// Short: "make predictions", -// Long: "Make predictions.", -// Args: cobra.ExactArgs(2), -// Run: func(cmd *cobra.Command, args []string) { -// apiName := args[0] -// samplesJSONPath := args[1] - -// resourcesRes, err := getResourcesResponse() -// if err != nil { -// errors.Exit(err) -// } - -// apiGroupStatus := resourcesRes.APIGroupStatuses[apiName] -// if apiGroupStatus == nil { -// errors.Exit(ErrorAPINotFound(apiName)) -// } -// if apiGroupStatus.ActiveStatus == nil { -// errors.Exit(ErrorAPINotReady(apiName, apiGroupStatus.Message())) -// } - -// api := resourcesRes.Context.APIs[apiName] -// apiPath := apiGroupStatus.ActiveStatus.Path -// apiURL := urls.Join(resourcesRes.APIsBaseURL, apiPath) - -// if _, ok := yaml.ExtractAtSymbolText(api.Model); ok { -// // e2e -// predict(apiURL, samplesJSONPath) -// } else { -// predictExternalModel(apiURL, samplesJSONPath) -// } -// }, -// } - -// var predictCmd = &cobra.Command{ -// Use: "predict API_NAME SAMPLES_FILE", -// Short: "make predictions", -// Long: "Make 
predictions.", -// Args: cobra.ExactArgs(2), -// Run: func(cmd *cobra.Command, args []string) { -// apiName := args[0] -// samplesJSONPath := args[1] - -// resourcesRes, err := getResourcesResponse() -// if err != nil { -// errors.Exit(err) -// } - -// apiGroupStatus := resourcesRes.APIGroupStatuses[apiName] -// if apiGroupStatus == nil { -// errors.Exit(ErrorAPINotFound(apiName)) -// } -// if apiGroupStatus.ActiveStatus == nil { -// errors.Exit(ErrorAPINotReady(apiName, apiGroupStatus.Message())) -// } - -// api := resourcesRes.Context.APIs[apiName] -// apiPath := apiGroupStatus.ActiveStatus.Path -// apiURL := urls.Join(resourcesRes.APIsBaseURL, apiPath) - -// if _, ok := yaml.ExtractAtSymbolText(api.Model); ok { -// // e2e -// predict(apiURL, samplesJSONPath) -// } else { -// predictExternalModel(apiURL, samplesJSONPath) -// } -// }, -// } - -// func predict(apiURL string, samplesJSONPath string) { -// httpResponse := makePredictRequest(apiURL, samplesJSONPath) - -// var predictResponse PredictResponse -// err := json.DecodeWithNumber(httpResponse, &predictResponse) -// if err != nil { -// return nil, errors.Wrap(err, "prediction response") -// } - -// if predictPrintJSON { -// prettyResp, err := json.Pretty(predictResponse) -// if err != nil { -// errors.Exit(err) -// } - -// fmt.Println(prettyResp) -// return -// } - -// apiID := predictResponse.ResourceID -// api := resourcesRes.APIStatuses[apiID] - -// apiStart := libtime.LocalTimestampHuman(api.Start) -// fmt.Println("\n" + apiName + " was last updated on " + apiStart + "\n") - -// if len(predictResponse.Predictions) == 1 { -// fmt.Println("Prediction:") -// } else { -// fmt.Println("Predictions:") -// } - -// for _, prediction := range predictResponse.Predictions { -// if prediction.Prediction == nil { -// prettyResp, err := json.Pretty(prediction.Response) -// if err != nil { -// errors.Exit(err) -// } - -// fmt.Println(prettyResp) -// continue -// } - -// value := prediction.Prediction -// if prediction.PredictionReversed != nil { -// value = prediction.PredictionReversed -// } - -// if cast.IsFloatType(value) { -// casted, _ := cast.InterfaceToFloat64(value) -// fmt.Println(s.Round(casted, 2, true)) -// } else { -// fmt.Println(s.UserStrStripped(value)) -// } -// } -// } - -// func predictExternalModel(apiURL string, samplesJSONPath string) { -// httpResponse := makePredictRequest(apiURL, samplesJSONPath) - -// var predictResponse PredictResponseExternalModel -// err = json.DecodeWithNumber(httpResponse, &predictResponse) -// if err != nil { -// return nil, errors.Wrap(err, "prediction response") -// } - -// if predictPrintJSON { -// prettyResp, err := json.Pretty(predictResponse) -// if err != nil { -// errors.Exit(err) -// } - -// fmt.Println(prettyResp) -// return -// } - -// apiID := predictResponse.ResourceID -// api := resourcesRes.APIStatuses[apiID] - -// apiStart := libtime.LocalTimestampHuman(api.Start) -// fmt.Println("\n" + apiName + " was last updated on " + apiStart + "\n") - -// if len(predictResponse.Predictions) == 1 { -// fmt.Println("Prediction:") -// } else { -// fmt.Println("Predictions:") -// } - -// for _, prediction := range predictResponse.Predictions { -// prettyResp, err := json.Pretty(prediction) -// if err != nil { -// errors.Exit(err) -// } - -// fmt.Println(prettyResp) -// } -// } - -// func makePredictRequest(apiURL string, samplesJSONPath string) []byte { -// samplesBytes, err := files.ReadFileBytes(samplesJSONPath) -// if err != nil { -// errors.Exit(err) -// } -// payload := 
bytes.NewBuffer(samplesBytes) -// req, err := http.NewRequest("POST", apiURL, payload) -// if err != nil { -// return nil, errors.Exit(err, errStrCantMakeRequest) -// } - -// req.Header.Set("Content-Type", "application/json") -// httpResponse, err := makeRequest(req) -// if err != nil { -// if strings.Contains(err.Error(), "503 Service Temporarily Unavailable") || strings.Contains(err.Error(), "502 Bad Gateway") { -// errors.Exit(ErrorAPINotReady(apiName, resource.StatusUpdating.Message())) -// } -// errors.Exit(err) -// } - -// return &predictResponse -// } From 3892a230e1004024e32fd0ad8e562e2f0f789c14 Mon Sep 17 00:00:00 2001 From: vishal Date: Fri, 5 Jul 2019 01:06:48 +0000 Subject: [PATCH 14/24] Cleanup examples and docs round 2 --- README.md | 4 +- cli/cmd/predict.go | 5 -- docs/apis/apis.md | 4 +- docs/apis/packaging-models.md | 31 ++++++++----- docs/apis/request-handlers.md | 34 +++++++------- examples/iris/cortex.yaml | 46 ++++++++----------- examples/iris/inference.py | 19 -------- examples/iris/inference_pytorch.py | 23 ---------- .../iris_keras.py => iris/models/keras.py} | 2 +- .../models/pytorch.py} | 5 +- .../models/sklearn.py} | 3 +- .../models/xgboost.py} | 2 +- examples/iris/pytorch.py | 23 ++++++++++ examples/iris/sklearn.py | 19 ++++++++ pkg/operator/api/userconfig/apis.go | 2 +- pkg/operator/api/userconfig/config_key.go | 2 +- pkg/workloads/lib/context.py | 7 ++- pkg/workloads/onnx_serve/api.py | 10 ++-- pkg/workloads/tf_api/api.py | 10 ++-- 19 files changed, 121 insertions(+), 130 deletions(-) delete mode 100644 examples/iris/inference.py delete mode 100644 examples/iris/inference_pytorch.py rename examples/{models/iris_keras.py => iris/models/keras.py} (95%) rename examples/{models/iris_pytorch.py => iris/models/pytorch.py} (93%) rename examples/{models/iris_sklearn_logreg.py => iris/models/sklearn.py} (94%) rename examples/{models/iris_xgboost.py => iris/models/xgboost.py} (95%) create mode 100644 examples/iris/pytorch.py create mode 100644 examples/iris/sklearn.py diff --git a/README.md b/README.md index cf23eb4d88..e22f1fd0ae 100644 --- a/README.md +++ b/README.md @@ -35,11 +35,11 @@ Cortex is actively maintained by Cortex Labs. We're a venture-backed team of inf ```python # handler.py -def preprocess(payload): +def pre_inference(sample, metadata): # Python code -def postprocess(prediction): +def post_inference(prediction, metadata): # Python code ``` diff --git a/cli/cmd/predict.go b/cli/cmd/predict.go index 4f00bc029d..644b4a7f0f 100644 --- a/cli/cmd/predict.go +++ b/cli/cmd/predict.go @@ -55,11 +55,6 @@ type Prediction struct { Response interface{} `json:"response"` } -// type PredictResponseExternalModel struct { -// ResourceID string `json:"resource_id"` -// Predictions []interface{} `json:"predictions"` -// } - var predictCmd = &cobra.Command{ Use: "predict API_NAME SAMPLES_FILE", Short: "make predictions", diff --git a/docs/apis/apis.md b/docs/apis/apis.md index 4d03baf7d9..bf88fbeaf0 100644 --- a/docs/apis/apis.md +++ b/docs/apis/apis.md @@ -8,8 +8,8 @@ Serve models at scale and use them to build smarter applications. - kind: api name: # API name (required) model: # path to a zipped model dir (e.g. 
s3://my-bucket/model.zip)
-  model_type: # framework of model, currently support tensorflow and onnx
-  request_handler: # path to the implementation file, relative to the cortex root
+  model_format: # model format, currently supports tensorflow and onnx
+  request_handler: # path to the request handler implementation file, relative to the cortex root
   compute:
     min_replicas: # minimum number of replicas (default: 1)
     max_replicas: # maximum number of replicas (default: 100)
diff --git a/docs/apis/packaging-models.md b/docs/apis/packaging-models.md
index f23c6ccd8f..e8549a426c 100644
--- a/docs/apis/packaging-models.md
+++ b/docs/apis/packaging-models.md
@@ -22,29 +22,36 @@ Specify `model` in an API, e.g.
 
 ```yaml
 - kind: api
   name: my-api
-  model_type: tensorflow
+  model_format: tensorflow
   model: s3://my-bucket/model.zip
 ```
 
 ## ONNX
 
-Convert your model to ONNX model format.
+Export your trained model to the ONNX model format. An example of exporting a trained model to ONNX is shown below.
 
 ```Python
-# Convert PyTorch model to ONNX
-dummy_input = torch.randn(1, 4)
-
-torch.onnx.export(
-    model, dummy_input, "iris_pytorch.onnx", input_names=["input"], output_names=["species"]
-)
+...
+
+logreg_model = LogisticRegression(solver="lbfgs", multi_class="multinomial")
+
+# Train the model
+logreg_model.fit(X_train, y_train)
+
+# Convert to ONNX model format
+onnx_model = convert_sklearn(logreg_model, initial_types=[("input", FloatTensorType([1, 4]))])
+with open("model.onnx", "wb") as f:
+    f.write(onnx_model.SerializeToString())
 ```
 
 See examples on how to convert models from common ML frameworks to ONNX.
 
-* [PyTorch](https://github.com/cortexlabs/cortex/blob/master/examples/models/iris_pytorch.py)
-* [scikit-learn](https://github.com/cortexlabs/cortex/blob/master/examples/models/iris_sklearn_logreg.py)
-* [XGBoost](https://github.com/cortexlabs/cortex/blob/master/examples/models/iris_xgboost.py)
-* [Keras](https://github.com/cortexlabs/cortex/blob/master/examples/models/iris_keras.py)
+* [PyTorch](https://github.com/cortexlabs/cortex/blob/master/examples/iris/models/pytorch.py)
+* [Sklearn](https://github.com/cortexlabs/cortex/blob/master/examples/iris/models/sklearn.py)
+* [XGBoost](https://github.com/cortexlabs/cortex/blob/master/examples/iris/models/xgboost.py)
+* [Keras](https://github.com/cortexlabs/cortex/blob/master/examples/iris/models/keras.py)
+
+Upload your trained model in ONNX format to S3, e.g.
 
 ```text
 $ aws s3 cp model.onnx s3://my-bucket/model.onnx
@@ -55,6 +62,6 @@ Specify `model` in an API, e.g.
 
 ```yaml
 - kind: api
   name: my-api
-  model_type: onnx
+  model_format: onnx
   model: s3://my-bucket/model.onnx
 ```
diff --git a/docs/apis/request-handlers.md b/docs/apis/request-handlers.md
index 2a4a51d97b..9ba2f709fe 100644
--- a/docs/apis/request-handlers.md
+++ b/docs/apis/request-handlers.md
@@ -3,37 +3,37 @@
 ## Implementation
 
 ```python
-def preinference(sample, metadata):
+def pre_inference(sample, metadata):
     """Prepare a sample before it is passed into the model.
 
     Args:
-        sample: A single sample in the request payload converted from JSON to Python object.
+        sample: A sample from the request payload.
 
         metadata: Describes the expected shape and type of inputs to the model.
- If API model_type is tensorflow the object is a map + If API model_format is tensorflow: map https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/meta_graph.proto - If API model_type is onnx the object is a list of [onnxruntime.NodeArg] + If API model_format is onnx: list https://microsoft.github.io/onnxruntime/api_summary.html#onnxruntime.NodeArg Returns: - If model only has one 1 input, return a python list or numpy array of expected type and shape. If model has more than 1 input, return a dictionary mapping input names to python list or numpy array of expected type and shape. + A dictionary of mapping model input name to a python list or array. """ pass -def postinference(prediction, metadata): +def post_inference(prediction, metadata): """Modify prediction from model before adding it to response payload. Args: - sample: A single sample in the request payload converted from JSON to Python object + prediction: The output of the model. metadata: Describes the output shape and type of outputs from the model. - If API model_type is tensorflow the object is a map + If API model_format is tensorflow: map https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/protobuf/meta_graph.proto - If API model_type is onnx the object is a list of [onnxruntime.NodeArg] + If API model_format is onnx: list https://microsoft.github.io/onnxruntime/api_summary.html#onnxruntime.NodeArg Returns: - Python object that can be marshalled to JSON. + A python dictionary or list. """ ``` @@ -44,19 +44,19 @@ import numpy as np iris_labels = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"] -def preinference(request, metadata): +def pre_inference(sample, metadata): return { metadata[0].name : [ - request["sepal_length"], - request["sepal_width"], - request["petal_length"], - request["petal_width"], + sample["sepal_length"], + sample["sepal_width"], + sample["petal_length"], + sample["petal_width"], ] } -def postinference(response, metadata): - predicted_class_id = response[0][0] +def post_inference(prediction, metadata): + predicted_class_id = prediction[0][0] return {"class_label": iris_labels[predicted_class_id], "class_index": predicted_class_id} ``` diff --git a/examples/iris/cortex.yaml b/examples/iris/cortex.yaml index 25d31b420e..eec57eda03 100644 --- a/examples/iris/cortex.yaml +++ b/examples/iris/cortex.yaml @@ -2,38 +2,30 @@ name: iris - kind: api - name: iris-tf-nn - model_type: tensorflow - model: s3://cortex-examples/iris-tensorflow.zip - compute: - max_replicas: 1 + name: tensorflow + model_format: tensorflow + model: s3://cortex-examples/iris/tensorflow.zip - kind: api - name: iris-pytorch-nn - model_type: onnx - request_handler: inference_pytorch.py - model: s3://cortex-examples/iris_pytorch.onnx - compute: - max_replicas: 1 + name: pytorch + model_format: onnx + request_handler: pytorch.py + model: s3://cortex-examples/iris/pytorch.onnx - kind: api - name: iris-xgb-classifier - model_type: onnx - model: s3://cortex-examples/iris_xgb.onnx - compute: - max_replicas: 1 + name: xgboost + model_format: onnx + model: s3://cortex-examples/iris/xgboost.onnx - kind: api - name: iris-sklearn-logistic-regression - model_type: onnx - request_handler: inference.py - model: s3://cortex-examples/iris_sklearn_logreg.onnx - compute: - max_replicas: 1 + name: sklearn + model_format: onnx + request_handler: sklearn.py + model: s3://cortex-examples/iris/sklearn.onnx - kind: api - name: iris-keras - model_type: onnx - model: s3://data-vishal/iris_keras.onnx - compute: - 
max_replicas: 1 + name: keras + model_format: onnx + model: s3://cortex-examples/iris/keras.onnx + +# model_format -> model_format diff --git a/examples/iris/inference.py b/examples/iris/inference.py deleted file mode 100644 index 58b000bbdc..0000000000 --- a/examples/iris/inference.py +++ /dev/null @@ -1,19 +0,0 @@ -import numpy as np - -iris_labels = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"] - - -def preinference(request, metadata): - return { - "input": [ - request["sepal_length"], - request["sepal_width"], - request["petal_length"], - request["petal_width"], - ] - } - - -def postinference(response, metadata): - predicted_class_id = response[0][0] - return {"class_label": iris_labels[predicted_class_id], "class_index": predicted_class_id} diff --git a/examples/iris/inference_pytorch.py b/examples/iris/inference_pytorch.py deleted file mode 100644 index fc19e6f4a7..0000000000 --- a/examples/iris/inference_pytorch.py +++ /dev/null @@ -1,23 +0,0 @@ -import numpy as np - -iris_labels = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"] - - -def preinference(request, metadata): - return { - metadata[0].name: [ - request["sepal_length"], - request["sepal_width"], - request["petal_length"], - request["petal_width"], - ] - } - - -def postinference(response, metadata): - predicted_class_id = int(np.argmax(response[0][0])) - return { - "class_label": iris_labels[predicted_class_id], - "class_index": predicted_class_id, - "probabilites": response[0][0], - } diff --git a/examples/models/iris_keras.py b/examples/iris/models/keras.py similarity index 95% rename from examples/models/iris_keras.py rename to examples/iris/models/keras.py index 77ee8682c2..691dfb5bc3 100644 --- a/examples/models/iris_keras.py +++ b/examples/iris/models/keras.py @@ -32,5 +32,5 @@ # Convert to ONNX model format onnx_model = keras2onnx.convert_keras(model) -with open("iris_keras.onnx", "wb") as f: +with open("keras.onnx", "wb") as f: f.write(onnx_model.SerializeToString()) diff --git a/examples/models/iris_pytorch.py b/examples/iris/models/pytorch.py similarity index 93% rename from examples/models/iris_pytorch.py rename to examples/iris/models/pytorch.py index 2dea1dbc14..a7306edeb7 100644 --- a/examples/models/iris_pytorch.py +++ b/examples/iris/models/pytorch.py @@ -64,8 +64,7 @@ def forward(self, X): print("prediction accuracy {}".format(accuracy_score(test_y.data, predict_y.data))) # Convert to ONNX model format -dummy_input = torch.randn(1, 4) - +placeholder = torch.randn(1, 4) torch.onnx.export( - model, dummy_input, "iris_pytorch.onnx", input_names=["input"], output_names=["species"] + model, placeholder, "pytorch.onnx", input_names=["input"], output_names=["species"] ) diff --git a/examples/models/iris_sklearn_logreg.py b/examples/iris/models/sklearn.py similarity index 94% rename from examples/models/iris_sklearn_logreg.py rename to examples/iris/models/sklearn.py index 57aad00eb8..9ccdc9f07c 100644 --- a/examples/models/iris_sklearn_logreg.py +++ b/examples/iris/models/sklearn.py @@ -22,8 +22,7 @@ print("Test data accuracy: {:.2f}".format(logreg_model.score(X_test, y_test))) - # Convert to ONNX model format onnx_model = convert_sklearn(logreg_model, initial_types=[("input", FloatTensorType([1, 4]))]) -with open("iris_sklearn_logreg.onnx", "wb") as f: +with open("sklearn.onnx", "wb") as f: f.write(onnx_model.SerializeToString()) diff --git a/examples/models/iris_xgboost.py b/examples/iris/models/xgboost.py similarity index 95% rename from examples/models/iris_xgboost.py rename to 
examples/iris/models/xgboost.py
index 404be19596..884cc6847b 100644
--- a/examples/models/iris_xgboost.py
+++ b/examples/iris/models/xgboost.py
@@ -23,5 +23,5 @@
 
 # Convert to ONNX model format
 onnx_model = convert_xgboost(xgb_model, initial_types=[("input", FloatTensorType([1, 4]))])
-with open("iris_xgb.onnx", "wb") as f:
+with open("xgboost.onnx", "wb") as f:
     f.write(onnx_model.SerializeToString())
diff --git a/examples/iris/pytorch.py b/examples/iris/pytorch.py
new file mode 100644
index 0000000000..1368be6033
--- /dev/null
+++ b/examples/iris/pytorch.py
@@ -0,0 +1,23 @@
+import numpy as np
+
+iris_labels = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"]
+
+
+def pre_inference(sample, metadata):
+    return {
+        metadata[0].name: [
+            payload["sepal_length"],
+            payload["sepal_width"],
+            payload["petal_length"],
+            payload["petal_width"],
+        ]
+    }
+
+
+def post_inference(prediction, metadata):
+    predicted_class_id = int(np.argmax(prediction[0][0]))
+    return {
+        "class_label": iris_labels[predicted_class_id],
+        "class_index": predicted_class_id,
+        "probabilities": prediction[0][0],
+    }
diff --git a/examples/iris/sklearn.py b/examples/iris/sklearn.py
new file mode 100644
index 0000000000..e94bbbb1e5
--- /dev/null
+++ b/examples/iris/sklearn.py
@@ -0,0 +1,19 @@
+import numpy as np
+
+iris_labels = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"]
+
+
+def pre_inference(sample, metadata):
+    return {
+        "input": [
+            payload["sepal_length"],
+            payload["sepal_width"],
+            payload["petal_length"],
+            payload["petal_width"],
+        ]
+    }
+
+
+def post_inference(prediction, metadata):
+    predicted_class_id = prediction[0][0]
+    return {"class_label": iris_labels[predicted_class_id], "class_index": predicted_class_id}
diff --git a/pkg/operator/api/userconfig/apis.go b/pkg/operator/api/userconfig/apis.go
index dfe5d47ee3..61e4e737ff 100644
--- a/pkg/operator/api/userconfig/apis.go
+++ b/pkg/operator/api/userconfig/apis.go
@@ -34,7 +34,7 @@ type APIs []*API
 type API struct {
 	ResourceFields
 	Model          string      `json:"model" yaml:"model"`
-	ModelType      ModelType   `json:"model_type" yaml:"model_type"`
+	ModelType      ModelType   `json:"model_format" yaml:"model_format"`
 	RequestHandler *string     `json:"request_handler" yaml:"request_handler"`
 	Compute        *APICompute `json:"compute" yaml:"compute"`
 	Tags           Tags        `json:"tags" yaml:"tags"`
diff --git a/pkg/operator/api/userconfig/config_key.go b/pkg/operator/api/userconfig/config_key.go
index b413d1b970..4d7211f0c3 100644
--- a/pkg/operator/api/userconfig/config_key.go
+++ b/pkg/operator/api/userconfig/config_key.go
@@ -92,7 +92,7 @@ const (
 
 	// API
 	ModelKey          = "model"
-	ModelTypeKey      = "model_type"
+	ModelTypeKey      = "model_format"
 	RequestHandlerKey = "request_handler"
 
 	// compute
diff --git a/pkg/workloads/lib/context.py b/pkg/workloads/lib/context.py
index aa9c889118..a21e089bec 100644
--- a/pkg/workloads/lib/context.py
+++ b/pkg/workloads/lib/context.py
@@ -288,8 +288,7 @@ def get_request_handler_impl(self, api_name):
         except CortexException as e:
             e.wrap("api " + api_name, "request_handler " + api["request_handler"])
             raise
-
-        return (impl, impl_path)
+        return impl
 
 # Mode must be "training" or "evaluation"
 def get_training_data_parts(self, model_name, mode, part_prefix="part"):
@@ -685,8 +684,8 @@ def cast_compound_type(value, type_str):
 
 REQUEST_HANDLER_IMPL_VALIDATION = {
     "optional": [
-        {"name": "preinference", "args": ["request", "metadata"]},
-        {"name": "postinference", "args": ["response", "metadata"]},
+        {"name": "pre_inference", "args": ["payload", "metadata"]},
+        {"name": 
"post_inference", "args": ["prediction", "metadata"]}, ] } diff --git a/pkg/workloads/onnx_serve/api.py b/pkg/workloads/onnx_serve/api.py index 85a77860d2..d21219421c 100644 --- a/pkg/workloads/onnx_serve/api.py +++ b/pkg/workloads/onnx_serve/api.py @@ -154,8 +154,8 @@ def predict(app_name, api_name): util.log_indent("Raw sample:", indent=4) util.log_pretty(sample, indent=6) - if request_handler is not None and util.has_function(request_handler, "preinference"): - sample = request_handler.preinference(sample, input_metadata) + if request_handler is not None and util.has_function(request_handler, "pre_inference"): + sample = request_handler.pre_inference(sample, input_metadata) inference_input = convert_to_onnx_input(sample, input_metadata) model_outputs = sess.run([], inference_input) @@ -166,8 +166,8 @@ def predict(app_name, api_name): else: result.append(model_output) - if request_handler is not None and util.has_function(request_handler, "postinference"): - result = request_handler.postinference(result, output_metadata) + if request_handler is not None and util.has_function(request_handler, "post_inference"): + result = request_handler.post_inference(result, output_metadata) util.log_indent("Prediction:", indent=4) util.log_pretty(result, indent=6) prediction = {"prediction": result} @@ -200,7 +200,7 @@ def start(args): local_cache["ctx"] = ctx if api.get("request_handler_impl_key") is not None: package.install_packages(ctx.python_packages, ctx.storage) - local_cache["request_handler"], _ = ctx.get_request_handler_impl(api["name"]) + local_cache["request_handler"] = ctx.get_request_handler_impl(api["name"]) model_cache_path = os.path.join(args.model_dir, args.api) if not os.path.exists(model_cache_path): diff --git a/pkg/workloads/tf_api/api.py b/pkg/workloads/tf_api/api.py index 66ac5d0ae5..a6f31d8f6a 100644 --- a/pkg/workloads/tf_api/api.py +++ b/pkg/workloads/tf_api/api.py @@ -247,8 +247,8 @@ def run_predict(sample): request_handler = local_cache.get("request_handler") prepared_sample = sample - if request_handler is not None and util.has_function(request_handler, "preinference"): - prepared_sample = request_handler.preinference(sample, local_cache["metadata"]) + if request_handler is not None and util.has_function(request_handler, "pre_inference"): + prepared_sample = request_handler.pre_inference(sample, local_cache["metadata"]) if util.is_resource_ref(local_cache["api"]["model"]): transformed_sample = transform_sample(prepared_sample) @@ -274,8 +274,8 @@ def run_predict(sample): util.log_indent("Prediction:", indent=4) util.log_pretty(result, indent=6) - if request_handler is not None and util.has_function(request_handler, "postinference"): - result = request_handler.postinference(result, local_cache["metadata"]) + if request_handler is not None and util.has_function(request_handler, "post_inference"): + result = request_handler.post_inference(result, local_cache["metadata"]) return result @@ -397,7 +397,7 @@ def start(args): local_cache["ctx"] = ctx if api.get("request_handler_impl_key") is not None: - local_cache["request_handler"], _ = ctx.get_request_handler_impl(api["name"]) + local_cache["request_handler"] = ctx.get_request_handler_impl(api["name"]) if not util.is_resource_ref(api["model"]): if api.get("request_handler") is not None: From 37adbf74a6902af5da32d2e2f721bba30bcdabdd Mon Sep 17 00:00:00 2001 From: vishal Date: Fri, 5 Jul 2019 14:37:54 +0000 Subject: [PATCH 15/24] Rename payload to sample --- cli/cmd/predict.go | 21 +++++++++---------- docs/apis/apis.md | 2 +- 
 docs/apis/request-handlers.md                 |  6 ++++--
 examples/iris/cortex.yaml                     | 16 ++++++++++----
 examples/iris/models/pytorch.py               |  2 +-
 ...{pytorch.py => pytorch_request_handler.py} |  8 +++----
 ...{sklearn.py => sklearn_request_handler.py} |  8 +++----
 pkg/operator/api/userconfig/apis.go           |  6 ++++++
 pkg/workloads/lib/context.py                  |  2 +-
 9 files changed, 43 insertions(+), 28 deletions(-)
 rename examples/iris/{pytorch.py => pytorch_request_handler.py} (75%)
 rename examples/iris/{sklearn.py => sklearn_request_handler.py} (70%)

diff --git a/cli/cmd/predict.go b/cli/cmd/predict.go
index 644b4a7f0f..655a445dec 100644
--- a/cli/cmd/predict.go
+++ b/cli/cmd/predict.go
@@ -22,7 +22,6 @@ import (
 	"net/http"
 	"strings"
 
-	"github.com/cortexlabs/yaml"
 	"github.com/spf13/cobra"
 
 	"github.com/cortexlabs/cortex/pkg/lib/cast"
@@ -48,7 +47,7 @@ type PredictResponse struct {
 	Predictions []interface{} `json:"predictions"`
 }
 
-type Prediction struct {
+type DetailedPrediction struct {
 	Prediction         interface{} `json:"prediction"`
 	PredictionReversed interface{} `json:"prediction_reversed"`
 	TransformedSample  interface{} `json:"transformed_sample"`
 	Response           interface{} `json:"response"`
@@ -99,7 +98,7 @@ var predictCmd = &cobra.Command{
 		apiID := predictResponse.ResourceID
 		api := resourcesRes.APIStatuses[apiID]
 
-		_, isModelReference := yaml.ExtractAtSymbolText(resourcesRes.Context.APIs[apiName].Model)
+		isExternalModel := resourcesRes.Context.APIs[apiName].IsServingExternalModel()
 
 		apiStart := libtime.LocalTimestampHuman(api.Start)
 		fmt.Println("\n" + apiName + " was last updated on " + apiStart + "\n")
@@ -111,7 +110,7 @@ var predictCmd = &cobra.Command{
 		}
 
 		for _, prediction := range predictResponse.Predictions {
-			if !isModelReference {
+			if isExternalModel {
 				prettyResp, err := json.Pretty(prediction)
 				if err != nil {
 					errors.Exit(err)
@@ -126,14 +125,14 @@ var predictCmd = &cobra.Command{
 				errors.Exit(err)
 			}
 
-			var parsedPrediction Prediction
-			err = json.DecodeWithNumber(predictionBytes, &parsedPrediction)
+			var detailedPrediction DetailedPrediction
+			err = json.DecodeWithNumber(predictionBytes, &detailedPrediction)
 			if err != nil {
 				errors.Exit(err, "prediction response")
 			}
 
-			if parsedPrediction.Prediction == nil {
-				prettyResp, err := json.Pretty(parsedPrediction.Response)
+			if detailedPrediction.Prediction == nil {
+				prettyResp, err := json.Pretty(detailedPrediction.Response)
 				if err != nil {
 					errors.Exit(err)
 				}
@@ -142,9 +141,9 @@ var predictCmd = &cobra.Command{
 				continue
 			}
 
-			value := parsedPrediction.Prediction
-			if parsedPrediction.PredictionReversed != nil {
-				value = parsedPrediction.PredictionReversed
+			value := detailedPrediction.Prediction
+			if detailedPrediction.PredictionReversed != nil {
+				value = detailedPrediction.PredictionReversed
 			}
 
 			if cast.IsFloatType(value) {
diff --git a/docs/apis/apis.md b/docs/apis/apis.md
index bf88fbeaf0..97cc0be1a6 100644
--- a/docs/apis/apis.md
+++ b/docs/apis/apis.md
@@ -37,7 +37,20 @@
 
 ## Custom Request Handlers
 
-API endpoints can be customized by providing a request handler. Request handlers can be used to prepare request payloads before being passed to model for inference and modify model predictions before they are served.
+Request handlers are used to decouple the interface of an API endpoint from its model. A `pre_inference` request handler can be used to modify request payloads before they are sent to the model. A `post_inference` request handler can be used to modify model predictions before they are served from the API endpoint.
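+
+For example, a minimal pair of request handlers might look like the following (an illustrative sketch; the payload field names and the single-input model are assumptions, not part of this API):
+
+```python
+def pre_inference(sample, metadata):
+    # map the JSON payload onto the model's expected input
+    return [sample["sepal_length"], sample["sepal_width"], sample["petal_length"], sample["petal_width"]]
+
+
+def post_inference(prediction, metadata):
+    # reshape the raw model output into the response served to clients
+    return {"prediction": prediction[0]}
+```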
 See [request handlers](request-handlers.md) for a detailed guide.
diff --git a/docs/apis/request-handlers.md b/docs/apis/request-handlers.md
index 9ba2f709fe..07674bd396 100644
--- a/docs/apis/request-handlers.md
+++ b/docs/apis/request-handlers.md
@@ -1,5 +1,7 @@
 # Request Handlers
 
+Request handlers are python files that can contain a `pre_inference` function and a `post_inference` function. Both functions are optional. 
+
 ## Implementation
 
 ```python
@@ -16,7 +18,7 @@ def pre_inference(sample, metadata):
         https://microsoft.github.io/onnxruntime/api_summary.html#onnxruntime.NodeArg
 
     Returns:
-        A dictionary of mapping model input name to a python list or array.
+        A dictionary containing model input names as keys and python lists or numpy arrays as values. If the model only has a single input, then a python list or numpy array can be returned instead of a dictionary with a single key.
     """
     pass
 
@@ -73,4 +75,4 @@ requirements-parser==0.2.0
 packaging==19.0.0
 ```
 
-You can install additional PyPI packages and import your own Python packages. See [Python Packages](../advanced/python-packages.md) for more details.
+You can install additional PyPI packages and import your own Python packages. See [Python Packages](../pipelines/python-packages.md) for more details.
diff --git a/examples/iris/cortex.yaml b/examples/iris/cortex.yaml
index eec57eda03..073462e9a2 100644
--- a/examples/iris/cortex.yaml
+++ b/examples/iris/cortex.yaml
@@ -5,27 +5,35 @@
   name: tensorflow
   model_format: tensorflow
   model: s3://cortex-examples/iris/tensorflow.zip
+  compute:
+    max_replicas: 1
 
 - kind: api
   name: pytorch
   model_format: onnx
-  request_handler: pytorch.py
+  request_handler: pytorch_request_handler.py
   model: s3://cortex-examples/iris/pytorch.onnx
+  compute:
+    max_replicas: 1
 
 - kind: api
   name: xgboost
   model_format: onnx
   model: s3://cortex-examples/iris/xgboost.onnx
+  compute:
+    max_replicas: 1
 
 - kind: api
   name: sklearn
   model_format: onnx
-  request_handler: sklearn.py
+  request_handler: sklearn_request_handler.py
   model: s3://cortex-examples/iris/sklearn.onnx
+  compute:
+    max_replicas: 1
 
 - kind: api
   name: keras
   model_format: onnx
   model: s3://cortex-examples/iris/keras.onnx
-
-# model_format -> model_format
+  compute:
+    max_replicas: 1
diff --git a/examples/iris/models/pytorch.py b/examples/iris/models/pytorch.py
index a7306edeb7..90da7974da 100644
--- a/examples/iris/models/pytorch.py
+++ b/examples/iris/models/pytorch.py
@@ -44,7 +44,7 @@ def forward(self, X):
 
 model = Net()
 
-criterion = nn.CrossEntropyLoss()  # cross entropy loss
+criterion = nn.CrossEntropyLoss()
 
 optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
 
diff --git a/examples/iris/pytorch.py b/examples/iris/pytorch_request_handler.py
similarity index 75%
rename from examples/iris/pytorch.py
rename to examples/iris/pytorch_request_handler.py
index 1368be6033..cc0e7369e4 100644
--- a/examples/iris/pytorch.py
+++ b/examples/iris/pytorch_request_handler.py
@@ -6,10 +6,10 @@
 def pre_inference(sample, metadata):
     return {
         metadata[0].name: [
-            payload["sepal_length"],
-            payload["sepal_width"],
-            payload["petal_length"],
-            payload["petal_width"],
+            sample["sepal_length"],
+            sample["sepal_width"],
+            sample["petal_length"],
+            sample["petal_width"],
         ]
     }
diff --git a/examples/iris/sklearn.py b/examples/iris/sklearn_request_handler.py
similarity index 70%
rename from examples/iris/sklearn.py
rename to examples/iris/sklearn_request_handler.py
index e94bbbb1e5..002f0f060c 100644
--- a/examples/iris/sklearn.py
+++ b/examples/iris/sklearn_request_handler.py
@@ -6,10 +6,10 @@
def pre_inference(sample, metadata): return { "input": [ - payload["sepal_length"], - payload["sepal_width"], - payload["petal_length"], - payload["petal_width"], + sample["sepal_length"], + sample["sepal_width"], + sample["petal_length"], + sample["petal_width"], ] } diff --git a/pkg/operator/api/userconfig/apis.go b/pkg/operator/api/userconfig/apis.go index 61e4e737ff..33347f08b6 100644 --- a/pkg/operator/api/userconfig/apis.go +++ b/pkg/operator/api/userconfig/apis.go @@ -136,6 +136,12 @@ func (api *API) GetResourceType() resource.Type { return resource.APIType } +func (api *API) IsServingExternalModel() bool { + _, isModelReference := yaml.ExtractAtSymbolText(api.Model) + + return !isModelReference +} + func (apis APIs) Names() []string { names := make([]string, len(apis)) for i, api := range apis { diff --git a/pkg/workloads/lib/context.py b/pkg/workloads/lib/context.py index a21e089bec..f2e81a815a 100644 --- a/pkg/workloads/lib/context.py +++ b/pkg/workloads/lib/context.py @@ -684,7 +684,7 @@ def cast_compound_type(value, type_str): REQUEST_HANDLER_IMPL_VALIDATION = { "optional": [ - {"name": "pre_inference", "args": ["payload", "metadata"]}, + {"name": "pre_inference", "args": ["sample", "metadata"]}, {"name": "post_inference", "args": ["prediction", "metadata"]}, ] } From 9fb94a72ab0a01c00bfa7fc9f35651668a9b8a59 Mon Sep 17 00:00:00 2001 From: vishal Date: Fri, 5 Jul 2019 14:39:10 +0000 Subject: [PATCH 16/24] Remove trailing whitespace --- docs/apis/request-handlers.md | 2 +- examples/iris/cortex.yaml | 10 ---------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/docs/apis/request-handlers.md b/docs/apis/request-handlers.md index 07674bd396..b76ff57efc 100644 --- a/docs/apis/request-handlers.md +++ b/docs/apis/request-handlers.md @@ -1,6 +1,6 @@ # Request Handlers -Request handlers are python files that can contain a `pre_inference` function and a `post_inference` function. Both functions are optional. +Request handlers are python files that can contain a `pre_inference` function and a `post_inference` function. Both functions are optional. 
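Because both functions are optional, a handler can implement just one of them. A sketch of a post-processing-only handler, modeled on the keras and xgboost handlers added later in this series (the `int()` cast is an assumption for JSON serialization):

```python
iris_labels = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"]


# No pre_inference is defined, so the request payload is passed to the
# model unchanged; only the prediction is reshaped before responding.
def post_inference(prediction, metadata):
    predicted_class_id = int(prediction[0][0])
    return {"class_label": iris_labels[predicted_class_id], "class_index": predicted_class_id}
```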
## Implementation diff --git a/examples/iris/cortex.yaml b/examples/iris/cortex.yaml index 073462e9a2..b83d1bf71c 100644 --- a/examples/iris/cortex.yaml +++ b/examples/iris/cortex.yaml @@ -5,35 +5,25 @@ name: tensorflow model_format: tensorflow model: s3://cortex-examples/iris/tensorflow.zip - compute: - max_replicas: 1 - kind: api name: pytorch model_format: onnx request_handler: pytorch_request_handler.py model: s3://cortex-examples/iris/pytorch.onnx - compute: - max_replicas: 1 - kind: api name: xgboost model_format: onnx model: s3://cortex-examples/iris/xgboost.onnx - compute: - max_replicas: 1 - kind: api name: sklearn model_format: onnx request_handler: sklearn_request_handler.py model: s3://cortex-examples/iris/sklearn.onnx - compute: - max_replicas: 1 - kind: api name: keras model_format: onnx model: s3://cortex-examples/iris/keras.onnx - compute: - max_replicas: 1 From 5811ab0cfd5d28ab1ed0200349ce4a23f7650da5 Mon Sep 17 00:00:00 2001 From: vishal Date: Fri, 5 Jul 2019 20:18:41 +0000 Subject: [PATCH 17/24] Break out example models into their own directories --- cli/cmd/predict.go | 9 ++++--- docs/apis/apis.md | 4 +-- docs/apis/packaging-models.md | 25 ++++++++++--------- docs/apis/request-handlers.md | 15 ++++++++--- examples/iris/cortex.yaml | 12 +++++++++ examples/iris/irises_flat.json | 6 +++++ examples/iris/keras_request_handler.py | 13 ++++++++++ .../iris/models/{keras.py => keras/model.py} | 11 +------- examples/iris/models/keras/requirements.txt | 4 +++ .../models/{pytorch.py => pytorch/model.py} | 11 ++------ examples/iris/models/pytorch/requirements.txt | 2 ++ .../models/{sklearn.py => sklearn/model.py} | 10 +------- examples/iris/models/sklearn/requirements.txt | 4 +++ .../models/{xgboost.py => xgboost/model.py} | 7 ------ examples/iris/models/xgboost/requirements.txt | 3 +++ examples/iris/sklearn_request_handler.py | 14 +++++------ examples/iris/xgboost_request_handler.py | 11 ++++++++ pkg/operator/api/userconfig/apis.go | 8 +----- pkg/operator/api/userconfig/config_key.go | 2 +- pkg/operator/context/apis.go | 1 - pkg/workloads/tf_api/api.py | 6 +++-- 21 files changed, 102 insertions(+), 76 deletions(-) create mode 100644 examples/iris/keras_request_handler.py rename examples/iris/models/{keras.py => keras/model.py} (87%) create mode 100644 examples/iris/models/keras/requirements.txt rename examples/iris/models/{pytorch.py => pytorch/model.py} (97%) create mode 100644 examples/iris/models/pytorch/requirements.txt rename examples/iris/models/{sklearn.py => sklearn/model.py} (92%) create mode 100644 examples/iris/models/sklearn/requirements.txt rename examples/iris/models/{xgboost.py => xgboost/model.py} (92%) create mode 100644 examples/iris/models/xgboost/requirements.txt create mode 100644 examples/iris/xgboost_request_handler.py diff --git a/cli/cmd/predict.go b/cli/cmd/predict.go index 655a445dec..3d484407ba 100644 --- a/cli/cmd/predict.go +++ b/cli/cmd/predict.go @@ -23,6 +23,7 @@ import ( "strings" "github.com/spf13/cobra" + "github.com/cortexlabs/yaml" "github.com/cortexlabs/cortex/pkg/lib/cast" "github.com/cortexlabs/cortex/pkg/lib/errors" @@ -97,10 +98,10 @@ var predictCmd = &cobra.Command{ } apiID := predictResponse.ResourceID - api := resourcesRes.APIStatuses[apiID] - isExternalModel := resourcesRes.Context.APIs[apiName].IsServingExternalModel() + apiStatus := resourcesRes.APIStatuses[apiID] + api := resourcesRes.Context.APIs[apiName] - apiStart := libtime.LocalTimestampHuman(api.Start) + apiStart := libtime.LocalTimestampHuman(apiStatus.Start) fmt.Println("\n"
+ apiName + " was last updated on " + apiStart + "\n") if len(predictResponse.Predictions) == 1 { @@ -110,7 +111,7 @@ var predictCmd = &cobra.Command{ } for _, prediction := range predictResponse.Predictions { - if isExternalModel { + if !yaml.StartsWithEscapedAtSymbol(api.Model) { prettyResp, err := json.Pretty(prediction) if err != nil { errors.Exit(err) diff --git a/docs/apis/apis.md b/docs/apis/apis.md index 97cc0be1a6..1415343142 100644 --- a/docs/apis/apis.md +++ b/docs/apis/apis.md @@ -8,7 +8,7 @@ Serve models at scale and use them to build smarter applications. - kind: api name: # API name (required) model: # path to a zipped model dir (e.g. s3://my-bucket/model.zip) - model_format: # model format, currently support tensorflow and onnx + model_format: # model format, must be "tensorflow" or "onnx" request_handler: # path to the request handler implementation file, relative to the cortex root compute: min_replicas: # minimum number of replicas (default: 1) @@ -37,7 +37,7 @@ See [packaging models](packaging-models.md) for how to create the zipped model. ## Custom Request Handlers -Request handlers are used to decouple the interface of an API endpoint from its model. A `pre_inference` request handler can be used to modify request payloads before they are sent to the model. A `post_inference` request handler can be used to modify model predictions before they are served from the API endpoint. +Request handlers are used to decouple the interface of an API endpoint from its model. A `pre_inference` request handler can be used to modify request payloads before they are sent to the model. A `post_inference` request handler can be used to modify model predictions in the server before they are sent to the client. See [request handlers](request-handlers.md) for a detailed guide. diff --git a/docs/apis/packaging-models.md b/docs/apis/packaging-models.md index e8549a426c..4b34f161ed 100644 --- a/docs/apis/packaging-models.md +++ b/docs/apis/packaging-models.md @@ -2,7 +2,7 @@ ## TensorFlow -Zip the exported estimator output in your checkpoint directory, e.g. +Zip the exported estimator output in your checkpoint directory: ```text $ ls export/estimator saved_model.pb variables/ $ zip -r model.zip export/estimator ``` -Upload the zipped file to Amazon S3, e.g. +Upload the zipped file to Amazon S3: ```text $ aws s3 cp model.zip s3://my-bucket/model.zip ``` -Specify `model` in an API, e.g. +Reference your `model` in an API: ```yaml - kind: api @@ -28,36 +28,37 @@ Specify `model` in an API, e.g. ## ONNX -Export your trained model to an ONNX model format. An example of a trained model being exported to ONNX is shown below. +Export your trained model to an ONNX model format. An example of an sklearn model being exported to ONNX is shown below: ```Python ... +logreg_model = sklearn.linear_model.LogisticRegression(solver="lbfgs", multi_class="multinomial") -logreg_model = LogisticRegression(solver="lbfgs", multi_class="multinomial") - -# Trained model +# Train the model logreg_model.fit(X_train, y_train) # Convert to ONNX model format -onnx_model = convert_sklearn(logreg_model, initial_types=[("input", FloatTensorType([1, 4]))]) -with open("model.onnx", "wb") as f: +onnx_model = onnxmltools.convert_sklearn( + logreg_model, initial_types=[("input", onnxconverter_common.data_types.FloatTensorType([1, 4]))] +) +with open("sklearn.onnx", "wb") as f: f.write(onnx_model.SerializeToString()) ``` -See examples on how to convert models from common ML frameworks to ONNX.
+Here are examples of converting models from some of the common ML frameworks to ONNX: * [PyTorch](https://github.com/cortexlabs/cortex/blob/master/examples/iris/models/pytorch.py) * [Sklearn](https://github.com/cortexlabs/cortex/blob/master/examples/iris/models/sklearn.py) * [XGBoost](https://github.com/cortexlabs/cortex/blob/master/examples/iris/models/xgboost.py) * [Keras](https://github.com/cortexlabs/cortex/blob/master/examples/iris/models/keras.py) -Upload your trained model in ONNX format to S3 +Upload your trained model in ONNX format to Amazon S3: ```text $ aws s3 cp model.onnx s3://my-bucket/model.onnx ``` -Specify `model` in an API, e.g. +Reference your `model` in an API: ```yaml - kind: api diff --git a/docs/apis/request-handlers.md b/docs/apis/request-handlers.md index b76ff57efc..824b805223 100644 --- a/docs/apis/request-handlers.md +++ b/docs/apis/request-handlers.md @@ -18,12 +18,12 @@ def pre_inference(sample, metadata): https://microsoft.github.io/onnxruntime/api_summary.html#onnxruntime.NodeArg Returns: - A dictionary containing model input names as keys and python lists or numpy arrays as values. If the model only has a single input, then a python list or numpy array can be returned instead of a dictionary with a single key. + A dictionary containing model input names as keys and python lists or numpy arrays as values. If the model only has a single input, then a python list or numpy array can be returned. """ pass def post_inference(prediction, metadata): - """Modify prediction from model before adding it to response payload. + """Modify a prediction from the model before responding to the request. Args: prediction: The output of the model. @@ -47,6 +47,7 @@ import numpy as np iris_labels = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"] def pre_inference(sample, metadata): + # Converts key-value pairs of features to a flattened list in the order expected by the model return { metadata[0].name : [ sample["sepal_length"], @@ -58,8 +59,14 @@ def pre_inference(sample, metadata): def post_inference(prediction, metadata): - predicted_class_id = prediction[0][0] - return {"class_label": iris_labels[predicted_class_id], "class_index": predicted_class_id} + # Modify the model prediction to include the index and the label of the class predicted + probabilities = prediction[0][0] + predicted_class_id = int(np.argmax(probabilities)) + return { + "class_label": iris_labels[predicted_class_id], + "class_index": predicted_class_id, + "probabilities": probabilities, + } ``` diff --git a/examples/iris/cortex.yaml b/examples/iris/cortex.yaml index b83d1bf71c..aa4734e986 100644 --- a/examples/iris/cortex.yaml +++ b/examples/iris/cortex.yaml @@ -5,25 +5,37 @@ name: tensorflow model_format: tensorflow model: s3://cortex-examples/iris/tensorflow.zip + compute: + max_replicas: 1 - kind: api name: pytorch model_format: onnx request_handler: pytorch_request_handler.py model: s3://cortex-examples/iris/pytorch.onnx + compute: + max_replicas: 1 - kind: api name: xgboost model_format: onnx + request_handler: xgboost_request_handler.py model: s3://cortex-examples/iris/xgboost.onnx + compute: + max_replicas: 1 - kind: api name: sklearn model_format: onnx request_handler: sklearn_request_handler.py model: s3://cortex-examples/iris/sklearn.onnx + compute: + max_replicas: 1 - kind: api name: keras model_format: onnx + request_handler: keras_request_handler.py model: s3://cortex-examples/iris/keras.onnx + compute: + max_replicas: 1 diff --git a/examples/iris/irises_flat.json
b/examples/iris/irises_flat.json index a43e0234e2..4b660c4325 100644 --- a/examples/iris/irises_flat.json +++ b/examples/iris/irises_flat.json @@ -5,6 +5,12 @@ 3.0, 5.1, 1.8 + ], + [ + 5.6, + 2.5, + 3.9, + 1.1 ] ] } diff --git a/examples/iris/keras_request_handler.py b/examples/iris/keras_request_handler.py new file mode 100644 index 0000000000..8c7217f8ca --- /dev/null +++ b/examples/iris/keras_request_handler.py @@ -0,0 +1,13 @@ +import numpy as np + +iris_labels = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"] + + +def post_inference(prediction, metadata): + probabilities = prediction[0][0] + predicted_class_id = int(np.argmax(probabilities)) + return { + "class_label": iris_labels[predicted_class_id], + "class_index": predicted_class_id, + "probabilities": probabilities, + } diff --git a/examples/iris/models/keras.py b/examples/iris/models/keras/model.py similarity index 87% rename from examples/iris/models/keras.py rename to examples/iris/models/keras/model.py index 691dfb5bc3..84e5af784d 100644 --- a/examples/iris/models/keras.py +++ b/examples/iris/models/keras/model.py @@ -1,11 +1,3 @@ -""" -Requirements.txt - -scikit-learn -keras -keras2onnx -tensorflow -""" import numpy as np from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split @@ -15,8 +7,7 @@ import keras2onnx iris = load_iris() -X, y = iris.data, iris.target -y = np_utils.to_categorical(y) +X, y = iris.data, np_utils.to_categorical(iris.target) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) model = Sequential(name="iris") diff --git a/examples/iris/models/keras/requirements.txt b/examples/iris/models/keras/requirements.txt new file mode 100644 index 0000000000..dbc2f28b5f --- /dev/null +++ b/examples/iris/models/keras/requirements.txt @@ -0,0 +1,4 @@ +scikit-learn +keras +keras2onnx +tensorflow diff --git a/examples/iris/models/pytorch.py b/examples/iris/models/pytorch/model.py similarity index 97% rename from examples/iris/models/pytorch.py rename to examples/iris/models/pytorch/model.py index 90da7974da..dc7bafe119 100644 --- a/examples/iris/models/pytorch.py +++ b/examples/iris/models/pytorch/model.py @@ -1,9 +1,3 @@ -""" -Requirements.txt - -scikit-learn -torch -""" from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score, precision_score, recall_score from sklearn.datasets import load_iris import torch import torch.nn as nn import torch.nn.functional as F from torch.autograd import Variable -iris = load_iris() -X, y = iris.data, iris.target class Net(nn.Module): # define nn def __init__(self): @@ -34,6 +25,8 @@ def forward(self, X): return X +iris = load_iris() +X, y = iris.data, iris.target X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42) # wrap up with Variable in pytorch diff --git a/examples/iris/models/pytorch/requirements.txt b/examples/iris/models/pytorch/requirements.txt new file mode 100644 index 0000000000..04383c96ca --- /dev/null +++ b/examples/iris/models/pytorch/requirements.txt @@ -0,0 +1,2 @@ +scikit-learn +torch diff --git a/examples/iris/models/sklearn.py b/examples/iris/models/sklearn/model.py similarity index 92% rename from examples/iris/models/sklearn.py rename to examples/iris/models/sklearn/model.py index 9ccdc9f07c..06fc5f5e28 100644 --- a/examples/iris/models/sklearn.py +++ b/examples/iris/models/sklearn/model.py @@ -1,17 +1,9 @@ -""" -Requirements.txt - -onnxmltools -pandas -scikit-learn -skl2onnx -""" import numpy as np from sklearn.datasets import load_iris from
sklearn.model_selection import train_test_split from sklearn.linear_model import LogisticRegression -from onnxconverter_common.data_types import FloatTensorType from onnxmltools import convert_sklearn +from onnxconverter_common.data_types import FloatTensorType iris = load_iris() X, y = iris.data, iris.target diff --git a/examples/iris/models/sklearn/requirements.txt b/examples/iris/models/sklearn/requirements.txt new file mode 100644 index 0000000000..c486fc151f --- /dev/null +++ b/examples/iris/models/sklearn/requirements.txt @@ -0,0 +1,4 @@ +onnxmltools +pandas +scikit-learn +skl2onnx diff --git a/examples/iris/models/xgboost.py b/examples/iris/models/xgboost/model.py similarity index 92% rename from examples/iris/models/xgboost.py rename to examples/iris/models/xgboost/model.py index 884cc6847b..3bdef80b1d 100644 --- a/examples/iris/models/xgboost.py +++ b/examples/iris/models/xgboost/model.py @@ -1,10 +1,3 @@ -""" -Requirements.txt - -onnxmltools -scikit-learn -xgboost -""" import numpy as np import xgboost as xgb from sklearn.datasets import load_iris diff --git a/examples/iris/models/xgboost/requirements.txt b/examples/iris/models/xgboost/requirements.txt new file mode 100644 index 0000000000..10de6ba33c --- /dev/null +++ b/examples/iris/models/xgboost/requirements.txt @@ -0,0 +1,3 @@ +onnxmltools +scikit-learn +xgboost diff --git a/examples/iris/sklearn_request_handler.py b/examples/iris/sklearn_request_handler.py index 002f0f060c..45d24eb818 100644 --- a/examples/iris/sklearn_request_handler.py +++ b/examples/iris/sklearn_request_handler.py @@ -4,14 +4,12 @@ def pre_inference(sample, metadata): - return { - "input": [ - sample["sepal_length"], - sample["sepal_width"], - sample["petal_length"], - sample["petal_width"], - ] - } + return [ + sample["sepal_length"], + sample["sepal_width"], + sample["petal_length"], + sample["petal_width"], + ] def post_inference(prediction, metadata): diff --git a/examples/iris/xgboost_request_handler.py b/examples/iris/xgboost_request_handler.py new file mode 100644 index 0000000000..9c5e80d637 --- /dev/null +++ b/examples/iris/xgboost_request_handler.py @@ -0,0 +1,11 @@ +iris_labels = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"] + + +def post_inference(prediction, metadata): + logger.info(prediction) + predicted_class_id = prediction[0][0] + return { + "class_label": iris_labels[predicted_class_id], + "class_index": predicted_class_id, + "probabilities": prediction[1][0], + } diff --git a/pkg/operator/api/userconfig/apis.go b/pkg/operator/api/userconfig/apis.go index 33347f08b6..657d1ac2b4 100644 --- a/pkg/operator/api/userconfig/apis.go +++ b/pkg/operator/api/userconfig/apis.go @@ -82,7 +82,7 @@ func (api *API) UserConfigStr() string { sb.WriteString(fmt.Sprintf("%s: %s\n", ModelKey, yaml.UnescapeAtSymbol(api.Model))) if api.ModelType != UnknownModelType { - sb.WriteString(fmt.Sprintf("%s: %s\n", ModelTypeKey, api.ModelType.String())) + sb.WriteString(fmt.Sprintf("%s: %s\n", ModelFormatKey, api.ModelType.String())) } if api.RequestHandler != nil { sb.WriteString(fmt.Sprintf("%s: %s\n", RequestHandlerKey, *api.RequestHandler)) @@ -136,12 +136,6 @@ func (api *API) GetResourceType() resource.Type { return resource.APIType } -func (api *API) IsServingExternalModel() bool { - _, isModelReference := yaml.ExtractAtSymbolText(api.Model) - - return !isModelReference -} - func (apis APIs) Names() []string { names := make([]string, len(apis)) for i, api := range apis { diff --git a/pkg/operator/api/userconfig/config_key.go 
b/pkg/operator/api/userconfig/config_key.go index 4d7211f0c3..2f871a9792 100644 --- a/pkg/operator/api/userconfig/config_key.go +++ b/pkg/operator/api/userconfig/config_key.go @@ -92,7 +92,7 @@ const ( // API ModelKey = "model" - ModelTypeKey = "model_format" + ModelFormatKey = "model_format" RequestHandlerKey = "request_handler" // compute diff --git a/pkg/operator/context/apis.go b/pkg/operator/context/apis.go index 4ae5f1a0b8..086ab00371 100644 --- a/pkg/operator/context/apis.go +++ b/pkg/operator/context/apis.go @@ -64,7 +64,6 @@ func getAPIs(config *userconfig.Config, requestHandlerImplKey = pointer.String(filepath.Join(consts.RequestHandlersDir, implID)) err := uploadRequestHandler(*requestHandlerImplKey, impls[*apiConfig.RequestHandler]) - if err != nil { return nil, errors.Wrap(err, userconfig.Identify(apiConfig), "upload") } diff --git a/pkg/workloads/tf_api/api.py b/pkg/workloads/tf_api/api.py index a6f31d8f6a..760779966b 100644 --- a/pkg/workloads/tf_api/api.py +++ b/pkg/workloads/tf_api/api.py @@ -248,7 +248,9 @@ def run_predict(sample): prepared_sample = sample if request_handler is not None and util.has_function(request_handler, "pre_inference"): - prepared_sample = request_handler.pre_inference(sample, local_cache["metadata"]) + prepared_sample = request_handler.pre_inference( + sample, local_cache["metadata"]["signatureDef"] + ) if util.is_resource_ref(local_cache["api"]["model"]): transformed_sample = transform_sample(prepared_sample) @@ -275,7 +277,7 @@ def run_predict(sample): util.log_pretty(result, indent=6) if request_handler is not None and util.has_function(request_handler, "post_inference"): - result = request_handler.post_inference(result, local_cache["metadata"]) + result = request_handler.post_inference(result, local_cache["metadata"]["signatureDef"]) return result From d4d614dcf9d6ad1813fbda0f7c38f778b76c5b16 Mon Sep 17 00:00:00 2001 From: vishal Date: Fri, 5 Jul 2019 20:32:39 +0000 Subject: [PATCH 18/24] Refactor request handlers --- examples/iris/cortex.yaml | 8 ++++---- .../keras.py} | 0 .../pytorch.py} | 0 .../sklearn.py} | 0 .../xgboost.py} | 1 - 5 files changed, 4 insertions(+), 5 deletions(-) rename examples/iris/{keras_request_handler.py => request_handlers/keras.py} (100%) rename examples/iris/{pytorch_request_handler.py => request_handlers/pytorch.py} (100%) rename examples/iris/{sklearn_request_handler.py => request_handlers/sklearn.py} (100%) rename examples/iris/{xgboost_request_handler.py => request_handlers/xgboost.py} (91%) diff --git a/examples/iris/cortex.yaml b/examples/iris/cortex.yaml index aa4734e986..26b74d5770 100644 --- a/examples/iris/cortex.yaml +++ b/examples/iris/cortex.yaml @@ -11,7 +11,7 @@ - kind: api name: pytorch model_format: onnx - request_handler: pytorch_request_handler.py + request_handler: request_handlers/pytorch.py model: s3://cortex-examples/iris/pytorch.onnx compute: max_replicas: 1 @@ -19,7 +19,7 @@ - kind: api name: xgboost model_format: onnx - request_handler: xgboost_request_handler.py + request_handler: request_handlers/xgboost.py model: s3://cortex-examples/iris/xgboost.onnx compute: max_replicas: 1 @@ -27,7 +27,7 @@ - kind: api name: sklearn model_format: onnx - request_handler: sklearn_request_handler.py + request_handler: request_handlers/sklearn.py model: s3://cortex-examples/iris/sklearn.onnx compute: max_replicas: 1 @@ -35,7 +35,7 @@ - kind: api name: keras model_format: onnx - request_handler: keras_request_handler.py + request_handler: request_handlers/keras.py model: s3://cortex-examples/iris/keras.onnx 
compute: max_replicas: 1 diff --git a/examples/iris/keras_request_handler.py b/examples/iris/request_handlers/keras.py similarity index 100% rename from examples/iris/keras_request_handler.py rename to examples/iris/request_handlers/keras.py diff --git a/examples/iris/pytorch_request_handler.py b/examples/iris/request_handlers/pytorch.py similarity index 100% rename from examples/iris/pytorch_request_handler.py rename to examples/iris/request_handlers/pytorch.py diff --git a/examples/iris/sklearn_request_handler.py b/examples/iris/request_handlers/sklearn.py similarity index 100% rename from examples/iris/sklearn_request_handler.py rename to examples/iris/request_handlers/sklearn.py diff --git a/examples/iris/xgboost_request_handler.py b/examples/iris/request_handlers/xgboost.py similarity index 91% rename from examples/iris/xgboost_request_handler.py rename to examples/iris/request_handlers/xgboost.py index 9c5e80d637..093a2db31e 100644 --- a/examples/iris/xgboost_request_handler.py +++ b/examples/iris/request_handlers/xgboost.py @@ -2,7 +2,6 @@ def post_inference(prediction, metadata): - logger.info(prediction) predicted_class_id = prediction[0][0] return { "class_label": iris_labels[predicted_class_id], From 857da8d869f24fec75c3a6d8dd0f627e9fdba82d Mon Sep 17 00:00:00 2001 From: vishal Date: Fri, 5 Jul 2019 20:34:09 +0000 Subject: [PATCH 19/24] Fix linting issue with predict.go --- cli/cmd/predict.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/cmd/predict.go b/cli/cmd/predict.go index 3d484407ba..6069741d16 100644 --- a/cli/cmd/predict.go +++ b/cli/cmd/predict.go @@ -22,8 +22,8 @@ import ( "net/http" "strings" - "github.com/spf13/cobra" "github.com/cortexlabs/yaml" + "github.com/spf13/cobra" "github.com/cortexlabs/cortex/pkg/lib/cast" "github.com/cortexlabs/cortex/pkg/lib/errors" From 1bbf6e1bfb865155954789d43ec75b433808d7be Mon Sep 17 00:00:00 2001 From: vishal Date: Fri, 5 Jul 2019 20:43:44 +0000 Subject: [PATCH 20/24] Restructure examples folder --- docs/apis/packaging-models.md | 8 ++++---- examples/iris/cortex.yaml | 18 ++++-------------- .../{irises_flat.json => keras/irises.json} | 0 examples/iris/{models => }/keras/model.py | 0 .../keras.py => keras/request_handler.py} | 0 .../iris/{models => }/keras/requirements.txt | 0 examples/iris/{ => pytorch}/irises.json | 0 examples/iris/{models => }/pytorch/model.py | 0 .../pytorch.py => pytorch/request_handler.py} | 0 .../iris/{models => }/pytorch/requirements.txt | 0 examples/iris/sklearn/irises.json | 10 ++++++++++ examples/iris/{models => }/sklearn/model.py | 0 .../sklearn.py => sklearn/request_handler.py} | 0 .../iris/{models => }/sklearn/requirements.txt | 0 examples/iris/tensorflow/irises.json | 10 ++++++++++ examples/iris/xgboost/irises.json | 16 ++++++++++++++++ examples/iris/{models => }/xgboost/model.py | 0 .../xgboost.py => xgboost/request_handler.py} | 0 .../iris/{models => }/xgboost/requirements.txt | 0 19 files changed, 44 insertions(+), 18 deletions(-) rename examples/iris/{irises_flat.json => keras/irises.json} (100%) rename examples/iris/{models => }/keras/model.py (100%) rename examples/iris/{request_handlers/keras.py => keras/request_handler.py} (100%) rename examples/iris/{models => }/keras/requirements.txt (100%) rename examples/iris/{ => pytorch}/irises.json (100%) rename examples/iris/{models => }/pytorch/model.py (100%) rename examples/iris/{request_handlers/pytorch.py => pytorch/request_handler.py} (100%) rename examples/iris/{models => }/pytorch/requirements.txt (100%) create mode 
100644 examples/iris/sklearn/irises.json rename examples/iris/{models => }/sklearn/model.py (100%) rename examples/iris/{request_handlers/sklearn.py => sklearn/request_handler.py} (100%) rename examples/iris/{models => }/sklearn/requirements.txt (100%) create mode 100644 examples/iris/tensorflow/irises.json create mode 100644 examples/iris/xgboost/irises.json rename examples/iris/{models => }/xgboost/model.py (100%) rename examples/iris/{request_handlers/xgboost.py => xgboost/request_handler.py} (100%) rename examples/iris/{models => }/xgboost/requirements.txt (100%) diff --git a/docs/apis/packaging-models.md b/docs/apis/packaging-models.md index 4b34f161ed..34d2fa442a 100644 --- a/docs/apis/packaging-models.md +++ b/docs/apis/packaging-models.md @@ -47,10 +47,10 @@ with open("sklearn.onnx", "wb") as f: Here are examples of converting models from some of the common ML frameworks to ONNX: -* [PyTorch](https://github.com/cortexlabs/cortex/blob/master/examples/iris/models/pytorch.py) -* [Sklearn](https://github.com/cortexlabs/cortex/blob/master/examples/iris/models/sklearn.py) -* [XGBoost](https://github.com/cortexlabs/cortex/blob/master/examples/iris/models/xgboost.py) -* [Keras](https://github.com/cortexlabs/cortex/blob/master/examples/iris/models/keras.py) +* [PyTorch](https://github.com/cortexlabs/cortex/blob/master/examples/iris/pytorch/model.py) +* [Sklearn](https://github.com/cortexlabs/cortex/blob/master/examples/iris/sklearn/model.py) +* [XGBoost](https://github.com/cortexlabs/cortex/blob/master/examples/iris/xgboost/model.py) +* [Keras](https://github.com/cortexlabs/cortex/blob/master/examples/iris/keras/model.py) Upload your trained model in ONNX format to Amazon S3: diff --git a/examples/iris/cortex.yaml b/examples/iris/cortex.yaml index 26b74d5770..16ba8b85fa 100644 --- a/examples/iris/cortex.yaml +++ b/examples/iris/cortex.yaml @@ -5,37 +5,27 @@ name: tensorflow model_format: tensorflow model: s3://cortex-examples/iris/tensorflow.zip - compute: - max_replicas: 1 - kind: api name: pytorch model_format: onnx - request_handler: request_handlers/pytorch.py + request_handler: pytorch/request_handler.py model: s3://cortex-examples/iris/pytorch.onnx - compute: - max_replicas: 1 - kind: api name: xgboost model_format: onnx - request_handler: request_handlers/xgboost.py + request_handler: xgboost/request_handler.py model: s3://cortex-examples/iris/xgboost.onnx - compute: - max_replicas: 1 - kind: api name: sklearn model_format: onnx - request_handler: request_handlers/sklearn.py + request_handler: sklearn/request_handler.py model: s3://cortex-examples/iris/sklearn.onnx - compute: - max_replicas: 1 - kind: api name: keras model_format: onnx - request_handler: request_handlers/keras.py + request_handler: keras/request_handler.py model: s3://cortex-examples/iris/keras.onnx - compute: - max_replicas: 1 diff --git a/examples/iris/irises_flat.json b/examples/iris/keras/irises.json similarity index 100% rename from examples/iris/irises_flat.json rename to examples/iris/keras/irises.json diff --git a/examples/iris/models/keras/model.py b/examples/iris/keras/model.py similarity index 100% rename from examples/iris/models/keras/model.py rename to examples/iris/keras/model.py diff --git a/examples/iris/request_handlers/keras.py b/examples/iris/keras/request_handler.py similarity index 100% rename from examples/iris/request_handlers/keras.py rename to examples/iris/keras/request_handler.py diff --git a/examples/iris/models/keras/requirements.txt b/examples/iris/keras/requirements.txt similarity 
index 100% rename from examples/iris/models/keras/requirements.txt rename to examples/iris/keras/requirements.txt diff --git a/examples/iris/irises.json b/examples/iris/pytorch/irises.json similarity index 100% rename from examples/iris/irises.json rename to examples/iris/pytorch/irises.json diff --git a/examples/iris/models/pytorch/model.py b/examples/iris/pytorch/model.py similarity index 100% rename from examples/iris/models/pytorch/model.py rename to examples/iris/pytorch/model.py diff --git a/examples/iris/request_handlers/pytorch.py b/examples/iris/pytorch/request_handler.py similarity index 100% rename from examples/iris/request_handlers/pytorch.py rename to examples/iris/pytorch/request_handler.py diff --git a/examples/iris/models/pytorch/requirements.txt b/examples/iris/pytorch/requirements.txt similarity index 100% rename from examples/iris/models/pytorch/requirements.txt rename to examples/iris/pytorch/requirements.txt diff --git a/examples/iris/sklearn/irises.json b/examples/iris/sklearn/irises.json new file mode 100644 index 0000000000..33d1e6a5b5 --- /dev/null +++ b/examples/iris/sklearn/irises.json @@ -0,0 +1,10 @@ +{ + "samples": [ + { + "sepal_length": 5.2, + "sepal_width": 3.6, + "petal_length": 1.4, + "petal_width": 0.3 + } + ] +} diff --git a/examples/iris/models/sklearn/model.py b/examples/iris/sklearn/model.py similarity index 100% rename from examples/iris/models/sklearn/model.py rename to examples/iris/sklearn/model.py diff --git a/examples/iris/request_handlers/sklearn.py b/examples/iris/sklearn/request_handler.py similarity index 100% rename from examples/iris/request_handlers/sklearn.py rename to examples/iris/sklearn/request_handler.py diff --git a/examples/iris/models/sklearn/requirements.txt b/examples/iris/sklearn/requirements.txt similarity index 100% rename from examples/iris/models/sklearn/requirements.txt rename to examples/iris/sklearn/requirements.txt diff --git a/examples/iris/tensorflow/irises.json b/examples/iris/tensorflow/irises.json new file mode 100644 index 0000000000..33d1e6a5b5 --- /dev/null +++ b/examples/iris/tensorflow/irises.json @@ -0,0 +1,10 @@ +{ + "samples": [ + { + "sepal_length": 5.2, + "sepal_width": 3.6, + "petal_length": 1.4, + "petal_width": 0.3 + } + ] +} diff --git a/examples/iris/xgboost/irises.json b/examples/iris/xgboost/irises.json new file mode 100644 index 0000000000..4b660c4325 --- /dev/null +++ b/examples/iris/xgboost/irises.json @@ -0,0 +1,16 @@ +{ + "samples": [ + [ + 5.9, + 3.0, + 5.1, + 1.8 + ], + [ + 5.6, + 2.5, + 3.9, + 1.1 + ] + ] +} diff --git a/examples/iris/models/xgboost/model.py b/examples/iris/xgboost/model.py similarity index 100% rename from examples/iris/models/xgboost/model.py rename to examples/iris/xgboost/model.py diff --git a/examples/iris/request_handlers/xgboost.py b/examples/iris/xgboost/request_handler.py similarity index 100% rename from examples/iris/request_handlers/xgboost.py rename to examples/iris/xgboost/request_handler.py diff --git a/examples/iris/models/xgboost/requirements.txt b/examples/iris/xgboost/requirements.txt similarity index 100% rename from examples/iris/models/xgboost/requirements.txt rename to examples/iris/xgboost/requirements.txt From b3b5ffc9a1bb89683f32dae07d42c05207e7ab02 Mon Sep 17 00:00:00 2001 From: vishal Date: Fri, 5 Jul 2019 21:52:05 +0000 Subject: [PATCH 21/24] ModelType to ModelFormat --- docs/apis/packaging-models.md | 2 +- docs/apis/request-handlers.md | 4 +- pkg/operator/api/userconfig/apis.go | 12 ++--- .../{model_type.go => model_format.go} | 44 
+++++++++---------- pkg/operator/context/apis.go | 2 +- pkg/operator/workloads/api.go | 8 ++-- 6 files changed, 36 insertions(+), 36 deletions(-) rename pkg/operator/api/userconfig/{model_type.go => model_format.go} (56%) diff --git a/docs/apis/packaging-models.md b/docs/apis/packaging-models.md index 34d2fa442a..7ade2e1fcd 100644 --- a/docs/apis/packaging-models.md +++ b/docs/apis/packaging-models.md @@ -41,7 +41,7 @@ logreg_model.fit(X_train, y_train) onnx_model = onnxmltools.convert_sklearn( logreg_model, initial_types=[("input", onnxconverter_common.data_types.FloatTensorType([1, 4]))] ) -with open("sklearn.onnx", "wb") as f: +with open("model.onnx", "wb") as f: f.write(onnx_model.SerializeToString()) ``` diff --git a/docs/apis/request-handlers.md b/docs/apis/request-handlers.md index 824b805223..b487847bc0 100644 --- a/docs/apis/request-handlers.md +++ b/docs/apis/request-handlers.md @@ -47,7 +47,7 @@ import numpy as np iris_labels = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"] def pre_inference(sample, metadata): - # Converts key-value pairs of features to a flattened list in the order expected by the model + # Converts a dictionary of features to a flattened list in the order expected by the model return { metadata[0].name : [ sample["sepal_length"], @@ -59,7 +59,7 @@ def pre_inference(sample, metadata): def post_inference(prediction, metadata): - # Modify the model prediction to include the index and the label of the class predicted + # Modify the model prediction to include the index and the label of the predicted class probabilities = prediction[0][0] predicted_class_id = int(np.argmax(probabilities)) return { diff --git a/pkg/operator/api/userconfig/apis.go b/pkg/operator/api/userconfig/apis.go index 657d1ac2b4..51450ade41 100644 --- a/pkg/operator/api/userconfig/apis.go +++ b/pkg/operator/api/userconfig/apis.go @@ -34,7 +34,7 @@ type APIs []*API type API struct { ResourceFields Model string `json:"model" yaml:"model"` - ModelType ModelType `json:"model_format" yaml:"model_format"` + ModelFormat ModelFormat `json:"model_format" yaml:"model_format"` RequestHandler *string `json:"request_handler" yaml:"request_handler"` Compute *APICompute `json:"compute" yaml:"compute"` Tags Tags `json:"tags" yaml:"tags"` @@ -61,13 +61,13 @@ var apiValidation = &cr.StructValidation{ StringPtrValidation: &cr.StringPtrValidation{}, }, { - StructField: "ModelType", + StructField: "ModelFormat", StringValidation: &cr.StringValidation{ Required: true, - AllowedValues: ModelTypeStrings(), + AllowedValues: ModelFormatStrings(), }, Parser: func(str string) (interface{}, error) { - return ModelTypeFromString(str), nil + return ModelFormatFromString(str), nil }, }, apiComputeFieldValidation, @@ -81,8 +81,8 @@ func (api *API) UserConfigStr() string { sb.WriteString(api.ResourceFields.UserConfigStr()) sb.WriteString(fmt.Sprintf("%s: %s\n", ModelKey, yaml.UnescapeAtSymbol(api.Model))) - if api.ModelType != UnknownModelType { - sb.WriteString(fmt.Sprintf("%s: %s\n", ModelFormatKey, api.ModelType.String())) + if api.ModelFormat != UnknownModelFormat { + sb.WriteString(fmt.Sprintf("%s: %s\n", ModelFormatKey, api.ModelFormat.String())) } if api.RequestHandler != nil { sb.WriteString(fmt.Sprintf("%s: %s\n", RequestHandlerKey, *api.RequestHandler)) diff --git a/pkg/operator/api/userconfig/model_type.go b/pkg/operator/api/userconfig/model_format.go similarity index 56% rename from pkg/operator/api/userconfig/model_type.go rename to pkg/operator/api/userconfig/model_format.go index
117ef5b699..2398ae20dd 100644 --- a/pkg/operator/api/userconfig/model_type.go +++ b/pkg/operator/api/userconfig/model_format.go @@ -16,63 +16,63 @@ limitations under the License. package userconfig -type ModelType int +type ModelFormat int const ( - UnknownModelType ModelType = iota - TensorFlowModelType - ONNXModelType + UnknownModelFormat ModelFormat = iota + TensorFlowModelFormat + ONNXModelFormat ) -var modelDataTypes = []string{ +var modelFormats = []string{ "unknown", "tensorflow", "onnx", } -func ModelTypeFromString(s string) ModelType { - for i := 0; i < len(modelDataTypes); i++ { - if s == modelDataTypes[i] { - return ModelType(i) +func ModelFormatFromString(s string) ModelFormat { + for i := 0; i < len(modelFormats); i++ { + if s == modelFormats[i] { + return ModelFormat(i) } } - return UnknownModelType + return UnknownModelFormat } -func ModelTypeStrings() []string { - return modelDataTypes[1:] +func ModelFormatStrings() []string { + return modelFormats[1:] } -func (t ModelType) String() string { - return modelDataTypes[t] +func (t ModelFormat) String() string { + return modelFormats[t] } // MarshalText satisfies TextMarshaler -func (t ModelType) MarshalText() ([]byte, error) { +func (t ModelFormat) MarshalText() ([]byte, error) { return []byte(t.String()), nil } // UnmarshalText satisfies TextUnmarshaler -func (t *ModelType) UnmarshalText(text []byte) error { +func (t *ModelFormat) UnmarshalText(text []byte) error { enum := string(text) - for i := 0; i < len(modelDataTypes); i++ { - if enum == modelDataTypes[i] { - *t = ModelType(i) + for i := 0; i < len(modelFormats); i++ { + if enum == modelFormats[i] { + *t = ModelFormat(i) return nil } } - *t = UnknownModelType + *t = UnknownModelFormat return nil } // UnmarshalBinary satisfies BinaryUnmarshaler // Needed for msgpack -func (t *ModelType) UnmarshalBinary(data []byte) error { +func (t *ModelFormat) UnmarshalBinary(data []byte) error { return t.UnmarshalText(data) } // MarshalBinary satisfies BinaryMarshaler -func (t ModelType) MarshalBinary() ([]byte, error) { +func (t ModelFormat) MarshalBinary() ([]byte, error) { return []byte(t.String()), nil } diff --git a/pkg/operator/context/apis.go b/pkg/operator/context/apis.go index 086ab00371..1c3d2fe8d1 100644 --- a/pkg/operator/context/apis.go +++ b/pkg/operator/context/apis.go @@ -47,7 +47,7 @@ func getAPIs(config *userconfig.Config, var buf bytes.Buffer var requestHandlerImplKey *string buf.WriteString(apiConfig.Name) - buf.WriteString(apiConfig.ModelType.String()) + buf.WriteString(apiConfig.ModelFormat.String()) if apiConfig.RequestHandler != nil { for _, pythonPackage := range pythonPackages { diff --git a/pkg/operator/workloads/api.go b/pkg/operator/workloads/api.go index 49bd59a9c5..dc3f17bda9 100644 --- a/pkg/operator/workloads/api.go +++ b/pkg/operator/workloads/api.go @@ -335,13 +335,13 @@ func apiWorkloadSpecs(ctx *context.Context) ([]*WorkloadSpec, error) { var spec metav1.Object - switch api.ModelType { - case userconfig.TensorFlowModelType: + switch api.ModelFormat { + case userconfig.TensorFlowModelFormat: spec = tfAPISpec(ctx, api, workloadID, desiredReplicas) - case userconfig.ONNXModelType: + case userconfig.ONNXModelFormat: spec = onnxAPISpec(ctx, api, workloadID, desiredReplicas) default: - return nil, errors.New(api.Name, "unknown model type countered") // unexpected + return nil, errors.New(api.Name, "unknown model format encountered") // unexpected } workloadSpecs = append(workloadSpecs, &WorkloadSpec{ From 3e9a2655d746b6cc3c51e830d83e6cc1528ab366 Mon Sep 17 
00:00:00 2001 From: vishal Date: Fri, 5 Jul 2019 21:57:07 +0000 Subject: [PATCH 22/24] Tweak example post_inference handler doc --- docs/apis/request-handlers.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/apis/request-handlers.md b/docs/apis/request-handlers.md index b487847bc0..35cefe70b6 100644 --- a/docs/apis/request-handlers.md +++ b/docs/apis/request-handlers.md @@ -59,7 +59,7 @@ def pre_inference(sample, metadata): def post_inference(prediction, metadata): - # Modify the model prediction to include the index and the label of the predicted class + # Update the model prediction to include the index and the label of the predicted class probabilities = prediction[0][0] predicted_class_id = int(np.argmax(probabilities)) return { From 7231e42fce30e679da69c43bb8fb4f3690707cf9 Mon Sep 17 00:00:00 2001 From: vishal Date: Fri, 5 Jul 2019 22:02:20 +0000 Subject: [PATCH 23/24] Push down error wrapping --- pkg/operator/context/apis.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/operator/context/apis.go b/pkg/operator/context/apis.go index 1c3d2fe8d1..76ea5c7671 100644 --- a/pkg/operator/context/apis.go +++ b/pkg/operator/context/apis.go @@ -65,7 +65,7 @@ func getAPIs(config *userconfig.Config, err := uploadRequestHandler(*requestHandlerImplKey, impls[*apiConfig.RequestHandler]) if err != nil { - return nil, errors.Wrap(err, userconfig.Identify(apiConfig), "upload") + return nil, errors.Wrap(err, userconfig.Identify(apiConfig)) } } @@ -101,13 +101,13 @@ func getAPIs(config *userconfig.Config, func uploadRequestHandler(implKey string, impl []byte) error { isUploaded, err := config.AWS.IsS3File(implKey) if err != nil { - return err + return errors.Wrap(err, "upload") } if !isUploaded { err = config.AWS.UploadBytesToS3(impl, implKey) if err != nil { - return err + return errors.Wrap(err, "upload") } } From c2af7aaaae10b6bac00efdf630839b0ad79c3e3b Mon Sep 17 00:00:00 2001 From: vishal Date: Fri, 5 Jul 2019 22:09:46 +0000 Subject: [PATCH 24/24] Tweak comment in pre_inference request-handlers doc --- docs/apis/request-handlers.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/apis/request-handlers.md b/docs/apis/request-handlers.md index 35cefe70b6..19b7cd6cbf 100644 --- a/docs/apis/request-handlers.md +++ b/docs/apis/request-handlers.md @@ -47,7 +47,7 @@ import numpy as np iris_labels = ["Iris-setosa", "Iris-versicolor", "Iris-virginica"] def pre_inference(sample, metadata): - # Converts a dictionary of features to a flattened list in the order expected by the model + # Convert a dictionary of features to a flattened list in the order expected by the model return { metadata[0].name : [ sample["sepal_length"],