2 changes: 2 additions & 0 deletions Makefile
@@ -154,6 +154,7 @@ ci-build-images:
	@./build/build-image.sh images/istio-galley istio-galley
	@./build/build-image.sh images/istio-pilot istio-pilot
	@./build/build-image.sh images/istio-proxy istio-proxy
	@./build/build-image.sh images/downloader downloader

ci-push-images:
	@./build/push-image.sh manager
@@ -172,6 +173,7 @@ ci-push-images:
	@./build/push-image.sh istio-galley
	@./build/push-image.sh istio-pilot
	@./build/push-image.sh istio-proxy
	@./build/push-image.sh downloader


ci-build-cli:
2 changes: 2 additions & 0 deletions cortex.sh
@@ -161,6 +161,7 @@ export CORTEX_IMAGE_ISTIO_CITADEL="${CORTEX_IMAGE_ISTIO_CITADEL:-cortexlabs/isti
export CORTEX_IMAGE_ISTIO_GALLEY="${CORTEX_IMAGE_ISTIO_GALLEY:-cortexlabs/istio-galley:$CORTEX_VERSION_STABLE}"
export CORTEX_IMAGE_ISTIO_PILOT="${CORTEX_IMAGE_ISTIO_PILOT:-cortexlabs/istio-pilot:$CORTEX_VERSION_STABLE}"
export CORTEX_IMAGE_ISTIO_PROXY="${CORTEX_IMAGE_ISTIO_PROXY:-cortexlabs/istio-proxy:$CORTEX_VERSION_STABLE}"
export CORTEX_IMAGE_DOWNLOADER="${CORTEX_IMAGE_DOWNLOADER:-cortexlabs/downloader:$CORTEX_VERSION_STABLE}"

export CORTEX_ENABLE_TELEMETRY="${CORTEX_ENABLE_TELEMETRY:-""}"
export CORTEX_TELEMETRY_URL="${CORTEX_TELEMETRY_URL:-"https://telemetry.cortexlabs.dev"}"
@@ -220,6 +221,7 @@ function install_cortex() {
-e CORTEX_IMAGE_ISTIO_GALLEY=$CORTEX_IMAGE_ISTIO_GALLEY \
-e CORTEX_IMAGE_ISTIO_PILOT=$CORTEX_IMAGE_ISTIO_PILOT \
-e CORTEX_IMAGE_ISTIO_PROXY=$CORTEX_IMAGE_ISTIO_PROXY \
-e CORTEX_IMAGE_DOWNLOADER=$CORTEX_IMAGE_DOWNLOADER \
-e CORTEX_ENABLE_TELEMETRY=$CORTEX_ENABLE_TELEMETRY \
$CORTEX_IMAGE_MANAGER
}
2 changes: 2 additions & 0 deletions dev/registry.sh
@@ -51,6 +51,7 @@ function create_registry() {
aws ecr create-repository --repository-name=cortexlabs/cluster-autoscaler --region=$REGISTRY_REGION || true
aws ecr create-repository --repository-name=cortexlabs/nvidia --region=$REGISTRY_REGION || true
aws ecr create-repository --repository-name=cortexlabs/metrics-server --region=$REGISTRY_REGION || true
aws ecr create-repository --repository-name=cortexlabs/downloader --region=$REGISTRY_REGION || true
}

### HELPERS ###
@@ -139,6 +140,7 @@ elif [ "$cmd" = "update" ]; then
fi

build_and_push $ROOT/images/tf-api tf-api latest
build_and_push $ROOT/images/downloader downloader latest
build_and_push $ROOT/images/onnx-serve onnx-serve latest

cleanup
1 change: 1 addition & 0 deletions docs/cluster/config.md
@@ -59,6 +59,7 @@ export CORTEX_IMAGE_ISTIO_PROXY="cortexlabs/istio-proxy:master"
export CORTEX_IMAGE_ISTIO_PILOT="cortexlabs/istio-pilot:master"
export CORTEX_IMAGE_ISTIO_CITADEL="cortexlabs/istio-citadel:master"
export CORTEX_IMAGE_ISTIO_GALLEY="cortexlabs/istio-galley:master"
export CORTEX_IMAGE_DOWNLOADER="cortexlabs/downloader:master"

# Flag to enable collecting error reports and usage stats. If flag is not set to either "true" or "false", you will be prompted.
export CORTEX_ENABLE_TELEMETRY=""
1 change: 1 addition & 0 deletions docs/cluster/development.md
@@ -77,6 +77,7 @@ export CORTEX_IMAGE_ISTIO_PROXY="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortex
export CORTEX_IMAGE_ISTIO_PILOT="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/istio-pilot:latest"
export CORTEX_IMAGE_ISTIO_CITADEL="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/istio-citadel:latest"
export CORTEX_IMAGE_ISTIO_GALLEY="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/istio-galley:latest"
export CORTEX_IMAGE_DOWNLOADER="XXXXXXXX.dkr.ecr.us-west-2.amazonaws.com/cortexlabs/downloader:latest"

export CORTEX_ENABLE_TELEMETRY="false"
```
33 changes: 19 additions & 14 deletions docs/deployments/packaging-models.md
@@ -2,38 +2,43 @@

## TensorFlow

Export your trained model and zip the model directory. An example is shown below (here is the [complete example](https://github.com/cortexlabs/cortex/blob/master/examples/iris/models/tensorflow_model.py)):
Export your trained model and upload the export directory, or the checkpoint directory that contains the export directory (which is usually the case if you used `estimator.train_and_evaluate`). An example is shown below (here is the [complete example](https://github.com/cortexlabs/cortex/blob/master/examples/sentiment)):

```Python
import tensorflow as tf
import shutil
import os

...

classifier = tf.estimator.Estimator(
    model_fn=my_model, model_dir="iris", params={"hidden_units": [10, 10], "n_classes": 3}
)

exporter = tf.estimator.FinalExporter("estimator", serving_input_fn, as_text=False)
train_spec = tf.estimator.TrainSpec(train_input_fn, max_steps=1000)
eval_spec = tf.estimator.EvalSpec(eval_input_fn, exporters=[exporter], name="estimator-eval")

tf.estimator.train_and_evaluate(classifier, train_spec, eval_spec)
OUTPUT_DIR = "bert"
estimator = tf.estimator.Estimator(model_fn=model_fn...)

# TF Serving requires a special input_fn used at serving time
def serving_input_fn():
    inputs = tf.placeholder(shape=[128], dtype=tf.int32)
    features = {
        "input_ids": tf.expand_dims(inputs, 0),
        "input_mask": tf.expand_dims(inputs, 0),
        "segment_ids": tf.expand_dims(inputs, 0),
        "label_ids": tf.placeholder(shape=[0], dtype=tf.int32),
    }
    return tf.estimator.export.ServingInputReceiver(features=features, receiver_tensors=inputs)

estimator.export_savedmodel(OUTPUT_DIR, serving_input_fn, strip_default_attrs=True)
```

Upload the exported version directory to Amazon S3 using the AWS web console or CLI:
Upload the checkpoint directory to Amazon S3 using the AWS web console or CLI:

```text
$ aws s3 sync ./iris/export/estimator/156293432 s3://my-bucket/iris/156293432
$ aws s3 sync ./bert s3://my-bucket/bert
```

Reference your model in an `api`:

```yaml
- kind: api
  name: my-api
  model: s3://my-bucket/iris/156293432
  model: s3://my-bucket/bert
```
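
For reference, Cortex looks for a versioned SavedModel export under the `model` path. As a rough illustration (the bucket name and version number here are hypothetical), the uploaded prefix would contain something like:

```text
s3://my-bucket/bert/
└── 1562353043/  (version directory, usually a timestamp)
    ├── saved_model.pb
    └── variables/
        ├── variables.index
        └── variables.data-00000-of-00001
```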

## ONNX
2 changes: 1 addition & 1 deletion examples/image-classifier/cortex.yaml
@@ -3,5 +3,5 @@

- kind: api
  name: classifier
  model: s3://cortex-examples/imagenet/1566492692
  model: s3://cortex-examples/imagenet/
  request_handler: imagenet.py
23 changes: 23 additions & 0 deletions images/downloader/Dockerfile
@@ -0,0 +1,23 @@
FROM ubuntu:18.04

ENV PYTHONPATH="/src:${PYTHONPATH}"

RUN apt-get update -qq && apt-get install -y -q \
        python3 \
        python3-dev \
        python3-pip \
    && apt-get clean -qq && rm -rf /var/lib/apt/lists/* && \
    pip3 install --upgrade \
        pip \
        setuptools \
    && rm -rf /root/.cache/pip*

COPY pkg/workloads/cortex/lib/requirements.txt /src/cortex/lib/requirements.txt
RUN pip3 install -r /src/cortex/lib/requirements.txt && \
    rm -rf /root/.cache/pip*

COPY pkg/workloads/cortex/consts.py /src/cortex/
COPY pkg/workloads/cortex/lib /src/cortex/lib
COPY pkg/workloads/cortex/downloader /src/cortex/downloader

ENTRYPOINT ["/usr/bin/python3", "/src/cortex/downloader/download.py"]
1 change: 1 addition & 0 deletions manager/install_cortex.sh
@@ -64,6 +64,7 @@ function setup_configmap() {
--from-literal='IMAGE_ONNX_SERVE'=$CORTEX_IMAGE_ONNX_SERVE \
--from-literal='IMAGE_ONNX_SERVE_GPU'=$CORTEX_IMAGE_ONNX_SERVE_GPU \
--from-literal='IMAGE_TF_API'=$CORTEX_IMAGE_TF_API \
--from-literal='IMAGE_DOWNLOADER'=$CORTEX_IMAGE_DOWNLOADER \
--from-literal='IMAGE_PYTHON_PACKAGER'=$CORTEX_IMAGE_PYTHON_PACKAGER \
--from-literal='IMAGE_TF_SERVE_GPU'=$CORTEX_IMAGE_TF_SERVE_GPU \
--from-literal='ENABLE_TELEMETRY'=$CORTEX_ENABLE_TELEMETRY \
4 changes: 2 additions & 2 deletions pkg/lib/aws/aws.go
@@ -31,7 +31,7 @@ import (
type Client struct {
Region string
Bucket string
s3Client *s3.S3
S3 *s3.S3
stsClient *sts.STS
cloudWatchLogsClient *cloudwatchlogs.CloudWatchLogs
CloudWatchMetrics *cloudwatch.CloudWatch
@@ -48,7 +48,7 @@ func New(region string, bucket string, withAccountID bool) (*Client, error) {
awsClient := &Client{
Bucket: bucket,
Region: region,
s3Client: s3.New(sess),
S3: s3.New(sess),
stsClient: sts.New(sess),
CloudWatchMetrics: cloudwatch.New(sess),
cloudWatchLogsClient: cloudwatchlogs.New(sess),
16 changes: 8 additions & 8 deletions pkg/lib/aws/s3.go
@@ -70,7 +70,7 @@ func S3PathJoin(paths ...string) string {

func (c *Client) IsS3File(keys ...string) (bool, error) {
for _, key := range keys {
_, err := c.s3Client.HeadObject(&s3.HeadObjectInput{
_, err := c.S3.HeadObject(&s3.HeadObjectInput{
Bucket: aws.String(c.Bucket),
Key: aws.String(key),
})
@@ -88,7 +88,7 @@ func (c *Client) IsS3File(keys ...string) (bool, error) {

func (c *Client) IsS3Prefix(prefixes ...string) (bool, error) {
for _, prefix := range prefixes {
out, err := c.s3Client.ListObjectsV2(&s3.ListObjectsV2Input{
out, err := c.S3.ListObjectsV2(&s3.ListObjectsV2Input{
Bucket: aws.String(c.Bucket),
Prefix: aws.String(prefix),
})
@@ -138,7 +138,7 @@ func (c *Client) IsS3PathDir(s3Paths ...string) (bool, error) {
}

func (c *Client) UploadBytesToS3(data []byte, key string) error {
_, err := c.s3Client.PutObject(&s3.PutObjectInput{
_, err := c.S3.PutObject(&s3.PutObjectInput{
Body: bytes.NewReader(data),
Key: aws.String(key),
Bucket: aws.String(c.Bucket),
@@ -210,7 +210,7 @@ func (c *Client) ReadMsgpackFromS3(objPtr interface{}, key string) error {
}

func (c *Client) ReadStringFromS3(key string) (string, error) {
response, err := c.s3Client.GetObject(&s3.GetObjectInput{
response, err := c.S3.GetObject(&s3.GetObjectInput{
Key: aws.String(key),
Bucket: aws.String(c.Bucket),
})
@@ -225,7 +225,7 @@ func (c *Client) ReadStringFromS3(key string) (string, error) {
}

func (c *Client) ReadBytesFromS3(key string) ([]byte, error) {
response, err := c.s3Client.GetObject(&s3.GetObjectInput{
response, err := c.S3.GetObject(&s3.GetObjectInput{
Key: aws.String(key),
Bucket: aws.String(c.Bucket),
})
@@ -246,7 +246,7 @@ func (c *Client) ListPrefix(prefix string, maxResults int64) ([]*s3.Object, erro
MaxKeys: aws.Int64(maxResults),
}

output, err := c.s3Client.ListObjectsV2(listObjectsInput)
output, err := c.S3.ListObjectsV2(listObjectsInput)
if err != nil {
return nil, errors.Wrap(err, prefix)
}
@@ -263,7 +263,7 @@ func (c *Client) DeleteFromS3ByPrefix(prefix string, continueIfFailure bool) err

var subErr error

err := c.s3Client.ListObjectsV2Pages(listObjectsInput,
err := c.S3.ListObjectsV2Pages(listObjectsInput,
func(listObjectsOutput *s3.ListObjectsV2Output, lastPage bool) bool {
deleteObjects := make([]*s3.ObjectIdentifier, len(listObjectsOutput.Contents))
for i, object := range listObjectsOutput.Contents {
@@ -276,7 +276,7 @@ func (c *Client) DeleteFromS3ByPrefix(prefix string, continueIfFailure bool) err
Quiet: aws.Bool(true),
},
}
_, newSubErr := c.s3Client.DeleteObjects(deleteObjectsInput)
_, newSubErr := c.S3.DeleteObjects(deleteObjectsInput)
if newSubErr != nil {
subErr = newSubErr
if !continueIfFailure {
72 changes: 59 additions & 13 deletions pkg/operator/api/userconfig/apis.go
@@ -18,8 +18,11 @@ package userconfig

import (
"fmt"
"path/filepath"
"strconv"
"strings"

"github.com/aws/aws-sdk-go/service/s3"
"github.com/cortexlabs/cortex/pkg/consts"
"github.com/cortexlabs/cortex/pkg/lib/aws"
cr "github.com/cortexlabs/cortex/pkg/lib/configreader"
@@ -147,6 +150,45 @@ func IsValidTensorFlowS3Directory(path string, awsClient *aws.Client) bool {
	return true
}

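// GetTFServingExportFromS3Path returns the most recent (highest-versioned) valid
// TensorFlow Serving export found under the given S3 path, or "" if none is found.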
func GetTFServingExportFromS3Path(path string, awsClient *aws.Client) (string, error) {
	if IsValidTensorFlowS3Directory(path, awsClient) {
		return path, nil
	}

	bucket, prefix, err := aws.SplitS3Path(path)
	if err != nil {
		return "", err
	}

	resp, _ := awsClient.S3.ListObjects(&s3.ListObjectsInput{
		Bucket: &bucket,
		Prefix: &prefix,
	})

	highestVersion := int64(0)
	var highestPath string
	for _, key := range resp.Contents {
		if !strings.HasSuffix(*key.Key, "saved_model.pb") {
			continue
		}

		keyParts := strings.Split(*key.Key, "/")
		if len(keyParts) < 2 {
			continue
		}
		versionStr := keyParts[len(keyParts)-2]
		version, err := strconv.ParseInt(versionStr, 10, 64)
		if err != nil {
			version = 0
		}

		possiblePath := "s3://" + filepath.Join(bucket, filepath.Join(keyParts[:len(keyParts)-1]...))
		if version >= highestVersion && IsValidTensorFlowS3Directory(possiblePath, awsClient) {
			highestVersion = version
			highestPath = possiblePath
		}
	}

	return highestPath, nil
}

func (api *API) UserConfigStr() string {
	var sb strings.Builder
	sb.WriteString(api.ResourceFields.UserConfigStr())
@@ -190,27 +232,31 @@ func (api *API) Validate() error {
return err
}

switch api.ModelFormat {
case ONNXModelFormat:
switch {
case api.ModelFormat == ONNXModelFormat:
if ok, err := awsClient.IsS3PathFile(api.Model); err != nil || !ok {
return errors.Wrap(ErrorExternalNotFound(api.Model), Identify(api), ModelKey)
}
case TensorFlowModelFormat:
if !IsValidTensorFlowS3Directory(api.Model, awsClient) {
case api.ModelFormat == TensorFlowModelFormat:
path, err := GetTFServingExportFromS3Path(api.Model, awsClient)
if path == "" || err != nil {
return errors.Wrap(ErrorInvalidTensorflowDir(api.Model), Identify(api), ModelKey)
}
case strings.HasSuffix(api.Model, ".onnx"):
api.ModelFormat = ONNXModelFormat
if ok, err := awsClient.IsS3PathFile(api.Model); err != nil || !ok {
return errors.Wrap(ErrorExternalNotFound(api.Model), Identify(api), ModelKey)
}
default:
switch {
case strings.HasSuffix(api.Model, ".onnx"):
api.ModelFormat = ONNXModelFormat
if ok, err := awsClient.IsS3PathFile(api.Model); err != nil || !ok {
return errors.Wrap(ErrorExternalNotFound(api.Model), Identify(api), ModelKey)
}
case IsValidTensorFlowS3Directory(api.Model, awsClient):
api.ModelFormat = TensorFlowModelFormat
default:
path, err := GetTFServingExportFromS3Path(api.Model, awsClient)
if err != nil {
return errors.Wrap(err, Identify(api), ModelKey)
}
if path == "" {
return errors.Wrap(ErrorUnableToInferModelFormat(api.Model), Identify(api))
}
api.ModelFormat = TensorFlowModelFormat
api.Model = path
}

if api.ModelFormat == TensorFlowModelFormat && api.TFServing == nil {
4 changes: 2 additions & 2 deletions pkg/operator/api/userconfig/errors.go
@@ -277,8 +277,8 @@ func ErrorExternalNotFound(path string) error {

var onnxExpectedStructMessage = `For ONNX models, the path should end in .onnx`

var tfExpectedStructMessage = `For TensorFlow models, the path should be a directory with the following structure:
1523423423/ (version prefix, usually a timestamp)
var tfExpectedStructMessage = `For TensorFlow models, the path must contain a directory with the following structure:
1523423423/ (Version prefix, usually a timestamp)
├── saved_model.pb
└── variables/
    ├── variables.index
2 changes: 2 additions & 0 deletions pkg/operator/config/config.go
@@ -46,6 +46,7 @@ type CortexConfig struct {
	OperatorImage string `json:"operator_image"`
	TFServeImage string `json:"tf_serve_image"`
	TFAPIImage string `json:"tf_api_image"`
	DownloaderImage string `json:"downloader_image"`
	PythonPackagerImage string `json:"python_packager_image"`
	TFServeImageGPU string `json:"tf_serve_image_gpu"`
	ONNXServeImage string `json:"onnx_serve_image"`
Expand All @@ -66,6 +67,7 @@ func Init() error {
		OperatorImage: getStr("IMAGE_OPERATOR"),
		TFServeImage: getStr("IMAGE_TF_SERVE"),
		TFAPIImage: getStr("IMAGE_TF_API"),
		DownloaderImage: getStr("IMAGE_DOWNLOADER"),
		PythonPackagerImage: getStr("IMAGE_PYTHON_PACKAGER"),
		TFServeImageGPU: getStr("IMAGE_TF_SERVE_GPU"),
		ONNXServeImage: getStr("IMAGE_ONNX_SERVE"),