From cfea3ea44d79c3e0060a1591f5daaa6d464af6d1 Mon Sep 17 00:00:00 2001 From: vishal Date: Tue, 17 Sep 2019 18:18:23 -0400 Subject: [PATCH 1/3] Add prediction metrics tracking documentation --- docs/deployments/apis.md | 8 ++++++-- examples/iris-classifier/cortex.yaml | 10 ++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/docs/deployments/apis.md b/docs/deployments/apis.md index aa0159ab31..577e77142b 100644 --- a/docs/deployments/apis.md +++ b/docs/deployments/apis.md @@ -10,9 +10,9 @@ Serve models at scale. model: # path to an exported model (e.g. s3://my-bucket/exported_model) model_format: # model format, must be "tensorflow" or "onnx" (default: "onnx" if model path ends with .onnx, "tensorflow" if model path ends with .zip or is a directory) request_handler: # path to the request handler implementation file, relative to the cortex root - tf_signature_key: # name of the signature def to use for prediction (required if your model has more than one signature def) + tf_signature_key: # name of the signature def to use for prediction (required if your model has more than one signature def) tracker: - key: # json key to track if the response payload is a dictionary + key: # key to track, only required if the response payload is a json object model_type: # model type, must be "classification" or "regression" compute: min_replicas: # minimum number of replicas (default: 1) @@ -43,6 +43,10 @@ Request handlers are used to decouple the interface of an API endpoint from its See [request handlers](request-handlers.md) for a detailed guide. +## Prediction Monitoring + +The `tracker` can be configured to collect API prediction metrics and display real time stats in `cortex get `. The tracker looks for scalar values in the response payload (after the execution of `post_inference` request handler). If the response payload is a json object, the `key` can be set to extract the desired scalar value. 
For regression models, the tracker should be configured with `model_type: regression` to collect float values and display regreission stats such as min, max and avg. For classification models, the tracker should be configured with `model_type: classification` to collect integer or string values and display the class distribution. + ## Debugging You can log more information about each request by adding a `?debug=true` parameter to your requests. This will print: diff --git a/examples/iris-classifier/cortex.yaml b/examples/iris-classifier/cortex.yaml index e59a2f6c21..dcdce7b57f 100644 --- a/examples/iris-classifier/cortex.yaml +++ b/examples/iris-classifier/cortex.yaml @@ -5,23 +5,33 @@ name: tensorflow model: s3://cortex-examples/iris/tensorflow request_handler: handlers/tensorflow.py + tracker: + model_type: classification - kind: api name: pytorch model: s3://cortex-examples/iris/pytorch.onnx request_handler: handlers/pytorch.py + tracker: + model_type: classification - kind: api name: keras model: s3://cortex-examples/iris/keras.onnx request_handler: handlers/keras.py + tracker: + model_type: classification - kind: api name: xgboost model: s3://cortex-examples/iris/xgboost.onnx request_handler: handlers/xgboost.py + tracker: + model_type: classification - kind: api name: sklearn model: s3://cortex-examples/iris/sklearn.onnx request_handler: handlers/sklearn.py + tracker: + model_type: classification From 9852bdc0c6962308a8fdb28394387f0f927e25db Mon Sep 17 00:00:00 2001 From: Omer Spillinger Date: Tue, 17 Sep 2019 16:06:59 -0700 Subject: [PATCH 2/3] Update apis.md --- docs/deployments/apis.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/deployments/apis.md b/docs/deployments/apis.md index 577e77142b..44bf8f83af 100644 --- a/docs/deployments/apis.md +++ b/docs/deployments/apis.md @@ -12,7 +12,7 @@ Serve models at scale. 
request_handler: # path to the request handler implementation file, relative to the cortex root tf_signature_key: # name of the signature def to use for prediction (required if your model has more than one signature def) tracker: - key: # key to track, only required if the response payload is a json object + key: # key to track (required if the response payload is a JSON object) model_type: # model type, must be "classification" or "regression" compute: min_replicas: # minimum number of replicas (default: 1) @@ -45,7 +45,7 @@ See [request handlers](request-handlers.md) for a detailed guide. ## Prediction Monitoring -The `tracker` can be configured to collect API prediction metrics and display real time stats in `cortex get `. The tracker looks for scalar values in the response payload (after the execution of `post_inference` request handler). If the response payload is a json object, the `key` can be set to extract the desired scalar value. For regression models, the tracker should be configured with `model_type: regression` to collect float values and display regreission stats such as min, max and avg. For classification models, the tracker should be configured with `model_type: classification` to collect integer or string values and display the class distribution. +The `tracker` can be configured to collect API prediction metrics and display real-time stats in `cortex get `. The tracker looks for scalar values in the response payload (after the execution of the `post_inference` request handler). If the response payload is a JSON object, `key` can be set to extract the desired scalar value. For regression models, the tracker should be configured with `model_type: regression` to collect float values and display regression stats such as min, max and avg. For classification models, the tracker should be configured with `model_type: classification` to collect integer or string values and display the class distribution. 
## Debugging @@ -56,10 +56,10 @@ You can log more information about each request by adding a `?debug=true` parame 3. The value after running inference 4. The value after running the `post_inference` function (if applicable) -## Autoscaling replicas +## Autoscaling Replicas Cortex adjusts the number of replicas that are serving predictions by monitoring the compute resource usage of each API. The number of replicas will be at least `min_replicas` and no more than `max_replicas`. -## Autoscaling nodes +## Autoscaling Nodes Cortex spins up and down nodes based on the aggregate resource requests of all APIs. The number of nodes will be at least `$CORTEX_NODES_MIN` and no more than `$CORTEX_NODES_MAX` (configured during installation and modifiable via the [AWS console](https://docs.aws.amazon.com/autoscaling/ec2/userguide/as-manual-scaling.html)). From ad0adbceacdbbd66d9da72cb678799d77f579a7a Mon Sep 17 00:00:00 2001 From: David Eliahu Date: Tue, 17 Sep 2019 16:15:23 -0700 Subject: [PATCH 3/3] Update apis.md --- docs/deployments/apis.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/deployments/apis.md b/docs/deployments/apis.md index 44bf8f83af..1eed87bdf1 100644 --- a/docs/deployments/apis.md +++ b/docs/deployments/apis.md @@ -45,7 +45,7 @@ See [request handlers](request-handlers.md) for a detailed guide. ## Prediction Monitoring -The `tracker` can be configured to collect API prediction metrics and display real-time stats in `cortex get `. The tracker looks for scalar values in the response payload (after the execution of the `post_inference` request handler). If the response payload is a JSON object, `key` can be set to extract the desired scalar value. For regression models, the tracker should be configured with `model_type: regression` to collect float values and display regression stats such as min, max and avg. 
For classification models, the tracker should be configured with `model_type: classification` to collect integer or string values and display the class distribution. +`tracker` can be configured to collect API prediction metrics and display real-time stats in `cortex get <api_name>`. The tracker looks for scalar values in the response payload (after the execution of the `post_inference` request handler, if provided). If the response payload is a JSON object, `key` can be set to extract the desired scalar value. For regression models, the tracker should be configured with `model_type: regression` to collect float values and display regression stats such as min, max and average. For classification models, the tracker should be configured with `model_type: classification` to collect integer or string values and display the class distribution. ## Debugging