diff --git a/docs/deployments/api-configuration.md b/docs/deployments/api-configuration.md
index f237f2d42d..63a4915bad 100644
--- a/docs/deployments/api-configuration.md
+++ b/docs/deployments/api-configuration.md
@@ -10,7 +10,8 @@ Reference the section below which corresponds to your Predictor type: [Python](#
 
 ```yaml
 - name: # API name (required)
-  endpoint: # the endpoint for the API (default: <api_name>)
+  endpoint: # the endpoint for the API (aws only) (default: <api_name>)
+  local_port: # specify the port for the API (local only) (default: 8888)
   predictor:
     type: python
     path: # path to a python file with a PythonPredictor class definition, relative to the Cortex root (required)
@@ -18,14 +19,14 @@ Reference the section below which corresponds to your Predictor type: [Python](#
     python_path: # path to the root of your Python folder that will be appended to PYTHONPATH (default: folder containing cortex.yaml)
     image: # docker image to use for the Predictor (default: cortexlabs/python-predictor-cpu or cortexlabs/python-predictor-gpu based on compute)
     env: # dictionary of environment variables
-  tracker:
+  tracker: # (aws only)
     key: # the JSON key in the response to track (required if the response payload is a JSON object)
     model_type: # must be "classification" or "regression", so responses can be interpreted correctly (i.e. categorical vs continuous) (required)
   compute:
     cpu: # CPU request per replica, e.g. 200m or 1 (200m is equivalent to 0.2) (default: 200m)
     gpu: # GPU request per replica (default: 0)
     mem: # memory request per replica, e.g. 200Mi or 1Gi (default: Null)
-  autoscaling:
+  autoscaling: # (aws only)
     min_replicas: # minimum number of replicas (default: 1)
     max_replicas: # maximum number of replicas (default: 100)
     init_replicas: # initial number of replicas (default: <min_replicas>)
@@ -40,7 +41,7 @@ Reference the section below which corresponds to your Predictor type: [Python](#
     max_upscale_factor: # the maximum factor by which to scale up the API on a single scaling event (default: 1.5)
     downscale_tolerance: # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.05)
     upscale_tolerance: # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.05)
-  update_strategy:
+  update_strategy: # (aws only)
     max_surge: # maximum number of replicas that can be scheduled above the desired number of replicas during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%) (set to 0 to disable rolling updates)
     max_unavailable: # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
 ```
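Taken together, the two routing fields are provider-specific: `endpoint` applies only to AWS deployments, while `local_port` applies only to local ones (defaulting to 8888 if unset). As a sketch of how the new field might be used in a `cortex.yaml`, the API name, endpoint, and port below are hypothetical, not taken from this diff:

```yaml
# hypothetical deployment; name, endpoint, and port are illustrative
- name: iris-classifier
  endpoint: /classify  # used when deploying to AWS; ignored locally
  local_port: 9000     # used when deploying locally; ignored on AWS
  predictor:
    type: python
    path: predictor.py
```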
@@ -51,7 +52,8 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
 
 ```yaml
 - name: # API name (required)
-  endpoint: # the endpoint for the API (default: <api_name>)
+  endpoint: # the endpoint for the API (aws only) (default: <api_name>)
+  local_port: # specify the port for the API (local only) (default: 8888)
   predictor:
     type: tensorflow
     path: # path to a python file with a TensorFlowPredictor class definition, relative to the Cortex root (required)
@@ -62,14 +64,14 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
     image: # docker image to use for the Predictor (default: cortexlabs/tensorflow-predictor)
     tensorflow_serving_image: # docker image to use for the TensorFlow Serving container (default: cortexlabs/tensorflow-serving-gpu or cortexlabs/tensorflow-serving-cpu based on compute)
     env: # dictionary of environment variables
-  tracker:
+  tracker: # (aws only)
     key: # the JSON key in the response to track (required if the response payload is a JSON object)
     model_type: # must be "classification" or "regression", so responses can be interpreted correctly (i.e. categorical vs continuous) (required)
   compute:
     cpu: # CPU request per replica, e.g. 200m or 1 (200m is equivalent to 0.2) (default: 200m)
     gpu: # GPU request per replica (default: 0)
     mem: # memory request per replica, e.g. 200Mi or 1Gi (default: Null)
-  autoscaling:
+  autoscaling: # (aws only)
     min_replicas: # minimum number of replicas (default: 1)
     max_replicas: # maximum number of replicas (default: 100)
     init_replicas: # initial number of replicas (default: <min_replicas>)
@@ -84,7 +86,7 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
     max_upscale_factor: # the maximum factor by which to scale up the API on a single scaling event (default: 1.5)
     downscale_tolerance: # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.05)
     upscale_tolerance: # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.05)
-  update_strategy:
+  update_strategy: # (aws only)
     max_surge: # maximum number of replicas that can be scheduled above the desired number of replicas during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%) (set to 0 to disable rolling updates)
     max_unavailable: # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
 ```
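Since `autoscaling` is now marked AWS-only, replica settings only take effect on a cluster deployment. A sketch of pinning the scaling bounds for a TensorFlow API follows; every name and value is an assumption for illustration, including the `model` field and its S3 path:

```yaml
# hypothetical autoscaling override; all values are illustrative
- name: text-generator
  endpoint: /generate
  predictor:
    type: tensorflow
    path: predictor.py
    model: s3://my-bucket/text-generator  # assumed exported model path
  autoscaling:        # aws only
    min_replicas: 2   # keep at least 2 replicas warm
    max_replicas: 10  # cap scale-out at 10 replicas
    init_replicas: 2  # start at the floor (default would be min_replicas)
```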
@@ -95,7 +97,8 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
 
 ```yaml
 - name: # API name (required)
-  endpoint: # the endpoint for the API (default: <api_name>)
+  endpoint: # the endpoint for the API (aws only) (default: <api_name>)
+  local_port: # specify the port for the API (local only) (default: 8888)
   predictor:
     type: onnx
     path: # path to a python file with an ONNXPredictor class definition, relative to the Cortex root (required)
@@ -104,14 +107,14 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
     python_path: # path to the root of your Python folder that will be appended to PYTHONPATH (default: folder containing cortex.yaml)
     image: # docker image to use for the Predictor (default: cortexlabs/onnx-predictor-gpu or cortexlabs/onnx-predictor-cpu based on compute)
     env: # dictionary of environment variables
-  tracker:
+  tracker: # (aws only)
     key: # the JSON key in the response to track (required if the response payload is a JSON object)
     model_type: # must be "classification" or "regression", so responses can be interpreted correctly (i.e. categorical vs continuous) (required)
   compute:
     cpu: # CPU request per replica, e.g. 200m or 1 (200m is equivalent to 0.2) (default: 200m)
     gpu: # GPU request per replica (default: 0)
     mem: # memory request per replica, e.g. 200Mi or 1Gi (default: Null)
-  autoscaling:
+  autoscaling: # (aws only)
     min_replicas: # minimum number of replicas (default: 1)
     max_replicas: # maximum number of replicas (default: 100)
     init_replicas: # initial number of replicas (default: <min_replicas>)
@@ -126,7 +129,7 @@ See additional documentation for [autoscaling](autoscaling.md), [compute](comput
     max_upscale_factor: # the maximum factor by which to scale up the API on a single scaling event (default: 1.5)
     downscale_tolerance: # any recommendation falling within this factor below the current number of replicas will not trigger a scale down event (default: 0.05)
     upscale_tolerance: # any recommendation falling within this factor above the current number of replicas will not trigger a scale up event (default: 0.05)
-  update_strategy:
+  update_strategy: # (aws only)
     max_surge: # maximum number of replicas that can be scheduled above the desired number of replicas during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%) (set to 0 to disable rolling updates)
     max_unavailable: # maximum number of replicas that can be unavailable during an update; can be an absolute number, e.g. 5, or a percentage of desired replicas, e.g. 10% (default: 25%)
 ```
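With the `update_strategy` defaults above, an API with 4 desired replicas can briefly run 5 replicas during an update (25% `max_surge` of 4 is 1 extra) while keeping at least 3 available (25% `max_unavailable` of 4 is 1). A sketch of disabling rolling updates entirely, using the `max_surge: 0` behavior documented above; the API name and endpoint are hypothetical:

```yaml
# hypothetical update_strategy override for an ONNX API
- name: fraud-detector
  endpoint: /predict
  predictor:
    type: onnx
    path: predictor.py
  update_strategy:  # aws only
    max_surge: 0    # per the docs above, 0 disables rolling updates
```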