diff --git a/src/sagemaker/huggingface/__init__.py b/src/sagemaker/huggingface/__init__.py index b4417da510..355970ca8d 100644 --- a/src/sagemaker/huggingface/__init__.py +++ b/src/sagemaker/huggingface/__init__.py @@ -14,3 +14,4 @@ from __future__ import absolute_import from sagemaker.huggingface.estimator import HuggingFace # noqa: F401 +from sagemaker.huggingface.model import HuggingFaceModel, HuggingFacePredictor # noqa: F401 diff --git a/src/sagemaker/huggingface/estimator.py b/src/sagemaker/huggingface/estimator.py index 2d38db5de7..62470ba87a 100644 --- a/src/sagemaker/huggingface/estimator.py +++ b/src/sagemaker/huggingface/estimator.py @@ -23,6 +23,7 @@ warn_if_parameter_server_with_multi_gpu, validate_smdistributed, ) +from sagemaker.huggingface.model import HuggingFaceModel from sagemaker.vpc_utils import VPC_CONFIG_DEFAULT logger = logging.getLogger("sagemaker") @@ -233,8 +234,59 @@ def create_model( dependencies=None, **kwargs ): - """Placeholder docstring""" - raise NotImplementedError("Creating model with HuggingFace training job is not supported.") + """Create a SageMaker ``HuggingFaceModel`` object that can be deployed to an ``Endpoint``. + + Args: + model_server_workers (int): Optional. The number of worker processes + used by the inference server. If None, server will use one + worker per vCPU. + role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``, + which is also used during transform jobs. If not specified, the + role from the Estimator will be used. + vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on + the model. Default: use subnets and security groups from this Estimator. + * 'Subnets' (list[str]): List of subnet ids. + * 'SecurityGroupIds' (list[str]): List of security group ids. + entry_point (str): Path (absolute or relative) to the local Python source file which + should be executed as the entry point to training. 
If ``source_dir`` is specified, + then ``entry_point`` must point to a file located at the root of ``source_dir``. + If not specified, the training entry point is used. + source_dir (str): Path (absolute or relative) to a directory with any other serving + source code dependencies aside from the entry point file. + If not specified, the model source directory from training is used. + dependencies (list[str]): A list of paths to directories (absolute or relative) with + any additional libraries that will be exported to the container. + If not specified, the dependencies from training are used. + This is not supported with "local code" in Local Mode. + **kwargs: Additional kwargs passed to the :class:`~sagemaker.huggingface.model.HuggingFaceModel` + constructor. + + Returns: + sagemaker.huggingface.model.HuggingFaceModel: A SageMaker ``HuggingFaceModel`` + object. See :func:`~sagemaker.huggingface.model.HuggingFaceModel` for full details. + """ + if "image_uri" not in kwargs: + kwargs["image_uri"] = self.image_uri + + kwargs["name"] = self._get_or_create_name(kwargs.get("name")) + + return HuggingFaceModel( + role or self.role, + model_data=self.model_data, + entry_point=entry_point or self._model_entry_point(), + transformers_version=self.framework_version, + tensorflow_version=self.tensorflow_version, + pytorch_version=self.pytorch_version, + py_version=self.py_version, + source_dir=(source_dir or self._model_source_dir()), + container_log_level=self.container_log_level, + code_location=self.code_location, + model_server_workers=model_server_workers, + sagemaker_session=self.sagemaker_session, + vpc_config=self.get_vpc_config(vpc_config_override), + dependencies=(dependencies or self.dependencies), + **kwargs + ) @classmethod def _prepare_init_params_from_job_description(cls, job_details, model_channel_name=None): diff --git a/src/sagemaker/huggingface/model.py b/src/sagemaker/huggingface/model.py new file mode 100644 index 0000000000..f7fe57dd5b --- /dev/null +++ 
# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
#     http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
"""Model and Predictor classes for deploying HuggingFace models to SageMaker endpoints."""
from __future__ import absolute_import

import logging

import sagemaker
from sagemaker import image_uris
from sagemaker.deserializers import JSONDeserializer
from sagemaker.fw_utils import (
    model_code_key_prefix,
    validate_version_or_image_args,
)
from sagemaker.model import FrameworkModel, MODEL_SERVER_WORKERS_PARAM_NAME
from sagemaker.predictor import Predictor
from sagemaker.serializers import JSONSerializer

logger = logging.getLogger("sagemaker")


class HuggingFacePredictor(Predictor):
    """A Predictor for inference against HuggingFace Endpoints.

    Unless overridden, requests are serialized with ``JSONSerializer`` and
    responses are parsed with ``JSONDeserializer``.
    """

    def __init__(
        self,
        endpoint_name,
        sagemaker_session=None,
        serializer=None,
        deserializer=None,
    ):
        """Initialize an ``HuggingFacePredictor``.

        Args:
            endpoint_name (str): The name of the endpoint to perform inference
                on.
            sagemaker_session (sagemaker.session.Session): Session object which
                manages interactions with Amazon SageMaker APIs and any other
                AWS services needed. Passed through unchanged to
                :class:`~sagemaker.predictor.Predictor`.
            serializer (sagemaker.serializers.BaseSerializer): Optional. If
                ``None`` (the default), a new ``JSONSerializer`` is used, so
                input data is sent as JSON.
            deserializer (sagemaker.deserializers.BaseDeserializer): Optional.
                If ``None`` (the default), a new ``JSONDeserializer`` is used,
                so the response is parsed from JSON.
        """
        # Build the defaults per instance: a default evaluated in the signature
        # would be a single object shared (and mutable) across every predictor.
        super(HuggingFacePredictor, self).__init__(
            endpoint_name,
            sagemaker_session,
            serializer=JSONSerializer() if serializer is None else serializer,
            deserializer=JSONDeserializer() if deserializer is None else deserializer,
        )


def _validate_pt_tf_versions(pytorch_version, tensorflow_version, image_uri):
    """Check that exactly one base framework version is given when no image is set.

    Args:
        pytorch_version (str): PyTorch version, or ``None``.
        tensorflow_version (str): TensorFlow version, or ``None``.
        image_uri (str): Explicit image URI, or ``None``. When it is not
            ``None`` no validation is performed.

    Raises:
        ValueError: If ``image_uri`` is ``None`` and ``pytorch_version`` and
            ``tensorflow_version`` are either both set or both ``None``.
    """

    # An explicit image already pins the framework; nothing to check.
    if image_uri is not None:
        return

    if tensorflow_version is not None and pytorch_version is not None:
        raise ValueError(
            "tensorflow_version and pytorch_version are both not None. "
            "Specify only tensorflow_version or pytorch_version."
        )
    if tensorflow_version is None and pytorch_version is None:
        raise ValueError(
            "tensorflow_version and pytorch_version are both None. "
            "Specify either tensorflow_version or pytorch_version."
        )


class HuggingFaceModel(FrameworkModel):
    """An HuggingFace SageMaker ``Model`` that can be deployed to a SageMaker ``Endpoint``."""

    _framework_name = "huggingface"

    def __init__(
        self,
        role,
        model_data=None,
        entry_point=None,
        transformers_version=None,
        tensorflow_version=None,
        pytorch_version=None,
        py_version=None,
        image_uri=None,
        predictor_cls=HuggingFacePredictor,
        model_server_workers=None,
        **kwargs,
    ):
        """Initialize a HuggingFaceModel.

        Args:
            role (str): An AWS IAM role (either name or full ARN). The Amazon
                SageMaker training jobs and APIs that create Amazon SageMaker
                endpoints use this role to access training data and model
                artifacts. After the endpoint is created, the inference code
                might use the IAM role, if it needs to access an AWS resource.
            model_data (str): The S3 location of a SageMaker model data
                ``.tar.gz`` file. Defaults to None.
            entry_point (str): Path (absolute or relative) to the Python source
                file which should be executed as the entry point to model
                hosting. If ``source_dir`` is specified, then ``entry_point``
                must point to a file located at the root of ``source_dir``.
                Defaults to None.
            transformers_version (str): transformers version you want to use for
                executing your inference code. Defaults to None. Required
                unless ``image_uri`` is provided.
            tensorflow_version (str): TensorFlow version you want to use for
                executing your inference code. Defaults to ``None``. Required unless
                ``pytorch_version`` or ``image_uri`` is provided. List of supported
                versions:
                https://github.com/aws/sagemaker-python-sdk#huggingface-sagemaker-estimators.
            pytorch_version (str): PyTorch version you want to use for
                executing your inference code. Defaults to ``None``. Required unless
                ``tensorflow_version`` or ``image_uri`` is provided. List of supported
                versions:
                https://github.com/aws/sagemaker-python-sdk#huggingface-sagemaker-estimators.
            py_version (str): Python version you want to use for executing your
                inference code. Defaults to ``None``. Required unless
                ``image_uri`` is provided.
            image_uri (str): A Docker image URI (default: None). If not specified, a
                default image for HuggingFace will be used. If ``transformers_version``
                or ``py_version`` are ``None``, then ``image_uri`` is required. If
                also ``None``, then a ``ValueError`` will be raised.
            predictor_cls (callable[str, sagemaker.session.Session]): A function
                to call to create a predictor with an endpoint name and
                SageMaker ``Session``. If specified, ``deploy()`` returns the
                result of invoking this function on the created endpoint name.
            model_server_workers (int): Optional. The number of worker processes
                used by the inference server. If None, server will use one
                worker per vCPU.
            **kwargs: Keyword arguments passed to the superclass
                :class:`~sagemaker.model.FrameworkModel` and, subsequently, its
                superclass :class:`~sagemaker.model.Model`.

        Raises:
            ValueError: If ``py_version`` is ``"py2"``, or if ``image_uri`` is
                ``None`` and ``tensorflow_version``/``pytorch_version`` are both
                set or both ``None``.

        .. tip::

            You can find additional parameters for initializing this class at
            :class:`~sagemaker.model.FrameworkModel` and
            :class:`~sagemaker.model.Model`.
        """
        validate_version_or_image_args(transformers_version, py_version, image_uri)
        _validate_pt_tf_versions(
            pytorch_version=pytorch_version,
            tensorflow_version=tensorflow_version,
            image_uri=image_uri,
        )
        if py_version == "py2":
            raise ValueError("py2 is not supported with HuggingFace images")
        self.framework_version = transformers_version
        self.pytorch_version = pytorch_version
        self.tensorflow_version = tensorflow_version
        self.py_version = py_version

        super(HuggingFaceModel, self).__init__(
            model_data, image_uri, role, entry_point, predictor_cls=predictor_cls, **kwargs
        )

        self.model_server_workers = model_server_workers

    def register(
        self,
        content_types,
        response_types,
        inference_instances,
        transform_instances,
        model_package_name=None,
        model_package_group_name=None,
        image_uri=None,
        model_metrics=None,
        metadata_properties=None,
        marketplace_cert=False,
        approval_status=None,
        description=None,
    ):
        """Creates a model package for creating SageMaker models or listing on Marketplace.

        Args:
            content_types (list): The supported MIME types for the input data.
            response_types (list): The supported MIME types for the output data.
            inference_instances (list): A list of the instance types that are used to
                generate inferences in real-time. The first entry is used to pick the
                serving image when no image URI is available.
            transform_instances (list): A list of the instance types on which a transformation
                job can be run or on which an endpoint can be deployed.
            model_package_name (str): Model Package name, exclusive to `model_package_group_name`,
                using `model_package_name` makes the Model Package un-versioned (default: None).
            model_package_group_name (str): Model Package Group name, exclusive to
                `model_package_name`, using `model_package_group_name` makes the Model Package
                versioned (default: None).
            image_uri (str): Inference image uri for the container. If given, it replaces
                ``self.image_uri``; otherwise ``self.image_uri`` is kept, or resolved via
                ``serving_image_uri`` when it is unset (default: None).
            model_metrics (ModelMetrics): ModelMetrics object (default: None).
            metadata_properties (MetadataProperties): MetadataProperties object (default: None).
            marketplace_cert (bool): A boolean value indicating if the Model Package is certified
                for AWS Marketplace (default: False).
            approval_status (str): Model Approval Status, values can be "Approved", "Rejected",
                or "PendingManualApproval" (default: "PendingManualApproval").
            description (str): Model Package description (default: None).

        Returns:
            A `sagemaker.model.ModelPackage` instance.
        """
        instance_type = inference_instances[0]
        self._init_sagemaker_session_if_does_not_exist(instance_type)

        if image_uri:
            self.image_uri = image_uri
        if not self.image_uri:
            self.image_uri = self.serving_image_uri(
                region_name=self.sagemaker_session.boto_session.region_name,
                instance_type=instance_type,
            )
        return super(HuggingFaceModel, self).register(
            content_types,
            response_types,
            inference_instances,
            transform_instances,
            model_package_name,
            model_package_group_name,
            image_uri,
            model_metrics,
            metadata_properties,
            marketplace_cert,
            approval_status,
            description,
        )

    def prepare_container_def(self, instance_type=None, accelerator_type=None):
        """A container definition with framework configuration set in model environment variables.

        Args:
            instance_type (str): The EC2 instance type to deploy this Model to.
                For example, 'ml.p2.xlarge'.
            accelerator_type (str): The Elastic Inference accelerator type to
                deploy to the instance for loading and making inferences to the
                model.

        Returns:
            dict[str, str]: A container definition object usable with the
            CreateModel API.

        Raises:
            ValueError: If neither ``self.image_uri`` nor ``instance_type`` is set.
        """
        deploy_image = self.image_uri
        if not deploy_image:
            if instance_type is None:
                raise ValueError(
                    "Must supply either an instance type (for choosing CPU vs GPU) or an image URI."
                )

            region_name = self.sagemaker_session.boto_session.region_name
            deploy_image = self.serving_image_uri(
                region_name, instance_type, accelerator_type=accelerator_type
            )

        deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image)
        self._upload_code(deploy_key_prefix, repack=True)
        # Copy so the framework variables do not leak back into ``self.env``.
        deploy_env = dict(self.env)
        deploy_env.update(self._framework_env_vars())

        if self.model_server_workers:
            deploy_env[MODEL_SERVER_WORKERS_PARAM_NAME.upper()] = str(self.model_server_workers)
        # Fall back to the original artifact when no repacked model data is set.
        return sagemaker.container_def(
            deploy_image, self.repacked_model_data or self.model_data, deploy_env
        )

    def serving_image_uri(self, region_name, instance_type, accelerator_type=None):
        """Create a URI for the serving image.

        Args:
            region_name (str): AWS region where the image is uploaded.
            instance_type (str): SageMaker instance type. Used to determine device type
                (cpu/gpu/family-specific optimized).
            accelerator_type (str): The Elastic Inference accelerator type to
                deploy to the instance for loading and making inferences to the
                model.

        Returns:
            str: The appropriate image URI based on the given parameters.

        Raises:
            ValueError: If both ``tensorflow_version`` and ``pytorch_version`` are
                ``None``, since the base framework of the image cannot be chosen.
        """
        if self.tensorflow_version is not None:  # pylint: disable=no-member
            base_framework_version = (
                f"tensorflow{self.tensorflow_version}"  # pylint: disable=no-member
            )
        elif self.pytorch_version is not None:  # pylint: disable=no-member
            base_framework_version = f"pytorch{self.pytorch_version}"  # pylint: disable=no-member
        else:
            # Reachable when the model was built from ``image_uri`` alone; fail with a
            # clear message instead of looking up the bogus version "pytorchNone".
            raise ValueError(
                "tensorflow_version and pytorch_version are both None. "
                "Specify either tensorflow_version or pytorch_version, or provide image_uri."
            )
        return image_uris.retrieve(
            self._framework_name,
            region_name,
            version=self.framework_version,
            py_version=self.py_version,
            instance_type=instance_type,
            accelerator_type=accelerator_type,
            image_scope="inference",
            base_framework_version=base_framework_version,
        )
+ "py_versions": ["py37"], + "registries": { + "af-south-1": "626614931356", + "ap-east-1": "871362719292", + "ap-northeast-1": "763104351884", + "ap-northeast-2": "763104351884", + "ap-south-1": "763104351884", + "ap-southeast-1": "763104351884", + "ap-southeast-2": "763104351884", + "ca-central-1": "763104351884", + "cn-north-1": "727897471807", + "cn-northwest-1": "727897471807", + "eu-central-1": "763104351884", + "eu-north-1": "763104351884", + "eu-south-1": "692866216735", + "eu-west-1": "763104351884", + "eu-west-2": "763104351884", + "eu-west-3": "763104351884", + "me-south-1": "217643126080", + "sa-east-1": "763104351884", + "us-east-1": "763104351884", + "us-east-2": "763104351884", + "us-gov-west-1": "442386744353", + "us-iso-east-1": "886529160074", + "us-west-1": "763104351884", + "us-west-2": "763104351884" + }, + "repository": "huggingface-tensorflow-inference" + } + } + } } } diff --git a/src/sagemaker/model.py b/src/sagemaker/model.py index eeb14518dd..8ebd79147e 100644 --- a/src/sagemaker/model.py +++ b/src/sagemaker/model.py @@ -1114,7 +1114,7 @@ def prepare_container_def(self, instance_type=None, accelerator_type=None): def _upload_code(self, key_prefix, repack=False): """Placeholder Docstring""" local_code = utils.get_config_value("local.local_code", self.sagemaker_session.config) - if self.sagemaker_session.local_mode and local_code: + if (self.sagemaker_session.local_mode and local_code) or self.entry_point is None: self.uploaded_code = None elif not repack: bucket = self.bucket or self.sagemaker_session.default_bucket() @@ -1127,7 +1127,7 @@ def _upload_code(self, key_prefix, repack=False): dependencies=self.dependencies, ) - if repack: + if repack and self.model_data is not None and self.entry_point is not None: bucket = self.bucket or self.sagemaker_session.default_bucket() repacked_model_data = "s3://" + "/".join([bucket, key_prefix, "model.tar.gz"]) @@ -1162,8 +1162,8 @@ def _framework_env_vars(self): dir_name = None return { - 
# NOTE(review): skipped unconditionally with no reason given; the image URI below
# points at a specific account's ECR repository — presumably not generally
# accessible yet. Confirm and add a ``reason=`` when known.
@pytest.mark.skip
def test_huggingface_inference(sagemaker_session, gpu_instance_type):
    """Deploy a HuggingFace model from the Hub and run one text-classification request."""
    env = {
        "HF_MODEL_ID": "sshleifer/tiny-distilbert-base-uncased-finetuned-sst-2-english",
        "HF_TASK": "text-classification",
    }
    endpoint_name = unique_name_from_base("test-hf-inference")

    model = HuggingFaceModel(
        sagemaker_session=sagemaker_session,
        role="SageMakerRole",
        image_uri="214660476583.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-inference:gpu",
        env=env,
    )
    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
        model.deploy(
            instance_type=gpu_instance_type, initial_instance_count=1, endpoint_name=endpoint_name
        )

        # Reuse the session the endpoint was deployed with; without it the predictor
        # would fall back to a default session instead of the one under test.
        predictor = HuggingFacePredictor(
            endpoint_name=endpoint_name, sagemaker_session=sagemaker_session
        )
        data = {
            "inputs": "Camera - You are awarded a SiPix Digital Camera!"
            "call 09061221066 fromm landline. Delivery within 28 days."
        }
        output = predictor.predict(data)
        assert "score" in output[0]