diff --git a/src/sagemaker/huggingface/__init__.py b/src/sagemaker/huggingface/__init__.py
index b4417da510..355970ca8d 100644
--- a/src/sagemaker/huggingface/__init__.py
+++ b/src/sagemaker/huggingface/__init__.py
@@ -14,3 +14,4 @@
 from __future__ import absolute_import
 
 from sagemaker.huggingface.estimator import HuggingFace  # noqa: F401
+from sagemaker.huggingface.model import HuggingFaceModel, HuggingFacePredictor  # noqa: F401
diff --git a/src/sagemaker/huggingface/estimator.py b/src/sagemaker/huggingface/estimator.py
index 2d38db5de7..62470ba87a 100644
--- a/src/sagemaker/huggingface/estimator.py
+++ b/src/sagemaker/huggingface/estimator.py
@@ -23,6 +23,7 @@
     warn_if_parameter_server_with_multi_gpu,
     validate_smdistributed,
 )
+from sagemaker.huggingface.model import HuggingFaceModel
 from sagemaker.vpc_utils import VPC_CONFIG_DEFAULT
 
 logger = logging.getLogger("sagemaker")
@@ -233,8 +234,59 @@ def create_model(
         dependencies=None,
         **kwargs
     ):
-        """Placeholder docstring"""
-        raise NotImplementedError("Creating model with HuggingFace training job is not supported.")
+        """Create a SageMaker ``HuggingFaceModel`` object that can be deployed to an ``Endpoint``.
+
+        Args:
+            model_server_workers (int): Optional. The number of worker processes
+                used by the inference server. If None, server will use one
+                worker per vCPU.
+            role (str): The ``ExecutionRoleArn`` IAM Role ARN for the ``Model``,
+                which is also used during transform jobs. If not specified, the
+                role from the Estimator will be used.
+            vpc_config_override (dict[str, list[str]]): Optional override for VpcConfig set on
+                the model. Default: use subnets and security groups from this Estimator.
+                * 'Subnets' (list[str]): List of subnet ids.
+                * 'SecurityGroupIds' (list[str]): List of security group ids.
+            entry_point (str): Path (absolute or relative) to the local Python source file which
+                should be executed as the entry point to model hosting. If ``source_dir`` is
+                specified, then ``entry_point`` must point to a file located at the root of
+                ``source_dir``. If not specified, the training entry point is used.
+            source_dir (str): Path (absolute or relative) to a directory with any other serving
+                source code dependencies aside from the entry point file.
+                If not specified, the model source directory from training is used.
+            dependencies (list[str]): A list of paths to directories (absolute or relative) with
+                any additional libraries that will be exported to the container.
+                If not specified, the dependencies from training are used.
+                This is not supported with "local code" in Local Mode.
+            **kwargs: Additional kwargs passed to the :class:`~sagemaker.huggingface.model.HuggingFaceModel`
+                constructor.
+
+        Returns:
+            sagemaker.huggingface.model.HuggingFaceModel: A SageMaker ``HuggingFaceModel``
+            object. See :class:`~sagemaker.huggingface.model.HuggingFaceModel` for full details.
+        """
+        if "image_uri" not in kwargs:
+            kwargs["image_uri"] = self.image_uri
+
+        kwargs["name"] = self._get_or_create_name(kwargs.get("name"))
+
+        return HuggingFaceModel(
+            role or self.role,
+            model_data=self.model_data,
+            entry_point=entry_point or self._model_entry_point(),
+            transformers_version=self.framework_version,
+            tensorflow_version=self.tensorflow_version,
+            pytorch_version=self.pytorch_version,
+            py_version=self.py_version,
+            source_dir=(source_dir or self._model_source_dir()),
+            container_log_level=self.container_log_level,
+            code_location=self.code_location,
+            model_server_workers=model_server_workers,
+            sagemaker_session=self.sagemaker_session,
+            vpc_config=self.get_vpc_config(vpc_config_override),
+            dependencies=(dependencies or self.dependencies),
+            **kwargs
+        )
 
     @classmethod
     def _prepare_init_params_from_job_description(cls, job_details, model_channel_name=None):
diff --git a/src/sagemaker/huggingface/model.py b/src/sagemaker/huggingface/model.py
new file mode 100644
index 0000000000..f7fe57dd5b
--- /dev/null
+++ b/src/sagemaker/huggingface/model.py
@@ -0,0 +1,309 @@
+# Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+"""HuggingFace ``Model`` and ``Predictor`` classes for deploying to SageMaker endpoints."""
+from __future__ import absolute_import
+
+import logging
+
+import sagemaker
+from sagemaker import image_uris
+from sagemaker.deserializers import JSONDeserializer
+from sagemaker.fw_utils import (
+    model_code_key_prefix,
+    validate_version_or_image_args,
+)
+from sagemaker.model import FrameworkModel, MODEL_SERVER_WORKERS_PARAM_NAME
+from sagemaker.predictor import Predictor
+from sagemaker.serializers import JSONSerializer
+
+logger = logging.getLogger("sagemaker")
+
+
+class HuggingFacePredictor(Predictor):
+    """A Predictor for inference against HuggingFace Endpoints.
+
+    By default, requests are serialized with ``JSONSerializer`` and responses
+    are deserialized with ``JSONDeserializer``.
+    """
+
+    def __init__(
+        self,
+        endpoint_name,
+        sagemaker_session=None,
+        serializer=JSONSerializer(),
+        deserializer=JSONDeserializer(),
+    ):
+        """Initialize an ``HuggingFacePredictor``.
+
+        Args:
+            endpoint_name (str): The name of the endpoint to perform inference
+                on.
+            sagemaker_session (sagemaker.session.Session): Session object which
+                manages interactions with Amazon SageMaker APIs and any other
+                AWS services needed. Defaults to None, which is passed through
+                unchanged to the ``Predictor`` base class.
+            serializer (sagemaker.serializers.BaseSerializer): Optional. Defaults
+                to a ``JSONSerializer`` instance created once at definition time
+                and shared by every predictor that does not override it.
+            deserializer (sagemaker.deserializers.BaseDeserializer): Optional.
+                Defaults to a shared ``JSONDeserializer`` instance.
+        """
+        super(HuggingFacePredictor, self).__init__(
+            endpoint_name,
+            sagemaker_session,
+            serializer=serializer,
+            deserializer=deserializer,
+        )
+
+
+def _validate_pt_tf_versions(pytorch_version, tensorflow_version, image_uri):
+    """Require exactly one of the PyTorch/TensorFlow versions unless ``image_uri`` is given."""
+
+    if image_uri is not None:
+        return  # an explicit image was given: skip the version validation
+
+    if tensorflow_version is not None and pytorch_version is not None:
+        raise ValueError(
+            "tensorflow_version and pytorch_version are both not None. "
+            "Specify only tensorflow_version or pytorch_version."
+        )
+    if tensorflow_version is None and pytorch_version is None:
+        raise ValueError(
+            "tensorflow_version and pytorch_version are both None. "
+            "Specify either tensorflow_version or pytorch_version."
+        )
+
+
+class HuggingFaceModel(FrameworkModel):
+    """A HuggingFace SageMaker ``Model`` that can be deployed to a SageMaker ``Endpoint``."""
+
+    _framework_name = "huggingface"
+
+    def __init__(
+        self,
+        role,
+        model_data=None,
+        entry_point=None,
+        transformers_version=None,
+        tensorflow_version=None,
+        pytorch_version=None,
+        py_version=None,
+        image_uri=None,
+        predictor_cls=HuggingFacePredictor,
+        model_server_workers=None,
+        **kwargs,
+    ):
+        """Initialize a HuggingFaceModel.
+
+        Args:
+            role (str): An AWS IAM role (either name or full ARN). The Amazon
+                SageMaker training jobs and APIs that create Amazon SageMaker
+                endpoints use this role to access training data and model
+                artifacts. After the endpoint is created, the inference code
+                might use the IAM role, if it needs to access an AWS resource.
+            model_data (str): The S3 location of a SageMaker model data
+                ``.tar.gz`` file. Defaults to None.
+            entry_point (str): Path (absolute or relative) to the Python source
+                file which should be executed as the entry point to model
+                hosting. If ``source_dir`` is specified, then ``entry_point``
+                must point to a file located at the root of ``source_dir``.
+                Defaults to None.
+            transformers_version (str): Transformers version you want to use for
+                executing your inference code. Defaults to None. Required
+                unless ``image_uri`` is provided.
+            tensorflow_version (str): TensorFlow version you want to use for
+                executing your inference code. Defaults to ``None``. Required unless
+                ``pytorch_version`` or ``image_uri`` is provided. List of supported versions:
+                https://github.com/aws/sagemaker-python-sdk#huggingface-sagemaker-estimators.
+            pytorch_version (str): PyTorch version you want to use for
+                executing your inference code. Defaults to ``None``. Required unless
+                ``tensorflow_version`` or ``image_uri`` is provided. List of supported versions:
+                https://github.com/aws/sagemaker-python-sdk#huggingface-sagemaker-estimators.
+            py_version (str): Python version you want to use for executing your
+                inference code. Defaults to ``None``. Required unless
+                ``image_uri`` is provided.
+            image_uri (str): A Docker image URI (default: None). If not specified, a
+                default HuggingFace inference image is looked up from the given
+                versions. If ``transformers_version`` or ``py_version`` are ``None``,
+                then ``image_uri`` is required; otherwise a ``ValueError`` is raised.
+            predictor_cls (callable[str, sagemaker.session.Session]): A function
+                to call to create a predictor with an endpoint name and
+                SageMaker ``Session``. If specified, ``deploy()`` returns the
+                result of invoking this function on the created endpoint name.
+            model_server_workers (int): Optional. The number of worker processes
+                used by the inference server. If None, server will use one
+                worker per vCPU.
+            **kwargs: Keyword arguments passed to the superclass
+                :class:`~sagemaker.model.FrameworkModel` and, subsequently, its
+                superclass :class:`~sagemaker.model.Model`.
+
+        .. tip::
+
+            You can find additional parameters for initializing this class at
+            :class:`~sagemaker.model.FrameworkModel` and
+            :class:`~sagemaker.model.Model`.
+        """
+        validate_version_or_image_args(transformers_version, py_version, image_uri)
+        _validate_pt_tf_versions(
+            pytorch_version=pytorch_version,
+            tensorflow_version=tensorflow_version,
+            image_uri=image_uri,
+        )
+        if py_version == "py2":
+            raise ValueError("py2 is not supported with HuggingFace images")
+        self.framework_version = transformers_version
+        self.pytorch_version = pytorch_version
+        self.tensorflow_version = tensorflow_version
+        self.py_version = py_version
+
+        super(HuggingFaceModel, self).__init__(
+            model_data, image_uri, role, entry_point, predictor_cls=predictor_cls, **kwargs
+        )
+
+        self.model_server_workers = model_server_workers
+
+    def register(
+        self,
+        content_types,
+        response_types,
+        inference_instances,
+        transform_instances,
+        model_package_name=None,
+        model_package_group_name=None,
+        image_uri=None,
+        model_metrics=None,
+        metadata_properties=None,
+        marketplace_cert=False,
+        approval_status=None,
+        description=None,
+    ):
+        """Creates a model package for creating SageMaker models or listing on Marketplace.
+
+        Args:
+            content_types (list): The supported MIME types for the input data.
+            response_types (list): The supported MIME types for the output data.
+            inference_instances (list): A list of the instance types that are used to
+                generate inferences in real-time.
+            transform_instances (list): A list of the instance types on which a transformation
+                job can be run or on which an endpoint can be deployed.
+            model_package_name (str): Model Package name, exclusive to `model_package_group_name`,
+                using `model_package_name` makes the Model Package un-versioned (default: None).
+            model_package_group_name (str): Model Package Group name, exclusive to
+                `model_package_name`, using `model_package_group_name` makes the Model Package
+                versioned (default: None).
+            image_uri (str): Inference image uri for the container. Model class' self.image_uri
+                will be used if it is None (default: None).
+            model_metrics (ModelMetrics): ModelMetrics object (default: None).
+            metadata_properties (MetadataProperties): MetadataProperties object (default: None).
+            marketplace_cert (bool): A boolean value indicating if the Model Package is certified
+                for AWS Marketplace (default: False).
+            approval_status (str): Model Approval Status, values can be "Approved", "Rejected",
+                or "PendingManualApproval" (default: "PendingManualApproval").
+            description (str): Model Package description (default: None).
+
+        Returns:
+            A `sagemaker.model.ModelPackage` instance.
+        """
+        instance_type = inference_instances[0]  # first entry is used to pick the default image
+        self._init_sagemaker_session_if_does_not_exist(instance_type)
+
+        if image_uri:
+            self.image_uri = image_uri
+        if not self.image_uri:
+            self.image_uri = self.serving_image_uri(
+                region_name=self.sagemaker_session.boto_session.region_name,
+                instance_type=instance_type,
+            )
+        return super(HuggingFaceModel, self).register(
+            content_types,
+            response_types,
+            inference_instances,
+            transform_instances,
+            model_package_name,
+            model_package_group_name,
+            image_uri,
+            model_metrics,
+            metadata_properties,
+            marketplace_cert,
+            approval_status,
+            description,
+        )
+
+    def prepare_container_def(self, instance_type=None, accelerator_type=None):
+        """A container definition with framework configuration set in model environment variables.
+
+        Args:
+            instance_type (str): The EC2 instance type to deploy this Model to.
+                For example, 'ml.p2.xlarge'.
+            accelerator_type (str): The Elastic Inference accelerator type to
+                deploy to the instance for loading and making inferences to the
+                model.
+
+        Returns:
+            dict[str, str]: A container definition object usable with the
+            CreateModel API.
+        """
+        deploy_image = self.image_uri
+        if not deploy_image:
+            if instance_type is None:
+                raise ValueError(
+                    "Must supply either an instance type (for choosing CPU vs GPU) or an image URI."
+                )
+
+            region_name = self.sagemaker_session.boto_session.region_name
+            deploy_image = self.serving_image_uri(
+                region_name, instance_type, accelerator_type=accelerator_type
+            )
+
+        deploy_key_prefix = model_code_key_prefix(self.key_prefix, self.name, deploy_image)
+        self._upload_code(deploy_key_prefix, repack=True)
+        deploy_env = dict(self.env)
+        deploy_env.update(self._framework_env_vars())
+
+        if self.model_server_workers:
+            deploy_env[MODEL_SERVER_WORKERS_PARAM_NAME.upper()] = str(self.model_server_workers)
+        return sagemaker.container_def(
+            deploy_image, self.repacked_model_data or self.model_data, deploy_env
+        )
+
+    def serving_image_uri(self, region_name, instance_type, accelerator_type=None):
+        """Create a URI for the serving image.
+
+        Args:
+            region_name (str): AWS region where the image is uploaded.
+            instance_type (str): SageMaker instance type. Used to determine device type
+                (cpu/gpu/family-specific optimized).
+            accelerator_type (str): The Elastic Inference accelerator type to
+                deploy to the instance for loading and making inferences to the
+                model.
+
+        Returns:
+            str: The appropriate image URI based on the given parameters.
+
+        """
+        if self.tensorflow_version is not None:  # pylint: disable=no-member
+            base_framework_version = (
+                f"tensorflow{self.tensorflow_version}"  # pylint: disable=no-member
+            )
+        else:
+            base_framework_version = f"pytorch{self.pytorch_version}"  # pylint: disable=no-member
+        return image_uris.retrieve(
+            self._framework_name,
+            region_name,
+            version=self.framework_version,
+            py_version=self.py_version,
+            instance_type=instance_type,
+            accelerator_type=accelerator_type,
+            image_scope="inference",
+            base_framework_version=base_framework_version,
+        )
diff --git a/src/sagemaker/image_uri_config/huggingface.json b/src/sagemaker/image_uri_config/huggingface.json
index 73d99b7064..8e35554364 100644
--- a/src/sagemaker/image_uri_config/huggingface.json
+++ b/src/sagemaker/image_uri_config/huggingface.json
@@ -237,5 +237,80 @@
                 }
             }
         }
+    },
+
+    "inference": {
+        "processors": ["gpu", "cpu"],
+        "version_aliases": {
+            "4.6": "4.6.1"
+        },
+        "versions": {
+            "4.6.1": {
+                "version_aliases": {
+                    "pytorch1.7": "pytorch1.7.1",
+                    "tensorflow2.4": "tensorflow2.4.1"
+                },
+                "pytorch1.7.1": {
+                    "py_versions": ["py36"],
+                    "registries": {
+                        "af-south-1": "626614931356",
+                        "ap-east-1": "871362719292",
+                        "ap-northeast-1": "763104351884",
+                        "ap-northeast-2": "763104351884",
+                        "ap-south-1": "763104351884",
+                        "ap-southeast-1": "763104351884",
+                        "ap-southeast-2": "763104351884",
+                        "ca-central-1": "763104351884",
+                        "cn-north-1": "727897471807",
+                        "cn-northwest-1": "727897471807",
+                        "eu-central-1": "763104351884",
+                        "eu-north-1": "763104351884",
+                        "eu-west-1": "763104351884",
+                        "eu-west-2": "763104351884",
+                        "eu-west-3": "763104351884",
+                        "eu-south-1": "692866216735",
+                        "me-south-1": "217643126080",
+                        "sa-east-1": "763104351884",
+                        "us-east-1": "763104351884",
+                        "us-east-2": "763104351884",
+                        "us-gov-west-1": "442386744353",
+                        "us-iso-east-1": "886529160074",
+                        "us-west-1": "763104351884",
+                        "us-west-2": "763104351884"
+                    },
+                    "repository": "huggingface-pytorch-inference"
+                },
+                "tensorflow2.4.1": {
+                    "py_versions": ["py37"],
+                    "registries": {
+                        "af-south-1": "626614931356",
+                        "ap-east-1": "871362719292",
+                        "ap-northeast-1": "763104351884",
+                        "ap-northeast-2": "763104351884",
+                        "ap-south-1": "763104351884",
+                        "ap-southeast-1": "763104351884",
+                        "ap-southeast-2": "763104351884",
+                        "ca-central-1": "763104351884",
+                        "cn-north-1": "727897471807",
+                        "cn-northwest-1": "727897471807",
+                        "eu-central-1": "763104351884",
+                        "eu-north-1": "763104351884",
+                        "eu-south-1": "692866216735",
+                        "eu-west-1": "763104351884",
+                        "eu-west-2": "763104351884",
+                        "eu-west-3": "763104351884",
+                        "me-south-1": "217643126080",
+                        "sa-east-1": "763104351884",
+                        "us-east-1": "763104351884",
+                        "us-east-2": "763104351884",
+                        "us-gov-west-1": "442386744353",
+                        "us-iso-east-1": "886529160074",
+                        "us-west-1": "763104351884",
+                        "us-west-2": "763104351884"
+                    },
+                    "repository": "huggingface-tensorflow-inference"
+                }
+            }
+        }
     }
 }
diff --git a/src/sagemaker/model.py b/src/sagemaker/model.py
index eeb14518dd..8ebd79147e 100644
--- a/src/sagemaker/model.py
+++ b/src/sagemaker/model.py
@@ -1114,7 +1114,7 @@ def prepare_container_def(self, instance_type=None, accelerator_type=None):
     def _upload_code(self, key_prefix, repack=False):
         """Placeholder Docstring"""
         local_code = utils.get_config_value("local.local_code", self.sagemaker_session.config)
-        if self.sagemaker_session.local_mode and local_code:
+        if (self.sagemaker_session.local_mode and local_code) or self.entry_point is None:
             self.uploaded_code = None
         elif not repack:
             bucket = self.bucket or self.sagemaker_session.default_bucket()
@@ -1127,7 +1127,7 @@ def _upload_code(self, key_prefix, repack=False):
                 dependencies=self.dependencies,
             )
 
-        if repack:
+        if repack and self.model_data is not None and self.entry_point is not None:
             bucket = self.bucket or self.sagemaker_session.default_bucket()
             repacked_model_data = "s3://" + "/".join([bucket, key_prefix, "model.tar.gz"])
 
@@ -1162,8 +1162,8 @@ def _framework_env_vars(self):
             dir_name = None
 
         return {
-            SCRIPT_PARAM_NAME.upper(): script_name,
-            DIR_PARAM_NAME.upper(): dir_name,
+            SCRIPT_PARAM_NAME.upper(): script_name or str(),
+            DIR_PARAM_NAME.upper(): dir_name or str(),
             CONTAINER_LOG_LEVEL_PARAM_NAME.upper(): str(self.container_log_level),
             SAGEMAKER_REGION_PARAM_NAME.upper(): self.sagemaker_session.boto_region_name,
         }
diff --git a/tests/integ/test_huggingface.py b/tests/integ/test_huggingface.py
index 37bd916a62..7b821ea7ca 100644
--- a/tests/integ/test_huggingface.py
+++ b/tests/integ/test_huggingface.py
@@ -17,9 +17,11 @@
 import pytest
 
 from sagemaker.huggingface import HuggingFace
+from sagemaker.huggingface.model import HuggingFaceModel, HuggingFacePredictor
+from sagemaker.utils import unique_name_from_base
 from tests import integ
 from tests.integ import DATA_DIR, TRAINING_DEFAULT_TIMEOUT_MINUTES
-from tests.integ.timeout import timeout
+from tests.integ.timeout import timeout, timeout_and_delete_endpoint_by_name
 
 
 @pytest.mark.release
@@ -104,3 +106,31 @@ def test_huggingface_training_tf(
         )
 
         hf.fit(train_input)
+
+
+@pytest.mark.skip
+def test_huggingface_inference(sagemaker_session, gpu_instance_type):
+    env = {
+        "HF_MODEL_ID": "sshleifer/tiny-distilbert-base-uncased-finetuned-sst-2-english",
+        "HF_TASK": "text-classification",
+    }
+    endpoint_name = unique_name_from_base("test-hf-inference")
+
+    model = HuggingFaceModel(
+        sagemaker_session=sagemaker_session,
+        role="SageMakerRole",
+        image_uri="214660476583.dkr.ecr.us-west-2.amazonaws.com/huggingface-pytorch-inference:gpu",
+        env=env,
+    )
+    with timeout_and_delete_endpoint_by_name(endpoint_name, sagemaker_session):
+        model.deploy(
+            instance_type=gpu_instance_type, initial_instance_count=1, endpoint_name=endpoint_name
+        )
+
+        predictor = HuggingFacePredictor(endpoint_name=endpoint_name)
+        data = {
+            "inputs": "Camera - You are awarded a SiPix Digital Camera!"
+            "call 09061221066 fromm landline. Delivery within 28 days."
+        }
+        output = predictor.predict(data)
+        assert "score" in output[0]