From 3013c34e1adabe16d0a50df20af213591af7ca0e Mon Sep 17 00:00:00 2001 From: Julian Bright Date: Mon, 10 Aug 2020 21:03:54 +1000 Subject: [PATCH 1/7] Upgrade to SageMaker v2 including changes: * image -> image_uri * train_instance_count -> instance_count * train_instance_type -> instance_type * train_max_run -> max_run * train_max_wait -> max_wait * train_volume_size -> volume_size * sagemaker.session.s3_input -> sagemaker.inputs.TrainingInput --- requirements.txt | 2 +- src/stepfunctions/steps/sagemaker.py | 20 +++++++------- .../template/pipeline/inference.py | 26 +++++++++---------- src/stepfunctions/template/pipeline/train.py | 14 +++++----- tests/integ/conftest.py | 4 +-- tests/integ/test_inference_pipeline.py | 4 +-- tests/integ/test_sagemaker_steps.py | 4 +-- .../test_training_pipeline_estimators.py | 4 +-- ...t_training_pipeline_framework_estimator.py | 8 +++--- tests/unit/test_pipeline.py | 14 +++++----- tests/unit/test_sagemaker_steps.py | 20 +++++++------- 11 files changed, 60 insertions(+), 60 deletions(-) diff --git a/requirements.txt b/requirements.txt index 464d5b1..2c7bcba 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -sagemaker>=1.71.0 +sagemaker>=2.0.0 boto3>=1.9.213 pyyaml diff --git a/src/stepfunctions/steps/sagemaker.py b/src/stepfunctions/steps/sagemaker.py index 6321563..2a02ddb 100644 --- a/src/stepfunctions/steps/sagemaker.py +++ b/src/stepfunctions/steps/sagemaker.py @@ -36,12 +36,12 @@ def __init__(self, state_id, estimator, job_name, data=None, hyperparameters=Non data: Information about the training data. Please refer to the ``fit()`` method of the associated estimator, as this can take any of the following forms: * (str) - The S3 location where training data is saved. - * (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using multiple + * (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput]) - If using multiple channels for training data, you can specify a dict mapping channel names to - strings or :func:`~sagemaker.session.s3_input` objects. - * (sagemaker.session.s3_input) - Channel configuration for S3 data sources that can + strings or :func:`~sagemaker.inputs.TrainingInput` objects. + * (sagemaker.inputs.TrainingInput) - Channel configuration for S3 data sources that can provide additional information about the training dataset. See - :func:`sagemaker.session.s3_input` for full details. + :func:`sagemaker.inputs.TrainingInput` for full details. * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of Amazon :class:`Record` objects serialized and stored in S3. For use with an estimator for an Amazon algorithm. @@ -202,7 +202,7 @@ def __init__(self, state_id, model, model_name=None, instance_type=None, tags=No tags (list[dict], optional): `List of tags `_ to associate with the resource. 
""" if isinstance(model, FrameworkModel): - parameters = model_config(model=model, instance_type=instance_type, role=model.role, image=model.image) + parameters = model_config(model=model, instance_type=instance_type, role=model.role, image_uri=model.image_uri) if model_name: parameters['ModelName'] = model_name elif isinstance(model, Model): @@ -211,7 +211,7 @@ def __init__(self, state_id, model, model_name=None, instance_type=None, tags=No 'ModelName': model_name or model.name, 'PrimaryContainer': { 'Environment': {}, - 'Image': model.image, + 'Image': model.image_uri, 'ModelDataUrl': model.model_data } } @@ -322,12 +322,12 @@ def __init__(self, state_id, tuner, job_name, data, wait_for_completion=True, ta data: Information about the training data. Please refer to the ``fit()`` method of the associated estimator in the tuner, as this can take any of the following forms: * (str) - The S3 location where training data is saved. - * (dict[str, str] or dict[str, sagemaker.session.s3_input]) - If using multiple + * (dict[str, str] or dict[str, sagemaker.inputs.TrainingInput]) - If using multiple channels for training data, you can specify a dict mapping channel names to - strings or :func:`~sagemaker.session.s3_input` objects. - * (sagemaker.session.s3_input) - Channel configuration for S3 data sources that can + strings or :func:`~sagemaker.inputs.TrainingInput` objects. + * (sagemaker.inputs.TrainingInput) - Channel configuration for S3 data sources that can provide additional information about the training dataset. See - :func:`sagemaker.session.s3_input` for full details. + :func:`sagemaker.inputs.TrainingInput` for full details. * (sagemaker.amazon.amazon_estimator.RecordSet) - A collection of Amazon :class:`Record` objects serialized and stored in S3. For use with an estimator for an Amazon algorithm. diff --git a/src/stepfunctions/template/pipeline/inference.py b/src/stepfunctions/template/pipeline/inference.py index d0eccaa..17a1dbe 100644 --- a/src/stepfunctions/template/pipeline/inference.py +++ b/src/stepfunctions/template/pipeline/inference.py @@ -48,8 +48,8 @@ def __init__(self, preprocessor, estimator, inputs, s3_bucket, role, client=None inputs: Information about the training data. Please refer to the `fit()` method of the associated estimator, as this can take any of the following forms: * (str) - The S3 location where training data is saved. - * (dict[str, str] or dict[str, `sagemaker.session.s3_input`]) - If using multiple channels for training data, you can specify a dict mapping channel names to strings or `sagemaker.session.s3_input` objects. - * (`sagemaker.session.s3_input`) - Channel configuration for S3 data sources that can provide additional information about the training dataset. See `sagemaker.session.s3_input` for full details. + * (dict[str, str] or dict[str, `sagemaker.inputs.TrainingInput`]) - If using multiple channels for training data, you can specify a dict mapping channel names to strings or `sagemaker.inputs.TrainingInput` objects. + * (`sagemaker.inputs.TrainingInput`) - Channel configuration for S3 data sources that can provide additional information about the training dataset. See `sagemaker.inputs.TrainingInput` for full details. * (`sagemaker.amazon.amazon_estimator.RecordSet`) - A collection of Amazon `Record` objects serialized and stored in S3. For use with an estimator for an Amazon algorithm. 
* (list[`sagemaker.amazon.amazon_estimator.RecordSet`]) - A list of `sagemaker.amazon.amazon_estimator.RecordSet` objects, where each instance is a different channel of training data. s3_bucket (str): S3 bucket under which the output artifacts from the training job will be stored. The parent path used is built using the format: ``s3://{s3_bucket}/{pipeline_name}/models/{job_name}/``. In this format, `pipeline_name` refers to the keyword argument provided for TrainingPipeline. If a `pipeline_name` argument was not provided, one is auto-generated by the pipeline as `training-pipeline-`. Also, in the format, `job_name` refers to the job name provided when calling the :meth:`TrainingPipeline.run()` method. @@ -87,8 +87,8 @@ def build_workflow_definition(self): """ default_name = self.pipeline_name - train_instance_type = self.preprocessor.train_instance_type - train_instance_count = self.preprocessor.train_instance_count + instance_type = self.preprocessor.instance_type + instance_count = self.preprocessor.instance_count # Preprocessor for feature transformation preprocessor_train_step = TrainingStep( @@ -100,13 +100,13 @@ def build_workflow_definition(self): preprocessor_model = self.preprocessor.create_model() preprocessor_model_step = ModelStep( StepId.CreatePreprocessorModel.value, - instance_type=train_instance_type, + instance_type=instance_type, model=preprocessor_model, model_name=default_name ) preprocessor_transform_step = TransformStep( StepId.TransformInput.value, - transformer=self.preprocessor.transformer(instance_count=train_instance_count, instance_type=train_instance_type, max_payload=20), + transformer=self.preprocessor.transformer(instance_count=instance_count, instance_type=instance_type, max_payload=20), job_name=default_name, model_name=default_name, data=self.inputs['train'], @@ -115,8 +115,8 @@ def build_workflow_definition(self): ) # Training - train_instance_type = self.estimator.train_instance_type - train_instance_count = self.estimator.train_instance_count + instance_type = self.estimator.instance_type + instance_count = self.estimator.instance_count training_step = TrainingStep( StepId.Train.value, @@ -135,21 +135,21 @@ def build_workflow_definition(self): ) pipeline_model_step = ModelStep( StepId.CreatePipelineModel.value, - instance_type=train_instance_type, + instance_type=instance_type, model=preprocessor_model, model_name=default_name ) - pipeline_model_step.parameters = self.pipeline_model_config(train_instance_type, pipeline_model) + pipeline_model_step.parameters = self.pipeline_model_config(instance_type, pipeline_model) - deployable_model = Model(model_data='', image='') + deployable_model = Model(model_data='', image_uri='') # Deployment endpoint_config_step = EndpointConfigStep( StepId.ConfigureEndpoint.value, endpoint_config_name=default_name, model_name=default_name, - initial_instance_count=train_instance_count, - instance_type=train_instance_type + initial_instance_count=instance_count, + instance_type=instance_type ) deploy_step = EndpointStep( diff --git a/src/stepfunctions/template/pipeline/train.py b/src/stepfunctions/template/pipeline/train.py index 02c6b5b..d2bb4de 100644 --- a/src/stepfunctions/template/pipeline/train.py +++ b/src/stepfunctions/template/pipeline/train.py @@ -43,8 +43,8 @@ def __init__(self, estimator, role, inputs, s3_bucket, client=None, **kwargs): inputs: Information about the training data. 
Please refer to the `fit()` method of the associated estimator, as this can take any of the following forms: * (str) - The S3 location where training data is saved. - * (dict[str, str] or dict[str, `sagemaker.session.s3_input`]) - If using multiple channels for training data, you can specify a dict mapping channel names to strings or `sagemaker.session.s3_input` objects. - * (`sagemaker.session.s3_input`) - Channel configuration for S3 data sources that can provide additional information about the training dataset. See `sagemaker.session.s3_input` for full details. + * (dict[str, str] or dict[str, `sagemaker.inputs.TrainingInput`]) - If using multiple channels for training data, you can specify a dict mapping channel names to strings or `sagemaker.inputs.TrainingInput` objects. + * (`sagemaker.inputs.TrainingInput`) - Channel configuration for S3 data sources that can provide additional information about the training dataset. See `sagemaker.inputs.TrainingInput` for full details. * (`sagemaker.amazon.amazon_estimator.RecordSet`) - A collection of Amazon `Record` objects serialized and stored in S3. For use with an estimator for an Amazon algorithm. * (list[`sagemaker.amazon.amazon_estimator.RecordSet`]) - A list of `sagemaker.amazon.amazon_estimator.RecordSet` objects, where each instance is a different channel of training data. s3_bucket (str): S3 bucket under which the output artifacts from the training job will be stored. The parent path used is built using the format: ``s3://{s3_bucket}/{pipeline_name}/models/{job_name}/``. In this format, `pipeline_name` refers to the keyword argument provided for TrainingPipeline. If a `pipeline_name` argument was not provided, one is auto-generated by the pipeline as `training-pipeline-`. Also, in the format, `job_name` refers to the job name provided when calling the :meth:`TrainingPipeline.run()` method. 
@@ -79,8 +79,8 @@ def build_workflow_definition(self): """ default_name = self.pipeline_name - train_instance_type = self.estimator.train_instance_type - train_instance_count = self.estimator.train_instance_count + instance_type = self.estimator.instance_type + instance_count = self.estimator.instance_count training_step = TrainingStep( StepId.Train.value, @@ -92,7 +92,7 @@ def build_workflow_definition(self): model = self.estimator.create_model() model_step = ModelStep( StepId.CreateModel.value, - instance_type=train_instance_type, + instance_type=instance_type, model=model, model_name=default_name ) @@ -101,8 +101,8 @@ def build_workflow_definition(self): StepId.ConfigureEndpoint.value, endpoint_config_name=default_name, model_name=default_name, - initial_instance_count=train_instance_count, - instance_type=train_instance_type + initial_instance_count=instance_count, + instance_type=instance_type ) deploy_step = EndpointStep( StepId.Deploy.value, diff --git a/tests/integ/conftest.py b/tests/integ/conftest.py index 394838e..cb3bee0 100644 --- a/tests/integ/conftest.py +++ b/tests/integ/conftest.py @@ -53,8 +53,8 @@ def sagemaker_role_arn(aws_account_id): def pca_estimator_fixture(sagemaker_role_arn): estimator = pca.PCA( role=sagemaker_role_arn, - train_instance_count=1, - train_instance_type="ml.m5.large", + instance_count=1, + instance_type="ml.m5.large", num_components=48 ) return estimator diff --git a/tests/integ/test_inference_pipeline.py b/tests/integ/test_inference_pipeline.py index 7ee2fd5..38d2059 100644 --- a/tests/integ/test_inference_pipeline.py +++ b/tests/integ/test_inference_pipeline.py @@ -45,7 +45,7 @@ def sklearn_preprocessor(sagemaker_role_arn, sagemaker_session): sklearn_preprocessor = SKLearn( entry_point=script_path, role=sagemaker_role_arn, - train_instance_type="ml.m5.large", + instance_type="ml.m5.large", sagemaker_session=sagemaker_session, hyperparameters={"epochs": 1}, ) @@ -60,7 +60,7 @@ def sklearn_estimator(sagemaker_role_arn, sagemaker_session): sklearn_estimator = SKLearn( entry_point=script_path, role=sagemaker_role_arn, - train_instance_type="ml.m5.large", + instance_type="ml.m5.large", sagemaker_session=sagemaker_session, hyperparameters={"epochs": 1}, input_mode='File' diff --git a/tests/integ/test_sagemaker_steps.py b/tests/integ/test_sagemaker_steps.py index 36f2940..73d1a5b 100644 --- a/tests/integ/test_sagemaker_steps.py +++ b/tests/integ/test_sagemaker_steps.py @@ -254,8 +254,8 @@ def test_tuning_step(sfn_client, record_set_for_hyperparameter_tuning, sagemaker kmeans = KMeans( role=sagemaker_role_arn, - train_instance_count=1, - train_instance_type=INSTANCE_TYPE, + instance_count=1, + instance_type=INSTANCE_TYPE, k=10 ) diff --git a/tests/integ/test_training_pipeline_estimators.py b/tests/integ/test_training_pipeline_estimators.py index 2453b69..c142068 100644 --- a/tests/integ/test_training_pipeline_estimators.py +++ b/tests/integ/test_training_pipeline_estimators.py @@ -50,8 +50,8 @@ def pca_estimator(sagemaker_role_arn): pca_estimator = PCA( role=sagemaker_role_arn, num_components=1, - train_instance_count=1, - train_instance_type='ml.m5.large', + instance_count=1, + instance_type='ml.m5.large', ) pca_estimator.feature_dim=500 diff --git a/tests/integ/test_training_pipeline_framework_estimator.py b/tests/integ/test_training_pipeline_framework_estimator.py index 4a669e9..9ad9310 100644 --- a/tests/integ/test_training_pipeline_framework_estimator.py +++ b/tests/integ/test_training_pipeline_framework_estimator.py @@ -36,8 +36,8 @@ def 
torch_estimator(sagemaker_role_arn): entry_point=script_path, role=sagemaker_role_arn, framework_version='1.2.0', - train_instance_count=1, - train_instance_type='ml.m5.large', + instance_count=1, + instance_type='ml.m5.large', hyperparameters={ 'epochs': 6, 'backend': 'gloo' @@ -50,8 +50,8 @@ def sklearn_estimator(sagemaker_role_arn): return SKLearn( entry_point=script_path, role=sagemaker_role_arn, - train_instance_count=1, - train_instance_type='ml.m5.large', + instance_count=1, + instance_type='ml.m5.large', framework_version='0.20.0', hyperparameters={ "epochs": 1 diff --git a/tests/unit/test_pipeline.py b/tests/unit/test_pipeline.py index f760166..5e9381d 100644 --- a/tests/unit/test_pipeline.py +++ b/tests/unit/test_pipeline.py @@ -36,8 +36,8 @@ def pca_estimator(): pca = sagemaker.estimator.Estimator( PCA_IMAGE, role=SAGEMAKER_EXECUTION_ROLE, - train_instance_count=1, - train_instance_type='ml.c4.xlarge', + instance_count=1, + instance_type='ml.c4.xlarge', output_path=s3_output_location, sagemaker_session=sagemaker_session ) @@ -62,7 +62,7 @@ def sklearn_preprocessor(): sklearn_preprocessor = SKLearn( entry_point=script_path, role=SAGEMAKER_EXECUTION_ROLE, - train_instance_type="ml.c4.xlarge", + instance_type="ml.c4.xlarge", source_dir=source_dir, sagemaker_session=sagemaker_session ) @@ -82,10 +82,10 @@ def linear_learner_estimator(): ll_estimator = sagemaker.estimator.Estimator( LINEAR_LEARNER_IMAGE, SAGEMAKER_EXECUTION_ROLE, - train_instance_count=1, - train_instance_type='ml.c4.xlarge', - train_volume_size=20, - train_max_run=3600, + instance_count=1, + instance_type='ml.c4.xlarge', + volume_size=20, + max_run=3600, input_mode='File', output_path=s3_output_location, sagemaker_session=sagemaker_session diff --git a/tests/unit/test_sagemaker_steps.py b/tests/unit/test_sagemaker_steps.py index bdc7a57..0f8a3c4 100644 --- a/tests/unit/test_sagemaker_steps.py +++ b/tests/unit/test_sagemaker_steps.py @@ -44,8 +44,8 @@ def pca_estimator(): pca = sagemaker.estimator.Estimator( PCA_IMAGE, role=EXECUTION_ROLE, - train_instance_count=1, - train_instance_type='ml.c4.xlarge', + instance_count=1, + instance_type='ml.c4.xlarge', output_path=s3_output_location ) @@ -90,8 +90,8 @@ def pca_estimator_with_debug_hook(): pca = sagemaker.estimator.Estimator( PCA_IMAGE, role=EXECUTION_ROLE, - train_instance_count=1, - train_instance_type='ml.c4.xlarge', + instance_count=1, + instance_type='ml.c4.xlarge', output_path=s3_output_location, debugger_hook_config = hook_config, rules=rules @@ -116,7 +116,7 @@ def pca_model(): model_data = 's3://sagemaker/models/pca.tar.gz' return Model( model_data=model_data, - image=PCA_IMAGE, + image_uri=PCA_IMAGE, role=EXECUTION_ROLE, name='pca-model' ) @@ -140,8 +140,8 @@ def tensorflow_estimator(): framework_version='1.13', training_steps=1000, evaluation_steps=100, - train_instance_count=1, - train_instance_type='ml.p2.xlarge', + instance_count=1, + instance_type='ml.p2.xlarge', output_path=s3_output_location, source_dir=s3_source_location, image_name=TENSORFLOW_IMAGE, @@ -460,7 +460,7 @@ def test_get_expected_model(pca_estimator): 'ModelName': 'pca-model', 'PrimaryContainer': { 'Environment': {}, - 'Image': expected_model.image, + 'Image': expected_model.image_uri, 'ModelDataUrl.$': "$['ModelArtifacts']['S3ModelArtifacts']" } }, @@ -492,7 +492,7 @@ def test_get_expected_model_with_framework_estimator(tensorflow_estimator): 'SAGEMAKER_CONTAINER_LOG_LEVEL': '20', 'SAGEMAKER_REGION': 'us-east-1', }, - 'Image': expected_model.image, + 'Image': expected_model.image_uri, 
'ModelDataUrl.$': "$['ModelArtifacts']['S3ModelArtifacts']" } }, @@ -509,7 +509,7 @@ def test_model_step_creation(pca_model): 'ModelName': 'pca-model', 'PrimaryContainer': { 'Environment': {}, - 'Image': pca_model.image, + 'Image': pca_model.image_uri, 'ModelDataUrl': pca_model.model_data }, 'Tags': DEFAULT_TAGS_LIST From 15f37b092e5b0ab5837223b592efbbd10998483e Mon Sep 17 00:00:00 2001 From: Julian Bright Date: Mon, 10 Aug 2020 21:11:47 +1000 Subject: [PATCH 2/7] Update version to 2.0.0 --- VERSION | 2 +- requirements.txt | 4 ++-- setup.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/VERSION b/VERSION index 9084fa2..227cea2 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.1.0 +2.0.0 diff --git a/requirements.txt b/requirements.txt index 2c7bcba..df91b80 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ -sagemaker>=2.0.0 -boto3>=1.9.213 +sagemaker>=2.1.0 +boto3>=1.14.38 pyyaml diff --git a/setup.py b/setup.py index 5476bfc..1770180 100644 --- a/setup.py +++ b/setup.py @@ -30,8 +30,8 @@ def read_version(): # Declare minimal set for installation required_packages = [ - "sagemaker>=1.42.8", - "boto3>=1.9.213", + "sagemaker>=2.1.0", + "boto3>=1.14.38", "pyyaml" ] From e868bee7c06b4dec5e472266741ec21a294fdca2 Mon Sep 17 00:00:00 2001 From: Julian Bright Date: Mon, 10 Aug 2020 21:30:31 +1000 Subject: [PATCH 3/7] Add additional mapping get_image_uri -> sagemaker.image_uris.retrieve() --- tests/integ/test_state_machine_definition.py | 4 ++-- tests/integ/test_training_pipeline_estimators.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/integ/test_state_machine_definition.py b/tests/integ/test_state_machine_definition.py index b28babf..ac5e7ac 100644 --- a/tests/integ/test_state_machine_definition.py +++ b/tests/integ/test_state_machine_definition.py @@ -16,7 +16,7 @@ import json from sagemaker.utils import unique_name_from_base -from sagemaker.amazon.amazon_estimator import get_image_uri +from sagemaker.image_uris import retrieve from stepfunctions import steps from stepfunctions.workflow import Workflow from tests.integ.utils import state_machine_delete_wait @@ -25,7 +25,7 @@ def training_job_parameters(sagemaker_session, sagemaker_role_arn, record_set_fixture): parameters = { "AlgorithmSpecification": { - "TrainingImage": get_image_uri(sagemaker_session.boto_session.region_name, 'pca'), + "TrainingImage": retrieve(region=sagemaker_session.boto_session.region_name, framework='pca'), "TrainingInputMode": "File" }, "OutputDataConfig": { diff --git a/tests/integ/test_training_pipeline_estimators.py b/tests/integ/test_training_pipeline_estimators.py index c142068..effedc1 100644 --- a/tests/integ/test_training_pipeline_estimators.py +++ b/tests/integ/test_training_pipeline_estimators.py @@ -25,7 +25,7 @@ # import Sagemaker from sagemaker.amazon.pca import PCA -from sagemaker.amazon.amazon_estimator import get_image_uri +from sagemaker.image_uris import retrieve # import StepFunctions from stepfunctions.template.pipeline import TrainingPipeline @@ -105,7 +105,7 @@ def test_pca_estimator(sfn_client, sagemaker_session, sagemaker_role_arn, sfn_ro job_name = workflow_execution_info['name'] s3_manifest_uri = inputs.s3_data status = 'SUCCEEDED' - estimator_image_uri = get_image_uri(sagemaker_session.boto_region_name, 'pca') + estimator_image_uri = retrieve(region=sagemaker_session.boto_region_name, framework='pca') execution_info = sfn_client.describe_execution(executionArn=execution_arn) execution_info['input'] = 
json.loads(execution_info['input']) From 2273ffa02710aa49fb16914364fb960fe2bd81d5 Mon Sep 17 00:00:00 2001 From: Julian Bright Date: Mon, 10 Aug 2020 21:41:24 +1000 Subject: [PATCH 4/7] Update RuleEvaluatorImage to account: 199566480951 --- tests/unit/test_sagemaker_steps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_sagemaker_steps.py b/tests/unit/test_sagemaker_steps.py index 0f8a3c4..5fb69e1 100644 --- a/tests/unit/test_sagemaker_steps.py +++ b/tests/unit/test_sagemaker_steps.py @@ -266,7 +266,7 @@ def test_training_step_creation_with_debug_hook(pca_estimator_with_debug_hook): 'DebugRuleConfigurations': [ { 'RuleConfigurationName': 'Confusion', - 'RuleEvaluatorImage': '503895931360.dkr.ecr.us-east-1.amazonaws.com/sagemaker-debugger-rules:latest', + 'RuleEvaluatorImage': '199566480951.dkr.ecr.us-east-1.amazonaws.com/sagemaker-debugger-rules:latest', 'RuleParameters': { 'rule_to_invoke': 'Confusion', 'category_no': '15', From 8b97898fc86a9c6e3bbd487ee8a11f0e29c127c9 Mon Sep 17 00:00:00 2001 From: Julian Bright Date: Tue, 11 Aug 2020 17:39:33 +1000 Subject: [PATCH 5/7] Additional fixes for SKLearn and TensorFlow Estimators * Removed sagemaker_session for SKLearn * Moved checkpoint_path into hyperparameters (https://sagemaker.readthedocs.io/en/v2.0.0.rc0/frameworks/tensorflow/upgrade_from_legacy.html) * Added framework_version and py_version * Updated entry_point and renamed image_name to image_uri for TensorFlow --- tests/integ/test_inference_pipeline.py | 4 ++++ .../test_training_pipeline_framework_estimator.py | 4 +++- tests/unit/test_pipeline.py | 3 ++- tests/unit/test_sagemaker_steps.py | 14 +++++++++----- 4 files changed, 18 insertions(+), 7 deletions(-) diff --git a/tests/integ/test_inference_pipeline.py b/tests/integ/test_inference_pipeline.py index 38d2059..341474c 100644 --- a/tests/integ/test_inference_pipeline.py +++ b/tests/integ/test_inference_pipeline.py @@ -43,6 +43,8 @@ def sklearn_preprocessor(sagemaker_role_arn, sagemaker_session): 'one_p_mnist', 'sklearn_mnist_preprocessor.py') sklearn_preprocessor = SKLearn( + framework_version='0.20.0', + py_version='py3', entry_point=script_path, role=sagemaker_role_arn, instance_type="ml.m5.large", sagemaker_session=sagemaker_session, hyperparameters={"epochs": 1}, ) @@ -58,6 +60,8 @@ def sklearn_estimator(sagemaker_role_arn, sagemaker_session): 'one_p_mnist', 'sklearn_mnist_estimator.py') sklearn_estimator = SKLearn( + framework_version='0.20.0', + py_version='py3', entry_point=script_path, role=sagemaker_role_arn, instance_type="ml.m5.large", sagemaker_session=sagemaker_session, hyperparameters={"epochs": 1}, input_mode='File' diff --git a/tests/integ/test_training_pipeline_framework_estimator.py b/tests/integ/test_training_pipeline_framework_estimator.py index 9ad9310..bc775a7 100644 --- a/tests/integ/test_training_pipeline_framework_estimator.py +++ b/tests/integ/test_training_pipeline_framework_estimator.py @@ -33,6 +33,7 @@ def torch_estimator(sagemaker_role_arn): script_path = os.path.join(DATA_DIR, "pytorch_mnist", "mnist.py") return PyTorch( + py_version='py3', entry_point=script_path, role=sagemaker_role_arn, framework_version='1.2.0', @@ -48,11 +49,12 @@ def torch_estimator(sagemaker_role_arn): def sklearn_estimator(sagemaker_role_arn): script_path = os.path.join(DATA_DIR, "sklearn_mnist", "mnist.py") return SKLearn( + framework_version='0.20.0', + py_version='py3', entry_point=script_path, role=sagemaker_role_arn, instance_count=1, instance_type='ml.m5.large', - framework_version='0.20.0', hyperparameters={ "epochs": 1 } diff --git a/tests/unit/test_pipeline.py b/tests/unit/test_pipeline.py index 5e9381d..03fcf4a 100644 --- 
a/tests/unit/test_pipeline.py +++ b/tests/unit/test_pipeline.py @@ -60,11 +60,12 @@ def sklearn_preprocessor(): sagemaker_session.boto_region_name = 'us-east-1' sklearn_preprocessor = SKLearn( + framework_version='0.20.0', + py_version='py3', entry_point=script_path, role=SAGEMAKER_EXECUTION_ROLE, instance_type="ml.c4.xlarge", source_dir=source_dir, - sagemaker_session=sagemaker_session ) sklearn_preprocessor.debugger_hook_config = DebuggerHookConfig( diff --git a/tests/unit/test_sagemaker_steps.py b/tests/unit/test_sagemaker_steps.py index 5fb69e1..891bd19 100644 --- a/tests/unit/test_sagemaker_steps.py +++ b/tests/unit/test_sagemaker_steps.py @@ -135,17 +135,21 @@ def tensorflow_estimator(): s3_output_location = 's3://sagemaker/models' s3_source_location = 's3://sagemaker/source' - estimator = TensorFlow(entry_point='tf_train.py', + estimator = TensorFlow( + entry_point='tf_train.py', role=EXECUTION_ROLE, framework_version='1.13', - training_steps=1000, - evaluation_steps=100, instance_count=1, instance_type='ml.p2.xlarge', output_path=s3_output_location, source_dir=s3_source_location, - image_name=TENSORFLOW_IMAGE, - checkpoint_path='s3://sagemaker/models/sagemaker-tensorflow/checkpoints' + image_uri=TENSORFLOW_IMAGE, + model_dir=False, + hyperparameters={ 'training_steps': 1000, 'evaluation_steps': 100, 'checkpoint_path': 's3://sagemaker/models/sagemaker-tensorflow/checkpoints', } ) estimator.debugger_hook_config = DebuggerHookConfig( From a8053dbd52e0d8ac206ad0c3778016c93880032f Mon Sep 17 00:00:00 2001 From: Julian Bright Date: Sun, 20 Sep 2020 15:41:09 +1000 Subject: [PATCH 6/7] Remove docstring text as per comments --- src/stepfunctions/steps/sagemaker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/stepfunctions/steps/sagemaker.py b/src/stepfunctions/steps/sagemaker.py index 2a02ddb..ff248ce 100644 --- a/src/stepfunctions/steps/sagemaker.py +++ b/src/stepfunctions/steps/sagemaker.py @@ -198,7 +198,7 @@ def __init__(self, state_id, model, model_name=None, instance_type=None, tags=No state_id (str): State name whose length **must be** less than or equal to 128 unicode characters. State names **must be** unique within the scope of the whole state machine. model (sagemaker.model.Model): The SageMaker model to use in the ModelStep. If :py:class:`TrainingStep` was used to train the model and saving the model is the next step in the workflow, the output of :py:func:`TrainingStep.get_expected_model()` can be passed here. model_name (str or Placeholder, optional): Specify a model name, this is required for creating the model. We recommend using the :py:class:`~stepfunctions.inputs.ExecutionInput` placeholder collection to pass the value dynamically in each execution. - instance_type (str, optional): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'. This parameter is typically required when the estimator used is not an `Amazon built-in algorithm `_. + instance_type (str, optional): The EC2 instance type to deploy this Model to. For example, 'ml.p2.xlarge'. tags (list[dict], optional): `List of tags `_ to associate with the resource. 
""" if isinstance(model, FrameworkModel): From 50fada4dd2a980412218b486c7e91b3c7725717b Mon Sep 17 00:00:00 2001 From: Julian Bright Date: Wed, 23 Sep 2020 15:02:24 +1000 Subject: [PATCH 7/7] Addition unit test fixups to remove cloudwtch metrics, add training_steps, fix Rule evaluator image, and instance count/type --- tests/integ/test_training_pipeline_estimators.py | 2 ++ tests/unit/test_pipeline.py | 6 ++---- tests/unit/test_sagemaker_steps.py | 14 +++++++------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/integ/test_training_pipeline_estimators.py b/tests/integ/test_training_pipeline_estimators.py index effedc1..3edcd19 100644 --- a/tests/integ/test_training_pipeline_estimators.py +++ b/tests/integ/test_training_pipeline_estimators.py @@ -115,7 +115,9 @@ def test_pca_estimator(sfn_client, sagemaker_session, sagemaker_role_arn, sfn_ro s3_output_path = 's3://{bucket_name}/{workflow_name}/models'.format(bucket_name=bucket_name, workflow_name=unique_name) expected_execution_info = {'executionArn': execution_arn, 'stateMachineArn': state_machine_arn, + 'inputDetails': {'included': True}, 'name': job_name, + 'outputDetails': {'included': True}, 'status': status, 'startDate': execution_info['startDate'], 'stopDate': execution_info['stopDate'], diff --git a/tests/unit/test_pipeline.py b/tests/unit/test_pipeline.py index 03fcf4a..2123a3d 100644 --- a/tests/unit/test_pipeline.py +++ b/tests/unit/test_pipeline.py @@ -66,6 +66,7 @@ def sklearn_preprocessor(): role=SAGEMAKER_EXECUTION_ROLE, instance_type="ml.c4.xlarge", source_dir=source_dir, + sagemaker_session = sagemaker_session ) sklearn_preprocessor.debugger_hook_config = DebuggerHookConfig( @@ -334,11 +335,10 @@ def test_inference_pipeline(sklearn_preprocessor, linear_learner_estimator): }, 'HyperParameters': { 'sagemaker_container_log_level': '20', - 'sagemaker_enable_cloudwatch_metrics': 'false', 'sagemaker_job_name': '"preprocessor-linear_learner"', 'sagemaker_program': '"sklearn_abalone_featurizer.py"', 'sagemaker_region': '"us-east-1"', - 'sagemaker_submit_directory': '"s3://sagemaker/source"' + 'sagemaker_submit_directory': '"s3://sagemaker/source"', }, 'InputDataConfig': [{ 'ChannelName': 'train', @@ -371,7 +371,6 @@ def test_inference_pipeline(sklearn_preprocessor, linear_learner_estimator): 'PrimaryContainer': { 'Environment': { 'SAGEMAKER_CONTAINER_LOG_LEVEL': '20', - 'SAGEMAKER_ENABLE_CLOUDWATCH_METRICS': 'false', 'SAGEMAKER_PROGRAM': 'sklearn_abalone_featurizer.py', 'SAGEMAKER_REGION': 'us-east-1', 'SAGEMAKER_SUBMIT_DIRECTORY': 's3://sagemaker/source' @@ -435,7 +434,6 @@ def test_inference_pipeline(sklearn_preprocessor, linear_learner_estimator): { 'Environment': { 'SAGEMAKER_CONTAINER_LOG_LEVEL': '20', - 'SAGEMAKER_ENABLE_CLOUDWATCH_METRICS': 'false', 'SAGEMAKER_PROGRAM': 'sklearn_abalone_featurizer.py', 'SAGEMAKER_REGION': 'us-east-1', 'SAGEMAKER_SUBMIT_DIRECTORY': 's3://sagemaker/source' diff --git a/tests/unit/test_sagemaker_steps.py b/tests/unit/test_sagemaker_steps.py index f8c0ed4..b85d11f 100644 --- a/tests/unit/test_sagemaker_steps.py +++ b/tests/unit/test_sagemaker_steps.py @@ -119,8 +119,8 @@ def pca_estimator_with_falsy_debug_hook(): pca = sagemaker.estimator.Estimator( PCA_IMAGE, role=EXECUTION_ROLE, - train_instance_count=1, - train_instance_type='ml.c4.xlarge', + instance_count=1, + instance_type='ml.c4.xlarge', output_path=s3_output_location, debugger_hook_config = False ) @@ -298,7 +298,7 @@ def test_training_step_creation_with_debug_hook(pca_estimator_with_debug_hook): 
'DebugRuleConfigurations': [ { 'RuleConfigurationName': 'Confusion', - 'RuleEvaluatorImage': '199566480951.dkr.ecr.us-east-1.amazonaws.com/sagemaker-debugger-rules:latest', + 'RuleEvaluatorImage': '503895931360.dkr.ecr.us-east-1.amazonaws.com/sagemaker-debugger-rules:latest', 'RuleParameters': { 'rule_to_invoke': 'Confusion', 'category_no': '15', @@ -449,13 +449,14 @@ def test_training_step_creation_with_framework(tensorflow_estimator): }, 'RoleArn': EXECUTION_ROLE, 'HyperParameters': { - 'model_dir': '"s3://sagemaker/models/tensorflow-job/model"', + 'checkpoint_path': '"s3://sagemaker/models/sagemaker-tensorflow/checkpoints"', + 'evaluation_steps': '100', 'sagemaker_container_log_level': '20', - 'sagemaker_enable_cloudwatch_metrics': 'false', 'sagemaker_job_name': '"tensorflow-job"', 'sagemaker_program': '"tf_train.py"', 'sagemaker_region': '"us-east-1"', - 'sagemaker_submit_directory': '"s3://sagemaker/source"' + 'sagemaker_submit_directory': '"s3://sagemaker/source"', + 'training_steps': '1000', }, 'TrainingJobName': 'tensorflow-job', 'Tags': DEFAULT_TAGS_LIST @@ -557,7 +558,6 @@ def test_get_expected_model_with_framework_estimator(tensorflow_estimator): 'Environment': { 'SAGEMAKER_PROGRAM': 'tf_train.py', 'SAGEMAKER_SUBMIT_DIRECTORY': 's3://sagemaker/tensorflow-job/source/sourcedir.tar.gz', - 'SAGEMAKER_ENABLE_CLOUDWATCH_METRICS': 'false', 'SAGEMAKER_CONTAINER_LOG_LEVEL': '20', 'SAGEMAKER_REGION': 'us-east-1', },