diff --git a/buildspec.yml b/buildspec.yml
index 7858bee8..d058377f 100644
--- a/buildspec.yml
+++ b/buildspec.yml
@@ -3,7 +3,7 @@ version: 0.2
 env:
   variables:
     FRAMEWORK_VERSION: '1.6.0'
-    EIA_FRAMEWORK_VERSION: '1.3.1'
+    EIA_FRAMEWORK_VERSION: '1.5.1'
     CPU_INSTANCE_TYPE: 'ml.c4.xlarge'
     GPU_INSTANCE_TYPE: 'ml.p2.8xlarge'
     EIA_ACCELERATOR_TYPE: 'ml.eia2.medium'
@@ -11,7 +11,7 @@ env:
     GITHUB_REPO: 'sagemaker-pytorch-serving-container'
     DLC_ACCOUNT: '763104351884'
     SETUP_FILE: 'setup_cmds.sh'
-    SETUP_CMDS: '#!/bin/bash\npython3.6 -m pip install --upgrade pip\npython3.6 -m pip install -U -e .\npython3.6 -m pip install -U -e .[test]'
+    SETUP_CMDS: '#!/bin/bash\npython3.6 -m pip install --upgrade pip==20.2.2\npython3.6 -m pip install -U -e .\npython3.6 -m pip install -U -e .[test]'
 
 phases:
@@ -35,54 +35,54 @@ phases:
       # run unit tests
       - tox -e py36,py37 test/unit
 
-      # define tags
-      - GENERIC_TAG="$FRAMEWORK_VERSION-pytorch-$BUILD_ID"
-      - DLC_CPU_TAG="$FRAMEWORK_VERSION-dlc-cpu-$BUILD_ID"
-      - DLC_GPU_TAG="$FRAMEWORK_VERSION-dlc-gpu-$BUILD_ID"
+      # # define tags
+      # - GENERIC_TAG="$FRAMEWORK_VERSION-pytorch-$BUILD_ID"
+      # - DLC_CPU_TAG="$FRAMEWORK_VERSION-dlc-cpu-$BUILD_ID"
+      # - DLC_GPU_TAG="$FRAMEWORK_VERSION-dlc-gpu-$BUILD_ID"
       - DLC_EIA_TAG="$FRAMEWORK_VERSION-dlc-eia-$BUILD_ID"
 
-      # run local CPU integration tests (build and push the image to ECR repo)
-      - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --build-image --push-image --dockerfile-type pytorch --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $GENERIC_TAG"
-      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
-      - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --build-image --push-image --dockerfile-type dlc.cpu --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $DLC_CPU_TAG"
-      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
-
-      # launch remote GPU instance
-      - prefix='ml.'
-      - instance_type=${GPU_INSTANCE_TYPE#"$prefix"}
-      - create-key-pair
-      - launch-ec2-instance --instance-type $instance_type --ami-name dlami-ubuntu-latest
-
-      # build DLC GPU image because the base DLC image is too big and takes too long to build as part of the test
-      - python3 setup.py sdist
-      - build_dir="test/container/$FRAMEWORK_VERSION"
-      - $(aws ecr get-login --registry-ids $DLC_ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION)
-      - docker build -f "$build_dir/Dockerfile.dlc.gpu" -t $PREPROD_IMAGE:$DLC_GPU_TAG --build-arg region=$AWS_DEFAULT_REGION .
-      # push DLC GPU image to ECR
-      - $(aws ecr get-login --registry-ids $ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION)
-      - docker push $PREPROD_IMAGE:$DLC_GPU_TAG
-
-      # run GPU local integration tests
-      - printf "$SETUP_CMDS" > $SETUP_FILE
-      # no reason to rebuild the image again since it was already built and pushed to ECR during CPU tests
-      - generic_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $GENERIC_TAG"
-      - test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$generic_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\""
-      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
-      - dlc_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $DLC_GPU_TAG"
-      - test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$dlc_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\" --skip-setup"
-      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
-
-      # run CPU sagemaker integration tests
-      - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $GENERIC_TAG"
-      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
-      - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $DLC_CPU_TAG"
-      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
-
-      # run GPU sagemaker integration tests
-      - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $GENERIC_TAG"
-      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
-      - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $DLC_GPU_TAG"
-      - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
+      # # run local CPU integration tests (build and push the image to ECR repo)
+      # - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --build-image --push-image --dockerfile-type pytorch --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $GENERIC_TAG"
+      # - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
+      # - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --build-image --push-image --dockerfile-type dlc.cpu --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --tag $DLC_CPU_TAG"
+      # - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
+
+      # # launch remote GPU instance
+      # - prefix='ml.'
+      # - instance_type=${GPU_INSTANCE_TYPE#"$prefix"}
+      # - create-key-pair
+      # - launch-ec2-instance --instance-type $instance_type --ami-name dlami-ubuntu-latest
+
+      # # build DLC GPU image because the base DLC image is too big and takes too long to build as part of the test
+      # - python3 setup.py sdist
+      # - build_dir="test/container/$FRAMEWORK_VERSION"
+      # - $(aws ecr get-login --registry-ids $DLC_ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION)
+      # - docker build -f "$build_dir/Dockerfile.dlc.gpu" -t $PREPROD_IMAGE:$DLC_GPU_TAG --build-arg region=$AWS_DEFAULT_REGION .
+      # # push DLC GPU image to ECR
+      # - $(aws ecr get-login --registry-ids $ACCOUNT --no-include-email --region $AWS_DEFAULT_REGION)
+      # - docker push $PREPROD_IMAGE:$DLC_GPU_TAG
+
+      # # run GPU local integration tests
+      # - printf "$SETUP_CMDS" > $SETUP_FILE
+      # # no reason to rebuild the image again since it was already built and pushed to ECR during CPU tests
+      # - generic_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $GENERIC_TAG"
+      # - test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$generic_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\""
+      # - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
+      # - dlc_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/local --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --tag $DLC_GPU_TAG"
+      # - test_cmd="remote-test --github-repo $GITHUB_REPO --test-cmd \"$dlc_cmd\" --setup-file $SETUP_FILE --pr-number \"$PR_NUM\" --skip-setup"
+      # - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
+
+      # # run CPU sagemaker integration tests
+      # - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $GENERIC_TAG"
+      # - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
+      # - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --tag $DLC_CPU_TAG"
+      # - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
+
+      # # run GPU sagemaker integration tests
+      # - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $GENERIC_TAG"
+      # - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
+      # - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $FRAMEWORK_VERSION --processor gpu --instance-type $GPU_INSTANCE_TYPE --tag $DLC_GPU_TAG"
+      # - execute-command-if-has-matching-changes "$test_cmd" "test/" "src/*.py" "setup.py" "setup.cfg" "buildspec-toolkit.yml" "artifacts/*"
 
       # run EIA sagemaker integration tests
       - test_cmd="IGNORE_COVERAGE=- tox -e py36 -- test/integration/sagemaker --build-image --push-image --dockerfile-type dlc.eia --region $AWS_DEFAULT_REGION --docker-base-name $ECR_REPO --aws-id $ACCOUNT --framework-version $EIA_FRAMEWORK_VERSION --processor cpu --instance-type $CPU_INSTANCE_TYPE --accelerator-type $EIA_ACCELERATOR_TYPE --tag $DLC_EIA_TAG"
@@ -93,8 +93,8 @@ phases:
       - cleanup-gpu-instances
       - cleanup-key-pairs
 
-      # remove ECR image
-      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GENERIC_TAG
-      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_CPU_TAG
-      - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_GPU_TAG
+      # # remove ECR image
+      # - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$GENERIC_TAG
+      # - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_CPU_TAG
+      # - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_GPU_TAG
       - aws ecr batch-delete-image --repository-name $ECR_REPO --region $AWS_DEFAULT_REGION --image-ids imageTag=$DLC_EIA_TAG
diff --git a/src/sagemaker_pytorch_serving_container/default_pytorch_inference_handler.py b/src/sagemaker_pytorch_serving_container/default_pytorch_inference_handler.py
index 92857434..4f20697d 100644
--- a/src/sagemaker_pytorch_serving_container/default_pytorch_inference_handler.py
+++ b/src/sagemaker_pytorch_serving_container/default_pytorch_inference_handler.py
@@ -28,6 +28,8 @@
 INFERENCE_ACCELERATOR_PRESENT_ENV = "SAGEMAKER_INFERENCE_ACCELERATOR_PRESENT"
 DEFAULT_MODEL_FILENAME = "model.pt"
 
+VERSIONS_USE_NEW_API = ["1.5.1"]
+
 
 class DefaultPytorchInferenceHandler(default_inference_handler.DefaultInferenceHandler):
     VALID_CONTENT_TYPES = (content_types.JSON, content_types.NPY)
@@ -86,8 +88,15 @@ def default_predict_fn(self, data, model):
                 model = model.to(device)
                 input_data = data.to(device)
                 model.eval()
-                with torch.jit.optimized_execution(True, {"target_device": "eia:0"}):
-                    output = model(input_data)
+                if torch.__version__ in VERSIONS_USE_NEW_API:
+                    import torcheia
+                    torch._C._jit_set_profiling_executor(False)
+                    model = torcheia.jit.attach_eia(model, 0)
+                    with torch.jit.optimized_execution(True):
+                        return model.forward(input_data)
+                else:
+                    with torch.jit.optimized_execution(True, {"target_device": "eia:0"}):
+                        output = model(input_data)
             else:
                 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
                 model = model.to(device)
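Aside, outside the diff: on PyTorch 1.5.1 the new branch above assumes the loaded model is a TorchScript module, since torcheia.jit.attach_eia attaches the Elastic Inference accelerator to a traced or scripted graph. A minimal sketch of producing a compatible model.pt artifact follows; the module, input shape, and helper name are illustrative assumptions, not code from this repo:

    import torch

    def save_traced_model(net, path="model.pt"):
        # Hypothetical helper: trace the model with a representative input and
        # save it as TorchScript. The handler above loads
        # DEFAULT_MODEL_FILENAME ("model.pt") from the model directory.
        net.eval()
        example = torch.rand(1, 1, 28, 28)  # assumed MNIST-shaped input
        traced = torch.jit.trace(net, example)
        torch.jit.save(traced, path)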
diff --git a/test/container/1.5.1/Dockerfile.dlc.eia b/test/container/1.5.1/Dockerfile.dlc.eia
new file mode 100644
index 00000000..61c49d1e
--- /dev/null
+++ b/test/container/1.5.1/Dockerfile.dlc.eia
@@ -0,0 +1,6 @@
+ARG region
+FROM public.ecr.aws/f1e4j7w5/public_repo:1.5.1-cpu-py36-ubuntu16
+
+COPY dist/sagemaker_pytorch_inference-*.tar.gz /sagemaker_pytorch_inference.tar.gz
+RUN pip install --upgrade --no-cache-dir /sagemaker_pytorch_inference.tar.gz && \
+    rm /sagemaker_pytorch_inference.tar.gz
diff --git a/test/integration/sagemaker/test_mnist.py b/test/integration/sagemaker/test_mnist.py
index 912eba5b..9496a81f 100644
--- a/test/integration/sagemaker/test_mnist.py
+++ b/test/integration/sagemaker/test_mnist.py
@@ -34,7 +34,7 @@ def test_mnist_gpu(sagemaker_session, image_uri, instance_type):
     _test_mnist_distributed(sagemaker_session, image_uri, instance_type, model_gpu_tar, mnist_gpu_script)
 
 
-@pytest.mark.skip(reason="Latest EIA version is too old - 1.3.1. Remove this after a new DLC release")
+# @pytest.mark.skip(reason="Latest EIA version is too old - 1.3.1. Remove this after a new DLC release")
 @pytest.mark.eia_test
 def test_mnist_eia(sagemaker_session, image_uri, instance_type, accelerator_type):
     instance_type = instance_type or 'ml.c4.xlarge'
diff --git a/test/resources/mnist/model_eia/mnist.py b/test/resources/mnist/model_eia/mnist.py
index ebc0bff0..011c6e8c 100644
--- a/test/resources/mnist/model_eia/mnist.py
+++ b/test/resources/mnist/model_eia/mnist.py
@@ -21,17 +21,29 @@
 logger.setLevel(logging.DEBUG)
 logger.addHandler(logging.StreamHandler(sys.stdout))
 
+VERSIONS_USE_NEW_API = ["1.5.1"]
 
 def predict_fn(input_data, model):
     logger.info('Performing EIA inference with Torch JIT context with input of size {}'.format(input_data.shape))
     # With EI, client instance should be CPU for cost-efficiency. Subgraphs with unsupported arguments run locally. Server runs with CUDA
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-    mdoel = model.to(device)
+    model = model.to(device)
     input_data = input_data.to(device)
+    model = model.eval()
     with torch.no_grad():
-        # Set the target device to the accelerator ordinal
-        with torch.jit.optimized_execution(True, {'target_device': 'eia:0'}):
-            return model(input_data)
+        print("current torch version is: ", torch.__version__)
+        if torch.__version__ in VERSIONS_USE_NEW_API:
+            import torcheia
+            # we need to set the profiling executor for EIA
+            torch._C._jit_set_profiling_executor(False)
+            # Here we want to use the first attached accelerator, so we specify ordinal 0.
+            model = torcheia.jit.attach_eia(model, 0)
+            with torch.jit.optimized_execution(True):
+                return model.forward(input_data)
+        else:
+            # Set the target device to the accelerator ordinal
+            with torch.jit.optimized_execution(True, {'target_device': 'eia:0'}):
+                return model(input_data)
 
 
 def model_fn(model_dir):
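Outside the diff, a rough sketch of how an endpoint like the one test_mnist_eia exercises could be stood up with the SageMaker Python SDK: a CPU host matching CPU_INSTANCE_TYPE with an attached accelerator matching EIA_ACCELERATOR_TYPE. The S3 path and role ARN are placeholders:

    from sagemaker.pytorch import PyTorchModel

    model = PyTorchModel(
        model_data="s3://my-bucket/model_eia.tar.gz",  # placeholder artifact
        role="arn:aws:iam::111122223333:role/SageMakerRole",  # placeholder role
        entry_point="mnist.py",  # the inference script patched above
        framework_version="1.5.1",
        py_version="py3",
    )

    # Deploy on a CPU instance with an Elastic Inference accelerator attached;
    # with an accelerator present, the container is expected to see
    # SAGEMAKER_INFERENCE_ACCELERATOR_PRESENT and take the EIA branch
    # of default_predict_fn.
    predictor = model.deploy(
        initial_instance_count=1,
        instance_type="ml.c4.xlarge",
        accelerator_type="ml.eia2.medium",
    )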