feat(client-sagemaker): add model data caching for inference components

awstools · awstools · commit 6d7740069836 · 2025-10-24T18:14:12.000Z
diff --git a/clients/client-sagemaker/src/commands/CreateInferenceComponentCommand.ts b/clients/client-sagemaker/src/commands/CreateInferenceComponentCommand.ts
@@ -61,6 +61,9 @@ export interface CreateInferenceComponentCommandOutput extends CreateInferenceCo
  *       MaxMemoryRequiredInMb: Number("int"),
  *     },
  *     BaseInferenceComponentName: "STRING_VALUE",
+ *     DataCacheConfig: { // InferenceComponentDataCacheConfig
+ *       EnableCaching: true || false, // required
+ *     },
  *   },
  *   RuntimeConfig: { // InferenceComponentRuntimeConfig
  *     CopyCount: Number("int"), // required
diff --git a/clients/client-sagemaker/src/commands/CreateLabelingJobCommand.ts b/clients/client-sagemaker/src/commands/CreateLabelingJobCommand.ts
@@ -5,7 +5,8 @@ import { Command as $Command } from "@smithy/smithy-client";
 import { MetadataBearer as __MetadataBearer } from "@smithy/types";
 
 import { commonParams } from "../endpoint/EndpointParameters";
-import { CreateLabelingJobRequest, CreateLabelingJobResponse } from "../models/models_1";
+import { CreateLabelingJobRequest } from "../models/models_1";
+import { CreateLabelingJobResponse } from "../models/models_2";
 import { de_CreateLabelingJobCommand, se_CreateLabelingJobCommand } from "../protocols/Aws_json1_1";
 import { SageMakerClientResolvedConfig, ServiceInputTypes, ServiceOutputTypes } from "../SageMakerClient";
 
diff --git a/clients/client-sagemaker/src/commands/DescribeClusterNodeCommand.ts b/clients/client-sagemaker/src/commands/DescribeClusterNodeCommand.ts
@@ -5,8 +5,7 @@ import { Command as $Command } from "@smithy/smithy-client";
 import { MetadataBearer as __MetadataBearer } from "@smithy/types";
 
 import { commonParams } from "../endpoint/EndpointParameters";
-import { DescribeClusterNodeRequest } from "../models/models_2";
-import { DescribeClusterNodeResponse } from "../models/models_3";
+import { DescribeClusterNodeRequest, DescribeClusterNodeResponse } from "../models/models_3";
 import { de_DescribeClusterNodeCommand, se_DescribeClusterNodeCommand } from "../protocols/Aws_json1_1";
 import { SageMakerClientResolvedConfig, ServiceInputTypes, ServiceOutputTypes } from "../SageMakerClient";
 
diff --git a/clients/client-sagemaker/src/commands/DescribeInferenceComponentCommand.ts b/clients/client-sagemaker/src/commands/DescribeInferenceComponentCommand.ts
@@ -73,6 +73,9 @@ export interface DescribeInferenceComponentCommandOutput extends DescribeInferen
  * //       MaxMemoryRequiredInMb: Number("int"),
  * //     },
  * //     BaseInferenceComponentName: "STRING_VALUE",
+ * //     DataCacheConfig: { // InferenceComponentDataCacheConfigSummary
+ * //       EnableCaching: true || false, // required
+ * //     },
  * //   },
  * //   RuntimeConfig: { // InferenceComponentRuntimeConfigSummary
  * //     DesiredCopyCount: Number("int"),
diff --git a/clients/client-sagemaker/src/commands/GetScalingConfigurationRecommendationCommand.ts b/clients/client-sagemaker/src/commands/GetScalingConfigurationRecommendationCommand.ts
@@ -5,10 +5,8 @@ import { Command as $Command } from "@smithy/smithy-client";
 import { MetadataBearer as __MetadataBearer } from "@smithy/types";
 
 import { commonParams } from "../endpoint/EndpointParameters";
-import {
-  GetScalingConfigurationRecommendationRequest,
-  GetScalingConfigurationRecommendationResponse,
-} from "../models/models_3";
+import { GetScalingConfigurationRecommendationRequest } from "../models/models_3";
+import { GetScalingConfigurationRecommendationResponse } from "../models/models_4";
 import {
   de_GetScalingConfigurationRecommendationCommand,
   se_GetScalingConfigurationRecommendationCommand,
diff --git a/clients/client-sagemaker/src/commands/ListUltraServersByReservedCapacityCommand.ts b/clients/client-sagemaker/src/commands/ListUltraServersByReservedCapacityCommand.ts
@@ -5,8 +5,10 @@ import { Command as $Command } from "@smithy/smithy-client";
 import { MetadataBearer as __MetadataBearer } from "@smithy/types";
 
 import { commonParams } from "../endpoint/EndpointParameters";
-import { ListUltraServersByReservedCapacityRequest } from "../models/models_4";
-import { ListUltraServersByReservedCapacityResponse } from "../models/models_5";
+import {
+  ListUltraServersByReservedCapacityRequest,
+  ListUltraServersByReservedCapacityResponse,
+} from "../models/models_5";
 import {
   de_ListUltraServersByReservedCapacityCommand,
   se_ListUltraServersByReservedCapacityCommand,
diff --git a/clients/client-sagemaker/src/commands/UpdateInferenceComponentCommand.ts b/clients/client-sagemaker/src/commands/UpdateInferenceComponentCommand.ts
@@ -59,6 +59,9 @@ export interface UpdateInferenceComponentCommandOutput extends UpdateInferenceCo
  *       MaxMemoryRequiredInMb: Number("int"),
  *     },
  *     BaseInferenceComponentName: "STRING_VALUE",
+ *     DataCacheConfig: { // InferenceComponentDataCacheConfig
+ *       EnableCaching: true || false, // required
+ *     },
  *   },
  *   RuntimeConfig: { // InferenceComponentRuntimeConfig
  *     CopyCount: Number("int"), // required
diff --git a/clients/client-sagemaker/src/models/models_1.ts b/clients/client-sagemaker/src/models/models_1.ts
@@ -6738,6 +6738,18 @@ export interface InferenceComponentContainerSpecification {
   Environment?: Record<string, string> | undefined;
 }
 
+/**
+ * <p>Settings that affect how the inference component caches data.</p>
+ * @public
+ */
+export interface InferenceComponentDataCacheConfig {
+  /**
+   * <p>Sets whether the endpoint that hosts the inference component caches the model artifacts and container image.</p> <p>With caching enabled, the endpoint caches this data in each instance that it provisions for the inference component. That way, the inference component deploys faster during the auto scaling process. If caching isn't enabled, the inference component takes longer to deploy because of the time it spends downloading the data.</p>
+   * @public
+   */
+  EnableCaching: boolean | undefined;
+}
+
 /**
  * <p>Settings that take effect while the model container starts up.</p>
  * @public
@@ -6790,6 +6802,12 @@ export interface InferenceComponentSpecification {
    * @public
    */
   BaseInferenceComponentName?: string | undefined;
+
+  /**
+   * <p>Settings that affect how the inference component caches data.</p>
+   * @public
+   */
+  DataCacheConfig?: InferenceComponentDataCacheConfig | undefined;
 }
 
 /**
@@ -8096,14 +8114,3 @@ export interface CreateLabelingJobRequest {
    */
   Tags?: Tag[] | undefined;
 }
-
-/**
- * @public
- */
-export interface CreateLabelingJobResponse {
-  /**
-   * <p>The Amazon Resource Name (ARN) of the labeling job. You use this ARN to identify the labeling job.</p>
-   * @public
-   */
-  LabelingJobArn: string | undefined;
-}
diff --git a/clients/client-sagemaker/src/models/models_2.ts b/clients/client-sagemaker/src/models/models_2.ts
@@ -102,6 +102,17 @@ import {
   UserSettings,
 } from "./models_1";
 
+/**
+ * @public
+ */
+export interface CreateLabelingJobResponse {
+  /**
+   * <p>The Amazon Resource Name (ARN) of the labeling job. You use this ARN to identify the labeling job.</p>
+   * @public
+   */
+  LabelingJobArn: string | undefined;
+}
+
 /**
  * @public
  * @enum
@@ -1832,7 +1843,7 @@ export interface CreateNotebookInstanceInput {
   RootAccess?: RootAccess | undefined;
 
   /**
-   * <p>The platform identifier of the notebook instance runtime environment.</p>
+   * <p>The platform identifier of the notebook instance runtime environment. The default value is <code>notebook-al2-v2</code>.</p>
    * @public
    */
   PlatformIdentifier?: string | undefined;
@@ -2875,7 +2886,7 @@ export interface ProcessingS3Input {
   S3InputMode?: ProcessingS3InputMode | undefined;
 
   /**
-   * <p>Whether to distribute the data from Amazon S3 to all processing instances with <code>FullyReplicated</code>, or whether the data from Amazon S3 is shared by Amazon S3 key, downloading one shard of data to each processing instance.</p>
+   * <p>Whether to distribute the data from Amazon S3 to all processing instances with <code>FullyReplicated</code>, or whether the data from Amazon S3 is sharded by Amazon S3 key, downloading one shard of data to each processing instance.</p>
    * @public
    */
   S3DataDistributionType?: ProcessingS3DataDistributionType | undefined;
@@ -7029,29 +7040,6 @@ export interface DescribeClusterEventResponse {
   EventDetails?: ClusterEventDetail | undefined;
 }
 
-/**
- * @public
- */
-export interface DescribeClusterNodeRequest {
-  /**
-   * <p>The string name or the Amazon Resource Name (ARN) of the SageMaker HyperPod cluster in which the node is.</p>
-   * @public
-   */
-  ClusterName: string | undefined;
-
-  /**
-   * <p>The ID of the SageMaker HyperPod cluster node.</p>
-   * @public
-   */
-  NodeId?: string | undefined;
-
-  /**
-   * <p>The logical identifier of the node to describe. You can specify either <code>NodeLogicalId</code> or <code>InstanceId</code>, but not both. <code>NodeLogicalId</code> can be used to describe nodes that are still being provisioned and don't yet have an <code>InstanceId</code> assigned.</p>
-   * @public
-   */
-  NodeLogicalId?: string | undefined;
-}
-
 /**
  * @internal
  */
diff --git a/clients/client-sagemaker/src/models/models_3.ts b/clients/client-sagemaker/src/models/models_3.ts
@@ -198,6 +198,29 @@ import {
   WorkforceIpAddressType,
 } from "./models_2";
 
+/**
+ * @public
+ */
+export interface DescribeClusterNodeRequest {
+  /**
+   * <p>The string name or the Amazon Resource Name (ARN) of the SageMaker HyperPod cluster in which the node is.</p>
+   * @public
+   */
+  ClusterName: string | undefined;
+
+  /**
+   * <p>The ID of the SageMaker HyperPod cluster node.</p>
+   * @public
+   */
+  NodeId?: string | undefined;
+
+  /**
+   * <p>The logical identifier of the node to describe. You can specify either <code>NodeLogicalId</code> or <code>NodeId</code>, but not both. <code>NodeLogicalId</code> can be used to describe nodes that are still being provisioned and don't yet have an instance ID assigned.</p>
+   * @public
+   */
+  NodeLogicalId?: string | undefined;
+}
+
 /**
  * @public
  */
@@ -3534,6 +3557,18 @@ export interface InferenceComponentContainerSpecificationSummary {
   Environment?: Record<string, string> | undefined;
 }
 
+/**
+ * <p>Settings that affect how the inference component caches data.</p>
+ * @public
+ */
+export interface InferenceComponentDataCacheConfigSummary {
+  /**
+   * <p>Indicates whether the inference component caches model artifacts as part of the auto scaling process.</p>
+   * @public
+   */
+  EnableCaching: boolean | undefined;
+}
+
 /**
  * <p>Details about the resources that are deployed with this inference component.</p>
  * @public
@@ -3568,6 +3603,12 @@ export interface InferenceComponentSpecificationSummary {
    * @public
    */
   BaseInferenceComponentName?: string | undefined;
+
+  /**
+   * <p>Settings that affect how the inference component caches data.</p>
+   * @public
+   */
+  DataCacheConfig?: InferenceComponentDataCacheConfigSummary | undefined;
 }
 
 /**
@@ -10577,83 +10618,6 @@ export interface ScalingPolicyMetric {
   ModelLatency?: number | undefined;
 }
 
-/**
- * @public
- */
-export interface GetScalingConfigurationRecommendationResponse {
-  /**
-   * <p>The name of a previously completed Inference Recommender job.</p>
-   * @public
-   */
-  InferenceRecommendationsJobName?: string | undefined;
-
-  /**
-   * <p>The recommendation ID of a previously completed inference recommendation.</p>
-   * @public
-   */
-  RecommendationId?: string | undefined;
-
-  /**
-   * <p>The name of an endpoint benchmarked during a previously completed Inference Recommender job.</p>
-   * @public
-   */
-  EndpointName?: string | undefined;
-
-  /**
-   * <p>The percentage of how much utilization you want an instance to use before autoscaling, which you specified in the request. The default value is 50%.</p>
-   * @public
-   */
-  TargetCpuUtilizationPerCore?: number | undefined;
-
-  /**
-   * <p>An object representing the anticipated traffic pattern for an endpoint that you specified in the request.</p>
-   * @public
-   */
-  ScalingPolicyObjective?: ScalingPolicyObjective | undefined;
-
-  /**
-   * <p>An object with a list of metrics that were benchmarked during the previously completed Inference Recommender job.</p>
-   * @public
-   */
-  Metric?: ScalingPolicyMetric | undefined;
-
-  /**
-   * <p>An object with the recommended values for you to specify when creating an autoscaling policy.</p>
-   * @public
-   */
-  DynamicScalingConfiguration?: DynamicScalingConfiguration | undefined;
-}
-
-/**
- * @public
- * @enum
- */
-export const ResourceType = {
-  ENDPOINT: "Endpoint",
-  EXPERIMENT: "Experiment",
-  EXPERIMENT_TRIAL: "ExperimentTrial",
-  EXPERIMENT_TRIAL_COMPONENT: "ExperimentTrialComponent",
-  FEATURE_GROUP: "FeatureGroup",
-  FEATURE_METADATA: "FeatureMetadata",
-  HYPER_PARAMETER_TUNING_JOB: "HyperParameterTuningJob",
-  IMAGE: "Image",
-  IMAGE_VERSION: "ImageVersion",
-  MODEL: "Model",
-  MODEL_CARD: "ModelCard",
-  MODEL_PACKAGE: "ModelPackage",
-  MODEL_PACKAGE_GROUP: "ModelPackageGroup",
-  PIPELINE: "Pipeline",
-  PIPELINE_EXECUTION: "PipelineExecution",
-  PIPELINE_VERSION: "PipelineVersion",
-  PROJECT: "Project",
-  TRAINING_JOB: "TrainingJob",
-} as const;
-
-/**
- * @public
- */
-export type ResourceType = (typeof ResourceType)[keyof typeof ResourceType];
-
 /**
  * @internal
  */
diff --git a/clients/client-sagemaker/src/models/models_4.ts b/clients/client-sagemaker/src/models/models_4.ts
diff --git a/clients/client-sagemaker/src/models/models_5.ts b/clients/client-sagemaker/src/models/models_5.ts
diff --git a/clients/client-sagemaker/src/protocols/Aws_json1_1.ts b/clients/client-sagemaker/src/protocols/Aws_json1_1.ts
diff --git a/codegen/sdk-codegen/aws-models/sagemaker.json b/codegen/sdk-codegen/aws-models/sagemaker.json