Skip to content

Commit 6d77400

Browse files
author
awstools
committed
feat(client-sagemaker): Added inference components model data caching feature
1 parent 1191c67 commit 6d77400

14 files changed

+81783
-81686
lines changed

clients/client-sagemaker/src/commands/CreateInferenceComponentCommand.ts

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,9 @@ export interface CreateInferenceComponentCommandOutput extends CreateInferenceCo
6161
* MaxMemoryRequiredInMb: Number("int"),
6262
* },
6363
* BaseInferenceComponentName: "STRING_VALUE",
64+
* DataCacheConfig: { // InferenceComponentDataCacheConfig
65+
* EnableCaching: true || false, // required
66+
* },
6467
* },
6568
* RuntimeConfig: { // InferenceComponentRuntimeConfig
6669
* CopyCount: Number("int"), // required

clients/client-sagemaker/src/commands/CreateLabelingJobCommand.ts

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,8 @@ import { Command as $Command } from "@smithy/smithy-client";
55
import { MetadataBearer as __MetadataBearer } from "@smithy/types";
66

77
import { commonParams } from "../endpoint/EndpointParameters";
8-
import { CreateLabelingJobRequest, CreateLabelingJobResponse } from "../models/models_1";
8+
import { CreateLabelingJobRequest } from "../models/models_1";
9+
import { CreateLabelingJobResponse } from "../models/models_2";
910
import { de_CreateLabelingJobCommand, se_CreateLabelingJobCommand } from "../protocols/Aws_json1_1";
1011
import { SageMakerClientResolvedConfig, ServiceInputTypes, ServiceOutputTypes } from "../SageMakerClient";
1112

clients/client-sagemaker/src/commands/DescribeClusterNodeCommand.ts

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,8 +5,7 @@ import { Command as $Command } from "@smithy/smithy-client";
55
import { MetadataBearer as __MetadataBearer } from "@smithy/types";
66

77
import { commonParams } from "../endpoint/EndpointParameters";
8-
import { DescribeClusterNodeRequest } from "../models/models_2";
9-
import { DescribeClusterNodeResponse } from "../models/models_3";
8+
import { DescribeClusterNodeRequest, DescribeClusterNodeResponse } from "../models/models_3";
109
import { de_DescribeClusterNodeCommand, se_DescribeClusterNodeCommand } from "../protocols/Aws_json1_1";
1110
import { SageMakerClientResolvedConfig, ServiceInputTypes, ServiceOutputTypes } from "../SageMakerClient";
1211

clients/client-sagemaker/src/commands/DescribeInferenceComponentCommand.ts

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -73,6 +73,9 @@ export interface DescribeInferenceComponentCommandOutput extends DescribeInferen
7373
* // MaxMemoryRequiredInMb: Number("int"),
7474
* // },
7575
* // BaseInferenceComponentName: "STRING_VALUE",
76+
* // DataCacheConfig: { // InferenceComponentDataCacheConfigSummary
77+
* // EnableCaching: true || false, // required
78+
* // },
7679
* // },
7780
* // RuntimeConfig: { // InferenceComponentRuntimeConfigSummary
7881
* // DesiredCopyCount: Number("int"),

clients/client-sagemaker/src/commands/GetScalingConfigurationRecommendationCommand.ts

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -5,10 +5,8 @@ import { Command as $Command } from "@smithy/smithy-client";
55
import { MetadataBearer as __MetadataBearer } from "@smithy/types";
66

77
import { commonParams } from "../endpoint/EndpointParameters";
8-
import {
9-
GetScalingConfigurationRecommendationRequest,
10-
GetScalingConfigurationRecommendationResponse,
11-
} from "../models/models_3";
8+
import { GetScalingConfigurationRecommendationRequest } from "../models/models_3";
9+
import { GetScalingConfigurationRecommendationResponse } from "../models/models_4";
1210
import {
1311
de_GetScalingConfigurationRecommendationCommand,
1412
se_GetScalingConfigurationRecommendationCommand,

clients/client-sagemaker/src/commands/ListUltraServersByReservedCapacityCommand.ts

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,8 +5,10 @@ import { Command as $Command } from "@smithy/smithy-client";
55
import { MetadataBearer as __MetadataBearer } from "@smithy/types";
66

77
import { commonParams } from "../endpoint/EndpointParameters";
8-
import { ListUltraServersByReservedCapacityRequest } from "../models/models_4";
9-
import { ListUltraServersByReservedCapacityResponse } from "../models/models_5";
8+
import {
9+
ListUltraServersByReservedCapacityRequest,
10+
ListUltraServersByReservedCapacityResponse,
11+
} from "../models/models_5";
1012
import {
1113
de_ListUltraServersByReservedCapacityCommand,
1214
se_ListUltraServersByReservedCapacityCommand,

clients/client-sagemaker/src/commands/UpdateInferenceComponentCommand.ts

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -59,6 +59,9 @@ export interface UpdateInferenceComponentCommandOutput extends UpdateInferenceCo
5959
* MaxMemoryRequiredInMb: Number("int"),
6060
* },
6161
* BaseInferenceComponentName: "STRING_VALUE",
62+
* DataCacheConfig: { // InferenceComponentDataCacheConfig
63+
* EnableCaching: true || false, // required
64+
* },
6265
* },
6366
* RuntimeConfig: { // InferenceComponentRuntimeConfig
6467
* CopyCount: Number("int"), // required

clients/client-sagemaker/src/models/models_1.ts

Lines changed: 18 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -6738,6 +6738,18 @@ export interface InferenceComponentContainerSpecification {
67386738
Environment?: Record<string, string> | undefined;
67396739
}
67406740

6741+
/**
6742+
* <p>Settings that affect how the inference component caches data.</p>
6743+
* @public
6744+
*/
6745+
export interface InferenceComponentDataCacheConfig {
6746+
/**
6747+
* <p>Sets whether the endpoint that hosts the inference component caches the model artifacts and container image.</p> <p>With caching enabled, the endpoint caches this data in each instance that it provisions for the inference component. That way, the inference component deploys faster during the auto scaling process. If caching isn't enabled, the inference component takes longer to deploy because of the time it spends downloading the data.</p>
6748+
* @public
6749+
*/
6750+
EnableCaching: boolean | undefined;
6751+
}
6752+
67416753
/**
67426754
* <p>Settings that take effect while the model container starts up.</p>
67436755
* @public
@@ -6790,6 +6802,12 @@ export interface InferenceComponentSpecification {
67906802
* @public
67916803
*/
67926804
BaseInferenceComponentName?: string | undefined;
6805+
6806+
/**
6807+
* <p>Settings that affect how the inference component caches data.</p>
6808+
* @public
6809+
*/
6810+
DataCacheConfig?: InferenceComponentDataCacheConfig | undefined;
67936811
}
67946812

67956813
/**
@@ -8096,14 +8114,3 @@ export interface CreateLabelingJobRequest {
80968114
*/
80978115
Tags?: Tag[] | undefined;
80988116
}
8099-
8100-
/**
8101-
* @public
8102-
*/
8103-
export interface CreateLabelingJobResponse {
8104-
/**
8105-
* <p>The Amazon Resource Name (ARN) of the labeling job. You use this ARN to identify the labeling job.</p>
8106-
* @public
8107-
*/
8108-
LabelingJobArn: string | undefined;
8109-
}

clients/client-sagemaker/src/models/models_2.ts

Lines changed: 13 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -102,6 +102,17 @@ import {
102102
UserSettings,
103103
} from "./models_1";
104104

105+
/**
106+
* @public
107+
*/
108+
export interface CreateLabelingJobResponse {
109+
/**
110+
* <p>The Amazon Resource Name (ARN) of the labeling job. You use this ARN to identify the labeling job.</p>
111+
* @public
112+
*/
113+
LabelingJobArn: string | undefined;
114+
}
115+
105116
/**
106117
* @public
107118
* @enum
@@ -1832,7 +1843,7 @@ export interface CreateNotebookInstanceInput {
18321843
RootAccess?: RootAccess | undefined;
18331844

18341845
/**
1835-
* <p>The platform identifier of the notebook instance runtime environment.</p>
1846+
* <p>The platform identifier of the notebook instance runtime environment. The default value is <code>notebook-al2-v2</code>.</p>
18361847
* @public
18371848
*/
18381849
PlatformIdentifier?: string | undefined;
@@ -2875,7 +2886,7 @@ export interface ProcessingS3Input {
28752886
S3InputMode?: ProcessingS3InputMode | undefined;
28762887

28772888
/**
2878-
* <p>Whether to distribute the data from Amazon S3 to all processing instances with <code>FullyReplicated</code>, or whether the data from Amazon S3 is shared by Amazon S3 key, downloading one shard of data to each processing instance.</p>
2889+
* <p>Whether to distribute the data from Amazon S3 to all processing instances with <code>FullyReplicated</code>, or whether the data from Amazon S3 is sharded by Amazon S3 key, downloading one shard of data to each processing instance.</p>
28792890
* @public
28802891
*/
28812892
S3DataDistributionType?: ProcessingS3DataDistributionType | undefined;
@@ -7029,29 +7040,6 @@ export interface DescribeClusterEventResponse {
70297040
EventDetails?: ClusterEventDetail | undefined;
70307041
}
70317042

7032-
/**
7033-
* @public
7034-
*/
7035-
export interface DescribeClusterNodeRequest {
7036-
/**
7037-
* <p>The string name or the Amazon Resource Name (ARN) of the SageMaker HyperPod cluster in which the node is.</p>
7038-
* @public
7039-
*/
7040-
ClusterName: string | undefined;
7041-
7042-
/**
7043-
* <p>The ID of the SageMaker HyperPod cluster node.</p>
7044-
* @public
7045-
*/
7046-
NodeId?: string | undefined;
7047-
7048-
/**
7049-
* <p>The logical identifier of the node to describe. You can specify either <code>NodeLogicalId</code> or <code>InstanceId</code>, but not both. <code>NodeLogicalId</code> can be used to describe nodes that are still being provisioned and don't yet have an <code>InstanceId</code> assigned.</p>
7050-
* @public
7051-
*/
7052-
NodeLogicalId?: string | undefined;
7053-
}
7054-
70557043
/**
70567044
* @internal
70577045
*/

clients/client-sagemaker/src/models/models_3.ts

Lines changed: 41 additions & 77 deletions
Original file line number | Diff line number | Diff line change
@@ -198,6 +198,29 @@ import {
198198
WorkforceIpAddressType,
199199
} from "./models_2";
200200

201+
/**
202+
* @public
203+
*/
204+
export interface DescribeClusterNodeRequest {
205+
/**
206+
* <p>The string name or the Amazon Resource Name (ARN) of the SageMaker HyperPod cluster in which the node is.</p>
207+
* @public
208+
*/
209+
ClusterName: string | undefined;
210+
211+
/**
212+
* <p>The ID of the SageMaker HyperPod cluster node.</p>
213+
* @public
214+
*/
215+
NodeId?: string | undefined;
216+
217+
/**
218+
* <p>The logical identifier of the node to describe. You can specify either <code>NodeLogicalId</code> or <code>InstanceId</code>, but not both. <code>NodeLogicalId</code> can be used to describe nodes that are still being provisioned and don't yet have an <code>InstanceId</code> assigned.</p>
219+
* @public
220+
*/
221+
NodeLogicalId?: string | undefined;
222+
}
223+
201224
/**
202225
* @public
203226
*/
@@ -3534,6 +3557,18 @@ export interface InferenceComponentContainerSpecificationSummary {
35343557
Environment?: Record<string, string> | undefined;
35353558
}
35363559

3560+
/**
3561+
* <p>Settings that affect how the inference component caches data.</p>
3562+
* @public
3563+
*/
3564+
export interface InferenceComponentDataCacheConfigSummary {
3565+
/**
3566+
* <p>Indicates whether the inference component caches model artifacts as part of the auto scaling process.</p>
3567+
* @public
3568+
*/
3569+
EnableCaching: boolean | undefined;
3570+
}
3571+
35373572
/**
35383573
* <p>Details about the resources that are deployed with this inference component.</p>
35393574
* @public
@@ -3568,6 +3603,12 @@ export interface InferenceComponentSpecificationSummary {
35683603
* @public
35693604
*/
35703605
BaseInferenceComponentName?: string | undefined;
3606+
3607+
/**
3608+
* <p>Settings that affect how the inference component caches data.</p>
3609+
* @public
3610+
*/
3611+
DataCacheConfig?: InferenceComponentDataCacheConfigSummary | undefined;
35713612
}
35723613

35733614
/**
@@ -10577,83 +10618,6 @@ export interface ScalingPolicyMetric {
1057710618
ModelLatency?: number | undefined;
1057810619
}
1057910620

10580-
/**
10581-
* @public
10582-
*/
10583-
export interface GetScalingConfigurationRecommendationResponse {
10584-
/**
10585-
* <p>The name of a previously completed Inference Recommender job.</p>
10586-
* @public
10587-
*/
10588-
InferenceRecommendationsJobName?: string | undefined;
10589-
10590-
/**
10591-
* <p>The recommendation ID of a previously completed inference recommendation.</p>
10592-
* @public
10593-
*/
10594-
RecommendationId?: string | undefined;
10595-
10596-
/**
10597-
* <p>The name of an endpoint benchmarked during a previously completed Inference Recommender job.</p>
10598-
* @public
10599-
*/
10600-
EndpointName?: string | undefined;
10601-
10602-
/**
10603-
* <p>The percentage of how much utilization you want an instance to use before autoscaling, which you specified in the request. The default value is 50%.</p>
10604-
* @public
10605-
*/
10606-
TargetCpuUtilizationPerCore?: number | undefined;
10607-
10608-
/**
10609-
* <p>An object representing the anticipated traffic pattern for an endpoint that you specified in the request.</p>
10610-
* @public
10611-
*/
10612-
ScalingPolicyObjective?: ScalingPolicyObjective | undefined;
10613-
10614-
/**
10615-
* <p>An object with a list of metrics that were benchmarked during the previously completed Inference Recommender job.</p>
10616-
* @public
10617-
*/
10618-
Metric?: ScalingPolicyMetric | undefined;
10619-
10620-
/**
10621-
* <p>An object with the recommended values for you to specify when creating an autoscaling policy.</p>
10622-
* @public
10623-
*/
10624-
DynamicScalingConfiguration?: DynamicScalingConfiguration | undefined;
10625-
}
10626-
10627-
/**
10628-
* @public
10629-
* @enum
10630-
*/
10631-
export const ResourceType = {
10632-
ENDPOINT: "Endpoint",
10633-
EXPERIMENT: "Experiment",
10634-
EXPERIMENT_TRIAL: "ExperimentTrial",
10635-
EXPERIMENT_TRIAL_COMPONENT: "ExperimentTrialComponent",
10636-
FEATURE_GROUP: "FeatureGroup",
10637-
FEATURE_METADATA: "FeatureMetadata",
10638-
HYPER_PARAMETER_TUNING_JOB: "HyperParameterTuningJob",
10639-
IMAGE: "Image",
10640-
IMAGE_VERSION: "ImageVersion",
10641-
MODEL: "Model",
10642-
MODEL_CARD: "ModelCard",
10643-
MODEL_PACKAGE: "ModelPackage",
10644-
MODEL_PACKAGE_GROUP: "ModelPackageGroup",
10645-
PIPELINE: "Pipeline",
10646-
PIPELINE_EXECUTION: "PipelineExecution",
10647-
PIPELINE_VERSION: "PipelineVersion",
10648-
PROJECT: "Project",
10649-
TRAINING_JOB: "TrainingJob",
10650-
} as const;
10651-
10652-
/**
10653-
* @public
10654-
*/
10655-
export type ResourceType = (typeof ResourceType)[keyof typeof ResourceType];
10656-
1065710621
/**
1065810622
* @internal
1065910623
*/

0 commit comments

Comments
 (0)