Skip to content

Commit 30b8366

Browse files
authored
Merge branch 'main' into ODSC-77200-auto-select-latest-conda-2
2 parents 99f5067 + 7ba99be commit 30b8366

File tree

11 files changed

+287
-96
lines changed

11 files changed

+287
-96
lines changed

ads/aqua/cli.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,5 +111,5 @@ def install():
111111
wheel_file_path = os.environ.get(
112112
"AQUA_EXTENSTION_PATH", "/ads/extension/adsjupyterlab_aqua_extension*.whl"
113113
)
114-
status = subprocess.run(f"pip install {wheel_file_path}", shell=True, check=False)
114+
status = subprocess.run(f"pip install {wheel_file_path} --no-deps", shell=True, check=False)
115115
return status.check_returncode

ads/aqua/modeldeployment/deployment.py

Lines changed: 75 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
get_params_list,
3535
get_resource_name,
3636
get_restricted_params_by_container,
37+
is_valid_ocid,
3738
load_gpu_shapes_index,
3839
validate_cmd_var,
3940
)
@@ -125,6 +126,9 @@ class AquaDeploymentApp(AquaApp):
125126
the GPU allocations for all compatible shapes.
126127
list_shapes(self, **kwargs) -> List[Dict]:
127128
Lists the valid model deployment shapes.
129+
recommend_shape(self, **kwargs) -> ShapeRecommendationReport:
130+
Generates a recommendation report or table of valid GPU deployment shapes
131+
for the provided model and configuration.
128132
129133
Note:
130134
Use `ads aqua deployment <method_name> --help` to get more details on the parameters available.
@@ -1283,56 +1287,97 @@ def validate_deployment_params(
12831287
)
12841288
return {"valid": True}
12851289

1290+
@cached(cache=TTLCache(maxsize=1, ttl=timedelta(minutes=1), timer=datetime.now))
12861291
def recommend_shape(self, **kwargs) -> Union[Table, ShapeRecommendationReport]:
12871292
"""
1288-
For the CLI (set by default, generate_table = True), generates the table (in rich diff) with valid
1289-
GPU deployment shapes for the provided model and configuration.
1293+
Generates a recommendation report or table of valid GPU deployment shapes
1294+
for the provided model and configuration.
12901295
1291-
For the API (set generate_table = False), generates the JSON with valid
1292-
GPU deployment shapes for the provided model and configuration.
1296+
For CLI (default `generate_table=True`): generates a rich table.
1297+
For API (`generate_table=False`): returns a structured JSON report.
1298+
Example: ads aqua deployment recommend_shape --model_id meta-llama/Llama-3.3-70B-Instruct --generate_table false
12931299
1294-
Validates the input and determines whether recommendations are available.
1295-
1296-
Parameters
1297-
----------
1298-
**kwargs
1299-
model_ocid : str
1300-
(Required) The OCID of the model to recommend feasible compute shapes for.
1300+
Args:
1301+
model_id : str
1302+
(Required) The OCID or Hugging Face model ID to recommend compute shapes for.
13011303
generate_table : bool, optional
1302-
If True, generate and return a rich-diff table; if False, return a JSON response (default is False).
1303-
compartment_id : str, optional
1304-
The OCID of the user's compartment to use for the recommendation.
1304+
If True, generates and returns a rich table; if False, returns a structured JSON report (default: True).
13051305
13061306
Returns
13071307
-------
1308-
Table (generate_table = True)
1309-
If `generate_table` is True, a table displaying the recommendation report with compatible deployment shapes,
1310-
or troubleshooting info if no shape is suitable.
1308+
Table
1309+
If `generate_table=True`, returns a table of shape recommendations.
13111310
1312-
ShapeRecommendationReport (generate_table = False)
1313-
If `generate_table` is False, a structured recommendation report with compatible deployment shapes,
1314-
or troubleshooting info and citing the largest shapes if no shape is suitable.
1311+
ShapeRecommendationReport
1312+
If `generate_table=False`, returns a structured recommendation report.
13151313
13161314
Raises
13171315
------
13181316
AquaValueError
1319-
If the model type is unsupported and no recommendation report can be generated.
1317+
If required parameters are missing or invalid.
13201318
"""
1321-
deployment_config = self.get_deployment_config(model_id=kwargs.get("model_id"))
1322-
kwargs["deployment_config"] = deployment_config
1319+
model_id = kwargs.pop("model_id", None)
1320+
if not model_id:
1321+
raise AquaValueError(
1322+
"The 'model_id' parameter is required to generate shape recommendations. "
1323+
"Please provide a valid OCID or Hugging Face model identifier."
1324+
)
1325+
1326+
logger.info(f"Starting shape recommendation for model_id: {model_id}")
1327+
1328+
self.telemetry.record_event_async(
1329+
category="aqua/deployment",
1330+
action="recommend_shape",
1331+
detail=get_ocid_substring(model_id, key_len=8)
1332+
if is_valid_ocid(ocid=model_id)
1333+
else model_id,
1334+
**kwargs,
1335+
)
1336+
1337+
if is_valid_ocid(ocid=model_id):
1338+
logger.debug(
1339+
f"Attempting to retrieve deployment configuration for model_id={model_id}"
1340+
)
1341+
try:
1342+
deployment_config = self.get_deployment_config(model_id=model_id)
1343+
kwargs["deployment_config"] = deployment_config
1344+
logger.debug(
1345+
f"Retrieved deployment configuration for model: {model_id}"
1346+
)
1347+
except Exception as e:
1348+
logger.warning(
1349+
f"Failed to retrieve deployment configuration for model_id={model_id}: {e}"
1350+
)
13231351

13241352
try:
1325-
request = RequestRecommend(**kwargs)
1353+
request = RequestRecommend(model_id=model_id, **kwargs)
13261354
except ValidationError as e:
13271355
custom_error = build_pydantic_error_message(e)
1328-
raise AquaValueError( # noqa: B904
1329-
f"Failed to request shape recommendation due to invalid input parameters: {custom_error}"
1356+
logger.error(
1357+
f"Validation failed for shape recommendation request: {custom_error}"
13301358
)
1359+
raise AquaValueError(
1360+
f"Invalid input parameters for shape recommendation: {custom_error}"
1361+
) from e
13311362

1332-
shape_recommend = AquaShapeRecommend()
1333-
shape_recommend_report = shape_recommend.which_shapes(request)
1334-
1335-
return shape_recommend_report
1363+
try:
1364+
shape_recommend = AquaShapeRecommend()
1365+
logger.info(
1366+
f"Running shape recommendation for model '{model_id}' "
1367+
f"with generate_table={getattr(request, 'generate_table', False)}"
1368+
)
1369+
shape_recommend_report = shape_recommend.which_shapes(request)
1370+
logger.info(f"Shape recommendation completed successfully for {model_id}")
1371+
return shape_recommend_report
1372+
except AquaValueError:
1373+
raise
1374+
except Exception as e:
1375+
logger.exception(
1376+
f"Unexpected error while generating shape recommendations: {e}"
1377+
)
1378+
raise AquaValueError(
1379+
f"An unexpected error occurred during shape recommendation: {e}"
1380+
) from e
13361381

13371382
@telemetry(entry_point="plugin=deployment&action=list_shapes", name="aqua")
13381383
@cached(cache=TTLCache(maxsize=1, ttl=timedelta(minutes=5), timer=datetime.now))

ads/aqua/shaperecommend/recommend.py

Lines changed: 92 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,13 @@
22
# Copyright (c) 2025 Oracle and/or its affiliates.
33
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
44

5+
import json
6+
import re
57
import shutil
6-
from typing import List, Union
8+
from typing import Dict, List, Optional, Tuple, Union
79

10+
from huggingface_hub import hf_hub_download
11+
from huggingface_hub.utils import HfHubHTTPError
812
from pydantic import ValidationError
913
from rich.table import Table
1014

@@ -17,7 +21,9 @@
1721
)
1822
from ads.aqua.common.utils import (
1923
build_pydantic_error_message,
24+
format_hf_custom_error_message,
2025
get_resource_type,
26+
is_valid_ocid,
2127
load_config,
2228
load_gpu_shapes_index,
2329
)
@@ -37,6 +43,7 @@
3743
ShapeRecommendationReport,
3844
ShapeReport,
3945
)
46+
from ads.config import COMPARTMENT_OCID
4047
from ads.model.datascience_model import DataScienceModel
4148
from ads.model.service.oci_datascience_model_deployment import (
4249
OCIDataScienceModelDeployment,
@@ -91,20 +98,23 @@ def which_shapes(
9198
try:
9299
shapes = self.valid_compute_shapes(compartment_id=request.compartment_id)
93100

94-
ds_model = self._get_data_science_model(request.model_id)
95-
96-
model_name = ds_model.display_name if ds_model.display_name else ""
97-
98101
if request.deployment_config:
102+
if is_valid_ocid(request.model_id):
103+
ds_model = self._get_data_science_model(request.model_id)
104+
model_name = ds_model.display_name
105+
else:
106+
model_name = request.model_id
107+
99108
shape_recommendation_report = (
100109
ShapeRecommendationReport.from_deployment_config(
101110
request.deployment_config, model_name, shapes
102111
)
103112
)
104113

105114
else:
106-
data = self._get_model_config(ds_model)
107-
115+
data, model_name = self._get_model_config_and_name(
116+
model_id=request.model_id,
117+
)
108118
llm_config = LLMConfig.from_raw_config(data)
109119

110120
shape_recommendation_report = self._summarize_shapes_for_seq_lens(
@@ -135,7 +145,57 @@ def which_shapes(
135145

136146
return shape_recommendation_report
137147

138-
def valid_compute_shapes(self, compartment_id: str) -> List["ComputeShapeSummary"]:
148+
def _get_model_config_and_name(
149+
self,
150+
model_id: str,
151+
) -> Tuple[Dict, str]:
152+
"""
153+
Loads the model configuration: a valid OCID is resolved through the OCI model,
154+
and any other model_id is treated as a Hugging Face Hub ID.
155+
156+
Parameters
157+
----------
158+
model_id : str
159+
The model OCID or Hugging Face model ID.
160+
# compartment_id : Optional[str]
161+
# The compartment OCID, used for searching the model catalog.
162+
163+
Returns
164+
-------
165+
Tuple[Dict, str]
166+
A tuple containing:
167+
- The model configuration dictionary.
168+
- The display name for the model.
169+
"""
170+
if is_valid_ocid(model_id):
171+
logger.info(f"Detected OCID: Fetching OCI model config for '{model_id}'.")
172+
ds_model = self._get_data_science_model(model_id)
173+
config = self._get_model_config(ds_model)
174+
model_name = ds_model.display_name
175+
else:
176+
logger.info(
177+
f"Assuming Hugging Face model ID: Fetching config for '{model_id}'."
178+
)
179+
config = self._fetch_hf_config(model_id)
180+
model_name = model_id
181+
182+
return config, model_name
183+
184+
def _fetch_hf_config(self, model_id: str) -> Dict:
185+
"""
186+
Downloads a model's config.json from Hugging Face Hub using the
187+
huggingface_hub library.
188+
"""
189+
try:
190+
config_path = hf_hub_download(repo_id=model_id, filename="config.json")
191+
with open(config_path, encoding="utf-8") as f:
192+
return json.load(f)
193+
except HfHubHTTPError as e:
194+
format_hf_custom_error_message(e)
195+
196+
def valid_compute_shapes(
197+
self, compartment_id: Optional[str] = None
198+
) -> List["ComputeShapeSummary"]:
139199
"""
140200
Returns a filtered list of GPU-only ComputeShapeSummary objects by reading and parsing a JSON file.
141201
@@ -151,9 +211,23 @@ def valid_compute_shapes(self, compartment_id: str) -> List["ComputeShapeSummary
151211
152212
Raises
153213
------
154-
ValueError
155-
If the file cannot be opened, parsed, or the 'shapes' key is missing.
214+
AquaValueError
215+
If a compartment_id is not provided and cannot be found in the
216+
environment variables.
156217
"""
218+
if not compartment_id:
219+
compartment_id = COMPARTMENT_OCID
220+
if compartment_id:
221+
logger.info(f"Using compartment_id from environment: {compartment_id}")
222+
223+
if not compartment_id:
224+
raise AquaValueError(
225+
"A compartment OCID is required to list available shapes. "
226+
"Please specify it using the --compartment_id parameter.\n\n"
227+
"Example:\n"
228+
'ads aqua deployment recommend_shape --model_id "<YOUR_MODEL_OCID>" --compartment_id "<YOUR_COMPARTMENT_OCID>"'
229+
)
230+
157231
oci_shapes = OCIDataScienceModelDeployment.shapes(compartment_id=compartment_id)
158232
set_user_shapes = {shape.name: shape for shape in oci_shapes}
159233

@@ -206,6 +280,13 @@ def _rich_diff_table(shape_report: ShapeRecommendationReport) -> Table:
206280
if name
207281
else "Model Deployment Recommendations"
208282
)
283+
284+
header = (
285+
f"{header}\n"
286+
"Currently, only the VLLM container is supported. "
287+
"All shape and parameter recommendations will be generated for the VLLM container."
288+
)
289+
209290
logger.debug(f"Table header set to: {header!r}")
210291

211292
if shape_report.troubleshoot:
@@ -324,6 +405,7 @@ def _get_model_config(model: DataScienceModel):
324405
"""
325406

326407
model_task = model.freeform_tags.get("task", "").lower()
408+
model_task = re.sub(r"-", "_", model_task)
327409
model_format = model.freeform_tags.get("model_format", "").lower()
328410

329411
logger.info(f"Current model task type: {model_task}")

ads/aqua/shaperecommend/shape_report.py

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,21 +29,30 @@ class RequestRecommend(BaseModel):
2929
"""
3030

3131
model_id: str = Field(
32-
..., description="The OCID of the model to recommend feasible compute shapes."
32+
...,
33+
description=(
34+
"The OCID or Hugging Face ID of the model for which to recommend feasible compute shapes."
35+
),
3336
)
34-
generate_table: Optional[bool] = (
35-
Field(
36-
True,
37-
description="True - to generate the rich diff Table, False - generate the JSON response",
37+
38+
generate_table: Optional[bool] = Field(
39+
True,
40+
description=(
41+
"If True, generate a rich formatted table as the response. "
42+
"If False, return the recommendation as a JSON structure."
3843
),
3944
)
45+
4046
compartment_id: Optional[str] = Field(
41-
COMPARTMENT_OCID, description="The OCID of user's compartment"
47+
COMPARTMENT_OCID,
48+
description="The OCID of the user's compartment.",
4249
)
4350

44-
deployment_config: Optional[AquaDeploymentConfig] = Field(
51+
deployment_config: Optional["AquaDeploymentConfig"] = Field(
4552
None,
46-
description="The deployment configuration for model (only available for service models).",
53+
description=(
54+
"The deployment configuration for the model (only available for service models)."
55+
),
4756
)
4857

4958
class Config:

0 commit comments

Comments
 (0)