Skip to content

Added cli to convert legacy fine tuned model to v2. #1241

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Aug 6, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ads/aqua/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
AQUA_TROUBLESHOOTING_LINK = "https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/troubleshooting-tips.md"
MODEL_FILE_DESCRIPTION_VERSION = "1.0"
MODEL_FILE_DESCRIPTION_TYPE = "modelOSSReferenceDescription"
AQUA_FINE_TUNE_MODEL_VERSION = "v2"

TRAINING_METRICS_FINAL = "training_metrics_final"
VALIDATION_METRICS_FINAL = "validation_metrics_final"
Expand Down
84 changes: 84 additions & 0 deletions ads/aqua/model/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
)
from ads.aqua.config.container_config import AquaContainerConfig
from ads.aqua.constants import (
AQUA_FINE_TUNE_MODEL_VERSION,
AQUA_MODEL_ARTIFACT_CONFIG,
AQUA_MODEL_ARTIFACT_CONFIG_MODEL_NAME,
AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE,
Expand Down Expand Up @@ -645,6 +646,89 @@ def edit_registered_model(
else:
raise AquaRuntimeError("Only registered unverified models can be edited.")

def convert_fine_tune(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to add one more validation: if the model has already been converted to v2, we should inform the user with a dedicated message:

logger.info(
        f"Model '{model_id}' is already a fine-tuned model in version '{AQUA_FINE_TUNE_MODEL_VERSION}'. "
        "No conversion is necessary."
    )

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's make it more explicit. For this use case, let's show the dedicated message that I posted above; otherwise users might think that something is wrong with their model.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated

self, model_id: str, delete_model: Optional[bool] = False
) -> DataScienceModel:
"""Converts legacy fine tuned model to fine tuned model v2.
1. 'fine_tune_model_version' tag will be added as 'v2' to new fine tuned model.
2. 'model_file_description' json will only contain fine tuned artifacts for new fine tuned model.

Parameters
----------
model_id: str
The legacy fine tuned model OCID.
delete_model: bool
Flag whether to delete the legacy model or not. Defaults to False.

Returns
-------
DataScienceModel:
The instance of DataScienceModel.
"""
legacy_fine_tuned_model = DataScienceModel.from_id(model_id)
legacy_tags = legacy_fine_tuned_model.freeform_tags or {}

if (
Tags.AQUA_TAG not in legacy_tags
or Tags.AQUA_FINE_TUNED_MODEL_TAG not in legacy_tags
):
raise AquaValueError(
f"Model '{model_id}' is not eligible for conversion. Only legacy AQUA fine-tuned models "
f"without the 'fine_tune_model_version={AQUA_FINE_TUNE_MODEL_VERSION}' tag are supported."
)

if (
legacy_tags.get(Tags.AQUA_FINE_TUNE_MODEL_VERSION, UNKNOWN).lower()
== AQUA_FINE_TUNE_MODEL_VERSION
):
raise AquaValueError(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

raise AquaValueError(
            f"Model '{model_id}' is not eligible for conversion. Only legacy AQUA fine-tuned models "
            f"without the 'fine_tune_model_version={AQUA_FINE_TUNE_MODEL_VERSION}' tag are supported."
        )

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated

f"Model '{model_id}' is already a fine-tuned model in version '{AQUA_FINE_TUNE_MODEL_VERSION}'. "
"No conversion is necessary."
)

if not legacy_fine_tuned_model.model_file_description:
raise AquaValueError(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

raise AquaValueError(
        f"Model '{model_id}' is missing required metadata and cannot be converted. "
        "This may indicate the model was not created properly or is not a supported legacy AQUA fine-tuned model."
    )

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated

f"Model '{model_id}' is missing required metadata and cannot be converted. "
"This may indicate the model was not created properly or is not a supported legacy AQUA fine-tuned model."
)

# add 'fine_tune_model_version' tag as 'v2'
fine_tune_model_v2_tags = {
**legacy_tags,
Tags.AQUA_FINE_TUNE_MODEL_VERSION: AQUA_FINE_TUNE_MODEL_VERSION,
}

# remove base model artifacts in 'model_file_description' json file
# base model artifacts are placed as the first entry in 'models' list
legacy_fine_tuned_model.model_file_description["models"].pop(0)

fine_tune_model_v2 = (
DataScienceModel()
.with_compartment_id(legacy_fine_tuned_model.compartment_id)
.with_project_id(legacy_fine_tuned_model.project_id)
.with_model_file_description(
json_dict=legacy_fine_tuned_model.model_file_description
)
.with_display_name(legacy_fine_tuned_model.display_name)
.with_description(legacy_fine_tuned_model.description)
.with_freeform_tags(**fine_tune_model_v2_tags)
.with_defined_tags(**(legacy_fine_tuned_model.defined_tags or {}))
.with_custom_metadata_list(legacy_fine_tuned_model.custom_metadata_list)
.with_defined_metadata_list(legacy_fine_tuned_model.defined_metadata_list)
.with_provenance_metadata(legacy_fine_tuned_model.provenance_metadata)
.create(model_by_reference=True)
)

logger.info(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

     f"Successfully created version '{AQUA_FINE_TUNE_MODEL_VERSION}' fine-tuned model: '{fine_tune_model_v2.id}' "
     f"based on legacy model '{model_id}'. This model is now ready for deployment."
 )

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated

f"Successfully created version '{AQUA_FINE_TUNE_MODEL_VERSION}' fine-tuned model: '{fine_tune_model_v2.id}' "
f"based on legacy model '{model_id}'. This new model is now ready for deployment."
)

if delete_model:
legacy_fine_tuned_model.delete()

return fine_tune_model_v2

def _fetch_metric_from_metadata(
self,
custom_metadata_list: ModelCustomMetadata,
Expand Down
119 changes: 119 additions & 0 deletions tests/unitary/with_extras/aqua/test_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -825,6 +825,125 @@ def test_get_model_fine_tuned(
"evaluation_container": "odsc-llm-evaluate",
}

# Exercises convert_fine_tune with DataScienceModel.from_id and
# DataScienceModel.create both patched. Two things are checked:
#   1. a model whose freeform tags lack 'OCI_AQUA' is rejected with AquaValueError;
#   2. once the tag is present, create() is called with model_by_reference=True and
#      only the second entry of model_file_description["models"] remains.
@patch.object(DataScienceModel, "create")
@patch.object(DataScienceModel, "from_id")
def test_convert_fine_tune(self, mock_from_id, mock_create):
# Stand-in for the legacy fine-tuned model that from_id returns (see below).
ds_model = MagicMock()
ds_model.id = "test_id"
ds_model.compartment_id = "test_model_compartment_id"
ds_model.project_id = "test_project_id"
ds_model.display_name = "test_display_name"
ds_model.description = "test_description"
ds_model.model_version_set_id = "test_model_version_set_id"
ds_model.model_version_set_name = "test_model_version_set_name"
# Has the fine-tuned tag but no 'OCI_AQUA' tag yet; that tag is added
# later in this test, after the rejection case has been checked.
ds_model.freeform_tags = {
"license": "test_license",
"organization": "test_organization",
"task": "test_task",
"aqua_fine_tuned_model": "test_finetuned_model",
}
ds_model.time_created = "2024-01-19T17:57:39.158000+00:00"
ds_model.lifecycle_state = "ACTIVE"
# Custom metadata attached to the mocked legacy model.
custom_metadata_list = ModelCustomMetadata()
custom_metadata_list.add(
**{"key": "artifact_location", "value": "oci://bucket@namespace/prefix/"}
)
custom_metadata_list.add(
**{"key": "fine_tune_source", "value": "test_fine_tuned_source_id"}
)
custom_metadata_list.add(
**{"key": "fine_tune_source_name", "value": "test_fine_tuned_source_name"}
)
custom_metadata_list.add(
**{
"key": "deployment-container",
"value": "odsc-vllm-serving",
}
)
custom_metadata_list.add(
**{
"key": "evaluation-container",
"value": "odsc-llm-evaluate",
}
)
custom_metadata_list.add(
**{
"key": "finetune-container",
"value": "odsc-llm-fine-tuning",
}
)
ds_model.custom_metadata_list = custom_metadata_list
# Defined (taxonomy) metadata and provenance attached to the mocked legacy model.
defined_metadata_list = ModelTaxonomyMetadata()
defined_metadata_list["Hyperparameters"].value = {
"training_data": "test_training_data",
"val_set_size": "test_val_set_size",
}
ds_model.defined_metadata_list = defined_metadata_list
ds_model.provenance_metadata = ModelProvenanceMetadata(
training_id="test_training_job_run_id"
)
# Two entries under "models"; the final assertion expects only the second
# one ("test_namespace_two") to be left after conversion.
ds_model.model_file_description = {
"version": "1.0",
"type": "modelOSSReferenceDescription",
"models": [
{
"namespace": "test_namespace_one",
"bucketName": "test_bucket_name_one",
"prefix": "test_prefix_one",
"objects": [
{
"name": "artifact/.gitattributes",
"version": "123",
"sizeInBytes": 1519,
}
],
},
{
"namespace": "test_namespace_two",
"bucketName": "test_bucket_name_two",
"prefix": "test_prefix_two",
"objects": [
{
"name": "/README.md",
"version": "b52c2608-009f-4774-8325-60ec226ae003",
"sizeInBytes": 5189,
}
],
},
],
}

mock_from_id.return_value = ds_model

# missing 'OCI_AQUA' tag
# NOTE: pytest treats `match` as a regular expression, so the '.' characters
# in the expected message match any character rather than a literal dot.
with pytest.raises(
AquaValueError,
match="Model 'mock_model_id' is not eligible for conversion. Only legacy AQUA fine-tuned models without the 'fine_tune_model_version=v2' tag are supported.",
):
self.app.convert_fine_tune(model_id="mock_model_id")

# add 'OCI_AQUA' tag
mock_from_id.return_value.freeform_tags["OCI_AQUA"] = "ACTIVE"

self.app.convert_fine_tune(model_id="mock_model_id")

# create() is patched, so only the arguments of the call are verified here.
mock_create.assert_called_with(model_by_reference=True)

# Inspects the dict held by the mocked legacy model: the first "models"
# entry must be gone and the second one must be unchanged.
assert mock_from_id.return_value.model_file_description["models"] == [
{
"namespace": "test_namespace_two",
"bucketName": "test_bucket_name_two",
"prefix": "test_prefix_two",
"objects": [
{
"name": "/README.md",
"version": "b52c2608-009f-4774-8325-60ec226ae003",
"sizeInBytes": 5189,
}
],
}
]

@pytest.mark.parametrize(
("artifact_location_set", "download_from_hf", "cleanup_model_cache"),
[
Expand Down