diff --git a/ads/aqua/constants.py b/ads/aqua/constants.py
index e64dc0d56..ca6e5ed6a 100644
--- a/ads/aqua/constants.py
+++ b/ads/aqua/constants.py
@@ -45,6 +45,7 @@
 AQUA_TROUBLESHOOTING_LINK = "https://github.com/oracle-samples/oci-data-science-ai-samples/blob/main/ai-quick-actions/troubleshooting-tips.md"
 MODEL_FILE_DESCRIPTION_VERSION = "1.0"
 MODEL_FILE_DESCRIPTION_TYPE = "modelOSSReferenceDescription"
+AQUA_FINE_TUNE_MODEL_VERSION = "v2"
 
 TRAINING_METRICS_FINAL = "training_metrics_final"
 VALIDATION_METRICS_FINAL = "validation_metrics_final"
diff --git a/ads/aqua/model/model.py b/ads/aqua/model/model.py
index d9b12ca6e..a149610f4 100644
--- a/ads/aqua/model/model.py
+++ b/ads/aqua/model/model.py
@@ -43,6 +43,7 @@
 )
 from ads.aqua.config.container_config import AquaContainerConfig
 from ads.aqua.constants import (
+    AQUA_FINE_TUNE_MODEL_VERSION,
     AQUA_MODEL_ARTIFACT_CONFIG,
     AQUA_MODEL_ARTIFACT_CONFIG_MODEL_NAME,
     AQUA_MODEL_ARTIFACT_CONFIG_MODEL_TYPE,
@@ -645,6 +646,106 @@ def edit_registered_model(
         else:
             raise AquaRuntimeError("Only registered unverified models can be edited.")
 
+    def convert_fine_tune(
+        self, model_id: str, delete_model: Optional[bool] = False
+    ) -> DataScienceModel:
+        """Converts legacy fine tuned model to fine tuned model v2.
+
+        A new model is created from the legacy one with two differences:
+        1. 'fine_tune_model_version' tag will be added as 'v2' to new fine tuned model.
+        2. 'model_file_description' json will only contain fine tuned artifacts for new fine tuned model.
+
+        Parameters
+        ----------
+        model_id: str
+            The legacy fine tuned model OCID.
+        delete_model: bool
+            Flag whether to delete the legacy model or not. Defaults to False.
+
+        Returns
+        -------
+        DataScienceModel:
+            The instance of DataScienceModel created for the fine tuned model v2.
+
+        Raises
+        ------
+        AquaValueError
+            If the model is not tagged as an AQUA fine tuned model, is already in
+            version 'v2', has no 'model_file_description', or its 'models' list
+            does not hold both base model and fine tuned artifacts.
+        """
+        legacy_fine_tuned_model = DataScienceModel.from_id(model_id)
+        legacy_tags = legacy_fine_tuned_model.freeform_tags or {}
+
+        if (
+            Tags.AQUA_TAG not in legacy_tags
+            or Tags.AQUA_FINE_TUNED_MODEL_TAG not in legacy_tags
+        ):
+            raise AquaValueError(
+                f"Model '{model_id}' is not eligible for conversion. Only legacy AQUA fine-tuned models "
+                f"without the 'fine_tune_model_version={AQUA_FINE_TUNE_MODEL_VERSION}' tag are supported."
+            )
+
+        if (
+            legacy_tags.get(Tags.AQUA_FINE_TUNE_MODEL_VERSION, UNKNOWN).lower()
+            == AQUA_FINE_TUNE_MODEL_VERSION
+        ):
+            raise AquaValueError(
+                f"Model '{model_id}' is already a fine-tuned model in version '{AQUA_FINE_TUNE_MODEL_VERSION}'. "
+                "No conversion is necessary."
+            )
+
+        model_file_description = legacy_fine_tuned_model.model_file_description
+        if not model_file_description:
+            raise AquaValueError(
+                f"Model '{model_id}' is missing required metadata and cannot be converted. "
+                "This may indicate the model was not created properly or is not a supported legacy AQUA fine-tuned model."
+            )
+
+        # base model artifacts are placed as the first entry in 'models' list and the
+        # fine tuned ones follow, so with fewer than two entries nothing would be
+        # left for the new model once the base model entry is removed
+        artifact_entries = model_file_description.get("models")
+        if not isinstance(artifact_entries, list) or len(artifact_entries) < 2:
+            raise AquaValueError(
+                f"Model '{model_id}' cannot be converted because its 'model_file_description' "
+                "does not reference both base model and fine-tuned artifacts."
+            )
+
+        # add 'fine_tune_model_version' tag as 'v2'
+        fine_tune_model_v2_tags = {
+            **legacy_tags,
+            Tags.AQUA_FINE_TUNE_MODEL_VERSION: AQUA_FINE_TUNE_MODEL_VERSION,
+        }
+
+        # remove base model artifacts in 'model_file_description' json file
+        artifact_entries.pop(0)
+
+        fine_tune_model_v2 = (
+            DataScienceModel()
+            .with_compartment_id(legacy_fine_tuned_model.compartment_id)
+            .with_project_id(legacy_fine_tuned_model.project_id)
+            .with_model_file_description(json_dict=model_file_description)
+            .with_display_name(legacy_fine_tuned_model.display_name)
+            .with_description(legacy_fine_tuned_model.description)
+            .with_freeform_tags(**fine_tune_model_v2_tags)
+            .with_defined_tags(**(legacy_fine_tuned_model.defined_tags or {}))
+            .with_custom_metadata_list(legacy_fine_tuned_model.custom_metadata_list)
+            .with_defined_metadata_list(legacy_fine_tuned_model.defined_metadata_list)
+            .with_provenance_metadata(legacy_fine_tuned_model.provenance_metadata)
+            .create(model_by_reference=True)
+        )
+
+        logger.info(
+            f"Successfully created version '{AQUA_FINE_TUNE_MODEL_VERSION}' fine-tuned model: '{fine_tune_model_v2.id}' "
+            f"based on legacy model '{model_id}'. This new model is now ready for deployment."
+        )
+
+        if delete_model:
+            legacy_fine_tuned_model.delete()
+
+        return fine_tune_model_v2
+
     def _fetch_metric_from_metadata(
         self,
         custom_metadata_list: ModelCustomMetadata,
diff --git a/tests/unitary/with_extras/aqua/test_model.py b/tests/unitary/with_extras/aqua/test_model.py
index 5a678c8cc..7dbe630ea 100644
--- a/tests/unitary/with_extras/aqua/test_model.py
+++ b/tests/unitary/with_extras/aqua/test_model.py
@@ -825,6 +825,135 @@ def test_get_model_fine_tuned(
             "evaluation_container": "odsc-llm-evaluate",
         }
 
+    @patch.object(DataScienceModel, "create")
+    @patch.object(DataScienceModel, "from_id")
+    def test_convert_fine_tune(self, mock_from_id, mock_create):
+        ds_model = MagicMock()
+        ds_model.id = "test_id"
+        ds_model.compartment_id = "test_model_compartment_id"
+        ds_model.project_id = "test_project_id"
+        ds_model.display_name = "test_display_name"
+        ds_model.description = "test_description"
+        ds_model.model_version_set_id = "test_model_version_set_id"
+        ds_model.model_version_set_name = "test_model_version_set_name"
+        ds_model.freeform_tags = {
+            "license": "test_license",
+            "organization": "test_organization",
+            "task": "test_task",
+            "aqua_fine_tuned_model": "test_finetuned_model",
+        }
+        ds_model.time_created = "2024-01-19T17:57:39.158000+00:00"
+        ds_model.lifecycle_state = "ACTIVE"
+        custom_metadata_list = ModelCustomMetadata()
+        custom_metadata_list.add(
+            **{"key": "artifact_location", "value": "oci://bucket@namespace/prefix/"}
+        )
+        custom_metadata_list.add(
+            **{"key": "fine_tune_source", "value": "test_fine_tuned_source_id"}
+        )
+        custom_metadata_list.add(
+            **{"key": "fine_tune_source_name", "value": "test_fine_tuned_source_name"}
+        )
+        custom_metadata_list.add(
+            **{
+                "key": "deployment-container",
+                "value": "odsc-vllm-serving",
+            }
+        )
+        custom_metadata_list.add(
+            **{
+                "key": "evaluation-container",
+                "value": "odsc-llm-evaluate",
+            }
+        )
+        custom_metadata_list.add(
+            **{
+                "key": "finetune-container",
+                "value": "odsc-llm-fine-tuning",
+            }
+        )
+        ds_model.custom_metadata_list = custom_metadata_list
+        defined_metadata_list = ModelTaxonomyMetadata()
+        defined_metadata_list["Hyperparameters"].value = {
+            "training_data": "test_training_data",
+            "val_set_size": "test_val_set_size",
+        }
+        ds_model.defined_metadata_list = defined_metadata_list
+        ds_model.provenance_metadata = ModelProvenanceMetadata(
+            training_id="test_training_job_run_id"
+        )
+        ds_model.model_file_description = {
+            "version": "1.0",
+            "type": "modelOSSReferenceDescription",
+            "models": [
+                {
+                    "namespace": "test_namespace_one",
+                    "bucketName": "test_bucket_name_one",
+                    "prefix": "test_prefix_one",
+                    "objects": [
+                        {
+                            "name": "artifact/.gitattributes",
+                            "version": "123",
+                            "sizeInBytes": 1519,
+                        }
+                    ],
+                },
+                {
+                    "namespace": "test_namespace_two",
+                    "bucketName": "test_bucket_name_two",
+                    "prefix": "test_prefix_two",
+                    "objects": [
+                        {
+                            "name": "/README.md",
+                            "version": "b52c2608-009f-4774-8325-60ec226ae003",
+                            "sizeInBytes": 5189,
+                        }
+                    ],
+                },
+            ],
+        }
+
+        mock_from_id.return_value = ds_model
+
+        # missing 'OCI_AQUA' tag
+        with pytest.raises(
+            AquaValueError,
+            match="Model 'mock_model_id' is not eligible for conversion. Only legacy AQUA fine-tuned models without the 'fine_tune_model_version=v2' tag are supported.",
+        ):
+            self.app.convert_fine_tune(model_id="mock_model_id")
+
+        # add 'OCI_AQUA' tag
+        mock_from_id.return_value.freeform_tags["OCI_AQUA"] = "ACTIVE"
+
+        self.app.convert_fine_tune(model_id="mock_model_id")
+
+        mock_create.assert_called_with(model_by_reference=True)
+
+        assert mock_from_id.return_value.model_file_description["models"] == [
+            {
+                "namespace": "test_namespace_two",
+                "bucketName": "test_bucket_name_two",
+                "prefix": "test_prefix_two",
+                "objects": [
+                    {
+                        "name": "/README.md",
+                        "version": "b52c2608-009f-4774-8325-60ec226ae003",
+                        "sizeInBytes": 5189,
+                    }
+                ],
+            }
+        ]
+
+        # only the fine-tuned artifacts are left now, so converting the same model
+        # again must be rejected without creating another model
+        with pytest.raises(
+            AquaValueError,
+            match="does not reference both base model and fine-tuned artifacts",
+        ):
+            self.app.convert_fine_tune(model_id="mock_model_id")
+
+        mock_create.assert_called_once_with(model_by_reference=True)
+
     @pytest.mark.parametrize(
         ("artifact_location_set", "download_from_hf", "cleanup_model_cache"),
         [