From 93efbcf7800dbc73fcfc6820bc643831d1bbffdc Mon Sep 17 00:00:00 2001 From: henrylee Date: Mon, 8 Apr 2024 19:52:15 +0000 Subject: [PATCH 1/3] fix: Set better default args suggested by quality engineers. --- generative_ai/embedding_model_tuning.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/generative_ai/embedding_model_tuning.py b/generative_ai/embedding_model_tuning.py index 55f58d17943..8e1ce58ece1 100644 --- a/generative_ai/embedding_model_tuning.py +++ b/generative_ai/embedding_model_tuning.py @@ -31,8 +31,8 @@ def tune_embedding_model( corpus_path: str = "gs://embedding-customization-pipeline/dataset/corpus.jsonl", train_label_path: str = "gs://embedding-customization-pipeline/dataset/train.tsv", test_label_path: str = "gs://embedding-customization-pipeline/dataset/test.tsv", - batch_size: int = 50, - iterations: int = 300, + batch_size: int = 128, + iterations: int = 1000, ) -> pipeline_jobs.PipelineJob: match = re.search(r"(.+)(-autopush|-staging)?-aiplatform.+", api_endpoint) location = match.group(1) if match else "us-central1" From 8b69c43e58334d740bed1e08e5165c95fdb9f7e7 Mon Sep 17 00:00:00 2001 From: henrylee Date: Mon, 8 Apr 2024 22:39:40 +0000 Subject: [PATCH 2/3] fix: Set better default args; tidy up api_endpoint processing. --- generative_ai/embedding_model_tuning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generative_ai/embedding_model_tuning.py b/generative_ai/embedding_model_tuning.py index 8e1ce58ece1..c73b8af7779 100644 --- a/generative_ai/embedding_model_tuning.py +++ b/generative_ai/embedding_model_tuning.py @@ -34,7 +34,7 @@ def tune_embedding_model( batch_size: int = 128, iterations: int = 1000, ) -> pipeline_jobs.PipelineJob: - match = re.search(r"(.+)(-autopush|-staging)?-aiplatform.+", api_endpoint) + match = re.search(r"(\w+-\w+).*-aiplatform.+", api_endpoint) location = match.group(1) if match else "us-central1" job = aiplatform.PipelineJob( display_name=pipeline_job_display_name, From 75dc4d98335f255fc6a11282327afa96f1714cd0 Mon Sep 17 00:00:00 2001 From: henrylee Date: Mon, 8 Apr 2024 22:47:42 +0000 Subject: [PATCH 3/3] tidy up api_endpoint processing. --- generative_ai/embedding_model_tuning.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generative_ai/embedding_model_tuning.py b/generative_ai/embedding_model_tuning.py index c73b8af7779..f8e48c1ecd0 100644 --- a/generative_ai/embedding_model_tuning.py +++ b/generative_ai/embedding_model_tuning.py @@ -34,7 +34,7 @@ def tune_embedding_model( batch_size: int = 128, iterations: int = 1000, ) -> pipeline_jobs.PipelineJob: - match = re.search(r"(\w+-\w+).*-aiplatform.+", api_endpoint) + match = re.search(r"^(\w+-\w+)", api_endpoint) location = match.group(1) if match else "us-central1" job = aiplatform.PipelineJob( display_name=pipeline_job_display_name,