From 27a0eb473e1248eedf45b67b5be18b7eae530a6f Mon Sep 17 00:00:00 2001 From: Dariusz Parys Date: Tue, 19 Nov 2019 13:29:34 +0100 Subject: [PATCH 1/6] Refactoring Variable Code Base Usage Simplified and cleaned-up the usage of variables inside all python files and Azure DevOps pipelines. Variables are now defined in two places, the Azure DevOps variable template and the Azure DevOps variable group. - Refactored dotenv usage in a singleton class to serve variables to all python scripts - Created new variable template `azdo-variables.yml` to be used within Azure DevOps pipelines - Adjusted existing pipelines to leverage new variable template - Prepared the ARM template to specify individual names for the various resources Please provide feedback, I'll update the PR with documentation modifications Resolves: #279075 --- .gitignore | 1 + .pipelines/azdo-ci-build-train.yml | 11 +- .pipelines/azdo-pr-build-train.yml | 1 + .pipelines/azdo-variables.yml | 41 ++++++ .../arm-templates/cloud-environment.json | 30 ++++- environment_setup/iac-create-environment.yml | 2 +- environment_setup/iac-remove-environment.yml | 2 +- ml_service/pipelines/build_train_pipeline.py | 51 +++---- .../pipelines/build_train_pipeline_with_r.py | 35 ++--- .../build_train_pipeline_with_r_on_dbricks.py | 38 ++---- ml_service/pipelines/run_train_pipeline.py | 37 ++--- ml_service/util/attach_compute.py | 15 +-- ml_service/util/create_scoring_image.py | 31 ++--- ml_service/util/env_variables.py | 127 ++++++++++++++++++ ml_service/util/register_model.py | 38 ++---- tests/unit/code_test.py | 14 +- 16 files changed, 293 insertions(+), 181 deletions(-) create mode 100644 .pipelines/azdo-variables.yml create mode 100644 ml_service/util/env_variables.py diff --git a/.gitignore b/.gitignore index 3a5a8879..7bac8768 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,7 @@ wheels/ .installed.cfg *.egg MANIFEST +venv/ # PyInstaller # Usually these files are written by a python script from a template diff --git 
a/.pipelines/azdo-ci-build-train.yml b/.pipelines/azdo-ci-build-train.yml index c2453d4d..09c52d95 100644 --- a/.pipelines/azdo-ci-build-train.yml +++ b/.pipelines/azdo-ci-build-train.yml @@ -11,14 +11,9 @@ trigger: - ml_service/util/create_scoring_image.py variables: +- template: azdo-variables.yml - group: devopsforai-aml-vg -# Choose from default, build_train_pipeline_with_r.py, or build_train_pipeline_with_r_on_dbricks.py -- name: build-train-script - value: 'build_train_pipeline.py' -# Automatically triggers the train, evaluate, register pipeline after the CI steps. -# Uncomment to set to false or add same variable name at queue time with value of false to disable. -# - name: auto-trigger-training -# value: false + stages: - stage: 'Model_CI' @@ -34,7 +29,7 @@ stages: - template: azdo-base-pipeline.yml - script: | # Invoke the Python building and publishing a training pipeline - python3 $(Build.SourcesDirectory)/ml_service/pipelines/$(build-train-script) + python3 $(Build.SourcesDirectory)/ml_service/pipelines/${{ variables.BUILD_TRAIN_SCRIPT }} failOnStderr: 'false' env: SP_APP_SECRET: '$(SP_APP_SECRET)' diff --git a/.pipelines/azdo-pr-build-train.yml b/.pipelines/azdo-pr-build-train.yml index 8bf6ca56..24231b2a 100644 --- a/.pipelines/azdo-pr-build-train.yml +++ b/.pipelines/azdo-pr-build-train.yml @@ -11,6 +11,7 @@ container: mcr.microsoft.com/mlops/python:latest variables: +- template: azdo-variables.yml - group: devopsforai-aml-vg diff --git a/.pipelines/azdo-variables.yml b/.pipelines/azdo-variables.yml new file mode 100644 index 00000000..d0914e3d --- /dev/null +++ b/.pipelines/azdo-variables.yml @@ -0,0 +1,41 @@ +variables: +- name: DEVOPS_VARIABLEGROUP + value: devopsforai-aml-vg +- name: BUILD_TRAIN_SCRIPT + value: build_train_pipeline.py +- name: AML_COMPUTE_CLUSTER_CPU_SKU + value: STANDARD_DS2_V2 +- name: AML_COMPUTE_CLUSTER_NAME + value: train-cluster +- name: AML_CLUSTER_MIN_NODES + value: 0 +- name: AML_CLUSTER_MAX_NODES + value: 4 +- name: 
AML_CLUSTER_PRIORITY + value: lowpriority +- name: EVALUATE_SCRIPT_PATH + value: evaluate/evaluate_model.py +- name: EXPERIMENT_NAME + value: mlopspython +- name: MODEL_NAME + value: sklearn_regression_model.pkl +- name: MODEL_VERSION + value: '1' +- name: MODEL_PATH + value: '' +- name: REGISTER_SCRIPT_PATH + value: register/register_model.py +- name: SOURCES_DIR_TRAIN + value: code +- name: TRAIN_SCRIPT_PATH + value: training/train.py +- name: TRAINING_PIPELINE_NAME + value: training-pipeline +- name: BUILD_BUILDID + value: 001 +- name: RELEASE_RELEASEID + value: 001 +- name: IMAGE_NAME + value: '' +- name: DB_CLUSTER_ID + value: '' \ No newline at end of file diff --git a/environment_setup/arm-templates/cloud-environment.json b/environment_setup/arm-templates/cloud-environment.json index 590a4aed..a7f02e54 100644 --- a/environment_setup/arm-templates/cloud-environment.json +++ b/environment_setup/arm-templates/cloud-environment.json @@ -26,16 +26,36 @@ "metadata": { "description": "Specifies the location for all resources." 
} + }, + "workspace": { + "type": "string", + "defaultValue": "[concat(parameters('baseName'),'-AML-WS')]" + }, + "storageAccount": { + "type": "string", + "defaultValue": "[concat(toLower(parameters('baseName')), 'amlsa')]" + }, + "keyvault": { + "type": "string", + "defaultValue": "[concat(parameters('baseName'),'-AML-KV')]" + }, + "appInsights": { + "type": "string", + "defaultValue": "[concat(parameters('baseName'),'-AML-AI')]" + }, + "acr": { + "type": "string", + "defaultValue": "[concat(toLower(parameters('baseName')),'amlcr')]" } }, "variables": { - "amlWorkspaceName": "[concat(parameters('baseName'),'-AML-WS')]", - "storageAccountName": "[concat(toLower(parameters('baseName')), 'amlsa')]", + "amlWorkspaceName": "[parameters('workspace')]", + "storageAccountName": "[parameters('storageAccount')]", "storageAccountType": "Standard_LRS", - "keyVaultName": "[concat(parameters('baseName'),'-AML-KV')]", + "keyVaultName": "[parameters('keyvault')]", "tenantId": "[subscription().tenantId]", - "applicationInsightsName": "[concat(parameters('baseName'),'-AML-AI')]", - "containerRegistryName": "[concat(toLower(parameters('baseName')),'amlcr')]" + "applicationInsightsName": "[parameters('appInsights')]", + "containerRegistryName": "[parameters('acr')]" }, "resources": [ { diff --git a/environment_setup/iac-create-environment.yml b/environment_setup/iac-create-environment.yml index 2dd00694..585f3ba5 100644 --- a/environment_setup/iac-create-environment.yml +++ b/environment_setup/iac-create-environment.yml @@ -25,7 +25,7 @@ steps: inputs: azureSubscription: 'AzureResourceConnection' action: 'Create Or Update Resource Group' - resourceGroupName: '$(BASE_NAME)-AML-RG' + resourceGroupName: '$(RESOURCEGROUP_NAME)' location: $(LOCATION) templateLocation: 'Linked artifact' csmFile: '$(Build.SourcesDirectory)/environment_setup/arm-templates/cloud-environment.json' diff --git a/environment_setup/iac-remove-environment.yml b/environment_setup/iac-remove-environment.yml index 
4ca8b04e..81c257f8 100644 --- a/environment_setup/iac-remove-environment.yml +++ b/environment_setup/iac-remove-environment.yml @@ -18,7 +18,7 @@ steps: inputs: azureSubscription: 'AzureResourceConnection' action: 'DeleteRG' - resourceGroupName: '$(BASE_NAME)-AML-RG' + resourceGroupName: '$(RESOURCEGROUP_NAME)' location: $(LOCATION) displayName: 'Delete resources in Azure' diff --git a/ml_service/pipelines/build_train_pipeline.py b/ml_service/pipelines/build_train_pipeline.py index 481c68e5..b866201d 100644 --- a/ml_service/pipelines/build_train_pipeline.py +++ b/ml_service/pipelines/build_train_pipeline.py @@ -5,45 +5,32 @@ # from azureml.core import Datastore import os import sys -from dotenv import load_dotenv sys.path.append(os.path.abspath("./ml_service/util")) # NOQA: E402 from workspace import get_workspace from attach_compute import get_compute +from env_variables import Env def main(): - load_dotenv() - workspace_name = os.environ.get("BASE_NAME")+"-AML-WS" - resource_group = os.environ.get("BASE_NAME")+"-AML-RG" - subscription_id = os.environ.get("SUBSCRIPTION_ID") - tenant_id = os.environ.get("TENANT_ID") - app_id = os.environ.get("SP_APP_ID") - app_secret = os.environ.get("SP_APP_SECRET") - sources_directory_train = os.environ.get("SOURCES_DIR_TRAIN") - train_script_path = os.environ.get("TRAIN_SCRIPT_PATH") - evaluate_script_path = os.environ.get("EVALUATE_SCRIPT_PATH") - vm_size = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU") - compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME") - model_name = os.environ.get("MODEL_NAME") - build_id = os.environ.get("BUILD_BUILDID") - pipeline_name = os.environ.get("TRAINING_PIPELINE_NAME") - + e = Env() # Get Azure machine learning workspace aml_workspace = get_workspace( - workspace_name, - resource_group, - subscription_id, - tenant_id, - app_id, - app_secret) + e.workspace_name, + e.resource_group, + e.subscription_id, + e.tenant_id, + e.app_id, + e.app_secret) + print("get_workspace:") print(aml_workspace) 
# Get Azure machine learning cluster aml_compute = get_compute( aml_workspace, - compute_name, - vm_size) + e.compute_name, + e.vm_size) if aml_compute is not None: + print("aml_compute:") print(aml_compute) run_config = RunConfiguration(conda_dependencies=CondaDependencies.create( @@ -56,16 +43,16 @@ def main(): run_config.environment.docker.enabled = True model_name = PipelineParameter( - name="model_name", default_value=model_name) + name="model_name", default_value=e.model_name) release_id = PipelineParameter( name="release_id", default_value="0" ) train_step = PythonScriptStep( name="Train Model", - script_name=train_script_path, + script_name=e.train_script_path, compute_target=aml_compute, - source_directory=sources_directory_train, + source_directory=e.sources_directory_train, arguments=[ "--release_id", release_id, "--model_name", model_name, @@ -77,9 +64,9 @@ def main(): evaluate_step = PythonScriptStep( name="Evaluate Model ", - script_name=evaluate_script_path, + script_name=e.evaluate_script_path, compute_target=aml_compute, - source_directory=sources_directory_train, + source_directory=e.sources_directory_train, arguments=[ "--release_id", release_id, "--model_name", model_name, @@ -95,9 +82,9 @@ def main(): train_pipeline = Pipeline(workspace=aml_workspace, steps=steps) train_pipeline.validate() published_pipeline = train_pipeline.publish( - name=pipeline_name, + name=e.pipeline_name, description="Model training/retraining pipeline", - version=build_id + version=e.build_id ) print(f'Published pipeline: {published_pipeline.name}') print(f'for build {published_pipeline.version}') diff --git a/ml_service/pipelines/build_train_pipeline_with_r.py b/ml_service/pipelines/build_train_pipeline_with_r.py index 7eae2c98..72ed8e2a 100644 --- a/ml_service/pipelines/build_train_pipeline_with_r.py +++ b/ml_service/pipelines/build_train_pipeline_with_r.py @@ -4,40 +4,29 @@ # from azureml.core import Datastore import os import sys -from dotenv import load_dotenv 
sys.path.append(os.path.abspath("./ml_service/util")) # NOQA: E402 from workspace import get_workspace from attach_compute import get_compute +from env_variables import Env def main(): - load_dotenv() - workspace_name = os.environ.get("BASE_NAME")+"-AML-WS" - resource_group = os.environ.get("BASE_NAME")+"-AML-RG" - subscription_id = os.environ.get("SUBSCRIPTION_ID") - tenant_id = os.environ.get("TENANT_ID") - app_id = os.environ.get("SP_APP_ID") - app_secret = os.environ.get("SP_APP_SECRET") - vm_size = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU") - compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME") - build_id = os.environ.get("BUILD_BUILDID") - pipeline_name = os.environ.get("TRAINING_PIPELINE_NAME") - + e = Env() # Get Azure machine learning workspace aml_workspace = get_workspace( - workspace_name, - resource_group, - subscription_id, - tenant_id, - app_id, - app_secret) + e.workspace_name, + e.resource_group, + e.subscription_id, + e.tenant_id, + e.app_id, + e.app_secret) print(aml_workspace) # Get Azure machine learning cluster aml_compute = get_compute( aml_workspace, - compute_name, - vm_size) + e.compute_name, + e.vm_size) if aml_compute is not None: print(aml_compute) @@ -66,9 +55,9 @@ def main(): train_pipeline = Pipeline(workspace=aml_workspace, steps=steps) train_pipeline.validate() published_pipeline = train_pipeline.publish( - name=pipeline_name + "_with_R", + name=e.pipeline_name + "_with_R", description="Model training/retraining pipeline", - version=build_id + version=e.build_id ) print(f'Published pipeline: {published_pipeline.name}') print(f'for build {published_pipeline.version}') diff --git a/ml_service/pipelines/build_train_pipeline_with_r_on_dbricks.py b/ml_service/pipelines/build_train_pipeline_with_r_on_dbricks.py index 95de9e55..733683eb 100644 --- a/ml_service/pipelines/build_train_pipeline_with_r_on_dbricks.py +++ b/ml_service/pipelines/build_train_pipeline_with_r_on_dbricks.py @@ -1,42 +1,30 @@ from azureml.pipeline.core import 
Pipeline import os import sys -from dotenv import load_dotenv sys.path.append(os.path.abspath("./ml_service/util")) # NOQA: E402 from workspace import get_workspace from attach_compute import get_compute from azureml.pipeline.steps import DatabricksStep +from env_variables import Env def main(): - load_dotenv() - workspace_name = os.environ.get("BASE_NAME")+"-AML-WS" - resource_group = os.environ.get("BASE_NAME")+"-AML-RG" - subscription_id = os.environ.get("SUBSCRIPTION_ID") - tenant_id = os.environ.get("TENANT_ID") - app_id = os.environ.get("SP_APP_ID") - app_secret = os.environ.get("SP_APP_SECRET") - vm_size = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU") - compute_name = os.environ.get("DATABRICKS_COMPUTE_NAME") - db_cluster_id = os.environ.get("DB_CLUSTER_ID") - build_id = os.environ.get("BUILD_BUILDID") - pipeline_name = os.environ.get("TRAINING_PIPELINE_NAME") - + e = Env() # Get Azure machine learning workspace aml_workspace = get_workspace( - workspace_name, - resource_group, - subscription_id, - tenant_id, - app_id, - app_secret) + e.workspace_name, + e.resource_group, + e.subscription_id, + e.tenant_id, + e.app_id, + e.app_secret) print(aml_workspace) # Get Azure machine learning cluster aml_compute = get_compute( aml_workspace, - compute_name, - vm_size) + e.compute_name, + e.vm_size) if aml_compute is not None: print(aml_compute) @@ -46,7 +34,7 @@ def main(): python_script_name="train_with_r_on_databricks.py", source_directory="code/training/R", run_name='DB_Python_R_demo', - existing_cluster_id=db_cluster_id, + existing_cluster_id=e.db_cluster_id, compute_target=aml_compute, allow_reuse=False ) @@ -58,9 +46,9 @@ def main(): train_pipeline = Pipeline(workspace=aml_workspace, steps=steps) train_pipeline.validate() published_pipeline = train_pipeline.publish( - name=pipeline_name + "_with_R_on_DB", + name=e.pipeline_name + "_with_R_on_DB", description="Model training/retraining pipeline", - version=build_id + version=e.build_id ) print(f'Published 
pipeline: {published_pipeline.name}') print(f'for build {published_pipeline.version}') diff --git a/ml_service/pipelines/run_train_pipeline.py b/ml_service/pipelines/run_train_pipeline.py index 1d942a8c..3ee03056 100644 --- a/ml_service/pipelines/run_train_pipeline.py +++ b/ml_service/pipelines/run_train_pipeline.py @@ -1,31 +1,20 @@ -import os from azureml.pipeline.core import PublishedPipeline from azureml.core import Workspace from azureml.core.authentication import ServicePrincipalAuthentication -from dotenv import load_dotenv +from env_variables import Env def main(): - load_dotenv() - workspace_name = os.environ.get("BASE_NAME")+"-AML-WS" - resource_group = os.environ.get("BASE_NAME")+"-AML-RG" - subscription_id = os.environ.get("SUBSCRIPTION_ID") - tenant_id = os.environ.get("TENANT_ID") - experiment_name = os.environ.get("EXPERIMENT_NAME") - model_name = os.environ.get("MODEL_NAME") - app_id = os.environ.get('SP_APP_ID') - app_secret = os.environ.get('SP_APP_SECRET') - build_id = os.environ.get('BUILD_BUILDID') - + e = Env() service_principal = ServicePrincipalAuthentication( - tenant_id=tenant_id, - service_principal_id=app_id, - service_principal_password=app_secret) + tenant_id=e.tenant_id, + service_principal_id=e.app_id, + service_principal_password=e.app_secret) aml_workspace = Workspace.get( - name=workspace_name, - subscription_id=subscription_id, - resource_group=resource_group, + name=e.workspace_name, + subscription_id=e.subscription_id, + resource_group=e.resource_group, auth=service_principal ) @@ -34,23 +23,23 @@ def main(): matched_pipes = [] for p in pipelines: - if p.version == build_id: + if p.version == e.build_id: matched_pipes.append(p) if(len(matched_pipes) > 1): published_pipeline = None - raise Exception(f"Multiple active pipelines are published for build {build_id}.") # NOQA: E501 + raise Exception(f"Multiple active pipelines are published for build {e.build_id}.") # NOQA: E501 elif(len(matched_pipes) == 0): published_pipeline = 
None - raise KeyError(f"Unable to find a published pipeline for this build {build_id}") # NOQA: E501 + raise KeyError(f"Unable to find a published pipeline for this build {e.build_id}") # NOQA: E501 else: published_pipeline = matched_pipes[0] - pipeline_parameters = {"model_name": model_name} + pipeline_parameters = {"model_name": e.model_name} response = published_pipeline.submit( aml_workspace, - experiment_name, + e.experiment_name, pipeline_parameters) run_id = response.id diff --git a/ml_service/util/attach_compute.py b/ml_service/util/attach_compute.py index 7a34cd38..569e3041 100644 --- a/ml_service/util/attach_compute.py +++ b/ml_service/util/attach_compute.py @@ -1,9 +1,8 @@ -import os -from dotenv import load_dotenv from azureml.core import Workspace from azureml.core.compute import AmlCompute from azureml.core.compute import ComputeTarget from azureml.exceptions import ComputeTargetException +from env_variables import Env def get_compute( @@ -11,10 +10,6 @@ def get_compute( compute_name: str, vm_size: str ): - # Load the environment variables from .env in case this script - # is called outside an existing process - load_dotenv() - # Verify that cluster does not exist already try: if compute_name in workspace.compute_targets: compute_target = workspace.compute_targets[compute_name] @@ -22,12 +17,12 @@ def get_compute( print('Found existing compute target ' + compute_name + ' so using it.') else: + e = Env() compute_config = AmlCompute.provisioning_configuration( vm_size=vm_size, - vm_priority=os.environ.get("AML_CLUSTER_PRIORITY", - 'lowpriority'), - min_nodes=int(os.environ.get("AML_CLUSTER_MIN_NODES", 0)), - max_nodes=int(os.environ.get("AML_CLUSTER_MAX_NODES", 4)), + vm_priority=e.vm_priority, + min_nodes=e.min_nodes, + max_nodes=e.max_nodes, idle_seconds_before_scaledown="300" # #Uncomment the below lines for VNet support # vnet_resourcegroup_name=vnet_resourcegroup_name, diff --git a/ml_service/util/create_scoring_image.py 
b/ml_service/util/create_scoring_image.py index 08ae49b5..7e99bd28 100644 --- a/ml_service/util/create_scoring_image.py +++ b/ml_service/util/create_scoring_image.py @@ -2,36 +2,25 @@ from azureml.core import Workspace from azureml.core.image import ContainerImage, Image from azureml.core.model import Model -from dotenv import load_dotenv from azureml.core.authentication import ServicePrincipalAuthentication +from env_variables import Env -load_dotenv() - -TENANT_ID = os.environ.get('TENANT_ID') -APP_ID = os.environ.get('SP_APP_ID') -APP_SECRET = os.environ.get('SP_APP_SECRET') -WORKSPACE_NAME = os.environ.get("BASE_NAME")+"-AML-WS" -SUBSCRIPTION_ID = os.environ.get('SUBSCRIPTION_ID') -RESOURCE_GROUP = os.environ.get("BASE_NAME")+"-AML-RG" -MODEL_NAME = os.environ.get('MODEL_NAME') -MODEL_VERSION = os.environ.get('MODEL_VERSION') -IMAGE_NAME = os.environ.get('IMAGE_NAME') - +e = Env() SP_AUTH = ServicePrincipalAuthentication( - tenant_id=TENANT_ID, - service_principal_id=APP_ID, - service_principal_password=APP_SECRET) + tenant_id=e.tenant_id, + service_principal_id=e.app_id, + service_principal_password=e.app_secret) ws = Workspace.get( - WORKSPACE_NAME, + e.workspace_name, SP_AUTH, - SUBSCRIPTION_ID, - RESOURCE_GROUP + e.subscription_id, + e.resource_group ) -model = Model(ws, name=MODEL_NAME, version=MODEL_VERSION) +model = Model(ws, name=e.model_name, version=e.model_version) os.chdir("./code/scoring") image_config = ContainerImage.image_configuration( @@ -43,7 +32,7 @@ ) image = Image.create( - name=IMAGE_NAME, models=[model], image_config=image_config, workspace=ws + name=e.image_name, models=[model], image_config=image_config, workspace=ws ) image.wait_for_creation(show_output=True) diff --git a/ml_service/util/env_variables.py b/ml_service/util/env_variables.py new file mode 100644 index 00000000..c420bec8 --- /dev/null +++ b/ml_service/util/env_variables.py @@ -0,0 +1,127 @@ +import os +from dotenv import load_dotenv + + +class Singleton(object): + 
_instances = {} + + def __new__(class_, *args, **kwargs): + if class_ not in class_._instances: + class_._instances[class_] = super(Singleton, class_).__new__(class_, *args, **kwargs) # noqa E501 + return class_._instances[class_] + + +class Env(Singleton): + + def __init__(self): + load_dotenv() + self._workspace_name = os.environ.get("WORKSPACE_NAME") + self._resource_group = os.environ.get("RESOURCEGROUP_NAME") + self._subscription_id = os.environ.get("SUBSCRIPTION_ID") + self._tenant_id = os.environ.get("TENANT_ID") + self._app_id = os.environ.get("SP_APP_ID") + self._app_secret = os.environ.get("SP_APP_SECRET") + self._vm_size = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU") + self._compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME") + self._db_cluster_id = os.environ.get("DB_CLUSTER_ID") + self._build_id = os.environ.get("BUILD_BUILDID") + self._pipeline_name = os.environ.get("TRAINING_PIPELINE_NAME") + self._sources_directory_train = os.environ.get("SOURCES_DIR_TRAIN") + self._train_script_path = os.environ.get("TRAIN_SCRIPT_PATH") + self._evaluate_script_path = os.environ.get("EVALUATE_SCRIPT_PATH") + self._model_name = os.environ.get("MODEL_NAME") + self._experiment_name = os.environ.get("EXPERIMENT_NAME") + self._vm_priority = os.environ.get("AML_CLUSTER_PRIORITY", 'lowpriority') # noqa E501 + self._min_nodes = int(os.environ.get("AML_CLUSTER_MIN_NODES", 0)) + self._max_nodes = int(os.environ.get("AML_CLUSTER_MAX_NODES", 4)) + self._model_version = os.environ.get('MODEL_VERSION') + self._image_name = os.environ.get('IMAGE_NAME') + self._model_path = os.environ.get('MODEL_PATH') + + @property + def workspace_name(self): + return self._workspace_name + + @property + def resource_group(self): + return self._resource_group + + @property + def subscription_id(self): + return self._subscription_id + + @property + def tenant_id(self): + return self._tenant_id + + @property + def app_id(self): + return self._app_id + + @property + def app_secret(self): + 
return self._app_secret + + @property + def vm_size(self): + return self._vm_size + + @property + def compute_name(self): + return self._compute_name + + @property + def db_cluster_id(self): + return self._db_cluster_id + + @property + def build_id(self): + return self._build_id + + @property + def pipeline_name(self): + return self._pipeline_name + + @property + def sources_directory_train(self): + return self._sources_directory_train + + @property + def train_script_path(self): + return self._train_script_path + + @property + def evaluate_script_path(self): + return self._evaluate_script_path + + @property + def model_name(self): + return self._model_name + + @property + def experiment_name(self): + return self._experiment_name + + @property + def vm_priority(self): + return self._vm_priority + + @property + def min_nodes(self): + return self._min_nodes + + @property + def max_nodes(self): + return self._max_nodes + + @property + def model_version(self): + return self._model_version + + @property + def image_name(self): + return self._image_name + + @property + def model_path(self): + return self._model_path diff --git a/ml_service/util/register_model.py b/ml_service/util/register_model.py index ea26a997..7c99aaac 100644 --- a/ml_service/util/register_model.py +++ b/ml_service/util/register_model.py @@ -1,47 +1,35 @@ import sys import os import os.path -from dotenv import load_dotenv from azureml.core import Workspace from azureml.core.model import Model from azureml.core.authentication import ServicePrincipalAuthentication +from env_variables import Env -# Load the environment variables from .env in case this script -# is called outside an existing process -load_dotenv() +e = Env() -TENANT_ID = os.environ.get('TENANT_ID') -APP_ID = os.environ.get('SP_APP_ID') -APP_SECRET = os.environ.get('SP_APP_SECRET') -MODEL_PATH = os.environ.get('MODEL_PATH') -MODEL_NAME = os.environ.get('MODEL_NAME') -WORKSPACE_NAME = os.environ.get("BASE_NAME")+"-AML-WS" -SUBSCRIPTION_ID = 
os.environ.get('SUBSCRIPTION_ID') -RESOURCE_GROUP = os.environ.get("BASE_NAME")+"-AML-RG" - - -if os.path.isfile(MODEL_PATH) is False: - print("The given model path %s is invalid" % (MODEL_PATH)) +if os.path.isfile(e.model_path) is False: + print("The given model path %s is invalid" % (e.model_path)) sys.exit(1) SP_AUTH = ServicePrincipalAuthentication( - tenant_id=TENANT_ID, - service_principal_id=APP_ID, - service_principal_password=APP_SECRET) + tenant_id=e.tenant_id, + service_principal_id=e.app_id, + service_principal_password=e.app_secret) WORKSPACE = Workspace.get( - WORKSPACE_NAME, + e.workspace_name, SP_AUTH, - SUBSCRIPTION_ID, - RESOURCE_GROUP + e.subscription_id, + e.resource_group ) try: MODEL = Model.register( - model_path=MODEL_PATH, - model_name=MODEL_NAME, + model_path=e.model_path, + model_name=e.model_name, description="Forecasting Model", - workspace=WORKSPACE) + workspace=e.workspace) print("Model registered successfully. ID: " + MODEL.id) except Exception as caught_error: diff --git a/tests/unit/code_test.py b/tests/unit/code_test.py index b22b186c..3c49454d 100644 --- a/tests/unit/code_test.py +++ b/tests/unit/code_test.py @@ -2,17 +2,19 @@ import os sys.path.append(os.path.abspath("./ml_service/util")) # NOQA: E402 from workspace import get_workspace +from env_variables import Env # Just an example of a unit test against # a utility function common_scoring.next_saturday def test_get_workspace(): - workspace_name = os.environ.get("BASE_NAME")+"-AML-WS" - resource_group = os.environ.get("BASE_NAME")+"-AML-RG" - subscription_id = os.environ.get("SUBSCRIPTION_ID") - tenant_id = os.environ.get("TENANT_ID") - app_id = os.environ.get("SP_APP_ID") - app_secret = os.environ.get("SP_APP_SECRET") + e = Env() + workspace_name = e.workspace_name + resource_group = e.resource_group + subscription_id = e.subscription_id + tenant_id = e.tenant_id + app_id = e.app_id + app_secret = e.app_secret aml_workspace = get_workspace( workspace_name, From 
b17b30a75627b591175ab7b2fe127e6b84efb0b8 Mon Sep 17 00:00:00 2001 From: Dariusz Parys Date: Tue, 19 Nov 2019 13:51:45 +0100 Subject: [PATCH 2/6] Adding Workspace in Variable Group The workspace has now to be defined in the variable group - Adjusted infrastructure as code template --- environment_setup/iac-create-environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment_setup/iac-create-environment.yml b/environment_setup/iac-create-environment.yml index 585f3ba5..8f795857 100644 --- a/environment_setup/iac-create-environment.yml +++ b/environment_setup/iac-create-environment.yml @@ -29,7 +29,7 @@ steps: location: $(LOCATION) templateLocation: 'Linked artifact' csmFile: '$(Build.SourcesDirectory)/environment_setup/arm-templates/cloud-environment.json' - overrideParameters: '-baseName $(BASE_NAME) -location $(LOCATION)' + overrideParameters: '-baseName $(BASE_NAME) -location $(LOCATION) -workspace $(WORKSPACE_NAME)' deploymentMode: 'Incremental' displayName: 'Deploy MLOps resources to Azure' From e285ec1e8262f663f7bab8838ad0a7deaf055a3b Mon Sep 17 00:00:00 2001 From: Dariusz Parys Date: Tue, 19 Nov 2019 15:11:33 +0100 Subject: [PATCH 3/6] Default WORKSPACE_NAME definition In case there is no WORKSPACE_NAME in the Azure DevOps variable group we build up the workspace using the provided BASE_NAME variable. 
--- ml_service/util/env_variables.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ml_service/util/env_variables.py b/ml_service/util/env_variables.py index c420bec8..01b2e867 100644 --- a/ml_service/util/env_variables.py +++ b/ml_service/util/env_variables.py @@ -16,6 +16,8 @@ class Env(Singleton): def __init__(self): load_dotenv() self._workspace_name = os.environ.get("WORKSPACE_NAME") + if not self._workspace_name: + self._workspace_name = os.environ.get("BASE_NAME") + "-AML-WS" self._resource_group = os.environ.get("RESOURCEGROUP_NAME") self._subscription_id = os.environ.get("SUBSCRIPTION_ID") self._tenant_id = os.environ.get("TENANT_ID") From 3b0a4dbcda865c5ff92d26f38f2b26af3483ad93 Mon Sep 17 00:00:00 2001 From: Dariusz Parys Date: Tue, 19 Nov 2019 15:33:38 +0100 Subject: [PATCH 4/6] Extended Getting Started Documentation Describes the usage of all variable definitions --- docs/getting_started.md | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/docs/getting_started.md b/docs/getting_started.md index d39be2e8..b12a05d0 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -47,27 +47,17 @@ Click on **Library** in the **Pipelines** section as indicated below: Please name your variable group **``devopsforai-aml-vg``** as we are using this name within our build yaml file. 
-The variable group should contain the following variables: +The variable group should contain the following required variables: | Variable Name | Suggested Value | | --------------------------- | -----------------------------------| -| AML_COMPUTE_CLUSTER_CPU_SKU | STANDARD_DS2_V2 | -| AML_COMPUTE_CLUSTER_NAME | train-cluster | | BASE_NAME | [unique base name] | -| DB_CLUSTER_ID | [Optional Databricks cluster Id] | -| DATABRICKS_COMPUTE_NAME | [Optional Databricks compute name] | -| EVALUATE_SCRIPT_PATH | evaluate/evaluate_model.py | -| EXPERIMENT_NAME | mlopspython | | LOCATION | centralus | -| MODEL_NAME | sklearn_regression_model.pkl | -| REGISTER_SCRIPT_PATH | register/register_model.py | -| SOURCES_DIR_TRAIN | code | | SP_APP_ID | | | SP_APP_SECRET | | | SUBSCRIPTION_ID | | | TENANT_ID | | -| TRAIN_SCRIPT_PATH | training/train.py | -| TRAINING_PIPELINE_NAME | training-pipeline | +| RESOURCE_GROUP | | Mark **SP_APP_SECRET** variable as a secret one. @@ -79,9 +69,33 @@ BASE_NAME variable (e.g. MyUniqueML), so that the created resources will have unique names (e.g. MyUniqueML-AML-RG, MyUniqueML-AML-WS, etc.). The length of the BASE_NAME value should not exceed 10 characters. +> You can also adjust the pipeline `environment_setup/iac-create-environment.yml` to include the following parameters to specify individual names for each resource within the resource-group: `workspace` (Azure Machine Learning Workspace name), `storageAccount` (Azure Storage Account name), `keyvault` (Azure Key Vault name), `appInsights` (Application Insights Name) and `acr` (Azure Container Registry name). + Make sure to select the **Allow access to all pipelines** checkbox in the variable group configuration. +## Adjust .pipelines/azdo-variables.yml + +Non subscription related variables are stored as variable template in the file `.pipelines/azdo-variables.yml`. In order to change the predefined once feel free to adjust the values as needed. 
They're prepopulated as follows: + +| Variable Name | Suggested Value | +| --------------------------- | -----------------------------------| +| AML_COMPUTE_CLUSTER_CPU_SKU | STANDARD_DS2_V2 | +| AML_COMPUTE_CLUSTER_NAME | train-cluster | +| AML_CLUSTER_MIN_NODES | 0 | +| AML_CLUSTER_MAX_NODES | 4 | +| AML_CLUSTER_PRIORITY | lowpriority | +| BUILD_BUILDID | 001 | +| DB_CLUSTER_ID | [Optional Databricks cluster Id] | +| EVALUATE_SCRIPT_PATH | evaluate/evaluate_model.py | +| EXPERIMENT_NAME | mlopspython | +| IMAGE_NAME | | +| MODEL_NAME | sklearn_regression_model.pkl | +| REGISTER_SCRIPT_PATH | register/register_model.py | +| SOURCES_DIR_TRAIN | code | +| TRAIN_SCRIPT_PATH | training/train.py | +| TRAINING_PIPELINE_NAME | training-pipeline | + Up until now you should have: * Forked (or cloned) the repo From c683af74db1df0dab387aeb091950993bf5be20c Mon Sep 17 00:00:00 2001 From: Dariusz Parys Date: Wed, 20 Nov 2019 10:20:12 +0100 Subject: [PATCH 5/6] Incorporated feedback - Removed unnecessary variables from `azdo-variables.yml` - `workspace` is now a required ARM template parameter. 
Needs to be existent in the variable group as `WORKSPACE_NAME` - Cleaned up `.env.example` to only include variables needed to execute scripts in local environments --- .env.example | 35 ++++++------------- .pipelines/azdo-variables.yml | 31 ++++++++-------- .../arm-templates/cloud-environment.json | 3 +- environment_setup/iac-create-environment.yml | 2 +- environment_setup/iac-remove-environment.yml | 2 +- ml_service/pipelines/run_train_pipeline.py | 3 ++ ml_service/util/env_variables.py | 12 +++---- 7 files changed, 37 insertions(+), 51 deletions(-) diff --git a/.env.example b/.env.example index 2f62dad7..ea1338ae 100644 --- a/.env.example +++ b/.env.example @@ -1,52 +1,39 @@ # Azure Subscription Variables SUBSCRIPTION_ID = '' -LOCATION = '' +LOCATION = 'westeurope' TENANT_ID = '' BASE_NAME = '' SP_APP_ID = '' SP_APP_SECRET = '' +RESOURCE_GROUP = 'mlops-rg' # Mock build/release ID for local testing - update ReleaseID each "release" BUILD_BUILDID = '001' RELEASE_RELEASEID = '001' # Azure ML Workspace Variables +WORKSPACE_NAME = '' EXPERIMENT_NAME = '' -SCRIPT_FOLDER = './' # AML Compute Cluster Config -AML_COMPUTE_CLUSTER_NAME = '' -AML_COMPUTE_CLUSTER_CPU_SKU = '' -AML_CLUSTER_MAX_NODES = '' -AML_CLUSTER_MIN_NODES = '' +AML_COMPUTE_CLUSTER_NAME = 'train-cluster' +AML_COMPUTE_CLUSTER_CPU_SKU = 'STANDARD_DS2_V2' +AML_CLUSTER_MAX_NODES = '4' +AML_CLUSTER_MIN_NODES = '0' AML_CLUSTER_PRIORITY = 'lowpriority' # Training Config MODEL_NAME = 'sklearn_regression_model.pkl' MODEL_VERSION = '1' TRAIN_SCRIPT_PATH = 'training/train.py' # AML Pipeline Config -TRAINING_PIPELINE_NAME = '' -PIPELINE_CONDA_PATH = 'aml_config/conda_dependencies.yml' +TRAINING_PIPELINE_NAME = 'Training Pipeline' MODEL_PATH = '' EVALUATE_SCRIPT_PATH = 'evaluate/evaluate_model.py' REGISTER_SCRIPT_PATH = 'register/register_model.py' SOURCES_DIR_TRAIN = 'code' -# These are not mandatory for the core workflow -# Remote VM Config -REMOTE_VM_NAME = '' -REMOTE_VM_USERNAME = '' -REMOTE_VM_PASSWORD = ''
-REMOTE_VM_IP = '' -# Image config -IMAGE_NAME = '' -IMAGE_DESCRIPTION = '' -IMAGE_VERSION = '' -# ACI Config -ACI_CPU_CORES = '' -ACI_MEM_GB = '' -ACI_DESCRIPTION = '' - # Optional. Used by a training pipeline with R on Databricks DB_CLUSTER_ID = '' -DATABRICKS_COMPUTE_NAME = '' \ No newline at end of file + +# Optional. Container Image name for image creation +IMAGE_NAME = 'ml-trained' \ No newline at end of file diff --git a/.pipelines/azdo-variables.yml b/.pipelines/azdo-variables.yml index d0914e3d..64a42d5b 100644 --- a/.pipelines/azdo-variables.yml +++ b/.pipelines/azdo-variables.yml @@ -1,8 +1,8 @@ variables: -- name: DEVOPS_VARIABLEGROUP - value: devopsforai-aml-vg -- name: BUILD_TRAIN_SCRIPT - value: build_train_pipeline.py + # Azure ML Workspace Variables +- name: EXPERIMENT_NAME + value: mlopspython + # AML Compute Cluster Config - name: AML_COMPUTE_CLUSTER_CPU_SKU value: STANDARD_DS2_V2 - name: AML_COMPUTE_CLUSTER_NAME @@ -13,29 +13,28 @@ variables: value: 4 - name: AML_CLUSTER_PRIORITY value: lowpriority -- name: EVALUATE_SCRIPT_PATH - value: evaluate/evaluate_model.py -- name: EXPERIMENT_NAME - value: mlopspython + # Training Config +- name: BUILD_TRAIN_SCRIPT + value: build_train_pipeline.py +- name: TRAIN_SCRIPT_PATH + value: training/train.py - name: MODEL_NAME value: sklearn_regression_model.pkl - name: MODEL_VERSION value: '1' + # AML Pipeline Config +- name: TRAINING_PIPELINE_NAME + value: 'Training Pipeline' - name: MODEL_PATH value: '' +- name: EVALUATE_SCRIPT_PATH + value: evaluate/evaluate_model.py - name: REGISTER_SCRIPT_PATH value: register/register_model.py - name: SOURCES_DIR_TRAIN value: code -- name: TRAIN_SCRIPT_PATH - value: training/train.py -- name: TRAINING_PIPELINE_NAME - value: training-pipeline -- name: BUILD_BUILDID - value: 001 -- name: RELEASE_RELEASEID - value: 001 - name: IMAGE_NAME value: '' + # Optional. 
Used by a training pipeline with R on Databricks - name: DB_CLUSTER_ID value: '' \ No newline at end of file diff --git a/environment_setup/arm-templates/cloud-environment.json b/environment_setup/arm-templates/cloud-environment.json index a7f02e54..f2b2ac2f 100644 --- a/environment_setup/arm-templates/cloud-environment.json +++ b/environment_setup/arm-templates/cloud-environment.json @@ -28,8 +28,7 @@ } }, "workspace": { - "type": "string", - "defaultValue": "[concat(parameters('baseName'),'-AML-WS')]" + "type": "string" }, "storageAccount": { "type": "string", diff --git a/environment_setup/iac-create-environment.yml b/environment_setup/iac-create-environment.yml index 8f795857..f4c08ddf 100644 --- a/environment_setup/iac-create-environment.yml +++ b/environment_setup/iac-create-environment.yml @@ -25,7 +25,7 @@ steps: inputs: azureSubscription: 'AzureResourceConnection' action: 'Create Or Update Resource Group' - resourceGroupName: '$(RESOURCEGROUP_NAME)' + resourceGroupName: '$(RESOURCE_GROUP)' location: $(LOCATION) templateLocation: 'Linked artifact' csmFile: '$(Build.SourcesDirectory)/environment_setup/arm-templates/cloud-environment.json' diff --git a/environment_setup/iac-remove-environment.yml b/environment_setup/iac-remove-environment.yml index 81c257f8..67626223 100644 --- a/environment_setup/iac-remove-environment.yml +++ b/environment_setup/iac-remove-environment.yml @@ -18,7 +18,7 @@ steps: inputs: azureSubscription: 'AzureResourceConnection' action: 'DeleteRG' - resourceGroupName: '$(RESOURCEGROUP_NAME)' + resourceGroupName: '$(RESOURCE_GROUP)' location: $(LOCATION) displayName: 'Delete resources in Azure' diff --git a/ml_service/pipelines/run_train_pipeline.py b/ml_service/pipelines/run_train_pipeline.py index 3ee03056..fdc8f5a5 100644 --- a/ml_service/pipelines/run_train_pipeline.py +++ b/ml_service/pipelines/run_train_pipeline.py @@ -1,6 +1,9 @@ from azureml.pipeline.core import PublishedPipeline from azureml.core import Workspace from 
azureml.core.authentication import ServicePrincipalAuthentication +import os +import sys +sys.path.append(os.path.abspath("./ml_service/util")) # NOQA: E402 from env_variables import Env diff --git a/ml_service/util/env_variables.py b/ml_service/util/env_variables.py index 01b2e867..9fe6d061 100644 --- a/ml_service/util/env_variables.py +++ b/ml_service/util/env_variables.py @@ -16,16 +16,16 @@ class Env(Singleton): def __init__(self): load_dotenv() self._workspace_name = os.environ.get("WORKSPACE_NAME") - if not self._workspace_name: - self._workspace_name = os.environ.get("BASE_NAME") + "-AML-WS" - self._resource_group = os.environ.get("RESOURCEGROUP_NAME") + self._resource_group = os.environ.get("RESOURCE_GROUP") self._subscription_id = os.environ.get("SUBSCRIPTION_ID") self._tenant_id = os.environ.get("TENANT_ID") self._app_id = os.environ.get("SP_APP_ID") self._app_secret = os.environ.get("SP_APP_SECRET") self._vm_size = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU") self._compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME") - self._db_cluster_id = os.environ.get("DB_CLUSTER_ID") + self._vm_priority = os.environ.get("AML_CLUSTER_PRIORITY", 'lowpriority') # noqa E501 + self._min_nodes = int(os.environ.get("AML_CLUSTER_MIN_NODES", 0)) + self._max_nodes = int(os.environ.get("AML_CLUSTER_MAX_NODES", 4)) self._build_id = os.environ.get("BUILD_BUILDID") self._pipeline_name = os.environ.get("TRAINING_PIPELINE_NAME") self._sources_directory_train = os.environ.get("SOURCES_DIR_TRAIN") @@ -33,12 +33,10 @@ def __init__(self): self._evaluate_script_path = os.environ.get("EVALUATE_SCRIPT_PATH") self._model_name = os.environ.get("MODEL_NAME") self._experiment_name = os.environ.get("EXPERIMENT_NAME") - self._vm_priority = os.environ.get("AML_CLUSTER_PRIORITY", 'lowpriority') # noqa E501 - self._min_nodes = int(os.environ.get("AML_CLUSTER_MIN_NODES", 0)) - self._max_nodes = int(os.environ.get("AML_CLUSTER_MAX_NODES", 4)) self._model_version = 
os.environ.get('MODEL_VERSION') self._image_name = os.environ.get('IMAGE_NAME') self._model_path = os.environ.get('MODEL_PATH') + self._db_cluster_id = os.environ.get("DB_CLUSTER_ID") @property def workspace_name(self): From cc9fed106a4794c9f737f3d9124eb8e09c7e966d Mon Sep 17 00:00:00 2001 From: Dariusz Parys Date: Wed, 20 Nov 2019 11:40:17 +0100 Subject: [PATCH 6/6] Modified Getting Started Document Included pointers where to find the proper variable options in order to execute the project locally vs. Azure DevOps --- docs/getting_started.md | 41 +++++++++++++++++------------------------ 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/docs/getting_started.md b/docs/getting_started.md index b12a05d0..cc56c6c4 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -58,43 +58,36 @@ The variable group should contain the following required variables: | SUBSCRIPTION_ID | | | TENANT_ID | | | RESOURCE_GROUP | | +| WORKSPACE_NAME | mlops-AML-WS | Mark **SP_APP_SECRET** variable as a secret one. -**Note:** The **BASE_NAME** parameter is used throughout the solution for naming +**Note:** + +The **WORKSPACE_NAME** parameter is used for the Azure Machine Learning Workspace creation. You can provide here an existing AML Workspace if you have one. + +The **BASE_NAME** parameter is used throughout the solution for naming Azure resources. When the solution is used in a shared subscription, there can be naming collisions with resources that require unique names like azure blob storage and registry DNS naming. Make sure to give a unique value to the BASE_NAME variable (e.g. MyUniqueML), so that the created resources will have -unique names (e.g. MyUniqueML-AML-RG, MyUniqueML-AML-WS, etc.). The length of -the BASE_NAME value should not exceed 10 characters. 
- -> You can also adjust the pipeline `environment_setup/iac-create-environment.yml` to include the following parameters to specify individual names for each resource within the resource-group: `workspace` (Azure Machine Learning Workspace name), `storageAccount` (Azure Storage Account name), `keyvault` (Azure Key Vault name), `appInsights` (Application Insights Name) and `acr` (Azure Container Registry name). +unique names (e.g. MyUniqueML-AML-RG, MyUniqueML-AML-KV, etc.). The length of the BASE_NAME value should not exceed 10 characters. Make sure to select the **Allow access to all pipelines** checkbox in the variable group configuration. -## Adjust .pipelines/azdo-variables.yml +## More variable options -Non subscription related variables are stored as variable template in the file `.pipelines/azdo-variables.yml`. In order to change the predefined once feel free to adjust the values as needed. They're prepopulated as follows: +There are more variables used in the project. They're defined in two places: one for local execution and one for using Azure DevOps Pipelines. -| Variable Name | Suggested Value | -| --------------------------- | -----------------------------------| -| AML_COMPUTE_CLUSTER_CPU_SKU | STANDARD_DS2_V2 | -| AML_COMPUTE_CLUSTER_NAME | train-cluster | -| AML_CLUSTER_MIN_NODES | 0 | -| AML_CLUSTER_MAX_NODES | 4 | -| AML_CLUSTER_PRIORITY | lowpriority | -| BUILD_BUILDID | 001 | -| DB_CLUSTER_ID | [Optional Databricks cluster Id] | -| EVALUATE_SCRIPT_PATH | evaluate/evaluate_model.py | -| EXPERIMENT_NAME | mlopspython | -| IMAGE_NAME | | -| MODEL_NAME | sklearn_regression_model.pkl | -| REGISTER_SCRIPT_PATH | register/register_model.py | -| SOURCES_DIR_TRAIN | code | -| TRAIN_SCRIPT_PATH | training/train.py | -| TRAINING_PIPELINE_NAME | training-pipeline | +### Local configuration + +In order to configure the project locally, you have to create a copy of `.env.example` in the root and name it `.env`.
Fill out all missing values and adjust the existing ones to your needs. Please be aware that the local environment also needs access to the Azure subscription, so you have to provide the credentials of your service principal and Azure account information here as well. + +### Azure DevOps configuration + +For using Azure DevOps Pipelines, all other variables are stored in the file `.pipelines/azdo-variables.yml`. Adjust the variables as needed; the defaults will give you an easy jump start. Up until now you should have: