From 27a0eb473e1248eedf45b67b5be18b7eae530a6f Mon Sep 17 00:00:00 2001 From: Dariusz Parys Date: Tue, 19 Nov 2019 13:29:34 +0100 Subject: [PATCH 1/6] Refactoring Variable Code Base Usage Simplified and cleaned-up the usage of variables inside all python files and Azure DevOps pipelines. Variables are now defined in two places, the Azure DevOps variable template and the Azure DevOps variable group. - Refactored dotenv usage in a singleton class to serve variables to all python scripts - Created new variable template `azdo-variables.yml` to be used within Azure DevOps pipelines - Adjusted existing pipelines to leverage new variable template - Prepared the ARM template to specify individual names for the various resources Please provide feedback, I'll update the PR with documentation modifications Resolves: #279075 --- .gitignore | 1 + .pipelines/azdo-ci-build-train.yml | 11 +- .pipelines/azdo-pr-build-train.yml | 1 + .pipelines/azdo-variables.yml | 41 ++++++ .../arm-templates/cloud-environment.json | 30 ++++- environment_setup/iac-create-environment.yml | 2 +- environment_setup/iac-remove-environment.yml | 2 +- ml_service/pipelines/build_train_pipeline.py | 51 +++---- .../pipelines/build_train_pipeline_with_r.py | 35 ++--- .../build_train_pipeline_with_r_on_dbricks.py | 38 ++---- ml_service/pipelines/run_train_pipeline.py | 37 ++--- ml_service/util/attach_compute.py | 15 +-- ml_service/util/create_scoring_image.py | 31 ++--- ml_service/util/env_variables.py | 127 ++++++++++++++++++ ml_service/util/register_model.py | 38 ++---- tests/unit/code_test.py | 14 +- 16 files changed, 293 insertions(+), 181 deletions(-) create mode 100644 .pipelines/azdo-variables.yml create mode 100644 ml_service/util/env_variables.py diff --git a/.gitignore b/.gitignore index 3a5a8879..7bac8768 100644 --- a/.gitignore +++ b/.gitignore @@ -24,6 +24,7 @@ wheels/ .installed.cfg *.egg MANIFEST +venv/ # PyInstaller # Usually these files are written by a python script from a template diff --git 
a/.pipelines/azdo-ci-build-train.yml b/.pipelines/azdo-ci-build-train.yml index c2453d4d..09c52d95 100644 --- a/.pipelines/azdo-ci-build-train.yml +++ b/.pipelines/azdo-ci-build-train.yml @@ -11,14 +11,9 @@ trigger: - ml_service/util/create_scoring_image.py variables: +- template: azdo-variables.yml - group: devopsforai-aml-vg -# Choose from default, build_train_pipeline_with_r.py, or build_train_pipeline_with_r_on_dbricks.py -- name: build-train-script - value: 'build_train_pipeline.py' -# Automatically triggers the train, evaluate, register pipeline after the CI steps. -# Uncomment to set to false or add same variable name at queue time with value of false to disable. -# - name: auto-trigger-training -# value: false + stages: - stage: 'Model_CI' @@ -34,7 +29,7 @@ stages: - template: azdo-base-pipeline.yml - script: | # Invoke the Python building and publishing a training pipeline - python3 $(Build.SourcesDirectory)/ml_service/pipelines/$(build-train-script) + python3 $(Build.SourcesDirectory)/ml_service/pipelines/${{ variables.BUILD_TRAIN_SCRIPT }} failOnStderr: 'false' env: SP_APP_SECRET: '$(SP_APP_SECRET)' diff --git a/.pipelines/azdo-pr-build-train.yml b/.pipelines/azdo-pr-build-train.yml index 8bf6ca56..24231b2a 100644 --- a/.pipelines/azdo-pr-build-train.yml +++ b/.pipelines/azdo-pr-build-train.yml @@ -11,6 +11,7 @@ container: mcr.microsoft.com/mlops/python:latest variables: +- template: azdo-variables.yml - group: devopsforai-aml-vg diff --git a/.pipelines/azdo-variables.yml b/.pipelines/azdo-variables.yml new file mode 100644 index 00000000..d0914e3d --- /dev/null +++ b/.pipelines/azdo-variables.yml @@ -0,0 +1,41 @@ +variables: +- name: DEVOPS_VARIABLEGROUP + value: devopsforai-aml-vg +- name: BUILD_TRAIN_SCRIPT + value: build_train_pipeline.py +- name: AML_COMPUTE_CLUSTER_CPU_SKU + value: STANDARD_DS2_V2 +- name: AML_COMPUTE_CLUSTER_NAME + value: train-cluster +- name: AML_CLUSTER_MIN_NODES + value: 0 +- name: AML_CLUSTER_MAX_NODES + value: 4 +- name: 
AML_CLUSTER_PRIORITY + value: lowpriority +- name: EVALUATE_SCRIPT_PATH + value: evaluate/evaluate_model.py +- name: EXPERIMENT_NAME + value: mlopspython +- name: MODEL_NAME + value: sklearn_regression_model.pkl +- name: MODEL_VERSION + value: '1' +- name: MODEL_PATH + value: '' +- name: REGISTER_SCRIPT_PATH + value: register/register_model.py +- name: SOURCES_DIR_TRAIN + value: code +- name: TRAIN_SCRIPT_PATH + value: training/train.py +- name: TRAINING_PIPELINE_NAME + value: training-pipeline +- name: BUILD_BUILDID + value: 001 +- name: RELEASE_RELEASEID + value: 001 +- name: IMAGE_NAME + value: '' +- name: DB_CLUSTER_ID + value: '' \ No newline at end of file diff --git a/environment_setup/arm-templates/cloud-environment.json b/environment_setup/arm-templates/cloud-environment.json index 590a4aed..a7f02e54 100644 --- a/environment_setup/arm-templates/cloud-environment.json +++ b/environment_setup/arm-templates/cloud-environment.json @@ -26,16 +26,36 @@ "metadata": { "description": "Specifies the location for all resources." 
} + }, + "workspace": { + "type": "string", + "defaultValue": "[concat(parameters('baseName'),'-AML-WS')]" + }, + "storageAccount": { + "type": "string", + "defaultValue": "[concat(toLower(parameters('baseName')), 'amlsa')]" + }, + "keyvault": { + "type": "string", + "defaultValue": "[concat(parameters('baseName'),'-AML-KV')]" + }, + "appInsights": { + "type": "string", + "defaultValue": "[concat(parameters('baseName'),'-AML-AI')]" + }, + "acr": { + "type": "string", + "defaultValue": "[concat(toLower(parameters('baseName')),'amlcr')]" } }, "variables": { - "amlWorkspaceName": "[concat(parameters('baseName'),'-AML-WS')]", - "storageAccountName": "[concat(toLower(parameters('baseName')), 'amlsa')]", + "amlWorkspaceName": "[parameters('workspace')]", + "storageAccountName": "[parameters('storageAccount')]", "storageAccountType": "Standard_LRS", - "keyVaultName": "[concat(parameters('baseName'),'-AML-KV')]", + "keyVaultName": "[parameters('keyvault')]", "tenantId": "[subscription().tenantId]", - "applicationInsightsName": "[concat(parameters('baseName'),'-AML-AI')]", - "containerRegistryName": "[concat(toLower(parameters('baseName')),'amlcr')]" + "applicationInsightsName": "[parameters('appInsights')]", + "containerRegistryName": "[parameters('acr')]" }, "resources": [ { diff --git a/environment_setup/iac-create-environment.yml b/environment_setup/iac-create-environment.yml index 2dd00694..585f3ba5 100644 --- a/environment_setup/iac-create-environment.yml +++ b/environment_setup/iac-create-environment.yml @@ -25,7 +25,7 @@ steps: inputs: azureSubscription: 'AzureResourceConnection' action: 'Create Or Update Resource Group' - resourceGroupName: '$(BASE_NAME)-AML-RG' + resourceGroupName: '$(RESOURCEGROUP_NAME)' location: $(LOCATION) templateLocation: 'Linked artifact' csmFile: '$(Build.SourcesDirectory)/environment_setup/arm-templates/cloud-environment.json' diff --git a/environment_setup/iac-remove-environment.yml b/environment_setup/iac-remove-environment.yml index 
4ca8b04e..81c257f8 100644 --- a/environment_setup/iac-remove-environment.yml +++ b/environment_setup/iac-remove-environment.yml @@ -18,7 +18,7 @@ steps: inputs: azureSubscription: 'AzureResourceConnection' action: 'DeleteRG' - resourceGroupName: '$(BASE_NAME)-AML-RG' + resourceGroupName: '$(RESOURCEGROUP_NAME)' location: $(LOCATION) displayName: 'Delete resources in Azure' diff --git a/ml_service/pipelines/build_train_pipeline.py b/ml_service/pipelines/build_train_pipeline.py index 481c68e5..b866201d 100644 --- a/ml_service/pipelines/build_train_pipeline.py +++ b/ml_service/pipelines/build_train_pipeline.py @@ -5,45 +5,32 @@ # from azureml.core import Datastore import os import sys -from dotenv import load_dotenv sys.path.append(os.path.abspath("./ml_service/util")) # NOQA: E402 from workspace import get_workspace from attach_compute import get_compute +from env_variables import Env def main(): - load_dotenv() - workspace_name = os.environ.get("BASE_NAME")+"-AML-WS" - resource_group = os.environ.get("BASE_NAME")+"-AML-RG" - subscription_id = os.environ.get("SUBSCRIPTION_ID") - tenant_id = os.environ.get("TENANT_ID") - app_id = os.environ.get("SP_APP_ID") - app_secret = os.environ.get("SP_APP_SECRET") - sources_directory_train = os.environ.get("SOURCES_DIR_TRAIN") - train_script_path = os.environ.get("TRAIN_SCRIPT_PATH") - evaluate_script_path = os.environ.get("EVALUATE_SCRIPT_PATH") - vm_size = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU") - compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME") - model_name = os.environ.get("MODEL_NAME") - build_id = os.environ.get("BUILD_BUILDID") - pipeline_name = os.environ.get("TRAINING_PIPELINE_NAME") - + e = Env() # Get Azure machine learning workspace aml_workspace = get_workspace( - workspace_name, - resource_group, - subscription_id, - tenant_id, - app_id, - app_secret) + e.workspace_name, + e.resource_group, + e.subscription_id, + e.tenant_id, + e.app_id, + e.app_secret) + print("get_workspace:") print(aml_workspace) 
# Get Azure machine learning cluster aml_compute = get_compute( aml_workspace, - compute_name, - vm_size) + e.compute_name, + e.vm_size) if aml_compute is not None: + print("aml_compute:") print(aml_compute) run_config = RunConfiguration(conda_dependencies=CondaDependencies.create( @@ -56,16 +43,16 @@ def main(): run_config.environment.docker.enabled = True model_name = PipelineParameter( - name="model_name", default_value=model_name) + name="model_name", default_value=e.model_name) release_id = PipelineParameter( name="release_id", default_value="0" ) train_step = PythonScriptStep( name="Train Model", - script_name=train_script_path, + script_name=e.train_script_path, compute_target=aml_compute, - source_directory=sources_directory_train, + source_directory=e.sources_directory_train, arguments=[ "--release_id", release_id, "--model_name", model_name, @@ -77,9 +64,9 @@ def main(): evaluate_step = PythonScriptStep( name="Evaluate Model ", - script_name=evaluate_script_path, + script_name=e.evaluate_script_path, compute_target=aml_compute, - source_directory=sources_directory_train, + source_directory=e.sources_directory_train, arguments=[ "--release_id", release_id, "--model_name", model_name, @@ -95,9 +82,9 @@ def main(): train_pipeline = Pipeline(workspace=aml_workspace, steps=steps) train_pipeline.validate() published_pipeline = train_pipeline.publish( - name=pipeline_name, + name=e.pipeline_name, description="Model training/retraining pipeline", - version=build_id + version=e.build_id ) print(f'Published pipeline: {published_pipeline.name}') print(f'for build {published_pipeline.version}') diff --git a/ml_service/pipelines/build_train_pipeline_with_r.py b/ml_service/pipelines/build_train_pipeline_with_r.py index 7eae2c98..72ed8e2a 100644 --- a/ml_service/pipelines/build_train_pipeline_with_r.py +++ b/ml_service/pipelines/build_train_pipeline_with_r.py @@ -4,40 +4,29 @@ # from azureml.core import Datastore import os import sys -from dotenv import load_dotenv 
sys.path.append(os.path.abspath("./ml_service/util")) # NOQA: E402 from workspace import get_workspace from attach_compute import get_compute +from env_variables import Env def main(): - load_dotenv() - workspace_name = os.environ.get("BASE_NAME")+"-AML-WS" - resource_group = os.environ.get("BASE_NAME")+"-AML-RG" - subscription_id = os.environ.get("SUBSCRIPTION_ID") - tenant_id = os.environ.get("TENANT_ID") - app_id = os.environ.get("SP_APP_ID") - app_secret = os.environ.get("SP_APP_SECRET") - vm_size = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU") - compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME") - build_id = os.environ.get("BUILD_BUILDID") - pipeline_name = os.environ.get("TRAINING_PIPELINE_NAME") - + e = Env() # Get Azure machine learning workspace aml_workspace = get_workspace( - workspace_name, - resource_group, - subscription_id, - tenant_id, - app_id, - app_secret) + e.workspace_name, + e.resource_group, + e.subscription_id, + e.tenant_id, + e.app_id, + e.app_secret) print(aml_workspace) # Get Azure machine learning cluster aml_compute = get_compute( aml_workspace, - compute_name, - vm_size) + e.compute_name, + e.vm_size) if aml_compute is not None: print(aml_compute) @@ -66,9 +55,9 @@ def main(): train_pipeline = Pipeline(workspace=aml_workspace, steps=steps) train_pipeline.validate() published_pipeline = train_pipeline.publish( - name=pipeline_name + "_with_R", + name=e.pipeline_name + "_with_R", description="Model training/retraining pipeline", - version=build_id + version=e.build_id ) print(f'Published pipeline: {published_pipeline.name}') print(f'for build {published_pipeline.version}') diff --git a/ml_service/pipelines/build_train_pipeline_with_r_on_dbricks.py b/ml_service/pipelines/build_train_pipeline_with_r_on_dbricks.py index 95de9e55..733683eb 100644 --- a/ml_service/pipelines/build_train_pipeline_with_r_on_dbricks.py +++ b/ml_service/pipelines/build_train_pipeline_with_r_on_dbricks.py @@ -1,42 +1,30 @@ from azureml.pipeline.core import 
Pipeline import os import sys -from dotenv import load_dotenv sys.path.append(os.path.abspath("./ml_service/util")) # NOQA: E402 from workspace import get_workspace from attach_compute import get_compute from azureml.pipeline.steps import DatabricksStep +from env_variables import Env def main(): - load_dotenv() - workspace_name = os.environ.get("BASE_NAME")+"-AML-WS" - resource_group = os.environ.get("BASE_NAME")+"-AML-RG" - subscription_id = os.environ.get("SUBSCRIPTION_ID") - tenant_id = os.environ.get("TENANT_ID") - app_id = os.environ.get("SP_APP_ID") - app_secret = os.environ.get("SP_APP_SECRET") - vm_size = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU") - compute_name = os.environ.get("DATABRICKS_COMPUTE_NAME") - db_cluster_id = os.environ.get("DB_CLUSTER_ID") - build_id = os.environ.get("BUILD_BUILDID") - pipeline_name = os.environ.get("TRAINING_PIPELINE_NAME") - + e = Env() # Get Azure machine learning workspace aml_workspace = get_workspace( - workspace_name, - resource_group, - subscription_id, - tenant_id, - app_id, - app_secret) + e.workspace_name, + e.resource_group, + e.subscription_id, + e.tenant_id, + e.app_id, + e.app_secret) print(aml_workspace) # Get Azure machine learning cluster aml_compute = get_compute( aml_workspace, - compute_name, - vm_size) + e.compute_name, + e.vm_size) if aml_compute is not None: print(aml_compute) @@ -46,7 +34,7 @@ def main(): python_script_name="train_with_r_on_databricks.py", source_directory="code/training/R", run_name='DB_Python_R_demo', - existing_cluster_id=db_cluster_id, + existing_cluster_id=e.db_cluster_id, compute_target=aml_compute, allow_reuse=False ) @@ -58,9 +46,9 @@ def main(): train_pipeline = Pipeline(workspace=aml_workspace, steps=steps) train_pipeline.validate() published_pipeline = train_pipeline.publish( - name=pipeline_name + "_with_R_on_DB", + name=e.pipeline_name + "_with_R_on_DB", description="Model training/retraining pipeline", - version=build_id + version=e.build_id ) print(f'Published 
pipeline: {published_pipeline.name}') print(f'for build {published_pipeline.version}') diff --git a/ml_service/pipelines/run_train_pipeline.py b/ml_service/pipelines/run_train_pipeline.py index 1d942a8c..3ee03056 100644 --- a/ml_service/pipelines/run_train_pipeline.py +++ b/ml_service/pipelines/run_train_pipeline.py @@ -1,31 +1,20 @@ -import os from azureml.pipeline.core import PublishedPipeline from azureml.core import Workspace from azureml.core.authentication import ServicePrincipalAuthentication -from dotenv import load_dotenv +from env_variables import Env def main(): - load_dotenv() - workspace_name = os.environ.get("BASE_NAME")+"-AML-WS" - resource_group = os.environ.get("BASE_NAME")+"-AML-RG" - subscription_id = os.environ.get("SUBSCRIPTION_ID") - tenant_id = os.environ.get("TENANT_ID") - experiment_name = os.environ.get("EXPERIMENT_NAME") - model_name = os.environ.get("MODEL_NAME") - app_id = os.environ.get('SP_APP_ID') - app_secret = os.environ.get('SP_APP_SECRET') - build_id = os.environ.get('BUILD_BUILDID') - + e = Env() service_principal = ServicePrincipalAuthentication( - tenant_id=tenant_id, - service_principal_id=app_id, - service_principal_password=app_secret) + tenant_id=e.tenant_id, + service_principal_id=e.app_id, + service_principal_password=e.app_secret) aml_workspace = Workspace.get( - name=workspace_name, - subscription_id=subscription_id, - resource_group=resource_group, + name=e.workspace_name, + subscription_id=e.subscription_id, + resource_group=e.resource_group, auth=service_principal ) @@ -34,23 +23,23 @@ def main(): matched_pipes = [] for p in pipelines: - if p.version == build_id: + if p.version == e.build_id: matched_pipes.append(p) if(len(matched_pipes) > 1): published_pipeline = None - raise Exception(f"Multiple active pipelines are published for build {build_id}.") # NOQA: E501 + raise Exception(f"Multiple active pipelines are published for build {e.build_id}.") # NOQA: E501 elif(len(matched_pipes) == 0): published_pipeline = 
None - raise KeyError(f"Unable to find a published pipeline for this build {build_id}") # NOQA: E501 + raise KeyError(f"Unable to find a published pipeline for this build {e.build_id}") # NOQA: E501 else: published_pipeline = matched_pipes[0] - pipeline_parameters = {"model_name": model_name} + pipeline_parameters = {"model_name": e.model_name} response = published_pipeline.submit( aml_workspace, - experiment_name, + e.experiment_name, pipeline_parameters) run_id = response.id diff --git a/ml_service/util/attach_compute.py b/ml_service/util/attach_compute.py index 7a34cd38..569e3041 100644 --- a/ml_service/util/attach_compute.py +++ b/ml_service/util/attach_compute.py @@ -1,9 +1,8 @@ -import os -from dotenv import load_dotenv from azureml.core import Workspace from azureml.core.compute import AmlCompute from azureml.core.compute import ComputeTarget from azureml.exceptions import ComputeTargetException +from env_variables import Env def get_compute( @@ -11,10 +10,6 @@ def get_compute( compute_name: str, vm_size: str ): - # Load the environment variables from .env in case this script - # is called outside an existing process - load_dotenv() - # Verify that cluster does not exist already try: if compute_name in workspace.compute_targets: compute_target = workspace.compute_targets[compute_name] @@ -22,12 +17,12 @@ def get_compute( print('Found existing compute target ' + compute_name + ' so using it.') else: + e = Env() compute_config = AmlCompute.provisioning_configuration( vm_size=vm_size, - vm_priority=os.environ.get("AML_CLUSTER_PRIORITY", - 'lowpriority'), - min_nodes=int(os.environ.get("AML_CLUSTER_MIN_NODES", 0)), - max_nodes=int(os.environ.get("AML_CLUSTER_MAX_NODES", 4)), + vm_priority=e.vm_priority, + min_nodes=e.min_nodes, + max_nodes=e.max_nodes, idle_seconds_before_scaledown="300" # #Uncomment the below lines for VNet support # vnet_resourcegroup_name=vnet_resourcegroup_name, diff --git a/ml_service/util/create_scoring_image.py 
b/ml_service/util/create_scoring_image.py index 08ae49b5..7e99bd28 100644 --- a/ml_service/util/create_scoring_image.py +++ b/ml_service/util/create_scoring_image.py @@ -2,36 +2,25 @@ from azureml.core import Workspace from azureml.core.image import ContainerImage, Image from azureml.core.model import Model -from dotenv import load_dotenv from azureml.core.authentication import ServicePrincipalAuthentication +from env_variables import Env -load_dotenv() - -TENANT_ID = os.environ.get('TENANT_ID') -APP_ID = os.environ.get('SP_APP_ID') -APP_SECRET = os.environ.get('SP_APP_SECRET') -WORKSPACE_NAME = os.environ.get("BASE_NAME")+"-AML-WS" -SUBSCRIPTION_ID = os.environ.get('SUBSCRIPTION_ID') -RESOURCE_GROUP = os.environ.get("BASE_NAME")+"-AML-RG" -MODEL_NAME = os.environ.get('MODEL_NAME') -MODEL_VERSION = os.environ.get('MODEL_VERSION') -IMAGE_NAME = os.environ.get('IMAGE_NAME') - +e = Env() SP_AUTH = ServicePrincipalAuthentication( - tenant_id=TENANT_ID, - service_principal_id=APP_ID, - service_principal_password=APP_SECRET) + tenant_id=e.tenant_id, + service_principal_id=e.app_id, + service_principal_password=e.app_secret) ws = Workspace.get( - WORKSPACE_NAME, + e.workspace_name, SP_AUTH, - SUBSCRIPTION_ID, - RESOURCE_GROUP + e.subscription_id, + e.resource_group ) -model = Model(ws, name=MODEL_NAME, version=MODEL_VERSION) +model = Model(ws, name=e.model_name, version=e.model_version) os.chdir("./code/scoring") image_config = ContainerImage.image_configuration( @@ -43,7 +32,7 @@ ) image = Image.create( - name=IMAGE_NAME, models=[model], image_config=image_config, workspace=ws + name=e.image_name, models=[model], image_config=image_config, workspace=ws ) image.wait_for_creation(show_output=True) diff --git a/ml_service/util/env_variables.py b/ml_service/util/env_variables.py new file mode 100644 index 00000000..c420bec8 --- /dev/null +++ b/ml_service/util/env_variables.py @@ -0,0 +1,127 @@ +import os +from dotenv import load_dotenv + + +class Singleton(object): + 
_instances = {} + + def __new__(class_, *args, **kwargs): + if class_ not in class_._instances: + class_._instances[class_] = super(Singleton, class_).__new__(class_, *args, **kwargs) # noqa E501 + return class_._instances[class_] + + +class Env(Singleton): + + def __init__(self): + load_dotenv() + self._workspace_name = os.environ.get("WORKSPACE_NAME") + self._resource_group = os.environ.get("RESOURCEGROUP_NAME") + self._subscription_id = os.environ.get("SUBSCRIPTION_ID") + self._tenant_id = os.environ.get("TENANT_ID") + self._app_id = os.environ.get("SP_APP_ID") + self._app_secret = os.environ.get("SP_APP_SECRET") + self._vm_size = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU") + self._compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME") + self._db_cluster_id = os.environ.get("DB_CLUSTER_ID") + self._build_id = os.environ.get("BUILD_BUILDID") + self._pipeline_name = os.environ.get("TRAINING_PIPELINE_NAME") + self._sources_directory_train = os.environ.get("SOURCES_DIR_TRAIN") + self._train_script_path = os.environ.get("TRAIN_SCRIPT_PATH") + self._evaluate_script_path = os.environ.get("EVALUATE_SCRIPT_PATH") + self._model_name = os.environ.get("MODEL_NAME") + self._experiment_name = os.environ.get("EXPERIMENT_NAME") + self._vm_priority = os.environ.get("AML_CLUSTER_PRIORITY", 'lowpriority') # noqa E501 + self._min_nodes = int(os.environ.get("AML_CLUSTER_MIN_NODES", 0)) + self._max_nodes = int(os.environ.get("AML_CLUSTER_MAX_NODES", 4)) + self._model_version = os.environ.get('MODEL_VERSION') + self._image_name = os.environ.get('IMAGE_NAME') + self._model_path = os.environ.get('MODEL_PATH') + + @property + def workspace_name(self): + return self._workspace_name + + @property + def resource_group(self): + return self._resource_group + + @property + def subscription_id(self): + return self._subscription_id + + @property + def tenant_id(self): + return self._tenant_id + + @property + def app_id(self): + return self._app_id + + @property + def app_secret(self): + 
return self._app_secret + + @property + def vm_size(self): + return self._vm_size + + @property + def compute_name(self): + return self._compute_name + + @property + def db_cluster_id(self): + return self._db_cluster_id + + @property + def build_id(self): + return self._build_id + + @property + def pipeline_name(self): + return self._pipeline_name + + @property + def sources_directory_train(self): + return self._sources_directory_train + + @property + def train_script_path(self): + return self._train_script_path + + @property + def evaluate_script_path(self): + return self._evaluate_script_path + + @property + def model_name(self): + return self._model_name + + @property + def experiment_name(self): + return self._experiment_name + + @property + def vm_priority(self): + return self._vm_priority + + @property + def min_nodes(self): + return self._min_nodes + + @property + def max_nodes(self): + return self._max_nodes + + @property + def model_version(self): + return self._model_version + + @property + def image_name(self): + return self._image_name + + @property + def model_path(self): + return self._model_path diff --git a/ml_service/util/register_model.py b/ml_service/util/register_model.py index ea26a997..7c99aaac 100644 --- a/ml_service/util/register_model.py +++ b/ml_service/util/register_model.py @@ -1,47 +1,35 @@ import sys import os import os.path -from dotenv import load_dotenv from azureml.core import Workspace from azureml.core.model import Model from azureml.core.authentication import ServicePrincipalAuthentication +from env_variables import Env -# Load the environment variables from .env in case this script -# is called outside an existing process -load_dotenv() +e = Env() -TENANT_ID = os.environ.get('TENANT_ID') -APP_ID = os.environ.get('SP_APP_ID') -APP_SECRET = os.environ.get('SP_APP_SECRET') -MODEL_PATH = os.environ.get('MODEL_PATH') -MODEL_NAME = os.environ.get('MODEL_NAME') -WORKSPACE_NAME = os.environ.get("BASE_NAME")+"-AML-WS" -SUBSCRIPTION_ID = 
os.environ.get('SUBSCRIPTION_ID') -RESOURCE_GROUP = os.environ.get("BASE_NAME")+"-AML-RG" - - -if os.path.isfile(MODEL_PATH) is False: - print("The given model path %s is invalid" % (MODEL_PATH)) +if os.path.isfile(e.model_path) is False: + print("The given model path %s is invalid" % (e.model_path)) sys.exit(1) SP_AUTH = ServicePrincipalAuthentication( - tenant_id=TENANT_ID, - service_principal_id=APP_ID, - service_principal_password=APP_SECRET) + tenant_id=e.tenant_id, + service_principal_id=e.app_id, + service_principal_password=e.app_secret) WORKSPACE = Workspace.get( - WORKSPACE_NAME, + e.workspace_name, SP_AUTH, - SUBSCRIPTION_ID, - RESOURCE_GROUP + e.subscription_id, + e.resource_group ) try: MODEL = Model.register( - model_path=MODEL_PATH, - model_name=MODEL_NAME, + model_path=e.model_path, + model_name=e.model_name, description="Forecasting Model", - workspace=WORKSPACE) + workspace=e.workspace) print("Model registered successfully. ID: " + MODEL.id) except Exception as caught_error: diff --git a/tests/unit/code_test.py b/tests/unit/code_test.py index b22b186c..3c49454d 100644 --- a/tests/unit/code_test.py +++ b/tests/unit/code_test.py @@ -2,17 +2,19 @@ import os sys.path.append(os.path.abspath("./ml_service/util")) # NOQA: E402 from workspace import get_workspace +from env_variables import Env # Just an example of a unit test against # a utility function common_scoring.next_saturday def test_get_workspace(): - workspace_name = os.environ.get("BASE_NAME")+"-AML-WS" - resource_group = os.environ.get("BASE_NAME")+"-AML-RG" - subscription_id = os.environ.get("SUBSCRIPTION_ID") - tenant_id = os.environ.get("TENANT_ID") - app_id = os.environ.get("SP_APP_ID") - app_secret = os.environ.get("SP_APP_SECRET") + e = Env() + workspace_name = e.workspace_name + resource_group = e.resource_group + subscription_id = e.subscription_id + tenant_id = e.tenant_id + app_id = e.app_id + app_secret = e.app_secret aml_workspace = get_workspace( workspace_name, From 
b17b30a75627b591175ab7b2fe127e6b84efb0b8 Mon Sep 17 00:00:00 2001 From: Dariusz Parys Date: Tue, 19 Nov 2019 13:51:45 +0100 Subject: [PATCH 2/6] Adding Workspace in Variable Group The workspace has now to be defined in the variable group - Adjusted infrastructure as code template --- environment_setup/iac-create-environment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/environment_setup/iac-create-environment.yml b/environment_setup/iac-create-environment.yml index 585f3ba5..8f795857 100644 --- a/environment_setup/iac-create-environment.yml +++ b/environment_setup/iac-create-environment.yml @@ -29,7 +29,7 @@ steps: location: $(LOCATION) templateLocation: 'Linked artifact' csmFile: '$(Build.SourcesDirectory)/environment_setup/arm-templates/cloud-environment.json' - overrideParameters: '-baseName $(BASE_NAME) -location $(LOCATION)' + overrideParameters: '-baseName $(BASE_NAME) -location $(LOCATION) -workspace $(WORKSPACE_NAME)' deploymentMode: 'Incremental' displayName: 'Deploy MLOps resources to Azure' From e285ec1e8262f663f7bab8838ad0a7deaf055a3b Mon Sep 17 00:00:00 2001 From: Dariusz Parys Date: Tue, 19 Nov 2019 15:11:33 +0100 Subject: [PATCH 3/6] Default WORKSPACE_NAME definition In case there is no WORKSPACE_NAME in the Azure DevOps variable group we build up the workspace using the provided BASE_NAME variable. 
--- ml_service/util/env_variables.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ml_service/util/env_variables.py b/ml_service/util/env_variables.py index c420bec8..01b2e867 100644 --- a/ml_service/util/env_variables.py +++ b/ml_service/util/env_variables.py @@ -16,6 +16,8 @@ class Env(Singleton): def __init__(self): load_dotenv() self._workspace_name = os.environ.get("WORKSPACE_NAME") + if not self._workspace_name: + self._workspace_name = os.environ.get("BASE_NAME") + "-AML-WS" self._resource_group = os.environ.get("RESOURCEGROUP_NAME") self._subscription_id = os.environ.get("SUBSCRIPTION_ID") self._tenant_id = os.environ.get("TENANT_ID") From 3b0a4dbcda865c5ff92d26f38f2b26af3483ad93 Mon Sep 17 00:00:00 2001 From: Dariusz Parys Date: Tue, 19 Nov 2019 15:33:38 +0100 Subject: [PATCH 4/6] Extended Getting Started Documentation Describes the usage of all variable definitions --- docs/getting_started.md | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/docs/getting_started.md b/docs/getting_started.md index d39be2e8..b12a05d0 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -47,27 +47,17 @@ Click on **Library** in the **Pipelines** section as indicated below: Please name your variable group **``devopsforai-aml-vg``** as we are using this name within our build yaml file. 
-The variable group should contain the following variables: +The variable group should contain the following required variables: | Variable Name | Suggested Value | | --------------------------- | -----------------------------------| -| AML_COMPUTE_CLUSTER_CPU_SKU | STANDARD_DS2_V2 | -| AML_COMPUTE_CLUSTER_NAME | train-cluster | | BASE_NAME | [unique base name] | -| DB_CLUSTER_ID | [Optional Databricks cluster Id] | -| DATABRICKS_COMPUTE_NAME | [Optional Databricks compute name] | -| EVALUATE_SCRIPT_PATH | evaluate/evaluate_model.py | -| EXPERIMENT_NAME | mlopspython | | LOCATION | centralus | -| MODEL_NAME | sklearn_regression_model.pkl | -| REGISTER_SCRIPT_PATH | register/register_model.py | -| SOURCES_DIR_TRAIN | code | | SP_APP_ID | | | SP_APP_SECRET | | | SUBSCRIPTION_ID | | | TENANT_ID | | -| TRAIN_SCRIPT_PATH | training/train.py | -| TRAINING_PIPELINE_NAME | training-pipeline | +| RESOURCE_GROUP | | Mark **SP_APP_SECRET** variable as a secret one. @@ -79,9 +69,33 @@ BASE_NAME variable (e.g. MyUniqueML), so that the created resources will have unique names (e.g. MyUniqueML-AML-RG, MyUniqueML-AML-WS, etc.). The length of the BASE_NAME value should not exceed 10 characters. +> You can also adjust the pipeline `environment_setup/iac-create-environment.yml` to include the following parameters to specify individual names for each resource within the resource-group: `workspace` (Azure Machine Learning Workspace name), `storageAccount` (Azure Storage Account name), `keyvault` (Azure Key Vault name), `appInsights` (Application Insights Name) and `acr` (Azure Container Registry name). + Make sure to select the **Allow access to all pipelines** checkbox in the variable group configuration. +## Adjust .pipelines/azdo-variables.yml + +Non subscription related variables are stored as variable template in the file `.pipelines/azdo-variables.yml`. In order to change the predefined once feel free to adjust the values as needed. 
They're prepopulated as follows: + +| Variable Name | Suggested Value | +| --------------------------- | -----------------------------------| +| AML_COMPUTE_CLUSTER_CPU_SKU | STANDARD_DS2_V2 | +| AML_COMPUTE_CLUSTER_NAME | train-cluster | +| AML_CLUSTER_MIN_NODES | 0 | +| AML_CLUSTER_MAX_NODES | 4 | +| AML_CLUSTER_PRIORITY | lowpriority | +| BUILD_BUILDID | 001 | +| DB_CLUSTER_ID | [Optional Databricks cluster Id] | +| EVALUATE_SCRIPT_PATH | evaluate/evaluate_model.py | +| EXPERIMENT_NAME | mlopspython | +| IMAGE_NAME | | +| MODEL_NAME | sklearn_regression_model.pkl | +| REGISTER_SCRIPT_PATH | register/register_model.py | +| SOURCES_DIR_TRAIN | code | +| TRAIN_SCRIPT_PATH | training/train.py | +| TRAINING_PIPELINE_NAME | training-pipeline | + Up until now you should have: * Forked (or cloned) the repo From c683af74db1df0dab387aeb091950993bf5be20c Mon Sep 17 00:00:00 2001 From: Dariusz Parys Date: Wed, 20 Nov 2019 10:20:12 +0100 Subject: [PATCH 5/6] Incorporated feedback - Removed unnecessary variables from `azdo-variables.yml` - `workspace` is now a required ARM template parameter. 
Needs to be existent in the variable group as `WORKSPACE_NAME` - Cleaned up `.env.example` to only include variables needed to execute scripts in local environments --- .env.example | 35 ++++++------------- .pipelines/azdo-variables.yml | 31 ++++++++-------- .../arm-templates/cloud-environment.json | 3 +- environment_setup/iac-create-environment.yml | 2 +- environment_setup/iac-remove-environment.yml | 2 +- ml_service/pipelines/run_train_pipeline.py | 3 ++ ml_service/util/env_variables.py | 12 +++---- 7 files changed, 37 insertions(+), 51 deletions(-) diff --git a/.env.example b/.env.example index 2f62dad7..ea1338ae 100644 --- a/.env.example +++ b/.env.example @@ -1,52 +1,39 @@ # Azure Subscription Variables SUBSCRIPTION_ID = '' -LOCATION = '' +LOCATION = 'westeurope' TENANT_ID = '' BASE_NAME = '' SP_APP_ID = '' SP_APP_SECRET = '' +RESOURCE_GROUP = 'mlops-rg' # Mock build/release ID for local testing - update ReleaseID each "release" BUILD_BUILDID = '001' RELEASE_RELEASEID = '001' # Azure ML Workspace Variables +WORKSPACE_NAME = '' EXPERIMENT_NAME = '' -SCRIPT_FOLDER = './' # AML Compute Cluster Config -AML_COMPUTE_CLUSTER_NAME = '' -AML_COMPUTE_CLUSTER_CPU_SKU = '' -AML_CLUSTER_MAX_NODES = '' -AML_CLUSTER_MIN_NODES = '' +AML_COMPUTE_CLUSTER_NAME = 'train-cluster' +AML_COMPUTE_CLUSTER_CPU_SKU = 'STANDARD_DS2_V2' +AML_CLUSTER_MAX_NODES = '4' +AML_CLUSTER_MIN_NODES = '0' AML_CLUSTER_PRIORITY = 'lowpriority' # Training Config MODEL_NAME = 'sklearn_regression_model.pkl' MODEL_VERSION = '1' TRAIN_SCRIPT_PATH = 'training/train.py' # AML Pipeline Config -TRAINING_PIPELINE_NAME = '' -PIPELINE_CONDA_PATH = 'aml_config/conda_dependencies.yml' +TRAINING_PIPELINE_NAME = 'Training Pipeline' MODEL_PATH = '' EVALUATE_SCRIPT_PATH = 'evaluate/evaluate_model.py' REGISTER_SCRIPT_PATH = 'register/register_model.py' SOURCES_DIR_TRAIN = 'code' -# These are not mandatory for the core workflow -# Remote VM Config -REMOTE_VM_NAME = '' -REMOTE_VM_USERNAME = '' -REMOTE_VM_PASSWORD = ''
-REMOTE_VM_IP = '' -# Image config -IMAGE_NAME = '' -IMAGE_DESCRIPTION = '' -IMAGE_VERSION = '' -# ACI Config -ACI_CPU_CORES = '' -ACI_MEM_GB = '' -ACI_DESCRIPTION = '' - # Optional. Used by a training pipeline with R on Databricks DB_CLUSTER_ID = '' -DATABRICKS_COMPUTE_NAME = '' \ No newline at end of file + +# Optional. Container Image name for image creation +IMAGE_NAME = 'ml-trained' \ No newline at end of file diff --git a/.pipelines/azdo-variables.yml b/.pipelines/azdo-variables.yml index d0914e3d..64a42d5b 100644 --- a/.pipelines/azdo-variables.yml +++ b/.pipelines/azdo-variables.yml @@ -1,8 +1,8 @@ variables: -- name: DEVOPS_VARIABLEGROUP - value: devopsforai-aml-vg -- name: BUILD_TRAIN_SCRIPT - value: build_train_pipeline.py + # Azure ML Workspace Variables +- name: EXPERIMENT_NAME + value: mlopspython + # AML Compute Cluster Config - name: AML_COMPUTE_CLUSTER_CPU_SKU value: STANDARD_DS2_V2 - name: AML_COMPUTE_CLUSTER_NAME @@ -13,29 +13,28 @@ variables: value: 4 - name: AML_CLUSTER_PRIORITY value: lowpriority -- name: EVALUATE_SCRIPT_PATH - value: evaluate/evaluate_model.py -- name: EXPERIMENT_NAME - value: mlopspython + # Training Config +- name: BUILD_TRAIN_SCRIPT + value: build_train_pipeline.py +- name: TRAIN_SCRIPT_PATH + value: training/train.py - name: MODEL_NAME value: sklearn_regression_model.pkl - name: MODEL_VERSION value: '1' + # AML Pipeline Config +- name: TRAINING_PIPELINE_NAME + value: 'Training Pipeline' - name: MODEL_PATH value: '' +- name: EVALUATE_SCRIPT_PATH + value: evaluate/evaluate_model.py - name: REGISTER_SCRIPT_PATH value: register/register_model.py - name: SOURCES_DIR_TRAIN value: code -- name: TRAIN_SCRIPT_PATH - value: training/train.py -- name: TRAINING_PIPELINE_NAME - value: training-pipeline -- name: BUILD_BUILDID - value: 001 -- name: RELEASE_RELEASEID - value: 001 - name: IMAGE_NAME value: '' + # Optional. 
Used by a training pipeline with R on Databricks - name: DB_CLUSTER_ID value: '' \ No newline at end of file diff --git a/environment_setup/arm-templates/cloud-environment.json b/environment_setup/arm-templates/cloud-environment.json index a7f02e54..f2b2ac2f 100644 --- a/environment_setup/arm-templates/cloud-environment.json +++ b/environment_setup/arm-templates/cloud-environment.json @@ -28,8 +28,7 @@ } }, "workspace": { - "type": "string", - "defaultValue": "[concat(parameters('baseName'),'-AML-WS')]" + "type": "string" }, "storageAccount": { "type": "string", diff --git a/environment_setup/iac-create-environment.yml b/environment_setup/iac-create-environment.yml index 8f795857..f4c08ddf 100644 --- a/environment_setup/iac-create-environment.yml +++ b/environment_setup/iac-create-environment.yml @@ -25,7 +25,7 @@ steps: inputs: azureSubscription: 'AzureResourceConnection' action: 'Create Or Update Resource Group' - resourceGroupName: '$(RESOURCEGROUP_NAME)' + resourceGroupName: '$(RESOURCE_GROUP)' location: $(LOCATION) templateLocation: 'Linked artifact' csmFile: '$(Build.SourcesDirectory)/environment_setup/arm-templates/cloud-environment.json' diff --git a/environment_setup/iac-remove-environment.yml b/environment_setup/iac-remove-environment.yml index 81c257f8..67626223 100644 --- a/environment_setup/iac-remove-environment.yml +++ b/environment_setup/iac-remove-environment.yml @@ -18,7 +18,7 @@ steps: inputs: azureSubscription: 'AzureResourceConnection' action: 'DeleteRG' - resourceGroupName: '$(RESOURCEGROUP_NAME)' + resourceGroupName: '$(RESOURCE_GROUP)' location: $(LOCATION) displayName: 'Delete resources in Azure' diff --git a/ml_service/pipelines/run_train_pipeline.py b/ml_service/pipelines/run_train_pipeline.py index 3ee03056..fdc8f5a5 100644 --- a/ml_service/pipelines/run_train_pipeline.py +++ b/ml_service/pipelines/run_train_pipeline.py @@ -1,6 +1,9 @@ from azureml.pipeline.core import PublishedPipeline from azureml.core import Workspace from 
azureml.core.authentication import ServicePrincipalAuthentication +import os +import sys +sys.path.append(os.path.abspath("./ml_service/util")) # NOQA: E402 from env_variables import Env diff --git a/ml_service/util/env_variables.py b/ml_service/util/env_variables.py index 01b2e867..9fe6d061 100644 --- a/ml_service/util/env_variables.py +++ b/ml_service/util/env_variables.py @@ -16,16 +16,16 @@ class Env(Singleton): def __init__(self): load_dotenv() self._workspace_name = os.environ.get("WORKSPACE_NAME") - if not self._workspace_name: - self._workspace_name = os.environ.get("BASE_NAME") + "-AML-WS" - self._resource_group = os.environ.get("RESOURCEGROUP_NAME") + self._resource_group = os.environ.get("RESOURCE_GROUP") self._subscription_id = os.environ.get("SUBSCRIPTION_ID") self._tenant_id = os.environ.get("TENANT_ID") self._app_id = os.environ.get("SP_APP_ID") self._app_secret = os.environ.get("SP_APP_SECRET") self._vm_size = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU") self._compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME") - self._db_cluster_id = os.environ.get("DB_CLUSTER_ID") + self._vm_priority = os.environ.get("AML_CLUSTER_PRIORITY", 'lowpriority') # noqa E501 + self._min_nodes = int(os.environ.get("AML_CLUSTER_MIN_NODES", 0)) + self._max_nodes = int(os.environ.get("AML_CLUSTER_MAX_NODES", 4)) self._build_id = os.environ.get("BUILD_BUILDID") self._pipeline_name = os.environ.get("TRAINING_PIPELINE_NAME") self._sources_directory_train = os.environ.get("SOURCES_DIR_TRAIN") @@ -33,12 +33,10 @@ def __init__(self): self._evaluate_script_path = os.environ.get("EVALUATE_SCRIPT_PATH") self._model_name = os.environ.get("MODEL_NAME") self._experiment_name = os.environ.get("EXPERIMENT_NAME") - self._vm_priority = os.environ.get("AML_CLUSTER_PRIORITY", 'lowpriority') # noqa E501 - self._min_nodes = int(os.environ.get("AML_CLUSTER_MIN_NODES", 0)) - self._max_nodes = int(os.environ.get("AML_CLUSTER_MAX_NODES", 4)) self._model_version = 
os.environ.get('MODEL_VERSION') self._image_name = os.environ.get('IMAGE_NAME') self._model_path = os.environ.get('MODEL_PATH') + self._db_cluster_id = os.environ.get("DB_CLUSTER_ID") @property def workspace_name(self): From cc9fed106a4794c9f737f3d9124eb8e09c7e966d Mon Sep 17 00:00:00 2001 From: Dariusz Parys Date: Wed, 20 Nov 2019 11:40:17 +0100 Subject: [PATCH 6/6] Modified Getting Started Document Included pointers where to find the proper variable options in order to execute the project locally vs. Azure DevOps --- docs/getting_started.md | 41 +++++++++++++++++------------------------ 1 file changed, 17 insertions(+), 24 deletions(-) diff --git a/docs/getting_started.md b/docs/getting_started.md index b12a05d0..cc56c6c4 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -58,43 +58,36 @@ The variable group should contain the following required variables: | SUBSCRIPTION_ID | | | TENANT_ID | | | RESOURCE_GROUP | | +| WORKSPACE_NAME | mlops-AML-WS | Mark **SP_APP_SECRET** variable as a secret one. -**Note:** The **BASE_NAME** parameter is used throughout the solution for naming +**Note:** + +The **WORKSPACE_NAME** parameter is used for the Azure Machine Learning Workspace creation. You can provide here an existing AML Workspace if you have one. + +The **BASE_NAME** parameter is used throughout the solution for naming Azure resources. When the solution is used in a shared subscription, there can be naming collisions with resources that require unique names like azure blob storage and registry DNS naming. Make sure to give a unique value to the BASE_NAME variable (e.g. MyUniqueML), so that the created resources will have -unique names (e.g. MyUniqueML-AML-RG, MyUniqueML-AML-WS, etc.). The length of -the BASE_NAME value should not exceed 10 characters. 
- -> You can also adjust the pipeline `environment_setup/iac-create-environment.yml` to include the following parameters to specify individual names for each resource within the resource-group: `workspace` (Azure Machine Learning Workspace name), `storageAccount` (Azure Storage Account name), `keyvault` (Azure Key Vault name), `appInsights` (Application Insights Name) and `acr` (Azure Container Registry name). +unique names (e.g. MyUniqueML-AML-RG, MyUniqueML-AML-KV, etc.). The length of the BASE_NAME value should not exceed 10 characters. Make sure to select the **Allow access to all pipelines** checkbox in the variable group configuration. -## Adjust .pipelines/azdo-variables.yml +## More variable options -Non subscription related variables are stored as variable template in the file `.pipelines/azdo-variables.yml`. In order to change the predefined once feel free to adjust the values as needed. They're prepopulated as follows: +There are more variables used in the project. They're defined in two places: one for local execution and one for using Azure DevOps Pipelines. -| Variable Name | Suggested Value | -| --------------------------- | -----------------------------------| -| AML_COMPUTE_CLUSTER_CPU_SKU | STANDARD_DS2_V2 | -| AML_COMPUTE_CLUSTER_NAME | train-cluster | -| AML_CLUSTER_MIN_NODES | 0 | -| AML_CLUSTER_MAX_NODES | 4 | -| AML_CLUSTER_PRIORITY | lowpriority | -| BUILD_BUILDID | 001 | -| DB_CLUSTER_ID | [Optional Databricks cluster Id] | -| EVALUATE_SCRIPT_PATH | evaluate/evaluate_model.py | -| EXPERIMENT_NAME | mlopspython | -| IMAGE_NAME | | -| MODEL_NAME | sklearn_regression_model.pkl | -| REGISTER_SCRIPT_PATH | register/register_model.py | -| SOURCES_DIR_TRAIN | code | -| TRAIN_SCRIPT_PATH | training/train.py | -| TRAINING_PIPELINE_NAME | training-pipeline | +### Local configuration + +In order to configure the project locally, you have to create a copy of `.env.example` in the root and name it `.env`.
Fill out all missing values and adjust the existing ones to your needs. Please be aware that the local environment also needs access to the Azure subscription, so you have to provide the credentials of your service principal and Azure account information here as well. + +### Azure DevOps configuration + +For using Azure DevOps Pipelines, all other variables are stored in the file `.pipelines/azdo-variables.yml`. Adjust the variables as needed; the defaults will give you an easy jump start. Up until now you should have: