From 0c115175eacdb9640e87eebd5134b043d158c96d Mon Sep 17 00:00:00 2001 From: Eddy Decena Date: Tue, 19 Jan 2021 16:15:46 -0400 Subject: [PATCH 1/5] Create CLI interface in /src/python/tensorflow_cloud/core/cli.py - Create CLI interface based on typer in top of click (docs: https://typer.tiangolo.com/) - Create command for get remote (something like: tfc remote) - Create command for run (something like: tfc run --entry-point mnist_example.py) --- src/python/dependencies.py | 2 + src/python/tensorflow_cloud/core/cli.py | 76 +++++++++++++++++++++++++ 2 files changed, 78 insertions(+) create mode 100644 src/python/tensorflow_cloud/core/cli.py diff --git a/src/python/dependencies.py b/src/python/dependencies.py index d171cca9..589dcdc7 100644 --- a/src/python/dependencies.py +++ b/src/python/dependencies.py @@ -26,6 +26,8 @@ def make_required_install_packages(): "tensorboard>=2.3.0", "tensorflow>=1.15.0,<3.0", "tensorflow_datasets<3.1.0", + "colorama==0.4.4", + "typer[all]==0.3.2", ] diff --git a/src/python/tensorflow_cloud/core/cli.py b/src/python/tensorflow_cloud/core/cli.py new file mode 100644 index 00000000..c4fe185b --- /dev/null +++ b/src/python/tensorflow_cloud/core/cli.py @@ -0,0 +1,76 @@ +from typing import Optional + +import typer + +from .run import run +from .run import remote +from . import docker_config as docker_config_module +from .machine_config import COMMON_MACHINE_CONFIGS + +app = typer.Typer() + +@app.command("remote") +def remote_command(): + """ + To know is you code is running remote with TF Cloud. 
+ """ + if remote(): + typer.echo("Running remotely") + typer.Exit() + + return typer.echo("Running Locally") + +@app.command("run", help="Run your code in a remote cloud environment with TF Cloud.") +def run_command( + entry_point: Optional[str] = typer.Argument(..., help="File path to the python file or iPython notebook that contains the TensorFlow code"), + requirements_txt: Optional[str] = typer.Option(None, help="File path to requirements.txt file containing additional pip dependencies if any"), + image_uri: Optional[str] = typer.Option(None, help="Docker image URI for the Docker image being built"), + parent_image: Optional[str] = typer.Option(None, help="Parent Docker image to use. Example value - 'gcr.io/my_gcp_project/deep_learning:v2' If a parent Docker image is not provided here, we will use a [TensorFlow Docker image](https://www.tensorflow.org/install/docker) as the parent image."), + cache_from: Optional[str] = typer.Option(None, help="Docker image URI to be used as a cache when building the new Docker image. This is especially useful if you are iteratively improving your model architecture/training code. If this parameter is not provided, then we will use `image` URI as cache."), + image_build_bucket: Optional[str] = typer.Option(None, help="GCS bucket name to be used for building a Docker image via [Google Cloud Build](https://cloud.google.com/cloud-build/). If it is not specified, then your local Docker daemon will be used for Docker build."), + distribution_strategy: str = typer.Option("auto", help="Tensorflow distribution strategy based on the machine config"), + chief_config: str = typer.Option("auto", help="`MachineConfig` that represents the configuration for the chief worker in a distribution cluster. 
Choose between (CPU, K80_1X, K80_4X, K80_8X, P100_1X, P100_4X, P4_1X, P4_4X, V100_1X, V100_4X, T4_1X, T4_4X, TPU)"), + worker_config: str = typer.Option("auto", help="`MachineConfig` that represents the configuration for the general workers in a distribution cluster. Choose between (CPU, K80_1X, K80_4X, K80_8X, P100_1X, P100_4X, P4_1X, P4_4X, V100_1X, V100_4X, T4_1X, T4_4X, TPU)"), + worker_count: int = typer.Option(0, help="Represents the number of general workers in a distribution cluster."), + entry_point_args: str = typer.Option(None, help="Command line arguments to pass to the `entry_point` program. Not implemented yet."), + stream_logs: bool = typer.Option(False, help="Boolean flag which when enabled streams logs back from the cloud job."), + job_labels: str = typer.Option(None, help="Labels to organize jobs. You can specify up to 64 key-value pairs in lowercase letters and numbers, where the first character must be lowercase letter. For more details see https://cloud.google.com/ai-platform/training/docs/resource-labels. 
Not implemented yet.") +): + entry_point_args = None + job_labels = None + + docker_config = docker_config_module.DockerConfig(image=image_uri, + parent_image=parent_image, + cache_from=cache_from, + image_build_bucket=image_build_bucket) + + if chief_config != "auto": + try: + chief_config = COMMON_MACHINE_CONFIGS[chief_config] + except KeyError: + typer.BadParameter("You need to choose a between (CPU, K80_1X, K80_4X, K80_8X, P100_1X, P100_4X, P4_1X, P4_4X, V100_1X, V100_4X, T4_1X, T4_4X, TPU) options") + + if worker_config != "auto": + try: + worker_config = COMMON_MACHINE_CONFIGS[worker_config] + except KeyError: + typer.BadParameter("You need to choose a between (CPU, K80_1X, K80_4X, K80_8X, P100_1X, P100_4X, P4_1X, P4_4X, V100_1X, V100_4X, T4_1X, T4_4X, TPU) options") + + info = run( + entry_point=entry_point, + requirements_txt=requirements_txt, + docker_config=docker_config, + distribution_strategy=distribution_strategy, + chief_config=chief_config, + worker_config=worker_config, + worker_count=worker_count, + entry_point_args=entry_point_args, + stream_logs=stream_logs, + job_labels=job_labels + ) + + typer.echo(f"Job id: {info['job_id']}") + typer.echo(f"Docker image URI: {info['docker_image']}") + +if __name__ == '__main__': + app() \ No newline at end of file From beea426e899d55eb5a22b0b52766e396d0ffd555 Mon Sep 17 00:00:00 2001 From: Eddy Decena Date: Wed, 10 Mar 2021 11:08:27 -0500 Subject: [PATCH 2/5] Adding requested change: 1. Could you move the cli.py module under cloud/src/python/tensorflow_cloud/experimental/cli - I would like to keep it under experimental until we see user feedback and utilization. 2. Please use soft dependency instead of additions to dependancy.py. Specifically you can check for installed modules or attempt to import the required module for the cli.py and raise an error with corresponding instructions for the user when user uses the cli.py module and the required dependancies do not exist. 3. 
[Optional - though highly recommended] Do you mind adding a short readme with instructions on how to generate / update the cli.py module for future developers / maintainers. --- .../experimental/cli/README.md | 55 ++++++++++++ .../tensorflow_cloud/experimental/cli/cli.py | 83 +++++++++++++++++++ 2 files changed, 138 insertions(+) create mode 100644 src/python/tensorflow_cloud/experimental/cli/README.md create mode 100644 src/python/tensorflow_cloud/experimental/cli/cli.py diff --git a/src/python/tensorflow_cloud/experimental/cli/README.md b/src/python/tensorflow_cloud/experimental/cli/README.md new file mode 100644 index 00000000..7d7cf0a0 --- /dev/null +++ b/src/python/tensorflow_cloud/experimental/cli/README.md @@ -0,0 +1,55 @@ +# Tensorflow cloud CLI + +A feature of tfcloud that lets you run tfcloud from your terminal using the `tfc` command. + + +### How to use it + +1. (Follow the configuration and installation instructions.)[https://github.com/tensorflow/cloud#tensorflow-cloud-run-api-for-gcp-trainingtuning] +2. Create a file with all that you need to train your model or a jupyter notebook. 
for example `train.py`: + +``` python +import tensorflow_datasets as tfds +import tensorflow as tf + +datasets, info = tfds.load(name='mnist', with_info=True, as_supervised=True) +mnist_train, mnist_test = datasets['train'], datasets['test'] + +num_train_examples = info.splits['train'].num_examples +num_test_examples = info.splits['test'].num_examples + +BUFFER_SIZE = 10000 +BATCH_SIZE = 64 + +def scale(image, label): + image = tf.cast(image, tf.float32) + image /= 255 + return image, label + +train_dataset = mnist_train.map(scale).cache() +train_dataset = train_dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE) + +model = tf.keras.Sequential([ + tf.keras.layers.Conv2D(32, 3, activation='relu', input_shape=( + 28, 28, 1)), + tf.keras.layers.MaxPooling2D(), + tf.keras.layers.Flatten(), + tf.keras.layers.Dense(64, activation='relu'), + tf.keras.layers.Dense(10, activation='softmax') +]) + +model.compile(loss='sparse_categorical_crossentropy', + optimizer=tf.keras.optimizers.Adam(), + metrics=['accuracy']) +model.fit(train_dataset, epochs=12) +``` + +3. Train your model on the cloud with: + +``` bash +tfc run train.py +``` + +### Contribution guidelines + +We use typer to build the CLI interface for tfcloud, if you want to contributed to this feature is important to follow typer ideology of write type annotations in the code. 
(Typer documentation.)[https://typer.tiangolo.com/] \ No newline at end of file diff --git a/src/python/tensorflow_cloud/experimental/cli/cli.py b/src/python/tensorflow_cloud/experimental/cli/cli.py new file mode 100644 index 00000000..4ff3fd2b --- /dev/null +++ b/src/python/tensorflow_cloud/experimental/cli/cli.py @@ -0,0 +1,83 @@ +from typing import Optional +from typing import Any +from typing import Dict + +try: + import typer +except (ImportError, ModuleNotFoundError) as inst: + print(type(inst)) + print(inst) + print('You can try running "pip install colorama==0.4.4 typer[all]==0.3.2" on you terminal') + +from src.python.tensorflow_cloud.core.run import run +from src.python.tensorflow_cloud.core.run import remote +from src.python.tensorflow_cloud.core import docker_config as docker_config_module +from src.python.tensorflow_cloud.core.machine_config import COMMON_MACHINE_CONFIGS + +app = typer.Typer() + +@app.command("remote") +def remote_command(): + """ + To know is you code is running remote with TF Cloud. + """ + if remote(): + typer.echo("Running remotly") + typer.Exit() + + return typer.echo("Running Localy") + +@app.command("run", help="Run your code in a remote cloud environment with TF Cloud.") +def run_command( + entry_point: Optional[str] = typer.Argument(..., help="File path to the python file or iPython notebook that contains the TensorFlow code"), + requirements_txt: Optional[str] = typer.Option(None, help="File path to requirements.txt file containing additional pip dependencies if any"), + image_uri: Optional[str] = typer.Option(None, help="Docker image URI for the Docker image being built"), + parent_image: Optional[str] = typer.Option(None, help="Parent Docker image to use. 
Example value - 'gcr.io/my_gcp_project/deep_learning:v2' If a parent Docker image is not provided here, we will use a [TensorFlow Docker image](https://www.tensorflow.org/install/docker) as the parent image."), + cache_from: Optional[str] = typer.Option(None, help="Docker image URI to be used as a cache when building the new Docker image. This is especially useful if you are iteratively improving your model architecture/training code. If this parameter is not provided, then we will use `image` URI as cache."), + image_build_bucket: Optional[str] = typer.Option(None, help="GCS bucket name to be used for building a Docker image via [Google Cloud Build](https://cloud.google.com/cloud-build/). If it is not specified, then your local Docker daemon will be used for Docker build."), + distribution_strategy: str = typer.Option("auto", help="Tensorflow distribution strategy based on the machine config"), + chief_config: str = typer.Option("auto", help="`MachineConfig` that represents the configuration for the chief worker in a distribution cluster. Choose between (CPU, K80_1X, K80_4X, K80_8X, P100_1X, P100_4X, P4_1X, P4_4X, V100_1X, V100_4X, T4_1X, T4_4X, TPU)"), + worker_config: str = typer.Option("auto", help="`MachineConfig` that represents the configuration for the general workers in a distribution cluster. Choose between (CPU, K80_1X, K80_4X, K80_8X, P100_1X, P100_4X, P4_1X, P4_4X, V100_1X, V100_4X, T4_1X, T4_4X, TPU)"), + worker_count: int = typer.Option(0, help="Represents the number of general workers in a distribution cluster."), + entry_point_args: str = typer.Option(None, help="Command line arguments to pass to the `entry_point` program. Not implemented yet."), # review + stream_logs: bool = typer.Option(False, help="Boolean flag which when enabled streams logs back from the cloud job."), + job_labels: str = typer.Option(None, help="Labels to organize jobs. 
You can specify up to 64 key-value pairs in lowercase letters and numbers, where the first character must be lowercase letter. For more details see https://cloud.google.com/ai-platform/training/docs/resource-labels. Not implemented yet.") # review +): + entry_point_args = None + job_labels = None + + docker_config = docker_config_module.DockerConfig(image=image_uri, + parent_image=parent_image, + cache_from=cache_from, + image_build_bucket=image_build_bucket) + + if chief_config != "auto": + try: + chief_config = COMMON_MACHINE_CONFIGS[chief_config] + except KeyError: + typer.BadParameter("You need to choose a between (CPU, K80_1X, K80_4X, K80_8X, P100_1X, P100_4X, P4_1X, P4_4X, V100_1X, V100_4X, T4_1X, T4_4X, TPU) options") + + if worker_config != "auto": + try: + worker_config = COMMON_MACHINE_CONFIGS[worker_config] + except KeyError: + typer.BadParameter("You need to choose a between (CPU, K80_1X, K80_4X, K80_8X, P100_1X, P100_4X, P4_1X, P4_4X, V100_1X, V100_4X, T4_1X, T4_4X, TPU) options") + + info = run( + entry_point=entry_point, + requirements_txt=requirements_txt, + docker_config=docker_config, + distribution_strategy=distribution_strategy, + chief_config=chief_config, + worker_config=worker_config, + worker_count=worker_count, + entry_point_args=entry_point_args, + stream_logs=stream_logs, + job_labels=job_labels + ) + + typer.echo(f"Job id: {info['job_id']}") + typer.echo(f"Docker image URI: {info['docker_image']}") + +if __name__ == '__main__': + app() \ No newline at end of file From 6e0880b4b9c0f2e109d603e3e607e4f4e621c6ef Mon Sep 17 00:00:00 2001 From: Eddy Decena Date: Wed, 10 Mar 2021 11:18:35 -0500 Subject: [PATCH 3/5] Fix links error in readme 1. Fix link error for instalations 2. 
Fix link error for Typer documentation --- src/python/tensorflow_cloud/experimental/cli/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/python/tensorflow_cloud/experimental/cli/README.md b/src/python/tensorflow_cloud/experimental/cli/README.md index 7d7cf0a0..72e725c4 100644 --- a/src/python/tensorflow_cloud/experimental/cli/README.md +++ b/src/python/tensorflow_cloud/experimental/cli/README.md @@ -5,7 +5,7 @@ Is a feature of tfcloud that let you run tfcloud from your terminal using `tfc` ### How to use it -1. (Follow the configuration and installation instructions.)[https://github.com/tensorflow/cloud#tensorflow-cloud-run-api-for-gcp-trainingtuning] +1. [Follow the configuration and installation instructions.](https://github.com/tensorflow/cloud#tensorflow-cloud-run-api-for-gcp-trainingtuning) 2. Create a file with all that you need to train your model or a jupyter notebook. for example `train.py`: ``` python @@ -52,4 +52,4 @@ tfc run train.py ### Contribution guidelines -We use typer to build the CLI interface for tfcloud, if you want to contributed to this feature is important to follow typer ideology of write type annotations in the code. (Typer documentation.)[https://typer.tiangolo.com/] \ No newline at end of file +We use typer to build the CLI interface for tfcloud. If you want to contribute to this feature, it is important to follow typer's ideology of writing type annotations in the code. 
[Typer documentation.](https://typer.tiangolo.com/) From a4a242491d201e2aed6079ab6524b2bb2f22273c Mon Sep 17 00:00:00 2001 From: Eddy Decena Date: Wed, 10 Mar 2021 11:32:27 -0500 Subject: [PATCH 4/5] Remove dependency for typer Create dependency for typer as soft dependency --- src/python/dependencies.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/python/dependencies.py b/src/python/dependencies.py index 589dcdc7..f1cddfcd 100644 --- a/src/python/dependencies.py +++ b/src/python/dependencies.py @@ -25,9 +25,7 @@ def make_required_install_packages(): "keras-tuner", "tensorboard>=2.3.0", "tensorflow>=1.15.0,<3.0", - "tensorflow_datasets<3.1.0", - "colorama==0.4.4", - "typer[all]==0.3.2", + "tensorflow_datasets<3.1.0" ] From 5fcce1dfe729fbf1362f7c39d3b97ba0cdbdabba Mon Sep 17 00:00:00 2001 From: Eddy Decena Date: Wed, 17 Mar 2021 10:05:11 -0400 Subject: [PATCH 5/5] Remove src/python/tensorflow_cloud/core/cli.py 1. cli.py file was moved to src/python/tensorflow_cloud/experimental/cli/cli.py --- src/python/tensorflow_cloud/core/cli.py | 76 ------------------------- 1 file changed, 76 deletions(-) delete mode 100644 src/python/tensorflow_cloud/core/cli.py diff --git a/src/python/tensorflow_cloud/core/cli.py b/src/python/tensorflow_cloud/core/cli.py deleted file mode 100644 index c4fe185b..00000000 --- a/src/python/tensorflow_cloud/core/cli.py +++ /dev/null @@ -1,76 +0,0 @@ -from typing import Optional - -import typer - -from .run import run -from .run import remote -from . import docker_config as docker_config_module -from .machine_config import COMMON_MACHINE_CONFIGS - -app = typer.Typer() - -@app.command("remote") -def remote_command(): - """ - To know is you code is running remote with TF Cloud. 
- """ - if remote(): - typer.echo("Running remotely") - typer.Exit() - - return typer.echo("Running Locally") - -@app.command("run", help="Run your code in a remote cloud environment with TF Cloud.") -def run_command( - entry_point: Optional[str] = typer.Argument(..., help="File path to the python file or iPython notebook that contains the TensorFlow code"), - requirements_txt: Optional[str] = typer.Option(None, help="File path to requirements.txt file containing additional pip dependencies if any"), - image_uri: Optional[str] = typer.Option(None, help="Docker image URI for the Docker image being built"), - parent_image: Optional[str] = typer.Option(None, help="Parent Docker image to use. Example value - 'gcr.io/my_gcp_project/deep_learning:v2' If a parent Docker image is not provided here, we will use a [TensorFlow Docker image](https://www.tensorflow.org/install/docker) as the parent image."), - cache_from: Optional[str] = typer.Option(None, help="Docker image URI to be used as a cache when building the new Docker image. This is especially useful if you are iteratively improving your model architecture/training code. If this parameter is not provided, then we will use `image` URI as cache."), - image_build_bucket: Optional[str] = typer.Option(None, help="GCS bucket name to be used for building a Docker image via [Google Cloud Build](https://cloud.google.com/cloud-build/). If it is not specified, then your local Docker daemon will be used for Docker build."), - distribution_strategy: str = typer.Option("auto", help="Tensorflow distribution strategy based on the machine config"), - chief_config: str = typer.Option("auto", help="`MachineConfig` that represents the configuration for the chief worker in a distribution cluster. 
Choose between (CPU, K80_1X, K80_4X, K80_8X, P100_1X, P100_4X, P4_1X, P4_4X, V100_1X, V100_4X, T4_1X, T4_4X, TPU)"), - worker_config: str = typer.Option("auto", help="`MachineConfig` that represents the configuration for the general workers in a distribution cluster. Choose between (CPU, K80_1X, K80_4X, K80_8X, P100_1X, P100_4X, P4_1X, P4_4X, V100_1X, V100_4X, T4_1X, T4_4X, TPU)"), - worker_count: int = typer.Option(0, help="Represents the number of general workers in a distribution cluster."), - entry_point_args: str = typer.Option(None, help="Command line arguments to pass to the `entry_point` program. Not implemented yet."), - stream_logs: bool = typer.Option(False, help="Boolean flag which when enabled streams logs back from the cloud job."), - job_labels: str = typer.Option(None, help="Labels to organize jobs. You can specify up to 64 key-value pairs in lowercase letters and numbers, where the first character must be lowercase letter. For more details see https://cloud.google.com/ai-platform/training/docs/resource-labels. 
Not implemented yet.") -): - entry_point_args = None - job_labels = None - - docker_config = docker_config_module.DockerConfig(image=image_uri, - parent_image=parent_image, - cache_from=cache_from, - image_build_bucket=image_build_bucket) - - if chief_config != "auto": - try: - chief_config = COMMON_MACHINE_CONFIGS[chief_config] - except KeyError: - typer.BadParameter("You need to choose a between (CPU, K80_1X, K80_4X, K80_8X, P100_1X, P100_4X, P4_1X, P4_4X, V100_1X, V100_4X, T4_1X, T4_4X, TPU) options") - - if worker_config != "auto": - try: - worker_config = COMMON_MACHINE_CONFIGS[worker_config] - except KeyError: - typer.BadParameter("You need to choose a between (CPU, K80_1X, K80_4X, K80_8X, P100_1X, P100_4X, P4_1X, P4_4X, V100_1X, V100_4X, T4_1X, T4_4X, TPU) options") - - info = run( - entry_point=entry_point, - requirements_txt=requirements_txt, - docker_config=docker_config, - distribution_strategy=distribution_strategy, - chief_config=chief_config, - worker_config=worker_config, - worker_count=worker_count, - entry_point_args=entry_point_args, - stream_logs=stream_logs, - job_labels=job_labels - ) - - typer.echo(f"Job id: {info['job_id']}") - typer.echo(f"Docker image URI: {info['docker_image']}") - -if __name__ == '__main__': - app() \ No newline at end of file