Merge pull request #7 from hpcflow/feat/beginners-tutorial

SJaffa · web-flow · commit 938b60079da1 · 2025-06-09T10:17:24.000+01:00
Beginners tutorial
diff --git a/docs/source/conf.py b/docs/source/conf.py
@@ -203,6 +203,7 @@ def prepare_task_schema_action_info(app: BaseApp):
 
 with open("config.jsonc") as fp:
     jsonc_str = fp.read()
+    # Strip out comments denoted by // to leave a valid JSON file
     json_str = re.sub(
         r'\/\/(?=([^"]*"[^"]*")*[^"]*$).*', "", jsonc_str, flags=re.MULTILINE
     )
@@ -226,6 +227,8 @@ def prepare_task_schema_action_info(app: BaseApp):
 
 # distribution name (i.e. name on PyPI):
 with open("../../pyproject.toml") as fp:
     pyproject_config = tomlkit.load(fp)
     dist_name = pyproject_config["tool"]["poetry"]["name"]
     supported_python = pyproject_config["tool"]["poetry"]["dependencies"]["python"]
+    # Reuse the single parse above: another tomlkit.load(fp) would start at EOF and find no "tool" table.
+    supported_python_versions = supported_python
diff --git a/docs/source/user/tutorials/index.rst b/docs/source/user/tutorials/index.rst
@@ -4,3 +4,5 @@ Tutorials
 .. toctree::
    :maxdepth: 1
 
+   Beginner: Install MatFlow on your local machine <install-locally>
+
diff --git a/docs/source/user/tutorials/install-locally.rst b/docs/source/user/tutorials/install-locally.rst
@@ -0,0 +1,93 @@
+.. jinja:: first_ctx
+
+################################################
+Tutorial: Install {{ app_name }} on your local machine
+################################################
+
+This tutorial will guide you through the process of installing {{ app_name }} on your local machine (laptop or desktop), creating and running some example workflows.
+This tutorial is intended for users who are new to {{ app_name }} and want to understand the setup and terminology.
+Most workflows used in your research will be too large to run on your local machine, 
+but this tutorial will help you understand the basics of how {{ app_name }} works before you move to setting it up on a cluster.
+
+Step 1: Set up a Python environment
+====================================
+
+The first step is to set up a Python environment on your local machine.
+
+**If you have not already installed Python**, you can download the latest version of Python from the `Python website <https://www.python.org/downloads/>`_.
+Follow the instructions on the website for your operating system.
+
+**If you have already installed Python**, you can check the version of Python installed on your machine by running
+``python --version``.
+
+Check that your version matches one of the ones supported by {{ app_name }}. 
+You can find the supported Python versions in the :ref:`installation instructions <def_python_versions>`.
+If your version is not supported, you may need to update to a newer version of Python.
+
+Next, you will need to set up a virtual environment to install {{ app_name }} and its dependencies.
+A virtual environment is a self-contained directory that contains a particular version of Python with all the libraries and dependencies you install.
+This allows you to install packages without affecting the system Python installation or other projects,
+and when you run a command inside that environment you are certain which versions are being used.
+
+To create a virtual environment, you can use the `venv <https://docs.python.org/3/library/venv.html>`_ module that comes with Python.
+Follow the instructions in the `Python Packaging Guide <https://packaging.python.org/en/latest/guides/installing-using-pip-and-virtual-environments/#create-and-use-virtual-environments>`_ to create and activate a virtual environment.
+The convention is to call your environment ``.venv``, but you can call it whatever you like.
+We recommend calling it ``{{ app_module }}-env`` to make it clear that this environment is for {{ app_name }}.
+
+When the environment is activated, you should see the name of the virtual environment in brackets in your terminal prompt.
+Whenever you are working with Python in the terminal, you can check if it is accessing your system installation of Python or a virtual environment by running ``which python``.
+This will print out the path to the Python executable it is calling, so currently the path should be inside the virtual environment folder you just created.
+
+Step 2: Install {{ app_name }}
+=======================
+
+Once you have created and activated a Python environment (check for the environment name in brackets in your prompt), you can install {{ app_name }} using pip by running
+``pip install {{ dist_name }}``.
+
+This will install the latest version of {{ app_name }} from the Python Package Index (PyPI), and all the dependencies it needs.
+Once it has finished, check that {{ app_name }} has been installed correctly by running
+``{{ app_module }} --version``.
+
+This should print the version of {{ app_name }} that you have installed.
+If you see an error message saying it doesn't recognise "{{ app_module }}" as a command name, check that you have activated the correct virtual environment and that you have installed {{ app_name }} correctly.
+
+Step 3: Configure {{ app_name }} for your machine
+========================================
+
+Now that you have installed {{ app_name }}, you need to set it up for your machine.
+{{ app_name }} uses a configuration file to store information about the machine you are running on, such as the number of cores available and the locations of important folders.
+This will be stored in your user home directory so that it can be read by {{ app_name }} no matter what project you are working on, or what folder you are working in.
+
+The configuration file is called ``config.yml`` and is stored in the ``~/.{{ app_module }}-new`` directory (``~`` is a shortcut for your user home directory, and the ``.`` at the start of the directory name indicates that this is a hidden folder).
+When you first install {{ app_name }}, the directory and file will not exist.
+You can either create them yourself or run ``{{ app_module }} init`` to create the ``~/.{{ app_module }}-new`` directory and a ``config.yml`` file inside it with the minimum default settings.
+
+Step 4: Define workflow
+========================
+
+Now that you have installed {{ app_name }} and set up the configuration file, you can start defining :ref:`workflows <def_workflow>`.
+{{ app_name }} uses a YAML file to define the workflow, which is a text file that describes the steps in the workflow and the parameters for each step.
+The workflow file is stored in the directory where you want to run the workflow.
+
+Step 5: Run the workflow
+========================
+
+Once you have defined the workflow, you can run it using the command
+``{{ app_module }} go <workflow_file>``.
+
+Step 6: Monitor the workflow
+============================
+
+You can monitor the progress of the workflow by running
+``{{ app_module }} show``.
+This will show you the status of each step in the workflow, including whether it is running, completed, or failed.
+You can also view the log files generated during the run by running
+``{{ app_module }} logs <workflow_file>``.
+This will show you the log files for each step in the workflow, including any error messages or warnings that were generated during the run.
+
+
+Step 7: View the results
+========================
+
+Once the workflow has finished running, you can view the results in the output directory specified in the workflow file.
+The output directory will contain the results of each step in the workflow, as well as any log files generated during the run.
diff --git a/docs/source/user/tutorials/tutorial_resources/advanced_workflow.yaml b/docs/source/user/tutorials/tutorial_resources/advanced_workflow.yaml
@@ -0,0 +1,73 @@
+template_components:
+  task_schemas:
+  - objective: process_some_data
+    inputs:
+    - parameter: input_data
+    outputs:
+    - parameter: parsed_output
+    actions:
+    - input_file_generators:
+      - input_file: my_input_file
+        from_inputs:
+        - input_data
+        script: <<script:/path/to/generate_input_file.py>>
+      environments:
+      - scope:
+          type: any
+        environment: python_env
+      script_exe: python_script
+      script: <<script:/path/to/process_input_file.py>>
+      save_files:
+      - processed_file
+      output_file_parsers:
+        parsed_output:
+          from_files:
+          - my_input_file
+          - processed_file
+          script: <<script:/path/to/parse_output.py>>
+          save_files:
+            - parsed_output
+
+  - objective: process_data_without_input_file_generator
+    inputs:
+    - parameter: input_data
+    - parameter: path
+    actions:
+    - script: <<script:/path/to/generate_input_file.py>>
+      script_data_in: direct
+      script_exe: python_script
+      save_files: 
+      - my_input_file
+      environments:
+      - scope:
+          type: any
+        environment: python_env
+    - script: <<script:/path/to/process_input_file.py>>
+      script_exe: python_script
+      environments:
+      - scope:
+          type: any
+        environment: python_env
+      save_files:
+      - processed_file
+
+  command_files:
+  - label: my_input_file
+    name:
+      name: input_file.json
+  - label: processed_file
+    name:
+      name: processed_file.json
+  - label: parsed_output
+    name:
+      name: parsed_output.json
+
+
+tasks:
+- schema: process_some_data
+  inputs:
+    input_data: [1, 2, 3, 4]
+- schema: process_data_without_input_file_generator
+  inputs:
+    input_data: [1, 2, 3, 4]
+    path: input_file.json
diff --git a/docs/source/user/tutorials/tutorial_resources/config.yaml b/docs/source/user/tutorials/tutorial_resources/config.yaml
@@ -0,0 +1,20 @@
+configs:
+  default:
+    invocation:
+      environment_setup:
+      match: {}
+    config:
+      machine: YOUR-MACHINE-NAME
+      log_file_path: logs/<<app_name>>_v<<app_version>>.log
+      environment_sources: [~/.matflow-new/envs_local.yaml]
+      task_schema_sources: []
+      command_file_sources: []
+      parameter_sources: []
+      default_scheduler: direct
+      default_shell: bash
+      schedulers:
+        direct:
+          defaults: {}
+      shells:
+        bash:
+          defaults: {}
diff --git a/docs/source/user/tutorials/tutorial_resources/envs_local.yaml b/docs/source/user/tutorials/tutorial_resources/envs_local.yaml
@@ -0,0 +1,14 @@
+- name: temp_python_env
+  # Any setup steps e.g. loading a module, activating a virtual environment can go here
+  setup: source venv/bin/activate
+  # There might be multiple executables in your environment
+  # e.g. python, abaqus, etc
+  executables:
+  # It's probably a good idea to stick with `python_script` for any python
+  # executables for compatibility with existing tasks which you
+  # might want to call in your workflow which will expect this label
+  - label: python_script
+    instances:
+    - command: python <<script_name>> <<args>>
+      num_cores: 1
+      parallel_mode: null
diff --git a/docs/source/user/tutorials/tutorial_resources/generate_input_file.py b/docs/source/user/tutorials/tutorial_resources/generate_input_file.py
@@ -0,0 +1,10 @@
+import json
+
+
+def generate_input_file(path: str, input_data: list):
+    """Write ``input_data`` to ``path`` as JSON.
+
+    The file is opened in write mode and the data is dumped with a two-space indent.
+    """
+    with open(path, "w") as out_file:
+        json.dump(input_data, out_file, indent=2)
diff --git a/docs/source/user/tutorials/tutorial_resources/greet.py b/docs/source/user/tutorials/tutorial_resources/greet.py
@@ -0,0 +1,7 @@
+def greet(greeting: str, name: str):
+    """Combine a greeting and a name into a single message.
+
+    The message is returned in a dict under the ``string_to_print`` key.
+    """
+    message = f"{greeting}, {name}!"
+    return {"string_to_print": message}
diff --git a/docs/source/user/tutorials/tutorial_resources/groups.yaml b/docs/source/user/tutorials/tutorial_resources/groups.yaml
@@ -0,0 +1,31 @@
+template_components:
+  task_schemas:
+    - objective: s1
+      inputs:
+        - parameter: p1
+      outputs:
+        - parameter: p2
+      actions:
+        - commands:
+            - command: echo $(( <<parameter:p1>> + 1 )) # This is printed to stdout
+            - command: echo $(( <<parameter:p1>> + 1 )) # This is captured as p2
+              stdout: <<int(parameter:p2)>>
+    - objective: s2
+      inputs:
+        - parameter: p2
+          group: my_group
+      outputs:
+        - parameter: p3
+      actions:
+        - commands:
+            - command: echo <<parameter:p2>> # This one is printed to stdout
+            - command: echo $(( <<sum(parameter:p2)>> )) # This is captured as p3
+              stdout: <<parameter:p3>>
+tasks:
+  - schema: s1
+    sequences:
+      - path: inputs.p1
+        values: [1, 2]
+    groups:
+      - name: my_group
+  - schema: s2
diff --git a/docs/source/user/tutorials/tutorial_resources/hello.yaml b/docs/source/user/tutorials/tutorial_resources/hello.yaml
@@ -0,0 +1,89 @@
+template_components:
+  task_schemas:
+  - objective: greet
+    inputs:
+    - parameter: name
+      default_value: World
+    - parameter: greeting
+      default_value: Hello
+    actions:
+    - commands:
+      - command: echo "<<parameter:greeting>>, <<parameter:name>>!" > printed_string.txt
+
+  - objective: python_greet
+    inputs:
+    - parameter: name
+      default_value: World
+    - parameter: greeting
+      default_value: Hello
+    outputs:
+    - parameter: string_to_print
+    actions:
+    - script: <<script:/path/to/greet.py>>
+      script_data_in: direct
+      script_data_out: direct
+      script_exe: python_script
+      environments:
+      - scope:
+          type: any
+        environment: python_env
+
+  - objective: print
+    inputs:
+    - parameter: string_to_print
+    actions:
+    - commands:
+      - command: echo "<<parameter:string_to_print>>" > printed_string.txt
+
+  # This schema uses the environment `temp_python_env`
+  # which loads a python venv.
+  # This is shown in `envs_local.yaml` alongside this file.
+  - objective: which_python
+    actions:
+    - commands:
+      - command: which python
+      environments:
+      - scope:
+          type: any
+        environment: temp_python_env
+
+# Workflow
+tasks:
+- schema: greet
+- schema: greet
+  inputs:
+    greeting: What's up
+    name: doc
+- schema: python_greet
+  inputs:
+    greeting: Howdy
+    name: partner
+- schema: print
+- schema: print
+  inputs:
+    string_to_print: another string to print!
+- schema: print
+  # Explicitly reference output parameter from a task
+  input_sources:
+    string_to_print: task.python_greet
+- schema: print
+  input_sources:
+    # Note that local variable will appear first, regardless of its position in the list
+    string_to_print: [task.python_greet, local]
+  inputs:
+    string_to_print: Yet another string to print!
+- schema: which_python
+- schema: greet
+  sequences:
+  - path: inputs.greeting
+    values:
+    - hey
+    - see ya later
+    - in a while
+    nesting_order: 0
+  - path: inputs.name
+    values:
+    - you
+    - alligator
+    - crocodile
+    nesting_order: 1
diff --git a/docs/source/user/tutorials/tutorial_resources/parse_output.py b/docs/source/user/tutorials/tutorial_resources/parse_output.py
@@ -0,0 +1,21 @@
+import json
+
+
+def parse_output(my_input_file: str, processed_file: str):
+    """Combine the input and processed data files into one dictionary.
+
+    Both files are read as JSON. The combined dictionary is also written to
+    ``parsed_output.json`` in the current working directory so it can be
+    inspected, and is returned under the ``parsed_output`` key.
+    """
+    loaded = []
+    for source_path in (my_input_file, processed_file):
+        with open(source_path, "r") as source:
+            loaded.append(json.load(source))
+    original, processed = loaded
+
+    merged = {"input_data": original, "output_data": processed}
+    with open("parsed_output.json", "w") as sink:
+        json.dump(merged, sink, indent=2)
+
+    return {"parsed_output": merged}
diff --git a/docs/source/user/tutorials/tutorial_resources/process_input_file.py b/docs/source/user/tutorials/tutorial_resources/process_input_file.py
@@ -0,0 +1,16 @@
+import json
+
+
+def process_input_file():
+    """Double every item loaded from ``input_file.json``.
+
+    The loaded JSON data is iterated, each item is multiplied by two, and the
+    results are written to ``processed_file.json`` with a two-space indent.
+    This stands in for a real computation, e.g. a materials science simulation.
+    """
+    with open("input_file.json", "r") as source:
+        values = json.load(source)
+
+    doubled = [value * 2 for value in values]
+    with open("processed_file.json", "w") as sink:
+        json.dump(doubled, sink, indent=2)

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+def greet(greeting: str, name: str):`
	`2`	`+ """Return a greeting"""`
	`3`	`+ return {"string_to_print": f"{greeting}, {name}!"}`