diff --git a/agent/config/firewalld/pbench-dcgm-exporter.xml b/agent/config/firewalld/pbench-dcgm-exporter.xml new file mode 100644 index 0000000000..5ae4e1727b --- /dev/null +++ b/agent/config/firewalld/pbench-dcgm-exporter.xml @@ -0,0 +1,7 @@ +<?xml version="1.0" encoding="utf-8"?> +<service> +  <short>pbench-dcgm-exporter</short> +  <description>Pbench Agent Prometheus dcgm-exporter</description> +  <port protocol="tcp" port="9400"/> +</service>
diff --git a/agent/config/firewalld/pbench-redis.xml b/agent/config/firewalld/pbench-redis.xml index 7b3f97e0b6..22f6f0d169 100644 --- a/agent/config/firewalld/pbench-redis.xml +++ b/agent/config/firewalld/pbench-redis.xml @@ -1,6 +1,6 @@ <?xml version="1.0" encoding="utf-8"?> <service> -  <short>pbench-tool-data-sink</short> -  <description>Pbench Agent Tool Data Sink</description> -  <port protocol="tcp" port="8080"/> +  <short>pbench-redis</short> +  <description>Pbench Agent Redis Server</description> +  <port protocol="tcp" port="17001"/> </service>
diff --git a/agent/config/firewalld/pbench-tool-data-sink.xml b/agent/config/firewalld/pbench-tool-data-sink.xml index 22f6f0d169..7b3f97e0b6 100644 --- a/agent/config/firewalld/pbench-tool-data-sink.xml +++ b/agent/config/firewalld/pbench-tool-data-sink.xml @@ -1,6 +1,6 @@ <?xml version="1.0" encoding="utf-8"?> <service> -  <short>pbench-redis</short> -  <description>Pbench Agent Redis Server</description> -  <port protocol="tcp" port="17001"/> +  <short>pbench-tool-data-sink</short> +  <description>Pbench Agent Tool Data Sink</description> +  <port protocol="tcp" port="8080"/> </service>
diff --git a/agent/containers/images/Dockerfile.base.j2 b/agent/containers/images/Dockerfile.base.j2 index 436f0bc40a..79c73cca6f 100644 --- a/agent/containers/images/Dockerfile.base.j2 +++ b/agent/containers/images/Dockerfile.base.j2 @@ -13,9 +13,9 @@ RUN \ {{ pkgmgr }} module -y disable python38 && \ {% endif %} {% if distro_image.startswith('centos') %} - {{ pkgmgr }} install -y https://dl.fedoraproject.org/pub/epel/epel-release-latest-{{ distro_image.split(':', 1)[1] }}.noarch.rpm && \ + {{ pkgmgr }} install -y --setopt=tsflags=nodocs https://dl.fedoraproject.org/pub/epel/epel-release-latest-{{ distro_image.split(':', 1)[1] }}.noarch.rpm && \ {% endif %} - {{ pkgmgr }} install -y {% if distro_image == 'centos:8' %}--enablerepo powertools glibc-locale-source {% endif %} pbench-agent && \ + {{ pkgmgr }} install -y --setopt=tsflags=nodocs {% if distro_image == 'centos:8' %}--enablerepo powertools glibc-locale-source {% endif %} pbench-agent && \ {% if distro_image == 'centos:8' %} localedef -i en_US -f UTF-8 en_US.UTF-8 && \ {% endif %}
diff --git a/agent/containers/images/Dockerfile.dcgmEX.j2 b/agent/containers/images/Dockerfile.dcgmEX.j2 new file mode 100644 index 0000000000..f307d45d3b --- /dev/null +++ b/agent/containers/images/Dockerfile.dcgmEX.j2 @@ -0,0 +1,15 @@ +# NOTE: Must be run with --privileged +# RECOMMENDED: Use with the fedora image variants for direct compatibility +FROM pbench-agent-tool-meister-{{ distro }}:{{ tag }} + +RUN {% if distro == 'centos-7' %}yum{% else %}dnf{% endif %} install -y 'dnf-command(config-manager)' && \ + {% if distro == 'centos-7' %}yum{% else %}dnf{% endif %} config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/{{ distro.split("-")|join("") }}/x86_64/cuda-{{ distro.split("-")|join("") }}.repo && \ + {% if distro == 'centos-7' %}yum{% else %}dnf{% endif %} clean expire-cache && \ + {% if distro == 'centos-7' %}yum{% else %}dnf{% endif %} install -y nvidia-driver-cuda nvidia-modprobe datacenter-gpu-manager-2.1.4 golang && \ + git clone https://github.com/NVIDIA/gpu-monitoring-tools.git && \ + (cd gpu-monitoring-tools; git checkout tags/2.1.2 -b build; make binary install) && \ + {% if distro == 'centos-7' %}yum{% else %}dnf{% endif %} -y clean all && \ + rm -rf /var/cache/{% if distro == 'centos-7' %}yum{% else %}dnf{% endif %} + +ENV NVIDIA_DISABLE_REQUIRE="true" \ + NVIDIA_VISIBLE_DEVICES=all
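The new firewalld service and the dcgmEX image are meant to be used together, roughly along these lines; the podman invocation, image name, and tag below are illustrative, not part of this change:

    # Open the firewalld service defined above so Prometheus can scrape
    # the exporter.
    sudo firewall-cmd --permanent --add-service=pbench-dcgm-exporter
    sudo firewall-cmd --reload

    # Run a rendered dcgmEX image; per the NOTE above it must be privileged
    # so DCGM can reach the GPUs (image name and tag are hypothetical).
    sudo podman run --privileged -d -p 9400:9400 \
        pbench-agent-dcgm-exporter-fedora-33:latest

    # dcgm-exporter publishes its DCGM_FI_DEV_* metrics on port 9400.
    curl -s http://localhost:9400/metrics | grep DCGM_FI_DEV_GPU_TEMP

diff --git a/agent/containers/images/Dockerfile.layered.j2 b/agent/containers/images/Dockerfile.layered.j2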
index b7b4685548..a9d91ff4c5 100644 --- a/agent/containers/images/Dockerfile.layered.j2 +++ b/agent/containers/images/Dockerfile.layered.j2 @@ -1,10 +1,14 @@ # {{ distro }} pbench-agent {{ kind }} image FROM pbench-agent-base-{{ distro }}:{{ tag }} +{% if kind in ('tools', 'all') %} +COPY ./{{ distro }}-pcp.repo /etc/yum.repos.d/pcp.repo +{% endif %} + # Install all the RPMs required for this image. # # FIXME: this is not exhaustive, it does not include RPMs to support # Kubernetes or RHV environments. -RUN {% if distro == 'centos-7' %}yum{% else %}dnf{% endif %} install -y {% if distro == 'centos-8' %}--enablerepo powertools {% endif %}{{ rpms }} && \ +RUN {% if distro == 'centos-7' %}yum{% else %}dnf{% endif %} install -y --setopt=tsflags=nodocs {% if distro == 'centos-8' %}--enablerepo powertools {% endif %}{% if kind in ('tools', 'all') %}--enablerepo pcp {% endif %}{{ rpms }} && \ {% if distro == 'centos-7' %}yum{% else %}dnf{% endif %} -y clean all && \ rm -rf /var/cache/{% if distro == 'centos-7' %}yum{% else %}dnf{% endif %}
diff --git a/agent/containers/images/Dockerfile.tds.j2 b/agent/containers/images/Dockerfile.tds.j2 new file mode 100644 index 0000000000..f59e620117 --- /dev/null +++ b/agent/containers/images/Dockerfile.tds.j2 @@ -0,0 +1,9 @@ +# {{ distro }} pbench-agent-tool-data-sink image +FROM pbench-agent-tools-{{ distro }}:{{ tag }} + +VOLUME /var/lib/pbench-agent + +# Port 8080 should be the Bottle server, 9090 the optional Prometheus server, +# and 44566 the optional pmproxy server. +EXPOSE 8080 9090 44566 +ENTRYPOINT [ "/opt/pbench-agent/util-scripts/tool-meister/tool-data-sink-ep" ]
diff --git a/agent/containers/images/Dockerfile.tm.j2 b/agent/containers/images/Dockerfile.tm.j2 new file mode 100644 index 0000000000..95f9fde6f4 --- /dev/null +++ b/agent/containers/images/Dockerfile.tm.j2 @@ -0,0 +1,7 @@ +# {{ distro }} pbench-agent-tool-meister image +FROM pbench-agent-tools-{{ distro }}:{{ tag }} + +# Port 9400 should be the optional dcgm tool, 9100 the optional node_exporter +# tool, and 55677 the pcp (pmcd) tool. +EXPOSE 9100 9400 55677 +ENTRYPOINT [ "/opt/pbench-agent/util-scripts/tool-meister/tool-meister-ep" ]
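A running pair of these images can be spot-checked by probing the ports called out above; the localhost addresses, and the assumption that the optional collectors were registered, are illustrative:

    # Tool Data Sink (Dockerfile.tds.j2): Bottle server on 8080, optional
    # Prometheus server on 9090.
    curl -s -o /dev/null -w '%{http_code}\n' http://localhost:8080/
    curl -s http://localhost:9090/-/ready

    # Tool Meister (Dockerfile.tm.j2): optional node_exporter on 9100 and
    # dcgm-exporter on 9400; 55677 carries pmcd's own protocol, not HTTP.
    curl -s http://localhost:9100/metrics | head -n 5
    curl -s http://localhost:9400/metrics | head -n 5

diff --git a/agent/containers/images/Makefile b/agent/containers/images/Makefile index 0d9e79abdf..c1d053fe62 100644 --- a/agent/containers/images/Makefile +++ b/agent/containers/images/Makefile @@ -27,9 +27,43 @@ IMAGE_REPO = docker://quay.io/pbench # Not intended to be overridden with an environment variable. _REPO_TEMPLATE = ../../ansible/pbench/agent/roles/pbench_repo_install/templates/etc/yum.repos.d/pbench.repo.j2 +# NOTE: Currently we require v5.2.2 of the PCP RPMs because v5.2.3 +# prevents us from integrating with Grafana; see PCP Issue #1183, +# https://github.com/performancecopilot/pcp/issues/1183. +# NOTE: We also have to enumerate so many RPMs because the CentOS 7 RPM +# dependency resolver does not properly resolve them all to the same version. +# Once we no longer have to use v5.2.2, we can just list three RPMs: +# pcp-zeroconf, pcp-system-tools, and pcp-gui. +_PCP_RPMS = \ + pcp-doc-5.2.2 \ + pcp-gui-5.2.2 \ + pcp-pmda-dm-5.2.2 \ + pcp-pmda-nfsclient-5.2.2 \ + pcp-pmda-openmetrics-5.2.2 \ + pcp-system-tools-5.2.2 \ + pcp-zeroconf-5.2.2 \ + python3-pcp-5.2.2 # The list of RPMs which provide the various tools we offer. # Not intended to be overridden with an environment variable.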
-_TOOL_RPMS = prometheus2 node_exporter blktrace bpftrace cpupowerutils golang kernel-tools libvirt-client nmap-ncat numactl pbench-sysstat pcp-system-tools perf procps-ng strace tcpdump trace-cmd +# Please keep the lists sorted. +_TOOL_RPMS = \ + blktrace \ + bpftrace \ + cpupowerutils \ + golang \ + kernel-tools \ + libvirt-client \ + nmap-ncat \ + node_exporter \ + numactl \ + pbench-sysstat \ + ${_PCP_RPMS} \ + perf \ + procps-ng \ + prometheus2 \ + strace \ + tcpdump \ + trace-cmd # The list of RPMs for the default workloads we offer. # Not intended to be overridden with an environment variable. @@ -41,8 +75,38 @@ _ALL_RPMS = ${_TOOL_RPMS} ${_WORKLOAD_RPMS} # By default we only build images for the following distributions: _DISTROS = centos-8 centos-7 fedora-33 fedora-32 +# By default we now also build the Tool Data Sink and Tool Meister images all: all-tags $(foreach distro, ${_DISTROS}, ${distro}-all-tagged) +tds: all-tags $(foreach distro, ${_DISTROS}, ${distro}-tool-data-sink-tagged) + +tm: all-tags $(foreach distro, ${_DISTROS}, ${distro}-tool-meister-tagged) + +# We also offer per-distribution targets +centos-8: all-tags centos-8-all-tagged + +centos-7: all-tags centos-7-all-tagged + +fedora-33: all-tags fedora-33-all-tagged + +fedora-32: all-tags fedora-32-all-tagged + +centos-8-tds: all-tags centos-8-tool-data-sink-tagged + +centos-7-tds: all-tags centos-7-tool-data-sink-tagged + +fedora-33-tds: all-tags fedora-33-tool-data-sink-tagged + +fedora-32-tds: all-tags fedora-32-tool-data-sink-tagged + +centos-8-tm: all-tags centos-8-tool-meister-tagged + +centos-7-tm: all-tags centos-7-tool-meister-tagged + +fedora-33-tm: all-tags fedora-33-tool-meister-tagged + +fedora-32-tm: all-tags fedora-32-tool-meister-tagged +
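With these targets, a build can be scoped to one image flavor or one distribution; for example, run from agent/containers/images:

    make                 # every image flavor for every distribution
    make tm              # only the Tool Meister images, all distributions
    make centos-8-tm     # only the CentOS 8 Tool Meister image
    make fedora-33-tds   # only the Fedora 33 Tool Data Sink image

#+ # Tagging targets #- @@ -97,16 +161,34 @@ push-major-minor: $(foreach distro, ${_DISTROS}, ${distro}-push-major-minor) %-all-tagged: %-all %-tags.lis ./apply-tags pbench-agent-all-$* $*-tags.lis -%-all: %-tools-tagged %-workloads-tagged %-all.Dockerfile +%-all: %-workloads-tagged %-tool-data-sink-tagged %-tool-meister-tagged %-all.Dockerfile ./build-image all $* $*-tags.lis %-all.Dockerfile: Dockerfile.layered.j2 %-tags.lis jinja2 Dockerfile.layered.j2 -D distro=$* -D tag="$$(grep -v -E '^v' $*-tags.lis)" -D kind="all" -D rpms="${_ALL_RPMS}" > ./$@ +%-tool-data-sink-tagged: %-tool-data-sink %-tags.lis + ./apply-tags pbench-agent-tool-data-sink-$* $*-tags.lis + +%-tool-data-sink: %-tools-tagged %-tool-data-sink.Dockerfile + ./build-image tool-data-sink $* $*-tags.lis + +%-tool-data-sink.Dockerfile: Dockerfile.tds.j2 %-tags.lis + jinja2 Dockerfile.tds.j2 -D distro=$* -D tag="$$(grep -v -E '^v' $*-tags.lis)" > ./$@ + +%-tool-meister-tagged: %-tool-meister %-tags.lis + ./apply-tags pbench-agent-tool-meister-$* $*-tags.lis + +%-tool-meister: %-tools-tagged %-tool-meister.Dockerfile + ./build-image tool-meister $* $*-tags.lis + +%-tool-meister.Dockerfile: Dockerfile.tm.j2 %-tags.lis + jinja2 Dockerfile.tm.j2 -D distro=$* -D tag="$$(grep -v -E '^v' $*-tags.lis)" > ./$@ + %-tools-tagged: %-tools %-tags.lis ./apply-tags pbench-agent-tools-$* $*-tags.lis -%-tools: %-base-tagged %-tools.Dockerfile +%-tools: %-base-tagged %-tools.Dockerfile %-pcp.repo ./build-image tools $* $*-tags.lis %-tools.Dockerfile: Dockerfile.layered.j2 %-tags.lis @@ -204,15 +286,23 @@ fedora-32-base.Dockerfile: Dockerfile.base.j2 fedora-32-pbench.repo # Helper target to build each distro's ".repo" and ".Dockerfile" all-dockerfiles: $(foreach distro, ${_DISTROS}, ${distro}-base.Dockerfile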
${distro}-tools.Dockerfile ${distro}-workloads.Dockerfile ${distro}-all.Dockerfile) -# Rule pattern dependencies on non-patterned targets have to be set up -# separately for some reason. -%.repo: ${_REPO_TEMPLATE} +%-pbench.repo: %-pbench.yml ${_REPO_TEMPLATE} + jinja2 ${_REPO_TEMPLATE} $*-pbench.yml -o $@ + +%-pbench.yml: repo.yml.j2 + jinja2 repo.yml.j2 -D distro=$* -D url_prefix=${URL_PREFIX} -D test_suffix=${_TEST_SUFFIX} -D user=${USER} -o $@ + +fedora-33-pcp.repo: pcp.repo.j2 + jinja2 pcp.repo.j2 -D target=f33 -o $@ + +fedora-32-pcp.repo: pcp.repo.j2 + jinja2 pcp.repo.j2 -D target=f32 -o $@ -%.repo: %.yml - jinja2 ${_REPO_TEMPLATE} $*.yml -o $@ +centos-8-pcp.repo: pcp.repo.j2 + jinja2 pcp.repo.j2 -D target=el8 -o $@ -%.yml: repo.yml.j2 - jinja2 repo.yml.j2 -D distro=${@:-pbench.yml=} -D url_prefix=${URL_PREFIX} -D test_suffix=${_TEST_SUFFIX} -D user=${USER} -o $@ +centos-7-pcp.repo: pcp.repo.j2 + jinja2 pcp.repo.j2 -D target=el7 -o $@ clean: rm -f *.Dockerfile *.repo *.yml *-tags.lis
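The replacement pattern rules render in two steps: first a per-distro answers file from repo.yml.j2, then the .repo file from the shared template. A sketch of the chain, with the remaining -D values coming from the environment as before:

    make centos-8-pbench.repo
    # which runs, in order:
    #   jinja2 repo.yml.j2 -D distro=centos-8 ... -o centos-8-pbench.yml
    #   jinja2 ${_REPO_TEMPLATE} centos-8-pbench.yml -o centos-8-pbench.repo

diff --git a/agent/containers/images/pcp-pmcd/Dockerfile b/agent/containers/images/pcp-pmcd/Dockerfile deleted file mode 100644 index 78c3b177f0..0000000000 --- a/agent/containers/images/pcp-pmcd/Dockerfile +++ /dev/null @@ -1,16 +0,0 @@ -FROM fedora:33 - -ENV SUMMARY="Performance Co-Pilot" \ - DESCRIPTION="Performance Co-Pilot is a system performance analysis toolkit." \ - VERSION=5 - -RUN dnf install -y --setopt=tsflags=nodocs procps-ng gettext pcp pcp-zeroconf && \ - dnf install -y pcp-doc pcp-gui pcp-system-tools && \ - dnf clean all -RUN systemctl enable pmcd && systemctl disable pmlogger - -COPY config /etc/sysconfig/pmcd - -EXPOSE 44321 -CMD ["/usr/sbin/init"] -
diff --git a/agent/containers/images/pcp-pmcd/config b/agent/containers/images/pcp-pmcd/config deleted file mode 100644 index 9506ba807c..0000000000 --- a/agent/containers/images/pcp-pmcd/config +++ /dev/null @@ -1,30 +0,0 @@ -# Environment variables for the pmcd daemon. Refer also to the -# pmcd.options and pmcd.conf files for additional configuration. - -# Behaviour regarding listening on external-facing interfaces; -# unset PMCD_LOCAL to allow connections from remote hosts. -# A value of 0 permits remote connections, 1 permits local only. -PMCD_LOCAL=0 - -# Max length to which the queue of pending connections may grow -# A value of 5 is the default. -# PMCD_MAXPENDING=5 - -# Default behaviour regarding pmcd's approach to starting PMDAs; -# In cases where pmdaroot is available, setting this variable to -# 1, offloads starting and stopping of agents to pmdaroot. This -# allows pmcd to not require a restart when starting a new PMDA. -PMCD_ROOT_AGENT=1 - -# Default behaviour regarding pmcd's approach to re-starting any -# unresponsive PMDAs; this should only be used with pmdaroot and -# PMCD_ROOT_AGENT=1 as it allows pmcd to attempt to automatically -# restart any exited PMDA that it detects (which usually requires -# privileges not available to pmcd itself). -PMCD_RESTART_AGENTS=1 - -# Default timeout for waiting on pmcd to accept connections; any -# longer than this value and the rc scripts report it as failed. -# The value is a PCPIntro(1) interval in units of seconds and it -# will be passed directly to the pmcd_wait(1) utility.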
-# PMCD_WAIT_TIMEOUT=60
diff --git a/agent/containers/images/pcp-pmlogger/Dockerfile b/agent/containers/images/pcp-pmlogger/Dockerfile deleted file mode 100644 index 52214587cc..0000000000 --- a/agent/containers/images/pcp-pmlogger/Dockerfile +++ /dev/null @@ -1,14 +0,0 @@ -FROM fedora:33 - -ENV SUMMARY="Performance Co-Pilot" \ - DESCRIPTION="Performance Co-Pilot is a system performance analysis toolkit." \ - VERSION=5 - -RUN dnf install -y --setopt=tsflags=nodocs procps-ng gettext pcp pcp-zeroconf && \ - dnf install -y pcp-doc pcp-gui pcp-system-tools && \ - dnf clean all && \ - rm -rf /etc/pcp/pmlogger/control.d/local -RUN systemctl enable pmlogger && systemctl disable pmcd - -VOLUME ["/var/log/pcp/pmlogger"] -CMD ["/usr/sbin/init"]
diff --git a/agent/containers/images/pcp.repo.j2 b/agent/containers/images/pcp.repo.j2 new file mode 100644 index 0000000000..02a3eeb545 --- /dev/null +++ b/agent/containers/images/pcp.repo.j2 @@ -0,0 +1,6 @@ +[pcp] +name=pcp +baseurl=https://dl.bintray.com/pcp/{{ target }} +gpgcheck=0 +repo_gpgcheck=0 +enabled=1
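For reference, rendering this template for one target (e.g. jinja2 pcp.repo.j2 -D target=el8 -o centos-8-pcp.repo, per the Makefile rules above) produces:

    [pcp]
    name=pcp
    baseurl=https://dl.bintray.com/pcp/el8
    gpgcheck=0
    repo_gpgcheck=0
    enabled=1

diff --git a/agent/containers/images/push b/agent/containers/images/push index c31536c519..af19c6329d 100755 --- a/agent/containers/images/push +++ b/agent/containers/images/push @@ -27,7 +27,7 @@ function pushit { buildah push ${1} ${image_repo}/${1} } -for image in base tools workloads all; do +for image in base tools tool-meister tool-data-sink workloads all; do pushit pbench-agent-${image}-${distro}:${githash} pushit pbench-agent-${image}-${distro}:${ver} if [[ ! -z "${other}" ]]; then
diff --git a/agent/containers/images/tagit b/agent/containers/images/tagit index 28727c9afc..858c0bb6a4 100755 --- a/agent/containers/images/tagit +++ b/agent/containers/images/tagit @@ -16,6 +16,6 @@ function tagit { buildah tag ${1}:${githash} ${1}:${tag} } -for image in base tools workloads all; do +for image in base tools tool-meister tool-data-sink workloads all; do tagit pbench-agent-${image}-${distro} done
diff --git a/agent/containers/images/visualizers/combo.json b/agent/containers/images/visualizers/combo.json index 9c9e4e4591..b08568d7a0 100644 --- a/agent/containers/images/visualizers/combo.json +++ b/agent/containers/images/visualizers/combo.json @@ -123,7 +123,7 @@ "steppedLine": false, "targets": [ { - "expr": "dcgm_gpu_temp", + "expr": "DCGM_FI_DEV_GPU_TEMP", "format": "time_series", "instant": false, "interval": "", @@ -227,7 +227,7 @@ "pluginVersion": "7.1.2", "targets": [ { - "expr": "avg(dcgm_gpu_temp)", + "expr": "avg(DCGM_FI_DEV_GPU_TEMP)", "interval": "", "legendFormat": "", "refId": "A" @@ -286,7 +286,7 @@ "steppedLine": false, "targets": [ { - "expr": "dcgm_power_usage", + "expr": "DCGM_FI_DEV_POWER_USAGE", "interval": "", "legendFormat": "GPU {{gpu}}", "refId": "A" @@ -408,7 +408,7 @@ "pluginVersion": "7.1.2", "targets": [ { - "expr": "sum(dcgm_power_usage)", + "expr": "sum(DCGM_FI_DEV_POWER_USAGE)", "instant": true, "interval": "", "legendFormat": "", @@ -471,7 +471,7 @@ "steppedLine": false, "targets": [ { - "expr": "dcgm_sm_clock", + "expr": "DCGM_FI_DEV_SM_CLOCK", "format": "time_series", "instant": false, "interval": "", @@ -523,6 +523,97 @@ "alignLevel": null } }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true,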
"show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "DCGM_FI_DEV_MEM_CLOCK", + "interval": "", + "legendFormat": "GPU {{gpu}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "interval": "3", + "title": "GPU Memory Clocks", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "hertz", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { "aliasColors": {}, "bars": false, @@ -570,7 +661,7 @@ "steppedLine": false, "targets": [ { - "expr": "dcgm_gpu_utilization", + "expr": "DCGM_FI_DEV_GPU_UTIL", "interval": "", "legendFormat": "GPU {{gpu}}", "refId": "A" @@ -618,6 +709,97 @@ "alignLevel": null } }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "DCGM_FI_DEV_MEM_COPY_UTIL", + "interval": "", + "legendFormat": "GPU {{gpu}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "interval": 3, + "title": "GPU Mem Cpy Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { "aliasColors": {}, "bars": false, @@ -664,7 +846,7 @@ "steppedLine": false, "targets": [ { - "expr": "dcgm_fb_used", + "expr": "DCGM_FI_DEV_FB_USED", "interval": "", "legendFormat": "GPU {{gpu}}", "refId": "A" @@ -759,7 +941,7 @@ "steppedLine": false, "targets": [ { - "expr": "dcgm_fb_free", + "expr": "DCGM_FI_DEV_FB_FREE", "interval": "", "legendFormat": "GPU {{gpu}}", "refId": "A" diff --git a/agent/containers/images/visualizers/dcgm.json b/agent/containers/images/visualizers/dcgm.json index 32f8b16992..2c4106c85e 100644 --- a/agent/containers/images/visualizers/dcgm.json +++ b/agent/containers/images/visualizers/dcgm.json @@ -119,7 +119,7 @@ "steppedLine": false, "targets": [ { - "expr": "dcgm_gpu_temp", + "expr": 
"DCGM_FI_DEV_GPU_TEMP", "format": "time_series", "instant": false, "interval": "", @@ -223,7 +223,7 @@ "pluginVersion": "7.1.2", "targets": [ { - "expr": "avg(dcgm_gpu_temp)", + "expr": "avg(DCGM_FI_DEV_GPU_TEMP)", "interval": "", "legendFormat": "", "refId": "A" @@ -282,7 +282,7 @@ "steppedLine": false, "targets": [ { - "expr": "dcgm_power_usage", + "expr": "DCGM_FI_DEV_POWER_USAGE", "interval": "", "legendFormat": "GPU {{gpu}}", "refId": "A" @@ -404,7 +404,7 @@ "pluginVersion": "7.1.2", "targets": [ { - "expr": "sum(dcgm_power_usage)", + "expr": "sum(DCGM_FI_DEV_POWER_USAGE)", "instant": true, "interval": "", "legendFormat": "", @@ -467,7 +467,7 @@ "steppedLine": false, "targets": [ { - "expr": "dcgm_sm_clock", + "expr": "DCGM_FI_DEV_SM_CLOCK", "format": "time_series", "instant": false, "interval": "", @@ -519,6 +519,97 @@ "alignLevel": null } }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "hiddenSeries": false, + "id": 4, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "DCGM_FI_DEV_MEM_CLOCK", + "interval": "", + "legendFormat": "GPU {{gpu}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "interval": "3", + "title": "GPU Memory Clocks", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "individual" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "hertz", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { "aliasColors": {}, "bars": false, @@ -566,7 +657,7 @@ "steppedLine": false, "targets": [ { - "expr": "dcgm_gpu_utilization", + "expr": "DCGM_FI_DEV_GPU_UTIL", "interval": "", "legendFormat": "GPU {{gpu}}", "refId": "A" @@ -614,6 +705,97 @@ "alignLevel": null } }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "${DS_PROMETHEUS}", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 24 + }, + "hiddenSeries": false, + "id": 8, + "legend": { + "alignAsTable": true, + "avg": true, + "current": true, + "max": true, + "min": false, + "rightSide": true, + "show": true, + "total": false, + "values": true + }, + "lines": true, + "linewidth": 2, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "DCGM_FI_DEV_MEM_COPY_UTIL", + "interval": "", + "legendFormat": "GPU {{gpu}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "interval": 3, + "title": "GPU Mem Cpy 
Utilization", + "tooltip": { + "shared": true, + "sort": 0, + "value_type": "cumulative" + }, + "type": "graph", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "percent", + "label": null, + "logBase": 1, + "max": "100", + "min": "0", + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, { "aliasColors": {}, "bars": false, @@ -660,7 +842,7 @@ "steppedLine": false, "targets": [ { - "expr": "dcgm_fb_used", + "expr": "DCGM_FI_DEV_FB_USED", "interval": "", "legendFormat": "GPU {{gpu}}", "refId": "A" @@ -755,7 +937,7 @@ "steppedLine": false, "targets": [ { - "expr": "dcgm_fb_free", + "expr": "DCGM_FI_DEV_FB_FREE", "interval": "", "legendFormat": "GPU {{gpu}}", "refId": "A"
diff --git a/agent/tool-scripts/dcgm b/agent/tool-scripts/dcgm index 835b4bf899..af21817452 100755 --- a/agent/tool-scripts/dcgm +++ b/agent/tool-scripts/dcgm @@ -15,10 +15,9 @@ import sys if len(sys.argv) == 2 and sys.argv[1] == "--help": help = """Options: ---inst= (required) --interval=# (number of seconds between collections) -For more information on this tool, please Nvidia's "dcgm-exporter" at: +For more information on this tool, please see Nvidia's "dcgm-exporter" at: \thttps://ngc.nvidia.com/catalog/containers/nvidia:k8s:dcgm-exporter """ print(help)
diff --git a/agent/tool-scripts/meta.json b/agent/tool-scripts/meta.json index c69a941a75..edd8dc782e 100644 --- a/agent/tool-scripts/meta.json +++ b/agent/tool-scripts/meta.json @@ -42,7 +42,7 @@ "persistent":{ "node-exporter": {"collector": "prometheus", "port": "9100"}, - "dcgm": {"collector": "prometheus", "port": "8000"}, + "dcgm": {"collector": "prometheus", "port": "9400"}, "pcp": {"collector": "pcp", "port": "44321"} } }
diff --git a/agent/util-scripts/gold/pbench-tool-meister-start/test-54.txt b/agent/util-scripts/gold/pbench-tool-meister-start/test-54.txt index 335c2c4e4c..8e9a30a8a8 100644 --- a/agent/util-scripts/gold/pbench-tool-meister-start/test-54.txt +++ b/agent/util-scripts/gold/pbench-tool-meister-start/test-54.txt @@ -1,13 +1,18 @@ +++ Running test-54 pbench-tool-meister-start --help usage: Usage: pbench-tool-meister-start [--sysinfo <sysinfo>] - [-h] [--sysinfo SYSINFO] tool_group + [-h] [--sysinfo SYSINFO] [--redis-server REDIS_SERVER] tool_group positional arguments: - tool_group The tool group of items to be run by the Tool Meisters. + tool_group The tool group name of tools to be run by the Tool + Meisters. optional arguments: - -h, --help show this help message and exit - --sysinfo SYSINFO The list of system information items to be collected. + -h, --help show this help message and exit + --sysinfo SYSINFO The list of system information items to be collected. + --redis-server REDIS_SERVER + Use an existing Redis server specified by + <host>:<port>; implies an existing Tool Data Sink + and Tool Meisters as well.
--- Finished test-54 pbench-tool-meister-start (status=0) +++ pbench tree state /var/tmp/pbench-test-utils/pbench
diff --git a/agent/util-scripts/gold/pbench-tool-meister-stop/test-55.txt b/agent/util-scripts/gold/pbench-tool-meister-stop/test-55.txt index a2b7d7cf2b..0f987393ae 100644 --- a/agent/util-scripts/gold/pbench-tool-meister-stop/test-55.txt +++ b/agent/util-scripts/gold/pbench-tool-meister-stop/test-55.txt @@ -1,15 +1,21 @@ +++ Running test-55 pbench-tool-meister-stop --help usage: Usage: pbench-tool-meister-stop [--sysinfo <sysinfo>] - [-h] [--sysinfo SYSINFO] [--interrupt] tool_group + [-h] [--sysinfo SYSINFO] [--interrupt] [--redis-server REDIS_SERVER] + tool_group positional arguments: - tool_group The tool group of items being run in the Tool Meisters. + tool_group The tool group name of tools being run in the Tool + Meisters. optional arguments: - -h, --help show this help message and exit - --sysinfo SYSINFO The list of system information items to be collected. - --interrupt Whether or not the stop operation is in response to an - interrupt. + -h, --help show this help message and exit + --sysinfo SYSINFO The list of system information items to be collected. + --interrupt Whether or not the stop operation is in response to an + interrupt. + --redis-server REDIS_SERVER + Use an existing Redis server specified by + <host>:<port>; implies the use of an existing Tool + Data Sink and Tool Meisters as well. --- Finished test-55 pbench-tool-meister-stop (status=0) +++ pbench tree state /var/tmp/pbench-test-utils/pbench
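The new flag pairs up across the two commands; with an externally managed Redis server (and a Tool Data Sink and Tool Meisters already attached to it), a run would look roughly like this, host and port being illustrative:

    pbench-tool-meister-start --redis-server=myhost.example.com:17001 default
    # ... run the benchmark workload ...
    pbench-tool-meister-stop --redis-server=myhost.example.com:17001 default

diff --git a/agent/util-scripts/gold/test-client-tool-meister/test-53.txt b/agent/util-scripts/gold/test-client-tool-meister/test-53.txt index 9c287003d6..0f12446984 100644 --- a/agent/util-scripts/gold/test-client-tool-meister/test-53.txt +++ b/agent/util-scripts/gold/test-client-tool-meister/test-53.txt @@ -1,6 +1,8 @@ +++ Running test-53 test-client-tool-meister "mpstat" tool is now registered for host "testhost.example.com" in group "default" "dcgm" tool is now registered for host "testhost.example.com" in group "default" +pbench-tool-data-sink: connected to redis server localhost:17001 +pbench-tool-meister: connected to redis server localhost:17001 Collecting system information --- Finished test-53 test-client-tool-meister (status=0) +++ pbench tree state @@ -55,7 +57,6 @@ Collecting system information /var/tmp/pbench-test-utils/pbench/mock-run/sysinfo/end/testhost.example.com/tm-sysinfo.out /var/tmp/pbench-test-utils/pbench/mock-run/tm /var/tmp/pbench-test-utils/pbench/mock-run/tm/pbench-tool-data-sink.err -/var/tmp/pbench-test-utils/pbench/mock-run/tm/pbench-tool-data-sink.log /var/tmp/pbench-test-utils/pbench/mock-run/tm/pbench-tool-data-sink.out /var/tmp/pbench-test-utils/pbench/mock-run/tm/redis.conf /var/tmp/pbench-test-utils/pbench/mock-run/tm/redis.log @@ -139,17 +140,15 @@ install_check_output = mpstat: pbench-sysstat-12.0.3 is installed --- mock-run/metadata.log file contents +++ mock-run/tm/pbench-tool-data-sink.err file contents +INFO pbench-tool-data-sink web_server_run -- Running Bottle web server ... Bottle v#.##.## server starting up (using DataSinkWsgiServer(handler_class=.DataSinkWsgiRequestHandler'>))... Listening on http://localhost:8080/ Hit Ctrl-C to quit. ---- mock-run/tm/pbench-tool-data-sink.err file contents -+++ mock-run/tm/pbench-tool-data-sink.log file contents -INFO pbench-tool-data-sink web_server_run -- Running Bottle web server ... INFO pbench-tool-data-sink tm_log_capture -- Running Tool Meister log capture ...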
INFO pbench-tool-data-sink execute -- Tool Data Sink terminating INFO pbench-tool-data-sink web_server_run -- Bottle web server exited ---- mock-run/tm/pbench-tool-data-sink.log file contents +--- mock-run/tm/pbench-tool-data-sink.err file contents +++ mock-run/tm/pbench-tool-data-sink.out file contents --- mock-run/tm/pbench-tool-data-sink.out file contents +++ mock-run/tm/redis.conf file contents @@ -173,7 +172,7 @@ port 17001 +++ mock-run/tm/tm-default-testhost.example.com.err file contents INFO pbench-tool-meister sysinfo -- pbench-sysinfo-dump -- /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/pbench-sysinfo-dump /var/tmp/pbench-test-utils/pbench/mock-run/sysinfo/beg/testhost.example.com block,security_mitigations,sos parallel INFO pbench-tool-meister sysinfo -- testhost.example.com: sysinfo send (no-op) default /var/tmp/pbench-test-utils/pbench/mock-run/sysinfo/beg/testhost.example.com -INFO pbench-tool-meister start -- Started persistent tool dcgm, env PYTHONPATH=/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/bindings:/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/bindings/common, args ['python2', '/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/samples/scripts/dcgm_prometheus.py'] +INFO pbench-tool-meister start -- Started persistent tool dcgm, ['/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/dcgm-exporter'] INFO pbench-tool-meister start -- mpstat: start_tool -- /var/tmp/pbench-test-utils/opt/pbench-agent/tool-scripts/mpstat --start --dir=/var/tmp/pbench-test-utils/pbench/mock-run/0-iter-zero/sample42/tools-default/testhost.example.com --interval=42 --options=forty-two INFO pbench-tool-meister stop -- mpstat: stop_tool -- /var/tmp/pbench-test-utils/opt/pbench-agent/tool-scripts/mpstat --stop --dir=/var/tmp/pbench-test-utils/pbench/mock-run/0-iter-zero/sample42/tools-default/testhost.example.com --interval=42 --options=forty-two INFO pbench-tool-meister wait -- Waiting for transient tool mpstat stop process @@ -196,7 +195,7 @@ INFO pbench-tool-meister __exit__ -- testhost.example.com: terminating pbench-tool-meister-start - verify logging channel up testhost.example.com 0000 INFO pbench-tool-meister sysinfo -- pbench-sysinfo-dump -- /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/pbench-sysinfo-dump /var/tmp/pbench-test-utils/pbench/mock-run/sysinfo/beg/testhost.example.com block,security_mitigations,sos parallel testhost.example.com 0001 INFO pbench-tool-meister sysinfo -- testhost.example.com: sysinfo send (no-op) default /var/tmp/pbench-test-utils/pbench/mock-run/sysinfo/beg/testhost.example.com -testhost.example.com 0002 INFO pbench-tool-meister start -- Started persistent tool dcgm, env PYTHONPATH=/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/bindings:/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/bindings/common, args ['python2', '/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/samples/scripts/dcgm_prometheus.py'] +testhost.example.com 0002 INFO pbench-tool-meister start -- Started persistent tool dcgm, ['/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/dcgm-exporter'] testhost.example.com 0003 INFO pbench-tool-meister start -- mpstat: start_tool -- /var/tmp/pbench-test-utils/opt/pbench-agent/tool-scripts/mpstat --start --dir=/var/tmp/pbench-test-utils/pbench/mock-run/0-iter-zero/sample42/tools-default/testhost.example.com --interval=42 --options=forty-two testhost.example.com 0004 INFO pbench-tool-meister stop -- mpstat: stop_tool -- 
/var/tmp/pbench-test-utils/opt/pbench-agent/tool-scripts/mpstat --stop --dir=/var/tmp/pbench-test-utils/pbench/mock-run/0-iter-zero/sample42/tools-default/testhost.example.com --interval=42 --options=forty-two testhost.example.com 0005 INFO pbench-tool-meister wait -- Waiting for transient tool mpstat stop process @@ -231,10 +230,10 @@ scrape_configs: - job_name: 'testhost.example.com_dcgm' static_configs: - - targets: ['testhost.example.com:8000'] + - targets: ['testhost.example.com:9400'] --- tools-default/prometheus/prometheus.yml file contents +++ tools-default/testhost.example.com/dcgm/dcgm.file file contents -/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/samples/scripts/dcgm_prometheus.py +/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/dcgm-exporter --- tools-default/testhost.example.com/dcgm/dcgm.file file contents +++ tools-default/testhost.example.com/dcgm/tm-dcgm-start.err file contents --- tools-default/testhost.example.com/dcgm/tm-dcgm-start.err file contents @@ -242,10 +241,10 @@ scrape_configs: --- tools-default/testhost.example.com/dcgm/tm-dcgm-start.out file contents +++ test-execution.log file contents /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/cp -rL /etc/ssh/ssh_config.d /var/tmp/pbench-test-utils/pbench/mock-run/ +/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/dcgm-exporter /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/pbench-sysinfo-dump /var/tmp/pbench-test-utils/pbench/mock-run/sysinfo/beg/testhost.example.com block,security_mitigations,sos parallel /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/pbench-sysinfo-dump /var/tmp/pbench-test-utils/pbench/mock-run/sysinfo/end/testhost.example.com block,security_mitigations,sos parallel /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/pidof -x mpstat /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/pidof -x mpstat /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/prometheus --config.file=/var/tmp/pbench-test-utils/pbench/mock-run/tools-default/prometheus/prometheus.yml --storage.tsdb.path=/var/tmp/pbench-test-utils/pbench/mock-run/tools-default/prometheus --web.console.libraries=/usr/share/prometheus/console_libraries --web.console.templates=/usr/share/prometheus/consoles -/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/samples/scripts/dcgm_prometheus.py --- test-execution.log file contents diff --git a/agent/util-scripts/gold/test-client-tool-meister/test-56.txt b/agent/util-scripts/gold/test-client-tool-meister/test-56.txt index 4decd73edd..60af8c011f 100644 --- a/agent/util-scripts/gold/test-client-tool-meister/test-56.txt +++ b/agent/util-scripts/gold/test-client-tool-meister/test-56.txt @@ -6,6 +6,11 @@ "node-exporter" tool is now registered for host "remote_b.example.com", with label "blue", in group "lite" "dcgm" tool is now registered for host "remote_c.example.com", with label "red", in group "lite" "pcp" tool is now registered for host "remote_c.example.com", with label "red", in group "lite" +pbench-tool-data-sink: connected to redis server localhost:17001 +pbench-tool-meister: connected to redis server localhost:17001 +pbench-tool-meister: connected to redis server localhost:17001 +pbench-tool-meister: connected to redis server localhost:17001 +pbench-tool-meister: connected to redis server localhost:17001 Collecting system information --- Finished test-56 test-client-tool-meister (status=0) +++ pbench tree state @@ -142,7 +147,6 @@ Collecting system information 
/var/tmp/pbench-test-utils/pbench/mock-run/sysinfo/end/testhost.example.com/tm-sysinfo.out /var/tmp/pbench-test-utils/pbench/mock-run/tm /var/tmp/pbench-test-utils/pbench/mock-run/tm/pbench-tool-data-sink.err -/var/tmp/pbench-test-utils/pbench/mock-run/tm/pbench-tool-data-sink.log /var/tmp/pbench-test-utils/pbench/mock-run/tm/pbench-tool-data-sink.out /var/tmp/pbench-test-utils/pbench/mock-run/tm/redis.conf /var/tmp/pbench-test-utils/pbench/mock-run/tm/redis.log @@ -241,7 +245,7 @@ INFO pbench-tool-meister __exit__ -- remote_b.example.com: terminating === /var/tmp/pbench-test-utils/pbench/tmp/tm-lite-remote_c.example.com.err: INFO pbench-tool-meister sysinfo -- pbench-sysinfo-dump -- /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/pbench-sysinfo-dump /var/tmp/pbench-test-utils/pbench/tmp/tm.lite.NNNNN.nnnnnnnn/red:remote_c.example.com block,security_mitigations,sos parallel INFO pbench-tool-meister _send_directory -- remote_c.example.com: PUT sysinfo-data completed lite /var/tmp/pbench-test-utils/pbench/tmp/tm.lite.NNNNN.nnnnnnnn/red:remote_c.example.com -INFO pbench-tool-meister start -- Started persistent tool dcgm, env PYTHONPATH=/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/bindings:/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/bindings/common, args ['python2', '/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/samples/scripts/dcgm_prometheus.py'] +INFO pbench-tool-meister start -- Started persistent tool dcgm, ['/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/dcgm-exporter'] INFO pbench-tool-meister start -- Started persistent tool pcp, ['/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/pmcd', '--foreground', '--socket=./pmcd.socket', '--port=55677', '--config=/var/tmp/pbench-test-utils/opt/pbench-agent/templates/pmcd.conf'] INFO pbench-tool-meister stop -- Terminate issued for persistent tool dcgm INFO pbench-tool-meister stop -- Terminate issued for persistent tool pcp @@ -256,7 +260,7 @@ INFO pbench-tool-meister __exit__ -- remote_c.example.com: terminating === /var/tmp/pbench-test-utils/pbench/tmp/tm.lite.NNNNN.nnnnnnnn/blue:remote_b.example.com/node-exporter/tm-node-exporter-start.err: === /var/tmp/pbench-test-utils/pbench/tmp/tm.lite.NNNNN.nnnnnnnn/blue:remote_b.example.com/node-exporter/tm-node-exporter-start.out: === /var/tmp/pbench-test-utils/pbench/tmp/tm.lite.NNNNN.nnnnnnnn/red:remote_c.example.com/dcgm/dcgm.file: -/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/samples/scripts/dcgm_prometheus.py +/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/dcgm-exporter === /var/tmp/pbench-test-utils/pbench/tmp/tm.lite.NNNNN.nnnnnnnn/red:remote_c.example.com/dcgm/tm-dcgm-start.err: === /var/tmp/pbench-test-utils/pbench/tmp/tm.lite.NNNNN.nnnnnnnn/red:remote_c.example.com/dcgm/tm-dcgm-start.out: === /var/tmp/pbench-test-utils/pbench/tmp/tm.lite.NNNNN.nnnnnnnn/red:remote_c.example.com/pcp/pmcd.file: @@ -411,12 +415,9 @@ install_check_output = mpstat: pbench-sysstat-12.0.3 is installed --- mock-run/metadata.log file contents +++ mock-run/tm/pbench-tool-data-sink.err file contents + Bottle v#.##.## server starting up (using DataSinkWsgiServer(handler_class=.DataSinkWsgiRequestHandler'>))... -Listening on http://localhost:8080/ Hit Ctrl-C to quit. 
- ---- mock-run/tm/pbench-tool-data-sink.err file contents -+++ mock-run/tm/pbench-tool-data-sink.log file contents INFO pbench-tool-data-sink execute -- Tool Data Sink terminating INFO pbench-tool-data-sink log_request -- 127.0.0.1 - - "PUT /sysinfo-data/27c00bc325171c4893ef3862b4340952/remote_a.example.com HTTP/1.1" 200 0 INFO pbench-tool-data-sink log_request -- 127.0.0.1 - - "PUT /sysinfo-data/27c00bc325171c4893ef3862b4340952/remote_b.example.com HTTP/1.1" 200 0 @@ -431,7 +432,8 @@ INFO pbench-tool-data-sink log_request -- 127.0.0.1 - - "PUT /tool-data/9e243dae INFO pbench-tool-data-sink tm_log_capture -- Running Tool Meister log capture ... INFO pbench-tool-data-sink web_server_run -- Bottle web server exited INFO pbench-tool-data-sink web_server_run -- Running Bottle web server ... ---- mock-run/tm/pbench-tool-data-sink.log file contents +Listening on http://localhost:8080/ +--- mock-run/tm/pbench-tool-data-sink.err file contents +++ mock-run/tm/pbench-tool-data-sink.out file contents --- mock-run/tm/pbench-tool-data-sink.out file contents +++ mock-run/tm/redis.conf file contents @@ -455,7 +457,7 @@ port 17001 +++ mock-run/tm/tm-lite-testhost.example.com.err file contents INFO pbench-tool-meister sysinfo -- pbench-sysinfo-dump -- /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/pbench-sysinfo-dump /var/tmp/pbench-test-utils/pbench/mock-run/sysinfo/beg/testhost.example.com block,security_mitigations,sos parallel INFO pbench-tool-meister sysinfo -- testhost.example.com: sysinfo send (no-op) lite /var/tmp/pbench-test-utils/pbench/mock-run/sysinfo/beg/testhost.example.com -INFO pbench-tool-meister start -- Started persistent tool dcgm, env PYTHONPATH=/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/bindings:/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/bindings/common, args ['python2', '/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/samples/scripts/dcgm_prometheus.py'] +INFO pbench-tool-meister start -- Started persistent tool dcgm, ['/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/dcgm-exporter'] INFO pbench-tool-meister start -- mpstat: start_tool -- /var/tmp/pbench-test-utils/opt/pbench-agent/tool-scripts/mpstat --start --dir=/var/tmp/pbench-test-utils/pbench/mock-run/0-iter-zero/sample42/tools-lite/testhost.example.com --interval=42 --options=forty-two INFO pbench-tool-meister stop -- mpstat: stop_tool -- /var/tmp/pbench-test-utils/opt/pbench-agent/tool-scripts/mpstat --stop --dir=/var/tmp/pbench-test-utils/pbench/mock-run/0-iter-zero/sample42/tools-lite/testhost.example.com --interval=42 --options=forty-two INFO pbench-tool-meister wait -- Waiting for transient tool mpstat stop process @@ -511,7 +513,7 @@ remote_b.example.com 0016 INFO pbench-tool-meister _send_directory -- remote_b.e remote_b.example.com 0017 INFO pbench-tool-meister __exit__ -- remote_b.example.com: terminating remote_c.example.com 0000 INFO pbench-tool-meister sysinfo -- pbench-sysinfo-dump -- /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/pbench-sysinfo-dump /var/tmp/pbench-test-utils/pbench/tmp/tm.lite.NNNNN.nnnnnnnn/red:remote_c.example.com block,security_mitigations,sos parallel remote_c.example.com 0001 INFO pbench-tool-meister _send_directory -- remote_c.example.com: PUT sysinfo-data completed lite /var/tmp/pbench-test-utils/pbench/tmp/tm.lite.NNNNN.nnnnnnnn/red:remote_c.example.com -remote_c.example.com 0002 INFO pbench-tool-meister start -- Started persistent tool dcgm, env 
PYTHONPATH=/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/bindings:/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/bindings/common, args ['python2', '/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/samples/scripts/dcgm_prometheus.py'] +remote_c.example.com 0002 INFO pbench-tool-meister start -- Started persistent tool dcgm, ['/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/dcgm-exporter'] remote_c.example.com 0003 INFO pbench-tool-meister start -- Started persistent tool pcp, ['/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/pmcd', '--foreground', '--socket=./pmcd.socket', '--port=55677', '--config=/var/tmp/pbench-test-utils/opt/pbench-agent/templates/pmcd.conf'] remote_c.example.com 0004 INFO pbench-tool-meister stop -- Terminate issued for persistent tool dcgm remote_c.example.com 0005 INFO pbench-tool-meister stop -- Terminate issued for persistent tool pcp @@ -522,7 +524,7 @@ remote_c.example.com 0009 INFO pbench-tool-meister _send_directory -- remote_c.e remote_c.example.com 0010 INFO pbench-tool-meister __exit__ -- remote_c.example.com: terminating testhost.example.com 0000 INFO pbench-tool-meister sysinfo -- pbench-sysinfo-dump -- /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/pbench-sysinfo-dump /var/tmp/pbench-test-utils/pbench/mock-run/sysinfo/beg/testhost.example.com block,security_mitigations,sos parallel testhost.example.com 0001 INFO pbench-tool-meister sysinfo -- testhost.example.com: sysinfo send (no-op) lite /var/tmp/pbench-test-utils/pbench/mock-run/sysinfo/beg/testhost.example.com -testhost.example.com 0002 INFO pbench-tool-meister start -- Started persistent tool dcgm, env PYTHONPATH=/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/bindings:/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/bindings/common, args ['python2', '/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/samples/scripts/dcgm_prometheus.py'] +testhost.example.com 0002 INFO pbench-tool-meister start -- Started persistent tool dcgm, ['/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/dcgm-exporter'] testhost.example.com 0003 INFO pbench-tool-meister start -- mpstat: start_tool -- /var/tmp/pbench-test-utils/opt/pbench-agent/tool-scripts/mpstat --start --dir=/var/tmp/pbench-test-utils/pbench/mock-run/0-iter-zero/sample42/tools-lite/testhost.example.com --interval=42 --options=forty-two testhost.example.com 0004 INFO pbench-tool-meister stop -- mpstat: stop_tool -- /var/tmp/pbench-test-utils/opt/pbench-agent/tool-scripts/mpstat --stop --dir=/var/tmp/pbench-test-utils/pbench/mock-run/0-iter-zero/sample42/tools-lite/testhost.example.com --interval=42 --options=forty-two testhost.example.com 0005 INFO pbench-tool-meister wait -- Waiting for transient tool mpstat stop process @@ -575,15 +577,15 @@ scrape_configs: - job_name: 'remote_c.example.com_dcgm' static_configs: - - targets: ['remote_c.example.com:8000'] + - targets: ['remote_c.example.com:9400'] - job_name: 'testhost.example.com_dcgm' static_configs: - - targets: ['testhost.example.com:8000'] + - targets: ['testhost.example.com:9400'] --- tools-lite/prometheus/prometheus.yml file contents +++ tools-lite/testhost.example.com/dcgm/dcgm.file file contents -/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/samples/scripts/dcgm_prometheus.py +/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/dcgm-exporter --- tools-lite/testhost.example.com/dcgm/dcgm.file file contents +++ 
tools-lite/testhost.example.com/dcgm/tm-dcgm-start.err file contents --- tools-lite/testhost.example.com/dcgm/tm-dcgm-start.err file contents @@ -591,6 +593,8 @@ scrape_configs: --- tools-lite/testhost.example.com/dcgm/tm-dcgm-start.out file contents +++ test-execution.log file contents /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/cp -rL /etc/ssh/ssh_config.d /var/tmp/pbench-test-utils/pbench/mock-run/ +/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/dcgm-exporter +/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/dcgm-exporter /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/mpstat -P ALL --options=forty-two 42 /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/mpstat -P ALL --options=forty-two 42 /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/mpstat -P ALL --options=forty-two 42 @@ -615,8 +619,6 @@ scrape_configs: /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/pmlogger --log=- --report -t 3s -c /var/tmp/pbench-test-utils/opt/pbench-agent/templates/pmlogger.conf --host=remote_c.example.com:55677 /var/tmp/pbench-test-utils/pbench/mock-run/tools-lite/pcp/data/red:remote_c.example.com/%Y%m%d.%H.%M /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/pmproxy --log=- --foreground --timeseries --port=44566 --redishost=localhost --redisport=17001 --config=/var/tmp/pbench-test-utils/opt/pbench-agent/templates/pmproxy.conf /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/prometheus --config.file=/var/tmp/pbench-test-utils/pbench/mock-run/tools-lite/prometheus/prometheus.yml --storage.tsdb.path=/var/tmp/pbench-test-utils/pbench/mock-run/tools-lite/prometheus --web.console.libraries=/usr/share/prometheus/console_libraries --web.console.templates=/usr/share/prometheus/consoles -/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/samples/scripts/dcgm_prometheus.py -/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/samples/scripts/dcgm_prometheus.py /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/ssh -o StrictHostKeyChecking=no remote_a.example.com /var/tmp/pbench-test-utils/opt/pbench-agent/util-scripts/tool-meister/pbench-tool-meister-remote localhost 17001 tm-lite-remote_a.example.com yes /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/ssh -o StrictHostKeyChecking=no remote_b.example.com /var/tmp/pbench-test-utils/opt/pbench-agent/util-scripts/tool-meister/pbench-tool-meister-remote localhost 17001 tm-lite-remote_b.example.com yes /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/ssh -o StrictHostKeyChecking=no remote_c.example.com /var/tmp/pbench-test-utils/opt/pbench-agent/util-scripts/tool-meister/pbench-tool-meister-remote localhost 17001 tm-lite-remote_c.example.com yes diff --git a/agent/util-scripts/gold/test-client-tool-meister/test-57.txt b/agent/util-scripts/gold/test-client-tool-meister/test-57.txt index d2ca802e3f..426dace95a 100644 --- a/agent/util-scripts/gold/test-client-tool-meister/test-57.txt +++ b/agent/util-scripts/gold/test-client-tool-meister/test-57.txt @@ -6,6 +6,11 @@ "node-exporter" tool is now registered for host "remote_b.example.com", with label "blue", in group "lite" "dcgm" tool is now registered for host "remote_c.example.com", with label "red", in group "lite" "pcp" tool is now registered for host "remote_c.example.com", with label "red", in group "lite" +pbench-tool-data-sink: connected to redis server localhost:17001 +pbench-tool-meister: connected to redis server localhost:17001 +pbench-tool-meister: 
connected to redis server localhost:17001 +pbench-tool-meister: connected to redis server localhost:17001 +pbench-tool-meister: connected to redis server localhost:17001 system information not collected when --interrupt specified --- Finished test-57 test-client-tool-meister (status=0) +++ pbench tree state @@ -121,7 +126,6 @@ system information not collected when --interrupt specified /var/tmp/pbench-test-utils/pbench/mock-run/sysinfo/beg/testhost.example.com/tm-sysinfo.out /var/tmp/pbench-test-utils/pbench/mock-run/tm /var/tmp/pbench-test-utils/pbench/mock-run/tm/pbench-tool-data-sink.err -/var/tmp/pbench-test-utils/pbench/mock-run/tm/pbench-tool-data-sink.log /var/tmp/pbench-test-utils/pbench/mock-run/tm/pbench-tool-data-sink.out /var/tmp/pbench-test-utils/pbench/mock-run/tm/redis.conf /var/tmp/pbench-test-utils/pbench/mock-run/tm/redis.log @@ -216,7 +220,7 @@ INFO pbench-tool-meister __exit__ -- remote_b.example.com: terminating === /var/tmp/pbench-test-utils/pbench/tmp/tm-lite-remote_c.example.com.err: INFO pbench-tool-meister sysinfo -- pbench-sysinfo-dump -- /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/pbench-sysinfo-dump /var/tmp/pbench-test-utils/pbench/tmp/tm.lite.NNNNN.nnnnnnnn/red:remote_c.example.com block,security_mitigations,sos parallel INFO pbench-tool-meister _send_directory -- remote_c.example.com: PUT sysinfo-data completed lite /var/tmp/pbench-test-utils/pbench/tmp/tm.lite.NNNNN.nnnnnnnn/red:remote_c.example.com -INFO pbench-tool-meister start -- Started persistent tool dcgm, env PYTHONPATH=/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/bindings:/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/bindings/common, args ['python2', '/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/samples/scripts/dcgm_prometheus.py'] +INFO pbench-tool-meister start -- Started persistent tool dcgm, ['/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/dcgm-exporter'] INFO pbench-tool-meister start -- Started persistent tool pcp, ['/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/pmcd', '--foreground', '--socket=./pmcd.socket', '--port=55677', '--config=/var/tmp/pbench-test-utils/opt/pbench-agent/templates/pmcd.conf'] INFO pbench-tool-meister stop -- Terminate issued for persistent tool dcgm INFO pbench-tool-meister stop -- Terminate issued for persistent tool pcp @@ -229,7 +233,7 @@ INFO pbench-tool-meister __exit__ -- remote_c.example.com: terminating === /var/tmp/pbench-test-utils/pbench/tmp/tm.lite.NNNNN.nnnnnnnn/blue:remote_b.example.com/node-exporter/tm-node-exporter-start.err: === /var/tmp/pbench-test-utils/pbench/tmp/tm.lite.NNNNN.nnnnnnnn/blue:remote_b.example.com/node-exporter/tm-node-exporter-start.out: === /var/tmp/pbench-test-utils/pbench/tmp/tm.lite.NNNNN.nnnnnnnn/red:remote_c.example.com/dcgm/dcgm.file: -/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/samples/scripts/dcgm_prometheus.py +/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/dcgm-exporter === /var/tmp/pbench-test-utils/pbench/tmp/tm.lite.NNNNN.nnnnnnnn/red:remote_c.example.com/dcgm/tm-dcgm-start.err: === /var/tmp/pbench-test-utils/pbench/tmp/tm.lite.NNNNN.nnnnnnnn/red:remote_c.example.com/dcgm/tm-dcgm-start.out: === /var/tmp/pbench-test-utils/pbench/tmp/tm.lite.NNNNN.nnnnnnnn/red:remote_c.example.com/pcp/pmcd.file: @@ -384,12 +388,9 @@ install_check_output = mpstat: pbench-sysstat-12.0.3 is installed --- mock-run/metadata.log file contents +++ mock-run/tm/pbench-tool-data-sink.err file contents + Bottle v#.##.## server 
starting up (using DataSinkWsgiServer(handler_class=.DataSinkWsgiRequestHandler'>))... -Listening on http://localhost:8080/ Hit Ctrl-C to quit. - ---- mock-run/tm/pbench-tool-data-sink.err file contents -+++ mock-run/tm/pbench-tool-data-sink.log file contents INFO pbench-tool-data-sink execute -- Tool Data Sink terminating INFO pbench-tool-data-sink log_request -- 127.0.0.1 - - "PUT /sysinfo-data/27c00bc325171c4893ef3862b4340952/remote_a.example.com HTTP/1.1" 200 0 INFO pbench-tool-data-sink log_request -- 127.0.0.1 - - "PUT /sysinfo-data/27c00bc325171c4893ef3862b4340952/remote_b.example.com HTTP/1.1" 200 0 @@ -401,7 +402,8 @@ INFO pbench-tool-data-sink log_request -- 127.0.0.1 - - "PUT /tool-data/9e243dae INFO pbench-tool-data-sink tm_log_capture -- Running Tool Meister log capture ... INFO pbench-tool-data-sink web_server_run -- Bottle web server exited INFO pbench-tool-data-sink web_server_run -- Running Bottle web server ... ---- mock-run/tm/pbench-tool-data-sink.log file contents +Listening on http://localhost:8080/ +--- mock-run/tm/pbench-tool-data-sink.err file contents +++ mock-run/tm/pbench-tool-data-sink.out file contents --- mock-run/tm/pbench-tool-data-sink.out file contents +++ mock-run/tm/redis.conf file contents @@ -425,7 +427,7 @@ port 17001 +++ mock-run/tm/tm-lite-testhost.example.com.err file contents INFO pbench-tool-meister sysinfo -- pbench-sysinfo-dump -- /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/pbench-sysinfo-dump /var/tmp/pbench-test-utils/pbench/mock-run/sysinfo/beg/testhost.example.com block,security_mitigations,sos parallel INFO pbench-tool-meister sysinfo -- testhost.example.com: sysinfo send (no-op) lite /var/tmp/pbench-test-utils/pbench/mock-run/sysinfo/beg/testhost.example.com -INFO pbench-tool-meister start -- Started persistent tool dcgm, env PYTHONPATH=/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/bindings:/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/bindings/common, args ['python2', '/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/samples/scripts/dcgm_prometheus.py'] +INFO pbench-tool-meister start -- Started persistent tool dcgm, ['/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/dcgm-exporter'] INFO pbench-tool-meister start -- mpstat: start_tool -- /var/tmp/pbench-test-utils/opt/pbench-agent/tool-scripts/mpstat --start --dir=/var/tmp/pbench-test-utils/pbench/mock-run/0-iter-zero/sample42/tools-lite/testhost.example.com --interval=42 --options=forty-two INFO pbench-tool-meister stop -- mpstat: stop_tool -- /var/tmp/pbench-test-utils/opt/pbench-agent/tool-scripts/mpstat --stop --dir=/var/tmp/pbench-test-utils/pbench/mock-run/0-iter-zero/sample42/tools-lite/testhost.example.com --interval=42 --options=forty-two INFO pbench-tool-meister wait -- Waiting for transient tool mpstat stop process @@ -475,7 +477,7 @@ remote_b.example.com 0014 INFO pbench-tool-meister wait -- Stopped persistent to remote_b.example.com 0015 INFO pbench-tool-meister __exit__ -- remote_b.example.com: terminating remote_c.example.com 0000 INFO pbench-tool-meister sysinfo -- pbench-sysinfo-dump -- /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/pbench-sysinfo-dump /var/tmp/pbench-test-utils/pbench/tmp/tm.lite.NNNNN.nnnnnnnn/red:remote_c.example.com block,security_mitigations,sos parallel remote_c.example.com 0001 INFO pbench-tool-meister _send_directory -- remote_c.example.com: PUT sysinfo-data completed lite /var/tmp/pbench-test-utils/pbench/tmp/tm.lite.NNNNN.nnnnnnnn/red:remote_c.example.com 
-remote_c.example.com 0002 INFO pbench-tool-meister start -- Started persistent tool dcgm, env PYTHONPATH=/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/bindings:/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/bindings/common, args ['python2', '/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/samples/scripts/dcgm_prometheus.py'] +remote_c.example.com 0002 INFO pbench-tool-meister start -- Started persistent tool dcgm, ['/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/dcgm-exporter'] remote_c.example.com 0003 INFO pbench-tool-meister start -- Started persistent tool pcp, ['/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/pmcd', '--foreground', '--socket=./pmcd.socket', '--port=55677', '--config=/var/tmp/pbench-test-utils/opt/pbench-agent/templates/pmcd.conf'] remote_c.example.com 0004 INFO pbench-tool-meister stop -- Terminate issued for persistent tool dcgm remote_c.example.com 0005 INFO pbench-tool-meister stop -- Terminate issued for persistent tool pcp @@ -484,7 +486,7 @@ remote_c.example.com 0007 INFO pbench-tool-meister wait -- Stopped persistent to remote_c.example.com 0008 INFO pbench-tool-meister __exit__ -- remote_c.example.com: terminating testhost.example.com 0000 INFO pbench-tool-meister sysinfo -- pbench-sysinfo-dump -- /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/pbench-sysinfo-dump /var/tmp/pbench-test-utils/pbench/mock-run/sysinfo/beg/testhost.example.com block,security_mitigations,sos parallel testhost.example.com 0001 INFO pbench-tool-meister sysinfo -- testhost.example.com: sysinfo send (no-op) lite /var/tmp/pbench-test-utils/pbench/mock-run/sysinfo/beg/testhost.example.com -testhost.example.com 0002 INFO pbench-tool-meister start -- Started persistent tool dcgm, env PYTHONPATH=/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/bindings:/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/bindings/common, args ['python2', '/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/samples/scripts/dcgm_prometheus.py'] +testhost.example.com 0002 INFO pbench-tool-meister start -- Started persistent tool dcgm, ['/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/dcgm-exporter'] testhost.example.com 0003 INFO pbench-tool-meister start -- mpstat: start_tool -- /var/tmp/pbench-test-utils/opt/pbench-agent/tool-scripts/mpstat --start --dir=/var/tmp/pbench-test-utils/pbench/mock-run/0-iter-zero/sample42/tools-lite/testhost.example.com --interval=42 --options=forty-two testhost.example.com 0004 INFO pbench-tool-meister stop -- mpstat: stop_tool -- /var/tmp/pbench-test-utils/opt/pbench-agent/tool-scripts/mpstat --stop --dir=/var/tmp/pbench-test-utils/pbench/mock-run/0-iter-zero/sample42/tools-lite/testhost.example.com --interval=42 --options=forty-two testhost.example.com 0005 INFO pbench-tool-meister wait -- Waiting for transient tool mpstat stop process @@ -535,15 +537,15 @@ scrape_configs: - job_name: 'remote_c.example.com_dcgm' static_configs: - - targets: ['remote_c.example.com:8000'] + - targets: ['remote_c.example.com:9400'] - job_name: 'testhost.example.com_dcgm' static_configs: - - targets: ['testhost.example.com:8000'] + - targets: ['testhost.example.com:9400'] --- tools-lite/prometheus/prometheus.yml file contents +++ tools-lite/testhost.example.com/dcgm/dcgm.file file contents -/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/samples/scripts/dcgm_prometheus.py +/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/dcgm-exporter --- 
tools-lite/testhost.example.com/dcgm/dcgm.file file contents +++ tools-lite/testhost.example.com/dcgm/tm-dcgm-start.err file contents --- tools-lite/testhost.example.com/dcgm/tm-dcgm-start.err file contents @@ -551,6 +553,8 @@ scrape_configs: --- tools-lite/testhost.example.com/dcgm/tm-dcgm-start.out file contents +++ test-execution.log file contents /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/cp -rL /etc/ssh/ssh_config.d /var/tmp/pbench-test-utils/pbench/mock-run/ +/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/dcgm-exporter +/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/dcgm-exporter /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/mpstat -P ALL --options=forty-two 42 /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/mpstat -P ALL --options=forty-two 42 /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/mpstat -P ALL --options=forty-two 42 @@ -571,8 +575,6 @@ scrape_configs: /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/pmlogger --log=- --report -t 3s -c /var/tmp/pbench-test-utils/opt/pbench-agent/templates/pmlogger.conf --host=remote_c.example.com:55677 /var/tmp/pbench-test-utils/pbench/mock-run/tools-lite/pcp/data/red:remote_c.example.com/%Y%m%d.%H.%M /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/pmproxy --log=- --foreground --timeseries --port=44566 --redishost=localhost --redisport=17001 --config=/var/tmp/pbench-test-utils/opt/pbench-agent/templates/pmproxy.conf /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/prometheus --config.file=/var/tmp/pbench-test-utils/pbench/mock-run/tools-lite/prometheus/prometheus.yml --storage.tsdb.path=/var/tmp/pbench-test-utils/pbench/mock-run/tools-lite/prometheus --web.console.libraries=/usr/share/prometheus/console_libraries --web.console.templates=/usr/share/prometheus/consoles -/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/samples/scripts/dcgm_prometheus.py -/var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/samples/scripts/dcgm_prometheus.py /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/ssh -o StrictHostKeyChecking=no remote_a.example.com /var/tmp/pbench-test-utils/opt/pbench-agent/util-scripts/tool-meister/pbench-tool-meister-remote localhost 17001 tm-lite-remote_a.example.com yes /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/ssh -o StrictHostKeyChecking=no remote_b.example.com /var/tmp/pbench-test-utils/opt/pbench-agent/util-scripts/tool-meister/pbench-tool-meister-remote localhost 17001 tm-lite-remote_b.example.com yes /var/tmp/pbench-test-utils/opt/pbench-agent/unittest-scripts/ssh -o StrictHostKeyChecking=no remote_c.example.com /var/tmp/pbench-test-utils/opt/pbench-agent/util-scripts/tool-meister/pbench-tool-meister-remote localhost 17001 tm-lite-remote_c.example.com yes diff --git a/agent/util-scripts/gold/test-start-stop-tool-meister/test-51.txt b/agent/util-scripts/gold/test-start-stop-tool-meister/test-51.txt index 969847d2bd..81c79120e9 100644 --- a/agent/util-scripts/gold/test-start-stop-tool-meister/test-51.txt +++ b/agent/util-scripts/gold/test-start-stop-tool-meister/test-51.txt @@ -5,7 +5,7 @@ 3. push tool group data and metadata 4. starting tool data sink 5a. starting localhost tool meister -6. waiting for all successfully spawned SSH processes to show up as subscribers +6. waiting for all successfully created Tool Meister processes to show up as subscribers 8. 
Initialize persistent tools channel pbench-agent-cli-to-client payload, '{"action": "end", "kind": "ds", "status": "success"}' channel pbench-agent-cli-to-client payload, '{"action": "init", "kind": "ds", "status": "success"}' @@ -18,6 +18,8 @@ next pbench-agent-cli-to-client payload from pbench-agent-cli-to-client: {'type': 'message', 'pattern': None, 'channel': b'pbench-agent-cli-to-client', 'data': b'{"action": "end", "kind": "ds", "status": "success"}'} payload from pbench-agent-cli-to-client: {'type': 'message', 'pattern': None, 'channel': b'pbench-agent-cli-to-client', 'data': b'{"action": "init", "kind": "ds", "status": "success"}'} payload from pbench-agent-cli-to-client: {'type': 'message', 'pattern': None, 'channel': b'pbench-agent-cli-to-client', 'data': b'{"action": "startup", "kind": "ds", "status": "success"}'} +pbench-tool-data-sink: connected to redis server localhost:17001 +pbench-tool-meister: connected to redis server localhost:17001 publish end on chan pbench-agent-cli-from-client publish init on chan pbench-agent-cli-from-client publish terminate on chan pbench-agent-cli-from-client @@ -33,7 +35,6 @@ waiting for tool-data-sink (#####) to exit /var/tmp/pbench-test-utils/pbench/mock-run/ssh_config.d /var/tmp/pbench-test-utils/pbench/mock-run/tm /var/tmp/pbench-test-utils/pbench/mock-run/tm/pbench-tool-data-sink.err -/var/tmp/pbench-test-utils/pbench/mock-run/tm/pbench-tool-data-sink.log /var/tmp/pbench-test-utils/pbench/mock-run/tm/pbench-tool-data-sink.out /var/tmp/pbench-test-utils/pbench/mock-run/tm/redis.conf /var/tmp/pbench-test-utils/pbench/mock-run/tm/redis.log @@ -108,22 +109,20 @@ install_check_output = perf: perf is installed --- mock-run/metadata.log file contents +++ mock-run/tm/pbench-tool-data-sink.err file contents +DEBUG pbench-tool-data-sink daemon -- re-constructing Redis server object +DEBUG pbench-tool-data-sink daemon -- reconstructed Redis server object +DEBUG pbench-tool-data-sink driver -- params_key (tds-default): {'benchmark_run_dir': '/var/tmp/pbench-test-utils/pbench/mock-run', 'bind_hostname': 'localhost', 'channel_prefix': 'pbench-agent-cli', 'group': 'default', 'optional_md': {'config': '', 'date': '1900-01-01T00:00:00', 'script': 'fake-bm', 'ssh_opts': '-o StrictHostKeyChecking=no'}, 'tool_metadata': {'persistent': {'dcgm': {'collector': 'prometheus', 'port': '9400'}, 'node-exporter': {'collector': 'prometheus', 'port': '9100'}, 'pcp': {'collector': 'pcp', 'port': '44321'}}, 'transient': {'blktrace': None, 'bpftrace': None, 'cpuacct': None, 'disk': None, 'dm-cache': None, 'docker': None, 'docker-info': None, 'external-data-source': None, 'haproxy-ocp': None, 'iostat': None, 'jmap': None, 'jstack': None, 'kvm-spinlock': None, 'kvmstat': None, 'kvmtrace': None, 'lockstat': None, 'mpstat': None, 'numastat': None, 'oc': None, 'openvswitch': None, 'perf': None, 'pidstat': None, 'pprof': None, 'proc-interrupts': None, 'proc-sched_debug': None, 'proc-vmstat': None, 'prometheus-metrics': None, 'qemu-migrate': None, 'rabbit': None, 'sar': None, 'strace': None, 'sysfs': None, 'systemtap': None, 'tcpdump': None, 'turbostat': None, 'user-tool': None, 'virsh-migrate': None, 'vmstat': None}}, 'tool_trigger': None, 'tools': {'testhost.example.com': {'mpstat': '', 'perf': '--record-opts="-a -freq=100 -g --event=branch-misses --event=cache-misses --event=instructions" --report-opts="-I -g"'}}} +INFO pbench-tool-data-sink web_server_run -- Running Bottle web server ... 
Bottle v#.##.## server starting up (using DataSinkWsgiServer(handler_class=<class 'pbench.agent.tool_data_sink.DataSinkWsgiRequestHandler'>))... Listening on http://localhost:8080/ Hit Ctrl-C to quit. ---- mock-run/tm/pbench-tool-data-sink.err file contents -+++ mock-run/tm/pbench-tool-data-sink.log file contents -DEBUG pbench-tool-data-sink main -- params_key (tds-default): b'{"benchmark_run_dir": "/var/tmp/pbench-test-utils/pbench/mock-run", "bind_hostname": "localhost", "channel_prefix": "pbench-agent-cli", "group": "default", "optional_md": {"config": "", "date": "1900-01-01T00:00:00", "script": "fake-bm", "ssh_opts": "-o StrictHostKeyChecking=no"}, "tool_metadata": {"persistent": {"dcgm": {"collector": "prometheus", "port": "8000"}, "node-exporter": {"collector": "prometheus", "port": "9100"}, "pcp": {"collector": "pcp", "port": "44321"}}, "transient": {"blktrace": null, "bpftrace": null, "cpuacct": null, "disk": null, "dm-cache": null, "docker": null, "docker-info": null, "external-data-source": null, "haproxy-ocp": null, "iostat": null, "jmap": null, "jstack": null, "kvm-spinlock": null, "kvmstat": null, "kvmtrace": null, "lockstat": null, "mpstat": null, "numastat": null, "oc": null, "openvswitch": null, "perf": null, "pidstat": null, "pprof": null, "proc-interrupts": null, "proc-sched_debug": null, "proc-vmstat": null, "prometheus-metrics": null, "qemu-migrate": null, "rabbit": null, "sar": null, "strace": null, "sysfs": null, "systemtap": null, "tcpdump": null, "turbostat": null, "user-tool": null, "virsh-migrate": null, "vmstat": null}}, "tool_trigger": null, "tools": {"testhost.example.com": {"mpstat": "", "perf": "--record-opts=\\"-a -freq=100 -g --event=branch-misses --event=cache-misses --event=instructions\\" --report-opts=\\"-I -g\\""}}}' -DEBUG pbench-tool-data-sink main -- Tool Data Sink parameters check out, daemonizing ... -DEBUG pbench-tool-data-sink main -- constructing Redis() object -DEBUG pbench-tool-data-sink main -- constructed Redis() object -INFO pbench-tool-data-sink web_server_run -- Running Bottle web server ... DEBUG pbench-tool-data-sink run -- Making tool data sink WSGI server ... +DEBUG pbench-tool-data-sink run -- Successfully created WSGI server DEBUG pbench-tool-data-sink run -- Running tool data sink WSGI server ... DEBUG pbench-tool-data-sink __enter__ -- web server 'run' thread started, processing payloads ... INFO pbench-tool-data-sink tm_log_capture -- Running Tool Meister log capture ... -DEBUG pbench-tool-data-sink __enter__ -- 'tm_log_capture' thread started, processing logs ... +DEBUG pbench-tool-data-sink __enter__ -- 'tm_log_capture' thread started, processing Tool Meister logs ...
DEBUG pbench-tool-data-sink fetch_message -- next pbench-agent-cli-from-tms DEBUG pbench-tool-data-sink fetch_message -- payload from pbench-agent-cli-from-tms: {'type': 'message', 'pattern': None, 'channel': b'pbench-agent-cli-from-tms', 'data': b'{"hostname": "testhost.example.com", "hostname_A": "agent.example.com", "hostname_I": "agent.example.com", "hostname_f": "agent.example.com", "hostname_i": "agent.example.com", "hostname_s": "agent.example.com", "installs": {"mpstat": [0, "mpstat: pbench-sysstat-12.0.3 is installed"], "perf": [0, "perf: perf is installed"]}, "kind": "tm", "label": "", "pid": NNNNN, "seqno": "", "sha1": "(unknown)", "version": "(unknown)"}'} DEBUG pbench-tool-data-sink fetch_message -- channel pbench-agent-cli-from-tms payload, '{"hostname": "testhost.example.com", "hostname_A": "agent.example.com", "hostname_I": "agent.example.com", "hostname_f": "agent.example.com", "hostname_i": "agent.example.com", "hostname_s": "agent.example.com", "installs": {"mpstat": [0, "mpstat: pbench-sysstat-12.0.3 is installed"], "perf": [0, "perf: perf is installed"]}, "kind": "tm", "label": "", "pid": NNNNN, "seqno": "", "sha1": "(unknown)", "version": "(unknown)"}' @@ -170,7 +169,7 @@ INFO pbench-tool-data-sink web_server_run -- Bottle web server exited DEBUG pbench-tool-data-sink __exit__ -- Waiting for the web server thread to exit ... DEBUG pbench-tool-data-sink __exit__ -- Waiting for the log capture thread to exit ... DEBUG pbench-tool-data-sink __exit__ -- Exiting Tool Data Sink context ... ---- mock-run/tm/pbench-tool-data-sink.log file contents +--- mock-run/tm/pbench-tool-data-sink.err file contents +++ mock-run/tm/pbench-tool-data-sink.out file contents --- mock-run/tm/pbench-tool-data-sink.out file contents +++ mock-run/tm/redis.conf file contents @@ -194,7 +193,7 @@ port 17001 +++ mock-run/tm/tm-default-testhost.example.com.err file contents DEBUG pbench-tool-meister daemon -- re-constructing Redis server object DEBUG pbench-tool-meister daemon -- re-constructed Redis server object -DEBUG pbench-tool-meister driver -- params_key (tm-default-testhost.example.com): {'benchmark_run_dir': '/var/tmp/pbench-test-utils/pbench/mock-run', 'channel_prefix': 'pbench-agent-cli', 'controller': 'testhost.example.com', 'group': 'default', 'hostname': 'testhost.example.com', 'label': '', 'tool_metadata': {'persistent': {'dcgm': {'collector': 'prometheus', 'port': '8000'}, 'node-exporter': {'collector': 'prometheus', 'port': '9100'}, 'pcp': {'collector': 'pcp', 'port': '44321'}}, 'transient': {'blktrace': None, 'bpftrace': None, 'cpuacct': None, 'disk': None, 'dm-cache': None, 'docker': None, 'docker-info': None, 'external-data-source': None, 'haproxy-ocp': None, 'iostat': None, 'jmap': None, 'jstack': None, 'kvm-spinlock': None, 'kvmstat': None, 'kvmtrace': None, 'lockstat': None, 'mpstat': None, 'numastat': None, 'oc': None, 'openvswitch': None, 'perf': None, 'pidstat': None, 'pprof': None, 'proc-interrupts': None, 'proc-sched_debug': None, 'proc-vmstat': None, 'prometheus-metrics': None, 'qemu-migrate': None, 'rabbit': None, 'sar': None, 'strace': None, 'sysfs': None, 'systemtap': None, 'tcpdump': None, 'turbostat': None, 'user-tool': None, 'virsh-migrate': None, 'vmstat': None}}, 'tools': {'mpstat': '', 'perf': '--record-opts="-a -freq=100 -g --event=branch-misses --event=cache-misses --event=instructions" --report-opts="-I -g"'}} +DEBUG pbench-tool-meister driver -- params_key (tm-default-testhost.example.com): {'benchmark_run_dir': '/var/tmp/pbench-test-utils/pbench/mock-run', 
'channel_prefix': 'pbench-agent-cli', 'controller': 'testhost.example.com', 'group': 'default', 'hostname': 'testhost.example.com', 'label': '', 'tool_metadata': {'persistent': {'dcgm': {'collector': 'prometheus', 'port': '9400'}, 'node-exporter': {'collector': 'prometheus', 'port': '9100'}, 'pcp': {'collector': 'pcp', 'port': '44321'}}, 'transient': {'blktrace': None, 'bpftrace': None, 'cpuacct': None, 'disk': None, 'dm-cache': None, 'docker': None, 'docker-info': None, 'external-data-source': None, 'haproxy-ocp': None, 'iostat': None, 'jmap': None, 'jstack': None, 'kvm-spinlock': None, 'kvmstat': None, 'kvmtrace': None, 'lockstat': None, 'mpstat': None, 'numastat': None, 'oc': None, 'openvswitch': None, 'perf': None, 'pidstat': None, 'pprof': None, 'proc-interrupts': None, 'proc-sched_debug': None, 'proc-vmstat': None, 'prometheus-metrics': None, 'qemu-migrate': None, 'rabbit': None, 'sar': None, 'strace': None, 'sysfs': None, 'systemtap': None, 'tcpdump': None, 'turbostat': None, 'user-tool': None, 'virsh-migrate': None, 'vmstat': None}}, 'tools': {'mpstat': '', 'perf': '--record-opts="-a -freq=100 -g --event=branch-misses --event=cache-misses --event=instructions" --report-opts="-I -g"'}} DEBUG pbench-tool-meister __enter__ -- publish pbench-agent-cli-from-tms DEBUG pbench-tool-meister __enter__ -- published pbench-agent-cli-from-tms DEBUG pbench-tool-meister driver -- waiting ... @@ -226,7 +225,7 @@ DEBUG pbench-tool-meister _send_client_status -- publish pbench-agent-cli-from-t --- mock-run/tm/tm-default-testhost.example.com.out file contents +++ mock-run/tm/tm.logs file contents pbench-tool-meister-start - verify logging channel up -testhost.example.com 0000 DEBUG pbench-tool-meister driver -- params_key (tm-default-testhost.example.com): {'benchmark_run_dir': '/var/tmp/pbench-test-utils/pbench/mock-run', 'channel_prefix': 'pbench-agent-cli', 'controller': 'testhost.example.com', 'group': 'default', 'hostname': 'testhost.example.com', 'label': '', 'tool_metadata': {'persistent': {'dcgm': {'collector': 'prometheus', 'port': '8000'}, 'node-exporter': {'collector': 'prometheus', 'port': '9100'}, 'pcp': {'collector': 'pcp', 'port': '44321'}}, 'transient': {'blktrace': None, 'bpftrace': None, 'cpuacct': None, 'disk': None, 'dm-cache': None, 'docker': None, 'docker-info': None, 'external-data-source': None, 'haproxy-ocp': None, 'iostat': None, 'jmap': None, 'jstack': None, 'kvm-spinlock': None, 'kvmstat': None, 'kvmtrace': None, 'lockstat': None, 'mpstat': None, 'numastat': None, 'oc': None, 'openvswitch': None, 'perf': None, 'pidstat': None, 'pprof': None, 'proc-interrupts': None, 'proc-sched_debug': None, 'proc-vmstat': None, 'prometheus-metrics': None, 'qemu-migrate': None, 'rabbit': None, 'sar': None, 'strace': None, 'sysfs': None, 'systemtap': None, 'tcpdump': None, 'turbostat': None, 'user-tool': None, 'virsh-migrate': None, 'vmstat': None}}, 'tools': {'mpstat': '', 'perf': '--record-opts="-a -freq=100 -g --event=branch-misses --event=cache-misses --event=instructions" --report-opts="-I -g"'}} +testhost.example.com 0000 DEBUG pbench-tool-meister driver -- params_key (tm-default-testhost.example.com): {'benchmark_run_dir': '/var/tmp/pbench-test-utils/pbench/mock-run', 'channel_prefix': 'pbench-agent-cli', 'controller': 'testhost.example.com', 'group': 'default', 'hostname': 'testhost.example.com', 'label': '', 'tool_metadata': {'persistent': {'dcgm': {'collector': 'prometheus', 'port': '9400'}, 'node-exporter': {'collector': 'prometheus', 'port': '9100'}, 'pcp': {'collector': 'pcp', 
'port': '44321'}}, 'transient': {'blktrace': None, 'bpftrace': None, 'cpuacct': None, 'disk': None, 'dm-cache': None, 'docker': None, 'docker-info': None, 'external-data-source': None, 'haproxy-ocp': None, 'iostat': None, 'jmap': None, 'jstack': None, 'kvm-spinlock': None, 'kvmstat': None, 'kvmtrace': None, 'lockstat': None, 'mpstat': None, 'numastat': None, 'oc': None, 'openvswitch': None, 'perf': None, 'pidstat': None, 'pprof': None, 'proc-interrupts': None, 'proc-sched_debug': None, 'proc-vmstat': None, 'prometheus-metrics': None, 'qemu-migrate': None, 'rabbit': None, 'sar': None, 'strace': None, 'sysfs': None, 'systemtap': None, 'tcpdump': None, 'turbostat': None, 'user-tool': None, 'virsh-migrate': None, 'vmstat': None}}, 'tools': {'mpstat': '', 'perf': '--record-opts="-a -freq=100 -g --event=branch-misses --event=cache-misses --event=instructions" --report-opts="-I -g"'}} testhost.example.com 0001 DEBUG pbench-tool-meister __enter__ -- publish pbench-agent-cli-from-tms testhost.example.com 0002 DEBUG pbench-tool-meister __enter__ -- published pbench-agent-cli-from-tms testhost.example.com 0003 DEBUG pbench-tool-meister driver -- waiting ... diff --git a/agent/util-scripts/gold/test-start-stop-tool-meister/test-52.txt b/agent/util-scripts/gold/test-start-stop-tool-meister/test-52.txt index 38cef83404..64cfa744a8 100644 --- a/agent/util-scripts/gold/test-start-stop-tool-meister/test-52.txt +++ b/agent/util-scripts/gold/test-start-stop-tool-meister/test-52.txt @@ -5,7 +5,7 @@ 3. push tool group data and metadata 4. starting tool data sink 5a. starting localhost tool meister -6. waiting for all successfully spawned SSH processes to show up as subscribers +6. waiting for all successfully created Tool Meister processes to show up as subscribers 8. Initialize persistent tools channel pbench-agent-cli-to-client payload, '{"action": "end", "kind": "ds", "status": "success"}' channel pbench-agent-cli-to-client payload, '{"action": "init", "kind": "ds", "status": "success"}' @@ -18,6 +18,8 @@ next pbench-agent-cli-to-client payload from pbench-agent-cli-to-client: {'type': 'message', 'pattern': None, 'channel': b'pbench-agent-cli-to-client', 'data': b'{"action": "end", "kind": "ds", "status": "success"}'} payload from pbench-agent-cli-to-client: {'type': 'message', 'pattern': None, 'channel': b'pbench-agent-cli-to-client', 'data': b'{"action": "init", "kind": "ds", "status": "success"}'} payload from pbench-agent-cli-to-client: {'type': 'message', 'pattern': None, 'channel': b'pbench-agent-cli-to-client', 'data': b'{"action": "startup", "kind": "ds", "status": "success"}'} +pbench-tool-data-sink: connected to redis server localhost:17001 +pbench-tool-meister: connected to redis server localhost:17001 publish end on chan pbench-agent-cli-from-client publish init on chan pbench-agent-cli-from-client publish terminate on chan pbench-agent-cli-from-client @@ -33,7 +35,6 @@ waiting for tool-data-sink (#####) to exit /var/tmp/pbench-test-utils/pbench/mock-run/ssh_config.d /var/tmp/pbench-test-utils/pbench/mock-run/tm /var/tmp/pbench-test-utils/pbench/mock-run/tm/pbench-tool-data-sink.err -/var/tmp/pbench-test-utils/pbench/mock-run/tm/pbench-tool-data-sink.log /var/tmp/pbench-test-utils/pbench/mock-run/tm/pbench-tool-data-sink.out /var/tmp/pbench-test-utils/pbench/mock-run/tm/redis.conf /var/tmp/pbench-test-utils/pbench/mock-run/tm/redis.log @@ -108,22 +109,20 @@ install_check_output = perf: perf is installed --- mock-run/metadata.log file contents +++ mock-run/tm/pbench-tool-data-sink.err file 
contents +DEBUG pbench-tool-data-sink daemon -- re-constructing Redis server object +DEBUG pbench-tool-data-sink daemon -- reconstructed Redis server object +DEBUG pbench-tool-data-sink driver -- params_key (tds-mygroup): {'benchmark_run_dir': '/var/tmp/pbench-test-utils/pbench/mock-run', 'bind_hostname': 'localhost', 'channel_prefix': 'pbench-agent-cli', 'group': 'mygroup', 'optional_md': {'config': '', 'date': '1900-01-01T00:00:00', 'script': 'fake-bm', 'ssh_opts': '-o StrictHostKeyChecking=no'}, 'tool_metadata': {'persistent': {'dcgm': {'collector': 'prometheus', 'port': '9400'}, 'node-exporter': {'collector': 'prometheus', 'port': '9100'}, 'pcp': {'collector': 'pcp', 'port': '44321'}}, 'transient': {'blktrace': None, 'bpftrace': None, 'cpuacct': None, 'disk': None, 'dm-cache': None, 'docker': None, 'docker-info': None, 'external-data-source': None, 'haproxy-ocp': None, 'iostat': None, 'jmap': None, 'jstack': None, 'kvm-spinlock': None, 'kvmstat': None, 'kvmtrace': None, 'lockstat': None, 'mpstat': None, 'numastat': None, 'oc': None, 'openvswitch': None, 'perf': None, 'pidstat': None, 'pprof': None, 'proc-interrupts': None, 'proc-sched_debug': None, 'proc-vmstat': None, 'prometheus-metrics': None, 'qemu-migrate': None, 'rabbit': None, 'sar': None, 'strace': None, 'sysfs': None, 'systemtap': None, 'tcpdump': None, 'turbostat': None, 'user-tool': None, 'virsh-migrate': None, 'vmstat': None}}, 'tool_trigger': None, 'tools': {'testhost.example.com': {'mpstat': '', 'perf': '--record-opts="-a -freq=100 -g --event=branch-misses --event=cache-misses --event=instructions" --report-opts="-I -g"'}}} +INFO pbench-tool-data-sink web_server_run -- Running Bottle web server ... Bottle v#.##.## server starting up (using DataSinkWsgiServer(handler_class=<class 'pbench.agent.tool_data_sink.DataSinkWsgiRequestHandler'>))... Listening on http://localhost:8080/ Hit Ctrl-C to quit. ---- mock-run/tm/pbench-tool-data-sink.err file contents -+++ mock-run/tm/pbench-tool-data-sink.log file contents -DEBUG pbench-tool-data-sink main -- params_key (tds-mygroup): b'{"benchmark_run_dir": "/var/tmp/pbench-test-utils/pbench/mock-run", "bind_hostname": "localhost", "channel_prefix": "pbench-agent-cli", "group": "mygroup", "optional_md": {"config": "", "date": "1900-01-01T00:00:00", "script": "fake-bm", "ssh_opts": "-o StrictHostKeyChecking=no"}, "tool_metadata": {"persistent": {"dcgm": {"collector": "prometheus", "port": "8000"}, "node-exporter": {"collector": "prometheus", "port": "9100"}, "pcp": {"collector": "pcp", "port": "44321"}}, "transient": {"blktrace": null, "bpftrace": null, "cpuacct": null, "disk": null, "dm-cache": null, "docker": null, "docker-info": null, "external-data-source": null, "haproxy-ocp": null, "iostat": null, "jmap": null, "jstack": null, "kvm-spinlock": null, "kvmstat": null, "kvmtrace": null, "lockstat": null, "mpstat": null, "numastat": null, "oc": null, "openvswitch": null, "perf": null, "pidstat": null, "pprof": null, "proc-interrupts": null, "proc-sched_debug": null, "proc-vmstat": null, "prometheus-metrics": null, "qemu-migrate": null, "rabbit": null, "sar": null, "strace": null, "sysfs": null, "systemtap": null, "tcpdump": null, "turbostat": null, "user-tool": null, "virsh-migrate": null, "vmstat": null}}, "tool_trigger": null, "tools": {"testhost.example.com": {"mpstat": "", "perf": "--record-opts=\\"-a -freq=100 -g --event=branch-misses --event=cache-misses --event=instructions\\" --report-opts=\\"-I -g\\""}}}' -DEBUG pbench-tool-data-sink main -- Tool Data Sink parameters check out, daemonizing ...
-DEBUG pbench-tool-data-sink main -- constructing Redis() object -DEBUG pbench-tool-data-sink main -- constructed Redis() object -INFO pbench-tool-data-sink web_server_run -- Running Bottle web server ... DEBUG pbench-tool-data-sink run -- Making tool data sink WSGI server ... +DEBUG pbench-tool-data-sink run -- Successfully created WSGI server DEBUG pbench-tool-data-sink run -- Running tool data sink WSGI server ... DEBUG pbench-tool-data-sink __enter__ -- web server 'run' thread started, processing payloads ... INFO pbench-tool-data-sink tm_log_capture -- Running Tool Meister log capture ... -DEBUG pbench-tool-data-sink __enter__ -- 'tm_log_capture' thread started, processing logs ... +DEBUG pbench-tool-data-sink __enter__ -- 'tm_log_capture' thread started, processing Tool Meister logs ... DEBUG pbench-tool-data-sink fetch_message -- next pbench-agent-cli-from-tms DEBUG pbench-tool-data-sink fetch_message -- payload from pbench-agent-cli-from-tms: {'type': 'message', 'pattern': None, 'channel': b'pbench-agent-cli-from-tms', 'data': b'{"hostname": "testhost.example.com", "hostname_A": "agent.example.com", "hostname_I": "agent.example.com", "hostname_f": "agent.example.com", "hostname_i": "agent.example.com", "hostname_s": "agent.example.com", "installs": {"mpstat": [0, "mpstat: pbench-sysstat-12.0.3 is installed"], "perf": [0, "perf: perf is installed"]}, "kind": "tm", "label": "", "pid": NNNNN, "seqno": "", "sha1": "(unknown)", "version": "(unknown)"}'} DEBUG pbench-tool-data-sink fetch_message -- channel pbench-agent-cli-from-tms payload, '{"hostname": "testhost.example.com", "hostname_A": "agent.example.com", "hostname_I": "agent.example.com", "hostname_f": "agent.example.com", "hostname_i": "agent.example.com", "hostname_s": "agent.example.com", "installs": {"mpstat": [0, "mpstat: pbench-sysstat-12.0.3 is installed"], "perf": [0, "perf: perf is installed"]}, "kind": "tm", "label": "", "pid": NNNNN, "seqno": "", "sha1": "(unknown)", "version": "(unknown)"}' @@ -170,7 +169,7 @@ INFO pbench-tool-data-sink web_server_run -- Bottle web server exited DEBUG pbench-tool-data-sink __exit__ -- Waiting for the web server thread to exit ... DEBUG pbench-tool-data-sink __exit__ -- Waiting for the log capture thread to exit ... DEBUG pbench-tool-data-sink __exit__ -- Exiting Tool Data Sink context ... 
---- mock-run/tm/pbench-tool-data-sink.log file contents +--- mock-run/tm/pbench-tool-data-sink.err file contents +++ mock-run/tm/pbench-tool-data-sink.out file contents --- mock-run/tm/pbench-tool-data-sink.out file contents +++ mock-run/tm/redis.conf file contents @@ -194,7 +193,7 @@ port 17001 +++ mock-run/tm/tm-mygroup-testhost.example.com.err file contents DEBUG pbench-tool-meister daemon -- re-constructing Redis server object DEBUG pbench-tool-meister daemon -- re-constructed Redis server object -DEBUG pbench-tool-meister driver -- params_key (tm-mygroup-testhost.example.com): {'benchmark_run_dir': '/var/tmp/pbench-test-utils/pbench/mock-run', 'channel_prefix': 'pbench-agent-cli', 'controller': 'testhost.example.com', 'group': 'mygroup', 'hostname': 'testhost.example.com', 'label': '', 'tool_metadata': {'persistent': {'dcgm': {'collector': 'prometheus', 'port': '8000'}, 'node-exporter': {'collector': 'prometheus', 'port': '9100'}, 'pcp': {'collector': 'pcp', 'port': '44321'}}, 'transient': {'blktrace': None, 'bpftrace': None, 'cpuacct': None, 'disk': None, 'dm-cache': None, 'docker': None, 'docker-info': None, 'external-data-source': None, 'haproxy-ocp': None, 'iostat': None, 'jmap': None, 'jstack': None, 'kvm-spinlock': None, 'kvmstat': None, 'kvmtrace': None, 'lockstat': None, 'mpstat': None, 'numastat': None, 'oc': None, 'openvswitch': None, 'perf': None, 'pidstat': None, 'pprof': None, 'proc-interrupts': None, 'proc-sched_debug': None, 'proc-vmstat': None, 'prometheus-metrics': None, 'qemu-migrate': None, 'rabbit': None, 'sar': None, 'strace': None, 'sysfs': None, 'systemtap': None, 'tcpdump': None, 'turbostat': None, 'user-tool': None, 'virsh-migrate': None, 'vmstat': None}}, 'tools': {'mpstat': '', 'perf': '--record-opts="-a -freq=100 -g --event=branch-misses --event=cache-misses --event=instructions" --report-opts="-I -g"'}} +DEBUG pbench-tool-meister driver -- params_key (tm-mygroup-testhost.example.com): {'benchmark_run_dir': '/var/tmp/pbench-test-utils/pbench/mock-run', 'channel_prefix': 'pbench-agent-cli', 'controller': 'testhost.example.com', 'group': 'mygroup', 'hostname': 'testhost.example.com', 'label': '', 'tool_metadata': {'persistent': {'dcgm': {'collector': 'prometheus', 'port': '9400'}, 'node-exporter': {'collector': 'prometheus', 'port': '9100'}, 'pcp': {'collector': 'pcp', 'port': '44321'}}, 'transient': {'blktrace': None, 'bpftrace': None, 'cpuacct': None, 'disk': None, 'dm-cache': None, 'docker': None, 'docker-info': None, 'external-data-source': None, 'haproxy-ocp': None, 'iostat': None, 'jmap': None, 'jstack': None, 'kvm-spinlock': None, 'kvmstat': None, 'kvmtrace': None, 'lockstat': None, 'mpstat': None, 'numastat': None, 'oc': None, 'openvswitch': None, 'perf': None, 'pidstat': None, 'pprof': None, 'proc-interrupts': None, 'proc-sched_debug': None, 'proc-vmstat': None, 'prometheus-metrics': None, 'qemu-migrate': None, 'rabbit': None, 'sar': None, 'strace': None, 'sysfs': None, 'systemtap': None, 'tcpdump': None, 'turbostat': None, 'user-tool': None, 'virsh-migrate': None, 'vmstat': None}}, 'tools': {'mpstat': '', 'perf': '--record-opts="-a -freq=100 -g --event=branch-misses --event=cache-misses --event=instructions" --report-opts="-I -g"'}} DEBUG pbench-tool-meister __enter__ -- publish pbench-agent-cli-from-tms DEBUG pbench-tool-meister __enter__ -- published pbench-agent-cli-from-tms DEBUG pbench-tool-meister driver -- waiting ... 
@@ -226,7 +225,7 @@ DEBUG pbench-tool-meister _send_client_status -- publish pbench-agent-cli-from-t --- mock-run/tm/tm-mygroup-testhost.example.com.out file contents +++ mock-run/tm/tm.logs file contents pbench-tool-meister-start - verify logging channel up -testhost.example.com 0000 DEBUG pbench-tool-meister driver -- params_key (tm-mygroup-testhost.example.com): {'benchmark_run_dir': '/var/tmp/pbench-test-utils/pbench/mock-run', 'channel_prefix': 'pbench-agent-cli', 'controller': 'testhost.example.com', 'group': 'mygroup', 'hostname': 'testhost.example.com', 'label': '', 'tool_metadata': {'persistent': {'dcgm': {'collector': 'prometheus', 'port': '8000'}, 'node-exporter': {'collector': 'prometheus', 'port': '9100'}, 'pcp': {'collector': 'pcp', 'port': '44321'}}, 'transient': {'blktrace': None, 'bpftrace': None, 'cpuacct': None, 'disk': None, 'dm-cache': None, 'docker': None, 'docker-info': None, 'external-data-source': None, 'haproxy-ocp': None, 'iostat': None, 'jmap': None, 'jstack': None, 'kvm-spinlock': None, 'kvmstat': None, 'kvmtrace': None, 'lockstat': None, 'mpstat': None, 'numastat': None, 'oc': None, 'openvswitch': None, 'perf': None, 'pidstat': None, 'pprof': None, 'proc-interrupts': None, 'proc-sched_debug': None, 'proc-vmstat': None, 'prometheus-metrics': None, 'qemu-migrate': None, 'rabbit': None, 'sar': None, 'strace': None, 'sysfs': None, 'systemtap': None, 'tcpdump': None, 'turbostat': None, 'user-tool': None, 'virsh-migrate': None, 'vmstat': None}}, 'tools': {'mpstat': '', 'perf': '--record-opts="-a -freq=100 -g --event=branch-misses --event=cache-misses --event=instructions" --report-opts="-I -g"'}} +testhost.example.com 0000 DEBUG pbench-tool-meister driver -- params_key (tm-mygroup-testhost.example.com): {'benchmark_run_dir': '/var/tmp/pbench-test-utils/pbench/mock-run', 'channel_prefix': 'pbench-agent-cli', 'controller': 'testhost.example.com', 'group': 'mygroup', 'hostname': 'testhost.example.com', 'label': '', 'tool_metadata': {'persistent': {'dcgm': {'collector': 'prometheus', 'port': '9400'}, 'node-exporter': {'collector': 'prometheus', 'port': '9100'}, 'pcp': {'collector': 'pcp', 'port': '44321'}}, 'transient': {'blktrace': None, 'bpftrace': None, 'cpuacct': None, 'disk': None, 'dm-cache': None, 'docker': None, 'docker-info': None, 'external-data-source': None, 'haproxy-ocp': None, 'iostat': None, 'jmap': None, 'jstack': None, 'kvm-spinlock': None, 'kvmstat': None, 'kvmtrace': None, 'lockstat': None, 'mpstat': None, 'numastat': None, 'oc': None, 'openvswitch': None, 'perf': None, 'pidstat': None, 'pprof': None, 'proc-interrupts': None, 'proc-sched_debug': None, 'proc-vmstat': None, 'prometheus-metrics': None, 'qemu-migrate': None, 'rabbit': None, 'sar': None, 'strace': None, 'sysfs': None, 'systemtap': None, 'tcpdump': None, 'turbostat': None, 'user-tool': None, 'virsh-migrate': None, 'vmstat': None}}, 'tools': {'mpstat': '', 'perf': '--record-opts="-a -freq=100 -g --event=branch-misses --event=cache-misses --event=instructions" --report-opts="-I -g"'}} testhost.example.com 0001 DEBUG pbench-tool-meister __enter__ -- publish pbench-agent-cli-from-tms testhost.example.com 0002 DEBUG pbench-tool-meister __enter__ -- published pbench-agent-cli-from-tms testhost.example.com 0003 DEBUG pbench-tool-meister driver -- waiting ... 
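For context before the client changes below: the new PBENCH_REDIS_SERVER environment variable carries a host:port pair, which the client splits on the first colon and validates before connecting. A minimal sketch of that parsing step, assuming only what the hunk below shows (the helper name parse_redis_server is illustrative, not part of the patch):

    import os

    def parse_redis_server(default_port):
        # "host:port" -> (host, port); mirrors the split(":", 1) and
        # int() validation performed by pbench-tool-meister-client.
        spec = os.environ.get("PBENCH_REDIS_SERVER", f"localhost:{default_port}")
        parts = spec.split(":", 1)
        if len(parts) != 2:
            raise ValueError(f"Bad Redis server specified, {spec!r}")
        try:
            port = int(parts[1])
        except ValueError:
            raise ValueError(f"Bad port for Redis server specified in {spec!r}")
        return parts[0], port

For example, PBENCH_REDIS_SERVER=perf42.example.com:17001 (a hypothetical host) yields ("perf42.example.com", 17001).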
diff --git a/agent/util-scripts/pbench-tool-meister-client b/agent/util-scripts/pbench-tool-meister-client index 6168dd2196..3226264591 100755 --- a/agent/util-scripts/pbench-tool-meister-client +++ b/agent/util-scripts/pbench-tool-meister-client @@ -13,7 +13,7 @@ import sys from pbench.agent.constants import ( cli_tm_allowed_actions, cli_tm_channel_prefix, - redis_port, + def_redis_port, ) from pbench.agent.tool_meister_client import Client @@ -47,30 +47,48 @@ def main(argv): try: group = argv[1] except IndexError: - raise Exception("Missing group argument") + logger.error("Missing group argument") + return 1 try: directory = argv[2] except IndexError: - raise Exception("Missing directory argument") + logger.error("Missing directory argument") + return 1 try: action = argv[3] except IndexError: - raise Exception("Missing action argument") + logger.error("Missing action argument") + return 1 else: if action not in cli_tm_allowed_actions: - raise Exception( - f"Unrecognized action, '{action}', allowed actions are:" - f" {cli_tm_allowed_actions}" + logger.error( + "Unrecognized action, '%s', allowed actions are: %s", + action, + cli_tm_allowed_actions, ) + return 1 elif action == "kill": # FIXME: we need to implement the gritty method of killing all the # tool meisters, locally and remotely, and ensuring they are all # properly shut down. return 0 + redis_server = os.environ.get("PBENCH_REDIS_SERVER", f"localhost:{def_redis_port}") + parts = redis_server.split(":", 1) + if len(parts) != 2: + logger.error("Bad Redis server specified, %r", redis_server) + return 1 + try: + redis_port = int(parts[1]) + except Exception: + logger.error("Bad port for Redis server specified in %r", redis_server) + return 1 + else: + redis_host = parts[0] + # The Redis server is always running on the local host with the CLI. with Client( - redis_host="localhost", + redis_host=redis_host, redis_port=redis_port, channel_prefix=cli_tm_channel_prefix, logger=logger, diff --git a/agent/util-scripts/pbench-tool-meister-start b/agent/util-scripts/pbench-tool-meister-start index 214c28c825..7816b01ff1 100755 --- a/agent/util-scripts/pbench-tool-meister-start +++ b/agent/util-scripts/pbench-tool-meister-start @@ -82,9 +82,8 @@ from pathlib import Path import redis -from pbench.agent import PbenchAgentConfig from pbench.agent.constants import ( - redis_port, + def_redis_port, cli_tm_channel_prefix, tm_channel_suffix_to_client, tm_channel_suffix_from_client, @@ -92,16 +91,11 @@ from pbench.agent.constants import ( ) from pbench.agent.redis import RedisChannelSubscriber from pbench.agent.tool_data_sink import main as tds_main +from pbench.agent.tool_group import BadToolGroup, ToolGroup from pbench.agent.tool_meister import main as tm_main from pbench.agent.tool_meister_client import Client from pbench.agent.toolmetadata import ToolMetadata -from pbench.agent.utils import ( - cli_verify_sysinfo, - info_log, - verify_tool_group, - BadToolGroup, -) -from pbench.common.exceptions import BadConfig +from pbench.agent.utils import cli_verify_sysinfo, error_log, info_log # Redis server configuration template for pbench's use @@ -116,120 +110,6 @@ port {redis_port:d} """ -class ToolGroup: - """Provides an in-memory representation of the registered tools as recorded - on-disk. - """ - - def __init__(self, group): - """Construct a ToolGroup object from the on-disk data of the given - tool group.
- - If the given tool group is valid, the contents are read into the three - dictionary structures: - - "toolnames" - each tool name is the key, with separate dictionaries - for each registered host - - "hostnames" - each registered host is the key, with separate - dictionaries for each tool registered on that host - - "labels" - each registered host name, that has a label, is the key, - and the label as the value; if a host is not labeled, it does not - show up in this dictionary - - Raises BadToolGroup via the verify_tool_group() method on error. - """ - self.tg_dir = verify_tool_group(group) - self.group = group - - # __trigger__ - try: - _trigger = (self.tg_dir / "__trigger__").read_text() - except OSError as ex: - if ex.errno != errno.ENOENT: - raise - # Ignore missing trigger file - self.trigger = None - else: - if len(_trigger) == 0: - # Ignore empty trigger file contents - self.trigger = None - else: - self.trigger = _trigger - - # toolnames - Dict with tool name as the key, dictionary with host - # names and parameters for each host - self.toolnames = {} - # hostnames - Dict with host name as the key, dictionary with tool - # names and parameters for each tool - self.hostnames = {} - self.labels = {} - for hdirent in os.listdir(self.tg_dir): - if hdirent == "__trigger__": - # Ignore handled above - continue - if not (self.tg_dir / hdirent).is_dir(): - # Ignore wayward non-directory files - continue - # We assume this directory is a hostname. - host = hdirent - if host not in self.hostnames: - self.hostnames[host] = {} - for tdirent in os.listdir(self.tg_dir / host): - if tdirent == "__label__": - self.labels[host] = ( - (self.tg_dir / host / tdirent).read_text().strip() - ) - continue - if tdirent.endswith("__noinstall__"): - # FIXME: ignore "noinstall" for now, tools are going to be - # in containers so this does not make sense going forward. - continue - # This directory entry is the name of a tool. - tool = tdirent - tool_opts_raw_lines = ( - (self.tg_dir / host / tool).read_text().split("\n") - ) - tool_opts_lines = [] - for line_raw in tool_opts_raw_lines: - line = line_raw.strip() - if not line: - # Ignore blank lines - continue - tool_opts_lines.append(line) - tool_opts = " ".join(tool_opts_lines) - if tool not in self.toolnames: - self.toolnames[tool] = {} - self.toolnames[tool][host] = tool_opts - - def get_tools(self, host): - """get_tools - given a target host, return a dictionary with the list - of tool names as keys, and the values being their options for that - host. - """ - tools = dict() - for tool, opts in self.toolnames.items(): - try: - host_opts = opts[host] - except KeyError: - # This host does not have this tool registered, ignore. - pass - else: - tools[tool] = host_opts - return tools - - def get_label(self, host): - """get_label - given a target host, return the label associated with - that host. - """ - try: - label = self.labels[host] - except KeyError: - label = "" - return label - - def wait_for_tds(chan, logger): """wait_for_tds - Wait for the Tool Data Sink to report back success or failure regarding the Tool Meister environment setup. @@ -260,14 +140,13 @@ def wait_for_tds(chan, logger): class ReturnCode: - """ReturnCode - symbolic return codes for when the main program of + """ReturnCode - symbolic return codes for the main program of pbench-tool-meister-start. 
""" SUCCESS = 0 BADTOOLGROUP = 1 BADAGENTCONFIG = 2 - EXCAGENTCONFIG = 3 MISSINGINSTALLDIR = 4 EXCINSTALLDIR = 5 BADTOOLMETADATA = 6 @@ -290,6 +169,10 @@ class ReturnCode: EXCTOOLGROUPDIR = 23 SYSINFOFAILED = 24 INITFAILED = 25 + TDSSTARTUPTIMEOUT = 26 + TOOLGROUPEXC = 27 + BADREDISARG = 28 + BADREDISPORT = 29 # Kill sub-codes KILL_SUCCESS = 0 @@ -327,7 +210,7 @@ def kill_redis_server(pid_file, ret_val): else: try: pid = int(raw_pid) - except Exception: + except ValueError: # Bad pid value return ReturnCode.kill_ret_code(ReturnCode.KILL_BADPID, ret_val) try: @@ -395,10 +278,10 @@ def main(_prog, cli_params): tool_group = ToolGroup(group) except BadToolGroup as exc: logger.error(str(exc)) - return 1 + return ReturnCode.BADTOOLGROUP except Exception: logger.exception("failed to load tool group data for '%s'", group) - return ReturnCode.BADTOOLGROUP + return ReturnCode.TOOLGROUPEXC else: if not tool_group.hostnames: # If a tool group has no tools registered, then there will be no @@ -411,38 +294,31 @@ def main(_prog, cli_params): # Load the tool metadata try: - inst_dir = PbenchAgentConfig( - os.environ["_PBENCH_AGENT_CONFIG"] - ).pbench_install_dir - except BadConfig as exc: - logger.error("%s", exc) + inst_dir = os.environ["pbench_install_dir"] + except KeyError: + logger.error( + "The required 'pbench_install_dir' environment variable appears to be missing" + ) return ReturnCode.BADAGENTCONFIG - except Exception as exc: + try: + tm_start_path = Path(inst_dir).resolve(strict=True) + except FileNotFoundError: logger.error( - "Unexpected error encountered loading pbench agent configuration: '%s'", exc + "Unable to determine proper installation directory, '%s' not found", + inst_dir, ) - return ReturnCode.EXCAGENTCONFIG + return ReturnCode.MISSINGINSTALLDIR + except Exception as exc: + logger.exception( + "Unexpected error encountered resolving installation directory: '%s'", exc, + ) + return ReturnCode.EXCINSTALLDIR else: try: - tm_start_path = Path(inst_dir).resolve(strict=True) - except FileNotFoundError: - logger.error( - "Unable to determine proper installation directory, '%s' not found", - inst_dir, - ) - return ReturnCode.MISSINGINSTALLDIR - except Exception as exc: - logger.exception( - "Unexpected error encountered resolving installation directory: '%s'", - exc, - ) - return ReturnCode.EXCINSTALLDIR - else: - try: - tool_metadata = ToolMetadata(tm_start_path) - except Exception: - logger.exception("failed to load tool metadata") - return ReturnCode.BADTOOLMETADATA + tool_metadata = ToolMetadata(tm_start_path) + except Exception: + logger.exception("failed to load tool metadata") + return ReturnCode.BADTOOLMETADATA # Load and verify required and optional environment variables. try: @@ -463,7 +339,8 @@ def main(_prog, cli_params): if not full_hostname or not hostname: logger.error( "ERROR - _pbench_hostname ('%s') and _pbench_full_hostname ('%s')" - " environment variables are required", + " environment variables are required to represent the respective" + " hostname strings", hostname, full_hostname, ) @@ -537,33 +414,52 @@ def main(_prog, cli_params): # + # Step 2. 
- Start the Redis Server # - - - # Create the Redis server pbench-specific configuration file - redis_conf = tm_dir / "redis.conf" - params = {"hostnames": hostnames, "tm_dir": tm_dir, "redis_port": redis_port} - try: - with redis_conf.open("w") as fp: - fp.write(redis_conf_tmpl.format(**params)) - except Exception: - logger.exception("failed to create redis server configuration") - return ReturnCode.EXCREDISCONFIG - - # Start the Redis Server itself - redis_srvr = "redis-server" - redis_srvr_path = find_executable(redis_srvr) - redis_pid = tm_dir / f"redis_{redis_port:d}.pid" - logger.debug("2. starting redis server") - try: - retcode = os.spawnl(os.P_WAIT, redis_srvr_path, redis_srvr, redis_conf) - except Exception: - logger.exception("failed to create redis server, daemonized") - return ReturnCode.EXCSPAWNREDIS + if cli_params.redis_server is None: + # Create the Redis server pbench-specific configuration file + redis_conf = tm_dir / "redis.conf" + params = { + "hostnames": hostnames, + "tm_dir": tm_dir, + "redis_port": def_redis_port, + } + try: + with redis_conf.open("w") as fp: + fp.write(redis_conf_tmpl.format(**params)) + except Exception: + logger.exception("failed to create redis server configuration") + return ReturnCode.EXCREDISCONFIG + + # Start the Redis Server itself + redis_srvr = "redis-server" + redis_srvr_path = find_executable(redis_srvr) + redis_pid = tm_dir / f"redis_{def_redis_port:d}.pid" + logger.debug("2. starting redis server") + try: + retcode = os.spawnl(os.P_WAIT, redis_srvr_path, redis_srvr, redis_conf) + except Exception: + logger.exception("failed to create redis server, daemonized") + return ReturnCode.EXCSPAWNREDIS + else: + if retcode != 0: + logger.error( + "failed to create redis server, daemonized; return code: %d", + retcode, + ) + return ReturnCode.REDISFAILED + redis_host = "localhost" + redis_port = def_redis_port else: - if retcode != 0: - logger.error( - "failed to create redis server, daemonized; return code: %d", retcode - ) - return ReturnCode.REDISFAILED + parts = cli_params.redis_server.split(":", 1) + if len(parts) != 2: + logger.error("Bad Redis server specified, '%s'", cli_params.redis_server) + return ReturnCode.BADREDISARG + try: + redis_port = int(parts[1]) + except ValueError: + logger.error("Bad Redis port specified, '%s'", cli_params.redis_server) + return ReturnCode.BADREDISPORT + else: + redis_host = parts[0] # Connect to the Redis Server. # @@ -574,11 +470,11 @@ def main(_prog, cli_params): # listen for responses from the Tool Data Sink. try: to_client_channel = f"{cli_tm_channel_prefix}-{tm_channel_suffix_to_client}" - redis_server = redis.Redis(host="localhost", port=redis_port, db=0) + redis_server = redis.Redis(host=redis_host, port=redis_port, db=0) to_client_chan = RedisChannelSubscriber(redis_server, to_client_channel) except Exception as exc: logger.error( - "Unable to connect to redis server, %s:%d: %r", "localhost", redis_port, exc + "Unable to connect to redis server, %s:%d: %r", redis_host, redis_port, exc ) return kill_redis_server(redis_pid, ReturnCode.REDISCHANFAILED) @@ -601,9 +497,9 @@ def main(_prog, cli_params): controller=_controller, group=group, hostname=host, + label=tool_group.get_label(host), tool_metadata=tool_metadata.getFullData(), tools=tools, - label=tool_group.get_label(host), ) # Create a separate key for the Tool Meister that will be on that host # @@ -624,13 +520,13 @@ def main(_prog, cli_params): # Sink. 
tds_param_key = f"tds-{group}" tds = dict( - channel_prefix=cli_tm_channel_prefix, benchmark_run_dir=str(benchmark_run_dir), bind_hostname=tm_bind_hostname, + channel_prefix=cli_tm_channel_prefix, group=group, + tool_metadata=tool_metadata.getFullData(), tool_trigger=tool_group.trigger, tools=tool_group_data, - tool_metadata=tool_metadata.getFullData(), # The following are optional optional_md=optional_md, ) @@ -646,186 +542,201 @@ def main(_prog, cli_params): # 4. Start the Tool Data Sink process # - - # FIXME: if only one host is registered, and that host is the same as this - # controller, then don't bother starting the Tool Data Sink. - logger.debug("4. starting tool data sink") - try: - pid = os.fork() - if pid == 0: - # In the child! - - # The main() of the Tool Data Sink module will not return here - # since it will daemonize itself and this child pid will be - # replaced by a new pid. - status = tds_main( - [ - PROG.parent / "pbench-tool-data-sink", - "localhost", - str(redis_port), - tds_param_key, - ] - ) - sys.exit(status) - else: - # In the parent! - - # Wait for the child to finish daemonizing itself. - retcode = waitpid(pid) - if retcode != 0: - logger.error( - "failed to create pbench data sink, daemonized; return code: %d", - retcode, - ) - except Exception: - logger.exception("failed to create pbench data sink, daemonized") - return kill_redis_server(redis_pid, ReturnCode.TDSFORKFAILED) - else: - # Wait for logging channel to be up and ready before we start the - # local and remote Tool Meisters. - num_present = 0 - while num_present == 0: - try: - num_present = redis_server.publish( - f"{cli_tm_channel_prefix}-{tm_channel_suffix_to_logging}", - "pbench-tool-meister-start - verify logging channel up", + if cli_params.redis_server is None: + # FIXME: if only one host is registered, and that host is the same as this + # controller, then don't bother starting the Tool Data Sink. + logger.debug("4. starting tool data sink") + try: + pid = os.fork() + if pid == 0: + # In the child! + + # The main() of the Tool Data Sink module will not return here + # since it will daemonize itself and this child pid will be + # replaced by a new pid. + status = tds_main( + [ + PROG.parent / "tool-meister" / "pbench-tool-data-sink", + "localhost", + str(redis_port), + tds_param_key, + "yes", # Request tool-data-sink daemonize itself + ] ) - except Exception: - logger.exception("Failed to verify Tool Data Sink logging sink working") - return kill_redis_server(redis_pid, ReturnCode.TDSLOGPUBFAILED) + sys.exit(status) else: - if num_present == 0: - time.sleep(0.1) + # In the parent! + + # Wait for the child to finish daemonizing itself. + retcode = waitpid(pid) + if retcode != 0: + logger.error( + "failed to create pbench data sink, daemonized; return code: %d", + retcode, + ) + except Exception: + logger.exception("failed to create pbench data sink, daemonized") + return kill_redis_server(redis_pid, ReturnCode.TDSFORKFAILED) + else: + # Wait for logging channel to be up and ready before we start the + # local and remote Tool Meisters. 
+ timeout = time.time() + 60 + num_present = 0 + while num_present == 0: + try: + num_present = redis_server.publish( + f"{cli_tm_channel_prefix}-{tm_channel_suffix_to_logging}", + "pbench-tool-meister-start - verify logging channel up", + ) + except Exception: + logger.exception( + "Failed to verify Tool Data Sink logging sink working" + ) + return kill_redis_server(redis_pid, ReturnCode.TDSLOGPUBFAILED) + else: + if num_present == 0: + if time.time() > timeout: + logger.error( + "The Tool Data Sink failed to start within one minute" + ) + return kill_redis_server( + redis_pid, ReturnCode.TDSSTARTUPTIMEOUT + ) + else: + time.sleep(0.1) # + # 5. Start all the local and remote Tool Meisters # - - failures = 0 - successes = 0 - # NOTE: it is assumed that the location of the pbench-tool-meister command - # is the same on the local host as it is on any remote host. - tool_meister_cmd = PROG.parent / "tool-meister" / "pbench-tool-meister" - ssh_cmd = "ssh" - ssh_path = find_executable(ssh_cmd) - base_args = [ - ssh_cmd, - ] - base_args.extend(shlex.split(ssh_opts)) - args = [ - "", - f"{tool_meister_cmd}-remote", - tm_bind_hostname, - str(redis_port), - "", - "yes", - ] - tms = dict() - tm_count = 0 - for host in tool_group.hostnames.keys(): - tm_count += 1 - tm_param_key = f"tm-{group}-{host}" - if host == full_hostname: - logger.debug("5a. starting localhost tool meister") - try: - pid = os.fork() - if pid == 0: - # In the child! - - # The main() of the Tool Meister module will not return - # here since it will daemonize itself and this child pid - # will be replaced by a new pid. - status = tm_main( - [ - str(tool_meister_cmd), - "localhost", - str(redis_port), - tm_param_key, - "yes", - ] + if cli_params.redis_server is None: + failures = 0 + successes = 0 + # NOTE: it is assumed that the location of the pbench-tool-meister command + # is the same on the local host as it is on any remote host. + tool_meister_cmd = PROG.parent / "tool-meister" / "pbench-tool-meister" + ssh_cmd = "ssh" + ssh_path = find_executable(ssh_cmd) + base_args = [ + ssh_cmd, + ] + base_args.extend(shlex.split(ssh_opts)) + args = [ + "", + f"{tool_meister_cmd}-remote", + tm_bind_hostname, + str(redis_port), + "", + "yes", # Yes, request the tool meister daemonize itself + ] + tms = dict() + tm_count = 0 + for host in tool_group.hostnames.keys(): + tm_count += 1 + tm_param_key = f"tm-{group}-{host}" + if host == full_hostname: + logger.debug("5a. starting localhost tool meister") + try: + pid = os.fork() + if pid == 0: + # In the child! + + # The main() of the Tool Meister module will not return + # here since it will daemonize itself and this child pid + # will be replaced by a new pid. + status = tm_main( + [ + str(tool_meister_cmd), + "localhost", + str(redis_port), + tm_param_key, + "yes", # Yes, daemonize yourself TM ... + ] + ) + sys.exit(status) + else: + # In the parent! + pass + except Exception: + logger.exception( + "failed to create localhost tool meister, daemonized" ) - sys.exit(status) + failures += 1 + tms[host] = {"pid": None, "status": "failed"} else: - # In the parent! - pass - except Exception: - logger.exception("failed to create localhost tool meister, daemonized") - failures += 1 - tms[host] = {"pid": None, "status": "failed"} + # Record the child pid to wait below. + tms[host] = {"pid": pid, "status": "forked"} else: - # Record the child pid to wait below. 
- tms[host] = {"pid": pid, "status": "forked"} - else: - args[0] = host - args[4] = tm_param_key - ssh_args = base_args + args - logger.debug( - "5b. starting remote tool meister, ssh_path=%r ssh_args=%r", - ssh_path, - ssh_args, - ) + args[0] = host + args[4] = tm_param_key + ssh_args = base_args + args + logger.debug( + "5b. starting remote tool meister, ssh_path=%r ssh_args=%r", + ssh_path, + ssh_args, + ) + try: + pid = os.spawnv(os.P_NOWAIT, ssh_path, ssh_args) + except Exception: + logger.exception( + "failed to create a tool meister instance for host %s", host + ) + tms[host] = {"pid": None, "status": "failed"} + else: + # Record the child pid to wait below. + tms[host] = {"pid": pid, "status": "spawned"} + + for host, tm_proc in tms.items(): + if tm_proc["status"] == "failed": + failures += 1 + continue + pid = tm_proc["pid"] try: - pid = os.spawnv(os.P_NOWAIT, ssh_path, ssh_args) + exit_status = waitpid(pid) except Exception: + failures += 1 logger.exception( "failed to create a tool meister instance for host %s", host ) - tms[host] = {"pid": None, "status": "failed"} else: - # Record the child pid to wait below. - tms[host] = {"pid": pid, "status": "spawned"} + if exit_status != 0: + failures += 1 + logger.error( + "failed to start tool meister on remote host '%s'" + " (pid %d), exit status: %d", + host, + pid, + exit_status, + ) + else: + successes += 1 - failures = 0 - for host, tm_proc in tms.items(): - if tm_proc["status"] == "failed": - failures += 1 - continue - pid = tm_proc["pid"] - try: - exit_status = waitpid(pid) - except Exception: - failures += 1 - logger.exception( - "failed to create a tool meister instance for host %s", host - ) - else: - if exit_status != 0: - failures += 1 - logger.error( - "failed to start tool meister on remote host '%s'" - " (pid %d), exit status: %d", - host, - pid, - exit_status, + if failures > 0: + # Don't wait for the Tool Meisters + logger.info("terminating tool meister startup due to failures") + terminate_msg = dict(action="terminate", group=group, directory=None) + try: + ret = redis_server.publish( + f"{cli_tm_channel_prefix}-{tm_channel_suffix_from_client}", + json.dumps(terminate_msg, sort_keys=True), ) + except Exception: + logger.exception("Failed to publish terminate message") else: - successes += 1 + logger.debug("publish('terminate') = %r", ret) + return kill_redis_server(redis_pid, ReturnCode.TMFAILURES) - if failures > 0: - # Don't wait for the Tool Meisters - logger.info("terminating tool meister startup due to failures") - terminate_msg = dict(action="terminate", group=group, directory=None) - try: - ret = redis_server.publish( - f"{cli_tm_channel_prefix}-{tm_channel_suffix_from_client}", - json.dumps(terminate_msg, sort_keys=True), + if successes == 0: + logger.warning( + "unable to successfully start any tool meisters," + " but encountered no failures either: terminating" ) - except Exception: - logger.exception("Failed to publish terminate message") - else: - logger.debug("publish('terminate') = %r", ret) - return kill_redis_server(redis_pid, ReturnCode.TMFAILURES) + return kill_redis_server(redis_pid, ReturnCode.TMNOSUCCESSES) - if successes == 0: - logger.warning( - "unable to successfully start any tool meisters," - " but encountered no failures either: terminating" + assert successes == tm_count, ( + f"Logic Bomb! 
Number of created Tool Meisters, {successes}, does not"
+            f" match the expected number of Tool Meisters, {tm_count}"
+        )
-        return kill_redis_server(redis_pid, ReturnCode.TMNOSUCCESSES)
-
-    assert successes == tm_count, (
-        f"Logic Bomb! Number of created Tool Meisters, {successes}, does not"
-        f" match the expected number of Tool Meisters, {tm_count}"
-    )
     #
+
     # 6. Wait for the TDS to send a message reporting that it, and all the
@@ -835,12 +746,16 @@ def main(_prog, cli_params):
     # If any successes, then we need to wait for them to show up as
     # subscribers.
     logger.debug(
-        "6. waiting for all successfully spawned SSH processes"
+        "6. waiting for all successfully created Tool Meister processes"
         " to show up as subscribers"
     )
     ret_val = wait_for_tds(to_client_chan, logger)
     if ret_val != 0:
-        return kill_redis_server(redis_pid, ReturnCode.TDSWAITFAILURE)
+        if cli_params.redis_server is None:
+            # We created the Redis server, so we should clean it up.
+            return kill_redis_server(redis_pid, ReturnCode.TDSWAITFAILURE)
+        else:
+            return ReturnCode.TDSWAITFAILURE
 
     # Setup a Client API object using our existing to_client_chan object to
     # drive the following client operations ("sysinfo" [optional] and "init"
@@ -856,34 +771,29 @@ def main(_prog, cli_params):
         try:
             sysinfo_path.mkdir(parents=True)
         except Exception:
-            logger.error(
-                "Unable to create sysinfo-dump directory base path: {}",
-                sysinfo_path,
+            error_log(
+                f"Unable to create sysinfo-dump directory base path: {sysinfo_path}"
             )
-            ret_val = ReturnCode.EXCSYSINFODIR
         else:
             logger.debug("7. Collecting system information")
             info_log("Collecting system information")
-            ret_val = client.publish(group, sysinfo_path, "sysinfo", sysinfo)
-            ret_val = (
-                ReturnCode.SUCCESS if ret_val == 0 else ReturnCode.SYSINFOFAILED
-            )
+            # Collecting system information is optional, so we don't gate
+            # the success or failure of the startup on it.
+            client.publish(group, sysinfo_path, "sysinfo", sysinfo)
 
-    if ret_val == ReturnCode.SUCCESS:
-        tool_dir = benchmark_run_dir / f"tools-{group}"
-        try:
-            tool_dir.mkdir(exist_ok=True)
-        except Exception as exc:
-            logger.error(
-                'failed to create tool output directory, "{}": {}', tool_dir, exc
-            )
-            ret_val = ReturnCode.EXCTOOLGROUPDIR
-        else:
-            logger.debug("8. Initialize persistent tools")
-            ret_val = client.publish(group, tool_dir, "init", None)
-            if ret_val != ReturnCode.SUCCESS:
+    tool_dir = benchmark_run_dir / f"tools-{group}"
+    try:
+        tool_dir.mkdir(exist_ok=True)
+    except Exception as exc:
+        error_log(f"failed to create tool output directory, '{tool_dir}': {exc}")
+        return ReturnCode.EXCTOOLGROUPDIR
+    else:
+        logger.debug("8. Initialize persistent tools")
+        ret_val = client.publish(group, tool_dir, "init", None)
+        if ret_val != 0:
+            if cli_params.redis_server is None:
+                # We created the Redis server, so we should clean it up.
                 ret_val = kill_redis_server(redis_pid, ReturnCode.INITFAILED)
-
     return ret_val
 
@@ -900,7 +810,17 @@ if __name__ == "__main__":
         help="The list of system information items to be collected.",
    )
    parser.add_argument(
-        "tool_group", help="The tool group of items to be run by the Tool Meisters."
+        "--redis-server",
+        dest="redis_server",
+        default=os.environ.get("PBENCH_REDIS_SERVER", None),
+        help=(
+            "Use an existing Redis server specified by <host>:<port>;"
+            " implies an existing Tool Data Sink and Tool Meisters as well."
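+            # (An illustrative value is "localhost:17001"; 17001 is the
+            # default def_redis_port from pbench.agent.constants.)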
+        ),
+    )
+    parser.add_argument(
+        "tool_group",
+        help="The tool group name of tools to be run by the Tool Meisters.",
     )
     parsed = parser.parse_args()
     status = main(sys.argv[0], parsed)
diff --git a/agent/util-scripts/pbench-tool-meister-stop b/agent/util-scripts/pbench-tool-meister-stop
index 6a68603dbe..ce4f9b62b9 100755
--- a/agent/util-scripts/pbench-tool-meister-stop
+++ b/agent/util-scripts/pbench-tool-meister-stop
@@ -19,14 +19,10 @@ import time
 from argparse import ArgumentParser
 from pathlib import Path
 
-from pbench.agent.constants import redis_port, cli_tm_channel_prefix
+from pbench.agent.constants import def_redis_port, cli_tm_channel_prefix
+from pbench.agent.tool_group import BadToolGroup, ToolGroup
 from pbench.agent.tool_meister_client import Client
-from pbench.agent.utils import (
-    cli_verify_sysinfo,
-    info_log,
-    verify_tool_group,
-    BadToolGroup,
-)
+from pbench.agent.utils import cli_verify_sysinfo, error_log, info_log
 
 
 def is_running(pid):
@@ -42,6 +38,87 @@
     return True
 
 
+def wait_for_pid(pid):
+    """wait_for_pid - wait for a process to actually stop running.
+    """
+    while is_running(pid):
+        time.sleep(0.1)
+
+
+def graceful_shutdown(
+    benchmark_run_dir, full_hostname, group, redis_server_pid_file, logger
+):
+    # The assumption/assertion here is that the tool meister "stop" command is
+    # run on the same node as the tool meister "start" command ran, creating
+    # the local Tool Data Sink and the optional local Tool Meister. We want to
+    # make sure anything "local" to this stop command is shut down gracefully
+    # before we report back to the user. If Tool Meisters from remote nodes
+    # have already reported that they have received the "terminate" message,
+    # then we trust they will shut down gracefully themselves.
+    try:
+        tds_pid_file = benchmark_run_dir / "tm" / "pbench-tool-data-sink.pid"
+        try:
+            pid_str = tds_pid_file.read_text()
+        except OSError as exc:
+            if exc.errno != errno.ENOENT:
+                raise
+        else:
+            tds_pid = int(pid_str)
+            logger.debug("waiting for tool-data-sink (%d) to exit", tds_pid)
+            wait_for_pid(tds_pid)
+    except Exception:
+        logger.exception("Exception encountered waiting for tool-data-sink")
+        ret_val = 1
+    else:
+        ret_val = 0
+
+    try:
+        ltm_pid_file = benchmark_run_dir / "tm" / f"tm-{group}-{full_hostname}.pid"
+        try:
+            pid_str = ltm_pid_file.read_text()
+        except OSError as exc:
+            if exc.errno != errno.ENOENT:
+                raise
+        else:
+            ltm_pid = int(pid_str)
+            logger.debug("waiting for local tool-meister (%d) to exit", ltm_pid)
+            wait_for_pid(ltm_pid)
+    except Exception:
+        logger.exception("Exception encountered waiting for local tool-meister")
+        ret_val = 1
+
+    # All was good so far, so we can terminate the redis server.
+    try:
+        try:
+            pid_str = redis_server_pid_file.read_text()
+        except OSError as exc:
+            if exc.errno != errno.ENOENT:
+                raise
+        else:
+            redis_server_pid = int(pid_str)
+            pid_exists = True
+            timeout = time.time() + 60
+            while pid_exists:
+                try:
+                    os.kill(redis_server_pid, signal.SIGTERM)
+                except ProcessLookupError:
+                    pid_exists = False
+                else:
+                    if time.time() > timeout:
+                        try:
+                            os.kill(redis_server_pid, signal.SIGKILL)
+                        except ProcessLookupError:
+                            pid_exists = False
+                        except Exception:
+                            raise
+                    time.sleep(0.1)
+    except Exception:
+        logger.exception("Exception encountered terminating Redis server")
+        ret_val = 1
+
+    return ret_val
+
+
 def main(_prog, cli_params):
     """Main program for the tool meister stop CLI interface.
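(A condensed sketch of the Redis-server termination pattern used by
graceful_shutdown() above: keep sending SIGTERM until the process is gone,
escalating to SIGKILL once a deadline passes. The helper name is
illustrative; the 0.1s poll and 60s timeout mirror the code above.)

    import os
    import signal
    import time

    def terminate_with_timeout(pid, timeout_secs=60):
        deadline = time.time() + timeout_secs
        while True:
            # Prefer a graceful SIGTERM; escalate after the deadline passes.
            sig = signal.SIGTERM if time.time() <= deadline else signal.SIGKILL
            try:
                os.kill(pid, sig)
            except ProcessLookupError:
                return  # the process has exited
            time.sleep(0.1)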
@@ -81,7 +158,7 @@ def main(_prog, cli_params): logger.addHandler(sh) try: - verify_tool_group(cli_params.tool_group) + ToolGroup.verify_tool_group(cli_params.tool_group) except BadToolGroup as exc: logger.error(str(exc)) return 1 @@ -103,20 +180,38 @@ def main(_prog, cli_params): full_hostname = os.environ["_pbench_full_hostname"] benchmark_run_dir = Path(os.environ["benchmark_run_dir"]).resolve(strict=True) except Exception: - logger.exception("failed to fetch parameters from the environment") + logger.exception("failed to fetch required parameters from the environment") return 1 - try: - redis_server_pid_file = ( - benchmark_run_dir / "tm" / f"redis_{redis_port:d}.pid" - ).resolve(strict=True) - except FileNotFoundError: - # No Redis server, nothing to do. - return 0 + if cli_params.redis_server is None: + # No Redis server was given, so look locally to see if we can find it. + # If no Redis server locally, we're done. + try: + redis_server_pid_file = ( + benchmark_run_dir / "tm" / f"redis_{def_redis_port:d}.pid" + ).resolve(strict=True) + except FileNotFoundError: + # No Redis server, nothing to do. + return 0 + else: + redis_host = "localhost" + redis_port = def_redis_port + else: + parts = cli_params.redis_server.split(":", 1) + if len(parts) != 2: + logger.error("Bad Redis server specified, '%s'", cli_params.redis_server) + return 1 + try: + redis_port = int(parts[1]) + except Exception: + logger.error("Bad Redis port specified, '%s'", cli_params.redis_server) + return 1 + else: + redis_host = parts[0] # The Redis server is always running on the local host with the CLI. with Client( - redis_host="localhost", + redis_host=redis_host, redis_port=redis_port, channel_prefix=cli_tm_channel_prefix, logger=logger, @@ -126,23 +221,19 @@ def main(_prog, cli_params): try: tool_dir.mkdir(exist_ok=True) except Exception as exc: - logger.error( - 'failed to create tool output directory, "{}": {}', tool_dir, exc - ) + error_log(f"failed to create tool output directory, '{tool_dir}': {exc}") end_ret_val = 1 else: end_ret_val = client.publish(group, tool_dir, "end", None) - - # Next we collect the system configuration information, but only if the - # "end" operation was successful, and if it was requested. + # Next we collect the system configuration information only if we were + # successfully able to end the persistent tools run. if end_ret_val == 0 and sysinfo: sysinfo_path = benchmark_run_dir / "sysinfo" / "end" try: sysinfo_path.mkdir(parents=True) except Exception: - logger.error( - "Unable to create sysinfo-dump directory base path: {}", - sysinfo_path, + error_log( + f"Unable to create sysinfo-dump directory base path: {sysinfo_path}", ) else: logger.info("Collecting system information") @@ -163,65 +254,19 @@ def main(_prog, cli_params): # just return the success/failure of the terminate operation. ret_val = end_ret_val if end_ret_val != 0 else term_ret_val - # The assumption/assertion here is that the tool meister "stop" command is - # run on the same node as the tool meister "start" command ran, creating - # the local Tool Data Sink and the optional local Tool Meister. We want to - # make sure anything "local" to this stop command is shut down gracefully - # before we report back to the user. If Tool Meisters from remote nodes - # have already reported that they have received the "terminate" message, - # then we trust they will shutdown gracefully themselves. 
-    try:
-        tds_pid_file = benchmark_run_dir / "tm" / "pbench-tool-data-sink.pid"
-        try:
-            pid_str = tds_pid_file.read_text()
-        except OSError as exc:
-            if exc.errno != errno.ENOENT:
-                raise
-        else:
-            tds_pid = int(pid_str)
-            logger.debug("waiting for tool-data-sink (%d) to exit", tds_pid)
-            while is_running(tds_pid):
-                time.sleep(0.1)
-    except Exception:
-        logger.exception("Exception encountered waiting for tool-data-sink")
-        ret_val = 1
-
-    try:
-        ltm_pid_file = benchmark_run_dir / "tm" / f"tm-{group}-{full_hostname}.pid"
-        try:
-            pid_str = ltm_pid_file.read_text()
-        except OSError as exc:
-            if exc.errno != errno.ENOENT:
-                raise
-        else:
-            ltm_pid = int(pid_str)
-            logger.debug("waiting for local tool-meister (%d) to exit", ltm_pid)
-            while is_running(ltm_pid):
-                time.sleep(0.1)
-    except Exception:
-        logger.exception("Exception encountered waiting for local tool-meister")
-        ret_val = 1
-
-    # All was good so far, so we can terminate the Redis server.
-    try:
-        try:
-            pid_str = redis_server_pid_file.read_text()
-        except OSError as exc:
-            if exc.errno != errno.ENOENT:
-                raise
-        else:
-            redis_server_pid = int(pid_str)
-            pid_exists = True
-            while pid_exists:
-                try:
-                    os.kill(redis_server_pid, signal.SIGTERM)
-                except ProcessLookupError:
-                    pid_exists = False
-                else:
-                    time.sleep(0.1)
-    except Exception:
-        logger.exception("Exception encountered terminating Redis server")
-        ret_val = 1
+    if cli_params.redis_server is None:
+        # The client operations have finished, successfully or unsuccessfully,
+        # and we were not given an explicit Redis server to use. So the
+        # previous pbench-tool-meister-start must have set up the local Tool
+        # Data Sink, Tool Meister (if registered), and the Redis server. It is
+        # our responsibility to make sure these processes shut down correctly.
+        shutdown_ret_val = graceful_shutdown(
+            benchmark_run_dir, full_hostname, group, redis_server_pid_file, logger
+        )
+        if ret_val == 0:
+            # If client termination was successful, report the status of the
+            # graceful shutdown of the Tool Data Sink and the Redis server.
+            ret_val = shutdown_ret_val
 
     return ret_val
 
@@ -243,7 +288,18 @@
         help="Whether or not the stop operation is in response to an interrupt.",
     )
     parser.add_argument(
-        "tool_group", help="The tool group of items being run in the Tool Meisters."
+        "--redis-server",
+        dest="redis_server",
+        default=os.environ.get("PBENCH_REDIS_SERVER", None),
+        help=(
+            "Use an existing Redis server specified by <host>:<port>;"
+            " implies the use of an existing Tool Data Sink and Tool Meisters"
+            " as well."
+        ),
+    )
+    parser.add_argument(
+        "tool_group",
+        help="The tool group name of tools being run in the Tool Meisters.",
     )
     parsed = parser.parse_args()
     status = main(sys.argv[0], parsed)
diff --git a/agent/util-scripts/test-bin/samples/scripts/dcgm_prometheus.py b/agent/util-scripts/test-bin/dcgm-exporter
similarity index 100%
rename from agent/util-scripts/test-bin/samples/scripts/dcgm_prometheus.py
rename to agent/util-scripts/test-bin/dcgm-exporter
diff --git a/agent/util-scripts/tool-meister/pbench-tool-data-sink b/agent/util-scripts/tool-meister/pbench-tool-data-sink
new file mode 100755
index 0000000000..985f830799
--- /dev/null
+++ b/agent/util-scripts/tool-meister/pbench-tool-data-sink
@@ -0,0 +1,14 @@
+#!/usr/bin/env python3
+
+"""Simple command-line wrapper to keep the tool data sink from being in the
+CLI command set, while still allowing it to be invoked by the container entry
+point.
+""" + +import sys + +from pbench.agent.tool_data_sink import main + + +status = main(sys.argv) +sys.exit(status) diff --git a/agent/util-scripts/tool-meister/tool-data-sink-ep b/agent/util-scripts/tool-meister/tool-data-sink-ep new file mode 100755 index 0000000000..47d02d65a5 --- /dev/null +++ b/agent/util-scripts/tool-meister/tool-data-sink-ep @@ -0,0 +1,8 @@ +#!/bin/bash + +_dir="$(dirname ${0})" + +source /etc/profile.d/pbench-agent.sh +source /opt/pbench-agent/base +# Instruct the Tool Data Sink not to daemonize. +/opt/pbench-agent/util-scripts/tool-meister/pbench-tool-data-sink "${REDIS_HOST}" "${REDIS_PORT}" "${PARAM_KEY}" no diff --git a/agent/util-scripts/tool-meister/tool-meister-ep b/agent/util-scripts/tool-meister/tool-meister-ep new file mode 100755 index 0000000000..dce85d64b9 --- /dev/null +++ b/agent/util-scripts/tool-meister/tool-meister-ep @@ -0,0 +1,8 @@ +#!/bin/bash + +_dir="$(dirname ${0})" + +source /etc/profile.d/pbench-agent.sh +source /opt/pbench-agent/base +# Instruct the Tool Meister not to daemonize. +/opt/pbench-agent/util-scripts/tool-meister/pbench-tool-meister "${REDIS_HOST}" "${REDIS_PORT}" "${PARAM_KEY}" no diff --git a/agent/util-scripts/unittests b/agent/util-scripts/unittests index 66682e7e20..4333c5c425 100755 --- a/agent/util-scripts/unittests +++ b/agent/util-scripts/unittests @@ -528,7 +528,7 @@ function sort_log_file { } function sort_tdslog { - sort_log_file ${_testdir}/mock-run/tm/pbench-tool-data-sink.log + sort_log_file ${_testdir}/mock-run/tm/pbench-tool-data-sink.err } function sort_tmlogs { diff --git a/lib/pbench/agent/base.py b/lib/pbench/agent/base.py index 30c6dfd96b..d2105c5b04 100644 --- a/lib/pbench/agent/base.py +++ b/lib/pbench/agent/base.py @@ -8,6 +8,7 @@ import click from pbench.agent import PbenchAgentConfig +from pbench.agent.tool_group import ToolGroup, BadToolGroup from pbench.agent.utils import setup_logging @@ -108,15 +109,16 @@ def get_path(self, path): def verify_tool_group(self, group): """Ensure we have a tools group directory to work with""" - self.tool_group_dir = self.pbench_run / f"tools-v1-{group}" - if not self.tool_group_dir.exists(): - click.secho( - f'\t{self.name}: invalid --group option ("{group}"), directory not found: {self.tool_group_dir}' - ) + try: + self.tool_group_dir = self.gen_tools_group_dir(group) + except BadToolGroup as exc: + click.echo(f'\t{self.name}: invalid --group option ("{group}"), {exc}') ctxt = click.get_current_context() click.echo(ctxt.get_help()) - return 1 - return 0 + ret_code = 1 + else: + ret_code = 0 + return ret_code def gen_tools_group_dir(self, group): - return self.pbench_run / f"tools-v1-{group}" + return ToolGroup.verify_tool_group(group, pbench_run=self.pbench_run) diff --git a/lib/pbench/agent/constants.py b/lib/pbench/agent/constants.py index a1fde65974..eea0e1eb63 100644 --- a/lib/pbench/agent/constants.py +++ b/lib/pbench/agent/constants.py @@ -2,10 +2,10 @@ """ # Default Redis server port number used is "One Tool" in hex 0x17001 -redis_port = 17001 +def_redis_port = 17001 # Default port number used for the Tool Data Sink -tds_port = 8080 +def_tds_port = 8080 # The amount of time a TM tries to publish its setup message. 
 TDS_RETRY_PERIOD_SECS = 60
diff --git a/lib/pbench/agent/redis.py b/lib/pbench/agent/redis.py
index 86e11a13c9..28388aca17 100644
--- a/lib/pbench/agent/redis.py
+++ b/lib/pbench/agent/redis.py
@@ -220,3 +220,41 @@ def emit(self, record):
                 self.dropped += 1
         finally:
             self.counter += 1
+
+
+def wait_for_conn_and_key(redis_server, key, prog, redis_host, redis_port):
+    """wait_for_conn_and_key - convenience function used by both the Tool
+    Meister and the Tool Data Sink at startup to wait for an initial
+    connection to the Redis server, and for the expected key to show up.
+    """
+    # Loop waiting for the key to show up.
+    connected = None
+    payload = None
+    while payload is None:
+        try:
+            payload = redis_server.get(key)
+        except redis.ConnectionError:
+            if connected is None:
+                print(
+                    f"{prog}: waiting to connect to redis server {redis_host}:{redis_port}",
+                    flush=True,
+                )
+                connected = False
+            elif connected:
+                print(
+                    f"{prog}: disconnected from redis server {redis_host}:{redis_port}",
+                    flush=True,
+                )
+                connected = False
+            time.sleep(1)
+        else:
+            if not connected:
+                print(
+                    f"{prog}: connected to redis server {redis_host}:{redis_port}",
+                    flush=True,
+                )
+                connected = True
+            if payload is None:
+                print(f'{prog}: key, "{key}" does not exist yet', flush=True)
+                time.sleep(1)
+    return payload.decode("utf-8")
diff --git a/lib/pbench/agent/tool_data_sink.py b/lib/pbench/agent/tool_data_sink.py
index f1d9f93970..09db6257bf 100644
--- a/lib/pbench/agent/tool_data_sink.py
+++ b/lib/pbench/agent/tool_data_sink.py
@@ -18,7 +18,6 @@ import subprocess
 import sys
 import tempfile
-import time
 from configparser import ConfigParser, DuplicateSectionError
 from datetime import datetime
@@ -29,14 +28,14 @@
 from threading import Thread, Lock, Condition
 from wsgiref.simple_server import WSGIRequestHandler, make_server
 
-import daemon
 import pidfile
 import redis
 
 from bottle import Bottle, ServerAdapter, request, abort
+from daemon import DaemonContext
 
 from pbench.agent.constants import (
-    tds_port,
+    def_tds_port,
     tm_allowed_actions,
     tm_channel_suffix_from_client,
     tm_channel_suffix_from_tms,
@@ -44,26 +43,27 @@
     tm_channel_suffix_to_logging,
     tm_channel_suffix_to_tms,
 )
-from pbench.agent.redis import RedisChannelSubscriber
+from pbench.agent.redis import RedisChannelSubscriber, wait_for_conn_and_key
 from pbench.agent.toolmetadata import ToolMetadata
 from pbench.agent.utils import collect_local_info
 
+# Logging format string for unit tests
+fmtstr_ut = "%(levelname)s %(name)s %(funcName)s -- %(message)s"
+fmtstr = "%(asctime)s %(levelname)s %(process)s %(thread)s %(name)s %(funcName)s %(lineno)d -- %(message)s"
+
+
 # Read in 64 KB chunks off the wire for HTTP PUT requests.
 _BUFFER_SIZE = 65536
 
 # Maximum size of the tar ball for collected tool data.
 _MAX_TOOL_DATA_SIZE = 2 ** 30
 
-# Executable path of the tar and cp programs.
-tar_path = None
-cp_path = None
-
 
 def _now(when):
-    """_now - An ugly hack to facility testing without the ability to mock.
+    """_now - An ugly hack to facilitate testing without the ability to mock.
 
-    Instead of directly calling `datatime.utcnow().isoformat()`, each call
+    Instead of directly calling `datetime.utcnow().isoformat()`, each call
     site invokes this method with an argument only used during unit testing
     to determine the expected behavior. This allows us to provide a "start"
     time that is one microsecond less than the "end" time.
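(Illustrative use of the wait_for_conn_and_key() helper added above, modeled
on how the Tool Data Sink and Tool Meister mains invoke it; the host, port,
and parameter key shown here are hypothetical:)

    import redis
    from pbench.agent.redis import wait_for_conn_and_key

    redis_server = redis.Redis(host="localhost", port=17001, db=0)
    # Blocks, printing progress, until the server is reachable and the
    # parameter key has been written by pbench-tool-meister-start.
    params_str = wait_for_conn_and_key(
        redis_server, "tds-default", "pbench-tool-data-sink", "localhost", 17001
    )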
@@ -76,9 +76,9 @@ def _now(when):
 
 
 class DataSinkWsgiServer(ServerAdapter):
-    """DataSinkWsgiServer - an re-implementation of Bottle's WSGIRefServer
+    """DataSinkWsgiServer - a re-implementation of Bottle's WSGIRefServer
     where we have access to the underlying WSGIServer instance in order to
-    invoke it's stop() method, and we also provide an WSGIReqeustHandler with
+    invoke its stop() method, and we also provide a WSGIRequestHandler with
     an opinionated logging implementation.
     """
 
@@ -124,25 +124,78 @@ def log_request(self, code="-", size="-"):
         self.options["handler_class"] = DataSinkWsgiRequestHandler
         self._server = None
+        self._err_code = None
+        self._err_text = None
         self._lock = Lock()
         self._cv = Condition(lock=self._lock)
         self._logger = logger
 
-    def run(self, app):
-        assert self._server is None, "'run' method called twice"
-        self._logger.debug("Making tool data sink WSGI server ...")
-        server = make_server(self.host, self.port, app, **self.options)
+    def _do_notify(self, text=None, code=0, server=None):
+        """_do_notify - simple helper method to encapsulate method of notification.
+        """
         with self._lock:
+            self._err_text = text
+            self._err_code = code
             self._server = server
             self._cv.notify()
-        self._logger.debug("Running tool data sink WSGI server ...")
-        self._server.serve_forever()
 
-    def stop(self):
+    def run(self, app):
+        """run - Start the WSGI server, called by the Bottle framework.
+
+        Intended to be run as a separate thread.
+
+        We record the outcome of the `make_server` call for success or failure
+        and notify anybody waiting for this thread to succeed.
+        """
+        assert self._server is None, "'run' method called twice"
+        self._logger.debug("Making tool data sink WSGI server ...")
+        try:
+            server = make_server(self.host, self.port, app, **self.options)
+        except OSError as exc:
+            assert exc.errno != 0, "Logic bomb! OSError exception with no errno value"
+            self._do_notify(str(exc), exc.errno)
+            raise
+        except Exception as exc:
+            self._logger.exception("Unexpected error in WSGI server")
+            self._do_notify(str(exc), -1)
+            raise
+        else:
+            self._logger.debug("Successfully created WSGI server")
+            self._do_notify(server=server)
+            self._logger.debug("Running tool data sink WSGI server ...")
+            server.serve_forever()
+
+    def wait(self):
+        """ wait - wait for the WSGI thread executing the `run` method to start
+        running and successfully create a WSGI server object, or fail trying.
+
+        Returns a tuple of the error text and the error code set by the run()
+        method attempting to create the WSGI server. The error code will be
+        0 on success, an Errno value, or -1 if an unexpected exception was
+        raised.
+        """
         with self._lock:
-            while self._server is None:
+            while self._err_code is None:
                 self._cv.wait()
-        self._server.shutdown()
+        return self._err_text, self._err_code
+
+    def stop(self):
+        """ stop - stop the running WSGI server via the shutdown() method of
+        the WSGI server object.
+        """
+        # We have to wait for the thread to start the server and fill in the
+        # server object first.
+        self.wait()
+        if self._err_code == 0:
+            self._server.shutdown()
+
+
+class ToolDataSinkError(Exception):
+    """ToolDataSinkError - generic exception class for Tool Data Sink related
+    exceptions.
+    """
+
+    pass
 
 
 class BaseCollector:
@@ -159,20 +212,25 @@ def __init__(
         self,
         benchmark_run_dir,
         tool_group,
         host_tools_dict,
         tool_metadata,
+        tar_path,
         logger,
     ):
         """Constructor - responsible for recording the arguments, and creating
         the Environment() for template rendering.
""" self.templates_path = pbench_bin / "templates" + assert ( + self.templates_path.is_dir() + ), f"Logic bomb! {self.templates_path} does not exist as a directory" self.benchmark_run_dir = benchmark_run_dir self.tool_group = tool_group self.host_tools_dict = host_tools_dict self.tool_metadata = tool_metadata + self.tar_path = tar_path self.logger = logger self.run = [] - self.tool_group_dir = self.benchmark_run_dir / f"tools-{self.tool_group}" + self.tool_group_dir = self.benchmark_run_dir.local / f"tools-{self.tool_group}" self.tool_dir = self.tool_group_dir / self.name self.template_dir = Environment( autoescape=False, @@ -253,7 +311,7 @@ def terminate(self): if sts != 0: self.logger.warning("Collector process terminated with %d", sts) if errors > 0: - raise Exception("Failed to terminate all the collector processes") + raise ToolDataSinkError("Failed to terminate all the collector processes") class PromCollector(BaseCollector): @@ -268,7 +326,7 @@ def __init__(self, *args, **kwargs): """ self.prometheus_path = find_executable("prometheus") if self.prometheus_path is None: - raise Exception("External 'prometheus' executable not found") + raise ToolDataSinkError("External 'prometheus' executable not found") super().__init__(*args, **kwargs) self.tool_context = [] @@ -279,7 +337,9 @@ def __init__(self, *args, **kwargs): dict(hostname=f"{host}_{tool}", hostport=f"{host}:{port}") ) if not self.tool_context: - raise Exception("Expected prometheus persistent tool context not found") + raise ToolDataSinkError( + "Expected prometheus persistent tool context not found" + ) def launch(self): """launch - creates the YAML file that directs Prometheus's behavior, @@ -329,7 +389,7 @@ def terminate(self): self.logger.debug("Prometheus terminated") args = [ - tar_path, + self.tar_path, "--remove-files", "-Jcf", f"{self.tool_group_dir}/prometheus_data.tar.xz", @@ -537,7 +597,7 @@ def terminate(self): self.logger.debug("Pmproxy and pmlogger(s) terminated") args = [ - tar_path, + self.tar_path, "--remove-files", "-Jcf", f"{self.tool_group_dir}/pcp_data.tar.xz", @@ -550,6 +610,125 @@ def terminate(self): self.logger.warning("Failed to tar up pmlogger data: %r", args) +class BenchmarkRunDir: + """BenchmarkRunDir - helper class for handling the benchmark_run_dir + directory Redis parameter vs the actual "local" benchmark run directory. + + It is a requirement of the Tool Meister sub-system that the ${pbench_run} + directory is always a prefix of the ${benchmark_run_dir}. + + When the pbench CLI starts the Tool Data Sink directly, the local + benchmark run directory is the same as the value of the benchmark_run_dir + parameter. + + But when the Tool Data Sink runs in a container, the path to the benchmark + run directory inside the container might be different from the parameter + value because the mount point for the external file system has a different + path inside the container. Typically, the container is constructed with + the default pbench installation, where the ${pbench_run} directory is + "/var/lib/pbench-agent". + + The entity responsible for starting the Tool Data Sink container typically + mounts a different directory for /var/lib/pbench-agent via 'podman run + --volume /srv/data/pbench-run-dir:/var/lib/pbench-agent:Z'. This leads to + a conflict where the external ${pbench_run} path is different from the + internal-to-the-container ${pbench_run} path. 
To resolve this, the entity
+    which creates the external pbench run directory creates a ".path" file in
+    that directory containing the full "external" path to the pbench run
+    directory. The Tool Data Sink uses that path to validate that external
+    benchmark_run_dir parameter values are valid.
+
+    This class implements the mechanism that allows the Tool Data Sink code to
+    handle that seamlessly.
+    """
+
+    class Exists(Exception):
+        pass
+
+    class Prefix(Exception):
+        pass
+
+    def __init__(self, ext_benchmark_run_dir, int_pbench_run):
+        self._ext_benchmark_run_dir = Path(ext_benchmark_run_dir)
+        self._ext_pbench_run = self._ext_benchmark_run_dir.parent
+        self._int_pbench_run = Path(int_pbench_run)
+
+        # The Tool Data Sink could be running in a container. If so, then
+        # it'll be using the default benchmark run directory. If the
+        # benchmark_run_dir parameter is valid, there will be a file
+        # called ".path" in the default benchmark run directory which will
+        # match.
+        #
+        # E.g.:
+        #   $ pbench_run="/home/<user>/run-dir"
+        #   $ benchmark_run_dir="${pbench_run}/script_config_<date>"
+        #   $ cat ${pbench_run}/.path
+        #   /home/<user>/run-dir
+        #   $ podman run --volume ${pbench_run}:/var/lib/pbench-agent \
+        #       pbench-agent-tool-data-sink bash
+        #   [ abcdefg /]$ cat /var/lib/pbench-agent/.path
+        #   /home/<user>/run-dir
+        try:
+            benchmark_run_dir_lcl = self._ext_benchmark_run_dir.resolve(strict=True)
+        except Exception:
+            # Might be in a container; let's first construct the
+            # internal-to-the-container benchmark run directory.
+            benchmark_run_dir_lcl = (
+                self._int_pbench_run / self._ext_benchmark_run_dir.name
+            )
+            dot_path = self._int_pbench_run / ".path"
+            try:
+                dot_path_contents = dot_path.read_text().strip()
+            except Exception as exc:
+                # Failed to read ".path" contents, give up.
+                raise ToolDataSinkError(
+                    f"Run directory parameter, '{ext_benchmark_run_dir}', must"
+                    f" be an existing directory ('{self._ext_pbench_run}/"
+                    f".path' not found, '{exc}').",
+                )
+            else:
+                if dot_path_contents != str(self._ext_pbench_run):
+                    raise ToolDataSinkError(
+                        f"Run directory parameter, '{ext_benchmark_run_dir}',"
+                        " must be an existing directory (.path contents"
+                        f" mismatch, .path='{dot_path_contents}' !="
+                        f" '{self._ext_pbench_run}').",
+                    )
+        else:
+            # We can access the benchmark_run_dir directly, no need to
+            # consider contents of ".path" file.
+            pass
+        if not benchmark_run_dir_lcl.is_dir():
+            raise ToolDataSinkError(
+                f"Run directory parameter, '{ext_benchmark_run_dir}', must be"
+                " a real directory.",
+            )
+        self.local = benchmark_run_dir_lcl
+
+    def __str__(self):
+        """__str__ - the string representation of a BenchmarkRunDir object is
+        the original external benchmark run directory string.
+        """
+        return str(self._ext_benchmark_run_dir)
+
+    def validate(self, directory):
+        """validate - check that an external directory has a prefix of the external
+        benchmark run directory.
+        """
+        directory_p = Path(directory)
+        try:
+            # Check that "directory" has a prefix of the external run directory.
+            rel_path = directory_p.relative_to(self._ext_benchmark_run_dir)
+        except ValueError:
+            raise self.Prefix()
+        local_dir = self.local / rel_path
+        if not local_dir.is_dir():
+            # The internal benchmark run directory does not have the same
+            # sub-directory hierarchy.
+            raise self.Exists()
+        return local_dir
+
+
 class ToolDataSink(Bottle):
     """ToolDataSink - sub-class of Bottle representing state for tracking
     data sent from tool meisters via an HTTP PUT method.
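(Illustrative use of the BenchmarkRunDir class above, with hypothetical
paths; this mirrors the unit tests added at the end of this change:)

    # Maps the "external" run directory to its local counterpart, consulting
    # the ${pbench_run}/.path file when running inside a container.
    brd = BenchmarkRunDir(
        "/home/user/run-dir/script_config_ts", "/var/lib/pbench-agent"
    )
    # validate() raises brd.Prefix when the argument is not under the run
    # directory, and brd.Exists when the local counterpart is missing.
    local_dir = brd.validate("/home/user/run-dir/script_config_ts/tools-default")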
@@ -558,20 +737,41 @@ class ToolDataSink(Bottle):
     # The list of actions where we expect Tool Meisters to send data to us.
     _data_actions = frozenset(("send", "sysinfo"))
 
+    @staticmethod
+    def fetch_params(params, pbench_run):
+        try:
+            _benchmark_run_dir = params["benchmark_run_dir"]
+            bind_hostname = params["bind_hostname"]
+            channel_prefix = params["channel_prefix"]
+            tool_group = params["group"]
+            tool_metadata = ToolMetadata.tool_md_from_dict(params["tool_metadata"])
+            tool_trigger = params["tool_trigger"]
+            tools = params["tools"]
+        except KeyError as exc:
+            raise ToolDataSinkError(f"Invalid parameter block, missing key {exc}")
+        else:
+            benchmark_run_dir = BenchmarkRunDir(_benchmark_run_dir, pbench_run)
+            return (
+                benchmark_run_dir,
+                bind_hostname,
+                channel_prefix,
+                tool_group,
+                tool_metadata,
+                tool_trigger,
+                tools,
+            )
+
     def __init__(
         self,
         pbench_bin,
+        pbench_run,
         hostname,
-        bind_hostname,
+        tar_path,
+        cp_path,
         redis_server,
         redis_host,
         redis_port,
-        channel_prefix,
-        benchmark_run_dir,
-        tool_group,
-        tool_trigger,
-        tools,
-        tool_metadata,
+        params,
         optional_md,
         logger,
     ):
@@ -583,16 +783,21 @@ def __init__(
         # Save external state
         self.pbench_bin = pbench_bin
         self.hostname = hostname
-        self.bind_hostname = bind_hostname
+        self.tar_path = tar_path
+        self.cp_path = cp_path
         self.redis_server = redis_server
         self.redis_host = redis_host
         self.redis_port = redis_port
-        self.channel_prefix = channel_prefix
-        self.benchmark_run_dir = benchmark_run_dir
-        self.tool_group = tool_group
-        self.tool_trigger = tool_trigger
-        self.tools = tools
-        self.tool_metadata = tool_metadata
+        ret_val = self.fetch_params(params, pbench_run)
+        (
+            self.benchmark_run_dir,
+            self.bind_hostname,
+            self.channel_prefix,
+            self.tool_group,
+            self.tool_metadata,
+            self.tool_trigger,
+            self.tools,
+        ) = ret_val
         self.optional_md = optional_md
         self.logger = logger
         # Initialize internal state
@@ -615,6 +820,8 @@ def __init__(
         self._lock = Lock()
         self._cv = Condition(lock=self._lock)
         self.web_server_thread = None
+        self._tm_log_capture_thread_cv = Condition(lock=self._lock)
+        self._tm_log_capture_thread_state = None
        self.tm_log_capture_thread = None
 
     def __enter__(self):
@@ -630,13 +837,19 @@ def __enter__(self):
             callback=self.put_document,
         )
         self._server = DataSinkWsgiServer(
-            host=self.bind_hostname, port=tds_port, logger=self.logger
+            host=self.bind_hostname, port=def_tds_port, logger=self.logger
         )
         self.web_server_thread = Thread(target=self.web_server_run)
         self.web_server_thread.start()
-        # FIXME - ugly hack for consistent unit tests; why not just use a
-        # condition variable?
-        time.sleep(0.1)
+        err_text, err_code = self._server.wait()
+        if err_code > 0:
+            # Pass along the OSError with its errno; this lets us handle
+            # EADDRINUSE errors cleanly.
+            raise OSError(err_code, err_text)
+        elif err_code < 0:
+            # All other errors encountered by the WSGI thread are already
+            # logged.
+            raise ToolDataSinkError(f"Failure to create WSGI server - {err_text!r}")
         self.logger.debug("web server 'run' thread started, processing payloads ...")
 
         # Setup the two Redis channels to which the Tool Data Sink subscribes.
@@ -654,10 +867,18 @@ def __enter__(self):
 
         self.tm_log_capture_thread = Thread(target=self.tm_log_capture)
         self.tm_log_capture_thread.start()
-        # FIXME - ugly hack for consistent unit tests; why not just use a
-        # condition variable?
- time.sleep(0.1) - self.logger.debug("'tm_log_capture' thread started, processing logs ...") + with self._lock: + while self._tm_log_capture_thread_state is None: + self._tm_log_capture_thread_cv.wait() + if self._tm_log_capture_thread_state != "started": + self.logger.warning( + "'tm_log_capture' thread failed to start, not processing Tool" + " Meister logs ..." + ) + else: + self.logger.debug( + "'tm_log_capture' thread started, processing Tool Meister logs ..." + ) # The ToolDataSink object itself is the object of the context manager. return self @@ -709,20 +930,18 @@ def tm_log_capture(self): # logs from remote Tool Meisters. logger = logging.getLogger("tm_log_capture_thread") logger.setLevel(logging.WARNING) - tm_log_file = self.benchmark_run_dir / "tm" / "tm.logs" + tm_log_file = self.benchmark_run_dir.local / "tm" / "tm.logs" with tm_log_file.open("w") as fp: try: + with self._lock: + self._tm_log_capture_thread_state = "started" + self._tm_log_capture_thread_cv.notify() for log_msg in self._to_logging_chan.fetch_message(logger): fp.write(f"{log_msg}\n") fp.flush() except redis.ConnectionError: # We don't bother reporting any connection errors. pass - except ValueError as exc: - # FIXME - Why do we need to do this? - if exc.args[0] == "I/O operation on closed file.": - pass - raise except Exception: self.logger.exception("Failed to capture logs from Redis server") @@ -796,9 +1015,9 @@ def record_tms(self, tms): """record_tms - record the Tool Meister data and metadata returned from the startup acknowledgement messages collected in "tms". - The first thing we have to do is setup self._tm_tracking properly, - adding which tools are no-ops, transient, and persistent, and properly - record the initial "posted" state. + The first thing we have to do is to determine which tools are no-ops, + transient, and persistent, and properly record the initial "posted" + state. The second thing we do is record all the data and metadata about the Tool Meisters in the ${benchmark_run_dir}/metadata.log file. @@ -855,7 +1074,7 @@ def record_tms(self, tms): home = os.environ.get("HOME", "") if home: src = str(Path(home) / ".ssh" / "config") - dst = str(self.benchmark_run_dir / "ssh.config") + dst = str(self.benchmark_run_dir.local / "ssh.config") try: shutil.copyfile(src, dst) except FileNotFoundError: @@ -865,7 +1084,7 @@ def record_tms(self, tms): # cp -L /etc/ssh/ssh_config ${dir}/ > /dev/null 2>&1 etc_ssh = Path("/etc") / "ssh" src = str(etc_ssh / "ssh_config") - dst = str(self.benchmark_run_dir / "ssh_config") + dst = str(self.benchmark_run_dir.local / "ssh_config") try: shutil.copyfile(src, dst) except FileNotFoundError: @@ -898,13 +1117,18 @@ def record_tms(self, tms): # # cp -rL /etc/ssh/ssh_config.d ${dir}/ > /dev/null 2>&1 subprocess.run( - [cp_path, "-rL", "/etc/ssh/ssh_config.d", f"{self.benchmark_run_dir}/"], + [ + self.cp_path, + "-rL", + "/etc/ssh/ssh_config.d", + f"{self.benchmark_run_dir.local}/", + ], stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, ) - mdlog_name = self.benchmark_run_dir / "metadata.log" + mdlog_name = self.benchmark_run_dir.local / "metadata.log" mdlog = ConfigParser() try: with mdlog_name.open("r") as fp: @@ -923,7 +1147,7 @@ def record_tms(self, tms): # Users have a funny way of adding '%' characters to the run # directory, so we have to be sure we handle "%" characters in the # directory name metadata properly. 
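     # (ConfigParser treats "%" as interpolation syntax, so a literal "%"
     # must be stored as "%%" to round-trip through metadata.log.)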
- mdlog.set(section, "name", self.benchmark_run_dir.name.replace("%", "%%")) + mdlog.set(section, "name", self.benchmark_run_dir.local.name.replace("%", "%%")) version, seqno, sha1, hostdata = collect_local_info(self.pbench_bin) rpm_version = f"v{version}-{seqno}g{sha1}" mdlog.set(section, "rpm-version", rpm_version) @@ -1050,7 +1274,9 @@ def execute(self): self._to_client_channel, json.dumps(started_msg, sort_keys=True) ) if num_present == 0: - raise Exception("Tool Data Sink started by nobody is listening") + raise ToolDataSinkError( + "Tool Data Sink started, but nobody is listening" + ) self.logger.debug("published %s", self._to_client_channel) for data in self._from_client_chan.fetch_json(self.logger): @@ -1214,7 +1440,7 @@ def execute_action(self, action, directory_str, args, data): # the caller wants to report that it is stopping all the Tool # Meisters due to an interruption (SIGINT or otherwise). # - mdlog_name = self.benchmark_run_dir / "metadata.log" + mdlog_name = self.benchmark_run_dir.local / "metadata.log" mdlog = ConfigParser() try: with (mdlog_name).open("r") as fp: @@ -1233,7 +1459,7 @@ def execute_action(self, action, directory_str, args, data): if args["interrupt"]: # args["interrupt"] == True ==> run / run_interrupted mdlog.set(section, "run_interrupted", "true") - iterations = self.benchmark_run_dir / ".iterations" + iterations = self.benchmark_run_dir.local / ".iterations" try: iterations_val = iterations.read_text() except FileNotFoundError: @@ -1258,26 +1484,27 @@ def execute_action(self, action, directory_str, args, data): self._to_logging_chan.unsubscribe() return - directory = Path(directory_str) - if not directory.is_dir(): + try: + local_dir = self.benchmark_run_dir.validate(directory_str) + except self.benchmark_run_dir.Prefix: self.logger.error( - "action '%s' with non-existent directory, '%s'", action, directory, + "action '%s' with invalid directory, '%s' (not a sub-directory of '%s')", + action, + directory_str, + self.benchmark_run_dir, ) - self._send_client_status(action, "invalid directory") + self._send_client_status(action, "directory not a sub-dir of run directory") return - try: - # Check that "directory" has a prefix of self.benchmark_run_dir - directory.relative_to(self.benchmark_run_dir) - except ValueError: + except self.benchmark_run_dir.Exists: self.logger.error( - "action '%s' with invalid directory," - " '%s' (not a sub-directory of '%s')", + "action '%s' with invalid directory, '%s' (does not exist)", action, - directory, - self.benchmark_run_dir, + directory_str, ) - self._send_client_status(action, "directory not a prefix of run directory") + self._send_client_status(action, "directory does not exist") return + else: + assert local_dir is not None, f"Logic bomb! local_dir = {local_dir!r}" with self._lock: # Handle all actions underneath the lock for consistency. @@ -1326,6 +1553,7 @@ def execute_action(self, action, directory_str, args, data): self.tool_group, prom_tool_dict, self.tool_metadata, + self.tar_path, logger=self.logger, ) self._prom_server.launch() @@ -1340,6 +1568,7 @@ def execute_action(self, action, directory_str, args, data): self.tool_group, pcp_tool_dict, self.tool_metadata, + self.tar_path, redis_host=self.redis_host, redis_port=self.redis_port, logger=self.logger, @@ -1364,7 +1593,7 @@ def execute_action(self, action, directory_str, args, data): # the URL for the PUT method. 
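         # (For example, hashlib.md5(b"/run/dir/tools-default").hexdigest()
         # yields a stable 32-character hex token; the path shown here is
         # hypothetical.)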
         directory_bytes = directory_str.encode("utf-8")
         self.data_ctx = hashlib.md5(directory_bytes).hexdigest()
-        self.directory = Path(directory_str)
+        self.directory = local_dir
 
         # Forward to TMs
         ret_val = self._forward_tms(data)
@@ -1567,7 +1796,7 @@ def put_document(self, data_ctx, hostname):
         # Invoke tar directly for efficiency.
         with o_file.open("w") as ofp, e_file.open("w") as efp:
             cp = subprocess.run(
-                [tar_path, "-xf", host_data_tb_name],
+                [self.tar_path, "-xf", host_data_tb_name],
                 cwd=target_dir,
                 stdin=None,
                 stdout=ofp,
@@ -1609,71 +1838,224 @@ def put_document(self, data_ctx, hostname):
         abort(500, "INTERNAL ERROR")
 
 
-def main(argv):
-    _prog = Path(argv[0])
-    PROG = _prog.name
-    pbench_bin = _prog.parent.parent
+def get_logger(PROG, daemon=False):
+    """get_logger - construct a logger for the Tool Data Sink.
+    If in the unit test environment, just log to the console.
+    If in the non-unit test environment:
+        If daemonized, log to a "{PROG}.log" file.
+        If not daemonized, log to the console.
+    """
     logger = logging.getLogger(PROG)
-    fh = logging.FileHandler(f"{PROG}.log")
-    if os.environ.get("_PBENCH_UNIT_TESTS"):
-        fmtstr = "%(levelname)s %(name)s %(funcName)s -- %(message)s"
-    else:
-        fmtstr = (
-            "%(asctime)s %(levelname)s %(process)s %(thread)s"
-            " %(name)s %(funcName)s %(lineno)d -- %(message)s"
-        )
-    fhf = logging.Formatter(fmtstr)
-    fh.setFormatter(fhf)
     if os.environ.get("_PBENCH_TOOL_DATA_SINK_LOG_LEVEL") == "debug":
         log_level = logging.DEBUG
     else:
         log_level = logging.INFO
-    fh.setLevel(log_level)
-    logger.addHandler(fh)
     logger.setLevel(log_level)
+    unit_tests = bool(os.environ.get("_PBENCH_UNIT_TESTS"))
+    if unit_tests or not daemon:
+        sh = logging.StreamHandler()
+    else:
+        sh = logging.FileHandler(f"{PROG}.log")
+    sh.setLevel(log_level)
+    shf = logging.Formatter(fmtstr_ut if unit_tests else fmtstr)
+    sh.setFormatter(shf)
+    logger.addHandler(sh)
+
+    return logger
+
+
+def driver(
+    PROG,
+    redis_server,
+    redis_host,
+    redis_port,
+    pbench_bin,
+    pbench_run,
+    hostname,
+    tar_path,
+    cp_path,
+    param_key,
+    params,
+    optional_md,
+    logger=None,
+):
+    if logger is None:
+        logger = get_logger(PROG)
+
+    logger.debug("params_key (%s): %r", param_key, params)
+
+    try:
+        with ToolDataSink(
+            pbench_bin,
+            pbench_run,
+            hostname,
+            tar_path,
+            cp_path,
+            redis_server,
+            redis_host,
+            redis_port,
+            params,
+            optional_md,
+            logger,
+        ) as tds_app:
+            tds_app.execute()
+    except OSError as exc:
+        if exc.errno == errno.EADDRINUSE:
+            logger.error(
+                "ERROR - tool data sink failed to start, %s:%s already in use",
+                params["bind_hostname"],
+                def_tds_port,
+            )
+            ret_val = 8
+        else:
+            logger.exception("ERROR - failed to start the tool data sink")
+            ret_val = 9
+    except Exception:
+        logger.exception("ERROR - failed to start the tool data sink")
+        ret_val = 10
+    else:
+        ret_val = 0
+    return ret_val
+
+
+def daemon(
+    PROG,
+    redis_server,
+    redis_host,
+    redis_port,
+    pbench_bin,
+    pbench_run,
+    hostname,
+    tar_path,
+    cp_path,
+    param_key,
+    params,
+    optional_md,
+):
+    # Disconnect any existing connections to the Redis server.
+    redis_server.connection_pool.disconnect()
+    del redis_server
+
+    # Before we daemonize, flush any data written to stdout or stderr.
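+    # (Otherwise buffered output could be lost, or emitted twice, after
+    # DaemonContext forks the process.)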
+ sys.stderr.flush() + sys.stdout.flush() + + pidfile_name = f"{PROG}.pid" + pfctx = pidfile.PIDFile(pidfile_name) + with open(f"{PROG}.out", "w") as sofp, open( + f"{PROG}.err", "w" + ) as sefp, DaemonContext( + stdout=sofp, + stderr=sefp, + working_directory=os.getcwd(), + umask=0o022, + pidfile=pfctx, + ): + logger = get_logger(PROG, daemon=True) + + # We have to re-open the connection to the redis server now that we + # are "daemonized". + logger.debug("re-constructing Redis server object") + try: + redis_server = redis.Redis(host=redis_host, port=redis_port, db=0) + except Exception as e: + logger.error( + "Unable to construct Redis server object, %s:%s: %s", + redis_host, + redis_port, + e, + ) + return 7 + else: + logger.debug("reconstructed Redis server object") + return driver( + PROG, + redis_server, + redis_host, + redis_port, + pbench_bin, + pbench_run, + hostname, + tar_path, + cp_path, + param_key, + params, + optional_md, + logger=logger, + ) + + +def main(argv): + _prog = Path(argv[0]) + PROG = _prog.name + # The Tool Data Sink executable is in: + # ${pbench_bin}/util-scripts/tool-meister/pbench-tool-data-sink + # So .parent at each level is: + # _prog ${pbench_bin}/util-scripts/tool-meister/pbench-tool-data-sink + # .parent ${pbench_bin}/util-scripts/tool-meister + # .parent ${pbench_bin}/util-scripts + # .parent ${pbench_bin} + pbench_bin = _prog.parent.parent.parent + try: redis_host = argv[1] redis_port = argv[2] param_key = argv[3] except IndexError as e: - logger.error("Invalid arguments: %s", e) + print(f"{PROG}: Invalid arguments: {e}", file=sys.stderr) return 1 + else: + if not redis_host or not redis_port or not param_key: + print(f"{PROG}: Invalid arguments: {argv!r}", file=sys.stderr) + return 1 + try: + daemonize = argv[4] + except IndexError: + daemonize = "no" - global tar_path tar_path = find_executable("tar") if tar_path is None: - logger.error("External 'tar' executable not found") + print("External 'tar' executable not found", file=sys.stderr) return 2 - global cp_path cp_path = find_executable("cp") if cp_path is None: - logger.error("External 'cp' executable not found") + print("External 'cp' executable not found", file=sys.stderr) return 2 + try: + pbench_run = os.environ["pbench_run"] + except KeyError: + print( + "Unable to fetch pbench_run environment variable", file=sys.stderr, + ) + return 3 + try: redis_server = redis.Redis(host=redis_host, port=redis_port, db=0) except Exception as e: - logger.error( - "Unable to connect to redis server, %s:%s: %s", redis_host, redis_port, e + print( + f"Unable to connect to redis server, {redis_host}:{redis_port}: {e}", + file=sys.stderr, ) return 4 try: hostname = os.environ["_pbench_full_hostname"] except KeyError: - logger.error("Unable to fetch _pbench_full_hostname environment variable") - return 4 + print( + "Unable to fetch _pbench_full_hostname environment variable", + file=sys.stderr, + ) + return 5 try: - params_raw = redis_server.get(param_key) - if params_raw is None: - logger.error('Parameter key, "%s" does not exist.', param_key) - return 5 - logger.debug("params_key (%s): %r", param_key, params_raw) - params_str = params_raw.decode("utf-8") + # Wait for the parameter key value to show up. + params_str = wait_for_conn_and_key( + redis_server, param_key, PROG, redis_host, redis_port + ) # The expected parameters for this "data-sink" is what "channel" to # subscribe to for the tool meister operational life-cycle. 
The # data-sink listens for the actions, sysinfo | init | start | stop | @@ -1683,89 +2065,29 @@ def main(argv): # E.g. params = '{ "channel_prefix": "some-prefix", # "benchmark_run_dir": "/loo/goo" }' params = json.loads(params_str) - channel_prefix = params["channel_prefix"] - benchmark_run_dir = Path(params["benchmark_run_dir"]).resolve(strict=True) - bind_hostname = params["bind_hostname"] - tool_group = params["group"] - tool_trigger = params["tool_trigger"] - tools = params["tools"] - tool_metadata = ToolMetadata.tool_md_from_dict(params["tool_metadata"]) + ToolDataSink.fetch_params(params, pbench_run) except Exception as ex: - logger.error("Unable to fetch and decode parameter key, %s: %s", param_key, ex) + print( + f"Unable to fetch and decode parameter key, {param_key}: {ex}", + file=sys.stderr, + ) return 6 - else: - if not benchmark_run_dir.is_dir(): - logger.error( - "Run directory argument, %s, must be a real directory.", - benchmark_run_dir, - ) - return 7 - logger.debug("Tool Data Sink parameters check out, daemonizing ...") - redis_server.connection_pool.disconnect() - redis_server = None optional_md = params["optional_md"] - # Before we daemonize, flush any data written to stdout or stderr. - sys.stderr.flush() - sys.stdout.flush() - - pidfile_name = f"{PROG}.pid" - pfctx = pidfile.PIDFile(pidfile_name) - with open(f"{PROG}.out", "w") as sofp, open( - f"{PROG}.err", "w" - ) as sefp, daemon.DaemonContext( - stdout=sofp, - stderr=sefp, - working_directory=os.getcwd(), - umask=0o022, - pidfile=pfctx, - files_preserve=[fh.stream.fileno()], - ): - try: - # We have to re-open the connection to the redis server now that we - # are "daemonized". - logger.debug("constructing Redis() object") - try: - redis_server = redis.Redis(host=redis_host, port=redis_port, db=0) - except Exception as e: - logger.error( - "Unable to connect to redis server, %s:%s: %s", - redis_host, - redis_port, - e, - ) - return 8 - else: - logger.debug("constructed Redis() object") - - with ToolDataSink( - pbench_bin, - hostname, - bind_hostname, - redis_server, - redis_host, - redis_port, - channel_prefix, - benchmark_run_dir, - tool_group, - tool_trigger, - tools, - tool_metadata, - optional_md, - logger, - ) as tds_app: - tds_app.execute() - except OSError as exc: - if exc.errno == errno.EADDRINUSE: - logger.error( - "ERROR - tool data sink failed to start, %s:%s already in use", - bind_hostname, - tds_port, - ) - else: - logger.exception("ERROR - failed to start the tool data sink") - except Exception: - logger.exception("ERROR - failed to start the tool data sink") - - return 0 + func = daemon if daemonize == "yes" else driver + ret_val = func( + PROG, + redis_server, + redis_host, + redis_port, + pbench_bin, + pbench_run, + hostname, + tar_path, + cp_path, + param_key, + params, + optional_md, + ) + return ret_val diff --git a/lib/pbench/agent/tool_group.py b/lib/pbench/agent/tool_group.py new file mode 100644 index 0000000000..4d2c5d7057 --- /dev/null +++ b/lib/pbench/agent/tool_group.py @@ -0,0 +1,157 @@ +import os +import re + +from pathlib import Path + + +class BadToolGroup(Exception): + """Exception representing a tool group that does not exist or is invalid. + """ + + pass + + +class ToolGroup: + """Provides an in-memory representation of the registered tools as recorded + on-disk. + """ + + # Current tool group prefix in use. 
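+    # (Registered tool groups live on disk as ${pbench_run}/tools-v1-<group>;
+    # see verify_tool_group() below.)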
+    TOOL_GROUP_PREFIX = "tools-v1"
+
+    @staticmethod
+    def verify_tool_group(group, pbench_run=None):
+        """verify_tool_group - given a tool group name, verify it exists in the
+        ${pbench_run} directory as a properly prefixed tool group directory
+        name.
+
+        Raises a BadToolGroup exception if the directory is invalid or does not
+        exist, or if the pbench_run argument is None and the environment
+        variable of the same name is missing.
+
+        Returns a Pathlib object of the tool group directory on success.
+        """
+        _pbench_run = os.environ.get("pbench_run") if pbench_run is None else pbench_run
+        if not _pbench_run:
+            raise BadToolGroup(
+                f"Cannot validate tool group, '{group}', 'pbench_run'"
+                " environment variable missing"
+            )
+
+        tg_dir_name = Path(_pbench_run, f"{ToolGroup.TOOL_GROUP_PREFIX}-{group}")
+        try:
+            tg_dir = tg_dir_name.resolve(strict=True)
+        except FileNotFoundError:
+            raise BadToolGroup(
+                f"Bad tool group, '{group}': directory {tg_dir_name} does not exist"
+            )
+        except Exception as exc:
+            raise BadToolGroup(
+                f"Bad tool group, '{group}': error resolving {tg_dir_name} directory"
+            ) from exc
+        else:
+            if not tg_dir.is_dir():
+                raise BadToolGroup(
+                    f"Bad tool group, '{group}': directory {tg_dir_name} not valid"
+                )
+            else:
+                return tg_dir
+
+    def __init__(self, group):
+        """Construct a ToolGroup object from the on-disk data of the given
+        tool group.
+
+        If the given tool group is valid, the contents are read into the three
+        dictionary structures:
+
+        "toolnames" - each tool name is the key, with separate dictionaries
+        for each registered host
+
+        "hostnames" - each registered host is the key, with separate
+        dictionaries for each tool registered on that host
+
+        "labels" - each registered host name, that has a label, is the key,
+        and the label is the value; if a host is not labeled, it does not
+        show up in this dictionary
+
+        Raises BadToolGroup via the verify_tool_group() method on error.
+        """
+        self.tg_dir = self.verify_tool_group(group)
+        self.group = group
+
+        # __trigger__
+        try:
+            _trigger = (self.tg_dir / "__trigger__").read_text()
+        except FileNotFoundError:
+            # Ignore missing trigger file
+            self.trigger = None
+        else:
+            if len(_trigger) == 0:
+                # Ignore empty trigger file contents
+                self.trigger = None
+            else:
+                self.trigger = _trigger
+
+        # toolnames - Dict with tool name as the key, dictionary with host
+        # names and parameters for each host
+        self.toolnames = {}
+        # hostnames - Dict with host name as the key, dictionary with tool
+        # names and parameters for each tool
+        self.hostnames = {}
+        self.labels = {}
+        for hdirent in os.listdir(self.tg_dir):
+            if hdirent == "__trigger__":
+                # Ignore handled above
+                continue
+            if not (self.tg_dir / hdirent).is_dir():
+                # Ignore wayward non-directory files
+                continue
+            # We assume this directory is a hostname.
+            host = hdirent
+            assert (
+                host not in self.hostnames
+            ), f"Logic bomb! {host} in {self.hostnames!r}"
+            self.hostnames[host] = {}
+            for tdirent in os.listdir(self.tg_dir / host):
+                if tdirent == "__label__":
+                    self.labels[host] = (
+                        (self.tg_dir / host / tdirent).read_text().strip()
+                    )
+                    continue
+                if tdirent.endswith("__noinstall__"):
+                    # FIXME: ignore "noinstall" for now, tools are going to be
+                    # in containers so this does not make sense going forward.
+                    continue
+                # This directory entry is the name of a tool.
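+                # (e.g. ${pbench_run}/tools-v1-default/host1/mpstat holds the
+                # registered options for tool "mpstat" on host "host1"; the
+                # names are illustrative.)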
+ tool = tdirent + tool_opts_text = (self.tg_dir / host / tool).read_text().strip() + tool_opts = re.sub(r"\n\s*", " ", tool_opts_text) + if tool not in self.toolnames: + self.toolnames[tool] = {} + self.toolnames[tool][host] = tool_opts + assert ( + tool not in self.hostnames[host] + ), f"Logic bomb! {tool} in {self.hostnames[host]!r}" + self.hostnames[host][tool] = tool_opts + + def get_tools(self, host): + """get_tools - given a target host, return a dictionary with the list + of tool names as keys, and the values being their options for that + host. + """ + tools = dict() + for tool, opts in self.toolnames.items(): + try: + host_opts = opts[host] + except KeyError: + # This host does not have this tool registered, ignore. + pass + else: + tools[tool] = host_opts + return tools + + def get_label(self, host): + """get_label - given a target host, return the label associated with + that host. + """ + return self.labels.get(host, "") diff --git a/lib/pbench/agent/tool_meister.py b/lib/pbench/agent/tool_meister.py index 97e39c9320..19034f552b 100644 --- a/lib/pbench/agent/tool_meister.py +++ b/lib/pbench/agent/tool_meister.py @@ -65,7 +65,11 @@ tm_channel_suffix_to_logging, TDS_RETRY_PERIOD_SECS, ) -from pbench.agent.redis import RedisHandler, RedisChannelSubscriber +from pbench.agent.redis import ( + RedisHandler, + RedisChannelSubscriber, + wait_for_conn_and_key, +) from pbench.agent.toolmetadata import ToolMetadata from pbench.agent.utils import collect_local_info @@ -396,66 +400,20 @@ class DcgmTool(PersistentTool): """DcgmTool - provide specific persistent tool behaviors for the "dcgm" tool. - In particular, the dcgm tool requires the "--inst" option, requires the - PYTHONPATH environment variable be set properly, and must use a python2 - environment. + The only particular behavior is that we find the proper "dcgm-exporter" + executable in our PATH. """ def __init__(self, name, tool_opts, logger=None, **kwargs): super().__init__(name, tool_opts, logger=logger, **kwargs) - # Looking for required "--inst" option, reformatting appropriately if - # found. 
- tool_opts_l = self.tool_opts.split(" ") - for opt in tool_opts_l: - if opt.startswith("--inst="): - if opt[-1] == "\n": - install_path = opt[7:-1] - else: - install_path = opt[7:] - self.install_path = Path(install_path) - self.logger.debug( - "install path for tool %s, %s", name, self.install_path - ) - break - else: - self.install_path = None - self.logger.debug("missing install path") - if self.install_path is None: - self.script_path = None - self.args = None - self.env = None - else: - self.script_path = ( - self.install_path / "samples" / "scripts" / "dcgm_prometheus.py" - ) - if not self.script_path.exists(): - self.logger.error("missing script path, %s", self.script_path) - self.args = None - self.env = None - else: - self.args = ["python2", f"{self.script_path}"] - new_path_l = [ - str(self.install_path / "bindings"), - str(self.install_path / "bindings" / "common"), - ] - unit_tests = bool(os.environ.get("_PBENCH_UNIT_TESTS")) - prev_path = os.environ.get("PYTHONPATH", "") - if prev_path and not unit_tests: - new_path_l.append(prev_path) - self.env = os.environ.copy() - self.env["PYTHONPATH"] = ":".join(new_path_l) + executable = find_executable("dcgm-exporter") + self.args = None if executable is None else [executable] def install(self): - if self.install_path is None: - return (1, "dcgm tool --inst argument missing") - elif self.args is None: - return (1, f"dcgm tool path, '{self.script_path}', not found") + if self.args is None: + return (1, "dcgm-exporter tool not found") return (0, "dcgm tool properly installed") - def start(self): - # The dcgm tool needs PYTHONPATH, and run via the shell. - super().start(env=self.env) - class NodeExporterTool(PersistentTool): """NodeExporterTool - provide specifics for running the "node-exporter" @@ -468,10 +426,7 @@ class NodeExporterTool(PersistentTool): def __init__(self, name, tool_opts, logger=None, **kwargs): super().__init__(name, tool_opts, logger=logger, **kwargs) executable = find_executable("node_exporter") - if executable is None: - self.args = None - else: - self.args = [executable] + self.args = None if executable is None else [executable] def install(self): if self.args is None: @@ -779,7 +734,7 @@ def __enter__(self): num_present = 0 if num_present == 0 and time.time() >= timeout: raise Exception( - "Unable to publish startup ack message, {started_msg!r}" + f"Unable to publish startup ack message, {started_msg!r}" ) self.logger.debug("published %s", self._from_tms_channel) return self @@ -999,13 +954,17 @@ def start_tools(self, data): # Name of the temporary tool data directory to use when invoking # tools. This is a local temporary directory when the Tool Meister is - # remote from the pbench controller. - if self._controller == self._hostname: + # remote from the pbench controller. When the Tool Meister is run in + # a container the "directory" parameter will not map into its + # namespace, so we always consider containerized Tool Meisters as + # remote. + _dir = Path(data["directory"]) + if self._controller == self._hostname and _dir.exists(): # This is the case when the Tool Meister instance is running on # the same host as the controller. We just use the directory # given to us in the `start` message. 
try: - _dir = Path(data["directory"]).resolve(strict=True) + _dir = _dir.resolve(strict=True) except Exception: self.logger.exception( "Failed to access provided result directory, %s", data["directory"] @@ -1705,7 +1664,7 @@ def daemon( redis_server = redis.Redis(host=redis_host, port=redis_port, db=0) except Exception as exc: logger.error( - "Unable to construct to Redis server object, %s:%s: %s", + "Unable to construct Redis server object, %s:%s: %s", redis_host, redis_port, exc, @@ -1755,10 +1714,17 @@ def main(argv): except IndexError as e: print(f"{PROG}: Invalid arguments: {e}", file=sys.stderr) return 1 + else: + if not redis_host or not redis_port or not param_key: + print(f"{PROG}: Invalid arguments: {argv!r}", file=sys.stderr) + return 1 try: daemonize = argv[4] except IndexError: daemonize = "no" + else: + if not daemonize: + daemonize = "no" tar_path = find_executable("tar") if tar_path is None: @@ -1824,13 +1790,10 @@ def main(argv): return 5 try: - params_raw = redis_server.get(param_key) - if params_raw is None: - print( - f'{PROG}: Parameter key, "{param_key}" does not exist.', file=sys.stderr - ) - return 6 - params_str = params_raw.decode("utf-8") + # Wait for the key to show up with a value. + params_str = wait_for_conn_and_key( + redis_server, param_key, PROG, redis_host, redis_port + ) params = json.loads(params_str) # Validate the tool meister parameters without constructing an object # just yet, as we want to make sure we can talk to the redis server @@ -1841,30 +1804,20 @@ def main(argv): f"{PROG}: Unable to fetch and decode parameter key, '{param_key}': {exc}", file=sys.stderr, ) - return 7 + return 6 + func_args = ( + PROG, + tar_path, + sysinfo_dump, + pbench_install_dir, + tmp_dir, + param_key, + params, + redis_server, + ) if daemonize == "yes": - ret_val = daemon( - PROG, - tar_path, - sysinfo_dump, - pbench_install_dir, - tmp_dir, - param_key, - params, - redis_server, - redis_host, - redis_port, - ) + ret_val = daemon(*func_args, redis_host, redis_port) else: - ret_val = driver( - PROG, - tar_path, - sysinfo_dump, - pbench_install_dir, - tmp_dir, - param_key, - params, - redis_server, - ) + ret_val = driver(*func_args) return ret_val diff --git a/lib/pbench/agent/utils.py b/lib/pbench/agent/utils.py index 995f196a7e..95bbbb1e37 100644 --- a/lib/pbench/agent/utils.py +++ b/lib/pbench/agent/utils.py @@ -4,7 +4,6 @@ import sys from datetime import datetime -from pathlib import Path from pbench.agent.constants import ( sysinfo_opts_available, @@ -189,40 +188,3 @@ def collect_local_info(pbench_bin): hostdata[arg] = cp.stdout.strip() if cp.stdout is not None else "" return (version, seqno, sha1, hostdata) - - -class BadToolGroup(Exception): - """Exception representing a tool group that does not exist or is invalid. - """ - - pass - - -# Current tool group prefix in use. -TOOL_GROUP_PREFIX = "tools-v1" - - -def verify_tool_group(group, pbench_run=None): - """verify_tool_group - given a tool group name, verify it exists in the - ${pbench_run} directory as a properly prefixed tool group directory name. - - Raises a BadToolGroup exception if the directory is invalid or does not - exist. - - Returns a Pathlib object of the tool group directory on success. 
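[The wait_for_conn_and_key helper used above is imported from pbench.agent.redis but its implementation is not shown in this diff; presumably it polls until the Redis connection is usable and the key has a value, replacing the old single get() attempt. A plausible shape, strictly illustrative:

    import time

    import redis

    def wait_for_key_sketch(redis_server, key, prog, host, port, timeout=60):
        """Illustrative only; not the pbench.agent.redis implementation."""
        end = time.time() + timeout
        while True:
            try:
                raw = redis_server.get(key)
            except redis.ConnectionError:
                raw = None  # Server not reachable yet; keep retrying.
            if raw is not None:
                return raw.decode("utf-8")
            if time.time() >= end:
                raise Exception(
                    f"{prog}: timed out waiting for key '{key}' on {host}:{port}"
                )
            time.sleep(1)
]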
- """ - _pbench_run = os.environ["pbench_run"] if pbench_run is None else pbench_run - tg_dir_name = Path(_pbench_run, f"{TOOL_GROUP_PREFIX}-{group}") - try: - tg_dir = tg_dir_name.resolve(strict=True) - except FileNotFoundError: - raise BadToolGroup( - f"Bad tool group, '{group}': directory {tg_dir_name} does not exist" - ) - else: - if not tg_dir.is_dir(): - raise BadToolGroup( - f"Bad tool group, '{group}': directory {tg_dir_name} not valid" - ) - else: - return tg_dir diff --git a/lib/pbench/test/unit/agent/test_tool_data_sink.py b/lib/pbench/test/unit/agent/test_tool_data_sink.py new file mode 100644 index 0000000000..50b6527b76 --- /dev/null +++ b/lib/pbench/test/unit/agent/test_tool_data_sink.py @@ -0,0 +1,480 @@ +"""Tests for the Tool Data Sink module. +""" + +import logging +import pytest +import shutil +import time + +from http import HTTPStatus +from io import BytesIO +from pathlib import Path +from threading import Condition, Lock, Thread +from unittest.mock import patch +from wsgiref.simple_server import WSGIRequestHandler + +from pbench.agent import tool_data_sink +from pbench.agent.tool_data_sink import ( + BenchmarkRunDir, + ToolDataSinkError, + DataSinkWsgiServer, +) + + +class TestBenchmarkRunDir: + """Verify the Tool Data Sink BenchmarkRunDir class. + """ + + @pytest.fixture + def cleanup_tmp(self, pytestconfig): + TMP = Path(pytestconfig.cache.get("TMP", None)) + self.int_pb_run = TMP / "pbench-run-int" + self.ext_pb_run = TMP / "pbench-run-ext" + yield + try: + shutil.rmtree(self.int_pb_run) + except Exception as exc: + print(exc) + try: + shutil.rmtree(self.ext_pb_run) + except Exception as exc: + print(exc) + + def test_validate(self, cleanup_tmp): + """test_validate - verify the behavior of the validate() using both an + internal - external difference and when the internal and external + directories are the same. + + This implicitly tests the constructor as well. + """ + self.int_pb_run.mkdir() + ext_bm_rd = self.int_pb_run / "bm-run-dir" + ext_bm_rd.mkdir() + brd = BenchmarkRunDir(str(ext_bm_rd), str(self.int_pb_run)) + assert str(ext_bm_rd) == str(brd) + + valpre = ext_bm_rd / "valid-prefix" + valpre.mkdir() + obj = brd.validate(str(valpre)) + assert str(valpre) == str(obj) + + with pytest.raises(brd.Prefix): + brd.validate("/not/a/valid-prefix") + + self.ext_pb_run.mkdir() + ext_bm_rd = self.ext_pb_run / "bm-run-dir" + ext_bm_rd.mkdir() + brd = BenchmarkRunDir(str(ext_bm_rd), str(self.int_pb_run)) + + valpre = ext_bm_rd / "not-a-prefix" + with pytest.raises(brd.Exists): + brd.validate(valpre) + + def test_constructor_errors(self, cleanup_tmp): + """test_constructor_errors - verify errors are properly raised during + the execution of the constructor. + """ + self.int_pb_run.mkdir() + + ext_bm_rd = self.int_pb_run / "bm-run-dir" + ext_bm_rd.write_text("Should be a directory!") + with pytest.raises(ToolDataSinkError) as exc: + BenchmarkRunDir(str(ext_bm_rd), str(self.int_pb_run)) + exp_err = f"Run directory parameter, '{ext_bm_rd}', must be a real directory." + assert exp_err == str(exc.value) + ext_bm_rd.unlink() + + # NOTE: in a container the "internal" pbench run directory must exist, + # the external pbench run directory does not exist from within the + # container. 
+        ext_bm_rd = self.ext_pb_run / "bm-run-dir"
+        int_bm_rd = self.int_pb_run / "bm-run-dir"
+        int_bm_rd.mkdir()
+        with pytest.raises(ToolDataSinkError) as exc:
+            BenchmarkRunDir(str(ext_bm_rd), str(self.int_pb_run))
+        exp_err = (
+            f"Run directory parameter, '{ext_bm_rd}', must be an existing"
+            f" directory ('{self.ext_pb_run}/.path' not found, '"
+        )
+        assert str(exc.value).startswith(exp_err)
+
+        self.ext_pb_run.mkdir()
+        dot_path = self.int_pb_run / ".path"
+        dot_path_contents = f"{self.ext_pb_run}-mismatch"
+        dot_path.write_text(dot_path_contents)
+        with pytest.raises(ToolDataSinkError) as exc:
+            BenchmarkRunDir(str(ext_bm_rd), str(self.int_pb_run))
+        exp_err = (
+            f"Run directory parameter, '{ext_bm_rd}', must be an existing"
+            f" directory (.path contents mismatch, .path='{dot_path_contents}'"
+            f" != '{self.ext_pb_run}')."
+        )
+        assert exp_err == str(exc.value)
+
+
+def _test_app(environ, start_response):
+    start_response(
+        "200 OK",
+        [("Content-Type", "text/plain"), ("Date", "Fri, 12 Feb 2021 23:35:42 UTC")],
+    )
+    return [b"Hello, world! 42"]
+
+
+class TestDataSinkWsgiServer:
+    """Verify the DataSinkWsgiServer wrapper class.
+    """
+
+    def test_constructor(self):
+        """test_constructor - verify the DataSinkWsgiServer constructor.
+        """
+        with pytest.raises(Exception) as exc:
+            DataSinkWsgiServer()
+        assert "DataSinkWsgiServer requires a logger" == str(exc.value)
+
+        wsgi = DataSinkWsgiServer(
+            host="host.example.com", port="42", logger="__logger__"
+        )
+        assert wsgi.options.get("handler_class", "missing") != "missing"
+        klass = wsgi.options.get("handler_class")
+        assert isinstance(klass, type(WSGIRequestHandler))
+        assert wsgi._server is None
+        assert wsgi._err_code is None
+        assert wsgi._err_text is None
+        assert isinstance(wsgi._lock, type(Lock()))
+        assert isinstance(wsgi._cv, type(Condition()))
+        assert wsgi._logger == "__logger__"
+
+    def test_log_methods(self, caplog):
+        logger = logging.getLogger("test_log_methods")
+        wsgi_server = DataSinkWsgiServer(
+            host="host.example.com", port="42", logger=logger
+        )
+        wrh = wsgi_server.options["handler_class"]
+        # This forces the base WSGI methods to not buffer writes.
+        wrh.wbufsize = 1
+
+        class MockBytesIO(BytesIO):
+            def close(self, *args, **kwargs):
+                self._saved_value = self.getvalue()
+                super().close(*args, **kwargs)
+
+        class MockSocket:
+            def getsockname(self):
+                return ("sockname",)
+
+        class MockRequest:
+            _sock = MockSocket()
+
+            def __init__(self, path):
+                self._path = path
+
+            def makefile(self, *args, **kwargs):
+                if args[0] == "rb":
+                    return MockBytesIO(b"GET %s HTTP/1.1" % self._path)
+                elif args[0] == "wb":
+                    return MockBytesIO(b"")
+                else:
+                    raise ValueError(
+                        "MockRequest: unrecognized file type", args, kwargs
+                    )
+
+        class MockServer:
+            def __init__(self):
+                self.base_environ = {}
+
+            def get_app(self):
+                return _test_app
+
+        mock_server = MockServer()
+
+        # We build all of the above mock infrastructure just to get a usable
+        # DataSinkWsgiRequestHandler() object.  The MockRequest() mimics a
+        # single request being handled, whose generated response is captured
+        # in the handler's "wfile" attribute value.  This one request will
+        # also emit one informational log.
+        handler = wrh(MockRequest(b"/"), (0, 0), mock_server)
+        assert handler.wfile._saved_value.startswith(b"HTTP/1.0 200 OK")
+        assert handler.wfile._saved_value.endswith(b"Hello, world! 42")
+        assert caplog.records[0].levelname == "INFO"
+        assert caplog.records[0].message == '0 - - "GET / HTTP/1.1" 200 16'
+
+        # Now that we have this handler object, we can directly invoke the
+        # other logging methods to verify their behavior.
+        handler.log_error("test error %d %s", 42, "43")
+        assert caplog.records[1].levelname == "ERROR"
+        assert caplog.records[1].message == "0 - - test error 42 43"
+        handler.log_message("test msg %d %s", 42, "43")
+        assert caplog.records[2].levelname == "WARNING"
+        assert caplog.records[2].message == "0 - - test msg 42 43"
+        handler.log_request(code=HTTPStatus(404), size=42)
+        assert caplog.records[3].levelname == "INFO"
+        assert caplog.records[3].message == '0 - - "GET / HTTP/1.1" 404 42'
+
+    class MockServer:
+        def __init__(self, host, port, app, *args, **kwargs):
+            self.host = host
+            self.port = port
+            self.app = app
+            self.args = args
+            self.kwargs = kwargs
+            self.serve_forever_called = False
+            self.shutdown_called = False
+            if self.host.startswith("oserror"):
+                raise OSError(42, "oserror")
+            elif self.host.startswith("exception"):
+                raise Exception("exception")
+
+        def shutdown(self):
+            self.shutdown_called = True
+
+        def serve_forever(self):
+            self.serve_forever_called = True
+
+    def test_run(self, caplog):
+        """test_run - verify the code paths of the run method directly.
+
+        NOTE: We are not using threads to do this.  Instead we are mocking
+        out the `make_server` call to create a fake server, under our
+        control, that does nothing when "serve_forever" is called.
+        """
+        logger = logging.getLogger("test_run")
+        wsgi_server = DataSinkWsgiServer(
+            host="host.example.com", port="42", logger=logger
+        )
+        mocked_servers = []
+
+        def mock_make_server(host, port, app, *args, **kwargs):
+            mocked_server = self.MockServer(host, port, app, *args, **kwargs)
+            mocked_servers.append(mocked_server)
+            return mocked_server
+
+        with patch.object(tool_data_sink, "make_server", mock_make_server):
+            # First we invoke the "run" method once to let it execute
+            # normally.
+            try:
+                wsgi_server.run(_test_app)
+            except Exception as exc:
+                pytest.fail(f"WSGI server failed with an exception, {exc}")
+            else:
+                # Retrieve the internal server object that we created, and
+                # verify that it is created as expected, and that
+                # "serve_forever" was called.
+                mock_server = mocked_servers[0]
+                assert wsgi_server._server is mock_server
+                assert wsgi_server._err_code == 0
+                assert wsgi_server._err_text is None
+                assert mock_server.host == "host.example.com"
+                assert mock_server.port == 42
+                assert mock_server.app is _test_app
+                assert mock_server.args == ()
+                klass = mock_server.kwargs.get("handler_class")
+                assert isinstance(klass, type(WSGIRequestHandler))
+                assert mock_server.serve_forever_called
+                # The success path of "run" should have emitted three debug
+                # messages.
+                assert len(caplog.records) == 3
+                assert caplog.records[0].levelname == "DEBUG"
+                assert (
+                    caplog.records[0].message == "Making tool data sink WSGI server ..."
+                )
+                assert caplog.records[1].levelname == "DEBUG"
+                assert caplog.records[1].message == "Successfully created WSGI server"
+                assert caplog.records[2].levelname == "DEBUG"
+                assert (
+                    caplog.records[2].message
+                    == "Running tool data sink WSGI server ..."
+                )
+            with pytest.raises(AssertionError) as exc:
+                # Call it again to verify the assertion fires
+                wsgi_server.run(_test_app)
+            assert "'run' method called twice" in str(exc.value), f"{exc.value}"
+            # No logs should have been emitted.
+            assert len(caplog.records) == 3
+
+    def test_stop_and_wait(self, caplog):
+        """test_stop_and_wait - verify the operation of run() in conjunction
+        with the stop() and wait() methods invoked from separate threads.
+
+        There are a number of scenarios for the order of operations between
+        threads that we need to test.  We list them here using "MainThr" as
+        the name of the "main thread" which _creates_ the WSGI thread, and
+        "WsgiThr" as the name of the created WSGI thread invoking the "run"
+        method.
+
+        References:
+            .wait() called in
+                .stop() method
+                __enter__() method
+            .stop() called in
+                __exit__() method
+
+        Scenario A:
+
+          * MainThr creates WSGI thread (WsgiThr not running)
+          * MainThr calls stop()
+          * WsgiThr starts running
+          * WsgiThr reports err_code == 0
+
+        Scenario B:
+
+          * MainThr creates WSGI thread
+          * WsgiThr starts running
+          * WsgiThr reports err_code == 0
+          * MainThr calls stop()
+
+        Scenario C:
+
+          * MainThr creates WSGI thread (WsgiThr not running)
+          * MainThr calls stop()
+          * WsgiThr starts running
+          * WsgiThr reports err_code > 0
+
+        Scenario D:
+
+          * MainThr creates WSGI thread
+          * WsgiThr starts running
+          * WsgiThr reports err_code > 0
+          * MainThr calls stop()
+
+        Scenario E:
+
+          * MainThr creates WSGI thread (WsgiThr not running)
+          * MainThr calls stop()
+          * WsgiThr starts running
+          * WsgiThr reports err_code < 0
+
+        Scenario F:
+
+          * MainThr creates WSGI thread
+          * WsgiThr starts running
+          * WsgiThr reports err_code < 0
+          * MainThr calls stop()
+        """
+
+        def wsgi_run(scenario, wsgi_server, trace):
+            ret_val = None
+            if scenario in ("A", "C", "E"):
+                time.sleep(0.1)
+            try:
+                trace.append("WsgiThr - run")
+                wsgi_server.run(_test_app)
+            except Exception as exc:
+                ret_val = exc
+            return ret_val
+
+        def do_wait(scenario, wsgi_server, trace):
+            if scenario in ("B", "D", "F"):
+                time.sleep(0.1)
+            trace.append("MainThr - wait")
+            err_text, err_code = wsgi_server.wait()
+            return err_text, err_code
+
+        def do_stop(scenario, wsgi_server, trace):
+            if scenario in ("B", "D", "F"):
+                time.sleep(0.1)
+            trace.append("MainThr - stop")
+            wsgi_server.stop()
+
+        # The host name prefix directs the MockServer class to behave by
+        # raising an OSError or Exception based on the name.
+        hostnames = dict(
+            A="host.example.com",
+            B="host.example.com",
+            C="oserror.example.com",
+            D="oserror.example.com",
+            E="exception.example.com",
+            F="exception.example.com",
+        )
+        caplog_idx = 0
+        logger = logging.getLogger("test_run")
+        for scenario in ["A", "B", "C", "D", "E", "F"]:
+            wsgi_server = DataSinkWsgiServer(
+                host=hostnames[scenario], port="42", logger=logger
+            )
+            mocked_servers = []
+
+            def mock_make_server(host, port, app, *args, **kwargs):
+                mocked_server = self.MockServer(host, port, app, *args, **kwargs)
+                mocked_servers.append(mocked_server)
+                return mocked_server
+
+            with patch.object(tool_data_sink, "make_server", mock_make_server):
+                trace = []
+                wsgithr = Thread(target=wsgi_run, args=(scenario, wsgi_server, trace))
+                wsgithr.start()
+                err_text, err_code = do_wait(scenario, wsgi_server, trace)
+                wsgithr.join()
+            assert caplog.records[caplog_idx].levelname == "DEBUG"
+            assert (
+                caplog.records[caplog_idx].message
+                == "Making tool data sink WSGI server ..."
+ ) + caplog_idx += 1 + if scenario in ("A", "B"): + mock_server = mocked_servers[0] + assert mock_server.serve_forever_called + assert not mock_server.shutdown_called + assert err_code == 0 + assert err_text is None + assert caplog.records[caplog_idx].levelname == "DEBUG" + assert ( + caplog.records[caplog_idx].message + == "Successfully created WSGI server" + ) + caplog_idx += 1 + assert caplog.records[caplog_idx].levelname == "DEBUG" + assert ( + caplog.records[caplog_idx].message + == "Running tool data sink WSGI server ..." + ) + caplog_idx += 1 + elif scenario in ("C", "D"): + assert len(mocked_servers) == 0 + assert err_code == 42 + assert err_text == "[Errno 42] oserror" + # Only 1 log message is emitted when OSErrors are encountered + else: + assert scenario in ("E", "F") + assert len(mocked_servers) == 0 + assert err_code == -1 + assert err_text == "exception" + assert caplog.records[caplog_idx].levelname == "ERROR" + assert ( + caplog.records[caplog_idx].message + == "Unexpected error in WSGI server" + ) + caplog_idx += 1 + assert len(caplog.records) == caplog_idx + + # Now we test two cases for the stop() method + for scenario in ["A", "E"]: + wsgi_server = DataSinkWsgiServer( + host=hostnames[scenario], port="42", logger=logger + ) + mocked_servers = [] + + def mock_make_server(host, port, app, *args, **kwargs): + mocked_server = self.MockServer(host, port, app, *args, **kwargs) + mocked_servers.append(mocked_server) + return mocked_server + + with patch.object(tool_data_sink, "make_server", mock_make_server): + trace = [] + wsgithr = Thread(target=wsgi_run, args=(scenario, wsgi_server, trace)) + wsgithr.start() + do_stop(scenario, wsgi_server, trace) + wsgithr.join() + assert caplog.records[caplog_idx].levelname == "DEBUG" + assert ( + caplog.records[caplog_idx].message + == "Making tool data sink WSGI server ..." + ) + caplog_idx += 1 + if scenario == "A": + mock_server = mocked_servers[0] + assert mock_server.serve_forever_called + assert mock_server.shutdown_called + caplog_idx += 2 + else: + assert scenario == "E" + assert len(mocked_servers) == 0 + caplog_idx += 1 + assert len(caplog.records) == caplog_idx
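[For anyone exercising the new tests in isolation: assuming the repository's usual source layout, the module named in the diff header can be run by itself. One way, staying in Python rather than the shell:

    import pytest

    # Run only the new Tool Data Sink tests, verbosely; the path comes from
    # the diff header above.
    pytest.main(["-v", "lib/pbench/test/unit/agent/test_tool_data_sink.py"])
]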