diff --git a/doc/support.md b/doc/support.md new file mode 100644 index 000000000..4d35a42aa --- /dev/null +++ b/doc/support.md @@ -0,0 +1,91 @@ +# Support Data Collection + +When troubleshooting issues or seeking support, the `support` command +provides a convenient way to collect comprehensive system diagnostics. +This command gathers configuration files, logs, network state, and other +system information into a single compressed archive. + +## Collecting Support Data + +To collect support data and save it to a file: + +```bash +admin@host:~$ support collect > support-data.tar.gz +(admin@host) Password: *********** +Starting support data collection from host... +This may take up to a minute. Please wait... +Tailing /var/log/messages for 30 seconds (please wait)... +Log tail complete. +Collection complete. Creating archive... +admin@host:~$ ls -l support-data.tar.gz +-rw-rw-r-- 1 admin admin 508362 nov 30 13:05 support-data.tar.gz +``` + +The command can also be run remotely via SSH from your workstation: + +```bash +$ ssh admin@host support collect > support-data.tar.gz +... +``` + +The collection process may take up to a minute depending on system load +and the amount of logging data. Progress messages are shown during the +collection process. + +## Encrypted Collection + +For secure transmission of support data, the archive can be encrypted +with GPG using a password: + +```bash +admin@host:~$ support collect -p mypassword > support-data.tar.gz.gpg +Starting support data collection from host... +This may take up to a minute. Please wait... +... +Collection complete. Creating archive... +Encrypting with GPG... +``` + +The `support collect` command even supports omitting `mypassword` and +will then prompt interactively for the password. This works over SSH too, +but the local ssh client may then echo the password. 
+ +> [!TIP] +> To hide the encryption password for an SSH session, the script supports +> reading from stdin: +> `echo "$MYSECRET" | ssh user@device support collect -p > +> file.tar.gz.gpg` + +After transferring the resulting file to your workstation, decrypt it +with the password: + +```bash +$ gpg -d support-data.tar.gz.gpg > support-data.tar.gz +$ tar xzf support-data.tar.gz +... +``` + +or + +```bash +$ gpg -d support-data.tar.gz.gpg | tar xz +... +``` + +> [!IMPORTANT] +> Make sure to share `mypassword` out-of-band from the encrypted data +> with the recipient of the data. For example, avoid sending both in +> the same plain-text email. + +## What is Collected + +The support archive includes: + +- System identification (hostname, uptime, kernel version) +- Running and operational configuration (sysrepo datastores) +- System logs (`/var/log` directory and live tail of messages log) +- Network configuration and state (interfaces, routes, neighbors, bridges) +- FRRouting information (OSPF, BFD status) +- Container information (podman containers and their configuration) +- System resource usage (CPU, memory, disk, processes) +- Hardware information (PCI, USB devices, network interfaces) diff --git a/doc/system.md b/doc/system.md index 86648df26..02d9b6445 100644 --- a/doc/system.md +++ b/doc/system.md @@ -323,94 +323,6 @@ reference ID, stratum, time offsets, frequency, and root delay. > The system uses `chronyd` Network Time Protocol (NTP) daemon. The > output shown here is best explained in the [Chrony documentation][4]. -## Support Data Collection - -When troubleshooting issues or seeking support, the `support` command -provides a convenient way to collect comprehensive system diagnostics. -This command gathers configuration files, logs, network state, and other -system information into a single compressed archive. 
- -### Collecting Support Data - -To collect support data and save it to a file: - -```bash -admin@host:~$ support collect > support-data.tar.gz -(admin@host) Password: *********** -Starting support data collection from host... -This may take up to a minute. Please wait... -Tailing /var/log/messages for 30 seconds (please wait)... -Log tail complete. -Collection complete. Creating archive... -admin@host:~$ ls -l support-data.tar.gz --rw-rw-r-- 1 admin admin 508362 nov 30 13:05 support-data.tar.gz -``` - -The command can also be run remotely via SSH from your workstation: - -```bash -$ ssh admin@host support collect > support-data.tar.gz -... -``` - -The collection process may take up to a minute depending on system load -and the amount of logging data. Progress messages are shown during the -collection process. - -### Encrypted Collection - -For secure transmission of support data, the archive can be encrypted -with GPG using a password: - -```bash -admin@host:~$ support collect -p mypassword > support-data.tar.gz.gpg -Starting support data collection from host... -This may take up to a minute. Please wait... -... -Collection complete. Creating archive... -Encrypting with GPG... -``` - -The `support collect` command even supports omitting `mypassword` and -will then prompt interactively for the password. This works over SSH too, -but the local ssh client may then echo the password. - -> [!TIP] -> To hide the encryption password for an SSH session, the script supports reading from stdin: -> `echo "$MYSECRET" | ssh user@device support collect -p > file.tar.gz.gpg` - -After transferring the resulting file to your workstation, decrypt it -with the password: - -```bash -$ gpg -d support-data.tar.gz.gpg > support-data.tar.gz -$ tar xzf support-data.tar.gz -``` - -or - -```bash -$ gpg -d support-data.tar.gz.gpg | tar xz -``` - -> [!IMPORTANT] -> Make sure to share `mypassword` out-of-band from the encrypted data -> with the recipient of the data. 
I.e., avoid sending both in the same -> plain-text email for example. - -### What is Collected - -The support archive includes: - - - System identification (hostname, uptime, kernel version) - - Running and operational configuration (sysrepo datastores) - - System logs (`/var/log` directory and live tail of messages log) - - Network configuration and state (interfaces, routes, neighbors, bridges) - - FRRouting information (OSPF, BFD status) - - Container information (podman containers and their configuration) - - System resource usage (CPU, memory, disk, processes) - - Hardware information (PCI, USB devices, network interfaces) - [1]: https://www.rfc-editor.org/rfc/rfc7317 [2]: https://github.com/kernelkit/infix/blob/main/src/confd/yang/infix-system%402024-02-29.yang [3]: https://www.rfc-editor.org/rfc/rfc8341 diff --git a/mkdocs.yml b/mkdocs.yml index 5d768c0c8..5d179c4f4 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -43,6 +43,7 @@ nav: - Hardware Info & Status: hardware.md - Management: management.md - Syslog Support: syslog.md + - Support Data: support.md - Upgrade: upgrade.md - Scripting: - Introduction: scripting.md diff --git a/src/bin/support b/src/bin/support index 117f008ac..5c7f840e1 100755 --- a/src/bin/support +++ b/src/bin/support @@ -1,5 +1,9 @@ #!/bin/sh # Support utilities for troubleshooting Infix systems + +# Program name for usage messages (supports being renamed by users) +prognm=$(basename "$0") + # # The collect command gathers system information and outputs a tarball. # Data is collected to /var/lib/support (or $HOME as fallback) and then @@ -9,35 +13,36 @@ # systems that do not yet have this script in the root fileystems. # # 1. Copy this script to the target device's home directory: -# scp support user@device: +# +# scp support user@device: # # 2. SSH to the device and make it executable: -# ssh user@device -# chmod +x ~/support +# +# ssh user@device chmod +x support # # 3. 
Run the script from your home directory: # -# ~/support collect > support-data.tar.gz +# ./support collect > support-data.tar.gz # # Or directly via SSH from your workstation: # -# ssh user@device '~/support collect' > support-data.tar.gz +# ssh user@device './support collect' > support-data.tar.gz # # Optionally, the output can be encrypted with GPG using a password for # secure transmission to support personnel, see below. # # Examples: -# support collect > support-data.tar.gz -# support collect -s 5 > support-data.tar.gz -# support collect -p > support-data.tar.gz.gpg -# support collect -p mypass > support-data.tar.gz.gpg +# ./support collect > support-data.tar.gz +# ./support collect -s 5 > support-data.tar.gz +# ./support collect -p > support-data.tar.gz.gpg +# ./support collect -p mypass > support-data.tar.gz.gpg # -# ssh user@device support collect > support-data.tar.gz -# ssh user@device support collect -p mypass > support-data.tar.gz.gpg +# ssh user@device ./support collect > support-data.tar.gz +# ssh user@device ./support collect -p mypass > support-data.tar.gz.gpg # # Note, interactive password prompt (-p without argument) may echo characters # over SSH due to local terminal echo. Use -p PASSWORD for remote execution, -# or pipe the password: echo "password" | ssh user@device support collect -p +# or pipe the password: echo "password" | ssh user@device ./support collect -p # meaning you can even: echo "$SECRET_VARIABLE" | ... which in some cases can # come in handy. 
# @@ -91,7 +96,7 @@ cmd_collect() ;; *) echo "Error: Unknown option '$1'" >&2 - echo "Usage: support collect [--log-sec|-s N] [--password|-p PASSWORD]" >&2 + echo "Usage: $prognm collect [--log-sec|-s N] [--password|-p PASSWORD]" >&2 exit 1 ;; esac @@ -129,8 +134,12 @@ cmd_collect() # Cleanup on exit cleanup() { + echo "[$(date -Iseconds)] Cleanup called (signal: ${1:-EXIT})" >> "${EXEC_LOG}" 2>&1 || echo "[$(date -Iseconds)] Cleanup called (signal: ${1:-EXIT})" >&2 if [ -d "${COLLECT_DIR}" ]; then + echo "[$(date -Iseconds)] Removing collection directory: ${COLLECT_DIR}" >> "${EXEC_LOG}" 2>&1 || echo "[$(date -Iseconds)] Removing: ${COLLECT_DIR}" >&2 rm -rf "${COLLECT_DIR}" + else + echo "[$(date -Iseconds)] Collection directory already gone: ${COLLECT_DIR}" >> "${EXEC_LOG}" 2>&1 || echo "[$(date -Iseconds)] Already gone: ${COLLECT_DIR}" >&2 fi } trap cleanup EXIT INT TERM @@ -383,7 +392,14 @@ cmd_collect() # Create final tar.gz and output to stdout # Use -C to change to parent directory so paths in archive don't include full path - cd "${WORK_DIR}" + echo "[$(date -Iseconds)] Changing to work directory: ${WORK_DIR}" >> "${EXEC_LOG}" 2>&1 + if ! cd "${WORK_DIR}"; then + echo "[$(date -Iseconds)] ERROR: Failed to cd to ${WORK_DIR}" >> "${EXEC_LOG}" 2>&1 + echo "Error: Cannot change to work directory ${WORK_DIR}" >&2 + exit 1 + fi + echo "[$(date -Iseconds)] Successfully changed to: $(pwd)" >> "${EXEC_LOG}" 2>&1 + echo "[$(date -Iseconds)] Creating archive from: $(basename "${COLLECT_DIR}")" >> "${EXEC_LOG}" 2>&1 # Check if password encryption is requested if [ -n "$PASSWORD" ]; then @@ -392,14 +408,29 @@ cmd_collect() exit 1 fi echo "Encrypting with GPG..." >&2 + echo "[$(date -Iseconds)] Starting tar with GPG encryption" >> "${EXEC_LOG}" 2>&1 tar czf - "$(basename "${COLLECT_DIR}")" 2>> "${EXEC_LOG}" | \ gpg --batch --yes --passphrase "$PASSWORD" --pinentry-mode loopback -c 2>> "${EXEC_LOG}" + tar_exit=$? 
+ echo "[$(date -Iseconds)] tar+gpg pipeline exit code: $tar_exit" >> "${EXEC_LOG}" 2>&1 echo "" >&2 echo "WARNING: Remember to share the encryption password out-of-band!" >&2 echo " Do not send it in the same email as the encrypted file." >&2 + if [ $tar_exit -ne 0 ]; then + echo "[$(date -Iseconds)] ERROR: tar+gpg failed with exit code $tar_exit" >> "${EXEC_LOG}" 2>&1 + exit $tar_exit + fi else + echo "[$(date -Iseconds)] Starting tar (no encryption)" >> "${EXEC_LOG}" 2>&1 tar czf - "$(basename "${COLLECT_DIR}")" 2>> "${EXEC_LOG}" + tar_exit=$? + echo "[$(date -Iseconds)] tar exit code: $tar_exit" >> "${EXEC_LOG}" 2>&1 + if [ $tar_exit -ne 0 ]; then + echo "[$(date -Iseconds)] ERROR: tar failed with exit code $tar_exit" >> "${EXEC_LOG}" 2>&1 + exit $tar_exit + fi fi + echo "[$(date -Iseconds)] Archive creation completed successfully" >> "${EXEC_LOG}" 2>&1 } cmd_clean() @@ -424,7 +455,7 @@ cmd_clean() ;; *) echo "Error: Unknown option '$1'" >&2 - echo "Usage: support clean [--dry-run] [--days N]" >&2 + echo "Usage: $prognm clean [--dry-run] [--days N]" >&2 exit 1 ;; esac @@ -483,7 +514,7 @@ cmd_clean() usage() { - echo "Usage: support [global-options] [options]" + echo "Usage: $prognm [global-options] [options]" echo "" echo "Global options:" echo " -w, --work-dir PATH Use PATH as working directory for collection/cleanup" @@ -505,14 +536,14 @@ usage() echo " -d, --days N Remove directories older than N days (default: 7)" echo "" echo "Examples:" - echo " support collect > support-data.tar.gz" - echo " support collect -p > support-data.tar.gz.gpg" - echo " support collect --password mypass > support-data.tar.gz.gpg" - echo " support --work-dir /tmp/ram collect > support-data.tar.gz" - echo " ssh user@device support collect > support-data.tar.gz" - echo " support clean --dry-run" - echo " support clean --days 30" - echo " support --work-dir /tmp/ram clean" + echo " $prognm collect > support-data.tar.gz" + echo " $prognm collect -p > support-data.tar.gz.gpg" + echo " 
$prognm collect --password mypass > support-data.tar.gz.gpg" + echo " $prognm --work-dir /tmp/ram collect > support-data.tar.gz" + echo " ssh user@device $prognm collect > support-data.tar.gz" + echo " $prognm clean --dry-run" + echo " $prognm clean --days 30" + echo " $prognm --work-dir /tmp/ram clean" exit 1 } diff --git a/test/case/misc/support_collect/test.py b/test/case/misc/support_collect/test.py index 5713472f9..4852f3207 100755 --- a/test/case/misc/support_collect/test.py +++ b/test/case/misc/support_collect/test.py @@ -48,6 +48,21 @@ stderr_output = result.stderr.decode('utf-8') if result.stderr else "" print(f"support collect failed with return code {result.returncode}") print(f"stderr: {stderr_output}") + + # Try to retrieve the collection.log for debugging + print("\n=== Attempting to retrieve collection.log for debugging ===") + try: + log_result = tgtssh.run("find /tmp -name 'support-*' -type d -exec cat {}/collection.log \\; 2>/dev/null || echo 'No collection.log found'", + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + timeout=10, + check=False) + if log_result.stdout: + log_output = log_result.stdout.decode('utf-8') + print(f"collection.log contents:\n{log_output}") + except Exception as e: + print(f"Could not retrieve collection.log: {e}") + raise Exception("support collect command failed") with test.step("Verify tarball was created and is valid"): @@ -110,6 +125,21 @@ if result.returncode != 0: stderr_output = result.stderr.decode('utf-8') if result.stderr else "" print(f"support collect with encryption failed: {stderr_output}") + + # Try to retrieve the collection.log for debugging + print("\n=== Attempting to retrieve collection.log for debugging ===") + try: + log_result = tgtssh.run("find /tmp -name 'support-*' -type d -exec cat {}/collection.log \\; 2>/dev/null || echo 'No collection.log found'", + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + timeout=10, + check=False) + if log_result.stdout: + log_output = 
log_result.stdout.decode('utf-8') + print(f"collection.log contents:\n{log_output}") + except Exception as e: + print(f"Could not retrieve collection.log: {e}") + raise Exception("support collect with --password failed") with test.step("Verify encrypted file and decrypt it"):