From 14771a57269ce722772095e12f79f98a1c63c36f Mon Sep 17 00:00:00 2001
From: "github-actions[bot]"
<41898282+github-actions[bot]@users.noreply.github.com>
Date: Fri, 7 Nov 2025 16:20:39 +0000
Subject: [PATCH 1/2] Backport: Copy
vcluster/configure/vcluster-yaml/control-plane/components/backing-store/etcd/embedded.mdx
to
vcluster_versioned_docs/version-0.26.0/configure/vcluster-yaml/control-plane/components/backing-store/etcd/embedded.mdx
---
.../backing-store/etcd/embedded.mdx | 361 ++++++++++++++----
1 file changed, 294 insertions(+), 67 deletions(-)
diff --git a/vcluster_versioned_docs/version-0.26.0/configure/vcluster-yaml/control-plane/components/backing-store/etcd/embedded.mdx b/vcluster_versioned_docs/version-0.26.0/configure/vcluster-yaml/control-plane/components/backing-store/etcd/embedded.mdx
index 3f65ac119..87e28142c 100644
--- a/vcluster_versioned_docs/version-0.26.0/configure/vcluster-yaml/control-plane/components/backing-store/etcd/embedded.mdx
+++ b/vcluster_versioned_docs/version-0.26.0/configure/vcluster-yaml/control-plane/components/backing-store/etcd/embedded.mdx
@@ -2,20 +2,35 @@
title: Embedded etcd
sidebar_label: embedded
sidebar_position: 2
-sidebar_class_name: pro
+sidebar_class_name: pro host-nodes private-nodes standalone
description: Configure an embedded etcd instance as the virtual cluster's backing store.
---
import ConfigReference from '../../../../../../_partials/config/controlPlane/backingStore/etcd/embedded.mdx'
import ProAdmonition from '../../../../../../_partials/admonitions/pro-admonition.mdx'
import InterpolatedCodeBlock from "@site/src/components/InterpolatedCodeBlock";
+import PageVariables from "@site/src/components/PageVariables";
import Flow, { Step } from '@site/src/components/Flow';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
+import Admonition from '@theme/Admonition';
+import TenancySupport from '../../../../../../_fragments/tenancy-support.mdx';
+
+:::warning Upgrade Notice
+
+An issue exists when upgrading etcd (from version 3.5.1 or later, but earlier than 3.5.20) to version 3.6. This upgrade path can lead to a failed upgrade and cause the virtual cluster to break. etcd version 3.5.20 includes a fix that migrates membership data to the v3 data store. This migration prevents the issue when upgrading to version 3.6.
+
+To avoid this issue, vCluster does not upgrade etcd to version 3.6 until vCluster version 0.29.0.
+
+Any vCluster running a version earlier than 0.24.2 must first be upgraded to a version between 0.24.2 and 0.28.x before upgrading to version 0.29.0.
+
+For more information, see the [official etcd documentation](https://etcd.io/blog/2025/upgrade_from_3.5_to_3.6_issue/).
+:::
+
When using this backing store option, etcd is deployed as part of the vCluster control plane pod to reduce the overall footprint.
```yaml
@@ -34,7 +49,7 @@ vCluster fully manages embedded etcd and provides these capabilities:
- **Dynamic scaling**: Scales the etcd cluster up or down based on vCluster replica count.
- **Automatic recovery**: Recovers etcd in failure scenarios such as corrupted members.
-- **Seamless migration**: Migrates from SQLite or deployed etcd to embedded etcd automatically.
+- **Seamless migration**: Migrates from SQLite or [deployed etcd](../../../../../../manage/migrate-etcd-backing-store) to embedded etcd automatically.
- **Simplified deployment**: Requires no additional `StatefulSets` or `Deployments`.
@@ -81,6 +96,26 @@ Normal pod restarts or terminations do not require manual recovery. These events
Recovery procedures depend on whether the first replica (the pod ending with `-0`) is among the failing replicas.
+:::note
+The recovery procedure for the first replica also depends on your StatefulSet's `podManagementPolicy` configuration (`Parallel` or `OrderedReady`). See the [first replica recovery section](#migrate-to-parallel) for details on migrating between policies if needed.
+:::
+
+:::info Find your vCluster namespace
+If using VirtualClusterInstance (platform), the vCluster StatefulSet runs in a different namespace than the VirtualClusterInstance itself. Find the StatefulSet namespace with:
+```bash
+kubectl get virtualclusterinstance <instance-name> -n <instance-namespace> -o jsonpath='{.spec.clusterRef.namespace}'
+```
+For example, if your VirtualClusterInstance is named `my-vcluster` in the `p-default` namespace, the StatefulSet might be in `vcluster-my-vcluster-p-default`.
+
+If using Helm, the namespace is what you specified during installation (e.g., `vcluster-my-team`).
+:::
+
+
+
Use the following procedures when some replicas are still functioning:
@@ -92,7 +127,7 @@ Use the following procedures when some replicas are still functioning:
Scale the StatefulSet to one replica:
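For example, assuming the vCluster StatefulSet is named `my-vcluster` in the `vcluster-my-team` namespace (substitute your own values):

```bash
kubectl scale statefulset my-vcluster --replicas=1 -n vcluster-my-team
```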
@@ -101,7 +136,7 @@ Scale the StatefulSet to one replica:
Verify only one pod is running:
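Using the same example names:

```bash
kubectl get pods -n vcluster-my-team
```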
@@ -110,7 +145,7 @@ Verify only one pod is running:
Monitor the rebuild process:
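For example, follow the logs of the remaining replica:

```bash
kubectl logs -f my-vcluster-0 -n vcluster-my-team
```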
@@ -123,7 +158,7 @@ Watch for log messages indicating etcd is ready and the cluster is in good condi
Scale back up to your target replica count:
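For example, assuming a target of three replicas:

```bash
kubectl scale statefulset my-vcluster --replicas=3 -n vcluster-my-team
```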
@@ -132,8 +167,8 @@ Scale back up to your target replica count:
Verify all replicas are running:
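For example:

```bash
kubectl get pods -n vcluster-my-team
```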
@@ -143,102 +178,197 @@ kubectl logs [[VAR:VCLUSTER NAME:my-vcluster]]-0 -n [[VAR:NAMESPACE:vcluster-my-
+:::warning
+Before attempting any recovery procedure, [create a backup](../../../../../../manage/backup-restore/backup.mdx) of your virtual cluster using `vcluster snapshot create --include-volumes`. This ensures both the virtual cluster's etcd data and persistent volumes are backed up.
+
+If the virtual cluster's etcd is in a bad state and the snapshot command fails, you can still back up from the host cluster (which has its own functioning etcd). Use your preferred backup solution (e.g., Velero, Kasten, or cloud-native backup tools) to back up the host cluster namespace containing the vCluster resources. Ensure the backup includes:
+- All Kubernetes resources in the vCluster namespace (StatefulSet, Services, etc.)
+- PersistentVolumeClaims and their associated volume data (contains the virtual cluster's etcd data)
+- Secrets and ConfigMaps
+
+Once the backup is restored, the vCluster pods restart and the virtual cluster is recreated from the backed-up etcd data.
+
+If using namespace syncing, back up all synced namespaces on the host cluster as well.
+:::
+
+The recovery procedure depends on your StatefulSet `podManagementPolicy` configuration. vCluster versions 0.20 and later use `Parallel` by default; earlier versions used `OrderedReady`.
+
+:::info
+If more than one pod is down with `podManagementPolicy: OrderedReady`, you must first [migrate to `Parallel`](#migrate-to-parallel) before attempting recovery.
+:::
+
+Check your configuration:
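+One way to check, assuming the example StatefulSet `my-vcluster` in the `vcluster-my-team` namespace:
+
+```bash
+# Prints "Parallel" or "OrderedReady"
+kubectl get statefulset my-vcluster -n vcluster-my-team \
+  -o jsonpath='{.spec.podManagementPolicy}'
+```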
+
+
+
+
+
+
-
-Stop all vCluster instances:
+
+First, identify the PVC for replica-0:
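+For example, list the PVCs in the vCluster namespace:
+
+```bash
+kubectl get pvc -n vcluster-my-team
+```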
-
-Confirm all pods have terminated:
+The PVC name typically follows the pattern `data-<vcluster-name>-0` but may vary if customized in your configuration. Note the exact name from the output above, then delete the corrupted pod and its PVC:
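+A sketch of the deletion, assuming the default PVC name `data-my-vcluster-0` (if the PVC hangs in `Terminating`, delete the pod again so the deletion can complete):
+
+```bash
+kubectl delete pvc data-my-vcluster-0 -n vcluster-my-team
+kubectl delete pod my-vcluster-0 -n vcluster-my-team
+```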
-
+
+
+
+The pod restarts with a new empty PVC. The initial attempts fail because the new member tries to join the existing etcd cluster but lacks the required data. After 1-3 pod restarts, vCluster's automatic recovery detects the empty member and properly adds it as a new learner, allowing it to sync data from healthy members and join the cluster.
-
-Delete the corrupted PVC for the first replica:
+
+Monitor the recovery process:
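+For example, watch the pod restarts until the replica stays `Running`:
+
+```bash
+kubectl get pods -n vcluster-my-team -w
+```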
-
-Verify the PVC has been deleted:
+Check the logs to verify the pod rejoins successfully:
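+A possible check; the exact log messages to look for can differ between versions:
+
+```bash
+kubectl logs my-vcluster-0 -n vcluster-my-team | grep -i etcd
+```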
-
+
-
-Create a new PVC by [copying from a working replica](https://kubernetes.io/docs/concepts/storage/persistent-volumes/#volume-cloning):
+
-
+
+:::caution
+If more than one pod is down with `podManagementPolicy: OrderedReady`, migrate to `Parallel` first before attempting recovery.
+:::
+
+
+
+Check that the StatefulSet retains PVCs on deletion:
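+One way to check, using the same example names:
+
+```bash
+# Should print "Retain"
+kubectl get statefulset my-vcluster -n vcluster-my-team \
+  -o jsonpath='{.spec.persistentVolumeClaimRetentionPolicy.whenDeleted}'
+```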
+
+
-Apply the PVC:
+The policy should be `Retain`. This is the default but can be overridden by `controlPlane.statefulSet.persistence.volumeClaim.retentionPolicy` in your configuration.
+
+
+
+Delete the StatefulSet without deleting the pods:
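+For example, `--cascade=orphan` removes only the StatefulSet object and leaves the pods and PVCs in place:
+
+```bash
+kubectl delete statefulset my-vcluster -n vcluster-my-team --cascade=orphan
+```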
-
-
-Start with one replica to verify the restored data:
+
+
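+If the vCluster is managed through the platform, one way to open the spec for editing (assuming the example instance `my-vcluster` in the `p-default` project namespace) is:
+
+```bash
+kubectl edit virtualclusterinstance my-vcluster -n p-default
+```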
-
+Then add or update this section in the spec:
+
+```yaml
+spec:
+ template:
+ helmRelease:
+ values: |
+ controlPlane:
+ statefulSet:
+ scheduling:
+ podManagementPolicy: Parallel
+```
+
-Monitor the startup:
+If using Helm, update your `values.yaml` to set the pod management policy:
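+A minimal `values.yaml` sketch with the same setting; apply it with your usual `helm upgrade` workflow:
+
+```yaml
+controlPlane:
+  statefulSet:
+    scheduling:
+      podManagementPolicy: Parallel
+```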
-
-After it's stable, scale up to the desired number of replicas.
+The StatefulSet is recreated with `Parallel` policy and pods pick up the existing PVCs.
+
+
+
+Now follow the same procedure as for `Parallel` mode.
+
+First, identify the PVC for replica-0:
+
+
+
+
+
+The PVC name typically follows the pattern `data-<vcluster-name>-0` but may vary if customized in your configuration. Note the exact name from the output above, then delete the corrupted pod and its PVC:
+
+
+
+
+
+The pod restarts with a new empty PVC. The initial attempts fail because the new member tries to join the existing etcd cluster but lacks the required data. After 1-3 pod restarts, vCluster's automatic recovery detects the empty member and properly adds it as a new learner, allowing it to sync data from healthy members and join the cluster.
+:::warning
+Never clone PVCs from other replicas. Cloning PVCs causes etcd member ID conflicts and results in data loss.
+:::
+
+
+
+
### Complete data loss recovery
:::warning
@@ -247,19 +377,28 @@ This recovery method results in data loss up to the last backup point. Only proc
When the majority of etcd member replicas become corrupted or deleted simultaneously, the entire cluster requires recovery from backup.
+:::info Prerequisites
+Before starting recovery, ensure you have:
+- Created a snapshot using `vcluster snapshot create --include-volumes `
+- The snapshot location URL (for example, `s3://my-bucket/backup` or `oci://registry/repo:tag`)
+- Access to the host cluster namespace where the vCluster is deployed
+
+For detailed snapshot creation instructions, see [Create snapshots](../../../../../../manage/backup-restore/backup).
+:::
+
Verify all PVCs are corrupted or inaccessible:
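For example, list the PVCs and inspect one of them:

```bash
kubectl get pvc -n vcluster-my-team
kubectl describe pvc data-my-vcluster-0 -n vcluster-my-team
```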
@@ -268,53 +407,141 @@ Verify all PVCs are corrupted or inaccessible:
Stop all vCluster instances before beginning recovery:
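For example, scale the StatefulSet down to zero replicas:

```bash
kubectl scale statefulset my-vcluster --replicas=0 -n vcluster-my-team
```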
+
+
+
+Verify all pods have terminated:
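+Using the same example namespace:
+
+```bash
+kubectl get pods -n vcluster-my-team
+```
+
+No `my-vcluster-*` pods should remain.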
+
+
+
+:::warning PVC deletion timing
+After scaling down, wait a few seconds to ensure pods have fully terminated before deleting PVCs. If a pod restarts immediately after PVC deletion, the PVC may get stuck in a "Terminating" state. If this happens, delete the pod again to allow the PVC deletion to complete.
+:::
+
Delete all corrupted PVCs:
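A sketch assuming three replicas and the default PVC names:

```bash
kubectl delete pvc data-my-vcluster-0 data-my-vcluster-1 data-my-vcluster-2 -n vcluster-my-team
```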
+
+
+
+Verify PVCs are deleted:
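+For example:
+
+```bash
+kubectl get pvc -n vcluster-my-team
+```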
+
+
+
+Expected output: `No resources found`
-
-Follow a backup restoration procedure. This typically involves restoring PVCs from your backup solution (Velero, CSI snapshots, or similar tools).
+
+
+:::info Why scale up before restore?
+The vCluster CLI requires an accessible vCluster instance to execute the restore command. Scaling up creates a new, empty vCluster that the CLI can connect to. The `vcluster restore` command then scales it back down automatically, restores the etcd data from the snapshot, and restarts the vCluster with the restored data.
+:::
+
+Scale up to the desired number of replicas:
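+For example, back to three replicas:
+
+```bash
+kubectl scale statefulset my-vcluster --replicas=3 -n vcluster-my-team
+```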
+
+
+
+
+
+Wait for pods to be running:
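+For example, watch until all replicas report `Running`:
+
+```bash
+kubectl get pods -n vcluster-my-team -w
+```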
+
+
-Restore from snapshot:
+Expected output showing all replicas running:
+```
+NAME READY STATUS RESTARTS AGE
+my-vcluster-0 1/1 Running 0 45s
+my-vcluster-1 1/1 Running 0 43s
+my-vcluster-2 1/1 Running 0 41s
+```
+
+
+
+Use the vCluster CLI to restore from your snapshot. The restore process will:
+1. Pause the vCluster (scale down to 0)
+2. Delete the current PVCs
+3. Start a snapshot pod to restore etcd data
+4. Restore PVCs from volume snapshots
+5. Resume the vCluster (scale back up)
+
+
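+A hedged sketch of the restore command, assuming the snapshot was written to `s3://my-bucket/backup`; check the snapshot documentation for the exact syntax of your CLI version:
+
+```bash
+vcluster restore my-vcluster "s3://my-bucket/backup"
+```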
+
+Expected output:
+```
+16:16:38 info Pausing vCluster my-vcluster
+16:16:38 info Scale down statefulSet vcluster-my-team/my-vcluster...
+16:16:39 info Deleting vCluster pvc vcluster-my-team/data-my-vcluster-0
+16:16:39 info Deleting vCluster pvc vcluster-my-team/data-my-vcluster-1
+16:16:39 info Deleting vCluster pvc vcluster-my-team/data-my-vcluster-2
+16:16:39 info Starting snapshot pod for vCluster vcluster-my-team/my-vcluster...
+...
+Successfully restored snapshot
+16:16:42 info Resuming vCluster my-vcluster
+```
+
+:::note Authentication for remote storage
+If using S3 or an OCI registry, ensure you have the appropriate credentials configured:
+- **S3**: Use AWS CLI credentials or pass credentials in the URL
+- **OCI**: Use Docker login or pass credentials in the URL
+
+See [Create snapshots](../../../../../../manage/backup-restore/backup) for authentication details.
+:::
-
-Scale up to a single replica to verify the restoration:
+
+Connect to the vCluster and verify your workloads are restored:
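+Using the example name and namespace:
+
+```bash
+vcluster connect my-vcluster -n vcluster-my-team
+```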
-Monitor logs and verify the cluster starts successfully:
+Check that your resources are present:
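+For example, inside the virtual cluster:
+
+```bash
+kubectl get namespaces
+kubectl get pods -A
+```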
-After it's verified, scale to the desired number of replicas.
+If everything looks correct, disconnect:
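+To return to the host cluster context:
+
+```bash
+vcluster disconnect
+```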
+
+
From 548197f9b713dc57fdf3d988c4c9883a75a36287 Mon Sep 17 00:00:00 2001
From: guowenatk
Date: Tue, 11 Nov 2025 16:33:22 +0100
Subject: [PATCH 2/2] fix broken link
---
.../components/backing-store/etcd/embedded.mdx | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/vcluster_versioned_docs/version-0.26.0/configure/vcluster-yaml/control-plane/components/backing-store/etcd/embedded.mdx b/vcluster_versioned_docs/version-0.26.0/configure/vcluster-yaml/control-plane/components/backing-store/etcd/embedded.mdx
index 87e28142c..b7ee07ab7 100644
--- a/vcluster_versioned_docs/version-0.26.0/configure/vcluster-yaml/control-plane/components/backing-store/etcd/embedded.mdx
+++ b/vcluster_versioned_docs/version-0.26.0/configure/vcluster-yaml/control-plane/components/backing-store/etcd/embedded.mdx
@@ -49,7 +49,7 @@ vCluster fully manages embedded etcd and provides these capabilities:
- **Dynamic scaling**: Scales the etcd cluster up or down based on vCluster replica count.
- **Automatic recovery**: Recovers etcd in failure scenarios such as corrupted members.
-- **Seamless migration**: Migrates from SQLite or [deployed etcd](../../../../../../manage/migrate-etcd-backing-store) to embedded etcd automatically.
+- **Seamless migration**: Migrates from SQLite or deployed etcd to embedded etcd automatically.
- **Simplified deployment**: Requires no additional `StatefulSets` or `Deployments`.
@@ -179,7 +179,7 @@ kubectl logs [[GLOBAL:VCLUSTER_NAME]]-0 -n [[GLOBAL:NAMESPACE]] | grep "cluster
:::warning
-Before attempting any recovery procedure, [create a backup](../../../../../../manage/backup-restore/backup.mdx) of your virtual cluster using `vcluster snapshot create --include-volumes`. This ensures both the virtual cluster's etcd data and persistent volumes are backed up.
+Before attempting any recovery procedure, [create a backup](../../../../../../manage/backup-restore) of your virtual cluster using `vcluster snapshot create --include-volumes`. This ensures both the virtual cluster's etcd data and persistent volumes are backed up.
If the virtual cluster's etcd is in a bad state and the snapshot command fails, you can still back up from the host cluster (which has its own functioning etcd). Use your preferred backup solution (e.g., Velero, Kasten, or cloud-native backup tools) to back up the host cluster namespace containing the vCluster resources. Ensure the backup includes:
- All Kubernetes resources in the vCluster namespace (StatefulSet, Services, etc.)
@@ -383,7 +383,7 @@ Before starting recovery, ensure you have:
- The snapshot location URL (for example, `s3://my-bucket/backup` or `oci://registry/repo:tag`)
- Access to the host cluster namespace where the vCluster is deployed
-For detailed snapshot creation instructions, see [Create snapshots](../../../../../../manage/backup-restore/backup).
+For detailed snapshot creation instructions, see [Create snapshots](../../../../../../manage/backup-restore).
:::
@@ -512,7 +512,7 @@ If using S3 or OCI registry, ensure you have the appropriate credentials configu
- **S3**: Use AWS CLI credentials or pass credentials in the URL
- **OCI**: Use Docker login or pass credentials in the URL
-See [Create snapshots](../../../../../../manage/backup-restore/backup) for authentication details.
+See [Create snapshots](../../../../../../manage/backup-restore) for authentication details.
:::