From 25826a1c8e53f738bd5fdcbfbd8b06c07f0a5595 Mon Sep 17 00:00:00 2001
From: TJ Moore
Date: Mon, 11 Aug 2025 18:03:17 -0400
Subject: [PATCH] Refactor Volume Auto Grow to support additional volume types

This commit refactors the existing pgData volume auto grow code to
better support upcoming feature enhancements relating to auto grow
capability for the pgBackRest repository volume and pg_wal volume.

Issue: PGO-2606
---
 .../controller/postgrescluster/autogrow.go    | 188 ++++
 .../postgrescluster/autogrow_test.go          | 599 ++++++++++++++++++
 .../controller/postgrescluster/instance.go    |  69 +-
 .../postgrescluster/instance_test.go          | 118 ----
 .../controller/postgrescluster/postgres.go    |  72 +--
 .../postgrescluster/postgres_test.go          | 315 ---------
 .../controller/postgrescluster/snapshots.go   |   2 +-
 internal/postgres/config.go                   |  40 +-
 internal/postgres/reconcile_test.go           |  35 +-
 9 files changed, 841 insertions(+), 597 deletions(-)
 create mode 100644 internal/controller/postgrescluster/autogrow.go
 create mode 100644 internal/controller/postgrescluster/autogrow_test.go

diff --git a/internal/controller/postgrescluster/autogrow.go b/internal/controller/postgrescluster/autogrow.go
new file mode 100644
index 0000000000..9f17198229
--- /dev/null
+++ b/internal/controller/postgrescluster/autogrow.go
@@ -0,0 +1,188 @@
+// Copyright 2021 - 2025 Crunchy Data Solutions, Inc.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package postgrescluster
+
+import (
+	"context"
+
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+
+	"github.com/crunchydata/postgres-operator/internal/feature"
+	"github.com/crunchydata/postgres-operator/internal/logging"
+	"github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1"
+)
+
+// storeDesiredRequest saves the appropriate request value to the PostgresCluster
+// status. If the value has grown, create an Event.
+func (r *Reconciler) storeDesiredRequest(
+	ctx context.Context, cluster *v1beta1.PostgresCluster,
+	volumeType, instanceSetName, desiredRequest, desiredRequestBackup string,
+) string {
+	var current resource.Quantity
+	var previous resource.Quantity
+	var err error
+	log := logging.FromContext(ctx)
+
+	// Parse the desired request from the cluster's status.
+	if desiredRequest != "" {
+		current, err = resource.ParseQuantity(desiredRequest)
+		if err != nil {
+			log.Error(err, "Unable to parse "+volumeType+" volume request from status ("+
+				desiredRequest+") for "+cluster.Name+"/"+instanceSetName)
+			// If there was an error parsing the value, treat as unset (equivalent to zero).
+			desiredRequest = ""
+			current, _ = resource.ParseQuantity("")
+
+		}
+	}
+
+	// Parse the desired request from the status backup.
+	if desiredRequestBackup != "" {
+		previous, err = resource.ParseQuantity(desiredRequestBackup)
+		if err != nil {
+			log.Error(err, "Unable to parse "+volumeType+" volume request from status backup ("+
+				desiredRequestBackup+") for "+cluster.Name+"/"+instanceSetName)
+			// If there was an error parsing the value, treat as unset (equivalent to zero).
+			desiredRequestBackup = ""
+			previous, _ = resource.ParseQuantity("")
+
+		}
+	}
+
+	// Determine if the appropriate volume limit is set.
+	limitSet := limitIsSet(cluster, volumeType, instanceSetName)
+
+	if limitSet && current.Value() > previous.Value() {
+		r.Recorder.Eventf(cluster, corev1.EventTypeNormal, "VolumeAutoGrow",
+			"%s volume expansion to %v requested for %s/%s.",
+			volumeType, current.String(), cluster.Name, instanceSetName)
+	}
+
+	// If the desired size was not observed, update with the previously stored value.
+	// This can happen in scenarios where the annotation on the Pod is missing,
+	// such as when the cluster is shut down or a Pod is in the middle of a restart.
+	if desiredRequest == "" {
+		desiredRequest = desiredRequestBackup
+	}
+
+	return desiredRequest
+}
+
+// limitIsSet determines whether the storage limit is set for the given volume
+// type and instance set and returns a corresponding boolean value.
+func limitIsSet(cluster *v1beta1.PostgresCluster, volumeType, instanceSetName string) bool {
+
+	var limitSet bool
+
+	switch volumeType {
+
+	// Cycle through the instance sets to ensure the correct limit is identified.
+	case "pgData":
+		for _, specInstance := range cluster.Spec.InstanceSets {
+			if specInstance.Name == instanceSetName {
+				limitSet = !specInstance.DataVolumeClaimSpec.Resources.Limits.Storage().IsZero()
+			}
+		}
+	}
+	// TODO: Add cases for pgWAL and repo volumes
+
+	return limitSet
+
+}
+
+// setVolumeSize compares the potential sizes from the instance spec, status
+// and limit and sets the appropriate current value.
+func (r *Reconciler) setVolumeSize(ctx context.Context, cluster *v1beta1.PostgresCluster,
+	pvc *corev1.PersistentVolumeClaim, volumeType, instanceSpecName string) {
+
+	log := logging.FromContext(ctx)
+
+	// Store the limit for this instance set. This value will not change below.
+	volumeLimitFromSpec := pvc.Spec.Resources.Limits.Storage()
+
+	// This value will capture our desired update.
+	volumeRequestSize := pvc.Spec.Resources.Requests.Storage()
+
+	// A limit of 0 is ignored, so the existing volume request is used as-is.
+	if volumeLimitFromSpec.IsZero() {
+		return
+	}
+
+	// If the request value is greater than the set limit, use the limit and issue
+	// a warning event.
+	if volumeRequestSize.Value() > volumeLimitFromSpec.Value() {
+		r.Recorder.Eventf(cluster, corev1.EventTypeWarning, "VolumeRequestOverLimit",
+			"%s volume request (%v) for %s/%s is greater than set limit (%v). Limit value will be used.",
+			volumeType, volumeRequestSize, cluster.Name, instanceSpecName, volumeLimitFromSpec)
+
+		pvc.Spec.Resources.Requests = corev1.ResourceList{
+			corev1.ResourceStorage: *resource.NewQuantity(volumeLimitFromSpec.Value(), resource.BinarySI),
+		}
+		// Otherwise, only autogrow when the feature gate is enabled.
+	} else if feature.Enabled(ctx, feature.AutoGrowVolumes) {
+
+		// Determine the appropriate volume request based on what's set in the status.
+		if dpv, err := getDesiredVolumeSize(
+			cluster, volumeType, instanceSpecName, volumeRequestSize,
+		); err != nil {
+			log.Error(err, "For "+cluster.Name+"/"+instanceSpecName+
+				": Unable to parse "+volumeType+" volume request: "+dpv)
+		}
+
+		// If the volume request size is greater than or equal to the limit and the
+		// limit is not zero, update the request size to the limit value.
+		// If the user manually requests a lower limit that is smaller than the current
+		// or requested volume size, it will be ignored in favor of the limit value.
+ if volumeRequestSize.Value() >= volumeLimitFromSpec.Value() { + + r.Recorder.Eventf(cluster, corev1.EventTypeNormal, "VolumeLimitReached", + "%s volume(s) for %s/%s are at size limit (%v).", volumeType, + cluster.Name, instanceSpecName, volumeLimitFromSpec) + + // If the volume size request is greater than the limit, issue an + // additional event warning. + if volumeRequestSize.Value() > volumeLimitFromSpec.Value() { + r.Recorder.Eventf(cluster, corev1.EventTypeWarning, "DesiredVolumeAboveLimit", + "The desired size (%v) for the %s/%s %s volume(s) is greater than the size limit (%v).", + volumeRequestSize, cluster.Name, instanceSpecName, volumeType, volumeLimitFromSpec) + } + + volumeRequestSize = volumeLimitFromSpec + } + pvc.Spec.Resources.Requests = corev1.ResourceList{ + corev1.ResourceStorage: *resource.NewQuantity(volumeRequestSize.Value(), resource.BinarySI), + } + } +} + +// getDesiredVolumeSize compares the volume request size to the suggested autogrow +// size stored in the status and updates the value when the status value is larger. +func getDesiredVolumeSize(cluster *v1beta1.PostgresCluster, + volumeType, instanceSpecName string, + volumeRequestSize *resource.Quantity) (string, error) { + + switch volumeType { + case "pgData": + for i := range cluster.Status.InstanceSets { + if instanceSpecName == cluster.Status.InstanceSets[i].Name { + for _, dpv := range cluster.Status.InstanceSets[i].DesiredPGDataVolume { + if dpv != "" { + desiredRequest, err := resource.ParseQuantity(dpv) + if err == nil { + if desiredRequest.Value() > volumeRequestSize.Value() { + *volumeRequestSize = desiredRequest + } + } else { + return dpv, err + } + } + } + } + } + // TODO: Add cases for pgWAL and repo volumes (requires relevant status sections) + } + return "", nil +} diff --git a/internal/controller/postgrescluster/autogrow_test.go b/internal/controller/postgrescluster/autogrow_test.go new file mode 100644 index 0000000000..7ec227b373 --- /dev/null +++ b/internal/controller/postgrescluster/autogrow_test.go @@ -0,0 +1,599 @@ +// Copyright 2021 - 2025 Crunchy Data Solutions, Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 + +package postgrescluster + +import ( + "context" + "testing" + + "github.com/go-logr/logr/funcr" + "gotest.tools/v3/assert" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + "github.com/crunchydata/postgres-operator/internal/controller/runtime" + "github.com/crunchydata/postgres-operator/internal/feature" + "github.com/crunchydata/postgres-operator/internal/initialize" + "github.com/crunchydata/postgres-operator/internal/logging" + "github.com/crunchydata/postgres-operator/internal/naming" + "github.com/crunchydata/postgres-operator/internal/testing/cmp" + "github.com/crunchydata/postgres-operator/internal/testing/events" + "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" +) + +func TestStoreDesiredRequest(t *testing.T) { + ctx := context.Background() + + setupLogCapture := func(ctx context.Context) (context.Context, *[]string) { + calls := []string{} + testlog := funcr.NewJSON(func(object string) { + calls = append(calls, object) + }, funcr.Options{ + Verbosity: 1, + }) + return logging.NewContext(ctx, testlog), &calls + } + + cluster := v1beta1.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rhino", + Namespace: "test-namespace", + }, + Spec: v1beta1.PostgresClusterSpec{ + InstanceSets: []v1beta1.PostgresInstanceSetSpec{{ + Name: "red", + Replicas: initialize.Int32(1), + DataVolumeClaimSpec: v1beta1.VolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Limits: map[corev1.ResourceName]resource.Quantity{ + corev1.ResourceStorage: resource.MustParse("1Gi"), + }}}, + }, { + Name: "blue", + Replicas: initialize.Int32(1), + }}}} + + t.Run("BadRequestNoBackup", func(t *testing.T) { + recorder := events.NewRecorder(t, runtime.Scheme) + reconciler := &Reconciler{Recorder: recorder} + ctx, logs := setupLogCapture(ctx) + + value := reconciler.storeDesiredRequest(ctx, &cluster, "pgData", "red", "woot", "") + + assert.Equal(t, value, "") + assert.Equal(t, len(recorder.Events), 0) + assert.Equal(t, len(*logs), 1) + assert.Assert(t, cmp.Contains((*logs)[0], "Unable to parse pgData volume request from status")) + }) + + t.Run("BadRequestWithBackup", func(t *testing.T) { + recorder := events.NewRecorder(t, runtime.Scheme) + reconciler := &Reconciler{Recorder: recorder} + ctx, logs := setupLogCapture(ctx) + + value := reconciler.storeDesiredRequest(ctx, &cluster, "pgData", "red", "foo", "1Gi") + + assert.Equal(t, value, "1Gi") + assert.Equal(t, len(recorder.Events), 0) + assert.Equal(t, len(*logs), 1) + assert.Assert(t, cmp.Contains((*logs)[0], "Unable to parse pgData volume request from status (foo) for rhino/red")) + }) + + t.Run("NoLimitNoEvent", func(t *testing.T) { + recorder := events.NewRecorder(t, runtime.Scheme) + reconciler := &Reconciler{Recorder: recorder} + ctx, logs := setupLogCapture(ctx) + + value := reconciler.storeDesiredRequest(ctx, &cluster, "pgData", "blue", "1Gi", "") + + assert.Equal(t, value, "1Gi") + assert.Equal(t, len(*logs), 0) + assert.Equal(t, len(recorder.Events), 0) + }) + + t.Run("BadBackupRequest", func(t *testing.T) { + recorder := events.NewRecorder(t, runtime.Scheme) + reconciler := &Reconciler{Recorder: recorder} + ctx, logs := setupLogCapture(ctx) + + value := reconciler.storeDesiredRequest(ctx, &cluster, "pgData", "red", "2Gi", "bar") 
+ + assert.Equal(t, value, "2Gi") + assert.Equal(t, len(*logs), 1) + assert.Assert(t, cmp.Contains((*logs)[0], "Unable to parse pgData volume request from status backup (bar) for rhino/red")) + assert.Equal(t, len(recorder.Events), 1) + assert.Equal(t, recorder.Events[0].Regarding.Name, cluster.Name) + assert.Equal(t, recorder.Events[0].Reason, "VolumeAutoGrow") + assert.Equal(t, recorder.Events[0].Note, "pgData volume expansion to 2Gi requested for rhino/red.") + }) + + t.Run("ValueUpdateWithEvent", func(t *testing.T) { + recorder := events.NewRecorder(t, runtime.Scheme) + reconciler := &Reconciler{Recorder: recorder} + ctx, logs := setupLogCapture(ctx) + + value := reconciler.storeDesiredRequest(ctx, &cluster, "pgData", "red", "1Gi", "") + + assert.Equal(t, value, "1Gi") + assert.Equal(t, len(*logs), 0) + assert.Equal(t, len(recorder.Events), 1) + assert.Equal(t, recorder.Events[0].Regarding.Name, cluster.Name) + assert.Equal(t, recorder.Events[0].Reason, "VolumeAutoGrow") + assert.Equal(t, recorder.Events[0].Note, "pgData volume expansion to 1Gi requested for rhino/red.") + }) + + t.Run("NoLimitNoEvent", func(t *testing.T) { + recorder := events.NewRecorder(t, runtime.Scheme) + reconciler := &Reconciler{Recorder: recorder} + ctx, logs := setupLogCapture(ctx) + + value := reconciler.storeDesiredRequest(ctx, &cluster, "pgData", "blue", "1Gi", "") + + assert.Equal(t, value, "1Gi") + assert.Equal(t, len(*logs), 0) + assert.Equal(t, len(recorder.Events), 0) + }) +} + +func TestLimitIsSet(t *testing.T) { + + cluster := v1beta1.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rhino", + Namespace: "test-namespace", + }, + Spec: v1beta1.PostgresClusterSpec{ + InstanceSets: []v1beta1.PostgresInstanceSetSpec{{ + Name: "red", + Replicas: initialize.Int32(1), + DataVolumeClaimSpec: v1beta1.VolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Limits: map[corev1.ResourceName]resource.Quantity{ + corev1.ResourceStorage: resource.MustParse("1Gi"), + }}}, + }, { + Name: "blue", + Replicas: initialize.Int32(1), + }}}} + + testCases := []struct { + tcName string + Voltype string + instanceName string + expected bool + }{{ + tcName: "Limit is set for instance PGDATA volume", + Voltype: "pgData", + instanceName: "red", + expected: true, + }, { + tcName: "Limit is not set for instance PGDATA volume", + Voltype: "pgData", + instanceName: "blue", + expected: false, + }, { + tcName: "Check PGDATA volume for non-existent instance", + Voltype: "pgData", + instanceName: "orange", + expected: false, + }} + + for _, tc := range testCases { + t.Run(tc.tcName, func(t *testing.T) { + + limitSet := limitIsSet(&cluster, tc.Voltype, tc.instanceName) + assert.Check(t, limitSet == tc.expected) + }) + } +} + +func TestSetVolumeSize(t *testing.T) { + t.Parallel() + + ctx := context.Background() + cluster := v1beta1.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "elephant", + Namespace: "test-namespace", + }, + Spec: v1beta1.PostgresClusterSpec{ + InstanceSets: []v1beta1.PostgresInstanceSetSpec{{ + Name: "some-instance", + Replicas: initialize.Int32(1), + }}, + }, + } + + instance := &appsv1.StatefulSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "elephant-some-instance-wxyz-0", + Namespace: cluster.Namespace, + }} + + setupLogCapture := func(ctx context.Context) (context.Context, *[]string) { + calls := []string{} + testlog := funcr.NewJSON(func(object string) { + calls = append(calls, object) + }, funcr.Options{ + 
Verbosity: 1, + }) + return logging.NewContext(ctx, testlog), &calls + } + + // helper functions + instanceSetSpec := func(request, limit string) *v1beta1.PostgresInstanceSetSpec { + return &v1beta1.PostgresInstanceSetSpec{ + Name: "some-instance", + DataVolumeClaimSpec: v1beta1.VolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: map[corev1.ResourceName]resource.Quantity{ + corev1.ResourceStorage: resource.MustParse(request), + }, + Limits: map[corev1.ResourceName]resource.Quantity{ + corev1.ResourceStorage: resource.MustParse(limit), + }}}} + } + + desiredStatus := func(request string) v1beta1.PostgresClusterStatus { + desiredMap := make(map[string]string) + desiredMap["elephant-some-instance-wxyz-0"] = request + return v1beta1.PostgresClusterStatus{ + InstanceSets: []v1beta1.PostgresInstanceSetStatus{{ + Name: "some-instance", + DesiredPGDataVolume: desiredMap, + }}} + } + + t.Run("RequestAboveLimit", func(t *testing.T) { + recorder := events.NewRecorder(t, runtime.Scheme) + reconciler := &Reconciler{Recorder: recorder} + ctx, logs := setupLogCapture(ctx) + + pvc := &corev1.PersistentVolumeClaim{ObjectMeta: naming.InstancePostgresDataVolume(instance)} + spec := instanceSetSpec("4Gi", "3Gi") + pvc.Spec = spec.DataVolumeClaimSpec.AsPersistentVolumeClaimSpec() + + reconciler.setVolumeSize(ctx, &cluster, pvc, "pgData", spec.Name) + + assert.Assert(t, cmp.MarshalMatches(pvc.Spec, ` +accessModes: +- ReadWriteOnce +resources: + limits: + storage: 3Gi + requests: + storage: 3Gi +`)) + assert.Equal(t, len(*logs), 0) + assert.Equal(t, len(recorder.Events), 1) + assert.Equal(t, recorder.Events[0].Regarding.Name, cluster.Name) + assert.Equal(t, recorder.Events[0].Reason, "VolumeRequestOverLimit") + assert.Equal(t, recorder.Events[0].Note, "pgData volume request (4Gi) for elephant/some-instance is greater than set limit (3Gi). 
Limit value will be used.") + }) + + t.Run("NoFeatureGate", func(t *testing.T) { + recorder := events.NewRecorder(t, runtime.Scheme) + reconciler := &Reconciler{Recorder: recorder} + ctx, logs := setupLogCapture(ctx) + + pvc := &corev1.PersistentVolumeClaim{ObjectMeta: naming.InstancePostgresDataVolume(instance)} + spec := instanceSetSpec("1Gi", "3Gi") + + desiredMap := make(map[string]string) + desiredMap["elephant-some-instance-wxyz-0"] = "2Gi" + cluster.Status = v1beta1.PostgresClusterStatus{ + InstanceSets: []v1beta1.PostgresInstanceSetStatus{{ + Name: "some-instance", + DesiredPGDataVolume: desiredMap, + }}, + } + + pvc.Spec = spec.DataVolumeClaimSpec.AsPersistentVolumeClaimSpec() + + reconciler.setVolumeSize(ctx, &cluster, pvc, "pgData", spec.Name) + + assert.Assert(t, cmp.MarshalMatches(pvc.Spec, ` +accessModes: +- ReadWriteOnce +resources: + limits: + storage: 3Gi + requests: + storage: 1Gi + `)) + + assert.Equal(t, len(recorder.Events), 0) + assert.Equal(t, len(*logs), 0) + + // clear status for other tests + cluster.Status = v1beta1.PostgresClusterStatus{} + }) + + t.Run("FeatureEnabled", func(t *testing.T) { + gate := feature.NewGate() + assert.NilError(t, gate.SetFromMap(map[string]bool{ + feature.AutoGrowVolumes: true, + })) + ctx := feature.NewContext(ctx, gate) + + t.Run("StatusNoLimit", func(t *testing.T) { + recorder := events.NewRecorder(t, runtime.Scheme) + reconciler := &Reconciler{Recorder: recorder} + ctx, logs := setupLogCapture(ctx) + + pvc := &corev1.PersistentVolumeClaim{ObjectMeta: naming.InstancePostgresDataVolume(instance)} + spec := &v1beta1.PostgresInstanceSetSpec{ + Name: "some-instance", + DataVolumeClaimSpec: v1beta1.VolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, + Resources: corev1.VolumeResourceRequirements{ + Requests: map[corev1.ResourceName]resource.Quantity{ + corev1.ResourceStorage: resource.MustParse("1Gi"), + }}}} + cluster.Status = desiredStatus("2Gi") + pvc.Spec = spec.DataVolumeClaimSpec.AsPersistentVolumeClaimSpec() + + reconciler.setVolumeSize(ctx, &cluster, pvc, "pgData", spec.Name) + + assert.Assert(t, cmp.MarshalMatches(pvc.Spec, ` +accessModes: +- ReadWriteOnce +resources: + requests: + storage: 1Gi +`)) + assert.Equal(t, len(recorder.Events), 0) + assert.Equal(t, len(*logs), 0) + + // clear status for other tests + cluster.Status = v1beta1.PostgresClusterStatus{} + }) + + t.Run("LimitNoStatus", func(t *testing.T) { + recorder := events.NewRecorder(t, runtime.Scheme) + reconciler := &Reconciler{Recorder: recorder} + ctx, logs := setupLogCapture(ctx) + + pvc := &corev1.PersistentVolumeClaim{ObjectMeta: naming.InstancePostgresDataVolume(instance)} + spec := instanceSetSpec("1Gi", "2Gi") + pvc.Spec = spec.DataVolumeClaimSpec.AsPersistentVolumeClaimSpec() + + reconciler.setVolumeSize(ctx, &cluster, pvc, "pgData", spec.Name) + + assert.Assert(t, cmp.MarshalMatches(pvc.Spec, ` +accessModes: +- ReadWriteOnce +resources: + limits: + storage: 2Gi + requests: + storage: 1Gi +`)) + assert.Equal(t, len(recorder.Events), 0) + assert.Equal(t, len(*logs), 0) + }) + + t.Run("BadStatusWithLimit", func(t *testing.T) { + recorder := events.NewRecorder(t, runtime.Scheme) + reconciler := &Reconciler{Recorder: recorder} + ctx, logs := setupLogCapture(ctx) + + pvc := &corev1.PersistentVolumeClaim{ObjectMeta: naming.InstancePostgresDataVolume(instance)} + spec := instanceSetSpec("1Gi", "3Gi") + cluster.Status = desiredStatus("NotAValidValue") + pvc.Spec = spec.DataVolumeClaimSpec.AsPersistentVolumeClaimSpec() + + 
reconciler.setVolumeSize(ctx, &cluster, pvc, "pgData", spec.Name) + + assert.Assert(t, cmp.MarshalMatches(pvc.Spec, ` +accessModes: +- ReadWriteOnce +resources: + limits: + storage: 3Gi + requests: + storage: 1Gi +`)) + + assert.Equal(t, len(recorder.Events), 0) + assert.Equal(t, len(*logs), 1) + assert.Assert(t, cmp.Contains((*logs)[0], + "For elephant/some-instance: Unable to parse pgData volume request: NotAValidValue")) + }) + + t.Run("StatusWithLimit", func(t *testing.T) { + recorder := events.NewRecorder(t, runtime.Scheme) + reconciler := &Reconciler{Recorder: recorder} + ctx, logs := setupLogCapture(ctx) + + pvc := &corev1.PersistentVolumeClaim{ObjectMeta: naming.InstancePostgresDataVolume(instance)} + spec := instanceSetSpec("1Gi", "3Gi") + cluster.Status = desiredStatus("2Gi") + pvc.Spec = spec.DataVolumeClaimSpec.AsPersistentVolumeClaimSpec() + + reconciler.setVolumeSize(ctx, &cluster, pvc, "pgData", spec.Name) + + assert.Assert(t, cmp.MarshalMatches(pvc.Spec, ` +accessModes: +- ReadWriteOnce +resources: + limits: + storage: 3Gi + requests: + storage: 2Gi +`)) + assert.Equal(t, len(recorder.Events), 0) + assert.Equal(t, len(*logs), 0) + }) + + t.Run("StatusWithLimitGrowToLimit", func(t *testing.T) { + recorder := events.NewRecorder(t, runtime.Scheme) + reconciler := &Reconciler{Recorder: recorder} + ctx, logs := setupLogCapture(ctx) + + pvc := &corev1.PersistentVolumeClaim{ObjectMeta: naming.InstancePostgresDataVolume(instance)} + spec := instanceSetSpec("1Gi", "2Gi") + cluster.Status = desiredStatus("2Gi") + pvc.Spec = spec.DataVolumeClaimSpec.AsPersistentVolumeClaimSpec() + + reconciler.setVolumeSize(ctx, &cluster, pvc, "pgData", spec.Name) + + assert.Assert(t, cmp.MarshalMatches(pvc.Spec, ` +accessModes: +- ReadWriteOnce +resources: + limits: + storage: 2Gi + requests: + storage: 2Gi +`)) + + assert.Equal(t, len(*logs), 0) + assert.Equal(t, len(recorder.Events), 1) + assert.Equal(t, recorder.Events[0].Regarding.Name, cluster.Name) + assert.Equal(t, recorder.Events[0].Reason, "VolumeLimitReached") + assert.Equal(t, recorder.Events[0].Note, "pgData volume(s) for elephant/some-instance are at size limit (2Gi).") + }) + + t.Run("DesiredStatusOverLimit", func(t *testing.T) { + recorder := events.NewRecorder(t, runtime.Scheme) + reconciler := &Reconciler{Recorder: recorder} + ctx, logs := setupLogCapture(ctx) + + pvc := &corev1.PersistentVolumeClaim{ObjectMeta: naming.InstancePostgresDataVolume(instance)} + spec := instanceSetSpec("4Gi", "5Gi") + cluster.Status = desiredStatus("10Gi") + pvc.Spec = spec.DataVolumeClaimSpec.AsPersistentVolumeClaimSpec() + + reconciler.setVolumeSize(ctx, &cluster, pvc, "pgData", spec.Name) + + assert.Assert(t, cmp.MarshalMatches(pvc.Spec, ` +accessModes: +- ReadWriteOnce +resources: + limits: + storage: 5Gi + requests: + storage: 5Gi +`)) + + assert.Equal(t, len(*logs), 0) + assert.Equal(t, len(recorder.Events), 2) + var found1, found2 bool + for _, event := range recorder.Events { + if event.Reason == "VolumeLimitReached" { + found1 = true + assert.Equal(t, event.Regarding.Name, cluster.Name) + assert.Equal(t, event.Note, "pgData volume(s) for elephant/some-instance are at size limit (5Gi).") + } + if event.Reason == "DesiredVolumeAboveLimit" { + found2 = true + assert.Equal(t, event.Regarding.Name, cluster.Name) + assert.Equal(t, event.Note, + "The desired size (10Gi) for the elephant/some-instance pgData volume(s) is greater than the size limit (5Gi).") + } + } + assert.Assert(t, found1 && found2) + }) + + }) +} + +func 
TestDetermineDesiredVolumeRequest(t *testing.T) { + t.Parallel() + + cluster := v1beta1.PostgresCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "elephant", + Namespace: "test-namespace", + }, + Spec: v1beta1.PostgresClusterSpec{ + InstanceSets: []v1beta1.PostgresInstanceSetSpec{{ + Name: "some-instance", + Replicas: initialize.Int32(1), + }}, + }, + } + + pgDataStatus := func(request string) v1beta1.PostgresClusterStatus { + desiredMap := make(map[string]string) + desiredMap["elephant-some-instance-wxyz-0"] = request + return v1beta1.PostgresClusterStatus{ + InstanceSets: []v1beta1.PostgresInstanceSetStatus{{ + Name: "some-instance", + DesiredPGDataVolume: desiredMap, + }}} + } + + testCases := []struct { + tcName string + sizeFromStatus string + pvcRequestSize string + volType string + instanceName string + expected string + }{{ + tcName: "Larger size requested", + sizeFromStatus: "3Gi", + pvcRequestSize: "2Gi", + volType: "pgData", + instanceName: "some-instance", + expected: "3Gi", + }, { + tcName: "PVC is desired size", + sizeFromStatus: "2Gi", + pvcRequestSize: "2Gi", + volType: "pgData", + instanceName: "some-instance", + expected: "2Gi", + }, { + tcName: "Original larger than status request", + sizeFromStatus: "1Gi", + pvcRequestSize: "2Gi", + volType: "pgData", + instanceName: "some-instance", + expected: "2Gi", + }, { + tcName: "Instance doesn't exist", + sizeFromStatus: "2Gi", + pvcRequestSize: "1Gi", + volType: "pgData", + instanceName: "not-an-instance", + expected: "1Gi", + }, { + tcName: "Bad Value", + sizeFromStatus: "batman", + pvcRequestSize: "1Gi", + volType: "pgData", + instanceName: "some-instance", + expected: "1Gi", + }} + + for _, tc := range testCases { + t.Run(tc.tcName, func(t *testing.T) { + + cluster.Status = pgDataStatus(tc.sizeFromStatus) + request, err := resource.ParseQuantity(tc.pvcRequestSize) + assert.NilError(t, err) + + dpv, err := getDesiredVolumeSize(&cluster, tc.volType, tc.instanceName, &request) + assert.Equal(t, request.String(), tc.expected) + + if tc.tcName != "Bad Value" { + assert.NilError(t, err) + assert.Assert(t, dpv == "") + } else { + assert.ErrorContains(t, err, "quantities must match the regular expression") + assert.Assert(t, dpv == "batman") + } + }) + } + +} diff --git a/internal/controller/postgrescluster/instance.go b/internal/controller/postgrescluster/instance.go index f9b8e12cd3..53e89f2433 100644 --- a/internal/controller/postgrescluster/instance.go +++ b/internal/controller/postgrescluster/instance.go @@ -17,7 +17,6 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" policyv1 "k8s.io/api/policy/v1" - "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/util/intstr" @@ -357,10 +356,13 @@ func (r *Reconciler) observeInstances( } } - // If autogrow is enabled, get the desired volume size for each instance. + // If autogrow is enabled, determine the desired volume size for each instance + // now that all the pod annotations have been collected. This final value will be + // checked to ensure that the value from the annotations can be parsed to a valid + // value. Otherwise the previous value, if available, will be used. 
if autogrow { for _, instance := range observed.bySet[name] { - status.DesiredPGDataVolume[instance.Name] = r.storeDesiredRequest(ctx, cluster, + status.DesiredPGDataVolume[instance.Name] = r.storeDesiredRequest(ctx, cluster, "pgData", name, status.DesiredPGDataVolume[instance.Name], previousDesiredRequests[instance.Name]) } } @@ -371,67 +373,6 @@ func (r *Reconciler) observeInstances( return observed, err } -// storeDesiredRequest saves the appropriate request value to the PostgresCluster -// status. If the value has grown, create an Event. -func (r *Reconciler) storeDesiredRequest( - ctx context.Context, cluster *v1beta1.PostgresCluster, - instanceSetName, desiredRequest, desiredRequestBackup string, -) string { - var current resource.Quantity - var previous resource.Quantity - var err error - log := logging.FromContext(ctx) - - // Parse the desired request from the cluster's status. - if desiredRequest != "" { - current, err = resource.ParseQuantity(desiredRequest) - if err != nil { - log.Error(err, "Unable to parse pgData volume request from status ("+ - desiredRequest+") for "+cluster.Name+"/"+instanceSetName) - // If there was an error parsing the value, treat as unset (equivalent to zero). - desiredRequest = "" - current, _ = resource.ParseQuantity("") - - } - } - - // Parse the desired request from the status backup. - if desiredRequestBackup != "" { - previous, err = resource.ParseQuantity(desiredRequestBackup) - if err != nil { - log.Error(err, "Unable to parse pgData volume request from status backup ("+ - desiredRequestBackup+") for "+cluster.Name+"/"+instanceSetName) - // If there was an error parsing the value, treat as unset (equivalent to zero). - desiredRequestBackup = "" - previous, _ = resource.ParseQuantity("") - - } - } - - // Determine if the limit is set for this instance set. - var limitSet bool - for _, specInstance := range cluster.Spec.InstanceSets { - if specInstance.Name == instanceSetName { - limitSet = !specInstance.DataVolumeClaimSpec.Resources.Limits.Storage().IsZero() - } - } - - if limitSet && current.Value() > previous.Value() { - r.Recorder.Eventf(cluster, corev1.EventTypeNormal, "VolumeAutoGrow", - "pgData volume expansion to %v requested for %s/%s.", - current.String(), cluster.Name, instanceSetName) - } - - // If the desired size was not observed, update with previously stored value. - // This can happen in scenarios where the annotation on the Pod is missing - // such as when the cluster is shutdown or a Pod is in the middle of a restart. 
- if desiredRequest == "" { - desiredRequest = desiredRequestBackup - } - - return desiredRequest -} - // +kubebuilder:rbac:groups="",resources="pods",verbs={list} // +kubebuilder:rbac:groups="apps",resources="statefulsets",verbs={patch} diff --git a/internal/controller/postgrescluster/instance_test.go b/internal/controller/postgrescluster/instance_test.go index 83afc6d20f..bc4402183e 100644 --- a/internal/controller/postgrescluster/instance_test.go +++ b/internal/controller/postgrescluster/instance_test.go @@ -14,7 +14,6 @@ import ( "testing" "time" - "github.com/go-logr/logr/funcr" "github.com/google/go-cmp/cmp/cmpopts" "gotest.tools/v3/assert" appsv1 "k8s.io/api/apps/v1" @@ -36,10 +35,8 @@ import ( "github.com/crunchydata/postgres-operator/internal/controller/runtime" "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" - "github.com/crunchydata/postgres-operator/internal/logging" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/internal/testing/cmp" - "github.com/crunchydata/postgres-operator/internal/testing/events" "github.com/crunchydata/postgres-operator/internal/testing/require" "github.com/crunchydata/postgres-operator/pkg/apis/postgres-operator.crunchydata.com/v1beta1" ) @@ -260,121 +257,6 @@ func TestNewObservedInstances(t *testing.T) { }) } -func TestStoreDesiredRequest(t *testing.T) { - ctx := context.Background() - - setupLogCapture := func(ctx context.Context) (context.Context, *[]string) { - calls := []string{} - testlog := funcr.NewJSON(func(object string) { - calls = append(calls, object) - }, funcr.Options{ - Verbosity: 1, - }) - return logging.NewContext(ctx, testlog), &calls - } - - cluster := v1beta1.PostgresCluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "rhino", - Namespace: "test-namespace", - }, - Spec: v1beta1.PostgresClusterSpec{ - InstanceSets: []v1beta1.PostgresInstanceSetSpec{{ - Name: "red", - Replicas: initialize.Int32(1), - DataVolumeClaimSpec: v1beta1.VolumeClaimSpec{ - AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, - Resources: corev1.VolumeResourceRequirements{ - Limits: map[corev1.ResourceName]resource.Quantity{ - corev1.ResourceStorage: resource.MustParse("1Gi"), - }}}, - }, { - Name: "blue", - Replicas: initialize.Int32(1), - }}}} - - t.Run("BadRequestNoBackup", func(t *testing.T) { - recorder := events.NewRecorder(t, runtime.Scheme) - reconciler := &Reconciler{Recorder: recorder} - ctx, logs := setupLogCapture(ctx) - - value := reconciler.storeDesiredRequest(ctx, &cluster, "red", "woot", "") - - assert.Equal(t, value, "") - assert.Equal(t, len(recorder.Events), 0) - assert.Equal(t, len(*logs), 1) - assert.Assert(t, cmp.Contains((*logs)[0], "Unable to parse pgData volume request from status")) - }) - - t.Run("BadRequestWithBackup", func(t *testing.T) { - recorder := events.NewRecorder(t, runtime.Scheme) - reconciler := &Reconciler{Recorder: recorder} - ctx, logs := setupLogCapture(ctx) - - value := reconciler.storeDesiredRequest(ctx, &cluster, "red", "foo", "1Gi") - - assert.Equal(t, value, "1Gi") - assert.Equal(t, len(recorder.Events), 0) - assert.Equal(t, len(*logs), 1) - assert.Assert(t, cmp.Contains((*logs)[0], "Unable to parse pgData volume request from status (foo) for rhino/red")) - }) - - t.Run("NoLimitNoEvent", func(t *testing.T) { - recorder := events.NewRecorder(t, runtime.Scheme) - reconciler := &Reconciler{Recorder: recorder} - ctx, 
logs := setupLogCapture(ctx) - - value := reconciler.storeDesiredRequest(ctx, &cluster, "blue", "1Gi", "") - - assert.Equal(t, value, "1Gi") - assert.Equal(t, len(*logs), 0) - assert.Equal(t, len(recorder.Events), 0) - }) - - t.Run("BadBackupRequest", func(t *testing.T) { - recorder := events.NewRecorder(t, runtime.Scheme) - reconciler := &Reconciler{Recorder: recorder} - ctx, logs := setupLogCapture(ctx) - - value := reconciler.storeDesiredRequest(ctx, &cluster, "red", "2Gi", "bar") - - assert.Equal(t, value, "2Gi") - assert.Equal(t, len(*logs), 1) - assert.Assert(t, cmp.Contains((*logs)[0], "Unable to parse pgData volume request from status backup (bar) for rhino/red")) - assert.Equal(t, len(recorder.Events), 1) - assert.Equal(t, recorder.Events[0].Regarding.Name, cluster.Name) - assert.Equal(t, recorder.Events[0].Reason, "VolumeAutoGrow") - assert.Equal(t, recorder.Events[0].Note, "pgData volume expansion to 2Gi requested for rhino/red.") - }) - - t.Run("ValueUpdateWithEvent", func(t *testing.T) { - recorder := events.NewRecorder(t, runtime.Scheme) - reconciler := &Reconciler{Recorder: recorder} - ctx, logs := setupLogCapture(ctx) - - value := reconciler.storeDesiredRequest(ctx, &cluster, "red", "1Gi", "") - - assert.Equal(t, value, "1Gi") - assert.Equal(t, len(*logs), 0) - assert.Equal(t, len(recorder.Events), 1) - assert.Equal(t, recorder.Events[0].Regarding.Name, cluster.Name) - assert.Equal(t, recorder.Events[0].Reason, "VolumeAutoGrow") - assert.Equal(t, recorder.Events[0].Note, "pgData volume expansion to 1Gi requested for rhino/red.") - }) - - t.Run("NoLimitNoEvent", func(t *testing.T) { - recorder := events.NewRecorder(t, runtime.Scheme) - reconciler := &Reconciler{Recorder: recorder} - ctx, logs := setupLogCapture(ctx) - - value := reconciler.storeDesiredRequest(ctx, &cluster, "blue", "1Gi", "") - - assert.Equal(t, value, "1Gi") - assert.Equal(t, len(*logs), 0) - assert.Equal(t, len(recorder.Events), 0) - }) -} - func TestWritablePod(t *testing.T) { container := "container" diff --git a/internal/controller/postgrescluster/postgres.go b/internal/controller/postgrescluster/postgres.go index 8922e5f736..4dd4a9d78a 100644 --- a/internal/controller/postgrescluster/postgres.go +++ b/internal/controller/postgrescluster/postgres.go @@ -19,7 +19,6 @@ import ( "github.com/pkg/errors" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/util/sets" @@ -775,7 +774,7 @@ func (r *Reconciler) reconcilePostgresDataVolume( } } - r.setVolumeSize(ctx, cluster, pvc, instanceSpec.Name) + r.setVolumeSize(ctx, cluster, pvc, "pgData", instanceSpec.Name) // Clear any set limit before applying PVC. This is needed to allow the limit // value to change later. @@ -789,75 +788,6 @@ func (r *Reconciler) reconcilePostgresDataVolume( return pvc, err } -// setVolumeSize compares the potential sizes from the instance spec, status -// and limit and sets the appropriate current value. -func (r *Reconciler) setVolumeSize(ctx context.Context, cluster *v1beta1.PostgresCluster, - pvc *corev1.PersistentVolumeClaim, instanceSpecName string) { - log := logging.FromContext(ctx) - - // Store the limit for this instance set. This value will not change below. - volumeLimitFromSpec := pvc.Spec.Resources.Limits.Storage() - - // Capture the largest pgData volume size currently defined for a given instance set. - // This value will capture our desired update. 
- volumeRequestSize := pvc.Spec.Resources.Requests.Storage() - - // If the request value is greater than the set limit, use the limit and issue - // a warning event. A limit of 0 is ignorned. - if !volumeLimitFromSpec.IsZero() && - volumeRequestSize.Value() > volumeLimitFromSpec.Value() { - r.Recorder.Eventf(cluster, corev1.EventTypeWarning, "VolumeRequestOverLimit", - "pgData volume request (%v) for %s/%s is greater than set limit (%v). Limit value will be used.", - volumeRequestSize, cluster.Name, instanceSpecName, volumeLimitFromSpec) - - pvc.Spec.Resources.Requests = corev1.ResourceList{ - corev1.ResourceStorage: *resource.NewQuantity(volumeLimitFromSpec.Value(), resource.BinarySI), - } - // Otherwise, if the limit is not set or the feature gate is not enabled, do not autogrow. - } else if !volumeLimitFromSpec.IsZero() && feature.Enabled(ctx, feature.AutoGrowVolumes) { - for i := range cluster.Status.InstanceSets { - if instanceSpecName == cluster.Status.InstanceSets[i].Name { - for _, dpv := range cluster.Status.InstanceSets[i].DesiredPGDataVolume { - if dpv != "" { - desiredRequest, err := resource.ParseQuantity(dpv) - if err == nil { - if desiredRequest.Value() > volumeRequestSize.Value() { - volumeRequestSize = &desiredRequest - } - } else { - log.Error(err, "Unable to parse volume request: "+dpv) - } - } - } - } - } - - // If the volume request size is greater than or equal to the limit and the - // limit is not zero, update the request size to the limit value. - // If the user manually requests a lower limit that is smaller than the current - // or requested volume size, it will be ignored in favor of the limit value. - if volumeRequestSize.Value() >= volumeLimitFromSpec.Value() { - - r.Recorder.Eventf(cluster, corev1.EventTypeNormal, "VolumeLimitReached", - "pgData volume(s) for %s/%s are at size limit (%v).", cluster.Name, - instanceSpecName, volumeLimitFromSpec) - - // If the volume size request is greater than the limit, issue an - // additional event warning. 
- if volumeRequestSize.Value() > volumeLimitFromSpec.Value() { - r.Recorder.Eventf(cluster, corev1.EventTypeWarning, "DesiredVolumeAboveLimit", - "The desired size (%v) for the %s/%s pgData volume(s) is greater than the size limit (%v).", - volumeRequestSize, cluster.Name, instanceSpecName, volumeLimitFromSpec) - } - - volumeRequestSize = volumeLimitFromSpec - } - pvc.Spec.Resources.Requests = corev1.ResourceList{ - corev1.ResourceStorage: *resource.NewQuantity(volumeRequestSize.Value(), resource.BinarySI), - } - } -} - // +kubebuilder:rbac:groups="",resources="persistentvolumeclaims",verbs={create,patch} // reconcileTablespaceVolumes writes the PersistentVolumeClaims for instance's diff --git a/internal/controller/postgrescluster/postgres_test.go b/internal/controller/postgrescluster/postgres_test.go index e1a1a5da0f..e9b6432886 100644 --- a/internal/controller/postgrescluster/postgres_test.go +++ b/internal/controller/postgrescluster/postgres_test.go @@ -13,21 +13,18 @@ import ( "strings" "testing" - "github.com/go-logr/logr/funcr" "github.com/google/go-cmp/cmp/cmpopts" volumesnapshotv1 "github.com/kubernetes-csi/external-snapshotter/client/v8/apis/volumesnapshot/v1" "gotest.tools/v3/assert" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/controller-runtime/pkg/client" "github.com/crunchydata/postgres-operator/internal/controller/runtime" "github.com/crunchydata/postgres-operator/internal/feature" "github.com/crunchydata/postgres-operator/internal/initialize" - "github.com/crunchydata/postgres-operator/internal/logging" "github.com/crunchydata/postgres-operator/internal/naming" "github.com/crunchydata/postgres-operator/internal/postgres" "github.com/crunchydata/postgres-operator/internal/testing/cmp" @@ -851,318 +848,6 @@ volumeMode: Filesystem }) } -func TestSetVolumeSize(t *testing.T) { - t.Parallel() - - ctx := context.Background() - cluster := v1beta1.PostgresCluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "elephant", - Namespace: "test-namespace", - }, - Spec: v1beta1.PostgresClusterSpec{ - InstanceSets: []v1beta1.PostgresInstanceSetSpec{{ - Name: "some-instance", - Replicas: initialize.Int32(1), - }}, - }, - } - - instance := &appsv1.StatefulSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: "elephant-some-instance-wxyz-0", - Namespace: cluster.Namespace, - }} - - setupLogCapture := func(ctx context.Context) (context.Context, *[]string) { - calls := []string{} - testlog := funcr.NewJSON(func(object string) { - calls = append(calls, object) - }, funcr.Options{ - Verbosity: 1, - }) - return logging.NewContext(ctx, testlog), &calls - } - - // helper functions - instanceSetSpec := func(request, limit string) *v1beta1.PostgresInstanceSetSpec { - return &v1beta1.PostgresInstanceSetSpec{ - Name: "some-instance", - DataVolumeClaimSpec: v1beta1.VolumeClaimSpec{ - AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, - Resources: corev1.VolumeResourceRequirements{ - Requests: map[corev1.ResourceName]resource.Quantity{ - corev1.ResourceStorage: resource.MustParse(request), - }, - Limits: map[corev1.ResourceName]resource.Quantity{ - corev1.ResourceStorage: resource.MustParse(limit), - }}}} - } - - desiredStatus := func(request string) v1beta1.PostgresClusterStatus { - desiredMap := make(map[string]string) - desiredMap["elephant-some-instance-wxyz-0"] = request - return 
v1beta1.PostgresClusterStatus{ - InstanceSets: []v1beta1.PostgresInstanceSetStatus{{ - Name: "some-instance", - DesiredPGDataVolume: desiredMap, - }}} - } - - t.Run("RequestAboveLimit", func(t *testing.T) { - recorder := events.NewRecorder(t, runtime.Scheme) - reconciler := &Reconciler{Recorder: recorder} - ctx, logs := setupLogCapture(ctx) - - pvc := &corev1.PersistentVolumeClaim{ObjectMeta: naming.InstancePostgresDataVolume(instance)} - spec := instanceSetSpec("4Gi", "3Gi") - pvc.Spec = spec.DataVolumeClaimSpec.AsPersistentVolumeClaimSpec() - - reconciler.setVolumeSize(ctx, &cluster, pvc, spec.Name) - - assert.Assert(t, cmp.MarshalMatches(pvc.Spec, ` -accessModes: -- ReadWriteOnce -resources: - limits: - storage: 3Gi - requests: - storage: 3Gi -`)) - assert.Equal(t, len(*logs), 0) - assert.Equal(t, len(recorder.Events), 1) - assert.Equal(t, recorder.Events[0].Regarding.Name, cluster.Name) - assert.Equal(t, recorder.Events[0].Reason, "VolumeRequestOverLimit") - assert.Equal(t, recorder.Events[0].Note, "pgData volume request (4Gi) for elephant/some-instance is greater than set limit (3Gi). Limit value will be used.") - }) - - t.Run("NoFeatureGate", func(t *testing.T) { - recorder := events.NewRecorder(t, runtime.Scheme) - reconciler := &Reconciler{Recorder: recorder} - ctx, logs := setupLogCapture(ctx) - - pvc := &corev1.PersistentVolumeClaim{ObjectMeta: naming.InstancePostgresDataVolume(instance)} - spec := instanceSetSpec("1Gi", "3Gi") - - desiredMap := make(map[string]string) - desiredMap["elephant-some-instance-wxyz-0"] = "2Gi" - cluster.Status = v1beta1.PostgresClusterStatus{ - InstanceSets: []v1beta1.PostgresInstanceSetStatus{{ - Name: "some-instance", - DesiredPGDataVolume: desiredMap, - }}, - } - - pvc.Spec = spec.DataVolumeClaimSpec.AsPersistentVolumeClaimSpec() - - reconciler.setVolumeSize(ctx, &cluster, pvc, spec.Name) - - assert.Assert(t, cmp.MarshalMatches(pvc.Spec, ` -accessModes: -- ReadWriteOnce -resources: - limits: - storage: 3Gi - requests: - storage: 1Gi - `)) - - assert.Equal(t, len(recorder.Events), 0) - assert.Equal(t, len(*logs), 0) - - // clear status for other tests - cluster.Status = v1beta1.PostgresClusterStatus{} - }) - - t.Run("FeatureEnabled", func(t *testing.T) { - gate := feature.NewGate() - assert.NilError(t, gate.SetFromMap(map[string]bool{ - feature.AutoGrowVolumes: true, - })) - ctx := feature.NewContext(ctx, gate) - - t.Run("StatusNoLimit", func(t *testing.T) { - recorder := events.NewRecorder(t, runtime.Scheme) - reconciler := &Reconciler{Recorder: recorder} - ctx, logs := setupLogCapture(ctx) - - pvc := &corev1.PersistentVolumeClaim{ObjectMeta: naming.InstancePostgresDataVolume(instance)} - spec := &v1beta1.PostgresInstanceSetSpec{ - Name: "some-instance", - DataVolumeClaimSpec: v1beta1.VolumeClaimSpec{ - AccessModes: []corev1.PersistentVolumeAccessMode{corev1.ReadWriteOnce}, - Resources: corev1.VolumeResourceRequirements{ - Requests: map[corev1.ResourceName]resource.Quantity{ - corev1.ResourceStorage: resource.MustParse("1Gi"), - }}}} - cluster.Status = desiredStatus("2Gi") - pvc.Spec = spec.DataVolumeClaimSpec.AsPersistentVolumeClaimSpec() - - reconciler.setVolumeSize(ctx, &cluster, pvc, spec.Name) - - assert.Assert(t, cmp.MarshalMatches(pvc.Spec, ` -accessModes: -- ReadWriteOnce -resources: - requests: - storage: 1Gi -`)) - assert.Equal(t, len(recorder.Events), 0) - assert.Equal(t, len(*logs), 0) - - // clear status for other tests - cluster.Status = v1beta1.PostgresClusterStatus{} - }) - - t.Run("LimitNoStatus", func(t *testing.T) { - recorder := 
events.NewRecorder(t, runtime.Scheme) - reconciler := &Reconciler{Recorder: recorder} - ctx, logs := setupLogCapture(ctx) - - pvc := &corev1.PersistentVolumeClaim{ObjectMeta: naming.InstancePostgresDataVolume(instance)} - spec := instanceSetSpec("1Gi", "2Gi") - pvc.Spec = spec.DataVolumeClaimSpec.AsPersistentVolumeClaimSpec() - - reconciler.setVolumeSize(ctx, &cluster, pvc, spec.Name) - - assert.Assert(t, cmp.MarshalMatches(pvc.Spec, ` -accessModes: -- ReadWriteOnce -resources: - limits: - storage: 2Gi - requests: - storage: 1Gi -`)) - assert.Equal(t, len(recorder.Events), 0) - assert.Equal(t, len(*logs), 0) - }) - - t.Run("BadStatusWithLimit", func(t *testing.T) { - recorder := events.NewRecorder(t, runtime.Scheme) - reconciler := &Reconciler{Recorder: recorder} - ctx, logs := setupLogCapture(ctx) - - pvc := &corev1.PersistentVolumeClaim{ObjectMeta: naming.InstancePostgresDataVolume(instance)} - spec := instanceSetSpec("1Gi", "3Gi") - cluster.Status = desiredStatus("NotAValidValue") - pvc.Spec = spec.DataVolumeClaimSpec.AsPersistentVolumeClaimSpec() - - reconciler.setVolumeSize(ctx, &cluster, pvc, spec.Name) - - assert.Assert(t, cmp.MarshalMatches(pvc.Spec, ` -accessModes: -- ReadWriteOnce -resources: - limits: - storage: 3Gi - requests: - storage: 1Gi -`)) - - assert.Equal(t, len(recorder.Events), 0) - assert.Equal(t, len(*logs), 1) - assert.Assert(t, cmp.Contains((*logs)[0], "Unable to parse volume request: NotAValidValue")) - }) - - t.Run("StatusWithLimit", func(t *testing.T) { - recorder := events.NewRecorder(t, runtime.Scheme) - reconciler := &Reconciler{Recorder: recorder} - ctx, logs := setupLogCapture(ctx) - - pvc := &corev1.PersistentVolumeClaim{ObjectMeta: naming.InstancePostgresDataVolume(instance)} - spec := instanceSetSpec("1Gi", "3Gi") - cluster.Status = desiredStatus("2Gi") - pvc.Spec = spec.DataVolumeClaimSpec.AsPersistentVolumeClaimSpec() - - reconciler.setVolumeSize(ctx, &cluster, pvc, spec.Name) - - assert.Assert(t, cmp.MarshalMatches(pvc.Spec, ` -accessModes: -- ReadWriteOnce -resources: - limits: - storage: 3Gi - requests: - storage: 2Gi -`)) - assert.Equal(t, len(recorder.Events), 0) - assert.Equal(t, len(*logs), 0) - }) - - t.Run("StatusWithLimitGrowToLimit", func(t *testing.T) { - recorder := events.NewRecorder(t, runtime.Scheme) - reconciler := &Reconciler{Recorder: recorder} - ctx, logs := setupLogCapture(ctx) - - pvc := &corev1.PersistentVolumeClaim{ObjectMeta: naming.InstancePostgresDataVolume(instance)} - spec := instanceSetSpec("1Gi", "2Gi") - cluster.Status = desiredStatus("2Gi") - pvc.Spec = spec.DataVolumeClaimSpec.AsPersistentVolumeClaimSpec() - - reconciler.setVolumeSize(ctx, &cluster, pvc, spec.Name) - - assert.Assert(t, cmp.MarshalMatches(pvc.Spec, ` -accessModes: -- ReadWriteOnce -resources: - limits: - storage: 2Gi - requests: - storage: 2Gi -`)) - - assert.Equal(t, len(*logs), 0) - assert.Equal(t, len(recorder.Events), 1) - assert.Equal(t, recorder.Events[0].Regarding.Name, cluster.Name) - assert.Equal(t, recorder.Events[0].Reason, "VolumeLimitReached") - assert.Equal(t, recorder.Events[0].Note, "pgData volume(s) for elephant/some-instance are at size limit (2Gi).") - }) - - t.Run("DesiredStatusOverLimit", func(t *testing.T) { - recorder := events.NewRecorder(t, runtime.Scheme) - reconciler := &Reconciler{Recorder: recorder} - ctx, logs := setupLogCapture(ctx) - - pvc := &corev1.PersistentVolumeClaim{ObjectMeta: naming.InstancePostgresDataVolume(instance)} - spec := instanceSetSpec("4Gi", "5Gi") - cluster.Status = desiredStatus("10Gi") - pvc.Spec = 
spec.DataVolumeClaimSpec.AsPersistentVolumeClaimSpec() - - reconciler.setVolumeSize(ctx, &cluster, pvc, spec.Name) - - assert.Assert(t, cmp.MarshalMatches(pvc.Spec, ` -accessModes: -- ReadWriteOnce -resources: - limits: - storage: 5Gi - requests: - storage: 5Gi -`)) - - assert.Equal(t, len(*logs), 0) - assert.Equal(t, len(recorder.Events), 2) - var found1, found2 bool - for _, event := range recorder.Events { - if event.Reason == "VolumeLimitReached" { - found1 = true - assert.Equal(t, event.Regarding.Name, cluster.Name) - assert.Equal(t, event.Note, "pgData volume(s) for elephant/some-instance are at size limit (5Gi).") - } - if event.Reason == "DesiredVolumeAboveLimit" { - found2 = true - assert.Equal(t, event.Regarding.Name, cluster.Name) - assert.Equal(t, event.Note, - "The desired size (10Gi) for the elephant/some-instance pgData volume(s) is greater than the size limit (5Gi).") - } - } - assert.Assert(t, found1 && found2) - }) - - }) -} - func TestReconcileDatabaseInitSQL(t *testing.T) { ctx := context.Background() var called bool diff --git a/internal/controller/postgrescluster/snapshots.go b/internal/controller/postgrescluster/snapshots.go index ff00928d6b..a0fa8c94ec 100644 --- a/internal/controller/postgrescluster/snapshots.go +++ b/internal/controller/postgrescluster/snapshots.go @@ -315,7 +315,7 @@ func (r *Reconciler) createDedicatedSnapshotVolume(ctx context.Context, pvc.Spec = instanceSpec.DataVolumeClaimSpec.AsPersistentVolumeClaimSpec() // Set the snapshot volume to the same size as the pgdata volume. The size should scale with auto-grow. - r.setVolumeSize(ctx, cluster, pvc, instanceSpec.Name) + r.setVolumeSize(ctx, cluster, pvc, "pgData", instanceSpec.Name) // Clear any set limit before applying PVC. This is needed to allow the limit // value to change later. diff --git a/internal/postgres/config.go b/internal/postgres/config.go index 174aee34b5..65c26dec6d 100644 --- a/internal/postgres/config.go +++ b/internal/postgres/config.go @@ -264,6 +264,25 @@ NAMESPACE=$(cat ${SERVICEACCOUNT}/namespace) TOKEN=$(cat ${SERVICEACCOUNT}/token) CACERT=${SERVICEACCOUNT}/ca.crt +# Manage autogrow annotation. +# Return size in Mebibytes. +manageAutogrowAnnotation() { + local volume=$1 + + size=$(df --human-readable --block-size=M /"${volume}" | awk 'FNR == 2 {print $2}') + use=$(df --human-readable /"${volume}" | awk 'FNR == 2 {print $5}') + sizeInt="${size//M/}" + # Use the sed punctuation class, because the shell will not accept the percent sign in an expansion. + useInt=$(echo $use | sed 's/[[:punct:]]//g') + triggerExpansion="$((useInt > 75))" + if [ $triggerExpansion -eq 1 ]; then + newSize="$(((sizeInt / 2)+sizeInt))" + newSizeMi="${newSize}Mi" + d='[{"op": "add", "path": "/metadata/annotations/suggested-'"${volume}"'-pvc-size", "value": "'"$newSizeMi"'"}]' + curl --cacert ${CACERT} --header "Authorization: Bearer ${TOKEN}" -XPATCH "${APISERVER}/api/v1/namespaces/${NAMESPACE}/pods/${HOSTNAME}?fieldManager=kubectl-annotate" -H "Content-Type: application/json-patch+json" --data "$d" + fi +} + declare -r directory=%q exec {fd}<> <(:||:) while read -r -t 5 -u "${fd}" ||:; do @@ -276,20 +295,8 @@ while read -r -t 5 -u "${fd}" ||:; do stat --format='Loaded certificates dated %%y' "${directory}" fi - # Manage autogrow annotation. - # Return size in Mebibytes. 
- size=$(df --human-readable --block-size=M /pgdata | awk 'FNR == 2 {print $2}') - use=$(df --human-readable /pgdata | awk 'FNR == 2 {print $5}') - sizeInt="${size//M/}" - # Use the sed punctuation class, because the shell will not accept the percent sign in an expansion. - useInt=$(echo $use | sed 's/[[:punct:]]//g') - triggerExpansion="$((useInt > 75))" - if [ $triggerExpansion -eq 1 ]; then - newSize="$(((sizeInt / 2)+sizeInt))" - newSizeMi="${newSize}Mi" - d='[{"op": "add", "path": "/metadata/annotations/suggested-pgdata-pvc-size", "value": "'"$newSizeMi"'"}]' - curl --cacert ${CACERT} --header "Authorization: Bearer ${TOKEN}" -XPATCH "${APISERVER}/api/v1/namespaces/${NAMESPACE}/pods/${HOSTNAME}?fieldManager=kubectl-annotate" -H "Content-Type: application/json-patch+json" --data "$d" - fi + # manage autogrow annotation for the pgData volume + manageAutogrowAnnotation "pgdata" done `, naming.CertMountPath, @@ -299,6 +306,11 @@ done naming.ReplicationCACertPath, ) + // this is used to close out the while loop started above after adding the required + // auto grow annotation scripts + // finalDone := `done + // ` + // Elide the above script from `ps` and `top` by wrapping it in a function // and calling that. wrapper := `monitor() {` + script + `}; export -f monitor; exec -a "$0" bash -ceu monitor` diff --git a/internal/postgres/reconcile_test.go b/internal/postgres/reconcile_test.go index e90cb3c75d..c001ce890b 100644 --- a/internal/postgres/reconcile_test.go +++ b/internal/postgres/reconcile_test.go @@ -175,6 +175,25 @@ containers: TOKEN=$(cat ${SERVICEACCOUNT}/token) CACERT=${SERVICEACCOUNT}/ca.crt + # Manage autogrow annotation. + # Return size in Mebibytes. + manageAutogrowAnnotation() { + local volume=$1 + + size=$(df --human-readable --block-size=M /"${volume}" | awk 'FNR == 2 {print $2}') + use=$(df --human-readable /"${volume}" | awk 'FNR == 2 {print $5}') + sizeInt="${size//M/}" + # Use the sed punctuation class, because the shell will not accept the percent sign in an expansion. + useInt=$(echo $use | sed 's/[[:punct:]]//g') + triggerExpansion="$((useInt > 75))" + if [ $triggerExpansion -eq 1 ]; then + newSize="$(((sizeInt / 2)+sizeInt))" + newSizeMi="${newSize}Mi" + d='[{"op": "add", "path": "/metadata/annotations/suggested-'"${volume}"'-pvc-size", "value": "'"$newSizeMi"'"}]' + curl --cacert ${CACERT} --header "Authorization: Bearer ${TOKEN}" -XPATCH "${APISERVER}/api/v1/namespaces/${NAMESPACE}/pods/${HOSTNAME}?fieldManager=kubectl-annotate" -H "Content-Type: application/json-patch+json" --data "$d" + fi + } + declare -r directory="/pgconf/tls" exec {fd}<> <(:||:) while read -r -t 5 -u "${fd}" ||:; do @@ -187,20 +206,8 @@ containers: stat --format='Loaded certificates dated %y' "${directory}" fi - # Manage autogrow annotation. - # Return size in Mebibytes. - size=$(df --human-readable --block-size=M /pgdata | awk 'FNR == 2 {print $2}') - use=$(df --human-readable /pgdata | awk 'FNR == 2 {print $5}') - sizeInt="${size//M/}" - # Use the sed punctuation class, because the shell will not accept the percent sign in an expansion. 
-    useInt=$(echo $use | sed 's/[[:punct:]]//g')
-    triggerExpansion="$((useInt > 75))"
-    if [ $triggerExpansion -eq 1 ]; then
-      newSize="$(((sizeInt / 2)+sizeInt))"
-      newSizeMi="${newSize}Mi"
-      d='[{"op": "add", "path": "/metadata/annotations/suggested-pgdata-pvc-size", "value": "'"$newSizeMi"'"}]'
-      curl --cacert ${CACERT} --header "Authorization: Bearer ${TOKEN}" -XPATCH "${APISERVER}/api/v1/namespaces/${NAMESPACE}/pods/${HOSTNAME}?fieldManager=kubectl-annotate" -H "Content-Type: application/json-patch+json" --data "$d"
-    fi
+    # manage autogrow annotation for the pgData volume
+    manageAutogrowAnnotation "pgdata"
     done
   }; export -f monitor; exec -a "$0" bash -ceu monitor
  - replication-cert-copy
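
Note for follow-up work: the refactored manageAutogrowAnnotation helper is what
the TODOs above rely on. A minimal sketch of how it is expected to generalize,
assuming a future pg_wal volume follows the existing /pgdata mount and
"suggested-<volume>-pvc-size" annotation pattern (the "pgwal" mount name below
is an assumption, not confirmed by this patch):

    # inside the existing 5-second monitor loop in internal/postgres/config.go
    manageAutogrowAnnotation "pgdata"
    manageAutogrowAnnotation "pgwal"   # hypothetical pg_wal volume mount name

Each call measures the named mount with df and, when usage exceeds 75%, PATCHes
a suggested-<volume>-pvc-size annotation onto the Pod; on the next reconcile the
operator reads that annotation back into cluster status via storeDesiredRequest.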