diff --git a/.gitignore b/.gitignore index df228d285f..90780954ac 100644 --- a/.gitignore +++ b/.gitignore @@ -29,6 +29,9 @@ build/.out build/out dist/ +# Test artifacts +tests/**/*.csv + # Node modules node_modules/ diff --git a/.markdownlint-cli2.yaml b/.markdownlint-cli2.yaml index 416fbcedb1..67f5acda14 100644 --- a/.markdownlint-cli2.yaml +++ b/.markdownlint-cli2.yaml @@ -18,3 +18,4 @@ config: # Define glob expressions to ignore ignores: - ".github/" + - "tests/results/" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 842669cb30..8a3cb67832 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,6 +5,7 @@ repos: rev: v4.5.0 hooks: - id: trailing-whitespace + exclude: (^tests/results/) - id: end-of-file-fixer - id: check-yaml args: [--allow-multiple-documents] diff --git a/tests/Makefile b/tests/Makefile index febe7197e6..fd14f5ec0f 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -2,7 +2,8 @@ TAG = edge PREFIX = nginx-gateway-fabric NGINX_PREFIX = $(PREFIX)/nginx PULL_POLICY=Never -GW_API_VERSION ?= 1.0.0 +GW_API_PREV_VERSION ?= 1.0.0 ## Supported Gateway API version from previous NGF release +GW_API_VERSION ?= 1.0.0 ## Supported Gateway API version for NGF under test K8S_VERSION ?= latest ## Expected format: 1.24 (major.minor) or latest GW_SERVICE_TYPE=NodePort GW_SVC_GKE_INTERNAL=false @@ -30,7 +31,8 @@ load-images: ## Load NGF and NGINX images on configured kind cluster kind load docker-image $(PREFIX):$(TAG) $(NGINX_PREFIX):$(TAG) test: ## Run the system tests against your default k8s cluster - go test -v ./suite $(GINKGO_FLAGS) -args --gateway-api-version=$(GW_API_VERSION) --image-tag=$(TAG) \ + go test -v ./suite $(GINKGO_FLAGS) -args --gateway-api-version=$(GW_API_VERSION) \ + --gateway-api-prev-version=$(GW_API_PREV_VERSION) --image-tag=$(TAG) \ --ngf-image-repo=$(PREFIX) --nginx-image-repo=$(NGINX_PREFIX) --pull-policy=$(PULL_POLICY) \ --k8s-version=$(K8S_VERSION) --service-type=$(GW_SERVICE_TYPE) 
--is-gke-internal-lb=$(GW_SVC_GKE_INTERNAL)
@@ -46,9 +48,24 @@ run-tests-on-vm: ## Run the tests on a GCP VM
 create-and-setup-vm: ## Create and setup a GCP VM for tests
 	bash scripts/create-and-setup-gcp-vm.sh
 
-.PHONY: create-vm-and-run-tests
-create-vm-and-run-tests: create-and-setup-vm run-tests-on-vm ## Create and setup a GCP VM for tests and run the tests
-
 .PHONY: cleanup-vm
 cleanup-vm: ## Delete the test GCP VM and delete the firewall rule
 	bash scripts/cleanup-vm.sh
+
+.PHONY: create-gke-router
+create-gke-router: ## Create a GKE router to allow egress traffic from private nodes (allows for external image pulls)
+	bash scripts/create-gke-router.sh
+
+.PHONY: cleanup-router
+cleanup-router: ## Delete the GKE router
+	bash scripts/cleanup-router.sh
+
+.PHONY: setup-gcp-and-run-tests
+# Steps run as ordered sub-makes (router, VM, tests): prerequisite order is not guaranteed under `make -j`.
+setup-gcp-and-run-tests: ## Create and setup a GKE router and GCP VM for tests and run the tests
+	$(MAKE) create-gke-router
+	$(MAKE) create-and-setup-vm
+	$(MAKE) run-tests-on-vm
+
+.PHONY: cleanup-gcp
+cleanup-gcp: cleanup-router cleanup-vm ## Cleanup all GCP resources
diff --git a/tests/README.md b/tests/README.md
index 99b196e38b..5a2c4514a9 100644
--- a/tests/README.md
+++ b/tests/README.md
@@ -38,14 +38,17 @@ make
 
 ```text
 build-images Build NGF and NGINX images
-cleanup-vm Delete the test GCP VM and the firewall rule
+cleanup-gcp Cleanup all GCP resources
+cleanup-router Delete the GKE router
+cleanup-vm Delete the test GCP VM and delete the firewall rule
 create-and-setup-vm Create and setup a GCP VM for tests
+create-gke-router Create a GKE router to allow egress traffic from private nodes (allows for external image pulls)
 create-kind-cluster Create a kind cluster
-create-vm-and-run-tests Create and setup a GCP VM for tests and run the tests
 delete-kind-cluster Delete kind cluster
 help Display this help
 load-images Load NGF and NGINX images on configured kind cluster
 run-tests-on-vm Run the tests on a GCP VM
+setup-gcp-and-run-tests Create and setup a GKE router and GCP VM for tests and run the tests
test Run the system tests against your default k8s cluster ``` @@ -101,15 +104,24 @@ make test TAG=$(whoami) This step only applies if you would like to run the tests from a GCP based VM. Before running the below `make` command, copy the `scripts/vars.env-example` file to `scripts/vars.env` and populate the -required env vars. The `GKE_CLUSTER_ZONE` needs to be the zone of your GKE cluster, and `GKE_SVC_ACCOUNT` needs to be -the name of a service account that has Kubernetes admin permissions. +required env vars. `GKE_SVC_ACCOUNT` needs to be the name of a service account that has Kubernetes admin permissions. -To create and setup the VM (including creating a firewall rule allowing SSH access from your local machine, and -optionally adding the VM IP to the `master-authorized-networks` list of your GKE cluster if -`ADD_VM_IP_AUTH_NETWORKS` is set to `true`) and run the tests, run the following +In order to run the tests in GCP, you need a few things: + +- GKE router to allow egress traffic (used by upgrade tests for pulling images from GitHub) + - This assumes that your GKE cluster is using private nodes. If using public nodes, you don't need this. +- GCP VM and firewall rule to send ingress traffic to GKE + +To set up the GCP environment with the router and VM and then run the tests, run the following command: ```makefile -make create-vm-and-run-tests +make setup-gcp-and-run-tests +``` + +If you just need a VM and no router (this will not run the tests): + +```makefile +make create-and-setup-vm ``` To use an existing VM to run the tests, run the following @@ -179,7 +191,17 @@ For more information of filtering specs, see [the docs here](https://onsi.github make delete-kind-cluster ``` -2. 
Delete the GCP components (GKE router, VM, and firewall rule), if required + + ```makefile + make cleanup-gcp + ``` + + or + + ```makefile + make cleanup-router + ``` ```makefile make cleanup-vm diff --git a/tests/framework/common.go b/tests/framework/common.go deleted file mode 100644 index 04e3a302ad..0000000000 --- a/tests/framework/common.go +++ /dev/null @@ -1,28 +0,0 @@ -package framework - -import ( - "fmt" - "os" -) - -// WriteSystemInfoToFile writes the cluster system info to the given file -func WriteSystemInfoToFile(file *os.File, ci ClusterInfo) error { - clusterType := "Local" - if ci.IsGKE { - clusterType = "GKE" - } - text := fmt.Sprintf( - //nolint:lll - "# Results\n\n## Test environment\n\n%s Cluster:\n\n- Node count: %d\n- k8s version: %s\n- vCPUs per node: %d\n- RAM per node: %s\n- Max pods per node: %d\n", - clusterType, ci.NodeCount, ci.K8sVersion, ci.CPUCountPerNode, ci.MemoryPerNode, ci.MaxPodsPerNode, - ) - if _, err := fmt.Fprint(file, text); err != nil { - return err - } - if ci.IsGKE { - if _, err := fmt.Fprintf(file, "- Zone: %s\n- Instance Type: %s\n", ci.GkeZone, ci.GkeInstanceType); err != nil { - return err - } - } - return nil -} diff --git a/tests/framework/load.go b/tests/framework/load.go index dcf21ba767..a6347f720e 100644 --- a/tests/framework/load.go +++ b/tests/framework/load.go @@ -1,10 +1,10 @@ package framework import ( - "fmt" + "context" + "crypto/tls" + "net" "net/http" - "net/url" - "os" "time" vegeta "github.com/tsenart/vegeta/v12/lib" @@ -31,38 +31,57 @@ func convertTargetToVegetaTarget(targets []Target) []vegeta.Target { return vegTargets } +// LoadTestConfig is the configuration to run a load test. +type LoadTestConfig struct { + Description string + Proxy string + ServerName string + Targets []Target + Rate int + Duration time.Duration +} + +// Metrics is a wrapper around the vegeta Metrics. 
+type Metrics struct {
+	vegeta.Metrics
+}
+
-// RunLoadTest uses Vegeta to send traffic to the provided Targets at the given rate for the given duration and writes
-// the results to the provided file
-func RunLoadTest(
-	targets []Target,
-	rate int,
-	duration time.Duration,
-	desc string,
-	outFile *os.File,
-	proxy string,
-) error {
-	vegTargets := convertTargetToVegetaTarget(targets)
+// RunLoadTest uses Vegeta to send traffic to cfg.Targets at cfg.Rate requests per second for cfg.Duration, dialing
+// cfg.Proxy for every connection, and returns the raw results together with the aggregated metrics.
+func RunLoadTest(cfg LoadTestConfig) (vegeta.Results, Metrics) {
+	vegTargets := convertTargetToVegetaTarget(cfg.Targets)
 	targeter := vegeta.NewStaticTargeter(vegTargets...)
-	proxyURL, err := url.Parse(proxy)
-	if err != nil {
-		return fmt.Errorf("error getting proxy URL: %w", err)
+
+	dialer := &net.Dialer{
+		LocalAddr: &net.TCPAddr{IP: vegeta.DefaultLocalAddr.IP, Zone: vegeta.DefaultLocalAddr.Zone},
+		KeepAlive: 30 * time.Second,
 	}
 
-	attacker := vegeta.NewAttacker(
-		vegeta.Proxy(http.ProxyURL(proxyURL)),
-	)
+	httpClient := http.Client{
+		Timeout: vegeta.DefaultTimeout,
+		Transport: &http.Transport{
+			DialContext: func(ctx context.Context, network, addr string) (net.Conn, error) {
+				return dialer.DialContext(ctx, network, cfg.Proxy)
+			},
+			TLSClientConfig: &tls.Config{
+				InsecureSkipVerify: true, //nolint:gosec // self-signed cert for testing
+				ServerName:         cfg.ServerName,
+			},
+			MaxIdleConnsPerHost: vegeta.DefaultConnections,
+			MaxConnsPerHost:     vegeta.DefaultMaxConnections,
+		},
+	}
+
+	attacker := vegeta.NewAttacker(vegeta.Client(&httpClient))
 
-	r := vegeta.Rate{Freq: rate, Per: time.Second}
+	r := vegeta.Rate{Freq: cfg.Rate, Per: time.Second}
+	var results vegeta.Results
 	var metrics vegeta.Metrics
-	for res := range attacker.Attack(targeter, r, duration, desc) {
+	for res := range attacker.Attack(targeter, r, cfg.Duration, cfg.Description) {
+		results = append(results, *res)
 		metrics.Add(res)
 	}
 	metrics.Close()
 
-	reporter := vegeta.NewTextReporter(&metrics)
-
-	if err = reporter.Report(outFile); err != nil {
-		return fmt.Errorf("error reporting results: %w", err)
-	}
-	return
nil
+	return results, Metrics{metrics}
 }
diff --git a/tests/framework/ngf.go b/tests/framework/ngf.go
index 67c71fe00b..0f3162835e 100644
--- a/tests/framework/ngf.go
+++ b/tests/framework/ngf.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"fmt"
 	"os/exec"
+	"path/filepath"
 	"strings"
 	"time"
 
@@ -83,39 +84,50 @@ func InstallGatewayAPI(
 	return nil, nil
 }
 
+// UninstallGatewayAPI deletes the Gateway API resources of the given version, tolerating ones already removed.
+func UninstallGatewayAPI(apiVersion, k8sVersion string) ([]byte, error) {
+	apiPath := fmt.Sprintf("%s/v%s/standard-install.yaml", gwInstallBasePath, apiVersion)
+
+	if webhookRequired(k8sVersion) {
+		webhookPath := fmt.Sprintf("%s/v%s/webhook-install.yaml", gwInstallBasePath, apiVersion)
+		output, err := exec.Command("kubectl", "delete", "-f", webhookPath).CombinedOutput()
+		if err != nil && !strings.Contains(string(output), "not found") { // an already-removed webhook is fine
+			return output, err
+		}
+	}
+
+	output, err := exec.Command("kubectl", "delete", "-f", apiPath).CombinedOutput()
+	if err != nil && !strings.Contains(string(output), "not found") {
+		return output, err
+	}
+
+	return nil, nil
+}
+
 // InstallNGF installs NGF.
 func InstallNGF(cfg InstallationConfig, extraArgs ...string) ([]byte, error) {
 	args := []string{
 		"install", cfg.ReleaseName, cfg.ChartPath, "--create-namespace", "--namespace", cfg.Namespace, "--wait",
 	}
 
-	if cfg.NgfImageRepository != "" {
-		args = append(args, formatValueSet("nginxGateway.image.repository", cfg.NgfImageRepository)...)
-		if cfg.ImageTag != "" {
-			args = append(args, formatValueSet("nginxGateway.image.tag", cfg.ImageTag)...)
-		}
-		if cfg.ImagePullPolicy != "" {
-			args = append(args, formatValueSet("nginxGateway.image.pullPolicy", cfg.ImagePullPolicy)...)
-		}
-	}
+	args = append(args, setImageArgs(cfg)...)
+	fullArgs := append(args, extraArgs...)
 
-	if cfg.NginxImageRepository != "" {
-		args = append(args, formatValueSet("nginx.image.repository", cfg.NginxImageRepository)...)
-		if cfg.ImageTag != "" {
-			args = append(args, formatValueSet("nginx.image.tag", cfg.ImageTag)...)
- } - if cfg.ImagePullPolicy != "" { - args = append(args, formatValueSet("nginx.image.pullPolicy", cfg.ImagePullPolicy)...) - } + return exec.Command("helm", fullArgs...).CombinedOutput() +} + +// UpgradeNGF upgrades NGF. CRD upgrades assume the chart is local. +func UpgradeNGF(cfg InstallationConfig, extraArgs ...string) ([]byte, error) { + crdPath := filepath.Join(cfg.ChartPath, "crds") + if output, err := exec.Command("kubectl", "apply", "-f", crdPath).CombinedOutput(); err != nil { + return output, err } - if cfg.ServiceType != "" { - args = append(args, formatValueSet("service.type", cfg.ServiceType)...) - if cfg.ServiceType == "LoadBalancer" && cfg.IsGKEInternalLB { - args = append(args, formatValueSet(`service.annotations.networking\.gke\.io\/load-balancer-type`, "Internal")...) - } + args := []string{ + "upgrade", cfg.ReleaseName, cfg.ChartPath, "--namespace", cfg.Namespace, "--wait", } + args = append(args, setImageArgs(cfg)...) fullArgs := append(args, extraArgs...) return exec.Command("helm", fullArgs...).CombinedOutput() @@ -128,7 +140,7 @@ func UninstallNGF(cfg InstallationConfig, k8sClient client.Client) ([]byte, erro } output, err := exec.Command("helm", args...).CombinedOutput() - if err != nil { + if err != nil && !strings.Contains(string(output), "release: not found") { return output, err } @@ -157,19 +169,37 @@ func UninstallNGF(cfg InstallationConfig, k8sClient client.Client) ([]byte, erro return nil, nil } -// UninstallGatewayAPI uninstalls the specified version of the Gateway API resources. 
-func UninstallGatewayAPI(apiVersion, k8sVersion string) ([]byte, error) { - apiPath := fmt.Sprintf("%s/v%s/standard-install.yaml", gwInstallBasePath, apiVersion) +func setImageArgs(cfg InstallationConfig) []string { + var args []string - if webhookRequired(k8sVersion) { - webhookPath := fmt.Sprintf("%s/v%s/webhook-install.yaml", gwInstallBasePath, apiVersion) + if cfg.NgfImageRepository != "" { + args = append(args, formatValueSet("nginxGateway.image.repository", cfg.NgfImageRepository)...) + if cfg.ImageTag != "" { + args = append(args, formatValueSet("nginxGateway.image.tag", cfg.ImageTag)...) + } + if cfg.ImagePullPolicy != "" { + args = append(args, formatValueSet("nginxGateway.image.pullPolicy", cfg.ImagePullPolicy)...) + } + } - if output, err := exec.Command("kubectl", "delete", "-f", webhookPath).CombinedOutput(); err != nil { - return output, err + if cfg.NginxImageRepository != "" { + args = append(args, formatValueSet("nginx.image.repository", cfg.NginxImageRepository)...) + if cfg.ImageTag != "" { + args = append(args, formatValueSet("nginx.image.tag", cfg.ImageTag)...) + } + if cfg.ImagePullPolicy != "" { + args = append(args, formatValueSet("nginx.image.pullPolicy", cfg.ImagePullPolicy)...) + } + } + + if cfg.ServiceType != "" { + args = append(args, formatValueSet("service.type", cfg.ServiceType)...) + if cfg.ServiceType == "LoadBalancer" && cfg.IsGKEInternalLB { + args = append(args, formatValueSet(`service.annotations.networking\.gke\.io\/load-balancer-type`, "Internal")...) 
} } - return exec.Command("kubectl", "delete", "-f", apiPath).CombinedOutput() + return args } func formatValueSet(key, value string) []string { diff --git a/tests/framework/portforward.go b/tests/framework/portforward.go index 5e7378cc02..1efc16be3c 100644 --- a/tests/framework/portforward.go +++ b/tests/framework/portforward.go @@ -2,50 +2,16 @@ package framework import ( "bytes" - "context" - "errors" "fmt" "net/http" "net/url" "path" - "time" - core "k8s.io/api/core/v1" "k8s.io/client-go/rest" "k8s.io/client-go/tools/portforward" "k8s.io/client-go/transport/spdy" - "sigs.k8s.io/controller-runtime/pkg/client" ) -// GetNGFPodName returns the name of the NGF Pod. -func GetNGFPodName( - k8sClient client.Client, - namespace, - releaseName string, - timeout time.Duration, -) (string, error) { - ctx, cancel := context.WithTimeout(context.Background(), timeout) - defer cancel() - - var podList core.PodList - if err := k8sClient.List( - ctx, - &podList, - client.InNamespace(namespace), - client.MatchingLabels{ - "app.kubernetes.io/instance": releaseName, - }, - ); err != nil { - return "", fmt.Errorf("error getting list of Pods: %w", err) - } - - if len(podList.Items) > 0 { - return podList.Items[0].Name, nil - } - - return "", errors.New("unable to find NGF Pod") -} - // PortForward starts a port-forward to the specified Pod and returns the local port being forwarded. func PortForward(config *rest.Config, namespace, podName string, stopCh chan struct{}) (int, error) { roundTripper, upgrader, err := spdy.RoundTripperFor(config) diff --git a/tests/framework/resourcemanager.go b/tests/framework/resourcemanager.go index e8a7682afe..72c3992cb4 100644 --- a/tests/framework/resourcemanager.go +++ b/tests/framework/resourcemanager.go @@ -408,3 +408,40 @@ func (rm *ResourceManager) GetClusterInfo() (ClusterInfo, error) { return *ci, nil } + +// GetReadyNGFPodNames returns the name(s) of the NGF Pod(s). 
+func GetReadyNGFPodNames( + k8sClient client.Client, + namespace, + releaseName string, + timeout time.Duration, +) ([]string, error) { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + var podList core.PodList + if err := k8sClient.List( + ctx, + &podList, + client.InNamespace(namespace), + client.MatchingLabels{ + "app.kubernetes.io/instance": releaseName, + }, + ); err != nil { + return nil, fmt.Errorf("error getting list of Pods: %w", err) + } + + if len(podList.Items) > 0 { + var names []string + for _, pod := range podList.Items { + for _, cond := range pod.Status.Conditions { + if cond.Type == core.PodReady && cond.Status == core.ConditionTrue { + names = append(names, pod.Name) + } + } + } + return names, nil + } + + return nil, errors.New("unable to find NGF Pod(s)") +} diff --git a/tests/framework/results.go b/tests/framework/results.go new file mode 100644 index 0000000000..78d4dad254 --- /dev/null +++ b/tests/framework/results.go @@ -0,0 +1,83 @@ +package framework + +import ( + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + + vegeta "github.com/tsenart/vegeta/v12/lib" +) + +// CreateResultsDir creates and returns the name of the results directory for a test. +func CreateResultsDir(testName, version string) (string, error) { + pwd, err := os.Getwd() + if err != nil { + return "", err + } + + dirName := filepath.Join(filepath.Dir(pwd), "results", testName, version) + + return dirName, os.MkdirAll(dirName, 0o777) +} + +// CreateResultsFile creates and returns the results file for a test. +func CreateResultsFile(filename string) (*os.File, error) { + outFile, err := os.OpenFile(filename, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o777) + if err != nil { + return nil, err + } + + return outFile, nil +} + +// WriteSystemInfoToFile writes the cluster system info to the given file. 
+func WriteSystemInfoToFile(file *os.File, ci ClusterInfo) error { + clusterType := "Local" + if ci.IsGKE { + clusterType = "GKE" + } + text := fmt.Sprintf( + //nolint:lll + "# Results\n\n## Test environment\n\n%s Cluster:\n\n- Node count: %d\n- k8s version: %s\n- vCPUs per node: %d\n- RAM per node: %s\n- Max pods per node: %d\n", + clusterType, ci.NodeCount, ci.K8sVersion, ci.CPUCountPerNode, ci.MemoryPerNode, ci.MaxPodsPerNode, + ) + if _, err := fmt.Fprint(file, text); err != nil { + return err + } + if ci.IsGKE { + if _, err := fmt.Fprintf(file, "- Zone: %s\n- Instance Type: %s\n", ci.GkeZone, ci.GkeInstanceType); err != nil { + return err + } + } + return nil +} + +// GeneratePNG generates a PNG using gnuplot. +func GeneratePNG(resultsDir, inputFilename, outputFilename string) ([]byte, error) { + pwd, err := os.Getwd() + if err != nil { + return nil, err + } + + gnuplotCfg := filepath.Join(filepath.Dir(pwd), "scripts", "requests-plot.gp") + + files := fmt.Sprintf("inputfile='%s';outputfile='%s'", inputFilename, outputFilename) + cmd := exec.Command("gnuplot", "-e", files, "-c", gnuplotCfg) + cmd.Dir = resultsDir + + return cmd.CombinedOutput() +} + +// WriteResults writes the vegeta metrics results to the results file in text format. +func WriteResults(resultsFile *os.File, metrics *Metrics) error { + reporter := vegeta.NewTextReporter(&metrics.Metrics) + + return reporter.Report(resultsFile) +} + +// NewCSVEncoder returns a vegeta CSV encoder. 
+func NewCSVEncoder(w io.Writer) vegeta.Encoder { + return vegeta.NewCSVEncoder(w) +} diff --git a/tests/zero-downtime-upgrades/results/1.0.0-special/1.0.0-special.md b/tests/results/ngf-upgrade/1.0.0-special/1.0.0-special.md similarity index 100% rename from tests/zero-downtime-upgrades/results/1.0.0-special/1.0.0-special.md rename to tests/results/ngf-upgrade/1.0.0-special/1.0.0-special.md diff --git a/tests/zero-downtime-upgrades/results/1.0.0-special/http.csv b/tests/results/ngf-upgrade/1.0.0-special/http.csv similarity index 100% rename from tests/zero-downtime-upgrades/results/1.0.0-special/http.csv rename to tests/results/ngf-upgrade/1.0.0-special/http.csv diff --git a/tests/zero-downtime-upgrades/results/1.0.0-special/http.png b/tests/results/ngf-upgrade/1.0.0-special/http.png similarity index 100% rename from tests/zero-downtime-upgrades/results/1.0.0-special/http.png rename to tests/results/ngf-upgrade/1.0.0-special/http.png diff --git a/tests/zero-downtime-upgrades/results/1.0.0-special/https.csv b/tests/results/ngf-upgrade/1.0.0-special/https.csv similarity index 100% rename from tests/zero-downtime-upgrades/results/1.0.0-special/https.csv rename to tests/results/ngf-upgrade/1.0.0-special/https.csv diff --git a/tests/zero-downtime-upgrades/results/1.0.0-special/https.png b/tests/results/ngf-upgrade/1.0.0-special/https.png similarity index 100% rename from tests/zero-downtime-upgrades/results/1.0.0-special/https.png rename to tests/results/ngf-upgrade/1.0.0-special/https.png diff --git a/tests/zero-downtime-upgrades/results/1.0.0/1.0.0.md b/tests/results/ngf-upgrade/1.0.0/1.0.0.md similarity index 100% rename from tests/zero-downtime-upgrades/results/1.0.0/1.0.0.md rename to tests/results/ngf-upgrade/1.0.0/1.0.0.md diff --git a/tests/zero-downtime-upgrades/results/1.0.0/http.csv b/tests/results/ngf-upgrade/1.0.0/http.csv similarity index 100% rename from tests/zero-downtime-upgrades/results/1.0.0/http.csv rename to 
tests/results/ngf-upgrade/1.0.0/http.csv diff --git a/tests/zero-downtime-upgrades/results/1.0.0/http.png b/tests/results/ngf-upgrade/1.0.0/http.png similarity index 100% rename from tests/zero-downtime-upgrades/results/1.0.0/http.png rename to tests/results/ngf-upgrade/1.0.0/http.png diff --git a/tests/zero-downtime-upgrades/results/1.0.0/https.csv b/tests/results/ngf-upgrade/1.0.0/https.csv similarity index 100% rename from tests/zero-downtime-upgrades/results/1.0.0/https.csv rename to tests/results/ngf-upgrade/1.0.0/https.csv diff --git a/tests/zero-downtime-upgrades/results/1.0.0/https.png b/tests/results/ngf-upgrade/1.0.0/https.png similarity index 100% rename from tests/zero-downtime-upgrades/results/1.0.0/https.png rename to tests/results/ngf-upgrade/1.0.0/https.png diff --git a/tests/zero-downtime-upgrades/results/1.1.0/1.1.0.md b/tests/results/ngf-upgrade/1.1.0/1.1.0.md similarity index 100% rename from tests/zero-downtime-upgrades/results/1.1.0/1.1.0.md rename to tests/results/ngf-upgrade/1.1.0/1.1.0.md diff --git a/tests/zero-downtime-upgrades/results/1.1.0/http.csv b/tests/results/ngf-upgrade/1.1.0/http.csv similarity index 100% rename from tests/zero-downtime-upgrades/results/1.1.0/http.csv rename to tests/results/ngf-upgrade/1.1.0/http.csv diff --git a/tests/zero-downtime-upgrades/results/1.1.0/http.png b/tests/results/ngf-upgrade/1.1.0/http.png similarity index 100% rename from tests/zero-downtime-upgrades/results/1.1.0/http.png rename to tests/results/ngf-upgrade/1.1.0/http.png diff --git a/tests/zero-downtime-upgrades/results/1.1.0/https.csv b/tests/results/ngf-upgrade/1.1.0/https.csv similarity index 100% rename from tests/zero-downtime-upgrades/results/1.1.0/https.csv rename to tests/results/ngf-upgrade/1.1.0/https.csv diff --git a/tests/zero-downtime-upgrades/results/1.1.0/https.png b/tests/results/ngf-upgrade/1.1.0/https.png similarity index 100% rename from tests/zero-downtime-upgrades/results/1.1.0/https.png rename to 
tests/results/ngf-upgrade/1.1.0/https.png
diff --git a/tests/scripts/cleanup-router.sh b/tests/scripts/cleanup-router.sh
new file mode 100755
index 0000000000..e564aaa0e2
--- /dev/null
+++ b/tests/scripts/cleanup-router.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+source scripts/vars.env
+
+# Delete the NAT configuration first, then the router it is attached to.
+gcloud compute routers nats delete ${RESOURCE_NAME} --quiet --project=${GKE_PROJECT} \
+    --router ${RESOURCE_NAME} --router-region ${GKE_CLUSTER_REGION}
+gcloud compute routers delete ${RESOURCE_NAME} --quiet --project=${GKE_PROJECT} --region ${GKE_CLUSTER_REGION}
diff --git a/tests/scripts/cleanup-vm.sh b/tests/scripts/cleanup-vm.sh
index 4d51b4f47e..517d987502 100644
--- a/tests/scripts/cleanup-vm.sh
+++ b/tests/scripts/cleanup-vm.sh
@@ -4,12 +4,12 @@ source scripts/vars.env
 
 # Remove VM IP from GKE master control node access, if required
 if [ "${ADD_VM_IP_AUTH_NETWORKS}" = "true" ]; then
-    EXTERNAL_IP=$(gcloud compute instances describe ${VM_NAME} --project=${GKE_PROJECT} --zone=${GKE_CLUSTER_ZONE} \
+    EXTERNAL_IP=$(gcloud compute instances describe ${RESOURCE_NAME} --project=${GKE_PROJECT} --zone=${GKE_CLUSTER_ZONE} \
                 --format='value(networkInterfaces[0].accessConfigs[0].natIP)')
     CURRENT_AUTH_NETWORK=$(gcloud container clusters describe ${GKE_CLUSTER_NAME} \
                             --format="value(masterAuthorizedNetworksConfig.cidrBlocks[0])" | sed 's/cidrBlock=//')
     gcloud container clusters update ${GKE_CLUSTER_NAME} --enable-master-authorized-networks --master-authorized-networks=${CURRENT_AUTH_NETWORK}
 fi
 
-gcloud compute instances delete ${VM_NAME} --project=${GKE_PROJECT} --zone=${GKE_CLUSTER_ZONE}
-gcloud compute firewall-rules delete ${FIREWALL_RULE_NAME} --project=${GKE_PROJECT}
+gcloud compute instances delete ${RESOURCE_NAME} --quiet --project=${GKE_PROJECT} --zone=${GKE_CLUSTER_ZONE}
+gcloud compute firewall-rules delete ${RESOURCE_NAME} --quiet --project=${GKE_PROJECT}
diff --git a/tests/scripts/create-and-setup-gcp-vm.sh b/tests/scripts/create-and-setup-gcp-vm.sh
index 522b49305e..a28166b2fb 100644
--- a/tests/scripts/create-and-setup-gcp-vm.sh
+++ 
b/tests/scripts/create-and-setup-gcp-vm.sh @@ -1,11 +1,10 @@ #!/bin/bash SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) -PARENT_DIR=$(dirname "$SCRIPT_DIR") source scripts/vars.env -gcloud compute firewall-rules create ${FIREWALL_RULE_NAME} \ +gcloud compute firewall-rules create ${RESOURCE_NAME} \ --project=${GKE_PROJECT} \ --direction=INGRESS \ --priority=1000 \ @@ -15,15 +14,15 @@ gcloud compute firewall-rules create ${FIREWALL_RULE_NAME} \ --source-ranges=${SOURCE_IP_RANGE} \ --target-tags=${NETWORK_TAGS} -gcloud compute instances create ${VM_NAME} --project=${GKE_PROJECT} --zone=${GKE_CLUSTER_ZONE} --machine-type=e2-medium \ +gcloud compute instances create ${RESOURCE_NAME} --project=${GKE_PROJECT} --zone=${GKE_CLUSTER_ZONE} --machine-type=e2-medium \ --network-interface=network-tier=PREMIUM,stack-type=IPV4_ONLY,subnet=default --maintenance-policy=MIGRATE \ --provisioning-model=STANDARD --service-account=${GKE_SVC_ACCOUNT} \ --scopes=https://www.googleapis.com/auth/devstorage.read_only,https://www.googleapis.com/auth/logging.write,https://www.googleapis.com/auth/monitoring.write,https://www.googleapis.com/auth/servicecontrol,https://www.googleapis.com/auth/service.management.readonly,https://www.googleapis.com/auth/trace.append,https://www.googleapis.com/auth/cloud-platform \ - --tags=${NETWORK_TAGS} --create-disk=auto-delete=yes,boot=yes,device-name=${VM_NAME},image=${IMAGE},mode=rw,size=10 --no-shielded-secure-boot --shielded-vtpm --shielded-integrity-monitoring --labels=goog-ec-src=vm_add-gcloud --reservation-affinity=any + --tags=${NETWORK_TAGS} --create-disk=auto-delete=yes,boot=yes,device-name=${RESOURCE_NAME},image=${IMAGE},mode=rw,size=10 --no-shielded-secure-boot --shielded-vtpm --shielded-integrity-monitoring --labels=goog-ec-src=vm_add-gcloud --reservation-affinity=any # Add VM IP to GKE master control node access, if required if [ "${ADD_VM_IP_AUTH_NETWORKS}" = "true" ]; then - EXTERNAL_IP=$(gcloud compute instances 
describe ${VM_NAME} --project=${GKE_PROJECT} --zone=${GKE_CLUSTER_ZONE} \
+    EXTERNAL_IP=$(gcloud compute instances describe ${RESOURCE_NAME} --project=${GKE_PROJECT} --zone=${GKE_CLUSTER_ZONE} \
                 --format='value(networkInterfaces[0].accessConfigs[0].natIP)')
     CURRENT_AUTH_NETWORK=$(gcloud container clusters describe ${GKE_CLUSTER_NAME} \
                             --format="value(masterAuthorizedNetworksConfig.cidrBlocks[0])" | sed 's/cidrBlock=//')
@@ -35,7 +34,7 @@ MAX_RETRIES=30
 RETRY_INTERVAL=10
 for ((i=1; i<=MAX_RETRIES; i++)); do
     echo "Attempt $i to connect to the VM..."
-    gcloud compute ssh ${VM_NAME} --zone=${GKE_CLUSTER_ZONE} --quiet --command="echo 'VM is ready'"
+    gcloud compute ssh ${RESOURCE_NAME} --zone=${GKE_CLUSTER_ZONE} --project=${GKE_PROJECT} --quiet --command="echo 'VM is ready'"
     if [ $? -eq 0 ]; then
         echo "SSH connection successful. VM is ready."
         break
@@ -44,6 +43,6 @@ for ((i=1; i<=MAX_RETRIES; i++)); do
     sleep ${RETRY_INTERVAL}
 done
 
-gcloud compute scp --zone ${GKE_CLUSTER_ZONE} ${SCRIPT_DIR}/vars.env ${VM_NAME}:~
+gcloud compute scp --zone ${GKE_CLUSTER_ZONE} --project=${GKE_PROJECT} ${SCRIPT_DIR}/vars.env ${RESOURCE_NAME}:~
 
-gcloud compute ssh --zone ${GKE_CLUSTER_ZONE} ${VM_NAME} --command="bash -s" < ${SCRIPT_DIR}/remote-scripts/install-deps.sh
+gcloud compute ssh --zone ${GKE_CLUSTER_ZONE} --project=${GKE_PROJECT} ${RESOURCE_NAME} --command="bash -s" < ${SCRIPT_DIR}/remote-scripts/install-deps.sh
diff --git a/tests/scripts/create-gke-router.sh b/tests/scripts/create-gke-router.sh
new file mode 100755
index 0000000000..25bad4ce21
--- /dev/null
+++ b/tests/scripts/create-gke-router.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+source scripts/vars.env
+
+# Create a Cloud Router in the cluster's region on the default network.
+gcloud compute routers create ${RESOURCE_NAME} \
+    --project=${GKE_PROJECT} \
+    --region ${GKE_CLUSTER_REGION} \
+    --network default
+
+# Add a NAT configuration to that router covering all subnet IP ranges, with auto-allocated external IPs.
+gcloud compute routers nats create ${RESOURCE_NAME} \
+    --project=${GKE_PROJECT} \
+    --router-region ${GKE_CLUSTER_REGION} \
+    --router ${RESOURCE_NAME} \
+    --nat-all-subnet-ip-ranges \
+    --auto-allocate-nat-external-ips
diff --git 
a/tests/scripts/remote-scripts/install-deps.sh b/tests/scripts/remote-scripts/install-deps.sh index df4a3cd461..371f75ff62 100644 --- a/tests/scripts/remote-scripts/install-deps.sh +++ b/tests/scripts/remote-scripts/install-deps.sh @@ -4,12 +4,12 @@ set -e source ~/vars.env -sudo apt-get -y update && sudo apt-get -y install git make kubectl google-cloud-sdk-gke-gcloud-auth-plugin jq && \ +sudo apt-get -y update && sudo apt-get -y install git make kubectl google-cloud-sdk-gke-gcloud-auth-plugin jq gnuplot && \ curl https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash && \ export GO_VERSION=$(curl -sSL "https://golang.org/dl/?mode=json" | jq -r '.[0].version') && \ wget https://go.dev/dl/${GO_VERSION}.linux-amd64.tar.gz && \ sudo rm -rf /usr/local/go && sudo tar -C /usr/local -xzf ${GO_VERSION}.linux-amd64.tar.gz && \ rm -rf ${GO_VERSION}.linux-amd64.tar.gz && \ -gcloud container clusters get-credentials ${GKE_CLUSTER_NAME} --zone ${GKE_CLUSTER_ZONE} +gcloud container clusters get-credentials ${GKE_CLUSTER_NAME} --zone ${GKE_CLUSTER_ZONE} --project=${GKE_PROJECT} git clone https://github.com/${NGF_REPO}/nginx-gateway-fabric.git && cd nginx-gateway-fabric/tests && git checkout ${NGF_BRANCH} diff --git a/tests/scripts/requests-plot.gp b/tests/scripts/requests-plot.gp new file mode 100644 index 0000000000..b625ab96e2 --- /dev/null +++ b/tests/scripts/requests-plot.gp @@ -0,0 +1,16 @@ +set terminal png size 800,600 +set title "Request Outcomes Over Time" +set datafile separator "," +set output outputfile . 
"" + +# X-axis settings +set xdata time +set format "%M:%S" + +# Y-axis settings +set yrange [-50:550] +set ytics ("0" 0, "200" 200, "4xx" 403 404, "5xx" 500 502) +set grid ytics + +# Plotting data (vegeta timestamp is in nanoseconds, so convert to seconds) +plot inputfile using ($1/1000000000):2:2 pointtype 7 pointsize 1.5 notitle diff --git a/tests/scripts/run-tests-gcp-vm.sh b/tests/scripts/run-tests-gcp-vm.sh index 66bbe841f2..4a1872dab9 100644 --- a/tests/scripts/run-tests-gcp-vm.sh +++ b/tests/scripts/run-tests-gcp-vm.sh @@ -4,8 +4,8 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) source scripts/vars.env -gcloud compute scp --zone ${GKE_CLUSTER_ZONE} ${SCRIPT_DIR}/vars.env ${VM_NAME}:~ +gcloud compute scp --zone ${GKE_CLUSTER_ZONE} --project=${GKE_PROJECT} ${SCRIPT_DIR}/vars.env ${RESOURCE_NAME}:~ -gcloud compute ssh --zone ${GKE_CLUSTER_ZONE} ${VM_NAME} --command="bash -s" < ${SCRIPT_DIR}/remote-scripts/run-tests.sh +gcloud compute ssh --zone ${GKE_CLUSTER_ZONE} --project=${GKE_PROJECT} ${RESOURCE_NAME} --command="bash -s" < ${SCRIPT_DIR}/remote-scripts/run-tests.sh -gcloud compute scp --zone ${GKE_CLUSTER_ZONE} --recurse ${VM_NAME}:~/nginx-gateway-fabric/tests/results . +gcloud compute scp --zone ${GKE_CLUSTER_ZONE} --project=${GKE_PROJECT} --recurse ${RESOURCE_NAME}:~/nginx-gateway-fabric/tests/results . 
diff --git a/tests/scripts/vars.env-example b/tests/scripts/vars.env-example index aa35d62156..6801e34ee6 100644 --- a/tests/scripts/vars.env-example +++ b/tests/scripts/vars.env-example @@ -1,14 +1,14 @@ # WARNING: This is an example - please copy this file to vars.env and complete appropriately for your environment -VM_NAME=ngf-auto-testing +RESOURCE_NAME= TAG= PREFIX= NGINX_PREFIX= GKE_CLUSTER_NAME= GKE_CLUSTER_ZONE= +GKE_CLUSTER_REGION= GKE_PROJECT= GKE_SVC_ACCOUNT= IMAGE=projects/debian-cloud/global/images/debian-11-bullseye-v20231212 -FIREWALL_RULE_NAME= NETWORK_TAGS= NGF_REPO=nginxinc NGF_BRANCH=main diff --git a/tests/suite/dataplane_perf_test.go b/tests/suite/dataplane_perf_test.go index 55f402924d..a3f5e6514f 100644 --- a/tests/suite/dataplane_perf_test.go +++ b/tests/suite/dataplane_perf_test.go @@ -29,7 +29,7 @@ var _ = Describe("Dataplane performance", Ordered, Label("performance"), func() }, } - var port, parentDir, resultsFilePath, addr string + var addr string targetURL := "http://cafe.example.com" var outFile *os.File @@ -56,39 +56,51 @@ var _ = Describe("Dataplane performance", Ordered, Label("performance"), func() } BeforeAll(func() { - pwd, err := os.Getwd() - Expect(err).ToNot(HaveOccurred()) - parentDir = filepath.Dir(pwd) - resultsFilePath = fmt.Sprintf("%v/results/dp-perf/%v/", parentDir, version) - Expect(os.MkdirAll(resultsFilePath, 0o777)).To(Succeed()) - resultsFilePath = fmt.Sprintf("%v/%v.md", resultsFilePath, version) - if portFwdPort != 0 { - port = fmt.Sprintf(":%s", strconv.Itoa(portFwdPort)) - } - addr = fmt.Sprintf("http://%s%s", address, port) Expect(resourceManager.Apply([]client.Object{ns})).To(Succeed()) Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) - outFile, err = os.OpenFile(resultsFilePath, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0o777) - Expect(err).To(BeNil()) + + port := ":80" + if portFwdPort != 0 { + port = fmt.Sprintf(":%s", 
strconv.Itoa(portFwdPort)) + } + addr = fmt.Sprintf("%s%s", address, port) + + resultsDir, err := framework.CreateResultsDir("dp-perf", version) + Expect(err).ToNot(HaveOccurred()) + + filename := filepath.Join(resultsDir, fmt.Sprintf("%s.md", version)) + outFile, err = framework.CreateResultsFile(filename) + Expect(err).ToNot(HaveOccurred()) Expect(framework.WriteSystemInfoToFile(outFile, clusterInfo)).To(Succeed()) }) + AfterAll(func() { + Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) + Expect(resourceManager.Delete([]client.Object{ns})).To(Succeed()) + outFile.Close() + }) + DescribeTable("Run each load test", func(target framework.Target, description string, counter int) { text := fmt.Sprintf("\n## Test%d: %s\n\n```text\n", counter, description) _, err := fmt.Fprint(outFile, text) - Expect(err).To(BeNil()) - Expect(framework.RunLoadTest( - []framework.Target{target}, - 1000, - 30*time.Second, - description, - outFile, - addr, - )).To(Succeed()) + Expect(err).ToNot(HaveOccurred()) + + cfg := framework.LoadTestConfig{ + Targets: []framework.Target{target}, + Rate: 1000, + Duration: 30 * time.Second, + Description: description, + Proxy: addr, + ServerName: "cafe.example.com", + } + _, metrics := framework.RunLoadTest(cfg) + + Expect(framework.WriteResults(outFile, &metrics)).To(Succeed()) + _, err = fmt.Fprint(outFile, "```\n") - Expect(err).To(BeNil()) + Expect(err).ToNot(HaveOccurred()) }, Entry("Running latte path based routing", t1, "Running latte path based routing", 1), Entry("Running coffee header based routing", t2, "Running coffee header based routing", 2), @@ -96,10 +108,4 @@ var _ = Describe("Dataplane performance", Ordered, Label("performance"), func() Entry("Running tea GET method based routing", t4, "Running tea GET method based routing", 4), Entry("Running tea POST method based routing", t5, "Running tea POST method based routing", 5), ) - - AfterAll(func() { - Expect(resourceManager.DeleteFromFiles(files, 
ns.Name)).To(Succeed()) - Expect(resourceManager.Delete([]client.Object{ns})).To(Succeed()) - outFile.Close() - }) }) diff --git a/tests/zero-downtime-upgrades/manifests/cafe-routes.yaml b/tests/suite/manifests/ngf-upgrade/cafe-routes.yaml similarity index 100% rename from tests/zero-downtime-upgrades/manifests/cafe-routes.yaml rename to tests/suite/manifests/ngf-upgrade/cafe-routes.yaml diff --git a/tests/zero-downtime-upgrades/manifests/cafe-secret.yaml b/tests/suite/manifests/ngf-upgrade/cafe-secret.yaml similarity index 100% rename from tests/zero-downtime-upgrades/manifests/cafe-secret.yaml rename to tests/suite/manifests/ngf-upgrade/cafe-secret.yaml diff --git a/tests/zero-downtime-upgrades/manifests/cafe.yaml b/tests/suite/manifests/ngf-upgrade/cafe.yaml similarity index 100% rename from tests/zero-downtime-upgrades/manifests/cafe.yaml rename to tests/suite/manifests/ngf-upgrade/cafe.yaml diff --git a/tests/zero-downtime-upgrades/manifests/gateway-updated.yaml b/tests/suite/manifests/ngf-upgrade/gateway-updated.yaml similarity index 100% rename from tests/zero-downtime-upgrades/manifests/gateway-updated.yaml rename to tests/suite/manifests/ngf-upgrade/gateway-updated.yaml diff --git a/tests/zero-downtime-upgrades/manifests/gateway.yaml b/tests/suite/manifests/ngf-upgrade/gateway.yaml similarity index 100% rename from tests/zero-downtime-upgrades/manifests/gateway.yaml rename to tests/suite/manifests/ngf-upgrade/gateway.yaml diff --git a/tests/zero-downtime-upgrades/values.yaml b/tests/suite/manifests/ngf-upgrade/values.yaml similarity index 100% rename from tests/zero-downtime-upgrades/values.yaml rename to tests/suite/manifests/ngf-upgrade/values.yaml diff --git a/tests/suite/system_suite_test.go b/tests/suite/system_suite_test.go index eee4b4e66c..d21a330285 100644 --- a/tests/suite/system_suite_test.go +++ b/tests/suite/system_suite_test.go @@ -1,21 +1,29 @@ package suite import ( + "context" "embed" "flag" "path" "path/filepath" "runtime" + "strings" 
"testing" + "time" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" apps "k8s.io/api/apps/v1" + coordination "k8s.io/api/coordination/v1" core "k8s.io/api/core/v1" apiext "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" k8sRuntime "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" ctlr "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" v1 "sigs.k8s.io/gateway-api/apis/v1" "github.com/nginxinc/nginx-gateway-fabric/tests/framework" @@ -32,8 +40,11 @@ func TestNGF(t *testing.T) { } var ( - gatewayAPIVersion = flag.String("gateway-api-version", "", "Version of Gateway API to install") - k8sVersion = flag.String("k8s-version", "latest", "Version of k8s being tested on") + gatewayAPIVersion = flag.String("gateway-api-version", "", "Supported Gateway API version for NGF under test") + gatewayAPIPrevVersion = flag.String( + "gateway-api-prev-version", "", "Supported Gateway API version for previous NGF release", + ) + k8sVersion = flag.String("k8s-version", "latest", "Version of k8s being tested on") // Configurable NGF installation variables. Helm values will be used as defaults if not specified. 
ngfImageRepository = flag.String("ngf-image-repo", "", "Image repo for NGF control plane") nginxImageRepository = flag.String("nginx-image-repo", "", "Image repo for NGF data plane") @@ -51,17 +62,32 @@ var ( portForwardStopCh = make(chan struct{}, 1) portFwdPort int timeoutConfig framework.TimeoutConfig + localChartPath string address string version string clusterInfo framework.ClusterInfo ) -var _ = BeforeSuite(func() { +const ( + releaseName = "ngf-test" + ngfNamespace = "nginx-gateway" +) + +type setupConfig struct { + chartPath string + gwAPIVersion string + deploy bool +} + +func setup(cfg setupConfig, extraInstallArgs ...string) { + log.SetLogger(GinkgoLogr) + k8sConfig := ctlr.GetConfigOrDie() scheme := k8sRuntime.NewScheme() Expect(core.AddToScheme(scheme)).To(Succeed()) Expect(apps.AddToScheme(scheme)).To(Succeed()) Expect(apiext.AddToScheme(scheme)).To(Succeed()) + Expect(coordination.AddToScheme(scheme)).To(Succeed()) Expect(v1.AddToScheme(scheme)).To(Succeed()) options := client.Options{ @@ -79,20 +105,27 @@ var _ = BeforeSuite(func() { TimeoutConfig: timeoutConfig, } - _, file, _, _ := runtime.Caller(0) - fileDir := path.Join(path.Dir(file), "../") - basepath := filepath.Dir(fileDir) + clusterInfo, err = resourceManager.GetClusterInfo() + Expect(err).ToNot(HaveOccurred()) - cfg := framework.InstallationConfig{ - ReleaseName: "ngf-test", - Namespace: "nginx-gateway", - ChartPath: filepath.Join(basepath, "deploy/helm-chart"), - NgfImageRepository: *ngfImageRepository, - NginxImageRepository: *nginxImageRepository, - ImageTag: *imageTag, - ImagePullPolicy: *imagePullPolicy, - ServiceType: *serviceType, - IsGKEInternalLB: *isGKEInternalLB, + if !cfg.deploy { + return + } + + installCfg := framework.InstallationConfig{ + ReleaseName: releaseName, + Namespace: ngfNamespace, + ChartPath: cfg.chartPath, + ServiceType: *serviceType, + IsGKEInternalLB: *isGKEInternalLB, + } + + // if we aren't installing from the public charts, then set the custom images + if 
!strings.HasPrefix(cfg.chartPath, "oci://") { + installCfg.NgfImageRepository = *ngfImageRepository + installCfg.NginxImageRepository = *nginxImageRepository + installCfg.ImageTag = *imageTag + installCfg.ImagePullPolicy = *imagePullPolicy } if *imageTag != "" { @@ -101,33 +134,38 @@ var _ = BeforeSuite(func() { version = "edge" } - clusterInfo, err = resourceManager.GetClusterInfo() - Expect(err).ToNot(HaveOccurred()) - - output, err := framework.InstallGatewayAPI(k8sClient, *gatewayAPIVersion, *k8sVersion) + output, err := framework.InstallGatewayAPI(k8sClient, cfg.gwAPIVersion, *k8sVersion) Expect(err).ToNot(HaveOccurred(), string(output)) - output, err = framework.InstallNGF(cfg) + output, err = framework.InstallNGF(installCfg, extraInstallArgs...) Expect(err).ToNot(HaveOccurred(), string(output)) - podName, err := framework.GetNGFPodName(k8sClient, cfg.Namespace, cfg.ReleaseName, timeoutConfig.CreateTimeout) + podNames, err := framework.GetReadyNGFPodNames( + k8sClient, + installCfg.Namespace, + installCfg.ReleaseName, + timeoutConfig.CreateTimeout, + ) Expect(err).ToNot(HaveOccurred()) + Expect(podNames).ToNot(HaveLen(0)) if *serviceType != "LoadBalancer" { - portFwdPort, err = framework.PortForward(k8sConfig, cfg.Namespace, podName, portForwardStopCh) + portFwdPort, err = framework.PortForward(k8sConfig, installCfg.Namespace, podNames[0], portForwardStopCh) address = "127.0.0.1" } else { - address, err = resourceManager.GetLBIPAddress(cfg.Namespace) + address, err = resourceManager.GetLBIPAddress(installCfg.Namespace) } Expect(err).ToNot(HaveOccurred()) -}) +} -var _ = AfterSuite(func() { - portForwardStopCh <- struct{}{} +func teardown() { + if portFwdPort != 0 { + portForwardStopCh <- struct{}{} + } cfg := framework.InstallationConfig{ - ReleaseName: "ngf-test", - Namespace: "nginx-gateway", + ReleaseName: releaseName, + Namespace: ngfNamespace, } output, err := framework.UninstallNGF(cfg, k8sClient) @@ -135,4 +173,47 @@ var _ = AfterSuite(func() { output, 
err = framework.UninstallGatewayAPI(*gatewayAPIVersion, *k8sVersion) Expect(err).ToNot(HaveOccurred(), string(output)) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute) + defer cancel() + + Expect(wait.PollUntilContextCancel( + ctx, + 500*time.Millisecond, + true, /* poll immediately */ + func(ctx context.Context) (bool, error) { + key := types.NamespacedName{Name: ngfNamespace} + if err := k8sClient.Get(ctx, key, &core.Namespace{}); err != nil && apierrors.IsNotFound(err) { + return true, nil + } + + return false, nil + }, + )).To(Succeed()) +} + +var _ = BeforeSuite(func() { + _, file, _, _ := runtime.Caller(0) + fileDir := path.Join(path.Dir(file), "../") + basepath := filepath.Dir(fileDir) + localChartPath = filepath.Join(basepath, "deploy/helm-chart") + + cfg := setupConfig{ + chartPath: localChartPath, + gwAPIVersion: *gatewayAPIVersion, + deploy: true, + } + + // If we are running the upgrade test only, then skip the initial deployment. + // The upgrade test will deploy its own version of NGF. + suiteConfig, _ := GinkgoConfiguration() + if suiteConfig.LabelFilter == "upgrade" { + cfg.deploy = false + } + + setup(cfg) +}) + +var _ = AfterSuite(func() { + teardown() }) diff --git a/tests/suite/upgrade_test.go b/tests/suite/upgrade_test.go new file mode 100644 index 0000000000..41b239f642 --- /dev/null +++ b/tests/suite/upgrade_test.go @@ -0,0 +1,266 @@ +package suite + +import ( + "bytes" + "context" + "fmt" + "os" + "path/filepath" + "strings" + "sync" + "time" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + coordination "k8s.io/api/coordination/v1" + core "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/wait" + "sigs.k8s.io/controller-runtime/pkg/client" + v1 "sigs.k8s.io/gateway-api/apis/v1" + + "github.com/nginxinc/nginx-gateway-fabric/tests/framework" +) + +// This test installs the latest released version of NGF, then upgrades to the edge version (or dev version). +// During the upgrade, traffic is continuously sent to ensure no downtime. +// We also check that the leader election lease has been updated, and that Gateway updates are processed. +var _ = Describe("Upgrade testing", Label("upgrade"), func() { + var ( + files = []string{ + "ngf-upgrade/cafe.yaml", + "ngf-upgrade/cafe-secret.yaml", + "ngf-upgrade/gateway.yaml", + "ngf-upgrade/cafe-routes.yaml", + } + + ns = &core.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + Name: "ngf-upgrade", + }, + } + + valuesFile = "manifests/ngf-upgrade/values.yaml" + resultsFile *os.File + resultsDir string + skipped bool + ) + + BeforeEach(func() { + if !clusterInfo.IsGKE { + skipped = true + Skip("Upgrade tests can only run in GKE") + } + + if *serviceType != "LoadBalancer" { + skipped = true + Skip("GW_SERVICE_TYPE must be 'LoadBalancer' for upgrade tests") + } + + // this test is unique in that it needs to install the previous version of NGF, + // so we need to uninstall the version installed at the suite level, then install the custom version + teardown() + + cfg := setupConfig{ + chartPath: "oci://ghcr.io/nginxinc/charts/nginx-gateway-fabric", + gwAPIVersion: *gatewayAPIPrevVersion, + deploy: true, + } + setup(cfg, "--values", valuesFile) + + Expect(resourceManager.Apply([]client.Object{ns})).To(Succeed()) + Expect(resourceManager.ApplyFromFiles(files, ns.Name)).To(Succeed()) + Expect(resourceManager.WaitForAppsToBeReady(ns.Name)).To(Succeed()) + + var err error + resultsDir, err = 
framework.CreateResultsDir("ngf-upgrade", version) + Expect(err).ToNot(HaveOccurred()) + + filename := filepath.Join(resultsDir, fmt.Sprintf("%s.md", version)) + resultsFile, err = framework.CreateResultsFile(filename) + Expect(err).ToNot(HaveOccurred()) + Expect(framework.WriteSystemInfoToFile(resultsFile, clusterInfo)).To(Succeed()) + }) + + AfterEach(func() { + if skipped { + Skip("") + } + + Expect(resourceManager.DeleteFromFiles(files, ns.Name)).To(Succeed()) + Expect(resourceManager.Delete([]client.Object{ns})).To(Succeed()) + resultsFile.Close() + }) + + It("upgrades NGF with zero downtime", func() { + cfg := framework.InstallationConfig{ + ReleaseName: releaseName, + Namespace: ngfNamespace, + ChartPath: localChartPath, + NgfImageRepository: *ngfImageRepository, + NginxImageRepository: *nginxImageRepository, + ImageTag: *imageTag, + ImagePullPolicy: *imagePullPolicy, + ServiceType: *serviceType, + IsGKEInternalLB: *isGKEInternalLB, + } + + type metricsResults struct { + metrics *framework.Metrics + testName string + scheme string + } + metricsCh := make(chan *metricsResults, 2) + var wg sync.WaitGroup + + type testCfg struct { + desc string + port string + target framework.Target + } + + tests := []testCfg{ + { + desc: "Send http /coffee traffic", + port: "80", + target: framework.Target{ + Method: "GET", + URL: "http://cafe.example.com/coffee", + }, + }, + { + desc: "Send https /tea traffic", + port: "443", + target: framework.Target{ + Method: "GET", + URL: "https://cafe.example.com/tea", + }, + }, + } + + for _, test := range tests { + wg.Add(1) + go func(cfg testCfg) { + defer GinkgoRecover() + defer wg.Done() + + loadTestCfg := framework.LoadTestConfig{ + Targets: []framework.Target{cfg.target}, + Rate: 100, + Duration: 60 * time.Second, + Description: cfg.desc, + Proxy: fmt.Sprintf("%s:%s", address, cfg.port), + ServerName: "cafe.example.com", + } + + results, metrics := framework.RunLoadTest(loadTestCfg) + + scheme := strings.Split(cfg.target.URL, 
"://")[0] + metricsRes := metricsResults{ + metrics: &metrics, + testName: fmt.Sprintf("\n## Test: %s\n\n```text\n", cfg.desc), + scheme: scheme, + } + + buf := new(bytes.Buffer) + encoder := framework.NewCSVEncoder(buf) + for _, res := range results { + res := res + Expect(encoder.Encode(&res)).To(Succeed()) + } + + csvName := fmt.Sprintf("%s.csv", scheme) + filename := filepath.Join(resultsDir, csvName) + csvFile, err := framework.CreateResultsFile(filename) + Expect(err).ToNot(HaveOccurred()) + + _, err = fmt.Fprint(csvFile, buf.String()) + Expect(err).ToNot(HaveOccurred()) + csvFile.Close() + + output, err := framework.GeneratePNG(resultsDir, csvName, fmt.Sprintf("%s.png", scheme)) + Expect(err).ToNot(HaveOccurred(), string(output)) + + metricsCh <- &metricsRes + }(test) + } + + // allow traffic flow to start + time.Sleep(2 * time.Second) + + // update Gateway API and NGF + output, err := framework.InstallGatewayAPI(k8sClient, *gatewayAPIVersion, *k8sVersion) + Expect(err).ToNot(HaveOccurred(), string(output)) + + output, err = framework.UpgradeNGF(cfg, "--values", valuesFile) + Expect(err).ToNot(HaveOccurred(), string(output)) + + Expect(resourceManager.ApplyFromFiles([]string{"ngf-upgrade/gateway-updated.yaml"}, ns.Name)).To(Succeed()) + + podNames, err := framework.GetReadyNGFPodNames(k8sClient, ngfNamespace, releaseName, timeoutConfig.GetTimeout) + Expect(err).ToNot(HaveOccurred()) + Expect(podNames).ToNot(HaveLen(0)) + + // ensure that the leader election lease has been updated to the new pods + leaseCtx, leaseCancel := context.WithTimeout(context.Background(), 1*time.Minute) + defer leaseCancel() + + var lease coordination.Lease + key := types.NamespacedName{Name: "ngf-test-nginx-gateway-fabric-leader-election", Namespace: ngfNamespace} + Expect(wait.PollUntilContextCancel( + leaseCtx, + 500*time.Millisecond, + true, /* poll immediately */ + func(ctx context.Context) (bool, error) { + Expect(k8sClient.Get(leaseCtx, key, &lease)).To(Succeed()) + + if 
lease.Spec.HolderIdentity != nil { + for _, podName := range podNames { + if podName == *lease.Spec.HolderIdentity { + return true, nil + } + } + } + + return false, nil + }, + )).To(Succeed()) + + // ensure that the Gateway has been properly updated with a new listener + gwCtx, gwCancel := context.WithTimeout(context.Background(), 1*time.Minute) + defer gwCancel() + + var gw v1.Gateway + key = types.NamespacedName{Name: "gateway", Namespace: ns.Name} + Expect(wait.PollUntilContextCancel( + gwCtx, + 500*time.Millisecond, + true, /* poll immediately */ + func(ctx context.Context) (bool, error) { + Expect(k8sClient.Get(ctx, key, &gw)).To(Succeed()) + expListenerName := "http-new" + for _, listener := range gw.Status.Listeners { + if listener.Name == v1.SectionName(expListenerName) { + return true, nil + } + } + return false, nil + }, + )).To(Succeed()) + + wg.Wait() + close(metricsCh) + + // write out the results + for res := range metricsCh { + _, err := fmt.Fprint(resultsFile, res.testName) + Expect(err).ToNot(HaveOccurred()) + + Expect(framework.WriteResults(resultsFile, res.metrics)).To(Succeed()) + + _, err = fmt.Fprintf(resultsFile, "```\n\n![%[1]v.png](%[1]v.png)\n", res.scheme) + Expect(err).ToNot(HaveOccurred()) + } + }) +}) diff --git a/tests/zero-downtime-upgrades/requests-plot.gp b/tests/zero-downtime-upgrades/requests-plot.gp deleted file mode 100644 index b08003d703..0000000000 --- a/tests/zero-downtime-upgrades/requests-plot.gp +++ /dev/null @@ -1,21 +0,0 @@ -set terminal png size 800,600 -set output "graph.png" -set title "Request Outcomes Over Time" - -set xdata time -set timefmt "%Y-%m-%d %H:%M:%S" -set datafile separator "," - -# Y-axis settings -set yrange [-0.5:1.5] # Provide some padding around 0 and 1 for better visualization -set ytics ("Failed" 0, "Success" 1) -set grid ytics # Gridlines for Y - -# Define the palette: 0 for red (Failure) and 1 for green (Success) -set palette defined (0 "red", 1 "green") - -# Hide the colorbox -unset colorbox 
- -# Plotting data -plot "results.csv" using 1:2:2 with points palette pointtype 7 pointsize 1.5 title "Request Status" diff --git a/tests/zero-downtime-upgrades/zero-downtime-upgrades.md b/tests/zero-downtime-upgrades/zero-downtime-upgrades.md deleted file mode 100644 index e1b11b166d..0000000000 --- a/tests/zero-downtime-upgrades/zero-downtime-upgrades.md +++ /dev/null @@ -1,262 +0,0 @@ -# Zero-Downtime Upgrades - -This document describes a test plan for testing zero-downtime upgrades of NGF. - -*Zero-downtime upgrades* means that during an NGF upgrade clients don't experience any -interruptions to the traffic they send to applications exposed via NGF. - - - -- [Zero-Downtime Upgrades](#zero-downtime-upgrades) - - [Goals](#goals) - - [Non-Goals](#non-goals) - - [Test Environment](#test-environment) - - [Steps](#steps) - - [Start](#start) - - [Upgrade](#upgrade) - - [After Upgrade](#after-upgrade) - - [Analyze](#analyze) - - [Results](#results) - - [Appendix](#appendix) - - [Pod Affinity](#pod-affinity) - - [Converting Curl Output to a Graph](#converting-curl-output-to-a-graph) - - - -## Goals - -- Ensure that upgrading NGF doesn't lead to any loss of traffic flowing through the data plane. -- Ensure that after an upgrade, NGF can process changes to resources. -- Detect if any special instructions will be required to provide to users to perform - an upgrade. - -## Non-Goals - -During an upgrade, Kubernetes will shut down existing NGF Pods by sending a SIGTERM. If the Pod doesn't terminate in 30 -seconds (the default period) , Kubernetes will send a SIGKILL. - -When proxying Websocket or any long-lived connections, NGINX will not terminate until -that connection is closed by either the client or the backend. This means that unless all those connections are closed -by clients/backends before or during an upgrade (which is highly unlikely), NGINX will not terminate, which means -Kubernetes will kill NGINX. 
As a result, the clients will see the connections abruptly closed and thus experience -downtime. - -As a result, we *will not* use any long-live connections in this test, because NGF cannot support zero-downtime upgrades -in this case. - -## Test Environment - -- A Kubernetes cluster with 10 nodes on GKE - - Node: e2-medium (2 vCPU, 4GB memory) - - Enabled GKE logging. -- Tester VMs on Google Cloud Platform: - - Configuration: - - Debian - - Install packages: wrk, curl, gnuplot - - Location - same zone as the Kubernetes cluster. - - First VM for HTTP traffic - - Second VM - for sending HTTPs traffic -- NGF - - Deployment with 2 replicas scheduled on different nodes. - - Exposed via a Service with type LoadBalancer, private IP - - Gateway, two listeners - HTTP and HTTPs - - Two backends: - - Coffee - 3 replicas - - Tea - 3 replicas - - Two HTTPRoutes - - Coffee (HTTP) - - Tea (HTTPS) - -Notes: - -- For sending traffic, we will use both wrk and curl. - - *wrk* will generate a lot of traffic continuously, and it will have a high chance of catching of any - (however small) periods of downtime. - - *curl* will generate 1 request every 0.1s. While it might not catch small periods of downtime, it will - give us timeline of failed request for big periods of downtime, which wrk doesn't do. -- We use Pod anti-affinity to tell Kubernetes to schedule NGF Pods on different nodes. We also use a 10 node cluster so - that the chance of Kubernetes scheduling new Pods on the same - nodes is minimal. Scheduling new Pods on different nodes will help better catch - any interdependencies with an external load balancer (typically the node of a new Pod will be added - to the pool in the load balancer, and the node of an old one will be removed). - -## Steps - -### Start - -1. Create a cluster. -2. Install the Gateway API resources that are supported by the latest release of NGF. -3. 
Install the latest release of NGF: - - ```console - helm install ngf-test oci://ghcr.io/nginxinc/charts/nginx-gateway-fabric --create-namespace --wait -n nginx-gateway --values values.yaml - ``` - - This deploys NGF with 2 replicas with added [anti-affinity](#pod-affinity). -4. Deploy backend apps: - - ```console - kubectl apply -f manifests/cafe.yaml - ``` - -5. Configure Gateway: - - ```console - kubectl apply -f manifests/cafe-secret.yaml - kubectl apply -f manifests/gateway.yaml - ``` - -6. Expose apps via HTTPRoutes - - ```console - kubectl apply -f manifests/cafe-routes.yaml - ``` - -7. Check statuses of the Gateway and HTTPRoutes for errors. -8. In Google Monitoring, check NGF and NGINX error logs for errors. -9. In Tester VMs, update `/etc/hosts` to have an entry with the External IP of the NGF Service (`10.128.0.10` in this - case): - - ```text - 10.128.0.10 cafe.example.com - ``` - -### Upgrade - -1. Follow the [upgrade instructions](https://docs.nginx.com/nginx-gateway-fabric/installation/installing-ngf/helm/#upgrade-nginx-gateway-fabric) to: - 1. Upgrade Gateway API version to the one that matches the supported version of new release. - 2. Upgrade NGF CRDs. -2. Start sending traffic using wrk from tester VMs for 1 minute: - - Tester VM 1: - - wrk: - - ```console - wrk -t2 -c100 -d60s --latency --timeout 2s http://cafe.example.com/coffee - ``` - - - curl: - - ```console - for i in `seq 1 600`; do printf "\nRequest $i\n" && date --rfc-3339=ns && curl -sS --connect-timeout 2 http://cafe.example.com/coffee 2>&1 && sleep 0.1s; done > results.txt - ``` - - - Tester VM 2: - - wrk: - - ```console - wrk -t2 -c100 -d60s --latency --timeout 2s https://cafe.example.com/tea - ``` - - - curl: - - ```console - for i in `seq 1 600`; do printf "\nRequest $i\n" && date --rfc-3339=ns && curl -k -sS --connect-timeout 2 https://cafe.example.com/tea 2>&1 && sleep 0.1s; done > results.txt - ``` - -3. 
**Immediately** upgrade NGF to the edge version: - - ```console - helm upgrade ngf-test oci://ghcr.io/nginxinc/charts/nginx-gateway-fabric -n nginx-gateway --version 0.0.0-edge --values values.yaml --wait - ``` - -4. Ensure the new Pods are running and the old ones terminate. - -### After Upgrade - -1. Update the Gateway resource by adding one new listener `http-new`: - - ```console - kubectl apply -f manifests/gateway-updated.yaml - ``` - -2. Check that at NGF has a leader elected among the new Pods: - - ```console - kubectl -n nginx-gateway get lease - ``` - -3. Ensure the status of the Gateway resource includes the new listener. - -### Analyze - -- Tester VMs: - - Analyze the output of wrk commands for errors and latencies. - - Create graphs from curl output (see [instructions](#converting-curl-output-to-a-graph) in Appendix) and check for - any failures on them. -- Check the old Pods logs in Google Monitoring - - NGINX Access logs - we expect only 200 responses. - Google Monitoring query: - - ```text - severity=INFO - "GET" "HTTP/1.1" -"200" - ``` - - - NGINX Error logs - we expect no errors or warnings - Google Monitoring query: - - ```text - severity=ERROR - SEARCH("`[warn]`") OR SEARCH("`[error]`") - ``` - - - NGF logs - we expect no errors - - Specifically look at the NGF logs before it exited, to make sure all components shutdown correctly. -- Check the new Pods (in Google Monitoring) - - NGINX Access logs - only 200 responses. - - NGINX Error logs - no errors or warnings. 
- - NGF logs - no errors - -## Results - -- [1.0.0](results/1.0.0/1.0.0.md) -- [1.0.0-special](results/1.0.0-special/1.0.0-special.md) -- [1.1.0](results/1.1.0/1.1.0.md) - -## Appendix - -### Pod Affinity - -- To ensure Kubernetes doesn't schedule NGF Pods on the same nodes, use an anti-affinity rule: - - ```yaml - spec: - affinity: - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - topologyKey: kubernetes.io/hostname - labelSelector: - matchLabels: - app.kubernetes.io/name: nginx-gateway - ``` - -### Converting Curl Output to a Graph - -The output of a curl command is saved in `results.txt`. To convert it into a graph, -go through the following steps: - -1. Convert the output into a csv file: - - ```console - awk ' - /Request [0-9]+/ { - getline - datetime = $0 - getline - if ($1 == "curl:") { - print datetime ",0" # Failed - } else { - print datetime ",1" # Success - } - }' results.txt > results.csv - ``` - -2. Plot a graph using the csv file: - - ```console - gnuplot requests-plot.gp - ``` - - As a result, gnuplot will create `graph.png` with a graph. -3. Download the resulting `graph.png` to you local machine. -4. Also download `results.csv`.