From 7520fbf75422c7bd1c02c6babed5fb6769e26816 Mon Sep 17 00:00:00 2001 From: ChristianZaccaria Date: Mon, 15 Apr 2024 11:46:43 +0100 Subject: [PATCH 1/3] Rayclient Route to resolve its own ingress domain, and enable local_interactive by default --- pkg/controllers/raycluster_controller.go | 15 ++++++--------- pkg/controllers/support.go | 16 ---------------- 2 files changed, 6 insertions(+), 25 deletions(-) diff --git a/pkg/controllers/raycluster_controller.go b/pkg/controllers/raycluster_controller.go index db13f0854..f60845eb5 100644 --- a/pkg/controllers/raycluster_controller.go +++ b/pkg/controllers/raycluster_controller.go @@ -106,7 +106,6 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, client.IgnoreNotFound(err) } - isLocalInteractive := annotationBoolVal(ctx, &cluster, "sdk.codeflare.dev/local_interactive", false) if !r.IsOpenShiftInitialized { r.IsOpenShift = isOpenShift(ctx, r.kubeClient, &cluster) r.IsOpenShiftInitialized = true @@ -177,13 +176,11 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{RequeueAfter: requeueTime}, err } - if isLocalInteractive { - logger.Info("Creating RayClient Route") - _, err := r.routeClient.Routes(cluster.Namespace).Apply(ctx, desiredRayClientRoute(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) - if err != nil { - logger.Error(err, "Failed to update RayClient Route") - return ctrl.Result{RequeueAfter: requeueTime}, err - } + logger.Info("Creating RayClient Route") + _, err = r.routeClient.Routes(cluster.Namespace).Apply(ctx, desiredRayClientRoute(&cluster), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) + if err != nil { + logger.Error(err, "Failed to update RayClient Route") + return ctrl.Result{RequeueAfter: requeueTime}, err } } else if cluster.Status.State != "suspended" && !r.isRayDashboardOAuthEnabled() && !r.IsOpenShift { @@ -195,7 +192,7 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) logger.Info("WARN: Failed to update Dashboard Ingress", "error", err.Error(), logRequeueing, true) return ctrl.Result{RequeueAfter: requeueTime}, err } - if isLocalInteractive && ingressDomain != "" { + if ingressDomain != "" { logger.Info("Creating RayClient Ingress") _, err := r.kubeClient.NetworkingV1().Ingresses(cluster.Namespace).Apply(ctx, desiredRayClientIngress(&cluster, ingressDomain), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) if err != nil { diff --git a/pkg/controllers/support.go b/pkg/controllers/support.go index 44b21bda9..3749fdf03 100644 --- a/pkg/controllers/support.go +++ b/pkg/controllers/support.go @@ -3,7 +3,6 @@ package controllers import ( "context" "fmt" - "strconv" "strings" rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" @@ -30,7 +29,6 @@ func desiredRayClientRoute(cluster *rayv1.RayCluster) *routeapply.RouteApplyConf return routeapply.Route(rayClientNameFromCluster(cluster), cluster.Namespace). WithLabels(map[string]string{"ray.io/cluster-name": cluster.Name}). WithSpec(routeapply.RouteSpec(). - WithHost(rayClientNameFromCluster(cluster) + "-" + cluster.Namespace). WithTo(routeapply.RouteTargetReference().WithKind("Service").WithName(serviceNameFromCluster(cluster)).WithWeight(100)). WithPort(routeapply.RoutePort().WithTargetPort(intstr.FromString("client"))). WithTLS(routeapply.TLSConfig().WithTermination("passthrough")), @@ -172,17 +170,3 @@ func (r *RayClusterReconciler) isRayDashboardOAuthEnabled() bool { } return true } - -func annotationBoolVal(ctx context.Context, cluster *rayv1.RayCluster, annotation string, defaultValue bool) bool { - logger := ctrl.LoggerFrom(ctx) - val, exists := cluster.ObjectMeta.Annotations[annotation] - if !exists || val == "" { - return defaultValue - } - boolVal, err := strconv.ParseBool(val) - if err != nil { - logger.Error(err, "Could not convert annotation value to bool", "annotation", annotation, "value", val) - return defaultValue - } - return boolVal -} From 69588ea624983df17b4af386f9fce1a67d4e7c51 Mon Sep 17 00:00:00 2001 From: ChristianZaccaria Date: Mon, 15 Apr 2024 16:09:39 +0100 Subject: [PATCH 2/3] Add ingressDomain to configmap to work with local and workflow e2e tests --- config/e2e/config.yaml | 1 + main.go | 1 + pkg/config/config.go | 2 ++ pkg/controllers/raycluster_controller.go | 18 ++++++------ pkg/controllers/support.go | 36 ++++++++---------------- 5 files changed, 24 insertions(+), 34 deletions(-) diff --git a/config/e2e/config.yaml b/config/e2e/config.yaml index 83320bb0c..6b8f55a0c 100644 --- a/config/e2e/config.yaml +++ b/config/e2e/config.yaml @@ -6,3 +6,4 @@ data: config.yaml: | kuberay: rayDashboardOAuthEnabled: false + ingressDomain: "kind" diff --git a/main.go b/main.go index 8a129e806..58c6c73cb 100644 --- a/main.go +++ b/main.go @@ -115,6 +115,7 @@ func main() { }, KubeRay: &config.KubeRayConfiguration{ RayDashboardOAuthEnabled: pointer.Bool(true), + IngressDomain: "fake.domain", }, } diff --git a/pkg/config/config.go b/pkg/config/config.go index 124bb0ebb..c71678f11 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -33,6 +33,8 @@ type CodeFlareOperatorConfiguration struct { type KubeRayConfiguration struct { RayDashboardOAuthEnabled *bool `json:"rayDashboardOAuthEnabled,omitempty"` + + IngressDomain string `json:"ingressDomain,omitempty"` } type ControllerManager struct { diff --git a/pkg/controllers/raycluster_controller.go b/pkg/controllers/raycluster_controller.go index f60845eb5..9fe6dfeb0 100644 --- a/pkg/controllers/raycluster_controller.go +++ b/pkg/controllers/raycluster_controller.go @@ -184,21 +184,21 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) } } else if cluster.Status.State != "suspended" && !r.isRayDashboardOAuthEnabled() && !r.IsOpenShift { - ingressDomain := "" // TODO: ingressDomain should be retrieved by the CFO and used here to fix local_interactive. Jira: https://issues.redhat.com/browse/RHOAIENG-5330 + logger.Info("We detected being on Vanilla Kubernetes!") logger.Info("Creating Dashboard Ingress") - _, err := r.kubeClient.NetworkingV1().Ingresses(cluster.Namespace).Apply(ctx, desiredClusterIngress(&cluster, getIngressHost(ctx, r.kubeClient, &cluster, ingressDomain)), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) + dashboardName := dashboardNameFromCluster(&cluster) + _, err := r.kubeClient.NetworkingV1().Ingresses(cluster.Namespace).Apply(ctx, desiredClusterIngress(&cluster, r.getIngressHost(ctx, r.kubeClient, &cluster, dashboardName)), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) if err != nil { // This log is info level since errors are not fatal and are expected logger.Info("WARN: Failed to update Dashboard Ingress", "error", err.Error(), logRequeueing, true) return ctrl.Result{RequeueAfter: requeueTime}, err } - if ingressDomain != "" { - logger.Info("Creating RayClient Ingress") - _, err := r.kubeClient.NetworkingV1().Ingresses(cluster.Namespace).Apply(ctx, desiredRayClientIngress(&cluster, ingressDomain), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) - if err != nil { - logger.Error(err, "Failed to update RayClient Ingress") - return ctrl.Result{RequeueAfter: requeueTime}, err - } + logger.Info("Creating RayClient Ingress") + rayClientName := rayClientNameFromCluster(&cluster) + _, err = r.kubeClient.NetworkingV1().Ingresses(cluster.Namespace).Apply(ctx, desiredRayClientIngress(&cluster, r.getIngressHost(ctx, r.kubeClient, &cluster, rayClientName)), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) + if err != nil { + logger.Error(err, "Failed to update RayClient Ingress") + return ctrl.Result{RequeueAfter: requeueTime}, err } } diff --git a/pkg/controllers/support.go b/pkg/controllers/support.go index 3749fdf03..495639454 100644 --- a/pkg/controllers/support.go +++ b/pkg/controllers/support.go @@ -8,7 +8,6 @@ import ( rayv1 "github.com/ray-project/kuberay/ray-operator/apis/ray/v1" networkingv1 "k8s.io/api/networking/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" v1 "k8s.io/client-go/applyconfigurations/meta/v1" @@ -39,7 +38,7 @@ func desiredRayClientRoute(cluster *rayv1.RayCluster) *routeapply.RouteApplyConf } // Create an Ingress object for the RayCluster -func desiredRayClientIngress(cluster *rayv1.RayCluster, ingressDomain string) *networkingv1ac.IngressApplyConfiguration { +func desiredRayClientIngress(cluster *rayv1.RayCluster, ingressHost string) *networkingv1ac.IngressApplyConfiguration { return networkingv1ac.Ingress(rayClientNameFromCluster(cluster), cluster.Namespace). WithLabels(map[string]string{"ray.io/cluster-name": cluster.Name}). WithAnnotations(map[string]string{ @@ -55,7 +54,7 @@ func desiredRayClientIngress(cluster *rayv1.RayCluster, ingressDomain string) *n WithSpec(networkingv1ac.IngressSpec(). WithIngressClassName("nginx"). WithRules(networkingv1ac.IngressRule(). - WithHost(rayClientNameFromCluster(cluster) + "-" + cluster.Namespace + "." + ingressDomain). + WithHost(ingressHost). WithHTTP(networkingv1ac.HTTPIngressRuleValue(). WithPaths(networkingv1ac.HTTPIngressPath(). WithPath("/"). @@ -85,7 +84,7 @@ func desiredClusterIngress(cluster *rayv1.RayCluster, ingressHost string) *netwo WithUID(types.UID(cluster.UID))). WithSpec(networkingv1ac.IngressSpec(). WithRules(networkingv1ac.IngressRule(). - WithHost(ingressHost). // KinD hostname or ingressDomain + WithHost(ingressHost). // Full Hostname WithHTTP(networkingv1ac.HTTPIngressRuleValue(). WithPaths(networkingv1ac.HTTPIngressPath(). WithPath("/"). @@ -104,19 +103,6 @@ func desiredClusterIngress(cluster *rayv1.RayCluster, ingressHost string) *netwo ) } -// isOnKindCluster checks if the current cluster is a KinD cluster. -// It searches for a node with a label commonly used by KinD clusters. -func isOnKindCluster(clientset *kubernetes.Clientset) (bool, error) { - nodes, err := clientset.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{ - LabelSelector: "kubernetes.io/hostname=kind-control-plane", - }) - if err != nil { - return false, err - } - // If we find one or more nodes with the label, assume it's a KinD cluster. - return len(nodes.Items) > 0, nil -} - // getDiscoveryClient returns a discovery client for the current reconciler func getDiscoveryClient(config *rest.Config) (*discovery.DiscoveryClient, error) { return discovery.NewDiscoveryClientForConfig(config) @@ -153,15 +139,15 @@ func isOpenShift(ctx context.Context, clientset *kubernetes.Clientset, cluster * } // getIngressHost generates the cluster URL string based on the cluster type, RayCluster, and ingress domain. -func getIngressHost(ctx context.Context, clientset *kubernetes.Clientset, cluster *rayv1.RayCluster, ingressDomain string) string { - logger := ctrl.LoggerFrom(ctx) - onKind, _ := isOnKindCluster(clientset) - if onKind && ingressDomain == "" { - logger.Info("We detected being on a KinD cluster!") - return "kind" +func (r *RayClusterReconciler) getIngressHost(ctx context.Context, clientset *kubernetes.Clientset, cluster *rayv1.RayCluster, ingressNameFromCluster string) string { + ingressDomain := "fake.domain" + if r.Config != nil && r.Config.IngressDomain != "" { + ingressDomain = r.Config.IngressDomain } - logger.Info("We detected being on Vanilla Kubernetes!") - return fmt.Sprintf("ray-dashboard-%s-%s.%s", cluster.Name, cluster.Namespace, ingressDomain) + if ingressDomain == "kind" { + return ingressDomain + } + return fmt.Sprintf("%s-%s.%s", ingressNameFromCluster, cluster.Namespace, ingressDomain) } func (r *RayClusterReconciler) isRayDashboardOAuthEnabled() bool { From 36704daec6f5026efe31187b8f35db68d448b590 Mon Sep 17 00:00:00 2001 From: ChristianZaccaria Date: Mon, 15 Apr 2024 17:57:55 +0100 Subject: [PATCH 3/3] Return error message if IngressDomain configuration not set --- main.go | 2 +- pkg/config/config.go | 2 +- pkg/controllers/raycluster_controller.go | 12 ++++++++++-- pkg/controllers/support.go | 10 ++++++---- 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/main.go b/main.go index 58c6c73cb..09ebe2599 100644 --- a/main.go +++ b/main.go @@ -115,7 +115,7 @@ func main() { }, KubeRay: &config.KubeRayConfiguration{ RayDashboardOAuthEnabled: pointer.Bool(true), - IngressDomain: "fake.domain", + IngressDomain: "", }, } diff --git a/pkg/config/config.go b/pkg/config/config.go index c71678f11..08e2579b9 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -34,7 +34,7 @@ type CodeFlareOperatorConfiguration struct { type KubeRayConfiguration struct { RayDashboardOAuthEnabled *bool `json:"rayDashboardOAuthEnabled,omitempty"` - IngressDomain string `json:"ingressDomain,omitempty"` + IngressDomain string `json:"ingressDomain"` } type ControllerManager struct { diff --git a/pkg/controllers/raycluster_controller.go b/pkg/controllers/raycluster_controller.go index 9fe6dfeb0..4f60545f7 100644 --- a/pkg/controllers/raycluster_controller.go +++ b/pkg/controllers/raycluster_controller.go @@ -187,7 +187,11 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) logger.Info("We detected being on Vanilla Kubernetes!") logger.Info("Creating Dashboard Ingress") dashboardName := dashboardNameFromCluster(&cluster) - _, err := r.kubeClient.NetworkingV1().Ingresses(cluster.Namespace).Apply(ctx, desiredClusterIngress(&cluster, r.getIngressHost(ctx, r.kubeClient, &cluster, dashboardName)), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) + dashboardIngressHost, err := r.getIngressHost(ctx, r.kubeClient, &cluster, dashboardName) + if err != nil { + return ctrl.Result{RequeueAfter: requeueTime}, err + } + _, err = r.kubeClient.NetworkingV1().Ingresses(cluster.Namespace).Apply(ctx, desiredClusterIngress(&cluster, dashboardIngressHost), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) if err != nil { // This log is info level since errors are not fatal and are expected logger.Info("WARN: Failed to update Dashboard Ingress", "error", err.Error(), logRequeueing, true) @@ -195,7 +199,11 @@ func (r *RayClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) } logger.Info("Creating RayClient Ingress") rayClientName := rayClientNameFromCluster(&cluster) - _, err = r.kubeClient.NetworkingV1().Ingresses(cluster.Namespace).Apply(ctx, desiredRayClientIngress(&cluster, r.getIngressHost(ctx, r.kubeClient, &cluster, rayClientName)), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) + rayClientIngressHost, err := r.getIngressHost(ctx, r.kubeClient, &cluster, rayClientName) + if err != nil { + return ctrl.Result{RequeueAfter: requeueTime}, err + } + _, err = r.kubeClient.NetworkingV1().Ingresses(cluster.Namespace).Apply(ctx, desiredRayClientIngress(&cluster, rayClientIngressHost), metav1.ApplyOptions{FieldManager: controllerName, Force: true}) if err != nil { logger.Error(err, "Failed to update RayClient Ingress") return ctrl.Result{RequeueAfter: requeueTime}, err diff --git a/pkg/controllers/support.go b/pkg/controllers/support.go index 495639454..7871e66a8 100644 --- a/pkg/controllers/support.go +++ b/pkg/controllers/support.go @@ -139,15 +139,17 @@ func isOpenShift(ctx context.Context, clientset *kubernetes.Clientset, cluster * } // getIngressHost generates the cluster URL string based on the cluster type, RayCluster, and ingress domain. -func (r *RayClusterReconciler) getIngressHost(ctx context.Context, clientset *kubernetes.Clientset, cluster *rayv1.RayCluster, ingressNameFromCluster string) string { - ingressDomain := "fake.domain" +func (r *RayClusterReconciler) getIngressHost(ctx context.Context, clientset *kubernetes.Clientset, cluster *rayv1.RayCluster, ingressNameFromCluster string) (string, error) { + ingressDomain := "" if r.Config != nil && r.Config.IngressDomain != "" { ingressDomain = r.Config.IngressDomain + } else { + return "", fmt.Errorf("missing IngressDomain configuration in ConfigMap 'codeflare-operator-config'") } if ingressDomain == "kind" { - return ingressDomain + return ingressDomain, nil } - return fmt.Sprintf("%s-%s.%s", ingressNameFromCluster, cluster.Namespace, ingressDomain) + return fmt.Sprintf("%s-%s.%s", ingressNameFromCluster, cluster.Namespace, ingressDomain), nil } func (r *RayClusterReconciler) isRayDashboardOAuthEnabled() bool {