Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions api/v1alpha1/llmserverpool_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,19 @@ import (
// LLMServerPoolSpec defines the desired state of LLMServerPool
type LLMServerPoolSpec struct {

// ModelServerSelector uses label selection to watch model server pods
// ModelServerSelector uses a map of labels to watch the model server pods
// that should be included in the LLMServerPool. ModelServers should not
// be shared with any other Service or LLMServerPool; that behavior is not
// supported and will result in sub-optimal utilization.
ModelServerSelector metav1.LabelSelector `json:"modelServerSelector,omitempty"`
// Because this selector is translated to a Service, a simple map is used instead
// of https://pkg.go.dev/k8s.io/apimachinery/pkg/apis/meta/v1#LabelSelector.
// This avoids footgun errors that could occur if https://pkg.go.dev/k8s.io/apimachinery/pkg/apis/meta/v1#LabelSelectorAsMap were used.
ModelServerSelector map[string]string `json:"modelServerSelector,omitempty"`

// TargetPort is the port number on which the model servers within the pool
// expect to receive traffic.
// This maps to the TargetPort in: https://pkg.go.dev/k8s.io/api/core/v1#ServicePort
TargetPort int32
}

// LLMServerPoolStatus defines the observed state of LLMServerPool
Expand Down
8 changes: 7 additions & 1 deletion api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 12 additions & 10 deletions client-go/applyconfiguration/api/v1alpha1/llmserverpoolspec.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion pkg/ext-proc/backend/datastore.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ import (
type K8sDatastore struct {
LLMServerPool *v1alpha1.LLMServerPool
Pods *sync.Map
Port string
}

func (ds *K8sDatastore) GetPodIPs() []string {
Expand Down
3 changes: 2 additions & 1 deletion pkg/ext-proc/backend/endpointslice_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package backend

import (
"context"
"fmt"

discoveryv1 "k8s.io/api/discovery/v1"
"k8s.io/apimachinery/pkg/runtime"
Expand Down Expand Up @@ -49,7 +50,7 @@ func (c *EndpointSliceReconciler) updateDatastore(slice *discoveryv1.EndpointSli
for _, endpoint := range slice.Endpoints {
klog.V(4).Infof("Zone: %v \n endpoint: %+v \n", c.Zone, endpoint)
if c.validPod(endpoint) {
pod := Pod{Name: *&endpoint.TargetRef.Name, Address: endpoint.Addresses[0] + ":" + c.Datastore.Port}
pod := Pod{Name: *&endpoint.TargetRef.Name, Address: endpoint.Addresses[0] + ":" + fmt.Sprint(c.Datastore.LLMServerPool.Spec.TargetPort)}
podMap[pod] = true
c.Datastore.Pods.Store(pod, true)
}
Expand Down
22 changes: 16 additions & 6 deletions pkg/ext-proc/backend/endpointslice_reconcilier_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"sync"
"testing"

"inference.networking.x-k8s.io/llm-instance-gateway/api/v1alpha1"
v1 "k8s.io/api/core/v1"
discoveryv1 "k8s.io/api/discovery/v1"
)
Expand All @@ -25,7 +26,11 @@ func TestUpdateDatastore_EndpointSliceReconciler(t *testing.T) {
name: "Add new pod",
datastore: K8sDatastore{
Pods: populateMap(basePod1, basePod2),
Port: "8000",
LLMServerPool: &v1alpha1.LLMServerPool{
Spec: v1alpha1.LLMServerPoolSpec{
TargetPort: int32(8000),
},
},
},
incomingSlice: &discoveryv1.EndpointSlice{
Endpoints: []discoveryv1.Endpoint{
Expand Down Expand Up @@ -63,14 +68,17 @@ func TestUpdateDatastore_EndpointSliceReconciler(t *testing.T) {
},
want: K8sDatastore{
Pods: populateMap(basePod1, basePod2, basePod3),
Port: "8000",
},
},
{
name: "New pod, but its not ready yet. Do not add.",
datastore: K8sDatastore{
Pods: populateMap(basePod1, basePod2),
Port: "8000",
LLMServerPool: &v1alpha1.LLMServerPool{
Spec: v1alpha1.LLMServerPoolSpec{
TargetPort: int32(8000),
},
},
},
incomingSlice: &discoveryv1.EndpointSlice{
Endpoints: []discoveryv1.Endpoint{
Expand Down Expand Up @@ -108,14 +116,17 @@ func TestUpdateDatastore_EndpointSliceReconciler(t *testing.T) {
},
want: K8sDatastore{
Pods: populateMap(basePod1, basePod2),
Port: "8000",
},
},
{
name: "Existing pod not ready, new pod added, and is ready",
datastore: K8sDatastore{
Pods: populateMap(basePod1, basePod2),
Port: "8000",
LLMServerPool: &v1alpha1.LLMServerPool{
Spec: v1alpha1.LLMServerPoolSpec{
TargetPort: int32(8000),
},
},
},
incomingSlice: &discoveryv1.EndpointSlice{
Endpoints: []discoveryv1.Endpoint{
Expand Down Expand Up @@ -153,7 +164,6 @@ func TestUpdateDatastore_EndpointSliceReconciler(t *testing.T) {
},
want: K8sDatastore{
Pods: populateMap(basePod3, basePod2),
Port: "8000",
},
},
}
Expand Down
24 changes: 6 additions & 18 deletions pkg/ext-proc/backend/llmlserverpool_reconciler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,7 @@ func TestUpdateDatastore_LLMServerPoolReconciler(t *testing.T) {
datastore: K8sDatastore{
LLMServerPool: &v1alpha1.LLMServerPool{
Spec: v1alpha1.LLMServerPoolSpec{
ModelServerSelector: metav1.LabelSelector{
MatchLabels: map[string]string{"app": "vllm"},
},
ModelServerSelector: map[string]string{"app": "vllm"},
},
ObjectMeta: metav1.ObjectMeta{
Name: "test-pool",
Expand All @@ -32,9 +30,7 @@ func TestUpdateDatastore_LLMServerPoolReconciler(t *testing.T) {
},
incomingServerPool: &v1alpha1.LLMServerPool{
Spec: v1alpha1.LLMServerPoolSpec{
ModelServerSelector: metav1.LabelSelector{
MatchLabels: map[string]string{"app": "not-vllm"},
},
ModelServerSelector: map[string]string{"app": "not-vllm"},
},
ObjectMeta: metav1.ObjectMeta{
Name: "test-pool",
Expand All @@ -44,9 +40,7 @@ func TestUpdateDatastore_LLMServerPoolReconciler(t *testing.T) {
want: K8sDatastore{
LLMServerPool: &v1alpha1.LLMServerPool{
Spec: v1alpha1.LLMServerPoolSpec{
ModelServerSelector: metav1.LabelSelector{
MatchLabels: map[string]string{"app": "not-vllm"},
},
ModelServerSelector: map[string]string{"app": "not-vllm"},
},
ObjectMeta: metav1.ObjectMeta{
Name: "test-pool",
Expand All @@ -60,9 +54,7 @@ func TestUpdateDatastore_LLMServerPoolReconciler(t *testing.T) {
datastore: K8sDatastore{
LLMServerPool: &v1alpha1.LLMServerPool{
Spec: v1alpha1.LLMServerPoolSpec{
ModelServerSelector: metav1.LabelSelector{
MatchLabels: map[string]string{"app": "vllm"},
},
ModelServerSelector: map[string]string{"app": "vllm"},
},
ObjectMeta: metav1.ObjectMeta{
Name: "test-pool",
Expand All @@ -72,9 +64,7 @@ func TestUpdateDatastore_LLMServerPoolReconciler(t *testing.T) {
},
incomingServerPool: &v1alpha1.LLMServerPool{
Spec: v1alpha1.LLMServerPoolSpec{
ModelServerSelector: metav1.LabelSelector{
MatchLabels: map[string]string{"technically": "this-should-never-happen"},
},
ModelServerSelector: map[string]string{"technically": "this-should-never-happen"},
},
ObjectMeta: metav1.ObjectMeta{
Name: "test-pool",
Expand All @@ -84,9 +74,7 @@ func TestUpdateDatastore_LLMServerPoolReconciler(t *testing.T) {
want: K8sDatastore{
LLMServerPool: &v1alpha1.LLMServerPool{
Spec: v1alpha1.LLMServerPoolSpec{
ModelServerSelector: metav1.LabelSelector{
MatchLabels: map[string]string{"app": "vllm"},
},
ModelServerSelector: map[string]string{"app": "vllm"},
},
ObjectMeta: metav1.ObjectMeta{
Name: "test-pool",
Expand Down
1 change: 0 additions & 1 deletion pkg/ext-proc/backend/llmserverpool_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ type LLMServerPoolReconciler struct {
ServerPoolName string
Namespace string
Datastore *K8sDatastore
Port int
Zone string
}

Expand Down
3 changes: 1 addition & 2 deletions pkg/ext-proc/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ var (
serviceName = flag.String("serviceName", "", "Name of the service that will be used to read the endpointslices from")
namespace = flag.String("namespace", "default", "The Namespace that the server pool should exist in.")
zone = flag.String("zone", "", "The zone that this instance is created in. Will be passed to the corresponding endpointSlice. ")
desiredPort = flag.String("desiredPort", "8000", "The port that the model server exposes")
refreshPodsInterval = flag.Duration("refreshPodsInterval", 10*time.Second, "interval to refresh pods")
refreshMetricsInterval = flag.Duration("refreshMetricsInterval", 50*time.Millisecond, "interval to refresh metrics")
scheme = runtime.NewScheme()
Expand Down Expand Up @@ -71,7 +70,7 @@ func main() {
klog.Fatalf("failed to listen: %v", err)
}

datastore := &backend.K8sDatastore{LLMServerPool: &v1alpha1.LLMServerPool{}, Pods: &sync.Map{}, Port: *desiredPort}
datastore := &backend.K8sDatastore{LLMServerPool: &v1alpha1.LLMServerPool{}, Pods: &sync.Map{}}

mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
Scheme: scheme,
Expand Down