Skip to content

Commit 1af2eae

Browse files
committed
Fix pod affinity
1 parent 550cf04 commit 1af2eae

File tree

2 files changed

+5
-5
lines changed

2 files changed

+5
-5
lines changed

pkg/controller/mpi_job_controller.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -468,8 +468,8 @@ func NewMPIJobControllerWithClock(
468468
oldExpandReplicas: make(map[string]int32),
469469
runningJobs: pqRunning,
470470
queuedJobs: pqQueued,
471-
freeSlots: 10,
472-
rescaleGap: 30 * time.Second,
471+
freeSlots: 60,
472+
rescaleGap: 1 * time.Second, // NOTE(review): this is 1 second, not 3 minutes — confirm the intended rescale gap
473473
}
474474
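// FIXME: freeSlots is hard-coded in the initializer above; presumably it should reflect the cluster's actual free capacity — confirm and replace the literal.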
475475

@@ -2111,7 +2111,7 @@ func (c *MPIJobController) newWorker(mpiJob *kubeflow.MPIJob, index int) *corev1
21112111
LabelSelector: &metav1.LabelSelector{
21122112
MatchLabels: launcherMatch,
21132113
},
2114-
TopologyKey: "topology.kubernetes.io/zone",
2114+
TopologyKey: "kubernetes.io/hostname",
21152115
},
21162116
})
21172117
schedulingAffinity = append(schedulingAffinity, corev1.WeightedPodAffinityTerm{
@@ -2120,7 +2120,7 @@ func (c *MPIJobController) newWorker(mpiJob *kubeflow.MPIJob, index int) *corev1
21202120
LabelSelector: &metav1.LabelSelector{
21212121
MatchLabels: workerMatch,
21222122
},
2123-
TopologyKey: "topology.kubernetes.io/zone",
2123+
TopologyKey: "kubernetes.io/hostname",
21242124
},
21252125
})
21262126

pkg/controller/rescale_client.C

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ int main (int argc, char **argv)
5858
}
5959

6060
//printf("Waiting for reply...\n" );
61-
if (CcsRecvResponse(&server, cmdLen, msg , 15) == -1) {
61+
if (CcsRecvResponse(&server, cmdLen, msg , 180) == -1) {
6262
printf("0");
6363
return 0;
6464
}

0 commit comments

Comments
 (0)