Skip to content

Commit 89a0ad9

Browse files
authored
Merge pull request #1580 from hsyrja/main
xelink support
2 parents 47719d7 + 1d8e10d commit 89a0ad9

File tree

3 files changed

+110
-1
lines changed

3 files changed

+110
-1
lines changed
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{
2+
"Info": "2x 4 tile 4 GiB PVC [Ponte Vecchio] GPUs",
3+
"DevCount": 2,
4+
"TilesPerDev": 4,
5+
"DevsPerNode": 1,
6+
"DevMemSize": 4294967296,
7+
"Capabilities": {
8+
"platform": "fake_PVC",
9+
"connections": "0.1-0.0_0.2-0.0_0.3-0.0_1.0-0.0_1.1-0.0_1.2-0.0_1.3-0.0_0.2-0.1_0.3-0.1_1.0-0.1_1.1-0.1_1.2-0.1_1.3-0.1_0.3-0.2_1.0-0.2_1.1-0.2_1.2-0.2_1.3-0.2_1.0-0.3_1.1-0.3_1.2-0.3_1.3-0.3_1.1-1.0_1.2-1.0_1.3-1.0_1.2-1.1_1.3-1.1_1.3-1.2",
10+
"connection-topology": "RAW"
11+
}
12+
}
13+
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{
2+
"Info": "8x 4 GiB PVC [Ponte Vecchio] GPUs",
3+
"DevCount": 8,
4+
"TilesPerDev": 2,
5+
"DevsPerNode": 2,
6+
"DevMemSize": 4294967296,
7+
"Capabilities": {
8+
"platform": "fake_PVC",
9+
"connections": "",
10+
"connection-topology": "FULL"
11+
}
12+
}

cmd/gpu_fakedev/gpu_fakedev.go

Lines changed: 85 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2021-2022 Intel Corporation. All Rights Reserved.
1+
// Copyright 2021-2023 Intel Corporation. All Rights Reserved.
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -44,6 +44,7 @@ import (
4444
"os"
4545
"path/filepath"
4646
"strconv"
47+
"strings"
4748

4849
"golang.org/x/sys/unix"
4950
)
@@ -61,6 +62,9 @@ const (
6162
devNullMajor = 1
6263
devNullMinor = 3
6364
devNullType = unix.S_IFCHR
65+
// GPU connectivity.
66+
maxK8sLabelSize = 63
67+
fullyConnected = "FULL"
6468
)
6569

6670
var verbose bool
@@ -289,6 +293,86 @@ func generateDriFiles(opts genOptions) {
289293
}
290294
}
291295
log.Printf("Done, created %d dirs, %d devices and %d files.", opts.dirs, opts.devs, opts.files)
296+
297+
makeXelinkSideCar(opts)
298+
}
299+
300+
func makeXelinkSideCar(opts genOptions) {
301+
topology := opts.Capabilities["connection-topology"]
302+
gpus := opts.DevCount
303+
tiles := opts.TilesPerDev
304+
connections := opts.Capabilities["connections"]
305+
306+
if topology != fullyConnected {
307+
saveSideCarFile(connections)
308+
} else {
309+
saveSideCarFile(buildConnectionList(gpus, tiles))
310+
}
311+
312+
log.Printf("XELINK: generated xelink sidecar label file, using (GPUs: %d, Tiles: %d, Topology: %s)", gpus, tiles, topology)
313+
}
314+
315+
// buildConnectionList returns the links of a fully connected topology between
// all tiles of all GPUs, as a "_"-separated list of "<to>-<from>" entries.
//
// Each endpoint is named "<gpu>.<tile>". Every unordered pair of distinct
// endpoints appears exactly once, with the later endpoint (in gpu-major,
// tile-minor order) on the left-hand side, e.g. for 1 GPU with 3 tiles:
// "0.1-0.0_0.2-0.0_0.2-0.1".
//
// An empty string is returned when there are fewer than two endpoints,
// including when gpus or tiles is zero or negative.
func buildConnectionList(gpus, tiles int) string {
	if gpus <= 0 || tiles <= 0 {
		return ""
	}

	nodes := make([]string, 0, gpus*tiles)

	for gpu := 0; gpu < gpus; gpu++ {
		for tile := 0; tile < tiles; tile++ {
			nodes = append(nodes, fmt.Sprintf("%d.%d", gpu, tile))
		}
	}

	// Node names are unique, so pairing each node only with the nodes that
	// follow it yields every link once without needing a "seen" set.
	// No self links, TODO ignore in-gpu xelinks.
	links := make([]string, 0, len(nodes)*(len(nodes)-1)/2)

	for i, from := range nodes {
		for _, to := range nodes[i+1:] {
			links = append(links, to+"-"+from)
		}
	}

	return strings.Join(links, "_")
}
348+
349+
func saveSideCarFile(connections string) {
350+
f, err := os.Create("xpum-sidecar-labels.txt")
351+
if err != nil {
352+
panic(err)
353+
}
354+
defer f.Close()
355+
356+
// Write first line without Z prefix
357+
line := fmt.Sprintf("xpumanager.intel.com/xe-links=%s", connections[:min(len(connections), maxK8sLabelSize)])
358+
fmt.Println(line)
359+
360+
if _, err := f.WriteString(line + "\n"); err != nil {
361+
panic(err)
362+
}
363+
364+
index := 2
365+
366+
// Write next lines with Z prefix
367+
for i := maxK8sLabelSize; i < len(connections); i += (maxK8sLabelSize - 1) {
368+
line := fmt.Sprintf("xpumanager.intel.com/xe-links%d=Z%s", index, connections[i:min(len(connections), i+maxK8sLabelSize-1)])
369+
fmt.Println(line)
370+
371+
if _, err := f.WriteString(line + "\n"); err != nil {
372+
panic(err)
373+
}
374+
index++
375+
}
292376
}
293377

294378
// getOptions parses options from given JSON file, validates and returns them.

0 commit comments

Comments
 (0)