LXD GPU 设备发现:
// gpuCardNameRe matches DRM entries whose name starts with "card" followed by
// digits (e.g. card0). It is compiled once instead of on every DRM entry.
var gpuCardNameRe = regexp.MustCompile("^card[0-9]+")

// nvidiaAuxNameRe matches /dev entries named "nvidia" followed by at least one
// non-digit character (e.g. nvidiactl, nvidia-uvm), i.e. every nvidia node
// that is not /dev/nvidia[0-9]+.
var nvidiaAuxNameRe = regexp.MustCompile(`^nvidia[^0-9]+`)

// nvidiaGpuCards describes a per-card node: /dev/nvidia[0-9]+
type nvidiaGpuCards struct {
	path  string
	major int
	minor int
	id    string
}

// nvidiaGpuDevices describes a node shared by all NVIDIA cards:
// {/dev/nvidiactl, /dev/nvidia-uvm, ...}
type nvidiaGpuDevices struct {
	path  string
	major int
	minor int
}

// gpuDevice describes one DRM node, e.g. /dev/dri/card0. If we detect that
// vendor == nvidia, then nvidia will contain the corresponding nvidia card,
// e.g. {/dev/dri/card1 --> /dev/nvidia1}.
type gpuDevice struct {
	vendorid  string
	productid string
	id        string // card id e.g. 0

	// If related devices have the same PCI address as the GPU we should
	// mount them all. Meaning if we detect /dev/dri/card0,
	// /dev/dri/controlD64, and /dev/dri/renderD128 with the same PCI
	// address, then they should all be made available in the container.
	pci    string
	nvidia nvidiaGpuCards

	path  string
	major int
	minor int
}

// isNvidiaGpu reports whether the PCI vendor ID is 10de (compared
// case-insensitively), which this code treats as NVIDIA.
func (g *gpuDevice) isNvidiaGpu() bool {
	return strings.EqualFold(g.vendorid, "10de")
}

// cardIds associates a card id with the PCI address the card lives on.
type cardIds struct {
	id  string
	pci string
}

// gpuDevMajor extracts the major number from a Linux dev_t value. For majors
// below 4096 combined with minors below 256 this equals dev / 256.
func gpuDevMajor(dev uint64) int {
	return int(((dev >> 32) & 0xfffff000) | ((dev >> 8) & 0x00000fff))
}

// gpuDevMinor extracts the minor number from a Linux dev_t value. Unlike
// dev % 256 it also recovers minors above 255.
func gpuDevMinor(dev uint64) int {
	return int(((dev >> 12) & 0xffffff00) | (dev & 0x000000ff))
}

// gpuReadPCIID reads a sysfs PCI ID attribute ("vendor" or "device") and
// returns its content with surrounding whitespace and a leading "0x" removed.
func gpuReadPCIID(path string) (string, error) {
	raw, err := ioutil.ReadFile(path)
	if err != nil {
		return "", err
	}

	return strings.TrimPrefix(strings.TrimSpace(string(raw)), "0x"), nil
}

// gpuReadDevNumbers parses a sysfs "dev" attribute of the form "major:minor".
// ok is false when the file cannot be read or is not in that form.
func gpuReadDevNumbers(path string) (major int, minor int, ok bool) {
	raw, err := ioutil.ReadFile(path)
	if err != nil {
		return 0, 0, false
	}

	fields := strings.Split(strings.TrimSpace(string(raw)), ":")
	if len(fields) != 2 {
		return 0, 0, false
	}

	major, err = strconv.Atoi(fields[0])
	if err != nil {
		return 0, 0, false
	}

	minor, err = strconv.Atoi(fields[1])
	if err != nil {
		return 0, 0, false
	}

	return major, minor, true
}

// gpuLoadNvidiaAuxDevices collects every /dev/nvidia* node that is not a
// per-card /dev/nvidia[0-9]+ node. Nodes that cannot be stat'ed are skipped.
func gpuLoadNvidiaAuxDevices() ([]nvidiaGpuDevices, error) {
	ents, err := ioutil.ReadDir("/dev")
	if err != nil {
		// The original only reported a missing /dev and silently ignored
		// every other failure; any failure to list /dev is reported now.
		return nil, err
	}

	var devices []nvidiaGpuDevices
	for _, ent := range ents {
		if !nvidiaAuxNameRe.MatchString(ent.Name()) {
			continue
		}

		nvidiaPath := filepath.Join("/dev", ent.Name())
		stat := syscall.Stat_t{}
		if err := syscall.Stat(nvidiaPath, &stat); err != nil {
			continue
		}

		devices = append(devices, nvidiaGpuDevices{
			path:  nvidiaPath,
			major: gpuDevMajor(uint64(stat.Rdev)),
			minor: gpuDevMinor(uint64(stat.Rdev)),
		})
	}

	return devices, nil
}

// deviceLoadGpu discovers the GPUs of the host by walking
// /sys/bus/pci/devices and looking for PCI devices that have a "drm"
// subdirectory.
//
// It returns one gpuDevice per DRM node found (card*, controlD*, renderD*,
// ...), plus, when at least one NVIDIA card was seen, the list of
// /dev/nvidia* nodes that are not per-card nodes. A missing
// /sys/bus/pci/devices directory is not an error: all results are nil.
// PCI devices or DRM nodes whose sysfs attributes cannot be read or parsed
// are skipped. On error both slices are nil.
func deviceLoadGpu() ([]gpuDevice, []nvidiaGpuDevices, error) {
	const pciDevicesPath = "/sys/bus/pci/devices"

	var gpus []gpuDevice
	var nvidiaDevices []nvidiaGpuDevices
	var cards []cardIds

	ents, err := ioutil.ReadDir(pciDevicesPath)
	if err != nil {
		if os.IsNotExist(err) {
			return nil, nil, nil
		}
		return nil, nil, err
	}

	isNvidia := false
	for _, ent := range ents {
		// The pci address == the name of the directory. So let's use
		// this cheap way of retrieving it.
		pciAddr := ent.Name()

		// Make sure that we are dealing with a GPU by looking whether
		// the "drm" subfolder exists. Any failure to list it means there
		// is nothing to enumerate for this PCI device.
		drm := filepath.Join(pciDevicesPath, pciAddr, "drm")
		drmEnts, err := ioutil.ReadDir(drm)
		if err != nil {
			continue
		}

		// Retrieve vendor and device ID. A device whose IDs cannot be read
		// is skipped rather than recorded with empty IDs.
		vendorID, err := gpuReadPCIID(filepath.Join(pciDevicesPath, pciAddr, "vendor"))
		if err != nil {
			continue
		}

		productID, err := gpuReadPCIID(filepath.Join(pciDevicesPath, pciAddr, "device"))
		if err != nil {
			continue
		}

		// Store all associated subdevices, e.g. controlD64, renderD128.
		// The name of the directory == the last part of the
		// /dev/dri/controlD64 path. So drmEnt.Name() will give us
		// controlD64.
		for _, drmEnt := range drmEnts {
			major, minor, ok := gpuReadDevNumbers(filepath.Join(drm, drmEnt.Name(), "dev"))
			if !ok {
				continue
			}

			tmpGpu := gpuDevice{
				pci:       pciAddr,
				vendorid:  vendorID,
				productid: productID,
				path:      filepath.Join("/dev/dri", drmEnt.Name()),
				major:     major,
				minor:     minor,
			}

			isCard := gpuCardNameRe.MatchString(drmEnt.Name())
			if isCard {
				// If it is a card its minor number will be its id.
				tmpGpu.id = strconv.Itoa(minor)
				cards = append(cards, cardIds{id: tmpGpu.id, pci: tmpGpu.pci})
			}

			// Find matching /dev/nvidia* entry for /dev/dri/card*
			if isCard && tmpGpu.isNvidiaGpu() {
				isNvidia = true

				nvidiaPath := "/dev/nvidia" + strconv.Itoa(tmpGpu.minor)
				stat := syscall.Stat_t{}
				if err := syscall.Stat(nvidiaPath, &stat); err != nil {
					// NOTE(review): as in the original, an NVIDIA card
					// without a matching /dev/nvidiaN node is dropped
					// entirely - confirm this is intended.
					continue
				}

				tmpGpu.nvidia.path = nvidiaPath
				tmpGpu.nvidia.major = gpuDevMajor(uint64(stat.Rdev))
				tmpGpu.nvidia.minor = gpuDevMinor(uint64(stat.Rdev))
				tmpGpu.nvidia.id = strconv.Itoa(tmpGpu.nvidia.minor)
			}

			gpus = append(gpus, tmpGpu)
		}
	}

	// We detected a Nvidia card, so let's collect all other nvidia devices
	// that are not /dev/nvidia[0-9]+.
	if isNvidia {
		nvidiaDevices, err = gpuLoadNvidiaAuxDevices()
		if err != nil {
			return nil, nil, err
		}
	}

	// Since we'll give users the ability to specify an id we need to group
	// devices on the same PCI that belong to the same card by id.
	for _, card := range cards {
		for i := range gpus {
			if gpus[i].pci == card.pci {
				gpus[i].id = card.id
			}
		}
	}

	return gpus, nvidiaDevices, nil
}
LXD GPU 设备加载:由下面的代码可见
设备最终是否被加载,取决于通过 REST 接口创建容器时 request body 中的 config.devices.type 是否为 gpu,以及所指定的属性(vendorid、pci、productid、id)是否与主机上发现的 GPU 信息一致。那么客户端又是如何知道 vendorid、pci 等信息的?实际中一般需要建立 GPU 资源池,GPU 元数据由上层管理,并通过一定的调度规则来指定。而 GPU 资源的发现,既可以通过类似上面 deviceLoadGpu 的函数完成,也可以通过 lspci 命令完成。
else if m["type"] == "gpu" { if gpus == nil { gpus, nvidiaDevices, err = deviceLoadGpu() if err != nil { return "", err } } sawNvidia := false for _, gpu := range gpus {
//最终是否加载取决于Rest接口创建的request body中的config.devices.type是否是gpu以及指定的属性是否和发现上来的一致 if (m["vendorid"] != "" && gpu.vendorid != m["vendorid"]) || (m["pci"] != "" && gpu.pci != m["pci"]) || (m["productid"] != "" && gpu.productid != m["productid"]) || (m["id"] != "" && gpu.id != m["id"]) { continue } err := c.setupUnixDevice(k, m, gpu.major, gpu.minor, gpu.path, true) if err != nil { return "", err } if gpu.nvidia.path == "" { continue } err = c.setupUnixDevice(k, m, gpu.nvidia.major, gpu.nvidia.minor, gpu.nvidia.path, true) if err != nil { return "", err } sawNvidia = true } if sawNvidia { for _, gpu := range nvidiaDevices { err := c.setupUnixDevice(k, m, gpu.major, gpu.minor, gpu.path, true) if err != nil { return "", err } } } }
// setupUnixDevice() creates the unix device and sets up the necessary low-level // liblxc configuration items. func (c *containerLXC) setupUnixDevice(devType string, dev types.Device, major int, minor int, path string, createMustSucceed bool) error { if c.IsPrivileged() && !runningInUserns && cgDevicesController {
//设置设备访问白名单 err := lxcSetConfigItem(c.c, "lxc.cgroup.devices.allow", fmt.Sprintf("c %d:%d rwm", major, minor)) if err != nil { return err } } temp := types.Device{} if err := shared.DeepCopy(&dev, &temp); err != nil { return err } temp["major"] = fmt.Sprintf("%d", major) temp["minor"] = fmt.Sprintf("%d", minor) temp["path"] = path paths, err := c.createUnixDevice(temp) if err != nil { shared.LogDebug("failed to create device", log.Ctx{"err": err, "device": devType}) if createMustSucceed { return err } return nil } devPath := paths[0] tgtPath := paths[1] //设置挂载对象 err = lxcSetConfigItem(c.c, "lxc.mount.entry", fmt.Sprintf("%s %s none bind,create=file", devPath, tgtPath)) if err != nil { return err } return nil }
// Unix devices handling func (c *containerLXC) createUnixDevice(m types.Device) ([]string, error) { var err error var major, minor int // Our device paths srcPath := m["path"] tgtPath := strings.TrimPrefix(srcPath, "/") devName := fmt.Sprintf("unix.%s", strings.Replace(tgtPath, "/", "-", -1)) devPath := filepath.Join(c.DevicesPath(), devName)//var/lib/lxd/devices/容器名称/xxxx // Extra checks for nesting if runningInUserns { for key, value := range m { if shared.StringInSlice(key, []string{"major", "minor", "mode", "uid", "gid"}) && value != "" { return nil, fmt.Errorf("The "%s" property may not be set when adding a device to a nested container", key) } } } // Get the major/minor of the device we want to create if m["major"] == "" && m["minor"] == "" { // If no major and minor are set, use those from the device on the host _, major, minor, err = deviceGetAttributes(srcPath) if err != nil { return nil, fmt.Errorf("Failed to get device attributes for %s: %s", m["path"], err) } } else if m["major"] == "" || m["minor"] == "" { return nil, fmt.Errorf("Both major and minor must be supplied for device: %s", m["path"]) } else { major, err = strconv.Atoi(m["major"]) if err != nil { return nil, fmt.Errorf("Bad major %s in device %s", m["major"], m["path"]) } minor, err = strconv.Atoi(m["minor"]) if err != nil { return nil, fmt.Errorf("Bad minor %s in device %s", m["minor"], m["path"]) } } // Get the device mode mode := os.FileMode(0660) if m["mode"] != "" { tmp, err := deviceModeOct(m["mode"]) if err != nil { return nil, fmt.Errorf("Bad mode %s in device %s", m["mode"], m["path"]) } mode = os.FileMode(tmp) } if m["type"] == "unix-block" { mode |= syscall.S_IFBLK } else { mode |= syscall.S_IFCHR } // Get the device owner uid := 0 gid := 0 if m["uid"] != "" { uid, err = strconv.Atoi(m["uid"]) if err != nil { return nil, fmt.Errorf("Invalid uid %s in device %s", m["uid"], m["path"]) } } if m["gid"] != "" { gid, err = strconv.Atoi(m["gid"]) if err != nil { return nil, 
fmt.Errorf("Invalid gid %s in device %s", m["gid"], m["path"]) } } // Create the devices directory if missing if !shared.PathExists(c.DevicesPath()) { os.Mkdir(c.DevicesPath(), 0711) if err != nil { return nil, fmt.Errorf("Failed to create devices path: %s", err) } } // Clean any existing entry if shared.PathExists(devPath) { if runningInUserns { syscall.Unmount(devPath, syscall.MNT_DETACH) } err = os.Remove(devPath) if err != nil { return nil, fmt.Errorf("Failed to remove existing entry: %s", err) } } // Create the new entry if !runningInUserns { if err := syscall.Mknod(devPath, uint32(mode), minor|(major<<8)); err != nil { return nil, fmt.Errorf("Failed to create device %s for %s: %s", devPath, m["path"], err) } if err := os.Chown(devPath, uid, gid); err != nil { return nil, fmt.Errorf("Failed to chown device %s: %s", devPath, err) } // Needed as mknod respects the umask if err := os.Chmod(devPath, mode); err != nil { return nil, fmt.Errorf("Failed to chmod device %s: %s", devPath, err) } if c.idmapset != nil { if err := c.idmapset.ShiftFile(devPath); err != nil { // uidshift failing is weird, but not a big problem. Log and proceed shared.LogDebugf("Failed to uidshift device %s: %s ", m["path"], err) } } } else { f, err := os.Create(devPath) if err != nil { return nil, err } f.Close() err = deviceMountDisk(srcPath, devPath, false, false) if err != nil { return nil, err } } return []string{devPath, tgtPath}, nil }
func deviceMountDisk(srcPath string, dstPath string, readonly bool, recursive bool) error { var err error // Prepare the mount flags flags := 0 if readonly { flags |= syscall.MS_RDONLY } // Detect the filesystem fstype := "none" if deviceIsBlockdev(srcPath) { fstype, err = shared.BlockFsDetect(srcPath) if err != nil { return err } } else { flags |= syscall.MS_BIND if recursive { flags |= syscall.MS_REC } } // Mount the filesystem if err = syscall.Mount(srcPath, dstPath, fstype, uintptr(flags), ""); err != nil { return fmt.Errorf("Unable to mount %s at %s: %s", srcPath, dstPath, err) } flags = syscall.MS_REC | syscall.MS_SLAVE if err = syscall.Mount("", dstPath, "", uintptr(flags), ""); err != nil { return fmt.Errorf("unable to make mount %s private: %s", dstPath, err) } return nil }