zoukankan      html  css  js  c++  java
  • docker系列--namespace解读

    // Config defines configuration options for executing a process inside a contained environment.
    // Only the namespace-related fields are shown in this excerpt; the "..." lines
    // mark fields that were elided.
    type Config struct {
        ...
     
        // Namespaces lists the namespaces that should be set up when cloning the
        // container's init process. A namespace that is not listed here is shared
        // with the container's parent process.
        Namespaces Namespaces `json:"namespaces"`
     
        // UidMappings is the list of user ID mappings used with user namespaces.
        UidMappings []IDMap `json:"uid_mappings"`
     
        // GidMappings is the list of group ID mappings used with user namespaces.
        GidMappings []IDMap `json:"gid_mappings"`
     
        ...
    }

    runC中namespace的源码主要在: runc/libcontainer/configs/namespaces_unix.go。runC支持的namespace type包括($nsName) "net"、"mnt"、"pid"、"ipc"、"user"、"uts":

    // Namespace type identifiers accepted in a container configuration.
    // Each constant's value is simply its own name as a string.
    const (
    	NEWNS   NamespaceType = "NEWNS"
    	NEWUTS  NamespaceType = "NEWUTS"
    	NEWIPC  NamespaceType = "NEWIPC"
    	NEWPID  NamespaceType = "NEWPID"
    	NEWNET  NamespaceType = "NEWNET"
    	NEWUSER NamespaceType = "NEWUSER"
    )

    除了验证 Namespace Type 是否在以上常量中,还要验证 /proc/self/ns/$nsName 是否存在并且可读;两项都通过时,才认为该 Namespace 在当前系统中是被支持的。

    root@cloud:~/iso# ls /proc/self/ns/ -al
    total 0
    dr-x--x--x 2 root root 0 Dec  4 14:51 .
    dr-xr-xr-x 9 root root 0 Dec  4 14:51 ..
    lrwxrwxrwx 1 root root 0 Dec  4 14:51 cgroup -> 'cgroup:[4026531835]'
    lrwxrwxrwx 1 root root 0 Dec  4 14:51 ipc -> 'ipc:[4026531839]'
    lrwxrwxrwx 1 root root 0 Dec  4 14:51 mnt -> 'mnt:[4026531840]'
    lrwxrwxrwx 1 root root 0 Dec  4 14:51 net -> 'net:[4026531896]'
    lrwxrwxrwx 1 root root 0 Dec  4 14:51 pid -> 'pid:[4026531836]'
    lrwxrwxrwx 1 root root 0 Dec  4 14:51 pid_for_children -> 'pid:[4026531836]'
    lrwxrwxrwx 1 root root 0 Dec  4 14:51 user -> 'user:[4026531837]'
    lrwxrwxrwx 1 root root 0 Dec  4 14:51 uts -> 'uts:[4026531838]'
    root@cloud:~/iso# 
    root@cloud:~/iso# unshare -m -u --propagation unchanged /bin/bash
    
    root@cloud:~/iso# ls /proc/self/ns/ -al
    total 0
    dr-x--x--x 2 root root 0 Dec  4 14:52 .
    dr-xr-xr-x 9 root root 0 Dec  4 14:52 ..
    lrwxrwxrwx 1 root root 0 Dec  4 14:52 cgroup -> 'cgroup:[4026531835]'
    lrwxrwxrwx 1 root root 0 Dec  4 14:52 ipc -> 'ipc:[4026531839]'
    lrwxrwxrwx 1 root root 0 Dec  4 14:52 mnt -> 'mnt:[4026533784]'
    lrwxrwxrwx 1 root root 0 Dec  4 14:52 net -> 'net:[4026531896]'
    lrwxrwxrwx 1 root root 0 Dec  4 14:52 pid -> 'pid:[4026531836]'
    lrwxrwxrwx 1 root root 0 Dec  4 14:52 pid_for_children -> 'pid:[4026531836]'
    lrwxrwxrwx 1 root root 0 Dec  4 14:52 user -> 'user:[4026531837]'
    lrwxrwxrwx 1 root root 0 Dec  4 14:52 uts -> 'uts:[4026533786]'
    root@cloud:~/iso# 

    如下是Namespace的完整定义,很简单,只包括NamespaceType 和对应的Path。

    // Namespace is the configuration entry for a single namespace: its type
    // plus an optional path to an existing namespace that can be joined via
    // setns.
    type Namespace struct {
    	// Type identifies which namespace this entry configures.
    	Type NamespaceType `json:"type"`
    	// Path is the alternate namespace path to join; it may be empty.
    	Path string `json:"path"`
    }
    从Namespace的GetPath方法中可见,一个pid对应的namespace path为 /proc/$pid/ns/$nsName。
    // GetPath returns the path of this namespace for the given pid. An
    // explicitly configured Path takes precedence; otherwise the path is
    // built as /proc/<pid>/ns/<name>, where <name> comes from NsName.
    func (n *Namespace) GetPath(pid int) string {
    	if n.Path == "" {
    		return fmt.Sprintf("/proc/%d/ns/%s", pid, NsName(n.Type))
    	}
    	return n.Path
    }
    root@cloud:~/iso# ls /proc/$$/ns/ -al
    total 0
    dr-x--x--x 2 root root 0 Dec  4 14:55 .
    dr-xr-xr-x 9 root root 0 Dec  4 14:55 ..
    lrwxrwxrwx 1 root root 0 Dec  4 14:55 cgroup -> 'cgroup:[4026531835]'
    lrwxrwxrwx 1 root root 0 Dec  4 14:55 ipc -> 'ipc:[4026531839]'
    lrwxrwxrwx 1 root root 0 Dec  4 14:55 mnt -> 'mnt:[4026533784]'
    lrwxrwxrwx 1 root root 0 Dec  4 14:55 net -> 'net:[4026531896]'
    lrwxrwxrwx 1 root root 0 Dec  4 14:55 pid -> 'pid:[4026531836]'
    lrwxrwxrwx 1 root root 0 Dec  4 14:55 pid_for_children -> 'pid:[4026531836]'
    lrwxrwxrwx 1 root root 0 Dec  4 14:55 user -> 'user:[4026531837]'
    lrwxrwxrwx 1 root root 0 Dec  4 14:55 uts -> 'uts:[4026533786]'
    root@cloud:~/iso# 
    // start launches the given process through a freshly created parent
    // process. When process.Init is set it additionally moves the container
    // into the created state, persists that state, and runs the configured
    // poststart hooks. If starting the process or any poststart hook fails,
    // the parent process is terminated before the error is returned.
    func (c *linuxContainer) start(process *Process) error {
    	// Disabled experiment (kept for reference): bind-mount
    	// /run/sockets/qemu_pipe into <rootfs>/vmi/sockets before starting.
    	//
    	// if false == cPathExists("/run/sockets/qemu_pipe") {
    	//	return newSystemErrorWithCausef(nil, "mount bind /run/sockets failed %s , /run/sockets/qemu_pipe not exist", c.config.Rootfs)
    	// }
    	// input_dir := filepath.Join(c.config.Rootfs, "/vmi/sockets")
    	// if err := os.MkdirAll(input_dir, 0777); err != nil {
    	//	return newSystemErrorWithCause(err, "mkdir rootfs/sockets/")
    	// }
    	// if err := unix.Mount("/run/sockets/qemu_pipe", input_dir, "", unix.MS_REC|unix.MS_BIND, ""); err != nil {
    	//	return newSystemErrorWithCausef(err, "mount bind /run/sockets failed %s", c.config.Rootfs)
    	// }
    	parent, err := c.newParentProcess(process)
    	if err != nil {
    		return newSystemErrorWithCause(err, "creating new parent process")
    	}
    	parent.forwardChildLogs()

    	if startErr := parent.start(); startErr != nil {
    		// Terminate the process so that it is properly reaped.
    		if termErr := ignoreTerminateErrors(parent.terminate()); termErr != nil {
    			logrus.Warn(termErr)
    		}
    		return newSystemErrorWithCause(startErr, "starting container process")
    	}

    	// Record when the container was started.
    	c.created = time.Now().UTC()

    	// Everything below applies to the init process only.
    	if !process.Init {
    		return nil
    	}

    	c.state = &createdState{c: c}
    	state, err := c.updateState(parent)
    	if err != nil {
    		return err
    	}
    	c.initProcessStartTime = state.InitProcessStartTime

    	if c.config.Hooks == nil {
    		return nil
    	}
    	ociState, err := c.currentOCIState()
    	if err != nil {
    		return err
    	}
    	for i, hook := range c.config.Hooks.Poststart {
    		hookErr := hook.Run(ociState)
    		if hookErr == nil {
    			continue
    		}
    		if termErr := ignoreTerminateErrors(parent.terminate()); termErr != nil {
    			logrus.Warn(termErr)
    		}
    		return newSystemErrorWithCausef(hookErr, "running poststart hook %d", i)
    	}
    	return nil
    }
    // updateState optionally records process as the container's init process,
    // then computes the current state, saves it via saveState, and returns it.
    // A nil process leaves the recorded init process untouched.
    func (c *linuxContainer) updateState(process parentProcess) (*State, error) {
    	if process != nil {
    		c.initProcess = process
    	}

    	current, err := c.currentState()
    	if err != nil {
    		return nil, err
    	}
    	if saveErr := c.saveState(current); saveErr != nil {
    		return nil, saveErr
    	}
    	return current, nil
    }
    // currentState assembles a State snapshot from the container's config, its
    // init process (if any), its cgroup paths and its Intel RDT path.
    //
    // When an init process with a positive pid exists, NamespacePaths is filled
    // with the path of every configured namespace, and then with a
    // /proc/<pid>/ns/... path for every other supported namespace type.
    func (c *linuxContainer) currentState() (*State, error) {
    	pid := -1
    	var (
    		startTime   uint64
    		externalFDs []string
    	)
    	if proc := c.initProcess; proc != nil {
    		pid = proc.pid()
    		// The start-time error is deliberately dropped; startTime keeps
    		// whatever value was returned alongside it.
    		startTime, _ = proc.startTime()
    		externalFDs = proc.externalDescriptors()
    	}

    	// A failure to resolve the Intel RDT path is not fatal: record an
    	// empty path instead.
    	rdtPath, err := intelrdt.GetIntelRdtPath(c.ID())
    	if err != nil {
    		rdtPath = ""
    	}

    	state := &State{
    		BaseState: BaseState{
    			ID:                   c.ID(),
    			Config:               *c.config,
    			InitProcessPid:       pid,
    			InitProcessStartTime: startTime,
    			Created:              c.created,
    		},
    		Rootless:            c.config.RootlessEUID && c.config.RootlessCgroups,
    		CgroupPaths:         c.cgroupManager.GetPaths(),
    		IntelRdtPath:        rdtPath,
    		NamespacePaths:      make(map[configs.NamespaceType]string),
    		ExternalDescriptors: externalFDs,
    	}
    	if pid <= 0 {
    		return state, nil
    	}

    	// Configured namespaces first: GetPath honours an explicit Path.
    	for i := range c.config.Namespaces {
    		ns := &c.config.Namespaces[i]
    		state.NamespacePaths[ns.Type] = ns.GetPath(pid)
    	}
    	// Then every remaining supported type, derived from the pid.
    	for _, nsType := range configs.NamespaceTypes() {
    		if !configs.IsNamespaceSupported(nsType) {
    			continue
    		}
    		if _, known := state.NamespacePaths[nsType]; known {
    			continue
    		}
    		fallback := configs.Namespace{Type: nsType}
    		state.NamespacePaths[nsType] = fallback.GetPath(pid)
    	}
    	return state, nil
    }

    除此之外,还定义了以下常用方法:

    func (n *Namespaces) Remove(t NamespaceType) bool {...}
     
    func (n *Namespaces) Add(t NamespaceType, path string) {...}
     
    func (n *Namespaces) index(t NamespaceType) int {...}
     
    func (n *Namespaces) Contains(t NamespaceType) bool {...}
     
    func (n *Namespaces) PathOf(t NamespaceType) string {...}

    在runc/libcontainer/configs/namespaces_syscall.go中,定义了linux clone时这些namespace对应的clone flags。

    // namespaceInfo maps each namespace type to the clone flag that creates a
    // new namespace of that type.
    var namespaceInfo = map[NamespaceType]int{
    	NEWNS:   syscall.CLONE_NEWNS,
    	NEWUTS:  syscall.CLONE_NEWUTS,
    	NEWIPC:  syscall.CLONE_NEWIPC,
    	NEWPID:  syscall.CLONE_NEWPID,
    	NEWNET:  syscall.CLONE_NEWNET,
    	NEWUSER: syscall.CLONE_NEWUSER,
    }
     
    // CloneFlags ORs together the clone/unshare flags for every namespace in n
    // that must be newly created. Entries with a non-empty Path refer to an
    // existing namespace and therefore contribute no flag.
    func (n *Namespaces) CloneFlags() uintptr {
    	flags := 0
    	for _, ns := range *n {
    		if ns.Path == "" {
    			flags |= namespaceInfo[ns.Type]
    		}
    	}
    	return uintptr(flags)
    }

    上面的CloneFlags()方法是用来解析linuxContainer的config中的namespace相关的参数,生成clone flags,提供给linuxContainer.bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string) 来封装。

    // bootstrapData serializes the data needed to bootstrap the container's
    // init process into a netlink-formatted message and returns it as an
    // io.Reader. A consumer can write the data to a bootstrap program (for
    // example one using the nsenter package) so that the init process is set up
    // with the right namespaces and uid/gid mappings.
    //
    // The message carries the clone flags, the ordered namespace paths when
    // nsMaps is non-empty, and, unless nsMaps contains a user namespace to
    // join, the configured uid/gid mappings.
    func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string) (io.Reader, error) {
    	req := nl.NewNetlinkRequest(int(InitMsg), 0)

    	// Clone flags.
    	req.AddData(&Int32msg{Type: CloneFlagsAttr, Value: uint32(cloneFlags)})

    	// Custom namespace paths, comma separated.
    	if len(nsMaps) > 0 {
    		ordered, err := c.orderNamespacePaths(nsMaps)
    		if err != nil {
    			return nil, err
    		}
    		req.AddData(&Bytemsg{
    			Type:  NsPathsAttr,
    			Value: []byte(strings.Join(ordered, ",")),
    		})
    	}

    	// ID mappings are written only when we are not joining an existing
    	// user namespace.
    	if _, joiningUserNS := nsMaps[configs.NEWUSER]; joiningUserNS {
    		return bytes.NewReader(req.Serialize()), nil
    	}

    	// uid mappings
    	if uidMaps := c.config.UidMappings; len(uidMaps) > 0 {
    		encoded, err := encodeIDMapping(uidMaps)
    		if err != nil {
    			return nil, err
    		}
    		req.AddData(&Bytemsg{Type: UidmapAttr, Value: encoded})
    	}

    	// gid mappings
    	if gidMaps := c.config.GidMappings; len(gidMaps) > 0 {
    		encoded, err := encodeIDMapping(gidMaps)
    		if err != nil {
    			return nil, err
    		}
    		req.AddData(&Bytemsg{Type: GidmapAttr, Value: encoded})

    		// When this process lacks an effective CAP_SETGID, add the
    		// setgroup attribute to the message.
    		caps, err := capability.NewPid(os.Getpid())
    		if err != nil {
    			return nil, err
    		}
    		if !caps.Get(capability.EFFECTIVE, capability.CAP_SETGID) {
    			req.AddData(&Boolmsg{Type: SetgroupAttr, Value: true})
    		}
    	}

    	return bytes.NewReader(req.Serialize()), nil
    }
    linuxContainer.newInitProcess(...)最终会使用linuxContainer.bootstrapData封装的clone flags数据,完成initProcess的构建。
    // newInitProcess builds the initProcess descriptor for the container's
    // init process. It tags cmd's environment with the standard init type,
    // collects the namespaces that have an explicit path, and encodes the
    // clone flags and those paths as bootstrap data.
    func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, parentPipe, childPipe, rootDir *os.File) (*initProcess, error) {
    	cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard))

    	// Namespaces with a path are joined rather than created.
    	joinPaths := make(map[configs.NamespaceType]string)
    	for _, ns := range c.config.Namespaces {
    		if ns.Path == "" {
    			continue
    		}
    		joinPaths[ns.Type] = ns.Path
    	}
    	// The pid namespace is shared when a path for it was configured.
    	_, sharePidns := joinPaths[configs.NEWPID]

    	bootstrap, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), joinPaths)
    	if err != nil {
    		return nil, err
    	}

    	p.consoleChan = make(chan *os.File, 1)
    	initProc := &initProcess{
    		cmd:           cmd,
    		childPipe:     childPipe,
    		parentPipe:    parentPipe,
    		manager:       c.cgroupManager,
    		config:        c.newInitConfig(p),
    		container:     c,
    		process:       p,
    		bootstrapData: bootstrap,
    		sharePidns:    sharePidns,
    		rootDir:       rootDir,
    	}
    	return initProc, nil
    }
    func (p *initProcess) start() error {
            defer p.messageSockPair.parent.Close()
          
            err := p.cmd.Start()
            p.process.ops = p
            // close the write-side of the pipes (controlled by child)
            p.messageSockPair.child.Close()
            p.logFilePair.child.Close()
            if err != nil {
                    p.process.ops = nil
                    return newSystemErrorWithCause(err, "starting init process command")
            }
            // Do this before syncing with child so that no children can escape the
            // cgroup. We don't need to worry about not doing this and not being root
            // because we'd be using the rootless cgroup manager in that case.
            if err := p.manager.Apply(p.pid()); err != nil {
                    return newSystemErrorWithCause(err, "applying cgroup configuration for process")
            }
            if p.intelRdtManager != nil {
                    if err := p.intelRdtManager.Apply(p.pid()); err != nil {
                            return newSystemErrorWithCause(err, "applying Intel RDT configuration for process")
                    }
            }
            defer func() {
                    if err != nil {
                            // TODO: should not be the responsibility to call here
                            p.manager.Destroy()
                            if p.intelRdtManager != nil {
                                    p.intelRdtManager.Destroy()
                            }
                    }
            }()
    
            if _, err := io.Copy(p.messageSockPair.parent, p.bootstrapData); err != nil {
                    return newSystemErrorWithCause(err, "copying bootstrap data to pipe")
            }
            childPid, err := p.getChildPid()
            if err != nil {
                    return newSystemErrorWithCause(err, "getting the final child's pid from pipe")
            }
    1. io.Copy 将 p.bootstrapData 中的数据通过 p.messageSockPair.parent 发送给子进程

    newInitProcess(...)在整个container create的流程中的位置,请参考:runC源码分析之Create/Run Container —— 王涛 如此,namespace在整个container create/run中的源码分析就完整了。

    补充:runC中container的Spec是从bundle/config.json中解析得到的,见runC的create.go中的setupSpec(context)的调用。

    Action: func(context *cli.Context) error {
           if context.NArg() != 1 {
                  fmt.Printf("Incorrect Usage.\n\n")
                  cli.ShowCommandHelp(context, "create")
                  return fmt.Errorf("runc: \"create\" requires exactly one argument")
           }
           if err := revisePidFile(context); err != nil {
                  return err
           }
           spec, err := setupSpec(context)
           if err != nil {
                  return err
           }
           status, err := startContainer(context, spec, true)
           if err != nil {
                  return err
           }

    setupSpec(context)会去loadSpec("config.json"):

    // setupSpec prepares the container spec from the cli.Context: it changes
    // into the bundle directory when the "bundle" flag is set, loads the spec
    // file, wires up sd-notify when NOTIFY_SOCKET is present in the
    // environment, and finally rejects callers whose effective uid is not 0.
    func setupSpec(context *cli.Context) (*specs.Spec, error) {
    	if bundle := context.String("bundle"); bundle != "" {
    		if err := os.Chdir(bundle); err != nil {
    			return nil, err
    		}
    	}

    	spec, err := loadSpec(specConfig)
    	if err != nil {
    		return nil, err
    	}

    	if sock := os.Getenv("NOTIFY_SOCKET"); sock != "" {
    		setupSdNotify(spec, sock)
    	}

    	if os.Geteuid() != 0 {
    		return nil, fmt.Errorf("runc should be run as root")
    	}
    	return spec, nil
    }

    config.json样例如下,namespace部分见 “.linux.namespaces”。

    {
        "ociVersion": "0.4.0",
        "platform": {
            "os": "linux",
            "arch": "amd64"
        },
        "process": {
            "terminal": true,
            "user": {},
            "args": [
                "redis-server",
                "--bind",
                "0.0.0.0"
            ],
            "env": [
                "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
                "TERM=xterm"
            ],
            "cwd": "/",
            "capabilities": [
                "CAP_AUDIT_WRITE",
                "CAP_KILL",
                "CAP_NET_BIND_SERVICE"
            ],
            "rlimits": [
                {
                    "type": "RLIMIT_NOFILE",
                    "hard": 1024,
                    "soft": 1024
                }
            ],
            "noNewPrivileges": true
        },
        "root": {
            "path": "rootfs",
            "readonly": true
        },
        "hostname": "runc",
        "mounts": [
            {
                "destination": "/proc",
                "type": "proc",
                "source": "proc"
            },
            {
                "destination": "/dev",
                "type": "tmpfs",
                "source": "tmpfs",
                "options": [
                    "nosuid",
                    "strictatime",
                    "mode=755",
                    "size=65536k"
                ]
            },
            {
                "destination": "/dev/pts",
                "type": "devpts",
                "source": "devpts",
                "options": [
                    "nosuid",
                    "noexec",
                    "newinstance",
                    "ptmxmode=0666",
                    "mode=0620",
                    "gid=5"
                ]
            },
            {
                "destination": "/dev/shm",
                "type": "tmpfs",
                "source": "shm",
                "options": [
                    "nosuid",
                    "noexec",
                    "nodev",
                    "mode=1777",
                    "size=65536k"
                ]
            },
            {
                "destination": "/dev/mqueue",
                "type": "mqueue",
                "source": "mqueue",
                "options": [
                    "nosuid",
                    "noexec",
                    "nodev"
                ]
            },
            {
                "destination": "/sys",
                "type": "sysfs",
                "source": "sysfs",
                "options": [
                    "nosuid",
                    "noexec",
                    "nodev",
                    "ro"
                ]
            },
            {
                "destination": "/sys/fs/cgroup",
                "type": "cgroup",
                "source": "cgroup",
                "options": [
                    "nosuid",
                    "noexec",
                    "nodev",
                    "relatime",
                    "ro"
                ]
            }
        ],
        "hooks": {},
        "linux": {
            "resources": {
                "devices": [
                    {
                        "allow": false,
                        "access": "rwm"
                    }
                ]
            },
            "namespaces": [
                {
                    "type": "pid"
                },
                {
                    "type": "ipc"
                },
                {
                    "type": "uts"
                },
                {
                    "type": "mount"
                }
            ],
            "devices": null
        }
    }

    libcontainer

    Libcontainer provides a native Go implementation for creating containers with namespaces, cgroups, capabilities, and filesystem access controls. It allows you to manage the lifecycle of the container performing additional operations after the container is created.

    Container

    A container is a self contained execution environment that shares the kernel of the host system and which is (optionally) isolated from other containers in the system.

    Using libcontainer

    Because containers are spawned in a two step process you will need a binary that will be executed as the init process for the container. In libcontainer, we use the current binary (/proc/self/exe) to be executed as the init process, and use arg "init", we call the first step process "bootstrap", so you always need a "init" function as the entry of "bootstrap".

    In addition to the go init function the early stage bootstrap is handled by importing nsenter.

    import (
    	_ "github.com/opencontainers/runc/libcontainer/nsenter"
    )
    
    // init runs the container bootstrap when the binary is re-executed with
    // "init" as its first argument; in every other case it does nothing.
    // The bootstrap path is not expected to return: StartInitialization
    // should never come back on success, hence the trailing panic.
    func init() {
    	if len(os.Args) > 1 && os.Args[1] == "init" {
    		runtime.GOMAXPROCS(1)
    		runtime.LockOSThread()
    		// Check the factory error instead of discarding it: a failed New
    		// would otherwise surface as a nil dereference on the next line.
    		factory, err := libcontainer.New("")
    		if err != nil {
    			logrus.Fatal(err)
    		}
    		if err := factory.StartInitialization(); err != nil {
    			logrus.Fatal(err)
    		}
    		panic("--this line should have never been executed, congratulations--")
    	}
    }

    Then to create a container you first have to initialize an instance of a factory that will handle the creation and initialization for a container.

    // Create the factory rooted at /var/lib/container, selecting the Cgroupfs
    // option and passing this binary plus "init" as the init arguments.
    factory, err := libcontainer.New("/var/lib/container", libcontainer.Cgroupfs, libcontainer.InitArgs(os.Args[0], "init"))
    if err != nil {
    	logrus.Fatal(err)
    	return
    }

    Once you have an instance of the factory created we can create a configuration struct describing how the container is to be created. A sample would look similar to this:

    // Flags shared by several mounts below: no exec, no setuid, no device nodes.
    defaultMountFlags := unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV
    // Sample container configuration; adjust Rootfs to a real root filesystem.
    config := &configs.Config{
    	Rootfs: "/your/path/to/rootfs",
    	// The same capability list is repeated for each of the five sets.
    	Capabilities: &configs.Capabilities{
    		Bounding: []string{
    			"CAP_CHOWN",
    			"CAP_DAC_OVERRIDE",
    			"CAP_FSETID",
    			"CAP_FOWNER",
    			"CAP_MKNOD",
    			"CAP_NET_RAW",
    			"CAP_SETGID",
    			"CAP_SETUID",
    			"CAP_SETFCAP",
    			"CAP_SETPCAP",
    			"CAP_NET_BIND_SERVICE",
    			"CAP_SYS_CHROOT",
    			"CAP_KILL",
    			"CAP_AUDIT_WRITE",
    		},
    		Effective: []string{
    			"CAP_CHOWN",
    			"CAP_DAC_OVERRIDE",
    			"CAP_FSETID",
    			"CAP_FOWNER",
    			"CAP_MKNOD",
    			"CAP_NET_RAW",
    			"CAP_SETGID",
    			"CAP_SETUID",
    			"CAP_SETFCAP",
    			"CAP_SETPCAP",
    			"CAP_NET_BIND_SERVICE",
    			"CAP_SYS_CHROOT",
    			"CAP_KILL",
    			"CAP_AUDIT_WRITE",
    		},
    		Inheritable: []string{
    			"CAP_CHOWN",
    			"CAP_DAC_OVERRIDE",
    			"CAP_FSETID",
    			"CAP_FOWNER",
    			"CAP_MKNOD",
    			"CAP_NET_RAW",
    			"CAP_SETGID",
    			"CAP_SETUID",
    			"CAP_SETFCAP",
    			"CAP_SETPCAP",
    			"CAP_NET_BIND_SERVICE",
    			"CAP_SYS_CHROOT",
    			"CAP_KILL",
    			"CAP_AUDIT_WRITE",
    		},
    		Permitted: []string{
    			"CAP_CHOWN",
    			"CAP_DAC_OVERRIDE",
    			"CAP_FSETID",
    			"CAP_FOWNER",
    			"CAP_MKNOD",
    			"CAP_NET_RAW",
    			"CAP_SETGID",
    			"CAP_SETUID",
    			"CAP_SETFCAP",
    			"CAP_SETPCAP",
    			"CAP_NET_BIND_SERVICE",
    			"CAP_SYS_CHROOT",
    			"CAP_KILL",
    			"CAP_AUDIT_WRITE",
    		},
    		Ambient: []string{
    			"CAP_CHOWN",
    			"CAP_DAC_OVERRIDE",
    			"CAP_FSETID",
    			"CAP_FOWNER",
    			"CAP_MKNOD",
    			"CAP_NET_RAW",
    			"CAP_SETGID",
    			"CAP_SETUID",
    			"CAP_SETFCAP",
    			"CAP_SETPCAP",
    			"CAP_NET_BIND_SERVICE",
    			"CAP_SYS_CHROOT",
    			"CAP_KILL",
    			"CAP_AUDIT_WRITE",
    		},
    	},
    	// No entry sets a Path, so none of these names an existing namespace.
    	Namespaces: configs.Namespaces([]configs.Namespace{
    		{Type: configs.NEWNS},
    		{Type: configs.NEWUTS},
    		{Type: configs.NEWIPC},
    		{Type: configs.NEWPID},
    		{Type: configs.NEWUSER},
    		{Type: configs.NEWNET},
    		{Type: configs.NEWCGROUP},
    	}),
    	Cgroups: &configs.Cgroup{
    		Name:   "test-container",
    		Parent: "system",
    		Resources: &configs.Resources{
    			MemorySwappiness: nil,
    			Devices:          specconv.AllowedDevices,
    		},
    	},
    	MaskPaths: []string{
    		"/proc/kcore",
    		"/sys/firmware",
    	},
    	ReadonlyPaths: []string{
    		"/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus",
    	},
    	Devices:  specconv.AllowedDevices,
    	Hostname: "testing",
    	// Pseudo-filesystems mounted inside the container.
    	Mounts: []*configs.Mount{
    		{
    			Source:      "proc",
    			Destination: "/proc",
    			Device:      "proc",
    			Flags:       defaultMountFlags,
    		},
    		{
    			Source:      "tmpfs",
    			Destination: "/dev",
    			Device:      "tmpfs",
    			Flags:       unix.MS_NOSUID | unix.MS_STRICTATIME,
    			Data:        "mode=755",
    		},
    		{
    			Source:      "devpts",
    			Destination: "/dev/pts",
    			Device:      "devpts",
    			Flags:       unix.MS_NOSUID | unix.MS_NOEXEC,
    			Data:        "newinstance,ptmxmode=0666,mode=0620,gid=5",
    		},
    		{
    			Device:      "tmpfs",
    			Source:      "shm",
    			Destination: "/dev/shm",
    			Data:        "mode=1777,size=65536k",
    			Flags:       defaultMountFlags,
    		},
    		{
    			Source:      "mqueue",
    			Destination: "/dev/mqueue",
    			Device:      "mqueue",
    			Flags:       defaultMountFlags,
    		},
    		{
    			Source:      "sysfs",
    			Destination: "/sys",
    			Device:      "sysfs",
    			Flags:       defaultMountFlags | unix.MS_RDONLY,
    		},
    	},
    	// One uid range: 65536 IDs, container ID 0 paired with host ID 1000.
    	UidMappings: []configs.IDMap{
    		{
    			ContainerID: 0,
    			HostID: 1000,
    			Size: 65536,
    		},
    	},
    	// The gid mapping uses the same range as the uid mapping.
    	GidMappings: []configs.IDMap{
    		{
    			ContainerID: 0,
    			HostID: 1000,
    			Size: 65536,
    		},
    	},
    	// A single loopback network.
    	Networks: []*configs.Network{
    		{
    			Type:    "loopback",
    			Address: "127.0.0.1/0",
    			Gateway: "localhost",
    		},
    	},
    	// RLIMIT_NOFILE with hard and soft limits both set to 1025.
    	Rlimits: []configs.Rlimit{
    		{
    			Type: unix.RLIMIT_NOFILE,
    			Hard: uint64(1025),
    			Soft: uint64(1025),
    		},
    	},
    }

    Once you have the configuration populated you can create a container:

    // Create the container "container-id" from config via the factory.
    container, err := factory.Create("container-id", config)
    if err != nil {
    	logrus.Fatal(err)
    	return
    }

    To spawn bash as the initial process inside the container and have the processes pid returned in order to wait, signal, or kill the process:

    // Describe the process to run: /bin/bash as user "daemon", wired to this
    // program's standard streams, with Init set to true.
    process := &libcontainer.Process{
    	Args:   []string{"/bin/bash"},
    	Env:    []string{"PATH=/bin"},
    	User:   "daemon",
    	Stdin:  os.Stdin,
    	Stdout: os.Stdout,
    	Stderr: os.Stderr,
    	Init:   true,
    }
    
    // Run the process in the container.
    err := container.Run(process)

    //process 运行哪个程序由 libcontainer.New指定

    // If running the process failed, destroy the container and report the error.
    if err != nil {
    	container.Destroy()
    	logrus.Fatal(err)
    	return
    }

    // wait for the process to finish.
    // err was already declared by the container.Run call, so assign with "="
    // (":=" would not compile: no new variables on the left side).
    _, err = process.Wait()
    if err != nil {
    	logrus.Fatal(err)
    }

    // destroy the container.
    container.Destroy()

    Additional ways to interact with a running container are:

    // return all the pids for all processes running inside the container.
    processes, err := container.Processes()
    
    // get detailed cpu, memory, io, and network statistics for the container and
    // its processes.
    stats, err := container.Stats()
    
    // pause all processes inside the container.
    container.Pause()
    
    // resume all paused processes.
    container.Resume()
    
    // send signal to container's init process.
    container.Signal(signal)
    
    // update container resource constraints.
    container.Set(config)
    
    // get current status of the container.
    status, err := container.Status()
    
    // get current container's state information.
    state, err := container.State()
  • 相关阅读:
    runloop源代码
    runloop的source
    How an Event Enters a Cocoa Application
    RunLoop主要处理以下6类事件
    NSRunloop总结
    performSelector与objc_msgSend
    iOSUI显示思想
    NSPort与NSRunloop的关系是流与消息调度的关系
    Core Animation 负责将bitmap绑定提交到 GPU-[CALayer _display]
    iOS构建流畅的交互界面--CPU,GPU资源消耗的原因和解决方案
  • 原文地址:https://www.cnblogs.com/dream397/p/14086094.html
Copyright © 2011-2022 走看看