zoukankan      html  css  js  c++  java
  • cAdvisor源码分析

    ##cAdvisor监控数据分析 cAdvisor

    入口

    cAdvisor 代码托管地址

    • 代码入口: github.com/google/cadvisor/cadvisor.go
    • API handler: github.com/google/cadvisor/api/handler.go
    • 基于 tag: v0.19.2
    # github.com/google/cadvisor/cadvisor.go
    ... ...
    73     containerManager, err := manager.New(memoryStorage, sysFs, *maxHousekeepingInterval, *allowDynamicHousekeeping)
    74     if err != nil {
    75         glog.Fatalf("Failed to create a Container Manager: %s", err)
    76     }
    77
    78     mux := http.DefaultServeMux
    79
    80     // Register all HTTP handlers.
    81     err = cadvisorHttp.RegisterHandlers(mux, containerManager, *httpAuthFile, *httpAuthRealm, *httpDigestFile, *httpDigestRealm)
    82     if err != nil {
    83         glog.Fatalf("Failed to register HTTP handlers: %v", err)
    84     }
    85
    86     cadvisorHttp.RegisterPrometheusHandler(mux, containerManager, *prometheusEndpoint, nil)
    87
    88     // Start the manager.
    89     if err := containerManager.Start(); err != nil {
    90         glog.Fatalf("Failed to start container manager: %v", err)
    91     }
    ... ...
    

    从入口可以得知真正获取 container 和 machine 信息的 interface 为 Manager,github.com/google/cadvisor/manager/manager.go

    # github.com/google/cadvisor/manager/manager.go
    52 // The Manager interface defines operations for starting a manager and getting
    53 // container and machine information.
    54 type Manager interface {
    55     // Start the manager. Calling other manager methods before this returns
    56     // may produce undefined behavior.
    57     Start() error
    58
    59     // Stops the manager.
    60     Stop() error
    61
    62     // Get information about a container.
    63     GetContainerInfo(containerName string, query *info.ContainerInfoRequest) (*info.ContainerInfo, error)
    ... ...
    

    以下对 cadvisor 具体如何获取到 machine 和 docker container 的信息结合代码具体讲解。

    Machine Info

    # github.com/google/cadvisor/manager/manager.go
    158     machineInfo, err := getMachineInfo(sysfs, fsInfo) // 此处通过 getMachineInfo() 函数获取 machine 信息
    159     if err != nil {
    160         return nil, err
    161     }
    162     newManager.machineInfo = *machineInfo
    163     glog.Infof("Machine: %+v", newManager.machineInfo)
    

    获取 machineinfo 的具体函数 getMachineInfo()

    # github.com/google/cadvisor/manager/machine.go
    52 func getMachineInfo(sysFs sysfs.SysFs, fsInfo fs.FsInfo) (*info.MachineInfo, error) {
    53     cpuinfo, err := ioutil.ReadFile("/proc/cpuinfo")
    54     clockSpeed, err := machine.GetClockSpeed(cpuinfo)
    55     if err != nil {
    56         return nil, err
    57     }
    58
    ... ...
    

    MachineInfo 数据结构:

    # github.com/google/cadvisor/info/v1/machine.go
    131 type MachineInfo struct {
    132     // The number of cores in this machine.
    133     NumCores int `json:"num_cores"`
    134
    135     // Maximum clock speed for the cores, in KHz.
    136     CpuFrequency uint64 `json:"cpu_frequency_khz"`
    137
    138     // The amount of memory (in bytes) in this machine
    139     MemoryCapacity int64 `json:"memory_capacity"`
    140
    141     // The machine id
    142     MachineID string `json:"machine_id"`
    143
    144     // The system uuid
    145     SystemUUID string `json:"system_uuid"`
    146
    147     // The boot id
    148     BootID string `json:"boot_id"`
    149
    150     // Filesystems on this machine.
    151     Filesystems []FsInfo `json:"filesystems"`
    152
    153     // Disk map
    154     DiskMap map[string]DiskInfo `json:"disk_map"`
    155
    156     // Network devices
    157     NetworkDevices []NetInfo `json:"network_devices"`
    158
    159     // Machine Topology
    160     // Describes cpu/memory layout and hierarchy.
    161     Topology []Node `json:"topology"`
    162
    163     // Cloud provider the machine belongs to.
    164     CloudProvider CloudProvider `json:"cloud_provider"`
    165
    166     // Type of cloud instance (e.g. GCE standard) the machine is.
    167     InstanceType InstanceType `json:"instance_type"`
    168 }
    
    • cpu /proc/cpuinfo
    • mem /proc/meminfo
    # github.com/google/cadvisor/utils/machine/machine.go
    76 func GetMachineMemoryCapacity() (int64, error) {
    77     out, err := ioutil.ReadFile("/proc/meminfo")
    78     if err != nil {
    79         return 0, err
    80     }
    81
    82     memoryCapacity, err := parseCapacity(out, memoryCapacityRegexp)
    83     if err != nil {
    84         return 0, err
    85     }
    86     return memoryCapacity, err
    87 }
    
    • machine_id /var/lib/dbus/machine-id
    • boot_id /proc/sys/kernel/random/boot_id
    ➜  ~  cat /var/lib/dbus/machine-id
    a8919ec7c9ee43bd1bcaf6cd55e1a7bc
    ➜  ~  cat /proc/sys/kernel/random/boot_id
    2f6bac7f-738a-4d85-8b58-53e71296119e
    
    • disk /proc/diskstats
    # github.com/google/cadvisor/fs/fs.go
    189 func (self *RealFsInfo) GetFsInfoForPath(mountSet map[string]struct{}) ([]Fs, error) {
    190     filesystems := make([]Fs, 0)
    191     deviceSet := make(map[string]struct{})
    192     diskStatsMap, err := getDiskStatsMap("/proc/diskstats") // 磁盘状态通过 /proc/diskstats 获取
    193     if err != nil {
    194         return nil, err
    195     }
    ... ...
    

    对于分配给容器的 pool 值,如果是 device mapper 类型的,则通过 dmsetup status pool_device_name 获取设备使用情况

    # github.com/google/cadvisor/fs/fs.go
    ... ...
    205             case "devicemapper":
    206                 total, free, avail, err = getDMStats(device, partition.blockSize)
    ....
    
    • 获取磁盘设备信息 /sys/block
    # github.com/google/cadvisor/utils/sysinfo/sysinfo.go
    31 func GetBlockDeviceInfo(sysfs sysfs.SysFs) (map[string]info.DiskInfo, error) {
    32     disks, err := sysfs.GetBlockDevices()
           // cadvisor/utils/sysfs/sysfs.go GetBlockDevices() 可知,通过 /sys/block 获取磁盘设备名,其它信息雷同
    33     if err != nil {
    34         return nil, err
    35     }
    ... ...
    
    ➜  ~  cat /sys/block/sda/dev 		        # 主次设备号
    8:0
    ➜  ~  cat /sys/block/sda/size		        # 磁盘设备大小
    976773168
    ➜  ~  cat /sys/block/sda/queue/scheduler    # 磁盘调度类型
    noop deadline [cfq]
    ➜  ~
    
    • netDevices /sys/class/net
    # github.com/google/cadvisor/utils/sysinfo/sysinfo.go
    87 func GetNetworkDevices(sysfs sysfs.SysFs) ([]info.NetInfo, error) {
    88     devs, err := sysfs.GetNetworkDevices()
    89     if err != nil {
    90         return nil, err
    91     }
    92     netDevices := []info.NetInfo{}
    ... ...
    
    ➜  ~  cat /sys/class/net/eth0/address
    f0:76:1c:6d:26:d6
    ➜  ~  cat /sys/class/net/eth0/mtu
    1500
    ➜  ~  cat /sys/class/net/eth0/speed
    10
    
    • system UUID /sys/class/dmi/id/product_uuid
    ➜  ~  sudo cat /sys/class/dmi/id/product_uuid
    F27316D5-4DB3-E411-A26A-F0761C6D26D6
    ➜  ~
    

    Docker Info

    # github.com/google/cadvisor/manager/manager.go
    201 // Start the container manager.
    202 func (self *manager) Start() error {
    203     // Register Docker container factory.
    204     err := docker.Register(self, self.fsInfo)
    205     if err != nil {
    206         glog.Errorf("Docker container factory registration failed: %v.", err)
    207     }
    ... ...
    

    cAdvisor 启动 container manager,通过调用 Linux 系统 inotify 监测 cgroup docker 目录的文件变化,以此判断 docker 容器的创建和删除。

    Getprocesslist

    cAdvisor 首先判断自己是否运行在 container 中,如果是则 chroot /rootfs (cAdvisor 在 container 中运行会通过 volume 方式把系统 / mount 到 /rootfs 下)再执行 ps -e -o user,pid,ppid,stime,pcpu,pmem,rss,vsz,stat,time,comm,cgroup,否则直接执行获取。(github.com/google/cadvisor/manager/container.go GetProcessList() 函数实现)

    # github.com/google/cadvisor/manager/manager.go
    687 func (m *manager) GetProcessList(containerName string, options v2.RequestOptions) ([]v2.ProcessInfo, error) {
    688     // override recursive. Only support single container listing.
    689     options.Recursive = false
    690     conts, err := m.getRequestedContainers(containerName, options)
    691     if err != nil {
    692         return nil, err
    693     }
    694     if len(conts) != 1 {
    695         return nil, fmt.Errorf("Expected the request to match only one container")
    696     }
    ... ...
    

    如果是获取全局而非某个 container 的 processlist,则 containername 默认为 "/",判断逻辑如下:

    # github.com/google/cadvisor/manager/container.go
    273         if isRoot || c.info.Name == cgroup {
    274             processes = append(processes, v2.ProcessInfo{
    275                 User:          fields[0],
    276                 Pid:           pid,
    277                 Ppid:          ppid,
    278                 StartTime:     fields[3],
    279                 PercentCpu:    float32(percentCpu),
    ... ...
    

    ContainerInfo

    获取指定 container 的 info 数据:

    # github.com/google/cadvisor/manager/manager.go
    531 func (self *manager) DockerContainer(containerName string, query *info.ContainerInfoRequest) (info.ContainerInfo, error) {
    532     container, err := self.getDockerContainer(containerName)
    533     if err != nil {
    534         return info.ContainerInfo{}, err
    535     }
    536
    ... ...
    

    具体获取函数:

    # github.com/google/cadvisor/manager/container.go
    102 func (c *containerData) GetInfo() (*containerInfo, error) {
    103     // Get spec and subcontainers.
    104     if time.Since(c.lastUpdatedTime) > 5*time.Second {   // 可以看出当上一次更新时间超过 5s 的时候才会更新 info 数据
    105         err := c.updateSpec()
    106         if err != nil {
    107             return nil, err
    108         }
    ... ...
    
    • ContainerSpec

    cAdvisor 获取 ContainerSpec 总体的逻辑是找出对应 container 对应的系统 Spec json 文件。

    container info handler:

    # github.com/google/cadvisor/container/docker/handler.go
    220 func (self *dockerContainerHandler) GetSpec() (info.ContainerSpec, error) {
    221     mi, err := self.machineInfoFactory.GetMachineInfo()
    222     if err != nil {
    223         return info.ContainerSpec{}, err
    224     }
    
    • /proc/self/mountinfo 当前运行进程的挂载信息
    • /proc/cgroups 当前内核支持的 cgroup 类型
    • libcontainer configpath /var/run/docker/execdriver/native/containerId
    # github.com/google/cadvisor/container/libcontainer/compatibility.go 读取 libcontainer config 文件信息
    150 func ReadConfig(dockerRoot, dockerRun, containerID string) (*configs.Config, error) {
    151     // Try using the new config if it is available.
    152     configPath := configPath(dockerRun, containerID)
    153     if utils.FileExists(configPath) {
    154         out, err := ioutil.ReadFile(configPath)
    155         if err != nil {
    156             return nil, err
    157         }
    158
    159         var state libcontainer.State
    160         err = json.Unmarshal(out, &state)
    161         if err != nil {
    162             return nil, err
    163         }
    164         return &state.Config, nil
    165     }
    
    # github.com/google/cadvisor/container/docker/handler.go 磁盘使用率统计目前仅支持 aufs,这是目前 CentOS7 上没有 filesystem 大小的原因
    ... ...
    232     // For now only enable for aufs filesystems
    233     spec.HasFilesystem = self.storageDriver == aufsStorageDriver
    ... ...
    241 func (self *dockerContainerHandler) getFsStats(stats *info.ContainerStats) error {
    242     // No support for non-aufs storage drivers.
    243     if self.storageDriver != aufsStorageDriver {
    244         return nil
    245     }
    ... ...
    
    • 文件系统相关 # github.com/google/cadvisor/fs/fs.go

    • aufs storage dir: /var/lib/docker/aufs/diff/containerId

    • 获取 cgroup 和 网络状态

    # github.com/google/cadvisor/container/docker/handler.go
    # cgroup 信息是调用 github.com/docker/libcontainer/cgroups 获取
    276 func (self *dockerContainerHandler) GetStats() (*info.ContainerStats, error) {
    277     stats, err := containerLibcontainer.GetStats(self.cgroupManager, self.rootFs, self.pid)
    278     if err != nil {
    279         return stats, err
    280     }
    281     // Clean up stats for containers that don't have their own network - this
    282     // includes containers running in Kubernetes pods that use the network of the
    283     // infrastructure container. This stops metrics being reported multiple times
    284     // for each container in a pod.  如果 container 没有自己的网络,则返回空。对于 K8s pod 内部 container 都是通过 pause container 网络通信
    285     if !hasNet(self.networkMode) {
    286         stats.Network = info.NetworkStats{}
    287     }
    288
    289     // Get filesystem stats.
    290     err = self.getFsStats(stats)
    291     if err != nil {
    292         return stats, err
    293     }
    294
    295     return stats, nil
    296 }
    

    具体获取 cgroup 和 network 的函数实现 GetStats()

    • container 网络状态 /proc/<pid>/net/dev
    # github.com/google/cadvisor/container/libcontainer/helpers.go
    81 // Get cgroup and networking stats of the specified container
    82 func GetStats(cgroupManager cgroups.Manager, rootFs string, pid int) (*info.ContainerStats, error) {
    83     cgroupStats, err := cgroupManager.GetStats()
    84     if err != nil {
    85         return nil, err
    86     }
    87     libcontainerStats := &libcontainer.Stats{
    88         CgroupStats: cgroupStats,
    89     }
    90     stats := toContainerStats(libcontainerStats)
    91
    92     // If we know the pid then get network stats from /proc/<pid>/net/dev
    93     if pid > 0 {
    94         netStats, err := networkStatsFromProc(rootFs, pid)
    95         if err != nil {
    96             glog.V(2).Infof("Unable to get network stats from pid %d: %v", pid, err)
    97         } else {
    98             stats.Network.Interfaces = append(stats.Network.Interfaces, netStats...)
    99         }
    ... ...
    

    cgroup status 包含信息如下(cAdvisor 调用 libcontainer/cgroups 获取 container 对应 cgroup 信息):

    # github.com/docker/libcontainer/cgroups/stats.go
    88 func NewStats() *Stats {
    89     memoryStats := MemoryStats{Stats: make(map[string]uint64)}
    90     hugetlbStats := make(map[string]HugetlbStats)
    91     return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats}
    92 }

  • 相关阅读:
    solr不是标准的java project解决方案
    solr 索引库的维护
    solr linux配置
    JSON跨域问题总结
    阿里云字体图标的引用
    Android 体系结构
    java.lang.NoClassDefFoundError 异常
    java java.uitl.Random产生随机数
    Android 应用间的集成
    iOS7状态栏字体颜色修改
  • 原文地址:https://www.cnblogs.com/ohgenlong/p/8601341.html
Copyright © 2011-2022 走看看