zoukankan      html  css  js  c++  java
  • kata tap

     k8s启动两个container

    容器1

    容器二

    k8s -kata 虚拟机

     两个容器和虚拟机的mac地址都一样

     

    #
    # Internetworking model
    # Determines how the VM should be connected to
    # the container network interface
    # Options:
    #
    #   - macvtap
    #     Used when the Container network interface can be bridged using
    #     macvtap.
    #
    #   - none
    #     Used when customizing the network. Only creates a tap device. No veth pair.
    #
    #   - tcfilter
    #     Uses tc filter rules to redirect traffic from the network interface
    #     provided by plugin to a tap interface connected to the VM.
    #

    internetworking_model="tcfilter"

    # If enabled, the runtime will not create a network namespace for shim and hypervisor processes.
    # This option may have some potential impacts to your host. It should only be used when you know what you're doing.
    # `disable_new_netns` conflicts with `enable_netmon`
    # `disable_new_netns` conflicts with `internetworking_model=tcfilter` and `internetworking_model=macvtap`. It works only
    # with `internetworking_model=none`. The tap device will be in the host network namespace and can connect to a bridge
    # (like OVS) directly.
    # If you are using docker, `disable_new_netns` only works with `docker run --net=none`
    # (default: false)
    #disable_new_netns = true
    // createEndpoint builds the Endpoint that corresponds to the interface
    // described by netInfo.
    //
    // Selection order:
    //   1. a physical interface yields a physical endpoint;
    //   2. an interface with an associated vhost-user socket yields a
    //      vhost-user endpoint;
    //   3. otherwise the endpoint is chosen from netInfo.Iface.Type.
    //
    // idx and model are forwarded to the endpoint constructors that accept them.
    // link is only consulted for "tuntap" interfaces, where it must be a
    // *netlink.Tuntap so that its Mode can be inspected.
    //
    // An error is returned when the interface type is unsupported, when a helper
    // fails, or when a "tuntap" interface is given a nil link or a link that is
    // not a *netlink.Tuntap (previously these two cases produced a nil endpoint
    // with a nil error, or a panic, respectively).
    func createEndpoint(netInfo NetworkInfo, idx int, model NetInterworkingModel, link netlink.Link) (Endpoint, error) {
            var endpoint Endpoint

            // Check if interface is a physical interface. Do not create
            // tap interface/bridge if it is.
            isPhysical, err := isPhysicalIface(netInfo.Iface.Name)
            if err != nil {
                    return nil, err
            }

            if isPhysical {
                    networkLogger().WithField("interface", netInfo.Iface.Name).Info("Physical network interface found")
                    endpoint, err = createPhysicalEndpoint(netInfo)
                    return endpoint, err
            }

            // Check if this is a dummy interface which has a vhost-user socket associated with it
            socketPath, err := vhostUserSocketPath(netInfo)
            if err != nil {
                    return nil, err
            }

            if socketPath != "" {
                    networkLogger().WithField("interface", netInfo.Iface.Name).Info("VhostUser network interface found")
                    endpoint, err = createVhostUserEndpoint(netInfo, socketPath)
                    return endpoint, err
            }

            switch netInfo.Iface.Type {
            case "macvlan":
                    networkLogger().Infof("macvlan interface found")
                    endpoint, err = createBridgedMacvlanNetworkEndpoint(idx, netInfo.Iface.Name, model)
            case "macvtap":
                    networkLogger().Infof("macvtap interface found")
                    endpoint, err = createMacvtapNetworkEndpoint(netInfo)
            case "tap":
                    networkLogger().Info("tap interface found")
                    endpoint, err = createTapNetworkEndpoint(idx, netInfo.Iface.Name)
            case "tuntap":
                    // The tuntap mode lives on the link; without it no endpoint can
                    // be chosen, so fail loudly instead of returning (nil, nil).
                    if link == nil {
                            return nil, fmt.Errorf("tuntap interface %s: link information not provided", netInfo.Iface.Name)
                    }

                    // Checked assertion: an unexpected link type must not panic.
                    tuntapLink, ok := link.(*netlink.Tuntap)
                    if !ok {
                            return nil, fmt.Errorf("tuntap interface %s: unexpected link type %T", netInfo.Iface.Name, link)
                    }

                    switch tuntapLink.Mode {
                    case 0:
                            // mount /sys/class/net to get links
                            return nil, fmt.Errorf("Network device mode not determined correctly. Mount sysfs in caller")
                    case 1:
                            // Mode 1 is treated as a tun device.
                            return nil, fmt.Errorf("tun networking device not yet supported")
                    case 2:
                            // Mode 2 is treated as a tap device.
                            networkLogger().Info("tuntap tap interface found")
                            endpoint, err = createTuntapNetworkEndpoint(idx, netInfo.Iface.Name, netInfo.Iface.HardwareAddr, model)
                    default:
                            return nil, fmt.Errorf("tuntap network %v mode unsupported", tuntapLink.Mode)
                    }
            case "veth":
                    endpoint, err = createVethNetworkEndpoint(idx, netInfo.Iface.Name, model)
            case "ipvlan":
                    endpoint, err = createIPVlanNetworkEndpoint(idx, netInfo.Iface.Name)
            default:
                    return nil, fmt.Errorf("Unsupported network interface: %s", netInfo.Iface.Type)
            }

            return endpoint, err
    }
    // createTuntapNetworkEndpoint assembles a TuntapEndpoint for index idx.
    //
    // TuntapInterface.Name is ifName, or "eth<idx>" when ifName is empty. The
    // TAP interface is named "tap<idx>_kata" and its HardAddr is the string form
    // of hwName. The network pair comes from createNetworkInterfacePair.
    //
    // A negative idx yields an error together with an empty, non-nil endpoint;
    // a failure from createNetworkInterfacePair is returned as-is with a nil
    // endpoint.
    func createTuntapNetworkEndpoint(idx int, ifName string, hwName net.HardwareAddr, internetworkingModel NetInterworkingModel) (*TuntapEndpoint, error) {
            if idx < 0 {
                    return &TuntapEndpoint{}, fmt.Errorf("invalid network endpoint index: %d", idx)
            }

            pair, pairErr := createNetworkInterfacePair(idx, ifName, internetworkingModel)
            if pairErr != nil {
                    return nil, pairErr
            }

            // Prefer the caller-supplied name; otherwise derive one from the index.
            name := ifName
            if name == "" {
                    name = fmt.Sprintf("eth%d", idx)
            }

            tapIface := NetworkInterface{
                    Name:     fmt.Sprintf("tap%d_kata", idx),
                    HardAddr: hwName.String(),
            }

            return &TuntapEndpoint{
                    NetPair: pair,
                    TuntapInterface: TuntapInterface{
                            Name:     name,
                            TAPIface: tapIface,
                    },
                    EndpointType: TuntapEndpointType,
            }, nil
    }

    tcFilterNetModelStr = "tcfilter"

    // SetModel sets the receiver to the interworking model named by modelName.
    // It returns an error, leaving the receiver untouched, when modelName does
    // not match any of the known model strings.
    func (n *NetInterworkingModel) SetModel(modelName string) error {
            var selected NetInterworkingModel

            switch modelName {
            case defaultNetModelStr:
                    selected = DefaultNetInterworkingModel
            case macvtapNetModelStr:
                    selected = NetXConnectMacVtapModel
            case tcFilterNetModelStr:
                    selected = NetXConnectTCFilterModel
            case noneNetModelStr:
                    selected = NetXConnectNoneModel
            default:
                    return fmt.Errorf("Unknown type %s", modelName)
            }

            *n = selected
            return nil
    }
    Host cgroups support: The virtual machine is now constrained in a host side cpu cgroup, enabling the requested cpu quota and periods to be better honored, protecting against a single container using up host resources which could lead to things like denial of service.
    NEMU `virt` machine type support: This new machine type is optimized for cloud environments. NEMU is a lighter weight version of QEMU intended to reduce the VM attack footprint, improving security. To learn more, see https://github.com/intel/nemu.
    New NetInterworkingModel `none`: It works with tap endpoint types so that enlightened CNI plugins can add tap devices to a sandbox directly, bypassing host network namespaces and providing better performance with less network setup complexity.
    New NetInterworkingModel `tcfilter`: Another method for Kata Containers to bridge the host netns veth and guest tap device, with TC filter rules. Delivers more compatibility with different network endpoint types and CNI plugins.
    Enable macvlan and ipvlan network support: Networking models provide lightweight, fast access to underlay or host interfaces without NATing.
    Guest rootfs image get `guest_hook_path`: Saves prestart/poststart/prestop/poststop hook binaries, and they will be executed in the guest at a specified container life cycle point accordingly. This helps with vendor-specific device passthrough to the Kata VM.
     Source: the source mode is used to filter traffic based on a list of allowed source MAC addresses to create MAC-based VLAN associations. Please see the commit message.
    
    The type is chosen according to different needs. Bridge mode is the most commonly used.
    
    Use a MACVLAN when you want to connect directly to a physical network from containers.
    
    Here’s how to set up a MACVLAN:
    
    # ip link add macvlan1 link eth0 type macvlan mode bridge
    # ip link add macvlan2 link eth0 type macvlan mode bridge
    # ip netns add net1
    # ip netns add net2
    # ip link set macvlan1 netns net1
    # ip link set macvlan2 netns net2
    This creates two new MACVLAN devices in bridge mode and assigns these two devices to two different namespaces.

    cgroup 子系统 net_cls (Network classifier cgroup)

    net_cls 可以给 packet 打上 classid 的标签,用于过滤分类,有了上面的详细解释,这个 classid 的作用也非常明显了,就是用于标记skb所属的 qdisc class 的。

    有了这个标签,流量控制器(tc)可以对不同的 cgroup 的 packet 起作用,Netfilter(iptables)也可以基于这个标签有对应的动作。创建一个 net_cls cgroup 对应的是创建一个 net_cls.classid 文件,这个文件初始化为 0。可以写 16 进制的 0xAAAABBBB 到这个文件里面,AAAA 是 major 号,BBBB 是 minor 号。读这个文件返回的是十进制的数字。

    例子

    1
    2
    3
    4
    mkdir /sys/fs/cgroup/net_cls
    mount -t cgroup -onet_cls net_cls /sys/fs/cgroup/net_cls
    mkdir /sys/fs/cgroup/net_cls/0
    echo 0x100001 > /sys/fs/cgroup/net_cls/0/net_cls.classid

    设置一个 10:1 handle.

    1
    2
    cat /sys/fs/cgroup/net_cls/0/net_cls.classid
    1048577

    配置 tc:

    1
    2
    tc qdisc add dev eth0 root handle 10: htb
    tc class add dev eth0 parent 10: classid 10:1 htb rate 40mbit

    创建 traffic class 10:1

    1
    tc filter add dev eth0 parent 10: protocol ip prio 10 handle 1: cgroup

    配置 iptables,也可以用于这个 classid。

    1
    iptables -A OUTPUT -m cgroup ! --cgroup 0x100001 -j DROP

    对应的实现在 net/core/netclassid_cgroup.c 下面。起作用的方式是读取 css_cls_state 中的 classid,并调用 sock_cgroup_set_classid(&sock->sk->sk_cgrp_data, (unsigned long)v) 来设置 sock 的 classid。

    cgroup net_prio 子系统

    网络优先权(net_prio)子系统可以为各个 cgroup 中的应用程序动态配置每个网络接口的流量优先级。

    net_prio.prioidx

    只读文件。它包含一个特有整数值,kernel 使用该整数值作为这个 cgroup 的内部代表。

    net_prio.ifpriomap

    包含优先级图谱,这些优先级被分配给源于此群组进程的流量以及通过不同接口离开系统的流量。回顾 pfifo 里优先级映射,对应的就是这个值。该图用 <network_interface> <priority> 的形式以成对列表表示:

    1
    2
    3
    4
    ~]# cat /cgroup/net_prio/iscsi/net_prio.ifpriomap
    eth0 5
    eth1 4
    eth2 6

    net_prio.ifpriomap 文件的内容可以使用上述格式,通过将字符串回显至文件的方式来修改。例如:

    1
    ~]# echo "eth0 5" > /cgroup/net_prio/iscsi/net_prio.ifpriomap

    上述指令将强制设定任何源于 iscsi net_prio cgroup 进程的流量和 eth0 网络接口传出的流量的优先级为 5。父 cgroup 也有可写入的 net_prio.ifpriomap 文件,可以设定系统默认优先级。

    对应的实现在 net/core/netprio_cgroup.c 下面。实现方式是通过扩展 dev->priomap,以 prioidx -> prio 的映射记录这个优先级和 cgroup 的关系。

    net_prio 使用每个 cgroup 的 id(cgroup->id)作为 sequence number,并将这个存储在 sk_cgrp_prioidx 中。sk_cgrp_prioidx 这个是单纯的用于设置网络包的优先级,使用这个之后将会覆盖之前通过 SO_PRIORITY socket 选项或者其他方式设置的值。

  • 相关阅读:
    PHP安装linux
    nginx 安装
    Redis安装
    linux启动http服务
    收藏的有用的网页
    laravel框架部署后有用命令
    .net 报错access to the path c: empimagefilesmsc_cntr_0.txt is denied
    oracle 触发器
    学习Auxre记录
    mysql数据库索引
  • 原文地址:https://www.cnblogs.com/dream397/p/13999411.html
Copyright © 2011-2022 走看看