zoukankan      html  css  js  c++  java
  • UDP隧道

    隧道创建

    对于L2TP、FOU/GUE、GENEVE和VXLAN等隧道,隧道创建时都需要在内核中新建一个UDP套接口,框架中的函数udp_sock_create4提供此功能。该函数不仅创建套接口,还将其绑定(bind)到本地地址和端口;如果特定隧道提供了对端信息(cfg->peer_udp_port不为0),还会连接(connect)到对端。

    /*
     * udp_sock_create4 - create, bind and optionally connect an IPv4 UDP
     * socket for a tunnel.
     *
     * @net:   network namespace passed to sock_create_kern().
     * @cfg:   supplies the local address/port, the optional peer
     *         address/port and the checksum preference.
     * @sockp: not referenced in the lines shown here.
     *
     * NOTE(review): this is an excerpt. The declarations of err, sock and
     * udp_addr, any checks on err, and the return statement are not shown.
     */
    int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg, struct socket **sockp)
    { 
    /* Create an AF_INET datagram socket in the given namespace. */
    err = sock_create_kern(net, AF_INET, SOCK_DGRAM, 0, &sock);
    
    /* Bind to the local address and port taken from the configuration. */
    udp_addr.sin_family = AF_INET;
    udp_addr.sin_addr = cfg->local_ip;
    udp_addr.sin_port = cfg->local_udp_port; 
    err = kernel_bind(sock, (struct sockaddr *)&udp_addr, sizeof(udp_addr));
    
    /* Connect only when a peer port was configured; udp_addr is reused. */
    if (cfg->peer_udp_port) { 
    udp_addr.sin_family = AF_INET;
    udp_addr.sin_addr = cfg->peer_ip;
    udp_addr.sin_port = cfg->peer_udp_port;
    err = kernel_connect(sock, (struct sockaddr *)&udp_addr, sizeof(udp_addr), 0);
    }
    /* sk_no_check_tx is the inverse of the use_udp_checksums setting. */
    sock->sk->sk_no_check_tx = !cfg->use_udp_checksums;
    }


    函数setup_udp_tunnel_sock建立套接口与隧道的绑定。此函数为第二个参数指定的socket套接口设置隧道相关属性,此后内核从该套接口接收到的数据包将交由配置的encap_rcv回调函数(cfg->encap_rcv)处理。目前基于UDP的隧道协议主要有L2TP、VXLAN和GENEVE,分别注册了接收处理函数l2tp_udp_encap_recv、vxlan_rcv和geneve_udp_encap_recv。通用的UDP隧道协议FOU和GUE,处理函数分别为fou_udp_recv和gue_udp_recv。

    void setup_udp_tunnel_sock(struct net *net, struct socket *sock, struct udp_tunnel_sock_cfg *cfg)
    {
    struct sock *sk = sock->sk;
    
    udp_sk(sk)->encap_type = cfg->encap_type;
    udp_sk(sk)->encap_rcv = cfg->encap_rcv;
    udp_sk(sk)->encap_destroy = cfg->encap_destroy;
    udp_sk(sk)->gro_receive = cfg->gro_receive;
    udp_sk(sk)->gro_complete = cfg->gro_complete;
    
    udp_tunnel_encap_enable(sock);
    }

    UDP隧道接收

    在UDP数据包接收路径中,函数udp_queue_rcv_skb首先判断静态键udp_encap_needed是否使能,并且当前套接口的encap_type是否不为0。两个条件都满足时,读取绑定在此套接口上的封装数据包回调函数encap_rcv,若其不为空则调用它进行处理。

    /*
     * udp_queue_rcv_skb - (excerpt) hand a received skb to the socket's
     * encapsulation callback when one is installed.
     *
     * The callback is consulted only when the udp_encap_needed static key
     * is enabled and the socket has a non-zero encap_type. The callback
     * pointer is read once with READ_ONCE() and called only if non-NULL.
     *
     * NOTE(review): this is an excerpt. The declarations of encap_rcv and
     * ret, the handling of ret, and the rest of the function including its
     * return statement are not shown. Only the closing braces missing from
     * the excerpt have been restored.
     */
    static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
    {
        struct udp_sock *up = udp_sk(sk);

        if (static_key_false(&udp_encap_needed) && up->encap_type) {
            encap_rcv = READ_ONCE(up->encap_rcv);
            if (encap_rcv) {
                ret = encap_rcv(sk, skb);
                /* ... handling of ret elided in this excerpt ... */
            }
        }
        /* ... remainder of the function elided in this excerpt ... */
    }
    
     
    /*
     * udp_tunnel_xmit_skb - (excerpt) fill in the UDP header of @skb and
     * pass the packet to iptunnel_xmit() with protocol IPPROTO_UDP.
     *
     * NOTE(review): this is an excerpt. The "..." in the parameter list
     * stands for parameters that are not shown; uh, src, dst, src_port,
     * dst_port, tos, ttl, df and xnet are used below without a visible
     * declaration.
     */
    void udp_tunnel_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb, ...)
    {
        uh = udp_hdr(skb);
        
        /* Destination/source ports and length (skb->len via htons()). */
        uh->dest = dst_port;
        uh->source = src_port; 
        uh->len = htons(skb->len);
        /* Hand the packet to the IP tunnel transmit helper. */
        iptunnel_xmit(sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet);
    } 
     

    UDP隧道发送

    当数据包到达UDP隧道设备的发送函数(ndo_start_xmit)时,例如GENEVE隧道发送函数(geneve_xmit),进行特定隧道相关处理,之后由通用UDP隧道发送函数udp_tunnel_xmit_skb进行发送。
     

    const struct net_device_ops mlx5e_netdev_ops = {
            .ndo_open                = mlx5e_open,
            .ndo_stop                = mlx5e_close,
            .ndo_start_xmit          = mlx5e_xmit,
            .ndo_setup_tc            = mlx5e_setup_tc,
            .ndo_select_queue        = mlx5e_select_queue,
            .ndo_get_stats64         = mlx5e_get_stats,
            .ndo_set_rx_mode         = mlx5e_set_rx_mode,
            .ndo_set_mac_address     = mlx5e_set_mac,
            .ndo_vlan_rx_add_vid     = mlx5e_vlan_rx_add_vid,
            .ndo_vlan_rx_kill_vid    = mlx5e_vlan_rx_kill_vid,
            .ndo_set_features        = mlx5e_set_features,
            .ndo_fix_features        = mlx5e_fix_features,
            .ndo_change_mtu          = mlx5e_change_nic_mtu,
            .ndo_do_ioctl            = mlx5e_ioctl,
            .ndo_set_tx_maxrate      = mlx5e_set_tx_maxrate,
            .ndo_udp_tunnel_add      = mlx5e_add_vxlan_port,
            .ndo_udp_tunnel_del      = mlx5e_del_vxlan_port,
            .ndo_features_check      = mlx5e_features_check,
            .ndo_tx_timeout          = mlx5e_tx_timeout,
            .ndo_bpf                 = mlx5e_xdp,
            .ndo_xdp_xmit            = mlx5e_xdp_xmit,
            .ndo_xsk_wakeup          = mlx5e_xsk_wakeup,
    #ifdef CONFIG_MLX5_EN_ARFS
            .ndo_rx_flow_steer       = mlx5e_rx_flow_steer,
    #endif
    #ifdef CONFIG_MLX5_ESWITCH
            .ndo_bridge_setlink      = mlx5e_bridge_setlink,
            .ndo_bridge_getlink      = mlx5e_bridge_getlink,
    
            /* SRIOV E-Switch NDOs */
            .ndo_set_vf_mac          = mlx5e_set_vf_mac,
            .ndo_set_vf_vlan         = mlx5e_set_vf_vlan,
            .ndo_set_vf_spoofchk     = mlx5e_set_vf_spoofchk,
            .ndo_set_vf_trust        = mlx5e_set_vf_trust,
            .ndo_set_vf_rate         = mlx5e_set_vf_rate,
            .ndo_get_vf_config       = mlx5e_get_vf_config,
            .ndo_set_vf_link_state   = mlx5e_set_vf_link_state,
            .ndo_get_vf_stats        = mlx5e_get_vf_stats,
    #endif
    };

    UDP隧道Offload

    对于支持UDP隧道(VXLAN/GENEVE)Offloading功能的物理网卡,其能力通过特性标志NETIF_F_RX_UDP_TUNNEL_PORT表示;NETDEV_UDP_TUNNEL_PUSH_INFO/NETDEV_UDP_TUNNEL_DROP_INFO则是通知事件,用于触发隧道端口信息的下发与撤销。函数udp_tunnel_push_rx_port与udp_tunnel_drop_rx_port分别通过网卡驱动的ndo_udp_tunnel_add和ndo_udp_tunnel_del回调,向网卡下发和撤销UDP隧道的接收端口信息。


     

    void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock,
                     unsigned short type)
    {
        struct sock *sk = sock->sk;
        struct udp_tunnel_info ti;
        if (!dev->netdev_ops->ndo_udp_tunnel_add ||
            !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
            return;
        ti.type = type;
        ti.sa_family = sk->sk_family;
        ti.port = inet_sk(sk)->inet_sport;
        dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti);
    }
    EXPORT_SYMBOL_GPL(udp_tunnel_push_rx_port);
    /*
     * udp_tunnel_drop_rx_port - withdraw a tunnel socket's port from a
     * device through its ndo_udp_tunnel_del hook.
     *
     * @dev:  device to notify.
     * @sock: tunnel socket; its address family and inet_sport are reported.
     * @type: tunnel type value stored in the info structure.
     *
     * Does nothing when the device has no ndo_udp_tunnel_del hook or does
     * not advertise NETIF_F_RX_UDP_TUNNEL_PORT.
     */
    void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock,
                     unsigned short type)
    {
        struct sock *sk = sock->sk;
        struct udp_tunnel_info ti;

        if (!dev->netdev_ops->ndo_udp_tunnel_del ||
            !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT))
            return;

        ti.type = type;
        ti.sa_family = sk->sk_family;
        ti.port = inet_sk(sk)->inet_sport;

        dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti);
    }
    /* Exported like udp_tunnel_push_rx_port; vxlan_offload_rx_ports calls both. */
    EXPORT_SYMBOL_GPL(udp_tunnel_drop_rx_port);
    /*
     * vxlan_netdevice_event - netdevice notifier callback for VXLAN.
     *
     * @unused: notifier block, not referenced.
     * @event:  NETDEV_* event code.
     * @ptr:    notifier info from which the affected device is extracted.
     *
     * REGISTER and UDP_TUNNEL_PUSH_INFO push the VXLAN ports to the device.
     * UNREGISTER and UDP_TUNNEL_DROP_INFO drop them, and UNREGISTER also
     * runs the lower-device unregister handling. Other events are ignored.
     * Always returns NOTIFY_DONE.
     */
    static int vxlan_netdevice_event(struct notifier_block *unused,
                                     unsigned long event, void *ptr)
    {
            struct net_device *dev = netdev_notifier_info_to_dev(ptr);
            struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);

            switch (event) {
            case NETDEV_UNREGISTER:
                    vxlan_offload_rx_ports(dev, false);
                    vxlan_handle_lowerdev_unregister(vn, dev);
                    break;
            case NETDEV_REGISTER:
            case NETDEV_UDP_TUNNEL_PUSH_INFO:
                    vxlan_offload_rx_ports(dev, true);
                    break;
            case NETDEV_UDP_TUNNEL_DROP_INFO:
                    vxlan_offload_rx_ports(dev, false);
                    break;
            default:
                    break;
            }

            return NOTIFY_DONE;
    }
    
    /*
     * vxlan_offload_rx_ports - push or drop every VXLAN socket's port on a
     * device.
     *
     * @dev:  device to update; its namespace selects the vxlan_net walked.
     * @push: true to push the ports, false to drop them.
     *
     * Walks all PORT_HASH_SIZE buckets of the namespace's socket list while
     * holding vn->sock_lock. Sockets flagged VXLAN_F_GPE are reported as
     * UDP_TUNNEL_TYPE_VXLAN_GPE, all others as UDP_TUNNEL_TYPE_VXLAN.
     */
    static void vxlan_offload_rx_ports(struct net_device *dev, bool push)
    {
            struct vxlan_net *vn = net_generic(dev_net(dev), vxlan_net_id);
            struct vxlan_sock *vs;
            unsigned int bucket;

            spin_lock(&vn->sock_lock);
            for (bucket = 0; bucket < PORT_HASH_SIZE; bucket++) {
                    hlist_for_each_entry_rcu(vs, &vn->sock_list[bucket], hlist) {
                            unsigned short type = (vs->flags & VXLAN_F_GPE) ?
                                            UDP_TUNNEL_TYPE_VXLAN_GPE :
                                            UDP_TUNNEL_TYPE_VXLAN;

                            if (!push)
                                    udp_tunnel_drop_rx_port(dev, vs->sock, type);
                            else
                                    udp_tunnel_push_rx_port(dev, vs->sock, type);
                    }
            }
            spin_unlock(&vn->sock_lock);
    }
    /*
     * mlx5e_vxlan_queue_work - queue a VXLAN port add or delete request on
     * the driver workqueue.
     *
     * @priv: driver private data; supplies the workqueue and is stored in
     *        the request.
     * @port: UDP port stored in the request.
     * @add:  non-zero selects the add handler, zero the delete handler.
     *
     * The request is allocated with GFP_ATOMIC. If the allocation fails the
     * request is dropped without reporting an error. The add handler in
     * this file frees the request after processing it.
     */
    static void mlx5e_vxlan_queue_work(struct mlx5e_priv *priv, u16 port, int add)
    {
            struct mlx5e_vxlan_work *req = kmalloc(sizeof(*req), GFP_ATOMIC);

            if (!req)
                    return;

            req->priv = priv;
            req->port = port;

            if (!add)
                    INIT_WORK(&req->work, mlx5e_vxlan_del_work);
            else
                    INIT_WORK(&req->work, mlx5e_vxlan_add_work);

            queue_work(priv->wq, &req->work);
    }
    
    /*
     * mlx5e_add_vxlan_port - ndo_udp_tunnel_add handler for mlx5e.
     *
     * @netdev: device the tunnel port is being added to.
     * @ti:     tunnel info; only type and port are read.
     *
     * Queues an add request for the port, converted with be16_to_cpu(),
     * when the tunnel type is UDP_TUNNEL_TYPE_VXLAN and
     * mlx5_vxlan_allowed() accepts the device. Otherwise does nothing.
     */
    void mlx5e_add_vxlan_port(struct net_device *netdev, struct udp_tunnel_info *ti)
    {
            struct mlx5e_priv *priv = netdev_priv(netdev);

            if (ti->type == UDP_TUNNEL_TYPE_VXLAN &&
                mlx5_vxlan_allowed(priv->mdev->vxlan))
                    mlx5e_vxlan_queue_work(priv, be16_to_cpu(ti->port), 1);
    }
    
    /*
     * mlx5e_vxlan_add_work - work handler that adds a queued VXLAN port.
     *
     * @work: work item embedded in a struct mlx5e_vxlan_work.
     *
     * Recovers the enclosing request, adds its port through
     * mlx5_vxlan_add_port() while holding priv->state_lock, then frees the
     * request.
     */
    static void mlx5e_vxlan_add_work(struct work_struct *work)
    {
            struct mlx5e_vxlan_work *req;
            struct mlx5e_priv *priv;

            req = container_of(work, struct mlx5e_vxlan_work, work);
            priv = req->priv;

            mutex_lock(&priv->state_lock);
            mlx5_vxlan_add_port(priv->mdev->vxlan, req->port);
            mutex_unlock(&priv->state_lock);

            kfree(req);
    }
    
    /*
     * mlx5_vxlan_add_port - (excerpt) add a VXLAN UDP port by issuing the
     * add-port command for the device behind @vxlan.
     *
     * NOTE(review): this is an excerpt. The declaration of ret, any other
     * bookkeeping and the return statement are not shown.
     */
    int mlx5_vxlan_add_port(struct mlx5_vxlan *vxlan, u16 port)
    {
            ret = mlx5_vxlan_core_add_port_cmd(vxlan->mdev, port);
          
    }
    
    /*
     * mlx5_vxlan_core_add_port_cmd - build and execute the
     * ADD_VXLAN_UDP_DPORT command.
     *
     * @mdev: core device the command is executed on.
     * @port: value written to the vxlan_udp_port field of the command.
     *
     * Both mailboxes start zeroed. Returns the result of mlx5_cmd_exec().
     */
    static int mlx5_vxlan_core_add_port_cmd(struct mlx5_core_dev *mdev, u16 port)
    {
            u32 cmd_in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)] = {0};
            u32 cmd_out[MLX5_ST_SZ_DW(add_vxlan_udp_dport_out)] = {0};
            int err;

            MLX5_SET(add_vxlan_udp_dport_in, cmd_in, opcode,
                     MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT);
            MLX5_SET(add_vxlan_udp_dport_in, cmd_in, vxlan_udp_port, port);

            err = mlx5_cmd_exec(mdev, cmd_in, sizeof(cmd_in),
                                cmd_out, sizeof(cmd_out));
            return err;
    }

    外部UDP隧道

    对于不是通过ip link命令创建的UDP隧道,即控制通道位于外部系统(如路由系统)的隧道,其隧道参数通过ip route encap指定,因此需要将这些与路由相关的隧道信息保存在路由缓存中。参见UDP框架函数udp_tun_rx_dst,它使用metadata_dst结构体保存通用路由信息和隧道信息。
     

    /*
     * udp_tun_rx_dst - build a metadata dst for a received UDP tunnel packet.
     *
     * @skb:       received packet; its UDP header supplies the ports and
     *             the checksum field.
     * @family:    not consulted in the lines shown here.
     * @flags:     tunnel flags forwarded to ip_tun_rx_dst().
     * @tunnel_id: tunnel id forwarded to ip_tun_rx_dst().
     * @md_size:   metadata size forwarded to ip_tun_rx_dst().
     *
     * Copies the UDP source and destination ports into the tunnel key and
     * sets TUNNEL_CSUM when the UDP checksum field is non-zero.
     *
     * Returns the metadata dst, or NULL when ip_tun_rx_dst() returns NULL.
     *
     * NOTE(review): the NULL check assumes ip_tun_rx_dst() reports failure
     * by returning NULL - confirm against its definition.
     */
    struct metadata_dst *udp_tun_rx_dst(struct sk_buff *skb,  unsigned short family, __be16 flags, __be64 tunnel_id, int md_size)
    {
        struct metadata_dst *tun_dst;
        struct ip_tunnel_info *info;

        tun_dst = ip_tun_rx_dst(skb, flags, tunnel_id, md_size);
        if (!tun_dst)
            return NULL;

        info = &tun_dst->u.tun_info;
        info->key.tp_src = udp_hdr(skb)->source;
        info->key.tp_dst = udp_hdr(skb)->dest;
        if (udp_hdr(skb)->check)
            info->key.tun_flags |= TUNNEL_CSUM;

        return tun_dst;
    }
     
  • 相关阅读:
    MYSQL 优化(二),持续更新收藏
    一些linux命令 备份下
    lsyncd +xinetd+syncd 多服务器文件同步
    阿里slb+ecs+https
    微擎 从 php5 到php7 的各种填坑 持续更新
    lmap
    微擎的ifp ife ifpp
    工具索引 mark名字
    Funny Bug || Sky Hole
    mysql 查询小技巧
  • 原文地址:https://www.cnblogs.com/dream397/p/14467650.html
Copyright © 2011-2022 走看看