zoukankan      html  css  js  c++  java
  • Linux eth0, eth1, ..., eth%d 的生成【转】

    转自:https://blog.csdn.net/xiruanliuwei/article/details/78765255

    一直很好奇,Linux下的eth0, eth1,eth2等是如何生成的~

    特别函数:

    __dev_get_by_name

    通过 eth1 这样的名字,找 struct net_device

    分三个步骤:

    1. 创建 struct net_device 类型的变量 XA ;

    2. 将创建的变量 XA 通过 register_netdevice 函数进行注册;

    3. ifconfig ethx up,有了这最后一步,才能在 ifconfig 命令的输出中看到增加的 ethx;

    1. 从 alloc_etherdev 开始分析,其实 alloc_etherdev 是一个宏:

    alloc_etherdev   -->   alloc_etherdev_mq   -->   alloc_etherdev_mqs

    最后调用的 alloc_etherdev_mqs 才是一个函数,而这个函数又调用了 alloc_netdev_mqs 函数,具体的操作都是在这个函数中完成的,因此:

    alloc_etherdev   -->   alloc_etherdev_mq   -->   alloc_etherdev_mqs    -->   alloc_netdev_mqs

    /**
     * alloc_etherdev_mqs - allocate and set up an Ethernet device
     * @sizeof_priv: size of the additional driver-private structure to be
     *               allocated together with this Ethernet device
     * @txqs: number of TX queues this device has
     * @rxqs: number of RX queues this device has
     *
     * Thin wrapper around alloc_netdev_mqs(): it supplies the name template
     * "eth%d", the name origin NET_NAME_UNKNOWN and ether_setup() as the
     * setup callback, and forwards the sizes and queue counts unchanged.
     * The device is only allocated here, not registered.
     *
     * Return: whatever alloc_netdev_mqs() returns.
     */
    struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
                                          unsigned int rxqs)
    {
        struct net_device *ndev;

        ndev = alloc_netdev_mqs(sizeof_priv, "eth%d", NET_NAME_UNKNOWN,
                                ether_setup, txqs, rxqs);
        return ndev;
    }
     

    /**
     * alloc_netdev_mqs - allocate network device
     * @sizeof_priv: size of private data to allocate space for
     * @name: device name format string (for example "eth%d"); it is copied
     *        verbatim into dev->name, a "%d" is not expanded here
     * @name_assign_type: origin of device name
     * @setup: callback to initialize device
     * @txqs: the number of TX subqueues to allocate
     * @rxqs: the number of RX subqueues to allocate
     *
     * Allocates a struct net_device with private data area for driver use
     * and performs basic initialization. Also allocates subqueue structs
     * for each queue on the device.
     *
     * Return: the new device, or NULL when @txqs is zero, when @rxqs is zero
     * under CONFIG_SYSFS, or when one of the allocation/initialisation steps
     * below fails.
     */
    struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
                                        unsigned char name_assign_type,
                                        void (*setup)(struct net_device *),
                                        unsigned int txqs, unsigned int rxqs)
    {
        struct net_device *dev;
        size_t alloc_size;
        struct net_device *p;

        /* The strcpy() into dev->name further down is unbounded, so the
         * template itself must already fit.
         */
        BUG_ON(strlen(name) >= sizeof(dev->name));

        if (txqs < 1) {
            pr_err("alloc_netdev: Unable to allocate device with zero queues\n");
            return NULL;
        }

    #ifdef CONFIG_SYSFS
        if (rxqs < 1) {
            pr_err("alloc_netdev: Unable to allocate device with zero RX queues\n");
            return NULL;
        }
    #endif

        alloc_size = sizeof(struct net_device);
        if (sizeof_priv) {
            /* ensure 32-byte alignment of private area */
            alloc_size = ALIGN(alloc_size, NETDEV_ALIGN);
            alloc_size += sizeof_priv;
        }
        /* ensure 32-byte alignment of whole construct */
        alloc_size += NETDEV_ALIGN - 1;

        /* Allocate zeroed storage for the struct net_device (plus private
         * area and alignment slack); PTR_ALIGN() below does the alignment.
         */
        p = kvzalloc(alloc_size, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
        if (!p)
            return NULL;

        /* dev is the pointer that is eventually returned to the caller
         * (e.g. through alloc_etherdev()); padded records how far it was
         * moved from the raw allocation p.
         */
        dev = PTR_ALIGN(p, NETDEV_ALIGN);
        dev->padded = (char *)dev - (char *)p;

        dev->pcpu_refcnt = alloc_percpu(int);
        if (!dev->pcpu_refcnt)
            goto free_dev;

        if (dev_addr_init(dev))
            goto free_pcpu;

        dev_mc_init(dev);
        dev_uc_init(dev);

        dev_net_set(dev, &init_net);

        dev->gso_max_size = GSO_MAX_SIZE;
        dev->gso_max_segs = GSO_MAX_SEGS;

        INIT_LIST_HEAD(&dev->napi_list);
        INIT_LIST_HEAD(&dev->unreg_list);
        INIT_LIST_HEAD(&dev->close_list);
        INIT_LIST_HEAD(&dev->link_watch_list);
        INIT_LIST_HEAD(&dev->adj_list.upper);
        INIT_LIST_HEAD(&dev->adj_list.lower);
        INIT_LIST_HEAD(&dev->ptype_all);
        INIT_LIST_HEAD(&dev->ptype_specific);
    #ifdef CONFIG_NET_SCHED
        hash_init(dev->qdisc_hash);
    #endif
        dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM;

        /* setup is the callback supplied by the caller; when called from
         * alloc_etherdev_mqs() it is
         * void ether_setup(struct net_device *dev).
         */
        setup(dev);

        /* If setup() left tx_queue_len at zero, flag the device as
         * queue-less and fall back to the default length.
         */
        if (!dev->tx_queue_len) {
            dev->priv_flags |= IFF_NO_QUEUE;
            dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
        }

        dev->num_tx_queues = txqs;
        dev->real_num_tx_queues = txqs;
        if (netif_alloc_netdev_queues(dev))
            goto free_all;

    #ifdef CONFIG_SYSFS
        dev->num_rx_queues = rxqs;
        dev->real_num_rx_queues = rxqs;
        if (netif_alloc_rx_queues(dev))
            goto free_all;
    #endif

        /* Store the name exactly as passed in; a template such as "eth%d"
         * stays unexpanded at this point.
         */
        strcpy(dev->name, name);
        dev->name_assign_type = name_assign_type;
        dev->group = INIT_NETDEV_GROUP;
        if (!dev->ethtool_ops)
            dev->ethtool_ops = &default_ethtool_ops;

        nf_hook_ingress_init(dev);

        return dev;

    free_all:
        free_netdev(dev);
        return NULL;

    free_pcpu:
        free_percpu(dev->pcpu_refcnt);
    free_dev:
        netdev_freemem(dev);
        return NULL;
    }
     

    对于这两个函数,除了代码中的注释,最重要的就是 alloc_netdev_mqs 的第二个参数 "eth%d" :

    在 alloc_netdev_mqs 中,通过 strcpy(dev->name, name); 将这个参数原样拷贝到 dev->name 中,

    因此,此时 alloc_etherdev 返回的 net_device 中的数据成员 name 的值为 "eth%d" (不包含双引号)

    2.  从 register_netdevice 开始,步骤一中返回的 struct net_device* 值,刚好是 register_netdevice 函数的参数:

    /**
     * register_netdevice - register a network device
     * @dev: device to register
     *
     * Take a completed network device structure and add it to the kernel
     * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
     * chain. 0 is returned on success. A negative errno code is returned
     * on a failure to set up the device, or if the name is a duplicate.
     *
     * Callers must hold the rtnl semaphore. You may want
     * register_netdev() instead of this.
     *
     * BUGS:
     * The locking appears insufficient to guarantee two parallel registers
     * will not get the same name.
     */

    /* For a device obtained through alloc_etherdev(), dev->name still holds
     * the unexpanded template "eth%d" on entry; dev_get_valid_name() below
     * is the call that settles the final name.
     */
    int register_netdevice(struct net_device *dev)
    {
        int ret;
        struct net *net = dev_net(dev);

        BUG_ON(dev_boot_phase);
        ASSERT_RTNL();

        might_sleep();

        /* When net_device's are persistent, this will be fatal. */
        BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
        BUG_ON(!net);

        spin_lock_init(&dev->addr_list_lock);
        netdev_set_addr_lockdep_class(dev);

        /* Validate the name and, for a "%d" template, pick a number. */
        ret = dev_get_valid_name(net, dev, dev->name);
        if (ret < 0)
            goto out;

        /* Init, if this function is available */
        if (dev->netdev_ops->ndo_init) {
            ret = dev->netdev_ops->ndo_init(dev);
            if (ret) {
                /* Callers expect a negative errno; map a positive
                 * return from the driver to -EIO.
                 */
                if (ret > 0)
                    ret = -EIO;
                goto out;
            }
        }

        /* Advertising CTAG filtering without both VLAN add/kill callbacks
         * is treated as a driver bug.
         */
        if (((dev->hw_features | dev->features) &
             NETIF_F_HW_VLAN_CTAG_FILTER) &&
            (!dev->netdev_ops->ndo_vlan_rx_add_vid ||
             !dev->netdev_ops->ndo_vlan_rx_kill_vid)) {
            netdev_WARN(dev, "Buggy VLAN acceleration in driver!\n");
            ret = -EINVAL;
            goto err_uninit;
        }

        /* Use a fresh ifindex unless one was preset; a preset index that
         * is already taken fails with -EBUSY.
         */
        ret = -EBUSY;
        if (!dev->ifindex)
            dev->ifindex = dev_new_index(net);
        else if (__dev_get_by_index(net, dev->ifindex))
            goto err_uninit;

        /* Transfer changeable features to wanted_features and enable
         * software offloads (GSO and GRO).
         */
        dev->hw_features |= NETIF_F_SOFT_FEATURES;
        dev->features |= NETIF_F_SOFT_FEATURES;

        if (dev->netdev_ops->ndo_udp_tunnel_add) {
            dev->features |= NETIF_F_RX_UDP_TUNNEL_PORT;
            dev->hw_features |= NETIF_F_RX_UDP_TUNNEL_PORT;
        }

        dev->wanted_features = dev->features & dev->hw_features;

        if (!(dev->flags & IFF_LOOPBACK))
            dev->hw_features |= NETIF_F_NOCACHE_COPY;

        /* If IPv4 TCP segmentation offload is supported we should also
         * allow the device to enable segmenting the frame with the option
         * of ignoring a static IP ID value. This doesn't enable the
         * feature itself but allows the user to enable it later.
         */
        if (dev->hw_features & NETIF_F_TSO)
            dev->hw_features |= NETIF_F_TSO_MANGLEID;
        if (dev->vlan_features & NETIF_F_TSO)
            dev->vlan_features |= NETIF_F_TSO_MANGLEID;
        if (dev->mpls_features & NETIF_F_TSO)
            dev->mpls_features |= NETIF_F_TSO_MANGLEID;
        if (dev->hw_enc_features & NETIF_F_TSO)
            dev->hw_enc_features |= NETIF_F_TSO_MANGLEID;

        /* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
         */
        dev->vlan_features |= NETIF_F_HIGHDMA;

        /* Make NETIF_F_SG inheritable to tunnel devices.
         */
        dev->hw_enc_features |= NETIF_F_SG | NETIF_F_GSO_PARTIAL;

        /* Make NETIF_F_SG inheritable to MPLS.
         */
        dev->mpls_features |= NETIF_F_SG;

        ret = call_netdevice_notifiers(NETDEV_POST_INIT, dev);
        ret = notifier_to_errno(ret);
        if (ret)
            goto err_uninit;

        ret = netdev_register_kobject(dev);
        if (ret)
            goto err_uninit;
        dev->reg_state = NETREG_REGISTERED;

        __netdev_update_features(dev);

        /*
         * Default initial state at registry is that the
         * device is present.
         */

        set_bit(__LINK_STATE_PRESENT, &dev->state);

        linkwatch_init_dev(dev);

        dev_init_scheduler(dev);
        dev_hold(dev);
        list_netdevice(dev);
        add_device_randomness(dev->dev_addr, dev->addr_len);

        /* If the device has permanent device address, driver should
         * set dev_addr and also addr_assign_type should be set to
         * NET_ADDR_PERM (default value).
         */
        if (dev->addr_assign_type == NET_ADDR_PERM)
            memcpy(dev->perm_addr, dev->dev_addr, dev->addr_len);

        /* Notify protocols, that a new device appeared. */
        ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
        ret = notifier_to_errno(ret);
        if (ret) {
            rollback_registered(dev);
            dev->reg_state = NETREG_UNREGISTERED;
        }
        /*
         * Prevent userspace races by waiting until the network
         * device is fully setup before sending notifications.
         *
         * NOTE(review): this point is also reached after the rollback
         * above, so RTM_NEWLINK may be sent for a device that was just
         * rolled back - confirm this is intended.
         */
        if (!dev->rtnl_link_ops ||
            dev->rtnl_link_state == RTNL_LINK_INITIALIZED)
            rtmsg_ifinfo(RTM_NEWLINK, dev, ~0U, GFP_KERNEL);

    out:
        return ret;

    err_uninit:
        /* Failure after ndo_init(): give the driver its uninit and
         * destructor callbacks, when set, before returning the error.
         */
        if (dev->netdev_ops->ndo_uninit)
            dev->netdev_ops->ndo_uninit(dev);
        if (dev->priv_destructor)
            dev->priv_destructor(dev);
        goto out;
    }

    register_netdevice   -->   dev_get_valid_name   -->   dev_alloc_name_ns   -->   __dev_alloc_name

    register_netdevice 函数中调用:
    // 此时,dev->name 的值是 “eth%d”
    ret = dev_get_valid_name(net, dev, dev->name);

    int dev_get_valid_name(struct net *net, struct net_device *dev,
                   const char *name)
    {
        BUG_ON(!net);

        if (!dev_valid_name(name))
            return -EINVAL;

    // char *strchr(const char *s, int c);
    // The strchr() function returns a pointer to the first occurrence of the character c in the string s.
    // 只要 % 在 name 中出现过,则返回值不为 NULL,此处的 name 是 eth%d ,因此返回值不为 NULL
        if (strchr(name, '%'))
            return dev_alloc_name_ns(net, dev, name);
        else if (__dev_get_by_name(net, name))
            return -EEXIST;
        else if (dev->name != name)
            strlcpy(dev->name, name, IFNAMSIZ);

        return 0;
    }

    /*
     * dev_alloc_name_ns - expand a name template and store it in @dev.
     * @net:  namespace searched for names already in use
     * @dev:  device whose name field receives the expanded name
     * @name: name template, e.g. "eth%d"
     *
     * Lets __dev_alloc_name() build the name in a local buffer and copies it
     * to dev->name only when that call succeeded.  The result of
     * __dev_alloc_name() is returned unchanged.
     */
    static int dev_alloc_name_ns(struct net *net,
                     struct net_device *dev,
                     const char *name)
    {
        /* Not initialised here; __dev_alloc_name() writes the name into it. */
        char scratch[IFNAMSIZ];
        int unit = __dev_alloc_name(net, name, scratch);

        if (unit < 0)
            return unit;

        strlcpy(dev->name, scratch, IFNAMSIZ);
        return unit;
    }

    // 主要的操作在 __dev_alloc_name 函数中,一般 PAGE_SIZE 是 4K,因此, net_device 数目最多有 8 * 4K = 32K 个
    // 在这个函数中,利用了位图,每一个bit,代表一个 net_device
    static int __dev_alloc_name(struct net *net, const char *name, char *buf)
    {
        int i = 0;
        const char *p;
    // 通常 PAGE_SIZE 是 4K,因此是 32K
        const int max_netdevices = 8*PAGE_SIZE;
        unsigned long *inuse;
        struct net_device *d;

    // IFNAMSIZ 的值 是 16,此处在 前 16 - 1 = 15 个字符中匹配 %
    // 因为 net_device 中 name 的定义是: char name[IFNAMSIZ];
        p = strnchr(name, IFNAMSIZ-1, '%');
        if (p) {
            /*
             * Verify the string as this thing may have come from
             * the user.  There must be either one "%d" and no other "%"
             * characters.
             */
    // % 后必须是 d
            if (p[1] != 'd' || strchr(p + 2, '%'))
                return -EINVAL;

    // 申请一个全是 0 的 page
            /* Use one page as a bit array of possible slots */
            inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
            if (!inuse)
                return -ENOMEM;

    // 针对 net 空间中已经存在的 net_device,找到每个 net_device 在这个 page 中的 bit 的位置
            for_each_netdev(net, d) {
                if (!sscanf(d->name, name, &i)) // 特别需要注意 sscanf 函数的用法,此时 d->name 中
    // 的值应该是 eth1, eth2这样的,而 name 中是 eth%d,
    // 这个函数作用是将 1, 2这样的值,保存到 i 中
                    continue;

                if (i < 0 || i >= max_netdevices)
                    continue;

                /*  avoid cases where sscanf is not exact inverse of printf */
    // 将 sscanf 分解的 d->name 再重组一遍,进行比较,确定 sscanf 分解的没有错误,
    // 才通过 set_bit 将 page 中相应的 bit 值位
                snprintf(buf, IFNAMSIZ, name, i); // 重组的内容(字符串) 保存在 buf 中
                if (!strncmp(buf, d->name, IFNAMSIZ)) // 如果 buf 与 d->name 中长度 IFNAMSIZ 的内容完全一致,
    // 则返回 0
                    set_bit(i, inuse);
            }

    // 找到 page 中第一个没有被使用的 bit
            i = find_first_zero_bit(inuse, max_netdevices);
            free_page((unsigned long) inuse);
        }

    // 确定 buf 和 name 不是同一个空间,将组成的新名字保存到 buf 中,此时 name 是 eth%d ,
      // i 是一个1,2,3这样的数值
        if (buf != name)
            snprintf(buf, IFNAMSIZ, name, i);

    // 通过新组成的名字去找 net_device 设备,确保没有重复的
        if (!__dev_get_by_name(net, buf))
            return i;

        /* It is possible to run out of possible slots
         * when the name is long and there isn't enough space left
         * for the digits, or if all bits are used.
         */
        return -ENFILE;
    }

    在  static int __dev_alloc_name(struct net *net, const char *name, char *buf) 中组成的名字 ethx,会通过 buf 返回,然后在

    dev_alloc_name_ns 函数中通过 strlcpy(dev->name, buf, IFNAMSIZ); 保存到步骤一生成的 struct net_device 变量的成员 name 中。

    至此, 一个 新的 ethx 这样的名字就生成了。

    3. ifconfig ethx up,则在 ifconfig 输出中就能够看到它的相关信息了。
    ————————————————
    版权声明:本文为CSDN博主「xiruanliuwei」的原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接及本声明。
    原文链接:https://blog.csdn.net/xiruanliuwei/article/details/78765255

  • 相关阅读:
    1010每次备份我的MySQL数据库
    1008win7与虚拟机中的linux共享文件的(详细)方法
    0930MySQL中实现高性能高并发计数器方案(例如文章点击数)
    0929shell操作mysql
    0929mysql前缀索引如何找到合适的位数
    0929mysql 用户管理和权限设置
    学习笔记之机器学习实战 (Machine Learning in Action)
    学习笔记之Python for Data Analysis
    学习笔记之入行数据科学,这些书一定要看
    面试总结之Python
  • 原文地址:https://www.cnblogs.com/sky-heaven/p/12092731.html
Copyright © 2011-2022 走看看