  • ixgbe driver initialization

    http://abcdxyzk.github.io/blog/2020/05/21/ixgbe-init/

    First, the module is loaded with insmod ixgbe.ko:

    module_init(ixgbe_init_module);

    static int __init ixgbe_init_module(void)
    {
    	int ret;

    	pr_info("%s - version %s\n", ixgbe_driver_string, ixgbe_driver_version);
    	pr_info("%s\n", ixgbe_copyright);
    
    	ixgbe_dbg_init();
    	ret = pci_register_driver(&ixgbe_driver);
    	if (ret) {
    		ixgbe_dbg_exit();
    		return ret;
    	}
    
    #ifdef CONFIG_IXGBE_DCA
    	dca_register_notify(&dca_notifier);
    #endif
    
    	return 0;
    }
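
    For symmetry, the module exit path undoes this registration; a sketch of ixgbe_exit_module (the exact body varies slightly between driver versions):

    static void __exit ixgbe_exit_module(void)
    {
    #ifdef CONFIG_IXGBE_DCA
    	dca_unregister_notify(&dca_notifier);
    #endif
    	pci_unregister_driver(&ixgbe_driver);
    	ixgbe_dbg_exit();
    }
    module_exit(ixgbe_exit_module);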

    Next, look at the driver's core pci_driver structure for this PCI device:

    static struct pci_driver ixgbe_driver = {
    	.name     = ixgbe_driver_name,
    	.id_table = ixgbe_pci_tbl,
    	.probe    = ixgbe_probe,
    	.remove   = ixgbe_remove,
    #ifdef CONFIG_PM
    	.suspend  = ixgbe_suspend,
    	.resume   = ixgbe_resume,
    #endif
    	.shutdown = ixgbe_shutdown,
    	.sriov_configure = ixgbe_pci_sriov_configure,
    	.err_handler = &ixgbe_err_handler
    };
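
    The id_table decides which PCI devices this driver binds to. An abridged sketch of ixgbe_pci_tbl (the real table lists every supported 82598/82599/X540/X550 device ID):

    static const struct pci_device_id ixgbe_pci_tbl[] = {
    	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598), board_82598},
    	{PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP), board_82599},
    	/* ... one entry per supported device ID ... */
    	{0, }	/* required last entry */
    };
    MODULE_DEVICE_TABLE(pci, ixgbe_pci_tbl);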

    When the device is successfully probed (matched against the id_table), the ixgbe_probe function is executed:

    static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
    {
    	/* allocate the struct net_device *netdev, with room for the private data */
    	netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), indices);
    
    	if (!netdev) {
    		err = -ENOMEM;
    		goto err_alloc_etherdev;
    	}
    
    	SET_NETDEV_DEV(netdev, &pdev->dev);
    
    	/* the struct ixgbe_adapter lives in the netdev's private area
    	 * (allocated together with the netdev above) */
    	adapter = netdev_priv(netdev);
    
    	/* install the net_device_ops callbacks */
    	netdev->netdev_ops = &ixgbe_netdev_ops;
    
    	err = ixgbe_sw_init(adapter);
    
    	err = ixgbe_init_interrupt_scheme(adapter);
    	/* register the net device */
    	err = register_netdev(netdev);
    }
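
    ixgbe_netdev_ops is what connects the generic net_device to the driver; an abridged sketch of the entries relevant to this walkthrough:

    static const struct net_device_ops ixgbe_netdev_ops = {
    	.ndo_open		= ixgbe_open,
    	.ndo_stop		= ixgbe_close,
    	.ndo_start_xmit		= ixgbe_xmit_frame,
    	/* ... many more callbacks in the real driver ... */
    };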

    The key function is ixgbe_init_interrupt_scheme(adapter), which sets up the adapter's q_vectors and the NAPI-related state:

    int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter)
    {
    
    	err = ixgbe_alloc_q_vectors(adapter);
    
    }
    static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter)
    {
    
    	if (q_vectors >= (rxr_remaining + txr_remaining)) {
    		for (; rxr_remaining; v_idx++) {
    			err = ixgbe_alloc_q_vector(adapter, q_vectors, v_idx,
    						   0, 0, 1, rxr_idx);
    
    			if (err)
    				goto err_out;
    
    			/* update counts and index */
    			rxr_remaining--;
    			rxr_idx++;
    		}
    	}
    }
    static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
    				int v_count, int v_idx,
    				int txr_count, int txr_idx,
    				int rxr_count, int rxr_idx)
    {
    	/* setup affinity mask and node */
    	if (cpu != -1)
    		cpumask_set_cpu(cpu, &q_vector->affinity_mask);
    	q_vector->numa_node = node;
    
    #ifdef CONFIG_IXGBE_DCA
    	/* initialize CPU for DCA */
    	q_vector->cpu = -1;
    
    #endif
    	/* initialize NAPI */
    	netif_napi_add(adapter->netdev, &q_vector->napi,
    			   ixgbe_poll, 64);
    	napi_hash_add(&q_vector->napi);
    }
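
    The weight of 64 passed to netif_napi_add above is the per-poll budget for this NAPI instance; in later kernels this value has a named constant:

    /* include/linux/netdevice.h */
    #define NAPI_POLL_WEIGHT 64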

    At this point, the NIC's setup and initialization is complete.

    The following structures are involved:

    ixgbe_adapter
    
    /* board specific private data structure */
    struct ixgbe_adapter {

    	/* transmit rings */
    	struct ixgbe_ring *tx_ring[MAX_TX_QUEUES] ____cacheline_aligned_in_smp;

    	/* receive rings */
    	struct ixgbe_ring *rx_ring[MAX_RX_QUEUES];

    	/* each q_vector embeds a napi_struct and corresponds to one
    	 * interrupt vector, i.e. one of the msix_entries below */
    	struct ixgbe_q_vector *q_vector[MAX_Q_VECTORS];

    	/* MSI-X entries, one per interrupt vector */
    	struct msix_entry *msix_entries;
    };
    
    struct ixgbe_q_vector {
    	struct ixgbe_adapter *adapter;
    #ifdef CONFIG_IXGBE_DCA
    	int cpu;            /* CPU for DCA */
    #endif
    	u16 v_idx;              /* index of q_vector within array, also used for
    				 * finding the bit in EICR and friends that
    				 * represents the vector for this ring */
    	u16 itr;                /* Interrupt throttle rate written to EITR */
    	struct ixgbe_ring_container rx, tx;
    
    	struct napi_struct napi;        /* embedded napi_struct */
    	cpumask_t affinity_mask;
    	int numa_node;
    	struct rcu_head rcu;    /* to avoid race with update stats on free */
    	char name[IFNAMSIZ + 9];
    
    	/* for dynamic allocation of rings associated with this q_vector */
    	struct ixgbe_ring ring[0] ____cacheline_internodealigned_in_smp;
    };
    
    struct napi_struct {
    	/* The poll_list must only be managed by the entity which
    	 * changes the state of the NAPI_STATE_SCHED bit.  This means
    	 * whoever atomically sets that bit can add this napi_struct
    	 * to the per-cpu poll_list, and whoever clears that bit
    	 * can remove from the list right before clearing the bit.
    	 */
    	struct list_head    poll_list;
    
    	unsigned long       state;
    	int         weight;
    	unsigned int        gro_count;
    	int         (*poll)(struct napi_struct *, int); /* driver's poll callback (ixgbe_poll here) */
    #ifdef CONFIG_NETPOLL
    	spinlock_t      poll_lock;
    	int         poll_owner;
    #endif
    	struct net_device   *dev;
    	struct sk_buff      *gro_list;
    	struct sk_buff      *skb;
    	struct list_head    dev_list;
    };
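
    Roughly, these structures hang together as follows (a summary sketch, not actual driver code):

    /*
     * ixgbe_adapter (one per PCI function)
     *   q_vector[0..n-1]       - each embeds a napi_struct plus rx/tx ring containers
     *   rx_ring[] / tx_ring[]  - each ring points back to its owning q_vector
     *   msix_entries[]         - one MSI-X vector per q_vector (plus an "other" vector)
     */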

    Then, when we bring the interface up with ifconfig <dev> up, netdev_ops->ndo_open, i.e. ixgbe_open, is executed:

    
    static int ixgbe_open(struct net_device *netdev)
    {
    	/* allocate transmit descriptors */
    	err = ixgbe_setup_all_tx_resources(adapter);
    	if (err)
    		goto err_setup_tx;
    
    	/* allocate receive descriptors */
    	err = ixgbe_setup_all_rx_resources(adapter);
    	/* request the interrupt(s) */
    	err = ixgbe_request_irq(adapter);
    }
    
    static int ixgbe_request_irq(struct ixgbe_adapter *adapter)
    {
    	struct net_device *netdev = adapter->netdev;
    	int err;
    
    	if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED)
    		err = ixgbe_request_msix_irqs(adapter);
    	else if (adapter->flags & IXGBE_FLAG_MSI_ENABLED)
    		err = request_irq(adapter->pdev->irq, ixgbe_intr, 0,
    				  netdev->name, adapter);
    	else
    		err = request_irq(adapter->pdev->irq, ixgbe_intr, IRQF_SHARED,
    				  netdev->name, adapter);
    
    	if (err)
    		e_err(probe, "request_irq failed, Error %d\n", err);
    
    	return err;
    }
    
    static int ixgbe_request_msix_irqs(struct ixgbe_adapter *adapter)
    {
    	for (vector = 0; vector < adapter->num_q_vectors; vector++) {
    		struct ixgbe_q_vector *q_vector = adapter->q_vector[vector];
    		struct msix_entry *entry = &adapter->msix_entries[vector];
    
    		err = request_irq(entry->vector, &ixgbe_msix_clean_rings, 0,
    				  q_vector->name, q_vector);
    	}
    }
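
    In the full driver (this varies by version), the same loop also gives the kernel an IRQ affinity hint so that each MSI-X vector prefers the CPU recorded in its q_vector. A hedged excerpt of what that looks like inside the loop:

    		/* excerpt; in the real driver this is guarded by a feature-flag check */
    		irq_set_affinity_hint(entry->vector, &q_vector->affinity_mask);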

    The code flow above shows that, in the MSI-X case, the interrupt handler that ultimately gets registered is ixgbe_msix_clean_rings:

    
    static irqreturn_t ixgbe_msix_clean_rings(int irq, void *data)
    {
    	struct ixgbe_q_vector *q_vector = data;
    
    	/* EIAM disabled interrupts (on this vector) for us */
    
    	if (q_vector->rx.ring || q_vector->tx.ring)
    		napi_schedule(&q_vector->napi);
    
    	return IRQ_HANDLED;
    }

    As the code shows, this interrupt handler does little more than schedule NAPI.

    When a packet arrives, the hardware interrupt fires and ixgbe_msix_clean_rings runs. napi_schedule eventually calls __raise_softirq_irqoff to raise the NET_RX_SOFTIRQ softirq, and the corresponding softirq handler then takes care of pushing the packet up the protocol stack.

    Now let's see what the NAPI scheduling functions actually do:

    
    static inline void napi_schedule(struct napi_struct *n)
    {
    	if (napi_schedule_prep(n))
    		__napi_schedule(n);
    }
    void __napi_schedule(struct napi_struct *n)
    {
    	unsigned long flags;
    
    	local_irq_save(flags);
    	____napi_schedule(this_cpu_ptr(&softnet_data), n);
    	local_irq_restore(flags);
    }
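
    __napi_schedule in turn calls ____napi_schedule, which is where the napi_struct gets queued and the softirq gets raised; its body is essentially:

    /* net/core/dev.c (simplified) */
    static inline void ____napi_schedule(struct softnet_data *sd,
    				     struct napi_struct *napi)
    {
    	list_add_tail(&napi->poll_list, &sd->poll_list);
    	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
    }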
    
    So the NAPI scheduler ultimately hooks the napi_struct onto the per-CPU private data structure softnet_data:

    struct softnet_data {
    	struct Qdisc        *output_queue;
    	struct Qdisc        **output_queue_tailp;
    	struct list_head    poll_list;
    	struct sk_buff      *completion_queue;
    	struct sk_buff_head process_queue;
    
    	/* stats */
    	unsigned int        processed;
    	unsigned int        time_squeeze;
    	unsigned int        cpu_collision;
    	unsigned int        received_rps;
    
    #ifdef CONFIG_RPS
    	struct softnet_data *rps_ipi_list;
    
    	/* Elements below can be accessed between CPUs for RPS */
    	struct call_single_data csd ____cacheline_aligned_in_smp;
    	struct softnet_data *rps_ipi_next;
    	unsigned int        cpu;
    	unsigned int        input_queue_head;
    	unsigned int        input_queue_tail;
    #endif
    	unsigned int        dropped;
    	struct sk_buff_head input_pkt_queue;
    	struct napi_struct  backlog;    /* built-in napi_struct for the backlog (non-NAPI / RPS path) */
    };
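
    softnet_data itself is defined as a per-CPU variable in net/core/dev.c, which is why __napi_schedule can grab the local CPU's instance with this_cpu_ptr:

    DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);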

    NET_RX_SOFTIRQ is the softirq raised when packets are received; its handler is net_rx_action.

    NET_TX_SOFTIRQ is the softirq raised after packet transmission completes; its handler is net_tx_action.
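
    Both handlers are registered once at network-core initialization time, in net_dev_init() in net/core/dev.c:

    	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
    	open_softirq(NET_RX_SOFTIRQ, net_rx_action);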

    
    static void net_rx_action(struct softirq_action *h)
    {
    	/* get this CPU's softnet_data */
    	struct softnet_data *sd = this_cpu_ptr(&softnet_data);
    	while (!list_empty(&sd->poll_list)) {
    		struct napi_struct *n;
    		n = list_first_entry(&sd->poll_list, struct napi_struct, poll_list);
    
    		if (test_bit(NAPI_STATE_SCHED, &n->state)) {
    			work = n->poll(n, weight);
    			trace_napi_poll(n);
    		}
    	}
    }
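
    The loop shown above is simplified; the real net_rx_action also enforces a global budget (netdev_budget) and a time limit, and when either is exhausted it increments time_squeeze and re-raises the softirq instead of polling forever. Roughly:

    	if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit))) {
    		sd->time_squeeze++;
    		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
    		break;
    	}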

    Execution then reaches the poll function registered when NAPI was initialized, which here is ixgbe_poll:

    
    int ixgbe_poll(struct napi_struct *napi, int budget)
    {
    	struct ixgbe_q_vector *q_vector =
    				container_of(napi, struct ixgbe_q_vector, napi);
    	struct ixgbe_adapter *adapter = q_vector->adapter;
    	struct ixgbe_ring *ring;
    	int per_ring_budget;
    	bool clean_complete = true;
    
    #ifdef CONFIG_IXGBE_DCA
    	if (adapter->flags & IXGBE_FLAG_DCA_ENABLED)
    		ixgbe_update_dca(q_vector);
    #endif
    
    	ixgbe_for_each_ring(ring, q_vector->tx)
    		clean_complete &= !!ixgbe_clean_tx_irq(q_vector, ring);
    
    	if (!ixgbe_qv_lock_napi(q_vector))
    		return budget;
    
    	/* attempt to distribute budget to each queue fairly, but don't allow
    	 * the budget to go below 1 because we'll exit polling */
    	if (q_vector->rx.count > 1)
    		per_ring_budget = max(budget/q_vector->rx.count, 1);
    	else
    		per_ring_budget = budget;
    
    	ixgbe_for_each_ring(ring, q_vector->rx)
    		clean_complete &= (ixgbe_clean_rx_irq(q_vector, ring,
    				   per_ring_budget) < per_ring_budget);
    
    	ixgbe_qv_unlock_napi(q_vector);
    	/* If all work not completed, return budget and keep polling */
    	if (!clean_complete)
    		return budget;
    
    	/* all work done, exit the polling mode */
    	napi_complete(napi);
    	if (adapter->rx_itr_setting & 1)
    		ixgbe_set_itr(q_vector);
    	if (!test_bit(__IXGBE_DOWN, &adapter->state))
    		ixgbe_irq_enable_queues(adapter, ((u64)1 << q_vector->v_idx));
    
    	return 0;
    }
    
    static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
    				   struct ixgbe_ring *rx_ring,
    				   const int budget)
    {
    	   ixgbe_rx_skb(q_vector, skb);
    }
    
    static void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector,
    			 struct sk_buff *skb)
    {
    	if (ixgbe_qv_busy_polling(q_vector))
    		netif_receive_skb(skb);
    	else
    		napi_gro_receive(&q_vector->napi, skb);
    }
    
    int netif_receive_skb(struct sk_buff *skb)
    {
    	int ret;
    
    	net_timestamp_check(netdev_tstamp_prequeue, skb);
    
    	if (skb_defer_rx_timestamp(skb))
    		return NET_RX_SUCCESS;
    
    	rcu_read_lock();
    
    #ifdef CONFIG_RPS
    	if (static_key_false(&rps_needed)) {
    		struct rps_dev_flow voidflow, *rflow = &voidflow;
    		int cpu = get_rps_cpu(skb->dev, skb, &rflow);
    
    		if (cpu >= 0) {
    			ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
    			rcu_read_unlock();
    			return ret;
    		}
    	}
    #endif
    	/* finally the protocol stack starts receiving the packet */
    	ret = __netif_receive_skb(skb);
    	rcu_read_unlock();
    	return ret;
    }
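
    __netif_receive_skb eventually delivers the skb to whichever protocol handlers registered for its EtherType via dev_add_pack; for IPv4, for instance, this is how ip_rcv becomes the next stop (excerpt from net/ipv4/af_inet.c):

    static struct packet_type ip_packet_type __read_mostly = {
    	.type = cpu_to_be16(ETH_P_IP),
    	.func = ip_rcv,
    };

    static int __init inet_init(void)
    {
    	/* ... */
    	dev_add_pack(&ip_packet_type);
    	/* ... */
    }
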
  • Original article: https://www.cnblogs.com/dream397/p/13614876.html