zoukankan      html  css  js  c++  java
  • rps ipi

    Linux SMP 多核调用示例

    在多core系统中,系统启动后内核先在一个core上执行,之后才会在不同的core上进行调度。也就是说,内核模块(驱动)加载时,初始化函数只会在其中一个core上执行一次。如果需要在所有core上,或者在某个指定的core上执行一段代码,就需要使用smp提供的多核调用接口。

    smp相关api

    linux/smp.h中定义了多core调用的函数以及相关的数据结构。

    在所有的core上执行函数func,info是传递给func的参数。

    void on_each_cpu(smp_call_func_t func, void *info, int wait);
    

    在给定cpumask中所有的core上执行函数func,info是传递给func的参数。

    void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func, void *info, bool wait);
    

    在除当前调用core之外的所有core上执行函数func,info是传递给func的参数。

    void smp_call_function(smp_call_func_t func, void *info, int wait);
    

    在指定的cpumask所对应的core上执行,但是需要除去当前调用的core。

    void smp_call_function_many(const struct cpumask *mask, smp_call_func_t func, void *info, bool wait);
    

    在指定的cpumask所对应的core中的其中一个core上执行一次

    int smp_call_function_any(const struct cpumask *mask, smp_call_func_t func, void *info, int wait);
    

    在指定的cpuid上执行一次

    int smp_call_function_single(int cpuid, smp_call_func_t func, void *info, int wait);
    

    在内核空间中,定义了几个全局cpumask变量。

    • cpu_possible_mask - has bit ‘cpu’ set iff cpu is populatable
    • cpu_present_mask - has bit ‘cpu’ set iff cpu is populated
    • cpu_online_mask - has bit ‘cpu’ set iff cpu available to scheduler
    • cpu_active_mask - has bit ‘cpu’ set iff cpu available to migration
    /*
     * Queue @sd for a deferred RPS IPI when it is not this CPU's softnet_data.
     *
     * With CONFIG_RPS enabled, @sd is compared against the softnet_data
     * obtained through __get_cpu_var(). If they differ, @sd is pushed onto
     * the head of the local rps_ipi_list (chained through rps_ipi_next) and
     * NET_RX_SOFTIRQ is raised on the local CPU.
     *
     * Returns 1 when @sd was queued, 0 when @sd is the local softnet_data
     * or CONFIG_RPS is not configured.
     */
    static int rps_ipi_queued(struct softnet_data *sd)
    {
    #ifdef CONFIG_RPS
        struct softnet_data *local_sd = &__get_cpu_var(softnet_data);

        /* Nothing to queue: the target is the local per-cpu data. */
        if (sd == local_sd)
            return 0;

        /* Head insertion into this CPU's list of pending IPI targets. */
        sd->rps_ipi_next = local_sd->rps_ipi_list;
        local_sd->rps_ipi_list = sd;

        __raise_softirq_irqoff(NET_RX_SOFTIRQ);
        return 1;
    #else
        return 0;
    #endif /* CONFIG_RPS */
    }
    // When the first packet arrives on the input_pkt_queue of the CPU chosen by RPS, that CPU's backlog NAPI has to be scheduled; for a remote CPU this is done by means of an IPI.
            /* Schedule NAPI for backlog device
             * We can use non atomic operation since we own the queue lock
             */
            if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {// the backlog NAPI was not scheduled yet
                if (!rps_ipi_queued(sd))    // returns 1 when sd is not the local CPU's softnet_data: sd has been added to the local rps_ipi_list and the local softirq raised
                    ____napi_schedule(sd, &sd->backlog);    // sd is the local CPU's softnet_data: schedule its backlog NAPI directly
            }
    /*
     * Incoming packets are placed on per-cpu queues
     *
     * One instance exists per possible CPU; net_dev_init() initialises each
     * of them in its for_each_possible_cpu() loop.
     */
    struct softnet_data {
        struct Qdisc        *output_queue;
        /* Set to &output_queue by net_dev_init(). */
        struct Qdisc        **output_queue_tailp;
        /* List head initialised by net_dev_init(); presumably the NAPI poll list - TODO confirm. */
        struct list_head    poll_list;
        struct sk_buff        *completion_queue;
        /* Packets dequeued and delivered by process_backlog(). */
        struct sk_buff_head    process_queue;
    
        /* stats */
        unsigned int        processed;
        unsigned int        time_squeeze;
        unsigned int        cpu_collision;
        /* Incremented by rps_trigger_softirq() on every IPI callback. */
        unsigned int        received_rps;
    
    #ifdef CONFIG_RPS
        /* Head of the list of softnet_data waiting for an IPI; filled by rps_ipi_queued(). */
        struct softnet_data    *rps_ipi_list;
    
        /* Elements below can be accessed between CPUs for RPS */
        /* Call descriptor for the IPI: func = rps_trigger_softirq, info = this structure (see net_dev_init()). */
        struct call_single_data    csd ____cacheline_aligned_in_smp;
        /* Link to the next entry of another CPU's rps_ipi_list. */
        struct softnet_data    *rps_ipi_next;
        /* Index of the CPU owning this structure; set by net_dev_init(). */
        unsigned int        cpu;
        /* process_backlog() calls input_queue_head_incr() after each delivered packet; presumably that advances this counter - TODO confirm. */
        unsigned int        input_queue_head;
        unsigned int        input_queue_tail;
    #endif
        unsigned        dropped;
        /* Spliced onto process_queue by process_backlog() under rps_lock(). */
        struct sk_buff_head    input_pkt_queue;
        /* Backlog NAPI instance; its poll callback is process_backlog(). */
        struct napi_struct    backlog;
    };
    /*
     * Incoming packets are placed on per-cpu queues
     *
     * NOTE(review): this definition appears twice in this file with identical
     * contents; a real translation unit may only contain one of them.
     */
    struct softnet_data {
        struct Qdisc        *output_queue;
        /* Set to &output_queue by net_dev_init(). */
        struct Qdisc        **output_queue_tailp;
        /* List head initialised by net_dev_init(). */
        struct list_head    poll_list;
        struct sk_buff        *completion_queue;
        /* Packets dequeued and delivered by process_backlog(). */
        struct sk_buff_head    process_queue;
    
        /* stats */
        unsigned int        processed;
        unsigned int        time_squeeze;
        unsigned int        cpu_collision;
        /* Incremented by rps_trigger_softirq(). */
        unsigned int        received_rps;
    
    #ifdef CONFIG_RPS
        /* Head of the pending-IPI list built by rps_ipi_queued(). */
        struct softnet_data    *rps_ipi_list;
    
        /* Elements below can be accessed between CPUs for RPS */
        /* IPI call descriptor; func and info are filled in by net_dev_init(). */
        struct call_single_data    csd ____cacheline_aligned_in_smp;
        /* Next entry in a pending-IPI list. */
        struct softnet_data    *rps_ipi_next;
        /* Owning CPU index; set by net_dev_init(). */
        unsigned int        cpu;
        unsigned int        input_queue_head;
        unsigned int        input_queue_tail;
    #endif
        unsigned        dropped;
        /* Moved onto process_queue by process_backlog() while holding rps_lock(). */
        struct sk_buff_head    input_pkt_queue;
        /* Backlog NAPI instance polled through process_backlog(). */
        struct napi_struct    backlog;
    };
    /*
     * IPI callback for RPS. Called from hardirq (IPI) context.
     *
     * @data: the softnet_data registered as csd.info in net_dev_init().
     *
     * Schedules the backlog NAPI of that softnet_data and then bumps its
     * received_rps counter.
     */
    static void rps_trigger_softirq(void *data)
    {
            struct softnet_data *target = data;
            struct napi_struct *backlog_napi = &target->backlog;

            ____napi_schedule(target, backlog_napi);
            target->received_rps += 1;
    }
    /*
     *       Initialise the core network device layer.
     *
     *       This is called single threaded during boot, so no need
     *       to take the rtnl semaphore.
     *
     *       Returns 0 on success and -ENOMEM when one of the setup or
     *       registration steps fails. A failure of
     *       cpuhp_setup_state_nocalls() only triggers a warning and is
     *       not reported to the caller.
     */
    static int __init net_dev_init(void)
    {
            int i, rc = -ENOMEM;
    
            BUG_ON(!dev_boot_phase);
    
            if (dev_proc_init())
                    goto out;
    
            if (netdev_kobject_init())
                    goto out;
    
            INIT_LIST_HEAD(&ptype_all);
            for (i = 0; i < PTYPE_HASH_SIZE; i++)
                    INIT_LIST_HEAD(&ptype_base[i]);
    
            INIT_LIST_HEAD(&offload_base);
    
            if (register_pernet_subsys(&netdev_net_ops))
                    goto out;
    
            /*
             *      Initialise the packet receive queues.
             */
    
            for_each_possible_cpu(i) {
                    struct work_struct *flush = per_cpu_ptr(&flush_works, i);
                    struct softnet_data *sd = &per_cpu(softnet_data, i);
    
                    INIT_WORK(flush, flush_backlog);
    
                    skb_queue_head_init(&sd->input_pkt_queue);
                    skb_queue_head_init(&sd->process_queue);
    #ifdef CONFIG_XFRM_OFFLOAD
                    skb_queue_head_init(&sd->xfrm_backlog);
    #endif
                    INIT_LIST_HEAD(&sd->poll_list);
                    sd->output_queue_tailp = &sd->output_queue;
    #ifdef CONFIG_RPS
                    /*
                     * Prepare the IPI descriptor: a cross-CPU call using
                     * sd->csd runs rps_trigger_softirq(sd).
                     */
                    sd->csd.func = rps_trigger_softirq;
                    sd->csd.info = sd;
                    sd->cpu = i;
    #endif
    
                    /* The per-cpu backlog NAPI is polled by process_backlog(). */
                    init_gro_hash(&sd->backlog);
                    sd->backlog.poll = process_backlog;
                    sd->backlog.weight = weight_p;
            }
    
            dev_boot_phase = 0;
    
            /* The loopback device is special if any other network devices
             * is present in a network namespace the loopback device must
             * be present. Since we now dynamically allocate and free the
             * loopback device ensure this invariant is maintained by
             * keeping the loopback device as the first device on the
             * list of network devices.  Ensuring the loopback devices
             * is the first device that appears and the last network device
             * that disappears.
             */
            if (register_pernet_device(&loopback_net_ops))
                    goto out;
    
            if (register_pernet_device(&default_device_ops))
                    goto out;
    
            open_softirq(NET_TX_SOFTIRQ, net_tx_action);
            open_softirq(NET_RX_SOFTIRQ, net_rx_action);
    
            rc = cpuhp_setup_state_nocalls(CPUHP_NET_DEV_DEAD, "net/dev:dead",
                                           NULL, dev_cpu_dead);
            /* A hotplug registration failure is only warned about. */
            WARN_ON(rc < 0);
            rc = 0;
    out:
            return rc;
    }
    /*
     * Run data->func(data->info) on @cpu using a caller-provided
     * call_single_data.
     *
     * @cpu:  CPU on which the function has to run
     * @data: descriptor holding the function and its argument
     * @wait: passed on to generic_exec_single() for the remote case and
     *        taken into account by the deadlock warning below
     *
     * When @cpu is the calling CPU the function is invoked directly with
     * local interrupts saved and disabled around the call. Otherwise @data
     * is locked with csd_lock() and handed to generic_exec_single().
     */
    void __smp_call_function_single(int cpu, struct call_single_data *data,
                    int wait)
    {
        unsigned long irq_state;
        unsigned int self;

        /* Paired with put_cpu() at the end of the function. */
        self = get_cpu();

        /*
         * Can deadlock when called with interrupts disabled.
         * We allow cpu's that are not yet online though, as no one else can
         * send smp call function interrupt to this cpu and as such deadlocks
         * can't happen.
         */
        WARN_ON_ONCE(cpu_online(smp_processor_id()) && wait && irqs_disabled()
                 && !oops_in_progress);

        if (cpu != self) {
            /* Remote target: queue the request for the other CPU. */
            csd_lock(data);
            generic_exec_single(cpu, data, wait);
        } else {
            /* Local target: call the function right here. */
            local_irq_save(irq_state);
            data->func(data->info);
            local_irq_restore(irq_state);
        }

        put_cpu();
    }
    /*
     * net_rps_action sends any pending IPI's for rps.
     * Note: called with local irq disabled, but exits with local irq enabled.
     *
     * With CONFIG_RPS the rps_ipi_list of @sd is detached before interrupts
     * are re-enabled, and every entry whose CPU is online then receives a
     * non-waiting cross-CPU call through its own csd. Without CONFIG_RPS the
     * function only re-enables local interrupts.
     */
    static void net_rps_action_and_irq_enable(struct softnet_data *sd)
    {
    #ifdef CONFIG_RPS
        struct softnet_data *pending = sd->rps_ipi_list;

        if (!pending) {
            local_irq_enable();
            return;
        }

        /* Take ownership of the whole list while irqs are still off. */
        sd->rps_ipi_list = NULL;
        local_irq_enable();

        /* Send pending IPI's to kick RPS processing on remote cpus. */
        do {
            /* Fetch the link before the entry is handed to the IPI. */
            struct softnet_data *following = pending->rps_ipi_next;

            if (cpu_online(pending->cpu))
                __smp_call_function_single(pending->cpu,
                               &pending->csd, 0);
            pending = following;
        } while (pending);
    #else
        local_irq_enable();
    #endif
    }
    
    /*
     * Poll callback of the per-cpu backlog NAPI (installed as
     * sd->backlog.poll in net_dev_init()).
     *
     * @napi:  the backlog member of a struct softnet_data
     * @quota: maximum number of packets to deliver in this call
     *
     * Delivers packets from sd->process_queue through __netif_receive_skb()
     * and refills process_queue from sd->input_pkt_queue whenever it runs
     * empty. Returns the number of packets delivered. Local interrupts are
     * enabled on every return path.
     */
    static int process_backlog(struct napi_struct *napi, int quota)
    {
        int work = 0;
        struct softnet_data *sd = container_of(napi, struct softnet_data, backlog);
    
    #ifdef CONFIG_RPS
        /* Check if we have pending ipi, its better to send them now,
         * not waiting net_rx_action() end.
         */
        if (sd->rps_ipi_list) {
            /* The helper expects irqs off on entry and turns them back on. */
            local_irq_disable();
            net_rps_action_and_irq_enable(sd);
        }
    #endif
        /* Pick up the current value of the global weight_p on every poll. */
        napi->weight = weight_p;
        local_irq_disable();
        while (work < quota) {
            struct sk_buff *skb;
            unsigned int qlen;
    
            /* Drain process_queue; irqs are on only while a packet is delivered. */
            while ((skb = __skb_dequeue(&sd->process_queue))) {
                local_irq_enable();
                __netif_receive_skb(skb);
                local_irq_disable();
                input_queue_head_incr(sd);
                if (++work >= quota) {
                    /* Budget used up: leave with irqs enabled. */
                    local_irq_enable();
                    return work;
                }
            }
    
            /* process_queue is empty: move everything queued on input_pkt_queue over. */
            rps_lock(sd);
            qlen = skb_queue_len(&sd->input_pkt_queue);
            if (qlen)
                skb_queue_splice_tail_init(&sd->input_pkt_queue,
                               &sd->process_queue);
    
            if (qlen < quota - work) {
                /*
                 * Inline a custom version of __napi_complete().
                 * only current cpu owns and manipulates this napi,
                 * and NAPI_STATE_SCHED is the only possible flag set on backlog.
                 * we can use a plain write instead of clear_bit(),
                 * and we dont need an smp_mb() memory barrier.
                 */
                list_del(&napi->poll_list);
                napi->state = 0;
    
                /* Shrink the budget so the outer loop ends once these qlen packets are delivered. */
                quota = work + qlen;
            }
            rps_unlock(sd);
        }
        local_irq_enable();
    
        return work;
    }
    /*
     * Walk the rps_ipi_next chain starting at @remsd and issue an
     * asynchronous cross-CPU call, using the entry's own csd, to every entry
     * whose CPU is online. Does nothing when CONFIG_RPS is not configured.
     */
    static void net_rps_send_ipi(struct softnet_data *remsd)
    {
    #ifdef CONFIG_RPS
            struct softnet_data *following;

            for (; remsd; remsd = following) {
                    /* Read the link before the entry is handed to the IPI. */
                    following = remsd->rps_ipi_next;

                    if (!cpu_online(remsd->cpu))
                            continue;

                    smp_call_function_single_async(remsd->cpu, &remsd->csd);
            }
    #endif
    }
  • 相关阅读:
    assert用法,原理,改编(C++)
    使用临界段实现优化的进程间同步对象原理和实现 (转)
    去除表达式里面多余的()
    为什么C++编译器不能支持对模板的分离式编译
    python试题[转]
    从CSDN的趣味题学Python
    即时战略游戏中如何协调对象移动
    贪心算法精讲
    游戏引擎大全
    I/O 完成端口( Windows核心编程 )
  • 原文地址:https://www.cnblogs.com/dream397/p/14536283.html
Copyright © 2011-2022 走看看