zoukankan      html  css  js  c++  java
  • Openvswitch原理与代码分析(2): ovs-vswitchd的启动

    ovs-vswitchd.c的main函数最终会进入一个while无限循环,在这个循环中,最重要的两个函数是bridge_run()和netdev_run()。

     

     

    Openvswitch主要管理两种类型的设备,一个是创建的虚拟网桥,一个是连接到虚拟网桥上的设备。

     

    其中bridge_run就是初始化数据库中已经创建的虚拟网桥。

     

    一、虚拟网桥的初始化bridge_run

     

    bridge_run会调用bridge_run__,bridge_run__中最重要的是对于所有的网桥,都调用ofproto_run

     

    1. static void
    2. bridge_run__(void)
    3. {
    4. ……
    5.     /* Let each bridge do the work that it needs to do. */
    6.     HMAP_FOR_EACH (br, node, &all_bridges) {
    7.         ofproto_run(br->ofproto);
    8.     }
    9. }

     

    int ofproto_run(struct ofproto *p)会调用error = p->ofproto_class->run(p);

     

    ofproto_class的定义在ofproto-provider.h中,它的实现定义在ofproto-dpif.c中,这里面的所有的函数,在这个文件中都有定义。

     

    1. const struct ofproto_class ofproto_dpif_class = {
    2.     init,
    3.     enumerate_types,
    4.     enumerate_names,
    5.     del,
    6.     port_open_type,
    7.     type_run,
    8.     type_wait,
    9.     alloc,
    10.     construct,
    11.     destruct,
    12.     dealloc,
    13.     run,
    14.     wait,
    15.     NULL, /* get_memory_usage. */
    16.     type_get_memory_usage,
    17.     flush,
    18.     query_tables,
    19.     set_tables_version,
    20.     port_alloc,
    21.     port_construct,
    22.     port_destruct,
    23.     port_dealloc,
    24.     port_modified,
    25.     port_reconfigured,
    26.     port_query_by_name,
    27.     port_add,
    28.     port_del,
    29.     port_get_stats,
    30.     port_dump_start,
    31.     port_dump_next,
    32.     port_dump_done,
    33.     port_poll,
    34.     port_poll_wait,
    35.     port_is_lacp_current,
    36.     port_get_lacp_stats,
    37.     NULL, /* rule_choose_table */
    38.     rule_alloc,
    39.     rule_construct,
    40.     rule_insert,
    41.     rule_delete,
    42.     rule_destruct,
    43.     rule_dealloc,
    44.     rule_get_stats,
    45.     rule_execute,
    46.     set_frag_handling,
    47.     packet_out,
    48.     set_netflow,
    49.     get_netflow_ids,
    50.     set_sflow,
    51.     set_ipfix,
    52.     set_cfm,
    53.     cfm_status_changed,
    54.     get_cfm_status,
    55.     set_lldp,
    56.     get_lldp_status,
    57.     set_aa,
    58.     aa_mapping_set,
    59.     aa_mapping_unset,
    60.     aa_vlan_get_queued,
    61.     aa_vlan_get_queue_size,
    62.     set_bfd,
    63.     bfd_status_changed,
    64.     get_bfd_status,
    65.     set_stp,
    66.     get_stp_status,
    67.     set_stp_port,
    68.     get_stp_port_status,
    69.     get_stp_port_stats,
    70.     set_rstp,
    71.     get_rstp_status,
    72.     set_rstp_port,
    73.     get_rstp_port_status,
    74.     set_queues,
    75.     bundle_set,
    76.     bundle_remove,
    77.     mirror_set__,
    78.     mirror_get_stats__,
    79.     set_flood_vlans,
    80.     is_mirror_output_bundle,
    81.     forward_bpdu_changed,
    82.     set_mac_table_config,
    83.     set_mcast_snooping,
    84.     set_mcast_snooping_port,
    85.     set_realdev,
    86.     NULL, /* meter_get_features */
    87.     NULL, /* meter_set */
    88.     NULL, /* meter_get */
    89.     NULL, /* meter_del */
    90.     group_alloc, /* group_alloc */
    91.     group_construct, /* group_construct */
    92.     group_destruct, /* group_destruct */
    93.     group_dealloc, /* group_dealloc */
    94.     group_modify, /* group_modify */
    95.     group_get_stats, /* group_get_stats */
    96.     get_datapath_version, /* get_datapath_version */
    97. };

     

    在ofproto-provider.h中注释里是这样说的。

    这里定义了四类数据结构

    struct ofproto表示一个交换机

    struct ofport表示交换机上的一个端口

    struct rule表示交换机上的一条flow规则

    struct ofgroup表示一个flow规则组

     

    上面说到启动的过程中,会调用ofproto_class->run,也即会调用ofproto-dpif.c中的static int run(struct ofproto *ofproto_)函数。

     

    在这个函数中,会完成netflow、sflow、ipfix、stp、rstp、mac address learning等的初始化以及一系列相关操作。

     

    bridge_run还会调用static void bridge_reconfigure(const struct ovsrec_open_vswitch *ovs_cfg),其中ovs_cfg是从ovsdb-server里面读取出来的配置。

     

    在这个函数里面,对于每一个网桥,将网卡添加进去

    1. HMAP_FOR_EACH (br, node, &all_bridges) {
    2.     bridge_add_ports(br, &br->wanted_ports);
    3.     shash_destroy(&br->wanted_ports);
    4. }

     

    1. static void
    2. bridge_add_ports(struct bridge *br, const struct shash *wanted_ports)
    3. {
    4.     /* First add interfaces that request a particular port number. */
    5.     bridge_add_ports__(br, wanted_ports, true);
    6.  
    7.     /* Then add interfaces that want automatic port number assignment.
    8.      * We add these afterward to avoid accidentally taking a specifically
    9.      * requested port number. */
    10.     bridge_add_ports__(br, wanted_ports, false);
    11. }

     

    static void bridge_add_ports__(struct bridge *br, const struct shash *wanted_ports, bool with_requested_port)会调用

    static bool iface_create(struct bridge *br, const struct ovsrec_interface *iface_cfg, const struct ovsrec_port *port_cfg)会调用

    static int iface_do_create(const struct bridge *br, const struct ovsrec_interface *iface_cfg, const struct ovsrec_port *port_cfg, ofp_port_t *ofp_portp, struct netdev **netdevp, char **errp)会调用

    int ofproto_port_add(struct ofproto *ofproto, struct netdev *netdev, ofp_port_t *ofp_portp)会调用

     

    1. error = ofproto->ofproto_class->port_add(ofproto, netdev);

     

    会调用ofproto-dpif.c中的ofproto_dpif_class的static int port_add(struct ofproto *ofproto_, struct netdev *netdev)函数。

     

    会调用int dpif_port_add(struct dpif *dpif, struct netdev *netdev, odp_port_t *port_nop)会调用

     

    1. error = dpif->dpif_class->port_add(dpif, netdev, &port_no);

     

    会调用dpif_netlink_class的port_add函数,也即dpif_netlink_port_add,其定义为

    static int dpif_netlink_port_add(struct dpif *dpif_, struct netdev *netdev, odp_port_t *port_nop)会调用

    static int dpif_netlink_port_add__(struct dpif_netlink *dpif, struct netdev *netdev, odp_port_t *port_nop)

     

    在这个函数里面,会调用netlink的API,命令为OVS_VPORT_CMD_NEW

     

    1. const char *name = netdev_vport_get_dpif_port(netdev,
    2.                                                   namebuf, sizeof namebuf);
    3. struct dpif_netlink_vport request, reply;
    4. struct nl_sock **socksp = NULL;
    5.  
    6. if (dpif->handlers) {
    7.     socksp = vport_create_socksp(dpif, &error);
    8.     if (!socksp) {
    9.         return error;
    10.     }
    11. }
    12.  
    13. dpif_netlink_vport_init(&request);
    14. request.cmd = OVS_VPORT_CMD_NEW;
    15. request.dp_ifindex = dpif->dp_ifindex;
    16. request.type = netdev_to_ovs_vport_type(netdev);
    17.  
    18. request.name = name;
    19.  
    20. upcall_pids = vport_socksp_to_pids(socksp, dpif->n_handlers);
    21. request.n_upcall_pids = socksp ? dpif->n_handlers : 1;
    22. request.upcall_pids = upcall_pids;
    23. error = dpif_netlink_vport_transact(&request, &reply, &buf);

     

    这里会调用内核模块openvswitch.ko,在内核中添加虚拟网卡。这部分详细的过程将在下一节分析。

    二、虚拟网卡的初始化netdev_run()

     

    1. void
    2. netdev_run(void)
    3.     OVS_EXCLUDED(netdev_class_mutex, netdev_mutex)
    4. {
    5.     struct netdev_registered_class *rc;
    6.  
    7.     netdev_initialize();
    8.     ovs_mutex_lock(&netdev_class_mutex);
    9.     HMAP_FOR_EACH (rc, hmap_node, &netdev_classes) {
    10.         if (rc->class->run) {
    11.             rc->class->run();
    12.         }
    13.     }
    14.     ovs_mutex_unlock(&netdev_class_mutex);
    15. }

     

    依次循环调用netdev_classes中的每一个run。

     

    对于不同类型的虚拟网卡,都有对应的netdev_class。

     

    例如对于dpdk的网卡有

     

    1. static const struct netdev_class dpdk_class =
    2.     NETDEV_DPDK_CLASS(
    3.         "dpdk",
    4.         NULL,
    5.         netdev_dpdk_construct,
    6.         netdev_dpdk_destruct,
    7.         netdev_dpdk_set_multiq,
    8.         netdev_dpdk_eth_send,
    9.         netdev_dpdk_get_carrier,
    10.         netdev_dpdk_get_stats,
    11.         netdev_dpdk_get_features,
    12.         netdev_dpdk_get_status,
    13.         netdev_dpdk_rxq_recv);

     

    对于物理网卡,也需要有相应的netdev_class

     

    1. const struct netdev_class netdev_linux_class =
    2.     NETDEV_LINUX_CLASS(
    3.         "system",
    4.         netdev_linux_construct,
    5.         netdev_linux_get_stats,
    6.         netdev_linux_get_features,
    7.         netdev_linux_get_status);

     

    对于连接到KVM的tap网卡

    1. const struct netdev_class netdev_tap_class =
    2.     NETDEV_LINUX_CLASS(
    3.         "tap",
    4.         netdev_linux_construct_tap,
    5.         netdev_tap_get_stats,
    6.         netdev_linux_get_features,
    7.         netdev_linux_get_status);

     

    对于虚拟的软网卡,即OVS的internal类型端口(例如与网桥同名的内部端口;注意veth pair在OVS中属于上面的system类型)

    1. const struct netdev_class netdev_internal_class =
    2.     NETDEV_LINUX_CLASS(
    3.         "internal",
    4.         netdev_linux_construct,
    5.         netdev_internal_get_stats,
    6.         NULL, /* get_features */
    7.         netdev_internal_get_status);

     

    其中NETDEV_LINUX_CLASS是一个宏,只有NAME、CONSTRUCT、GET_STATS、GET_FEATURES、GET_STATUS这几项需要通过参数指定,其余成员都由宏统一填入netdev_linux_*函数或NULL。

    1. #define NETDEV_LINUX_CLASS(NAME, CONSTRUCT, GET_STATS,
    2.                            GET_FEATURES, GET_STATUS)
    3. {
    4.     NAME,
    5.                                                                 
    6.     NULL,
    7.     netdev_linux_run,
    8.     netdev_linux_wait,
    9.                                                                 
    10.     netdev_linux_alloc,
    11.     CONSTRUCT,
    12.     netdev_linux_destruct,
    13.     netdev_linux_dealloc,
    14.     NULL, /* get_config */
    15.     NULL, /* set_config */
    16.     NULL, /* get_tunnel_config */
    17.     NULL, /* build header */
    18.     NULL, /* push header */
    19.     NULL, /* pop header */
    20.     NULL, /* get_numa_id */
    21.     NULL, /* set_multiq */
    22.                                                                 
    23.     netdev_linux_send,
    24.     netdev_linux_send_wait,
    25.                                                                 
    26.     netdev_linux_set_etheraddr,
    27.     netdev_linux_get_etheraddr,
    28.     netdev_linux_get_mtu,
    29.     netdev_linux_set_mtu,
    30.     netdev_linux_get_ifindex,
    31.     netdev_linux_get_carrier,
    32.     netdev_linux_get_carrier_resets,
    33.     netdev_linux_set_miimon_interval,
    34.     GET_STATS,
    35.                                                                 
    36.     GET_FEATURES,
    37.     netdev_linux_set_advertisements,
    38.                                                                 
    39.     netdev_linux_set_policing,
    40.     netdev_linux_get_qos_types,
    41.     netdev_linux_get_qos_capabilities,
    42.     netdev_linux_get_qos,
    43.     netdev_linux_set_qos,
    44.     netdev_linux_get_queue,
    45.     netdev_linux_set_queue,
    46.     netdev_linux_delete_queue,
    47.     netdev_linux_get_queue_stats,
    48.     netdev_linux_queue_dump_start,
    49.     netdev_linux_queue_dump_next,
    50.     netdev_linux_queue_dump_done,
    51.     netdev_linux_dump_queue_stats,
    52.                                                                 
    53.     netdev_linux_get_in4,
    54.     netdev_linux_set_in4,
    55.     netdev_linux_get_in6,
    56.     netdev_linux_add_router,
    57.     netdev_linux_get_next_hop,
    58.     GET_STATUS,
    59.     netdev_linux_arp_lookup,
    60.                                                                 
    61.     netdev_linux_update_flags,
    62.                                                                 
    63.     netdev_linux_rxq_alloc,
    64.     netdev_linux_rxq_construct,
    65.     netdev_linux_rxq_destruct,
    66.     netdev_linux_rxq_dealloc,
    67.     netdev_linux_rxq_recv,
    68.     netdev_linux_rxq_wait,
    69.     netdev_linux_rxq_drain,
    70. }

     

    rc->class->run()调用的是netdev-linux.c下的netdev_linux_run

     

    netdev_linux_run会通过netlink的sock接收虚拟网卡的状态变化消息,解析之后更新对应网卡的状态。

     

    1. error = nl_sock_recv(sock, &buf, false);
    2. if (!error) {
    3.     struct rtnetlink_change change;
    4.     if (rtnetlink_parse(&buf, &change)) {
    5.         struct netdev *netdev_ = netdev_from_name(change.ifname);
    6.         if (netdev_ && is_netdev_linux_class(netdev_->netdev_class)) {
    7.            struct netdev_linux *netdev = netdev_linux_cast(netdev_);
    8.            ovs_mutex_lock(&netdev->mutex);
    9.            netdev_linux_update(netdev, &change);
    10.            ovs_mutex_unlock(&netdev->mutex);
    11.         }
    12.         netdev_close(netdev_);
    13.      }
    14. }
  • 相关阅读:
    Golang mysql数据库
    C++ list结构体变量排序
    VS2013 ERROR MSB8020
    error C4996: 'fopen': This function or variable may be unsafe. Consider using fopen_s instead. To disable deprecation
    error C2664: “FILE *fopen(const char *,const char *)”: 无法将参数 1 从“LPCTSTR”转换为“const char *”
    error C4430: missing type specifier
    虚拟地址转物理地址
    vs2013 x64 编译汇编代码
    fs寄存器相关,PEB,TEB
    boost 1.57 vs2013 编译
  • 原文地址:https://www.cnblogs.com/popsuper1982/p/5851603.html
Copyright © 2011-2022 走看看