zoukankan      html  css  js  c++  java
  • Openvswitch原理与代码分析(6):用户态流表flow table的操作

    当内核无法查找到流表项的时候,则会通过upcall来调用用户态ovs-vswitchd中的flow table。

    会调用ofproto-dpif-upcall.c中的udpif_upcall_handler函数。

    1. static void *
    2. udpif_upcall_handler(void *arg)
    3. {
    4.     struct handler *handler = arg;
    5.     struct udpif *udpif = handler->udpif;
    6.  
    7.     while (!latch_is_set(&handler->udpif->exit_latch)) {
    8.         if (recv_upcalls(handler)) {
    9.             poll_immediate_wake();
    10.         } else {
    11.             dpif_recv_wait(udpif->dpif, handler->handler_id);
    12.             latch_wait(&udpif->exit_latch);
    13.         }
    14.         poll_block();
    15.     }
    16.  
    17.     return NULL;
    18. }

     

    会调用static size_t recv_upcalls(struct handler *handler)

    在这个函数里面

    (1) 首先读取upcall,调用static int upcall_receive(struct upcall *upcall, const struct dpif_backer *backer, const struct dp_packet *packet, enum dpif_upcall_type type, const struct nlattr *userdata, const struct flow *flow, const unsigned int mru, const ovs_u128 *ufid, const unsigned pmd_id)

    (2) 其次提取包头,调用void flow_extract(struct dp_packet *packet, struct flow *flow),提取出的flow如下:

    1. struct flow {
    2.     /* Metadata */
    3.     struct flow_tnl tunnel; /* Encapsulating tunnel parameters. */
    4.     ovs_be64 metadata; /* OpenFlow Metadata. */
    5.     uint32_t regs[FLOW_N_REGS]; /* Registers. */
    6.     uint32_t skb_priority; /* Packet priority for QoS. */
    7.     uint32_t pkt_mark; /* Packet mark. */
    8.     uint32_t dp_hash; /* Datapath computed hash value. The exact
    9.                                  * computation is opaque to the user space. */
    10.     union flow_in_port in_port; /* Input port.*/
    11.     uint32_t recirc_id; /* Must be exact match. */
    12.     uint16_t ct_state; /* Connection tracking state. */
    13.     uint16_t ct_zone; /* Connection tracking zone. */
    14.     uint32_t ct_mark; /* Connection mark.*/
    15.     uint8_t pad1[4]; /* Pad to 64 bits. */
    16.     ovs_u128 ct_label; /* Connection label. */
    17.     uint32_t conj_id; /* Conjunction ID. */
    18.     ofp_port_t actset_output; /* Output port in action set. */
    19.     uint8_t pad2[2]; /* Pad to 64 bits. */
    20.  
    21.     /* L2, Order the same as in the Ethernet header! (64-bit aligned) */
    22.     struct eth_addr dl_dst; /* Ethernet destination address. */
    23.     struct eth_addr dl_src; /* Ethernet source address. */
    24.     ovs_be16 dl_type; /* Ethernet frame type. */
    25.     ovs_be16 vlan_tci; /* If 802.1Q, TCI | VLAN_CFI; otherwise 0. */
    26.     ovs_be32 mpls_lse[ROUND_UP(FLOW_MAX_MPLS_LABELS, 2)]; /* MPLS label stack
    27.                                                              (with padding). */
    28.     /* L3 (64-bit aligned) */
    29.     ovs_be32 nw_src; /* IPv4 source address. */
    30.     ovs_be32 nw_dst; /* IPv4 destination address. */
    31.     struct in6_addr ipv6_src; /* IPv6 source address. */
    32.     struct in6_addr ipv6_dst; /* IPv6 destination address. */
    33.     ovs_be32 ipv6_label; /* IPv6 flow label. */
    34.     uint8_t nw_frag; /* FLOW_FRAG_* flags. */
    35.     uint8_t nw_tos; /* IP ToS (including DSCP and ECN). */
    36.     uint8_t nw_ttl; /* IP TTL/Hop Limit. */
    37.     uint8_t nw_proto; /* IP protocol or low 8 bits of ARP opcode. */
    38.     struct in6_addr nd_target; /* IPv6 neighbor discovery (ND) target. */
    39.     struct eth_addr arp_sha; /* ARP/ND source hardware address. */
    40.     struct eth_addr arp_tha; /* ARP/ND target hardware address. */
    41.     ovs_be16 tcp_flags; /* TCP flags. With L3 to avoid matching L4. */
    42.     ovs_be16 pad3; /* Pad to 64 bits. */
    43.  
    44.     /* L4 (64-bit aligned) */
    45.     ovs_be16 tp_src; /* TCP/UDP/SCTP source port/ICMP type. */
    46.     ovs_be16 tp_dst; /* TCP/UDP/SCTP destination port/ICMP code. */
    47.     ovs_be32 igmp_group_ip4; /* IGMP group IPv4 address.
    48.                                  * Keep last for BUILD_ASSERT_DECL below. */
    49. };

     

    (3) 然后调用static int process_upcall(struct udpif *udpif, struct upcall *upcall, struct ofpbuf *odp_actions, struct flow_wildcards *wc)来处理upcall。

     

    对于MISS_UPCALL,调用static void upcall_xlate(struct udpif *udpif, struct upcall *upcall, struct ofpbuf *odp_actions, struct flow_wildcards *wc)

    1. switch (classify_upcall(upcall->type, userdata)) {
    2. case MISS_UPCALL:
    3.     upcall_xlate(udpif, upcall, odp_actions, wc);
    4.     return 0;

     

    会调用enum xlate_error xlate_actions(struct xlate_in *xin, struct xlate_out *xout)

    在这个函数里面,会在flow table里面查找rule

    ctx.rule = rule_dpif_lookup_from_table( ctx.xbridge->ofproto, ctx.tables_version, flow, xin->wc, ctx.xin->resubmit_stats, &ctx.table_id, flow->in_port.ofp_port, true, true);

    找到rule之后,调用static void do_xlate_actions(const struct ofpact *ofpacts, size_t ofpacts_len, struct xlate_ctx *ctx)。在这个函数里面,根据action的不同,修改flow的内容。

    1. switch (a->type) {
    2. case OFPACT_OUTPUT:
    3.     xlate_output_action(ctx, ofpact_get_OUTPUT(a)->port,
    4.                         ofpact_get_OUTPUT(a)->max_len, true);
    5.     break;
    6.  
    7. case OFPACT_SET_VLAN_VID:
    8.     wc->masks.vlan_tci |= htons(VLAN_VID_MASK | VLAN_CFI);
    9.     if (flow->vlan_tci & htons(VLAN_CFI) ||
    10.         ofpact_get_SET_VLAN_VID(a)->push_vlan_if_needed) {
    11.         flow->vlan_tci &= ~htons(VLAN_VID_MASK);
    12.         flow->vlan_tci |= (htons(ofpact_get_SET_VLAN_VID(a)->vlan_vid)
    13.                            | htons(VLAN_CFI));
    14.     }
    15.     break;
    16.  
    17. case OFPACT_SET_ETH_SRC:
    18.     WC_MASK_FIELD(wc, dl_src);
    19.     flow->dl_src = ofpact_get_SET_ETH_SRC(a)->mac;
    20.     break;
    21.  
    22. case OFPACT_SET_ETH_DST:
    23.     WC_MASK_FIELD(wc, dl_dst);
    24.     flow->dl_dst = ofpact_get_SET_ETH_DST(a)->mac;
    25.     break;
    26.  
    27. case OFPACT_SET_IPV4_SRC:
    28.     CHECK_MPLS_RECIRCULATION();
    29.     if (flow->dl_type == htons(ETH_TYPE_IP)) {
    30.         memset(&wc->masks.nw_src, 0xff, sizeof wc->masks.nw_src);
    31.         flow->nw_src = ofpact_get_SET_IPV4_SRC(a)->ipv4;
    32.     }
    33.     break;
    34.  
    35. case OFPACT_SET_IPV4_DST:
    36.     CHECK_MPLS_RECIRCULATION();
    37.     if (flow->dl_type == htons(ETH_TYPE_IP)) {
    38.         memset(&wc->masks.nw_dst, 0xff, sizeof wc->masks.nw_dst);
    39.         flow->nw_dst = ofpact_get_SET_IPV4_DST(a)->ipv4;
    40.     }
    41.     break;
    42.  
    43. case OFPACT_SET_L4_SRC_PORT:
    44.     CHECK_MPLS_RECIRCULATION();
    45.     if (is_ip_any(flow) && !(flow->nw_frag & FLOW_NW_FRAG_LATER)) {
    46.         memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
    47.         memset(&wc->masks.tp_src, 0xff, sizeof wc->masks.tp_src);
    48.         flow->tp_src = htons(ofpact_get_SET_L4_SRC_PORT(a)->port);
    49.     }
    50.     break;
    51.  
    52. case OFPACT_SET_L4_DST_PORT:
    53.     CHECK_MPLS_RECIRCULATION();
    54.     if (is_ip_any(flow) && !(flow->nw_frag & FLOW_NW_FRAG_LATER)) {
    55.         memset(&wc->masks.nw_proto, 0xff, sizeof wc->masks.nw_proto);
    56.         memset(&wc->masks.tp_dst, 0xff, sizeof wc->masks.tp_dst);
    57.         flow->tp_dst = htons(ofpact_get_SET_L4_DST_PORT(a)->port);
    58.     }
    59.     break;

     

     

    (4) 最后调用static void handle_upcalls(struct udpif *udpif, struct upcall *upcalls, size_t n_upcalls)将flow rule添加到内核中的datapath

    它会调用void dpif_operate(struct dpif *dpif, struct dpif_op **ops, size_t n_ops),后者再调用dpif->dpif_class->operate(dpif, ops, chunk);

    会调用dpif_netlink_operate()

    1. static void
    2. dpif_netlink_operate(struct dpif *dpif_, struct dpif_op **ops, size_t n_ops)
    3. {
    4.     struct dpif_netlink *dpif = dpif_netlink_cast(dpif_);
    5.  
    6.     while (n_ops > 0) {
    7.         size_t chunk = dpif_netlink_operate__(dpif, ops, n_ops);
    8.         ops += chunk;
    9.         n_ops -= chunk;
    10.     }
    11. }

     

    在static size_t dpif_netlink_operate__(struct dpif_netlink *dpif, struct dpif_op **ops, size_t n_ops)中,有以下的代码:

    1. switch (op->type) {
    2. case DPIF_OP_FLOW_PUT:
    3.     put = &op->u.flow_put;
    4.     dpif_netlink_init_flow_put(dpif, put, &flow);
    5.     if (put->stats) {
    6.         flow.nlmsg_flags |= NLM_F_ECHO;
    7.         aux->txn.reply = &aux->reply;
    8.     }
    9.     dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
    10.     break;
    11.  
    12. case DPIF_OP_FLOW_DEL:
    13.     del = &op->u.flow_del;
    14.     dpif_netlink_init_flow_del(dpif, del, &flow);
    15.     if (del->stats) {
    16.         flow.nlmsg_flags |= NLM_F_ECHO;
    17.         aux->txn.reply = &aux->reply;
    18.     }
    19.     dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
    20.     break;

     

    会调用netlink修改内核中datapath的规则。

  • 相关阅读:
    cpu几种架构区别
    linux之cp/scp命令+scp命令详解
    解读Linux命令格式(转)
    IO虚拟化简单了解
    NoSQL-来自维基百科
    kvm命令参数记录
    kvm 简单了解
    host与guest间共享文件夹的三种方法(原创)
    新装linux系统最基本设置
    kernel编译速度提高
  • 原文地址:https://www.cnblogs.com/popsuper1982/p/5902125.html
Copyright © 2011-2022 走看看