1. In ovs_dp_process_packet the kernel looks up its cached flow table (the datapath flow table). On a hit it executes the flow's actions via ovs_execute_actions -> do_execute_actions; one of the actions that may appear there is OVS_ACTION_ATTR_HASH.
/* Must be called with rcu_read_lock. */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
    const struct vport *p = OVS_CB(skb)->input_vport;
    struct datapath *dp = p->dp;
    struct sw_flow *flow;
    struct sw_flow_actions *sf_acts;
    struct dp_stats_percpu *stats;
    u64 *stats_counter;
    u32 n_mask_hit;

    stats = this_cpu_ptr(dp->stats_percpu);

    /* Look up flow. */
    flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
                                     &n_mask_hit);
    if (unlikely(!flow)) {
        struct dp_upcall_info upcall;
        int error;

        memset(&upcall, 0, sizeof(upcall));
        upcall.cmd = OVS_PACKET_CMD_MISS;
        upcall.portid = ovs_vport_find_upcall_portid(p, skb);
        upcall.mru = OVS_CB(skb)->mru;
        error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
        if (unlikely(error))
            kfree_skb(skb);
        else
            consume_skb(skb);
        stats_counter = &stats->n_missed;
        goto out;
    }

    ovs_flow_stats_update(flow, key->tp.flags, skb);
    sf_acts = rcu_dereference(flow->sf_acts);
    ovs_execute_actions(dp, skb, sf_acts, key);

    stats_counter = &stats->n_hit;

out:
    /* Update datapath statistics. */
    u64_stats_update_begin(&stats->syncp);
    (*stats_counter)++;
    stats->n_mask_hit += n_mask_hit;
    u64_stats_update_end(&stats->syncp);
}
/* Execute a list of actions against 'skb'. */
int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
                        const struct sw_flow_actions *acts,
                        struct sw_flow_key *key)
{
    int err, level;

    level = __this_cpu_inc_return(exec_actions_level);
    if (unlikely(level > OVS_RECURSION_LIMIT)) {
        net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n",
                             ovs_dp_name(dp));
        kfree_skb(skb);
        err = -ENETDOWN;
        goto out;
    }

    OVS_CB(skb)->acts_origlen = acts->orig_len;
    err = do_execute_actions(dp, skb, key,
                             acts->actions, acts->actions_len);

    if (level == 1)
        process_deferred_actions(dp);

out:
    __this_cpu_dec(exec_actions_level);
    return err;
}
2. do_execute_actions dispatches OVS_ACTION_ATTR_HASH to execute_hash.
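For context, do_execute_actions simply walks the flattened list of action attributes and switches on the attribute type; a heavily trimmed sketch of that dispatch loop (only the hash case is shown, error handling and all other OVS_ACTION_ATTR_* cases are omitted, and the exact shape varies with kernel version):

static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
                              struct sw_flow_key *key,
                              const struct nlattr *attr, int len)
{
    const struct nlattr *a;
    int rem;

    /* Iterate over the nested action attributes of the matched flow. */
    for (a = attr, rem = len; rem > 0; a = nla_next(a, &rem)) {
        switch (nla_type(a)) {
        case OVS_ACTION_ATTR_HASH:
            execute_hash(skb, key, a);
            break;
        /* ... OVS_ACTION_ATTR_OUTPUT, _USERSPACE, _SET, _RECIRC, ... */
        }
    }

    return 0;
}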
static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key,
                         const struct nlattr *attr)
{
    struct ovs_action_hash *hash_act = nla_data(attr);
    u32 hash = 0;

    /* OVS_HASH_ALG_L4 is the only possible hash algorithm. */
    hash = skb_get_hash(skb);
    hash = jhash_1word(hash, hash_act->hash_basis);
    if (!hash)
        hash = 0x1;

    key->ovs_flow_hash = hash;
}
3. This action only modifies the ovs_flow_hash member of the flow key. Working backwards from the places where that member is read, it is ultimately consumed by queue_userspace_packet, the function that delivers a packet to the userspace process over Netlink. The rest of this walkthrough looks at how queue_userspace_packet ends up using this member.
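queue_userspace_packet is not called from the actions path directly; it is reached through ovs_dp_upcall, which ovs_dp_process_packet invokes on a flow-table miss (the OVS_ACTION_ATTR_USERSPACE action also funnels into it via output_userspace). A simplified sketch of ovs_dp_upcall, with the lost-packet statistics accounting omitted and details varying by kernel version:

int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
                  const struct sw_flow_key *key,
                  const struct dp_upcall_info *upcall_info,
                  uint32_t cutlen)
{
    int err;

    /* No userspace listener registered for this upcall. */
    if (upcall_info->portid == 0)
        return -ENOTCONN;

    /* GSO skbs are segmented and each segment is queued separately;
     * everything else goes straight to queue_userspace_packet(). */
    if (!skb_is_gso(skb))
        err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
    else
        err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);

    return err;
}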
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
                                  const struct sw_flow_key *key,
                                  const struct dp_upcall_info *upcall_info,
                                  uint32_t cutlen)
{
    struct ovs_header *upcall;
    struct sk_buff *nskb = NULL;
    struct sk_buff *user_skb = NULL; /* to be queued to userspace */
    struct nlattr *nla;
    size_t len;
    unsigned int hlen;
    int err, dp_ifindex;

    dp_ifindex = get_dpifindex(dp);
    if (!dp_ifindex)
        return -ENODEV;

    if (skb_vlan_tag_present(skb)) {
        nskb = skb_clone(skb, GFP_ATOMIC);
        if (!nskb)
            return -ENOMEM;

        nskb = __vlan_hwaccel_push_inside(nskb);
        if (!nskb)
            return -ENOMEM;

        skb = nskb;
    }

    if (nla_attr_size(skb->len) > USHRT_MAX) {
        err = -EFBIG;
        goto out;
    }

    /* Complete checksum if needed */
    if (skb->ip_summed == CHECKSUM_PARTIAL &&
        (err = skb_csum_hwoffload_help(skb, 0)))
        goto out;

    /* Older versions of OVS user space enforce alignment of the last
     * Netlink attribute to NLA_ALIGNTO which would require extensive
     * padding logic. Only perform zerocopy if padding is not required.
     */
    if (dp->user_features & OVS_DP_F_UNALIGNED)
        hlen = skb_zerocopy_headlen(skb);
    else
        hlen = skb->len;

    len = upcall_msg_size(upcall_info, hlen - cutlen,
                          OVS_CB(skb)->acts_origlen);
    user_skb = genlmsg_new(len, GFP_ATOMIC);
    if (!user_skb) {
        err = -ENOMEM;
        goto out;
    }

    upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
                         0, upcall_info->cmd);
    upcall->dp_ifindex = dp_ifindex;

    err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
    BUG_ON(err);

    if (upcall_info->userdata)
        __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
                  nla_len(upcall_info->userdata),
                  nla_data(upcall_info->userdata));

    if (upcall_info->egress_tun_info) {
        nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
        err = ovs_nla_put_tunnel_info(user_skb,
                                      upcall_info->egress_tun_info);
        BUG_ON(err);
        nla_nest_end(user_skb, nla);
    }

    if (upcall_info->actions_len) {
        nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS);
        err = ovs_nla_put_actions(upcall_info->actions,
                                  upcall_info->actions_len,
                                  user_skb);
        if (!err)
            nla_nest_end(user_skb, nla);
        else
            nla_nest_cancel(user_skb, nla);
    }

    /* Add OVS_PACKET_ATTR_MRU */
    if (upcall_info->mru) {
        if (nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU,
                        upcall_info->mru)) {
            err = -ENOBUFS;
            goto out;
        }
        pad_packet(dp, user_skb);
    }

    /* Add OVS_PACKET_ATTR_LEN when packet is truncated */
    if (cutlen > 0) {
        if (nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN,
                        skb->len)) {
            err = -ENOBUFS;
            goto out;
        }
        pad_packet(dp, user_skb);
    }

    /* Only reserve room for attribute header, packet data is added
     * in skb_zerocopy()
     */
    if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
        err = -ENOBUFS;
        goto out;
    }
    nla->nla_len = nla_attr_size(skb->len - cutlen);

    err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen);
    if (err)
        goto out;

    /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
    pad_packet(dp, user_skb);

    ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;

    err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
    user_skb = NULL;
out:
    if (err)
        skb_tx_error(skb);
    kfree_skb(user_skb);
    kfree_skb(nskb);
    return err;
}
4. The ovs_nla_put_key function (called above with attr = OVS_PACKET_ATTR_KEY to serialize the flow key into the upcall message):
int ovs_nla_put_key(const struct sw_flow_key *swkey,
                    const struct sw_flow_key *output, int attr, bool is_mask,
                    struct sk_buff *skb)
{
    int err;
    struct nlattr *nla;

    nla = nla_nest_start(skb, attr);
    if (!nla)
        return -EMSGSIZE;
    err = __ovs_nla_put_key(swkey, output, is_mask, skb);
    if (err)
        return err;
    nla_nest_end(skb, nla);

    return 0;
}
5. The __ovs_nla_put_key function. This is where the hash finally surfaces: the second nla_put_u32 near the top emits output->ovs_flow_hash as the OVS_KEY_ATTR_DP_HASH attribute.
static int __ovs_nla_put_key(const struct sw_flow_key *swkey,
                             const struct sw_flow_key *output, bool is_mask,
                             struct sk_buff *skb)
{
    struct ovs_key_ethernet *eth_key;
    struct nlattr *nla;
    struct nlattr *encap = NULL;
    struct nlattr *in_encap = NULL;

    if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id))
        goto nla_put_failure;

    if (nla_put_u32(skb, OVS_KEY_ATTR_DP_HASH, output->ovs_flow_hash))
        goto nla_put_failure;

    if (nla_put_u32(skb, OVS_KEY_ATTR_PRIORITY, output->phy.priority))
        goto nla_put_failure;

    if ((swkey->tun_proto || is_mask)) {
        const void *opts = NULL;

        if (output->tun_key.tun_flags & TUNNEL_OPTIONS_PRESENT)
            opts = TUN_METADATA_OPTS(output, swkey->tun_opts_len);

        if (ip_tun_to_nlattr(skb, &output->tun_key, opts,
                             swkey->tun_opts_len, swkey->tun_proto))
            goto nla_put_failure;
    }

    if (swkey->phy.in_port == DP_MAX_PORTS) {
        if (is_mask && (output->phy.in_port == 0xffff))
            if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT, 0xffffffff))
                goto nla_put_failure;
    } else {
        u16 upper_u16;
        upper_u16 = !is_mask ? 0 : 0xffff;

        if (nla_put_u32(skb, OVS_KEY_ATTR_IN_PORT,
                        (upper_u16 << 16) | output->phy.in_port))
            goto nla_put_failure;
    }

    if (nla_put_u32(skb, OVS_KEY_ATTR_SKB_MARK, output->phy.skb_mark))
        goto nla_put_failure;

    if (ovs_ct_put_key(swkey, output, skb))
        goto nla_put_failure;

    if (ovs_key_mac_proto(swkey) == MAC_PROTO_ETHERNET) {
        nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
        if (!nla)
            goto nla_put_failure;

        eth_key = nla_data(nla);
        ether_addr_copy(eth_key->eth_src, output->eth.src);
        ether_addr_copy(eth_key->eth_dst, output->eth.dst);

        if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) {
            if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask))
                goto nla_put_failure;
            encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
            if (!swkey->eth.vlan.tci)
                goto unencap;

            if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) {
                if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask))
                    goto nla_put_failure;
                in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
                if (!swkey->eth.cvlan.tci)
                    goto unencap;
            }
        }

        if (swkey->eth.type == htons(ETH_P_802_2)) {
            /*
             * Ethertype 802.2 is represented in the netlink with omitted
             * OVS_KEY_ATTR_ETHERTYPE in the flow key attribute, and
             * 0xffff in the mask attribute. Ethertype can also
             * be wildcarded.
             */
            if (is_mask && output->eth.type)
                if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE,
                                 output->eth.type))
                    goto nla_put_failure;
            goto unencap;
        }
    }

    if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type))
        goto nla_put_failure;

    if (eth_type_vlan(swkey->eth.type)) {
        /* There are 3 VLAN tags, we don't know anything about the rest
         * of the packet, so truncate here.
         */
        WARN_ON_ONCE(!(encap && in_encap));
        goto unencap;
    }

    if (swkey->eth.type == htons(ETH_P_IP)) {
        struct ovs_key_ipv4 *ipv4_key;

        nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
        if (!nla)
            goto nla_put_failure;
        ipv4_key = nla_data(nla);
        ipv4_key->ipv4_src = output->ipv4.addr.src;
        ipv4_key->ipv4_dst = output->ipv4.addr.dst;
        ipv4_key->ipv4_proto = output->ip.proto;
        ipv4_key->ipv4_tos = output->ip.tos;
        ipv4_key->ipv4_ttl = output->ip.ttl;
        ipv4_key->ipv4_frag = output->ip.frag;
    } else if (swkey->eth.type == htons(ETH_P_IPV6)) {
        struct ovs_key_ipv6 *ipv6_key;

        nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
        if (!nla)
            goto nla_put_failure;
        ipv6_key = nla_data(nla);
        memcpy(ipv6_key->ipv6_src, &output->ipv6.addr.src,
               sizeof(ipv6_key->ipv6_src));
        memcpy(ipv6_key->ipv6_dst, &output->ipv6.addr.dst,
               sizeof(ipv6_key->ipv6_dst));
        ipv6_key->ipv6_label = output->ipv6.label;
        ipv6_key->ipv6_proto = output->ip.proto;
        ipv6_key->ipv6_tclass = output->ip.tos;
        ipv6_key->ipv6_hlimit = output->ip.ttl;
        ipv6_key->ipv6_frag = output->ip.frag;
    } else if (swkey->eth.type == htons(ETH_P_NSH)) {
        if (nsh_key_to_nlattr(&output->nsh, is_mask, skb))
            goto nla_put_failure;
    } else if (swkey->eth.type == htons(ETH_P_ARP) ||
               swkey->eth.type == htons(ETH_P_RARP)) {
        struct ovs_key_arp *arp_key;

        nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
        if (!nla)
            goto nla_put_failure;
        arp_key = nla_data(nla);
        memset(arp_key, 0, sizeof(struct ovs_key_arp));
        arp_key->arp_sip = output->ipv4.addr.src;
        arp_key->arp_tip = output->ipv4.addr.dst;
        arp_key->arp_op = htons(output->ip.proto);
        ether_addr_copy(arp_key->arp_sha, output->ipv4.arp.sha);
        ether_addr_copy(arp_key->arp_tha, output->ipv4.arp.tha);
    } else if (eth_p_mpls(swkey->eth.type)) {
        struct ovs_key_mpls *mpls_key;

        nla = nla_reserve(skb, OVS_KEY_ATTR_MPLS, sizeof(*mpls_key));
        if (!nla)
            goto nla_put_failure;
        mpls_key = nla_data(nla);
        mpls_key->mpls_lse = output->mpls.top_lse;
    }

    if ((swkey->eth.type == htons(ETH_P_IP) ||
         swkey->eth.type == htons(ETH_P_IPV6)) &&
        swkey->ip.frag != OVS_FRAG_TYPE_LATER) {

        if (swkey->ip.proto == IPPROTO_TCP) {
            struct ovs_key_tcp *tcp_key;

            nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
            if (!nla)
                goto nla_put_failure;
            tcp_key = nla_data(nla);
            tcp_key->tcp_src = output->tp.src;
            tcp_key->tcp_dst = output->tp.dst;
            if (nla_put_be16(skb, OVS_KEY_ATTR_TCP_FLAGS,
                             output->tp.flags))
                goto nla_put_failure;
        } else if (swkey->ip.proto == IPPROTO_UDP) {
            struct ovs_key_udp *udp_key;

            nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
            if (!nla)
                goto nla_put_failure;
            udp_key = nla_data(nla);
            udp_key->udp_src = output->tp.src;
            udp_key->udp_dst = output->tp.dst;
        } else if (swkey->ip.proto == IPPROTO_SCTP) {
            struct ovs_key_sctp *sctp_key;

            nla = nla_reserve(skb, OVS_KEY_ATTR_SCTP, sizeof(*sctp_key));
            if (!nla)
                goto nla_put_failure;
            sctp_key = nla_data(nla);
            sctp_key->sctp_src = output->tp.src;
            sctp_key->sctp_dst = output->tp.dst;
        } else if (swkey->eth.type == htons(ETH_P_IP) &&
                   swkey->ip.proto == IPPROTO_ICMP) {
            struct ovs_key_icmp *icmp_key;

            nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
            if (!nla)
                goto nla_put_failure;
            icmp_key = nla_data(nla);
            icmp_key->icmp_type = ntohs(output->tp.src);
            icmp_key->icmp_code = ntohs(output->tp.dst);
        } else if (swkey->eth.type == htons(ETH_P_IPV6) &&
                   swkey->ip.proto == IPPROTO_ICMPV6) {
            struct ovs_key_icmpv6 *icmpv6_key;

            nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
                              sizeof(*icmpv6_key));
            if (!nla)
                goto nla_put_failure;
            icmpv6_key = nla_data(nla);
            icmpv6_key->icmpv6_type = ntohs(output->tp.src);
            icmpv6_key->icmpv6_code = ntohs(output->tp.dst);

            if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
                icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
                struct ovs_key_nd *nd_key;

                nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
                if (!nla)
                    goto nla_put_failure;
                nd_key = nla_data(nla);
                memcpy(nd_key->nd_target, &output->ipv6.nd.target,
                       sizeof(nd_key->nd_target));
                ether_addr_copy(nd_key->nd_sll, output->ipv6.nd.sll);
                ether_addr_copy(nd_key->nd_tll, output->ipv6.nd.tll);
            }
        }
    }

unencap:
    if (in_encap)
        nla_nest_end(skb, in_encap);
    if (encap)
        nla_nest_end(skb, encap);

    return 0;

nla_put_failure:
    return -EMSGSIZE;
}
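So the value computed by execute_hash and stored in key->ovs_flow_hash reaches userspace as the OVS_KEY_ATTR_DP_HASH attribute nested inside the upcall's OVS_PACKET_ATTR_KEY. To close the loop, here is a minimal userspace-side sketch of pulling that attribute back out of an upcall message. This is illustrative only: the helper names are made up, and real OVS userspace parses upcalls through its own odp/netlink library code rather than hand-rolled attribute walking.

#include <stddef.h>
#include <stdint.h>
#include <linux/netlink.h>
#include <linux/openvswitch.h>

/* Walk a buffer of netlink attributes and return the first one of 'type',
 * or NULL if it is absent or the buffer is malformed. */
static const struct nlattr *find_attr(const void *buf, int len, uint16_t type)
{
    const struct nlattr *a = buf;

    while (len >= (int)sizeof(*a) && a->nla_len >= sizeof(*a) &&
           a->nla_len <= len) {
        if ((a->nla_type & NLA_TYPE_MASK) == type)
            return a;
        len -= NLA_ALIGN(a->nla_len);
        a = (const void *)((const char *)a + NLA_ALIGN(a->nla_len));
    }
    return NULL;
}

/* 'payload'/'len' is the generic netlink payload of an OVS_PACKET_CMD_*
 * upcall, starting right after struct ovs_header.  Returns the dp_hash
 * that the kernel put into OVS_KEY_ATTR_DP_HASH, or 0 if not present. */
uint32_t upcall_dp_hash(const void *payload, int len)
{
    const struct nlattr *key, *dp_hash;

    key = find_attr(payload, len, OVS_PACKET_ATTR_KEY);
    if (!key)
        return 0;

    /* Descend into the nested flow-key attributes. */
    dp_hash = find_attr((const char *)key + NLA_HDRLEN,
                        key->nla_len - NLA_HDRLEN, OVS_KEY_ATTR_DP_HASH);
    if (!dp_hash)
        return 0;

    return *(const uint32_t *)((const char *)dp_hash + NLA_HDRLEN);
}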