以IPv4 ESP tunnel模式为例,分析IPsec的收包过程;
在esp4_init中注册了协议号为50的ESP报文处理函数xfrm4_rcv:
/*
 * Receive entry point for ESP packets (IP protocol 50).
 *
 * Thin wrapper: forwards the buffer to xfrm4_rcv_encap() with 0 as the
 * second argument and returns whatever that call returns.
 */
int xfrm4_rcv(struct sk_buff *skb)
{
	return xfrm4_rcv_encap(skb, 0);
}
对于发往本机且IP头中协议号为50的ESP报文,会进入xfrm4_rcv_encap进行解密;
xfrm4_rcv_encap提取报文中ESP头的SPI,然后根据SPI和目的IP地址查找SA,根据该SA进行重放检查、解密,并把每个步骤用到的SA记录在该skb->sp中;最后调用netif_rx把解密后的报文重新交给IP协议栈处理;
反重放检查函数为xfrm_replay_check,反重放窗口的更新函数为xfrm_replay_advance:
/*
 * Anti-replay check for an inbound sequence number.
 *
 * @x:       state holding the replay counters (x->replay) and the configured
 *           window size (x->props.replay_window)
 * @net_seq: sequence number in network byte order
 *
 * Returns 0 when the packet may be accepted and -EINVAL otherwise.  A
 * rejection caused by the window or by a duplicate increments the matching
 * counter in x->stats.
 */
int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
{
	const u32 seq = ntohl(net_seq);
	u32 window;
	u32 behind;

	/* Sequence number 0 is never accepted. */
	if (unlikely(!seq))
		return -EINVAL;

	/* Newer than the highest sequence number recorded so far: accept. */
	if (likely(seq > x->replay.seq))
		return 0;

	/* The usable window is capped by the number of bits in the bitmap. */
	window = min_t(unsigned int, x->props.replay_window,
		       sizeof(x->replay.bitmap) * 8);
	behind = x->replay.seq - seq;

	/* Too far behind the highest sequence number: outside the window. */
	if (behind >= window) {
		x->stats.replay_window++;
		return -EINVAL;
	}

	/* Inside the window: reject if this sequence number was already seen. */
	if (x->replay.bitmap & (1U << behind)) {
		x->stats.replay++;
		return -EINVAL;
	}

	return 0;
}
/*
 * Record an accepted sequence number in the anti-replay state.
 *
 * @x:       state whose x->replay.seq / x->replay.bitmap are updated
 * @net_seq: sequence number in network byte order
 *
 * If the sequence number is newer than x->replay.seq, the bitmap is shifted
 * so that bit 0 represents the new highest sequence number.  Otherwise the
 * bit corresponding to the distance from x->replay.seq is set.  Afterwards a
 * XFRM_REPLAY_UPDATE notification is sent when xfrm_aevent_is_on() is true.
 *
 * The effective window is clamped to the number of bits in the bitmap, the
 * same bound xfrm_replay_check() applies.  Without the clamp a configured
 * window wider than the bitmap lets the shift count reach the bitmap width,
 * which is undefined behaviour in C.
 */
void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
{
	const u32 bitmap_bits = sizeof(x->replay.bitmap) * 8;
	u32 window = x->props.replay_window;
	u32 seq = ntohl(net_seq);
	u32 diff;

	if (window > bitmap_bits)
		window = bitmap_bits;

	if (seq > x->replay.seq) {
		/* New highest sequence number. */
		diff = seq - x->replay.seq;
		if (diff < window)
			/* Slide the window and mark the new packet in bit 0. */
			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
		else
			/* Jumped past the whole window: only this packet remains. */
			x->replay.bitmap = 1;
		x->replay.seq = seq;
	} else {
		/*
		 * Older packet: mark it as seen.  Skip the update when it lies
		 * beyond the bitmap, since there is no bit to represent it and
		 * shifting by that amount would be undefined.
		 */
		diff = x->replay.seq - seq;
		if (diff < bitmap_bits)
			x->replay.bitmap |= (1U << diff);
	}

	if (xfrm_aevent_is_on())
		xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}
下面看下解密以及解隧道的过程;
首先是ESP隧道模式报文的格式:
其中黄色部分为原报文+padding+pad len+next header加密后的形式,蓝色虚线框内的部分表示最后的authentication数据认证的部分(在网络传输中不允许修改的部分);
在xfrm4_rcv_encap查到对应的SA以后,调用了x->type->input(x, skb)以及x->mode->input(x, skb)分别进行ESP解密以及隧道头剥离;
x->type->input在ESP协议下为esp_input;
/*
 * ESP receive-side transform (reached through x->type->input for ESP).
 *
 * Steps, in order:
 *   1. sanity-check the lengths,
 *   2. if authentication is configured, compute the digest and compare it
 *      with the ICV at the tail of the packet,
 *   3. make the skb writable and decrypt the payload in place,
 *   4. read the pad length / next header bytes that precede the ICV,
 *   5. handle the NAT-T (x->encap) bookkeeping,
 *   6. store the next header in the IP header, trim the padding + ICV and
 *      pull the ESP header + IV.
 *
 * Returns 0 on success, the error from crypto_blkcipher_decrypt() when
 * decryption fails, and -EINVAL for every other failure.
 *
 * NOTE(review): a kmalloc() failure for the scatterlist is also reported as
 * -EINVAL, and the results of skb_to_sgvec() and pskb_trim() are not
 * checked - confirm this is intended.
 */
static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
{
	struct iphdr *iph;
	struct ip_esp_hdr *esph;
	struct esp_data *esp = x->data;
	struct crypto_blkcipher *tfm = esp->conf.tfm;
	struct blkcipher_desc desc = { .tfm = tfm };
	struct sk_buff *trailer;
	int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4);
	/* authentication data length */
	int alen = esp->auth.icv_trunc_len;
	/* encrypted data length */
	int elen = skb->len - sizeof(struct ip_esp_hdr) - esp->conf.ivlen - alen;
	int nfrags;
	int ihl;
	u8 nexthdr[2];
	struct scatterlist *sg;
	int padlen;
	int err;

	/* The packet must be at least as long as the ESP header. */
	if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr)))
		goto out;

	/* The encrypted part must be non-empty and a multiple of blksize. */
	if (elen <= 0 || (elen & (blksize-1)))
		goto out;

	/* If integrity check is required, do this. */
	if (esp->auth.icv_full_len) {
		u8 sum[alen];

		/* Compute the digest of the packet into esp->auth.work_icv. */
		err = esp_mac_digest(esp, skb, 0, skb->len - alen);
		if (err)
			goto out;

		/* Copy the authentication data carried by the packet into sum. */
		if (skb_copy_bits(skb, skb->len - alen, sum, alen))
			BUG();

		/* Compare the computed digest with the one carried by the packet. */
		if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) {
			x->stats.integrity_failed++;
			goto out;
		}
	}

	/* The packet is about to be modified, so it must be writable. */
	if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0)
		goto out;

	skb->ip_summed = CHECKSUM_NONE;

	esph = (struct ip_esp_hdr*)skb->data;

	/* Load the cipher's initialisation vector from the packet. */
	/* Get ivec. This can be wrong, check against another impls. */
	if (esp->conf.ivlen)
		crypto_blkcipher_set_iv(tfm, esph->enc_data, esp->conf.ivlen);

	/* Use the preallocated scatterlist unless more entries are needed. */
	sg = &esp->sgbuf[0];

	if (unlikely(nfrags > ESP_NUM_FAST_SG)) {
		sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
		if (!sg)
			goto out;
	}

	/* Decrypt in place (source and destination scatterlists are the same). */
	skb_to_sgvec(skb, sg, sizeof(struct ip_esp_hdr) + esp->conf.ivlen, elen);
	err = crypto_blkcipher_decrypt(&desc, sg, sg, elen);
	/* Free the scatterlist only if it was heap-allocated above. */
	if (unlikely(sg != &esp->sgbuf[0]))
		kfree(sg);
	if (unlikely(err))
		return err;

	/* Fetch the two bytes before the ICV: pad length and next header. */
	if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2))
		BUG();

	padlen = nexthdr[0];
	if (padlen+2 >= elen)
		goto out;

	/* ... check padding bits here. Silly. :-) */

	iph = skb->nh.iph;
	ihl = iph->ihl * 4;

	if (x->encap) {
		struct xfrm_encap_tmpl *encap = x->encap;
		struct udphdr *uh = (void *)(skb->nh.raw + ihl);

		/*
		 * NAT traversal: the peer's IP or source port changed, so
		 * notify the keying (IKE) daemon.
		 */
		/*
		 * 1) if the NAT-T peer's IP or port changed then
		 *    advertize the change to the keying daemon.
		 *    This is an inbound SA, so just compare
		 *    SRC ports.
		 */
		if (iph->saddr != x->props.saddr.a4 ||
		    uh->source != encap->encap_sport) {
			xfrm_address_t ipaddr;

			ipaddr.a4 = iph->saddr;
			km_new_mapping(x, &ipaddr, uh->source);

			/* XXX: perhaps add an extra
			 * policy check here, to see
			 * if we should allow or
			 * reject a packet from a
			 * different source
			 * address/port.
			 */
		}

		/*
		 * 2) ignore UDP/TCP checksums in case
		 *    of NAT-T in Transport Mode, or
		 *    perform other post-processing fixes
		 *    as per draft-ietf-ipsec-udp-encaps-06,
		 *    section 3.1.2
		 */
		if (x->props.mode == XFRM_MODE_TRANSPORT ||
		    x->props.mode == XFRM_MODE_BEET)
			skb->ip_summed = CHECKSUM_UNNECESSARY;
	}

	/*
	 * Rewrite the IP protocol field with the ESP next-header value
	 * (IPPROTO_IPIP in tunnel mode).
	 */
	iph->protocol = nexthdr[1];
	/* Drop the ICV, the padding and the pad-length/next-header bytes. */
	pskb_trim(skb, skb->len - alen - padlen - 2);
	/* Pull the ESP header and IV off the front of the packet. */
	skb->h.raw = __skb_pull(skb, sizeof(*esph) + esp->conf.ivlen) - ihl;

	return 0;

out:
	return -EINVAL;
}
x->mode->input在隧道模式下为xfrm4_tunnel_input
static int xfrm4_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { struct iphdr *iph = skb->nh.iph; int err = -EINVAL; switch(iph->protocol){ case IPPROTO_IPIP: break; #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) case IPPROTO_IPV6: break; #endif default: goto out; } if (!pskb_may_pull(skb, sizeof(struct iphdr))) goto out; if (skb_cloned(skb) && (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) goto out; iph = skb->nh.iph; if (iph->protocol == IPPROTO_IPIP) { if (x->props.flags & XFRM_STATE_DECAP_DSCP) ipv4_copy_dscp(iph, skb->h.ipiph); if (!(x->props.flags & XFRM_STATE_NOECN)) ipip_ecn_decapsulate(skb); } #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) else { if (!(x->props.flags & XFRM_STATE_NOECN)) ipip6_ecn_decapsulate(iph, skb); skb->protocol = htons(ETH_P_IPV6); } #endif /* 拷贝L2 header */ skb->mac.raw = memmove(skb->data - skb->mac_len, skb->mac.raw, skb->mac_len); /* nh头指向内层IP头 */ skb->nh.raw = skb->data; err = 0; out: return err; }
对于解密后的报文,在转发函数ip_forward以及传输层收包函数tcp_v4_rcv、udp_queue_rcv_skb中都会调用__xfrm_policy_check,检查解密过程中使用的SA(记录在skb->sp中)与policy绑定的SA是否一致;
/* ok: return 1 */ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family) { struct xfrm_policy *pol; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; int npols = 0; int xfrm_nr; int pi; struct flowi fl; u8 fl_dir = policy_to_flow_dir(dir); int xerr_idx = -1; /* * 1. 如果是解密后的报文,比较skb->sp中每个state的selector是否与报文匹配 * 2. 查找对应policy * 3. 比较skb->sp与policy关联的state是否一致 */ if (xfrm_decode_session(skb, &fl, family) < 0) return 0; nf_nat_decode_session(skb, &fl, family); /* RFC2367, 对于使用了代理的情况要检查解密时使用的SA的selector是否与解密后报文IP一致 */ /* First, check used SA against their selectors. */ if (skb->sp) { int i; for (i=skb->sp->len-1; i>=0; i--) { struct xfrm_state *x = skb->sp->xvec[i]; if (!xfrm_selector_match(&x->sel, &fl, family)) return 0; } } pol = NULL; if (sk && sk->sk_policy[dir]) { pol = xfrm_sk_policy_lookup(sk, dir, &fl); if (IS_ERR(pol)) return 0; } if (!pol) pol = flow_cache_lookup(&fl, family, fl_dir, xfrm_policy_lookup); if (IS_ERR(pol)) return 0; if (!pol) { if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) { xfrm_secpath_reject(xerr_idx, skb, &fl); return 0; } return 1; } pol->curlft.use_time = (unsigned long)xtime.tv_sec; pols[0] = pol; npols ++; #ifdef CONFIG_XFRM_SUB_POLICY if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, &fl, family, XFRM_POLICY_IN); if (pols[1]) { if (IS_ERR(pols[1])) return 0; pols[1]->curlft.use_time = (unsigned long)xtime.tv_sec; npols ++; } } #endif if (pol->action == XFRM_POLICY_ALLOW) { struct sec_path *sp; static struct sec_path dummy; struct xfrm_tmpl *tp[XFRM_MAX_DEPTH]; struct xfrm_tmpl *stp[XFRM_MAX_DEPTH]; struct xfrm_tmpl **tpp = tp; int ti = 0; int i, k; if ((sp = skb->sp) == NULL) sp = &dummy; for (pi = 0; pi < npols; pi++) { if (pols[pi] != pol && pols[pi]->action != XFRM_POLICY_ALLOW) goto reject; if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) goto reject_error; for (i = 0; i < pols[pi]->xfrm_nr; i++) tpp[ti++] = 
&pols[pi]->xfrm_vec[i]; } xfrm_nr = ti; if (npols > 1) { xfrm_tmpl_sort(stp, tpp, xfrm_nr, family); tpp = stp; } /* AH+ESP+PAYLOAD, SP[0]:AH, SP[1]:ESP, pol->xfrm_vec[0]:ESP pol->xfrm_vec[1]:AH,因此倒过来检查 */ /* For each tunnel xfrm, find the first matching tmpl. * For each tmpl before that, find corresponding xfrm. * Order is _important_. Later we will implement * some barriers, but at the moment barriers * are implied between each two transformations. */ for (i = xfrm_nr-1, k = 0; i >= 0; i--) { k = xfrm_policy_ok(tpp[i], sp, k, family); if (k < 0) { if (k < -1) /* "-2 - errored_index" returned */ xerr_idx = -(2+k); goto reject; } } if (secpath_has_nontransport(sp, k, &xerr_idx)) goto reject; xfrm_pols_put(pols, npols); return 1; } reject: xfrm_secpath_reject(xerr_idx, skb, &fl); reject_error: xfrm_pols_put(pols, npols); return 0; }