zoukankan      html  css  js  c++  java
  • Linux中处理需要传输的IP报文流程

    本文主要讲解了Linux中处理需要传输的IP报文流程,使用的内核的版本是2.6.32.27

    为了方便理解,本文采用整体流程图加伪代码的方式,对Linux中处理需要传输的IP报文的流程进行讲解,希望可以对大家有所帮助。阅读本文需要读者对C语言已有一定的了解。


    首先看看IP层之上的传输层(四层)协议是如何注册和管理的


     

    //-----------------------------------------------------------------------------------------------------------------------------------------------------------------------
    
    /*
     * Registration of the layer-4 protocols: each one is registered as a
     * net_protocol structure and hashed into the inet_protos table so that
     * they are managed in one place.
     */
    #ifdef CONFIG_IP_MULTICAST
    /* IGMP entry; only compiled when CONFIG_IP_MULTICAST is defined. */
    static const struct net_protocol igmp_protocol = {
    	.handler =	igmp_rcv,
    	.netns_ok =	1,
    };
    #endif
    
    /*
     * TCP entry: receive handler, error handler and the GSO/GRO callbacks
     * that are registered for IPPROTO_TCP.
     */
    static const struct net_protocol tcp_protocol = {
    	.handler =	tcp_v4_rcv,
    	.err_handler =	tcp_v4_err,
    	.gso_send_check = tcp_v4_gso_send_check,
    	.gso_segment =	tcp_tso_segment,
    	.gro_receive =	tcp4_gro_receive,
    	.gro_complete =	tcp4_gro_complete,
    	.no_policy =	1,
    	.netns_ok =	1,
    };
    
    /*
     * UDP entry: receive handler, error handler and the GSO callbacks
     * that are registered for IPPROTO_UDP.
     */
    static const struct net_protocol udp_protocol = {
    	.handler =	udp_rcv,
    	.err_handler =	udp_err,
    	.gso_send_check = udp4_ufo_send_check,
    	.gso_segment = udp4_ufo_fragment,
    	.no_policy =	1,
    	.netns_ok =	1,
    };
    
    /* ICMP entry: only a receive handler is registered for IPPROTO_ICMP. */
    static const struct net_protocol icmp_protocol = {
    	.handler =	icmp_rcv,
    	.no_policy =	1,
    	.netns_ok =	1,
    };
    
    
    /*
     * Abridged excerpt of inet_init(): registers the base layer-4 protocols
     * (ICMP, UDP, TCP and, when CONFIG_IP_MULTICAST is defined, IGMP) with
     * inet_add_protocol().
     *
     * A failed registration is only reported at KERN_CRIT level; it does not
     * stop the remaining registrations.
     *
     * Returns 0.
     */
    static int __init inet_init(void)
    {
    	/*
    	 *	Add all the base protocols.
    	 */
    	if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
    		printk(KERN_CRIT "inet_init: Cannot add ICMP protocol\n");
    	if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
    		printk(KERN_CRIT "inet_init: Cannot add UDP protocol\n");
    	if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
    		printk(KERN_CRIT "inet_init: Cannot add TCP protocol\n");
    #ifdef CONFIG_IP_MULTICAST
    	if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
    		printk(KERN_CRIT "inet_init: Cannot add IGMP protocol\n");
    #endif

    	/* An int function must return a value; the excerpt had none. */
    	return 0;
    }


    在4层处理完成之后,4层会调用IP层的接口ip_queue_xmit进行报文发送

     

    //-----------------------------------------------------------------------------------------------------------------------------------------------------------------------
    
    /*
     * Interface registered in ipv4.c for the upper-layer protocol to use:
     * the queue_xmit hook points at ip_queue_xmit.
     */
    static const struct inet_connection_sock_af_ops dccp_ipv4_af_ops = {
    	.queue_xmit	   = ip_queue_xmit,
    };
    
    /*
     * Register dccp_ipv4_af_ops with the protocol by storing it as the
     * socket's icsk_af_ops.
     * NOTE(review): abridged excerpt - no return statement is shown.
     */
    static int dccp_v4_init_sock(struct sock *sk)
    {
    	inet_csk(sk)->icsk_af_ops = &dccp_ipv4_af_ops;
    }
    
    
    /*
     * TCP segment transmit function.
     * NOTE(review): abridged excerpt - err is not declared here and no return
     * statement is shown.
     */
    static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,   gfp_t gfp_mask)
    {
    	const struct inet_connection_sock *icsk = inet_csk(sk);
    	
    	/* Send the packet through the queue_xmit hook, i.e. ip_queue_xmit. */
    	err = icsk->icsk_af_ops->queue_xmit(skb, 0);
    
    }
    
    
    //-----------------------------------------------------------------------------------------------------------------------------------------------------------------------
    
    
    /*
     * Abridged excerpt of ip_queue_xmit(): obtain a route for skb (from the skb
     * itself, from the socket, or by a fresh lookup), fill in the IP header and
     * options, then hand the packet to ip_local_out().
     * Elided parts are marked with "//......"; iph and fl are set up in code
     * that is not shown here.
     */
    int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
    {
    	struct sock *sk = skb->sk;
    	struct inet_sock *inet = inet_sk(sk);
    	struct ip_options *opt = inet->opt;
    	struct rtable *rt;
    	struct iphdr *iph;
    
    	/*
    	 * Check whether sk->dst in the socket structure holds a pointer to an
    	 * entry in the routing cache and, if it does, whether that pointer is
    	 * still valid. All packets of a socket go to the same destination
    	 * address, so the route is kept in skb->_skb_dst as a dst_entry.
    	 */
    	
    	/* The skb already carries a route: skip the lookup. */
    	rt = skb_rtable(skb);
    	if (rt != NULL)
    		goto packet_routed;
    
    	rt = (struct rtable *)__sk_dst_check(sk, 0);
    	/*
    	 * NOTE(review): the braces below look like an inlined sketch of what
    	 * __sk_dst_check() does (dst and cookie are not declared in this
    	 * function) - confirm against the kernel source.
    	 */
    	{
    		/* Obsolete entry rejected by its check() hook: drop it from the socket. */
    		if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL)
    		{
    			sk->sk_dst_cache = NULL;
    			dst_release(dst);
    			return NULL;			
    		}
    	}
    	
    	/* If no route has been set yet, look one up with ip_route_output_flow. */
    	if (rt == NULL) 
    	{
    		//......
    			if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))
    				goto no_route;
    	}
    
    	//......
    packet_routed:
    	/* Fill in the IP header. */
    	//.....
    	iph->ttl      = ip_select_ttl(inet, &rt->u.dst);
    	iph->protocol = sk->sk_protocol;
    	iph->saddr    = rt->rt_src;
    	iph->daddr    = rt->rt_dst;
    
    	/* Fill in the IP options; optlen is in bytes, ihl counts 32-bit words. */
    	if (opt && opt->optlen) {
    		iph->ihl += opt->optlen >> 2;
    		ip_options_build(skb, opt, inet->daddr, rt, 0);
    	}
    
    	//......
    	return ip_local_out(skb);
    
    no_route:
    	//.....
    }
    
    
    /*
     * Run __ip_local_out() on skb. Only when it returns 1 is the packet passed
     * on to dst_output(), whose result is then returned; any other value is
     * returned to the caller unchanged.
     */
    int ip_local_out(struct sk_buff *skb)
    {
    	int rc = __ip_local_out(skb);

    	if (likely(rc == 1))
    		return dst_output(skb);

    	return rc;
    }
    
    
    /*
     * Finalize the IP header of skb (total length, then ip_send_check()) and
     * run the packet through the NF_INET_LOCAL_OUT hook.
     * Returns whatever nf_hook() returns.
     */
    int __ip_local_out(struct sk_buff *skb)
    {
    	struct iphdr *iph = ip_hdr(skb);
    
    	/* tot_len is written before ip_send_check() is called on the header. */
    	iph->tot_len = htons(skb->len);
    	ip_send_check(iph);
    	
    	/*
    	 * Enter the NF_INET_LOCAL_OUT hook chain; once the hooks are done the
    	 * packet is handed to dst_output.
    	 */
    	return nf_hook(PF_INET, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
    		       dst_output);
    }
    
    
    /* Dispatch skb to the output hook stored in its dst entry. */
    static inline int dst_output(struct sk_buff *skb)
    {
    	/*
    	 * Call the output function registered in the dst_entry; for IP
    	 * unicast that is ip_output.
    	 */
    	return skb_dst(skb)->output(skb);
    }
    
    /*
     * The output and input hooks of a route are registered in __mkroute_output.
     * Abridged excerpt: only the hook assignments are shown.
     * NOTE(review): rth is used without being assigned here and no return
     * statement is shown - presumably the allocation and the rest of the
     * function were elided.
     */
    static int __mkroute_output(struct rtable **result,
    			    struct fib_result *res,
    			    const struct flowi *fl,
    			    const struct flowi *oldflp,
    			    struct net_device *dev_out,
    			    unsigned flags)
    {
    	struct rtable *rth;
    	/* Default output hook: ip_output. */
    	rth->u.dst.output=ip_output;
    	
    	/* Local routes get ip_local_deliver as their input hook. */
    	if (flags & RTCF_LOCAL) {
    		rth->u.dst.input = ip_local_deliver;
    	}
    	
    	/* Broadcast/multicast routes may switch the output hook to ip_mc_output. */
    	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
    		if (flags & RTCF_LOCAL &&   !(dev_out->flags & IFF_LOOPBACK))
    		{
    			rth->u.dst.output = ip_mc_output;
    		}
    		if (res->type == RTN_MULTICAST)
    		{
    			rth->u.dst.input = ip_mr_input;
    			rth->u.dst.output = ip_mc_output;
    		}
    	}
    }
    
    /*
     * IPv4 unicast output: bind skb to the route's output device, mark it as
     * an IP packet and send it through the NF_INET_POST_ROUTING hook.
     */
    int ip_output(struct sk_buff *skb)
    {
    	struct net_device *out_dev = skb_dst(skb)->dev;

    	skb->protocol = htons(ETH_P_IP);
    	skb->dev = out_dev;

    	/*
    	 * After the NF_INET_POST_ROUTING chain the packet continues in
    	 * ip_finish_output; the condition argument is false for packets
    	 * flagged IPSKB_REROUTED.
    	 */
    	return NF_HOOK_COND(PF_INET, NF_INET_POST_ROUTING, skb, NULL, out_dev,
    			    ip_finish_output,
    			    !(IPCB(skb)->flags & IPSKB_REROUTED));
    }
    
    
    /*
     * Decide whether skb has to be fragmented before it reaches
     * ip_finish_output2().
     */
    static int ip_finish_output(struct sk_buff *skb)
    {
    	/* Fits within the dst MTU, or is a GSO packet: no fragmentation. */
    	if (skb->len <= ip_skb_dst_mtu(skb) || skb_is_gso(skb))
    		return ip_finish_output2(skb);

    	/* Otherwise fragment first; ip_fragment() is given ip_finish_output2 as its output callback. */
    	return ip_fragment(skb, ip_finish_output2);
    }
    
    
    /*
     * Hand skb to the neighbour layer.
     * NOTE(review): abridged excerpt - dst is not declared here and the path
     * where neither branch is taken is not shown. The original comments on the
     * two branches appeared to be swapped; they are rewritten below to match
     * the code.
     */
    static inline int ip_finish_output2(struct sk_buff *skb)
    {
    	/* dst->hh is set (presumably a cached layer-2 header): send via neigh_hh_output. */
    	if (dst->hh)
    		return neigh_hh_output(dst->hh, skb);
    	/*
    	 * No dst->hh but a neighbour entry exists: use dst->neighbour->output
    	 * (presumably the step where address resolution such as ARP happens -
    	 * confirm) on the way to dev_queue_xmit.
    	 */
    	else if (dst->neighbour)
    		return dst->neighbour->output(skb);
    }
    
    
    /*
     * How dev_queue_xmit gets registered in ARP: it is the hh_output hook of
     * arp_hh_ops, while the generic output hook is neigh_resolve_output.
     */
    static const struct neigh_ops arp_hh_ops = {
    	.family =		AF_INET,
    	.output =		neigh_resolve_output,
    	.hh_output =	dev_queue_xmit,
    };
    
    
    /*
     * Abridged excerpt of neigh_hh_init(): choose the hh_output hook of the
     * hh_cache entry according to the neighbour's state.
     * NOTE(review): hh is never assigned in this excerpt - presumably its
     * setup is in the elided code.
     */
    static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst,   __be16 protocol)
    {
    	struct hh_cache	*hh;
    
    	//......
    	
    	/* NUD_CONNECTED state: use the ops' hh_output hook; otherwise fall back to ops->output. */
    	if (n->nud_state & NUD_CONNECTED)
    		hh->hh_output = n->ops->hh_output; /* i.e. dev_queue_xmit */
    	else
    		hh->hh_output = n->ops->output;
    
    	//......
    }


    最终,发送流程从ip_queue_xmit开始,一路调用到二层发送的入口点dev_queue_xmit。

    关于处理流程的整体架构图,请参见我的上一篇博客

    <<Linux内核IP层的报文处理流程--从网卡接收的报文处理流程>>

    关于二层是如何继续处理报文并发送的,请参考博客

    <<Linux内核数据包的发送传输>>


    希望大家批评指正



  • 相关阅读:
    机器学习初篇(0.0)
    MQTT 入门介绍
    《八极拳谱》(李书文)
    Golang实战群:日志的处理机制
    【转】火山引擎 Redis 云原生实践
    【转】7000字前端性能优化总结 | 干货建议收藏
    微信小程序canvas绘制圆角边框
    【转】语义化版本 2.0.0
    Verdaccio私有 npm 服务器搭建及其配置
    【转】根据条件配置多个npm仓库
  • 原文地址:https://www.cnblogs.com/riskyer/p/3320175.html
Copyright © 2011-2022 走看看