Netfilter通过连接跟踪来记录和跟踪连接的状态,为状态防火墙和NAT提供基础支持;
钩子点与钩子函数
下图为钩子点和钩子函数的关系图,其中ipv4_conntrack_defrag、ipv4_conntrack_in、ipv4_helper、ipv4_confirm为连接跟踪相关的钩子函数,其作用的钩子点为PRE_ROUTING、LOCAL_IN、LOCAL_OUT、POST_ROUTING:
钩子函数的调用流程
通过上图,可以得到连接跟踪的流程:
输入本地:
ipv4_conntrack_defrag–>ipv4_conntrack_in–>ipv4_helper–>ipv4_confirm
【<————-PRE_ROUTING————>】 【<—— LOCAL_IN——>】
转发:
ipv4_conntrack_defrag–>ipv4_conntrack_in–>ipv4_helper–>ipv4_confirm
【<————-PRE_ROUTING————>】 【<—-POST_ROUTING—>】
本地输出:
ipv4_conntrack_defrag–>ipv4_conntrack_local–>ipv4_helper–>ipv4_confirm
【<————–LOCAL_OUT—————–>】 【<—-POST_ROUTING—>】
连接跟踪的状态
ip_conntrack_info用来描述连接跟踪的状态,如下:
/*
 * Connection tracking state of a packet relative to its connection.
 * Values below IP_CT_IS_REPLY are the base states; values at or above it
 * are the same states seen in the reply direction.
 */
enum ip_conntrack_info {
	/* Part of an established connection (either direction). */
	IP_CT_ESTABLISHED,

	/* Like NEW, but related to an existing connection, or ICMP error
	   (in either direction). */
	IP_CT_RELATED,

	/* Started a new connection to track (only
	   IP_CT_DIR_ORIGINAL); may be a retransmission. */
	IP_CT_NEW,

	/* >= this indicates reply direction */
	IP_CT_IS_REPLY,

	/* Reply-direction packet of an established connection. */
	IP_CT_ESTABLISHED_REPLY = IP_CT_ESTABLISHED + IP_CT_IS_REPLY,
	/* Reply-direction packet of a related connection. */
	IP_CT_RELATED_REPLY = IP_CT_RELATED + IP_CT_IS_REPLY,
	/* No NEW in reply direction. */

	/* Number of distinct IP_CT types. */
	IP_CT_NUMBER,

	/* only for userspace compatibility */
#ifndef __KERNEL__
	IP_CT_NEW_REPLY = IP_CT_NUMBER,
#else
	IP_CT_UNTRACKED = 7,
#endif
};
数据结构图
本文中涉及的数据结构之间的关系图如下:
源码分析
nf_conn是对连接跟踪抽象的基础结构,其中tuplehash为连接跟踪nf_conntrack_tuple的hash,分两个方向;
1 struct nf_conn { 2 /* Usage count in here is 1 for hash table, 1 per skb, 3 * plus 1 for any connection(s) we are `master' for 4 * 5 * Hint, SKB address this struct and refcnt via skb->_nfct and 6 * helpers nf_conntrack_get() and nf_conntrack_put(). 7 * Helper nf_ct_put() equals nf_conntrack_put() by dec refcnt, 8 * beware nf_ct_get() is different and don't inc refcnt. 9 */ 10 /* 连接跟踪的引用计数 */ 11 struct nf_conntrack ct_general; 12 13 spinlock_t lock; 14 u16 cpu; 15 16 #ifdef CONFIG_NF_CONNTRACK_ZONES 17 struct nf_conntrack_zone zone; 18 #endif 19 /* XXX should I move this to the tail ? - Y.K */ 20 /* These are my tuples; original and reply */ 21 /* 连接跟踪两个方向的tuple节点,即五元组 */ 22 struct nf_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX]; 23 24 /* Have we seen traffic both ways yet? (bitset) */ 25 /* 状态 */ 26 unsigned long status; 27 28 /* jiffies32 when this ct is considered dead */ 29 /* 连接跟踪的超时时间 */ 30 u32 timeout; 31 32 /* 命名空间 */ 33 possible_net_t ct_net; 34 35 #if IS_ENABLED(CONFIG_NF_NAT) 36 struct rhlist_head nat_bysource; 37 #endif 38 /* all members below initialized via memset */ 39 u8 __nfct_init_offset[0]; 40 41 /* If we were expected by an expectation, this will be it */ 42 /* 如果当前连接是某个连接期望的连接,该字段指向主连接 */ 43 struct nf_conn *master; 44 45 #if defined(CONFIG_NF_CONNTRACK_MARK) 46 u_int32_t mark; 47 #endif 48 49 #ifdef CONFIG_NF_CONNTRACK_SECMARK 50 u_int32_t secmark; 51 #endif 52 53 /* Extensions */ 54 /* 扩展项 */ 55 struct nf_ct_ext *ext; 56 57 /* Storage reserved for other modules, must be the last member */ 58 /* 不同协议实现连接跟踪的额外参数 */ 59 union nf_conntrack_proto proto; 60 };
nf_conntrack_tuple_hash的定义如下:
1 struct nf_conntrack_tuple_hash { 2 struct hlist_nulls_node hnnode; 3 struct nf_conntrack_tuple tuple; 4 };
nf_ct_ext用于实现对连接跟踪的扩展;
1 struct nf_ct_ext { 2 struct rcu_head rcu; 3 u8 offset[NF_CT_EXT_NUM]; 4 u8 len; 5 char data[0]; 6 };
nf_conntrack_tuple是用来区分一条连接的信息,定义如下:
1 /* 该结构包含源目的信息用来区分一条连接 */ 2 struct nf_conntrack_tuple { 3 /* 源,可操作? */ 4 struct nf_conntrack_man src; 5 6 /* These are the parts of the tuple which are fixed. */ 7 /* 目的,不可操作? */ 8 struct { 9 union nf_inet_addr u3; 10 union { 11 /* Add other protocols here. */ 12 __be16 all; 13 14 struct { 15 __be16 port; 16 } tcp; 17 struct { 18 __be16 port; 19 } udp; 20 struct { 21 u_int8_t type, code; 22 } icmp; 23 struct { 24 __be16 port; 25 } dccp; 26 struct { 27 __be16 port; 28 } sctp; 29 struct { 30 __be16 key; 31 } gre; 32 } u; 33 34 /* The protocol. */ 35 /* 协议 */ 36 u_int8_t protonum; 37 38 /* The direction (for tuplehash) */ 39 /* 方向(tuplehash使用) */ 40 u_int8_t dir; 41 } dst; 42 };
上面结构中的源方向信息使用了nf_conntrack_man结构,其中包括了三层识别信息,四层识别信息,以及三层协议号;
1 /* The manipulable part of the tuple. */ 2 /* tuple可操作的部分? */ 3 struct nf_conntrack_man { 4 /* 三层识别信息 */ 5 union nf_inet_addr u3; 6 /* 四层识别信息 */ 7 union nf_conntrack_man_proto u; 8 /* Layer 3 protocol */ 9 /* 三层协议号 */ 10 u_int16_t l3num; 11 };
1 union nf_inet_addr { 2 __u32 all[4]; 3 __be32 ip; 4 __be32 ip6[4]; 5 struct in_addr in; 6 struct in6_addr in6; 7 };
1 /* The protocol-specific manipulable parts of the tuple: always in 2 * network order 3 */ 4 union nf_conntrack_man_proto { 5 /* Add other protocols here. */ 6 __be16 all; 7 8 struct { 9 __be16 port; 10 } tcp; 11 struct { 12 __be16 port; 13 } udp; 14 struct { 15 __be16 id; 16 } icmp; 17 struct { 18 __be16 port; 19 } dccp; 20 struct { 21 __be16 port; 22 } sctp; 23 struct { 24 __be16 key; /* GRE key is 32bit, PPtP only uses 16bit */ 25 } gre; 26 };
nf_conn中的proto成员用来存储协议特有的用来表示连接跟踪的信息,其联合体nf_conntrack_proto定义如下:
1 /* per conntrack: protocol private data */ 2 union nf_conntrack_proto { 3 /* insert conntrack proto private data here */ 4 struct nf_ct_dccp dccp; 5 struct ip_ct_sctp sctp; 6 struct ip_ct_tcp tcp; 7 struct nf_ct_gre gre; 8 unsigned int tmpl_padto; 9 };
下面是TCP的一些必要信息;
1 struct ip_ct_tcp { 2 struct ip_ct_tcp_state seen[2]; /* connection parameters per direction */ 3 u_int8_t state; /* state of the connection (enum tcp_conntrack) */ 4 /* For detecting stale connections */ 5 u_int8_t last_dir; /* Direction of the last packet (enum ip_conntrack_dir) */ 6 u_int8_t retrans; /* Number of retransmitted packets */ 7 u_int8_t last_index; /* Index of the last packet */ 8 u_int32_t last_seq; /* Last sequence number seen in dir */ 9 u_int32_t last_ack; /* Last sequence number seen in opposite dir */ 10 u_int32_t last_end; /* Last seq + len */ 11 u_int16_t last_win; /* Last window advertisement seen in dir */ 12 /* For SYN packets while we may be out-of-sync */ 13 u_int8_t last_wscale; /* Last window scaling factor seen */ 14 u_int8_t last_flags; /* Last flags set */ 15 };
/* TCP tracking parameters kept for one direction of a connection. */
struct ip_ct_tcp_state {
	u_int32_t	td_end;		/* max of seq + len */
	u_int32_t	td_maxend;	/* max of ack + max(win, 1) */
	u_int32_t	td_maxwin;	/* max(win) */
	u_int32_t	td_maxack;	/* max of ack */
	u_int8_t	td_scale;	/* window scale factor */
	u_int8_t	flags;		/* per direction options */
};