概述
本文主要分析table,rule,match,target的作用和其数据结构之间的关系,为后面的匹配流程做铺垫,通过本文中代码流程的分析,可以得到如下的关系图:
详细分析
table
iptables中的table分为五种:
filter:This is the default table (if no -t option is passed). It contains the built-in chains INPUT (for packets destined to local sockets), FORWARD (for packets being routed through the box), and OUTPUT (for locally-generated packets).
nat:This table is consulted when a packet that creates a new connection is encountered. It consists of three built-ins: PREROUTING (for altering packets as soon as they come in), OUTPUT (for altering locally-generated packets before routing), and POSTROUTING (for altering packets as they are about to go out).
mangle:This table is used for specialized packet alteration. Until kernel 2.4.17 it had two built-in chains: PREROUTING (for altering incoming packets before routing) and OUTPUT (for altering locally-generated packets before routing). Since kernel 2.4.18, three other built-in chains are also supported: INPUT (for packets coming into the box itself), FORWARD (for altering packets being routed through the box), and POSTROUTING (for altering packets as they are about to go out).
raw:This table is used mainly for configuring exemptions from connection tracking in combination with the NOTRACK target. It registers at the netfilter hooks with higher priority and is thus called before ip_conntrack, or any other IP tables. It provides the following built-in chains: PREROUTING (for packets arriving via any network interface) and OUTPUT (for packets generated by local processes).
security:This table is used for Mandatory Access Control (MAC) networking rules, such as those enabled by the SECMARK and CONNSECMARK targets. Mandatory Access Control is implemented by Linux Security Modules such as SELinux. The security table is called after the filter table, allowing any Discretionary Access Control (DAC) rules in the filter table to take effect before MAC rules. This table provides the following built-in chains: INPUT (for packets coming into the box itself), OUTPUT (for altering locally-generated packets before routing), and FORWARD (for altering packets being routed through the box).
net结构中的成员struct netns_xt xt用来存储所有的table:
1 /* Excerpt of struct net showing only its netfilter-related members; the #endif that closes CONFIG_NETFILTER is not part of this excerpt. */ struct net { 2 #ifdef CONFIG_NETFILTER 3 struct netns_nf nf; 4 struct netns_xt xt; /* xtables state: stores all of the tables (see struct netns_xt) */ 5 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) 6 struct netns_ct ct; 7 #endif 8 #if defined(CONFIG_NF_TABLES) || defined(CONFIG_NF_TABLES_MODULE) 9 struct netns_nftables nft; 10 #endif 11 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) 12 struct netns_nf_frag nf_frag; 13 #endif 14 struct sock *nfnl; 15 struct sock *nfnl_stash; 16 #if IS_ENABLED(CONFIG_NETFILTER_NETLINK_ACCT) 17 struct list_head nfnl_acct_list; 18 #endif 19 #if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) 20 struct list_head nfct_timeout_list; 21 #endif 22 };
netns_xt结构的成员如下,其中tables存储了多种协议对应的table链表,每种协议对应一个链表,多种table存储在自己所属协议的链表上;
1 /* xtables state embedded in struct net as the member xt. */ struct netns_xt { 2 /* One list head per protocol family (array sized by NFPROTO_NUMPROTO); every table is linked on the list of the family it belongs to. */ struct list_head tables[NFPROTO_NUMPROTO]; 3 bool notrack_deprecated_warning; 4 bool clusterip_deprecated_warning; 5 #if defined(CONFIG_BRIDGE_NF_EBTABLES) || 6 defined(CONFIG_BRIDGE_NF_EBTABLES_MODULE) 7 /* ebtables table pointers, present only when bridge ebtables support is configured */ struct ebt_table *broute_table; 8 struct ebt_table *frame_filter; 9 struct ebt_table *frame_nat; 10 #endif 11 };
接下来,再来看下table结构,定义每个具体类型的table:
1 /* Describes one iptables table: the generic, per-table information. */ 2 struct xt_table { 3 struct list_head list; 4 5 /* What hooks you will enter on */ 6 /* Hook points this table is interested in */ 7 unsigned int valid_hooks; 8 9 /* Man behind the curtain... */ 10 /* Private data: points to the xt_table_info that holds the actual rules */ 11 struct xt_table_info *private; 12 13 /* Set this to THIS_MODULE if you are a module, otherwise NULL */ 14 /* Owning module (a struct module pointer, not a name string) */ 15 struct module *me; 16 17 /* Protocol family */ 18 u_int8_t af; /* address/protocol family */ 19 /* Priority */ 20 int priority; /* hook order */ 21 22 /* called when table is needed in the given netns */ 23 int (*table_init)(struct net *net); 24 25 /* A unique name... */ 26 /* Table name */ 27 const char name[XT_TABLE_MAXNAMELEN]; 28 };
xt_table的private成员又指向了xt_table_info结构,存储真正的规则相关信息,包括入口和偏移;
1 /* Rule storage reached through xt_table->private: sizes, per-hook offsets and the rule data itself. */ struct xt_table_info { 2 /* Size per table */ 3 /* Table size, i.e. the amount of memory it occupies */ 4 unsigned int size; 5 /* Number of entries: FIXME. --RR */ 6 /* Number of rules in the table */ 7 unsigned int number; 8 /* Initial number of entries. Needed for module usage count */ 9 /* Initial rule count, used for module reference counting */ 10 unsigned int initial_entries; 11 12 /* Entry points and underflows */ 13 /* Per-hook entry point of the rules, as an offset relative to entries below */ 14 unsigned int hook_entry[NF_INET_NUMHOOKS]; 15 /* Per-hook upper-bound offset paired with hook_entry; the original note states that both are 0 when no rules have been entered -- NOTE(review): confirm */ 16 unsigned int underflow[NF_INET_NUMHOOKS]; 17 18 /* 19 * Number of user chains. Since tables cannot have loops, at most 20 * @stacksize jumps (number of user chains) can possibly be made. 21 */ 22 unsigned int stacksize; 23 void ***jumpstack; 24 25 /* Start of the rule data: the ipt_entry records begin at this address, and hook_entry/underflow are offsets from it. NOTE(review): the original note called this a per-CPU ipt_entry pointer, but the declaration here is a zero-length byte array -- confirm against the kernel version being described */ 26 unsigned char entries[0] __aligned(8); 27 };
xt_table_info结构的entries成员是规则存储区的起始位置,其中连续存放着多条rule;hook_entry和underflow中记录的都是相对于entries的偏移量;
rule
ipt_standard结构对应着一条使用标准target的rule,其本身只包含ipt_entry+xt_standard_target;一条完整的rule在内存中依次由ipt_entry+若干xt_entry_match(可以没有)+target组成;
1 /* Standard entry: one rule made of an ipt_entry immediately followed by a standard target. */ 2 struct ipt_standard { 3 struct ipt_entry entry; /* rule header, including the standard match */ 4 struct xt_standard_target target; /* target header plus the verdict */ 5 };
rule是规则的整体,下面分别介绍规则中的每个成员:
match
用于规则匹配,其中分为标准match和扩展match;
标准match通过匹配ipt_entry->ip成员进行,主要是ip中包含的地址,接口,协议信息等;
扩展match通过xt_entry_match成员进行,是标准match的扩展,通常以模块或者插件形式存在;
ipt_entry是一条规则的入口,其首部包含标准match结构,其余字段存储了target偏移,下一个ipt_entry的偏移,扩展match入口等:
1 /* Header of one rule: the standard match (ip), the offsets to the target and to the next rule, the counters, then the variable-length matches and target. */ struct ipt_entry { 2 struct ipt_ip ip; 3 4 /* Mark with fields that we care about. */ 5 unsigned int nfcache; 6 7 /* Size of ipt_entry + matches */ 8 /* Offset of this rule's target from the start of this ipt_entry */ 9 __u16 target_offset; 10 /* Size of ipt_entry + matches + target */ 11 /* Offset of the next rule from the start of this ipt_entry */ 12 __u16 next_offset; 13 14 /* Back pointer */ 15 /* 16 Used to detect loops in the table's rule chains, 17 or, during traversal, to return to the calling chain once the rules of a user-defined chain have been executed 18 */ 19 unsigned int comefrom; 20 21 /* Packet and byte counters. */ 22 struct xt_counters counters; 23 24 /* The matches (if any), then the target. */ 25 /* 26 The match(es) are stored contiguously after the ipt_entry; this zero-length array marks where that variable-length area begins 27 */ 28 unsigned char elems[0]; 29 };
ipt_entry中包含ipt_ip结构,用于标准match,匹配内容为源目的地址,入出口设备,协议等,其结构如下:
1 /* Standard match: the IP-level criteria carried by every rule. */ 2 struct ipt_ip { 3 /* Source and destination IP addr */ 4 /* Source and destination addresses */ 5 struct in_addr src, dst; 6 /* Mask for src and dest IP addr */ 7 /* Source and destination masks */ 8 struct in_addr smsk, dmsk; 9 /* Input and output interface names */ 10 char iniface[IFNAMSIZ], outiface[IFNAMSIZ]; 11 /* Masks for the input and output interface names */ 12 unsigned char iniface_mask[IFNAMSIZ], outiface_mask[IFNAMSIZ]; 13 14 /* Protocol, 0 = ANY */ 15 /* Protocol number */ 16 __u16 proto; 17 18 /* Flags word */ 19 __u8 flags; 20 /* Inverse flags */ 21 /* Whether the match is inverted */ 22 __u8 invflags; 23 };
xt_entry_match紧接着ipt_entry,可能有多个,用于扩展match;
1 /* Header of one extension match; zero or more of these follow the ipt_entry of a rule. The union gives a userspace view, a kernel view and a bare size view of the same header. */ struct xt_entry_match { 2 union { 3 struct { 4 /* Amount of memory occupied by this match */ 5 __u16 match_size; 6 7 /* Used by userspace */ 8 /* Match name */ 9 char name[XT_EXTENSION_MAXNAMELEN]; 10 /* Match revision */ 11 __u8 revision; 12 } user; 13 struct { 14 __u16 match_size; 15 16 /* Used inside the kernel */ 17 struct xt_match *match; 18 } kernel; 19 20 /* Total length */ 21 __u16 match_size; 22 } u; 23 24 /* Variable-length area that follows the header. The original note describes it as the link to the next match; presumably it holds this match's own data -- NOTE(review): confirm */ 25 unsigned char data[0]; 26 };
target
在某条规则匹配之后,执行的动作;也分为标准target和扩展target;
标准target:t->u.kernel.target->target为NULL,则为标准target,根据verdict返回值决定如何进行下一步处理;
扩展target:t->u.kernel.target->target不为NULL,则为扩展target,这时候需要执行该target函数;
xt_standard_target对xt_entry_target成员进行了封装,增加了verdict,该字段用于返回处理结果给Netfilter;
1 /* Standard target: wraps the generic target header and adds a verdict. */ struct xt_standard_target { 2 struct xt_entry_target target; /* generic target header */ 3 int verdict; /* processing result handed back to Netfilter */ 4 };
xt_entry_target结构的定义与match的形式几乎是一致的;
1 /* Header of a rule's target; laid out the same way as xt_entry_match, with a userspace view, a kernel view and a bare size view in one union. */ struct xt_entry_target { 2 union { 3 struct { 4 __u16 target_size; 5 6 /* Used by userspace */ 7 char name[XT_EXTENSION_MAXNAMELEN]; 8 __u8 revision; 9 } user; 10 struct { 11 __u16 target_size; 12 13 /* Used inside the kernel */ 14 struct xt_target *target; 15 } kernel; 16 17 /* Total length */ 18 __u16 target_size; 19 } u; 20 21 /* Variable-length area that follows the header; presumably the target's own data -- NOTE(review): confirm */ unsigned char data[0]; 22 };