概述
本文分析filter表的初始化流程,以及钩子函数中规则匹配(match)流程的相关源码。
源码分析
所在钩子点:
/*
 * Hooks on which the filter table operates: LOCAL_IN, FORWARD and LOCAL_OUT.
 * Each hook contributes one bit, at the position given by its NF_INET_*
 * hook number.  The definition spans several lines, so every line but the
 * last must end in a backslash continuation.
 */
#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
			    (1 << NF_INET_FORWARD) | \
			    (1 << NF_INET_LOCAL_OUT))
filter表信息:
1 /* filter表的信息 */ 2 static const struct xt_table packet_filter = { 3 .name = "filter", 4 .valid_hooks = FILTER_VALID_HOOKS, /* filter工作的钩子点 */ 5 .me = THIS_MODULE, 6 .af = NFPROTO_IPV4, 7 .priority = NF_IP_PRI_FILTER, 8 .table_init = iptable_filter_table_init, 9 };
初始化:
1 static int __net_init iptable_filter_table_init(struct net *net) 2 { 3 struct ipt_replace *repl; 4 int err; 5 6 /* filter表已经被初始化了,返回 */ 7 if (net->ipv4.iptable_filter) 8 return 0; 9 10 /* 分配初始化表,用于下面的表注册 */ 11 repl = ipt_alloc_initial_table(&packet_filter); 12 if (repl == NULL) 13 return -ENOMEM; 14 /* Entry 1 is the FORWARD hook */ 15 /* 入口1是否为FORWARD钩子点时的verdict值设置 */ 16 ((struct ipt_standard *)repl->entries)[1].target.verdict = 17 forward ? -NF_ACCEPT - 1 : -NF_DROP - 1; 18 19 /* 注册filter表,注册后,ipv4.iptable_filter保存了注册后的新表 */ 20 err = ipt_register_table(net, &packet_filter, repl, filter_ops, 21 &net->ipv4.iptable_filter); 22 23 /* 释放初始化表 */ 24 kfree(repl); 25 return err; 26 }
分配用于初始化的table结构,其中的xt_alloc_initial_table是以宏的形式存在的;
1 void *ipt_alloc_initial_table(const struct xt_table *info) 2 { 3 return xt_alloc_initial_table(ipt, IPT); 4 }
为了便于阅读,下面给出将宏xt_alloc_initial_table(ipt, IPT)展开之后的等价函数;
1 void * ipt_alloc_initial_table(const struct xt_table *info) { 2 /* 钩子点掩码 */ 3 unsigned int hook_mask = info->valid_hooks; 4 /* 钩子点数量 */ 5 unsigned int nhooks = hweight32(hook_mask); 6 unsigned int bytes = 0, hooknum = 0, i = 0; 7 /* 此次构造的表结构 */ 8 struct { 9 struct ipt_replace repl; 10 struct ipt_standard entries[]; 11 } *tbl; 12 13 struct ipt_error *term; 14 15 /* 算出entries的偏移 */ 16 size_t term_offset = (offsetof(iptof(*tbl), entries[nhooks]) + 17 __alignof__(*term) - 1) & ~(__alignof__(*term) - 1); 18 /* 分配内存 */ 19 tbl = kzalloc(term_offset + sizeof(*term), GFP_KERNEL); 20 if (tbl == NULL) 21 return NULL; 22 /* 找到error部分 */ 23 term = (struct ipt_error *)&(((char *)tbl)[term_offset]); 24 /* 拷贝表名 */ 25 strncpy(tbl->repl.name, info->name, sizeof(tbl->repl.name)); 26 /* 初始化error */ 27 *term = (struct ipt_error)IPT_ERROR_INIT; 28 /* 初始化钩子点,数量(包括error),占用内存大小 */ 29 tbl->repl.valid_hooks = hook_mask; 30 tbl->repl.num_entries = nhooks + 1; 31 tbl->repl.size = nhooks * sizeof(struct ipt_standard) + 32 sizeof(struct ipt_error); 33 /* 对每个偏移进行初始化 */ 34 for (; hook_mask != 0; hook_mask >>= 1, ++hooknum) { 35 if (!(hook_mask & 1)) 36 continue; 37 tbl->repl.hook_entry[hooknum] = bytes; 38 tbl->repl.underflow[hooknum] = bytes; 39 tbl->entries[i++] = (struct ipt_standard) 40 IPT_STANDARD_INIT(NF_ACCEPT); 41 bytes += sizeof(struct ipt_standard); 42 } 43 /* 返回表 */ 44 return tbl; 45 }
ipt_register_table完成表注册流程,其中包括了分配table_info结构,并且与table->private进行关联,table中规则的合法性检查,以及调用nf_register_net_hooks进行钩子函数的注册;
1 /* 表注册 */ 2 int ipt_register_table(struct net *net, const struct xt_table *table, 3 const struct ipt_replace *repl, 4 const struct nf_hook_ops *ops, struct xt_table **res) 5 { 6 int ret; 7 struct xt_table_info *newinfo; 8 struct xt_table_info bootstrap = {0}; 9 void *loc_cpu_entry; 10 struct xt_table *new_table; 11 12 /* 分配table_info结构 */ 13 newinfo = xt_alloc_table_info(repl->size); 14 if (!newinfo) 15 return -ENOMEM; 16 17 /* 拷贝entries到table_info */ 18 loc_cpu_entry = newinfo->entries; 19 memcpy(loc_cpu_entry, repl->entries, repl->size); 20 21 /* 合法性检查 */ 22 ret = translate_table(net, newinfo, loc_cpu_entry, repl); 23 if (ret != 0) 24 goto out_free; 25 26 /* 建立新表,关联private到newinfo */ 27 new_table = xt_register_table(net, table, &bootstrap, newinfo); 28 if (IS_ERR(new_table)) { 29 ret = PTR_ERR(new_table); 30 goto out_free; 31 } 32 33 /* set res now, will see skbs right after nf_register_net_hooks */ 34 /* 设置返回值指向新表 */ 35 WRITE_ONCE(*res, new_table); 36 37 /* 注册钩子函数 */ 38 ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks)); 39 if (ret != 0) { 40 __ipt_unregister_table(net, new_table); 41 *res = NULL; 42 } 43 44 return ret; 45 46 out_free: 47 xt_free_table_info(newinfo); 48 return ret; 49 }
xt_register_table建立新表,将xt_table_info与表进行关联,并将表加入到net->xt.tables[table->af]链表;
1 struct xt_table *xt_register_table(struct net *net, 2 const struct xt_table *input_table, 3 struct xt_table_info *bootstrap, 4 struct xt_table_info *newinfo) 5 { 6 int ret; 7 struct xt_table_info *private; 8 struct xt_table *t, *table; 9 10 /* Don't add one object to multiple lists. */ 11 /* 建立新表 */ 12 table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL); 13 if (!table) { 14 ret = -ENOMEM; 15 goto out; 16 } 17 18 mutex_lock(&xt[table->af].mutex); 19 /* Don't autoload: we'd eat our tail... */ 20 /* 验证是否已经存在相同名字的表 */ 21 list_for_each_entry(t, &net->xt.tables[table->af], list) { 22 if (strcmp(t->name, table->name) == 0) { 23 ret = -EEXIST; 24 goto unlock; 25 } 26 } 27 28 /* Simplifies replace_table code. */ 29 table->private = bootstrap; 30 31 /* 设置newinfo到table的privates */ 32 if (!xt_replace_table(table, 0, newinfo, &ret)) 33 goto unlock; 34 35 private = table->private; 36 pr_debug("table->private->number = %u ", private->number); 37 38 /* save number of initial entries */ 39 private->initial_entries = private->number; 40 41 /* 将表加入到xt.tables中 */ 42 list_add(&table->list, &net->xt.tables[table->af]); 43 mutex_unlock(&xt[table->af].mutex); 44 45 /* 返回新表 */ 46 return table; 47 48 unlock: 49 mutex_unlock(&xt[table->af].mutex); 50 kfree(table); 51 out: 52 return ERR_PTR(ret); 53 }
钩子函数iptable_filter_hook,该函数主要调用ipt_do_table函数进行规则的匹配;
1 static unsigned int 2 iptable_filter_hook(void *priv, struct sk_buff *skb, 3 const struct nf_hook_state *state) 4 { 5 /* LOCAL_OUT && (数据长度不足ip头 || 实际ip头部长度不足最小ip头),在使用raw socket */ 6 if (state->hook == NF_INET_LOCAL_OUT && 7 (skb->len < sizeof(struct iphdr) || 8 ip_hdrlen(skb) < sizeof(struct iphdr))) 9 /* root is playing with raw sockets. */ 10 return NF_ACCEPT; 11 12 /* 核心规则匹配流程 */ 13 return ipt_do_table(skb, state, state->net->ipv4.iptable_filter); 14 }
ipt_do_table是核心的规则匹配流程,其中包括了标准match,扩展match,标准target,扩展target的相关处理;
1 /* 遍历钩子链上的所有规则,进行标准匹配和扩展匹配,执行其target操作 */ 2 unsigned int 3 ipt_do_table(struct sk_buff *skb, 4 const struct nf_hook_state *state, 5 struct xt_table *table) 6 { 7 unsigned int hook = state->hook; 8 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); 9 const struct iphdr *ip; 10 /* Initializing verdict to NF_DROP keeps gcc happy. */ 11 unsigned int verdict = NF_DROP; 12 const char *indev, *outdev; 13 const void *table_base; 14 struct ipt_entry *e, **jumpstack; 15 unsigned int stackidx, cpu; 16 const struct xt_table_info *private; 17 struct xt_action_param acpar; 18 unsigned int addend; 19 20 /* Initialization */ 21 stackidx = 0; 22 ip = ip_hdr(skb); 23 indev = state->in ? state->in->name : nulldevname; 24 outdev = state->out ? state->out->name : nulldevname; 25 /* We handle fragments by dealing with the first fragment as 26 * if it was a normal packet. All other fragments are treated 27 * normally, except that they will NEVER match rules that ask 28 * things we don't know, ie. tcp syn flag or ports). If the 29 * rule is also a fragment-specific rule, non-fragments won't 30 * match it. */ 31 acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; 32 acpar.thoff = ip_hdrlen(skb); 33 acpar.hotdrop = false; 34 acpar.state = state; 35 36 IP_NF_ASSERT(table->valid_hooks & (1 << hook)); 37 local_bh_disable(); 38 addend = xt_write_recseq_begin(); 39 private = table->private; 40 cpu = smp_processor_id(); 41 /* 42 * Ensure we load private-> members after we've fetched the base 43 * pointer. 44 */ 45 smp_read_barrier_depends(); 46 /* 首个规则地址 */ 47 table_base = private->entries; 48 jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; 49 50 /* Switch to alternate jumpstack if we're being invoked via TEE. 51 * TEE issues XT_CONTINUE verdict on original skb so we must not 52 * clobber the jumpstack. 53 * 54 * For recursion via REJECT or SYNPROXY the stack will be clobbered 55 * but it is no problem since absolute verdict is issued by these. 
56 */ 57 if (static_key_false(&xt_tee_enabled)) 58 jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated); 59 60 /* 获取对应链上的首个匹配规则 */ 61 e = get_entry(table_base, private->hook_entry[hook]); 62 63 do { 64 const struct xt_entry_target *t; 65 const struct xt_entry_match *ematch; 66 struct xt_counters *counter; 67 68 IP_NF_ASSERT(e); 69 /* 标准match */ 70 if (!ip_packet_match(ip, indev, outdev, 71 &e->ip, acpar.fragoff)) { 72 no_match: 73 /* 未匹配成功,继续下一个规则 */ 74 e = ipt_next_entry(e); 75 continue; 76 } 77 78 /* 扩展match */ 79 xt_ematch_foreach(ematch, e) { 80 acpar.match = ematch->u.kernel.match; 81 acpar.matchinfo = ematch->data; 82 /* 只要有返回不匹配的,则说明匹配当前规则失败 */ 83 if (!acpar.match->match(skb, &acpar)) 84 goto no_match; 85 } 86 87 counter = xt_get_this_cpu_counter(&e->counters); 88 ADD_COUNTER(*counter, skb->len, 1); 89 90 /* 标准match和扩展match都成功 */ 91 92 /* 获取target */ 93 t = ipt_get_target(e); 94 IP_NF_ASSERT(t->u.kernel.target); 95 96 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) 97 /* The packet is traced: log it */ 98 if (unlikely(skb->nf_trace)) 99 trace_packet(state->net, skb, hook, state->in, 100 state->out, table->name, private, e); 101 #endif 102 /* Standard target? */ 103 /* 标准target */ 104 if (!t->u.kernel.target->target) { 105 int v; 106 107 v = ((struct xt_standard_target *)t)->verdict; 108 /* 不会跳转到用户自定义规则 */ 109 if (v < 0) { 110 /* Pop from stack? 
*/ 111 /* 不是XT_RETURN,则跳出处理结果 */ 112 if (v != XT_RETURN) { 113 verdict = (unsigned int)(-v) - 1; 114 break; 115 } 116 117 /* XT_RETURN则继续匹配下一条规则 */ 118 if (stackidx == 0) { 119 e = get_entry(table_base, 120 private->underflow[hook]); 121 } else { 122 e = jumpstack[--stackidx]; 123 e = ipt_next_entry(e); 124 } 125 continue; 126 } 127 128 /* 记录跳转规则,以便返回时获取下一跳规则进行后续匹配 */ 129 if (table_base + v != ipt_next_entry(e) && 130 !(e->ip.flags & IPT_F_GOTO)) 131 jumpstack[stackidx++] = e; 132 133 /* 获取自定义规则 */ 134 e = get_entry(table_base, v); 135 continue; 136 } 137 138 /* 扩展target,执行target回调 */ 139 140 acpar.target = t->u.kernel.target; 141 acpar.targinfo = t->data; 142 143 verdict = t->u.kernel.target->target(skb, &acpar); 144 /* Target might have changed stuff. */ 145 ip = ip_hdr(skb); 146 147 /* 需要继续匹配 */ 148 if (verdict == XT_CONTINUE) 149 e = ipt_next_entry(e); 150 /* 跳出处理匹配结果 */ 151 else 152 /* Verdict */ 153 break; 154 /* 无hotdrop,继续匹配 */ 155 } while (!acpar.hotdrop); 156 157 xt_write_recseq_end(addend); 158 local_bh_enable(); 159 160 /* drop标记 */ 161 if (acpar.hotdrop) 162 return NF_DROP; 163 /* 返回匹配结果 */ 164 else return verdict; 165 }