linux bridge - mac forward
https://www.jianshu.com/nb/46196362
linux bridge在虚拟化场景和docker中用的比较多,之前也知道它的原理,基本上就是类似二层交换机,根据mac地址和vid转发。但是对于vlan的处理网上的文档比较少,所以这次就看一下源码,分析下不配置vlan时如何转发,vlan又如何生效。
不配置vlan时,bridge纯靠mac转发,可通过如下两个命令之一查看mac转发表
//此命令只显示单播转发表,比较符合硬件交换机的显示规范,
//匹配到mac的,从port转发出去(可通过brctl showstp br1查看端
//口号和端口的对应关系)
root@node2:~# brctl showmacs br1
port no mac addr is local? ageing timer
2 12:27:96:8c:f4:58 yes 0.00
2 12:27:96:8c:f4:58 yes 0.00
1 66:e6:6f:a8:d4:97 yes 0.00
1 66:e6:6f:a8:d4:97 yes 0.00
//通过此命令可显示所有的单播和组播表项
root@node2:~# bridge fdb show br br1
33:33:00:00:00:01 dev br1 self permanent
66:e6:6f:a8:d4:97 dev vetha master br1 permanent
66:e6:6f:a8:d4:97 dev vetha vlan 1 master br1 permanent
33:33:00:00:00:01 dev vetha self permanent
01:00:5e:00:00:01 dev vetha self permanent
12:27:96:8c:f4:58 dev vethx master br1 permanent
12:27:96:8c:f4:58 dev vethx vlan 1 master br1 permanent
33:33:00:00:00:01 dev vethx self permanent
01:00:5e:00:00:01 dev vethx self permanent
这篇文档就先介绍不使能vlan的情况,主要分为下面几个部分
a. kernel端bridge module的初始化都做了哪些事
b. 添加网桥时,命令行和kernel端代码流程
c. 给网桥添加端口时,命令行和kernel端代码流程
d. 从端口收到报文后,内部是如何转发的
广播/组播/未知单播报文flood到所有端口。
查找到转发表项的已知单播报文,发送到此表项的出端口。
广播/组播/已知单播并且dst为local的报文,或者网桥设备使能了混杂模式,这几种情况都需要通过网桥设备将报文上送本机协议栈处理。
e. 从网桥br发出去的报文如何转发
广播/组播/未知单播报文,flood到所有端口。
能查找到转发表项的单播报文,从表项的出端口发送出去。
bridge还有如下几个注意的地方
单播flood: 控制未知单播报文flood时是否从此端口发送一份,有两种设置方式,
a. bridge link set dev vnet1 flood on
b. echo 1 > /sys/class/net/br1/brif/vnet1/unicast_flood
hairpin模式:控制从某个端口收到的广播/组播/未知单播报文,是否允许再次从该端口发出。已知单播正常转发。
a. bridge link set dev vnet1 hairpin on
b. echo 1 > /sys/class/net/br1/brif/vnet1/hairpin_mode
网桥设备down后,所有端口状态都会变成 disabled, 导致网桥不会正确转发。
vetha (1)
port id 8001 state disabled
designated root 8000.3adce07c2043 path cost 2
designated bridge 8000.3adce07c2043 message age timer 0.00
designated port 8001 forward delay timer 0.00
designated cost 0 hold timer 0.00
flags
bridge netfilter框架,可使用ebtables设置和查看
1. module初始化流程
#module初始化流程
module_init(br_init)
static int __init br_init(void)
static const struct stp_proto br_stp_proto = {
.rcv = br_stp_rcv,
};
//注册stp协议处理函数,防止环路产生,此文不看stp部分
stp_proto_register(&br_stp_proto);
//初始化fdb表项用到的cache
br_fdb_init();
static struct kmem_cache *br_fdb_cache __read_mostly;
br_fdb_cache = kmem_cache_create("bridge_fdb_cache",
sizeof(struct net_bridge_fdb_entry),0,
SLAB_HWCACHE_ALIGN, NULL);
static u32 fdb_salt __read_mostly;
get_random_bytes(&fdb_salt, sizeof(fdb_salt));
static struct pernet_operations br_net_ops = {
.exit = br_net_exit,
};
//注册pernet操作,只提供了exit,所以namespace初始化时无操作
register_pernet_subsys(&br_net_ops);
static struct notifier_block br_device_notifier = {
.notifier_call = br_device_event
};
//注册网络设备事件处理函数
register_netdevice_notifier(&br_device_notifier);
br_netlink_init();
br_mdb_init();
rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, NULL);
rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, NULL);
rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, NULL);
static struct rtnl_af_ops br_af_ops = {
.family = AF_BRIDGE,
.get_link_af_size = br_get_link_af_size,
};
rtnl_af_register(&br_af_ops);
list_add_tail(&ops->list, &rtnl_af_ops);
struct rtnl_link_ops br_link_ops __read_mostly = {
.kind = "bridge",
.priv_size = sizeof(struct net_bridge),
.setup = br_dev_setup,
.maxtype = IFLA_BRPORT_MAX,
.policy = br_policy,
.validate = br_validate,
.newlink = br_dev_newlink,
.changelink = br_changelink,
.dellink = br_dev_delete,
.get_size = br_get_size,
.fill_info = br_fill_info,
.slave_maxtype = IFLA_BRPORT_MAX,
.slave_policy = br_port_policy,
.slave_changelink = br_port_slave_changelink,
.get_slave_size = br_port_get_slave_size,
.fill_slave_info = br_port_fill_slave_info,
};
rtnl_link_register(&br_link_ops);
__rtnl_link_register(ops);
list_add_tail(&ops->list, &link_ops);
//注册hook函数到br_ioctl_hook,添加网桥时调用br_ioctl_hook
brioctl_set(br_ioctl_deviceless_stub)