linux路由选择流程1 ip_rcv_finish2 ip_route_input_slow3 ip_mkroute_input4 整体函数流程

131 阅读 0 评论 87 点赞

我是靠谱客的博主清爽大象，这篇文章主要介绍linux路由选择流程1 ip_rcv_finish2 ip_route_input_slow3 ip_mkroute_input4 整体函数流程，现在分享给大家，希望可以做个参考。

1 ip_rcv_finish

ip_rcv这个函数主要是对数据包做各种正确性验证，然后调用网络过滤子系统（netfilter）在PRE_ROUTING链上的回调函数，经过netfilter的处理后再调用ip_rcv_finish。ip_rcv_finish主要的工作：确定数据包是转发还是交给本机协议栈向上传递，如果是转发，要确定输出网络设备和下一个接收站（下一跳）的地址。

我们看一下ip_rcv_finish的源码，首先判断skb->dst是否为NULL，如果是表明数据包的去向还没有定，那么需要调用ip_route_input决定数据包的去向

复制代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
/*
 * Excerpt of ip_rcv_finish(): when the skb has no dst entry attached yet,
 * ip_route_input() is called with the addresses and TOS taken from the IP
 * header plus the receiving device. The rest of the function is elided in
 * this excerpt (the dotted line below).
 */
static int ip_rcv_finish(struct sk_buff *skb)
{
    const struct iphdr *iph = ip_hdr(skb);
    struct rtable *rt;
    /*
     *    Initialise the virtual path cache for the packet. It describes
     *    how the packet travels inside Linux networking.
     */
    if (skb->dst == NULL) {      // no dst (route) entry on the skb yet, so look one up
        int err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos,
                     skb->dev);
    }
 .....
}

ip_route_input函数主要是调用rt_hash根据目的地址得到路由哈希值，然后调用ip_route_input_slow继续进一步处理

复制代码

1
2
3
4
5
6
7
8
/*
 * Excerpt of ip_route_input(): the part shown here only hands the packet to
 * ip_route_input_slow() with the same arguments and returns its result.
 * The code between the local declarations and the return is elided in this
 * excerpt (the dotted line below), so the use of rth and hash is not shown.
 */
int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
		   u8 tos, struct net_device *dev)
{
	struct rtable * rth;
	unsigned	hash;
.....
	return ip_route_input_slow(skb, daddr, saddr, tos, dev);	// next step: make the routing decision
}

2 ip_route_input_slow

ip_route_input_slow最重要的是调用fib_lookup根据目的地址查找路由表，得到路由结果struct fib_result *res，决定数据包去向：如果res.type == RTN_LOCAL，那么就是本地接收的数据包，设置数据包下一步处理函数为ip_local_deliver；如果是转发数据包，那么进一步调用ip_mkroute_input。

复制代码

/*
 * Excerpt of ip_route_input_slow(): looks the packet up with fib_lookup()
 * and then branches on the result. RTN_LOCAL results jump to local_input,
 * where a route entry is allocated and its input handler is set to
 * ip_local_deliver; other results reach ip_mkroute_input().
 *
 * Several parts are elided (the dotted lines), including the declarations
 * of net, fl, in_dev, err, flags, spec_dst, itag and rth and the targets of
 * the goto labels used below.
 */
static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			       u8 tos, struct net_device *dev)
{
	struct fib_result res;

......

if ((err = fib_lookup(net, &fl, &res)) != 0) {			// key step: query the routing tables; the result is stored in res and decides where the packet goes
		if (!IN_DEV_FORWARD(in_dev))
			goto e_hostunreach;
		goto no_route;
	}

......
	if (res.type == RTN_LOCAL) {			// packet is to be received by the local host
		int result;
		result = fib_validate_source(saddr, daddr, tos,
					     net->loopback_dev->ifindex,
					     dev, &spec_dst, &itag);
		if (result < 0)
			goto martian_source;
		if (result)
			flags |= RTCF_DIRECTSRC;
		spec_dst = daddr;
		goto local_input;
	}
......

err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);	// forwarding path
/* NOTE(review): as excerpted, control falls through from the call above into
 * brd_input; the full function presumably returns before this label --
 * confirm against the kernel source. */

brd_input:
	if (skb->protocol != htons(ETH_P_IP))
		goto e_inval;

if (ipv4_is_zeronet(saddr))
		spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
	else {
		err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
					  &itag);
		if (err < 0)
			goto martian_source;
		if (err)
			flags |= RTCF_DIRECTSRC;
	}
	flags |= RTCF_BROADCAST;
	res.type = RTN_BROADCAST;
	RT_CACHE_STAT_INC(in_brd);

local_input:                // local delivery: allocate and initialise the route entry
	rth = dst_alloc(&ipv4_dst_ops);
	if (!rth)
/* NOTE(review): the statement guarded by this check is missing from the
 * excerpt; as written, the assignment below becomes the if-body and would
 * run only when rth is NULL. Presumably a goto to an out-of-memory label
 * belongs here -- confirm against the kernel source. */
rth->rt_iif	=
	rth->fl.iif	= dev->ifindex;
	rth->u.dst.dev	= net->loopback_dev;
	dev_hold(rth->u.dst.dev);
	rth->idev	= in_dev_get(rth->u.dst.dev);
	rth->rt_gateway	= daddr;
	rth->rt_spec_dst= spec_dst;
	rth->u.dst.input= ip_local_deliver;		// input handler for locally received packets
	rth->rt_flags 	= flags|RTCF_LOCAL;

......

}

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
/*
 * Excerpt of ip_route_input_slow(): looks the packet up with fib_lookup()
 * and then branches on the result. RTN_LOCAL results jump to local_input,
 * where a route entry is allocated and its input handler is set to
 * ip_local_deliver; other results reach ip_mkroute_input().
 *
 * Several parts are elided (the dotted lines), including the declarations
 * of net, fl, in_dev, err, flags, spec_dst, itag and rth and the targets of
 * the goto labels used below.
 */
static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
			       u8 tos, struct net_device *dev)
{
	struct fib_result res;

......

	if ((err = fib_lookup(net, &fl, &res)) != 0) {			// key step: query the routing tables; the result is stored in res and decides where the packet goes
		if (!IN_DEV_FORWARD(in_dev))
			goto e_hostunreach;
		goto no_route;
	}

......
	if (res.type == RTN_LOCAL) {			// packet is to be received by the local host
		int result;
		result = fib_validate_source(saddr, daddr, tos,
					     net->loopback_dev->ifindex,
					     dev, &spec_dst, &itag);
		if (result < 0)
			goto martian_source;
		if (result)
			flags |= RTCF_DIRECTSRC;
		spec_dst = daddr;
		goto local_input;
	}
......

	err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);	// forwarding path
	/* NOTE(review): as excerpted, control falls through from the call above
	 * into brd_input; the full function presumably returns before this
	 * label -- confirm against the kernel source. */

brd_input:
	if (skb->protocol != htons(ETH_P_IP))
		goto e_inval;

	if (ipv4_is_zeronet(saddr))
		spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
	else {
		err = fib_validate_source(saddr, 0, tos, 0, dev, &spec_dst,
					  &itag);
		if (err < 0)
			goto martian_source;
		if (err)
			flags |= RTCF_DIRECTSRC;
	}
	flags |= RTCF_BROADCAST;
	res.type = RTN_BROADCAST;
	RT_CACHE_STAT_INC(in_brd);

local_input:                // local delivery: allocate and initialise the route entry
	rth = dst_alloc(&ipv4_dst_ops);
	if (!rth)
	/* NOTE(review): the statement guarded by this check is missing from the
	 * excerpt; as written, the assignment below becomes the if-body and
	 * would run only when rth is NULL. Presumably a goto to an
	 * out-of-memory label belongs here -- confirm against the kernel
	 * source. */
	rth->rt_iif	=
	rth->fl.iif	= dev->ifindex;
	rth->u.dst.dev	= net->loopback_dev;
	dev_hold(rth->u.dst.dev);
	rth->idev	= in_dev_get(rth->u.dst.dev);
	rth->rt_gateway	= daddr;
	rth->rt_spec_dst= spec_dst;
	rth->u.dst.input= ip_local_deliver;		// input handler for locally received packets
	rth->rt_flags 	= flags|RTCF_LOCAL;

......

}

2.1 fib_lookup

fib_lookup查询路由表，将路由结果保存在struct fib_result *res，决定数据包的走向,先查找本地路由表,再查找main路由表

复制代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
/*
 * fib_lookup - resolve a flow against the local table, then the main table.
 * @net: namespace whose tables are consulted
 * @flp: flow key passed to each table's tb_lookup()
 * @res: filled in by the table's tb_lookup()
 *
 * The main table is only fetched and queried when the local table's
 * tb_lookup() returns non-zero.
 *
 * Returns 0 as soon as one table's tb_lookup() returns 0, otherwise
 * -ENETUNREACH.
 */
static inline int fib_lookup(struct net *net, const struct flowi *flp,
			     struct fib_result *res)
{
	struct fib_table *tb;
	int rc;

	/* First choice: the local table. */
	tb = fib_get_table(net, RT_TABLE_LOCAL);
	rc = tb->tb_lookup(tb, flp, res);

	if (rc != 0) {
		/* Nothing in the local table; fall back to the main table. */
		tb = fib_get_table(net, RT_TABLE_MAIN);
		rc = tb->tb_lookup(tb, flp, res);
	}

	return rc == 0 ? 0 : -ENETUNREACH;
}

3 ip_mkroute_input

ip_mkroute_input实际调用的是__mkroute_input，__mkroute_input根据路由结果确定数据包的发包网卡，设置数据包转发函数ip_forward，并设置出口处理函数ip_output。

复制代码

/*
 * __mkroute_input - build the route entry used to forward a received packet.
 * @skb:    packet being routed (its protocol and mark are read)
 * @res:    FIB lookup result; supplies the output device, OIF and gateway
 * @in_dev: in_device the packet arrived on
 * @daddr:  destination address
 * @saddr:  source address
 * @tos:    type of service
 * @result: on success, receives the newly allocated route entry
 *
 * Takes a working reference on the output in_device for the duration of the
 * call and drops it again at the cleanup label.
 *
 * Returns 0 on success. Returns -EINVAL when the output in_device cannot be
 * obtained, when fib_validate_source() reports an error, or when a non-IP
 * packet would leave through the device it came in on. Returns -ENOBUFS when
 * dst_alloc() fails.
 */
static int __mkroute_input(struct sk_buff *skb,
			   struct fib_result *res,
			   struct in_device *in_dev,
			   __be32 daddr, __be32 saddr, u32 tos,
			   struct rtable **result)
{
	struct rtable *rth;
	int err;
	struct in_device *out_dev;
	unsigned flags = 0;
	__be32 spec_dst;
	u32 itag;

	/* get a working reference to the output device */
	out_dev = in_dev_get(FIB_RES_DEV(*res));	/* output device from the FIB result */
	if (out_dev == NULL) {
		/* No reference was taken, so return without going through cleanup. */
		if (net_ratelimit())
			printk(KERN_CRIT "Bug in ip_route_input"
			       "_slow(). Please, report\n");
		return -EINVAL;
	}

	/* Check the source address against the route chosen for forwarding. */
	err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res),
				  in_dev->dev, &spec_dst, &itag);
	if (err < 0) {
		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
					 saddr);

		err = -EINVAL;
		goto cleanup;
	}

	/* err is >= 0 here; a non-zero value sets RTCF_DIRECTSRC. */
	if (err)
		flags |= RTCF_DIRECTSRC;

	/* Packet would go back out of the device it arrived on. */
	if (out_dev == in_dev && err &&
	    (IN_DEV_SHARED_MEDIA(out_dev) ||
	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
		flags |= RTCF_DOREDIRECT;

	if (skb->protocol != htons(ETH_P_IP)) {
		/* Not IP (i.e. ARP). Do not create route, if it is
		 * invalid for proxy arp. DNAT routes are always valid.
		 */
		if (out_dev == in_dev) {
			err = -EINVAL;
			goto cleanup;
		}
	}

	rth = dst_alloc(&ipv4_dst_ops);
	if (!rth) {
		err = -ENOBUFS;
		goto cleanup;
	}

	atomic_set(&rth->u.dst.__refcnt, 1);
	rth->u.dst.flags= DST_HOST;
	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
		rth->u.dst.flags |= DST_NOPOLICY;
	if (IN_DEV_CONF_GET(out_dev, NOXFRM))
		rth->u.dst.flags |= DST_NOXFRM;
	rth->fl.fl4_dst	= daddr;
	rth->rt_dst	= daddr;
	rth->fl.fl4_tos	= tos;
	rth->fl.mark    = skb->mark;
	rth->fl.fl4_src	= saddr;
	rth->rt_src	= saddr;
	rth->rt_gateway	= daddr;
	rth->rt_iif 	=
		rth->fl.iif	= in_dev->dev->ifindex;
	rth->u.dst.dev	= (out_dev)->dev;	/* output device of the route */
	dev_hold(rth->u.dst.dev);
	rth->idev	= in_dev_get(rth->u.dst.dev);
	rth->fl.oif 	= 0;
	rth->rt_spec_dst= spec_dst;

	rth->u.dst.input = ip_forward;		/* input handler: forward the packet */
	rth->u.dst.output = ip_output;		/* output handler */
	rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev));

	rt_set_nexthop(rth, res, itag);

	rth->rt_flags = flags;

	*result = rth;
	err = 0;
 cleanup:
	/* release the working reference to the output device */
	in_dev_put(out_dev);
	return err;
}

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
/*
 * __mkroute_input - build the route entry used to forward a received packet.
 * @skb:    packet being routed (its protocol and mark are read)
 * @res:    FIB lookup result; supplies the output device, OIF and gateway
 * @in_dev: in_device the packet arrived on
 * @daddr:  destination address
 * @saddr:  source address
 * @tos:    type of service
 * @result: on success, receives the newly allocated route entry
 *
 * Takes a working reference on the output in_device for the duration of the
 * call and drops it again at the cleanup label.
 *
 * Returns 0 on success. Returns -EINVAL when the output in_device cannot be
 * obtained, when fib_validate_source() reports an error, or when a non-IP
 * packet would leave through the device it came in on. Returns -ENOBUFS when
 * dst_alloc() fails.
 */
static int __mkroute_input(struct sk_buff *skb,
			   struct fib_result *res,
			   struct in_device *in_dev,
			   __be32 daddr, __be32 saddr, u32 tos,
			   struct rtable **result)
{

	struct rtable *rth;
	int err;
	struct in_device *out_dev;
	unsigned flags = 0;
	__be32 spec_dst;
	u32 itag;

	/* get a working reference to the output device */
	out_dev = in_dev_get(FIB_RES_DEV(*res));	/* output device from the FIB result */
	if (out_dev == NULL) {
		/* No reference was taken, so return without going through cleanup. */
		if (net_ratelimit())
			printk(KERN_CRIT "Bug in ip_route_input"
			       "_slow(). Please, report\n");
		return -EINVAL;
	}


	/* Check the source address against the route chosen for forwarding. */
	err = fib_validate_source(saddr, daddr, tos, FIB_RES_OIF(*res),
				  in_dev->dev, &spec_dst, &itag);
	if (err < 0) {
		ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
					 saddr);

		err = -EINVAL;
		goto cleanup;
	}

	/* err is >= 0 here; a non-zero value sets RTCF_DIRECTSRC. */
	if (err)
		flags |= RTCF_DIRECTSRC;

	/* Packet would go back out of the device it arrived on. */
	if (out_dev == in_dev && err &&
	    (IN_DEV_SHARED_MEDIA(out_dev) ||
	     inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
		flags |= RTCF_DOREDIRECT;

	if (skb->protocol != htons(ETH_P_IP)) {
		/* Not IP (i.e. ARP). Do not create route, if it is
		 * invalid for proxy arp. DNAT routes are always valid.
		 */
		if (out_dev == in_dev) {
			err = -EINVAL;
			goto cleanup;
		}
	}


	rth = dst_alloc(&ipv4_dst_ops);
	if (!rth) {
		err = -ENOBUFS;
		goto cleanup;
	}

	atomic_set(&rth->u.dst.__refcnt, 1);
	rth->u.dst.flags= DST_HOST;
	if (IN_DEV_CONF_GET(in_dev, NOPOLICY))
		rth->u.dst.flags |= DST_NOPOLICY;
	if (IN_DEV_CONF_GET(out_dev, NOXFRM))
		rth->u.dst.flags |= DST_NOXFRM;
	rth->fl.fl4_dst	= daddr;
	rth->rt_dst	= daddr;
	rth->fl.fl4_tos	= tos;
	rth->fl.mark    = skb->mark;
	rth->fl.fl4_src	= saddr;
	rth->rt_src	= saddr;
	rth->rt_gateway	= daddr;
	rth->rt_iif 	=
		rth->fl.iif	= in_dev->dev->ifindex;
	rth->u.dst.dev	= (out_dev)->dev;	/* output device of the route */
	dev_hold(rth->u.dst.dev);
	rth->idev	= in_dev_get(rth->u.dst.dev);
	rth->fl.oif 	= 0;
	rth->rt_spec_dst= spec_dst;

	rth->u.dst.input = ip_forward;		/* input handler: forward the packet */
	rth->u.dst.output = ip_output;		/* output handler */
	rth->rt_genid = rt_genid(dev_net(rth->u.dst.dev));

	rt_set_nexthop(rth, res, itag);

	rth->rt_flags = flags;

	*result = rth;
	err = 0;
 cleanup:
	/* release the working reference to the output device */
	in_dev_put(out_dev);
	return err;
}