概述
其实kube-proxy的代码本身并不复杂,只是有个细节容易被大家忽略。大家可能都知道它有轮询的负载均衡策略,是通过iptables实现的,那它是怎样控制流量平均转发到各个后端的呢?iptables的statistic模块支持random模式,那每条规则的概率(权重)又是怎样控制的呢?
看代码,一步一步分析
{
tablesNeedServicesChain := []utiliptables.Table{utiliptables.TableFilter, utiliptables.TableNAT}
for _, table := range tablesNeedServicesChain {
if _, err := proxier.iptables.EnsureChain(table, kubeServicesChain); err != nil {
glog.Errorf("Failed to ensure that %s chain %s exists: %v", table, kubeServicesChain, err)
return
}
}
tableChainsNeedJumpServices := []struct {
table utiliptables.Table
chain utiliptables.Chain
}{
{utiliptables.TableFilter, utiliptables.ChainInput},
{utiliptables.TableFilter, utiliptables.ChainOutput},
{utiliptables.TableNAT, utiliptables.ChainOutput},
{utiliptables.TableNAT, utiliptables.ChainPrerouting},
}
comment := "kubernetes service portals"
args := []string{"-m", "comment", "--comment", comment, "-j", string(kubeServicesChain)}
for _, tc := range tableChainsNeedJumpServices {
if _, err := proxier.iptables.EnsureRule(utiliptables.Prepend, tc.table, tc.chain, args...); err != nil {
glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", tc.table, tc.chain, kubeServicesChain, err)
return
}
}
}
首先是在filter表的INPUT/OUTPUT链和nat表的OUTPUT/PREROUTING链上各建立一条规则,全部跳转到KUBE-SERVICES链
效果如下:
-A OUTPUT -m comment --comment "kubernetes service portals" -j KUBE-SERVICES
-A PREROUTING -m comment --comment "kubernetes service portals" -j KUBE-SERVICES
-A OUTPUT -m comment --comment "kubernetes service portals" -j KUBE-SERVICES
这样经过上述这些链的流量都会先被KUBE-SERVICES链截获
当然,有些流量需要做SNAT之后再出去,所以还要在nat表的POSTROUTING链上加一条跳转到KUBE-POSTROUTING链的规则:
{
if _, err := proxier.iptables.EnsureChain(utiliptables.TableNAT, kubePostroutingChain); err != nil {
glog.Errorf("Failed to ensure that %s chain %s exists: %v", utiliptables.TableNAT, kubePostroutingChain, err)
return
}
comment := "kubernetes postrouting rules"
args := []string{"-m", "comment", "--comment", comment, "-j", string(kubePostroutingChain)}
if _, err := proxier.iptables.EnsureRule(utiliptables.Prepend, utiliptables.TableNAT, utiliptables.ChainPostrouting, args...); err != nil {
glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", utiliptables.TableNAT, utiliptables.ChainPostrouting, kubePostroutingChain, err)
return
}
}
效果如下:
-A POSTROUTING -m comment --comment "kubernetes postrouting rules" -j KUBE-POSTROUTING
-A KUBE-POSTROUTING -m comment --comment "kubernetes service traffic requiring SNAT" -m mark --mark 0x4000/0x4000 -j MASQUERADE
现在开始建立kubernetes proxy的各个链
writeLine(proxier.filterChains, "*filter")
writeLine(proxier.natChains, "*nat")
// Make sure we keep stats for the top-level chains, if they existed
// (which most should have because we created them above).
if chain, ok := existingFilterChains[kubeServicesChain]; ok {
writeLine(proxier.filterChains, chain)
} else {
writeLine(proxier.filterChains, utiliptables.MakeChainLine(kubeServicesChain))
}
if chain, ok := existingNATChains[kubeServicesChain]; ok {
writeLine(proxier.natChains, chain)
} else {
writeLine(proxier.natChains, utiliptables.MakeChainLine(kubeServicesChain))
}
if chain, ok := existingNATChains[kubeNodePortsChain]; ok {
writeLine(proxier.natChains, chain)
} else {
writeLine(proxier.natChains, utiliptables.MakeChainLine(kubeNodePortsChain))
}
if chain, ok := existingNATChains[kubePostroutingChain]; ok {
writeLine(proxier.natChains, chain)
} else {
writeLine(proxier.natChains, utiliptables.MakeChainLine(kubePostroutingChain))
}
if chain, ok := existingNATChains[KubeMarkMasqChain]; ok {
writeLine(proxier.natChains, chain)
} else {
writeLine(proxier.natChains, utiliptables.MakeChainLine(KubeMarkMasqChain))
}
这段代码创建了KUBE-SERVICES、KUBE-NODEPORTS、KUBE-POSTROUTING、KUBE-MARK-MASQ这几条链:如果链已经存在,就沿用原有的链定义行(以保留统计计数),否则新生成一行链定义
通过kubernetes创建的service会分配一个clusterIP,这些clusterIP是在iptables上面实现的
args := []string{
"-A", string(kubeServicesChain),
"-m", "comment", "--comment", fmt.Sprintf(`"%s cluster IP"`, svcNameString),
"-m", protocol, "-p", protocol,
"-d", fmt.Sprintf("%s/32", svcInfo.clusterIP.String()),
"--dport", fmt.Sprintf("%d", svcInfo.port),
}
if proxier.masqueradeAll {
writeLine(proxier.natRules, append(args, "-j", string(KubeMarkMasqChain))...)
}
if len(proxier.clusterCIDR) > 0 {
writeLine(proxier.natRules, append(args, "! -s", proxier.clusterCIDR, "-j", string(KubeMarkMasqChain))...)
}
writeLine(proxier.natRules, append(args, "-j", string(svcChain))...)
上面就是截获访问clusterIP的流量,并跳转到该服务对应的KUBE-SVC链(真正的DNAT是在后面每个endpoint对应的链里完成的)。这里需要补充的是,如果一个服务后面有多个endpoint,会通过下面的循环为每个endpoint生成规则:
for i, endpointChain := range endpointChains {
// Balancing rules in the per-service chain.
args := []string{
"-A", string(svcChain),
"-m", "comment", "--comment", svcNameString,
}
if i < (n - 1) {
// Each rule is a probabilistic match.
args = append(args,
"-m", "statistic",
"--mode", "random",
"--probability", fmt.Sprintf("%0.5f", 1.0/float64(n-i)))
}
// The final (or only if n == 1) rule is a guaranteed match.
args = append(args, "-j", string(endpointChain))
writeLine(proxier.natRules, args...)
// Rules in the per-endpoint chain.
args = []string{
"-A", string(endpointChain),
"-m", "comment", "--comment", svcNameString,
}
// Handle traffic that loops back to the originator with SNAT.
writeLine(proxier.natRules, append(args,
"-s", fmt.Sprintf("%s/32", strings.Split(endpoints[i].endpoint, ":")[0]),
"-j", string(KubeMarkMasqChain))...)
// Update client-affinity lists.
if svcInfo.sessionAffinityType == api.ServiceAffinityClientIP {
args = append(args, "-m", "recent", "--name", string(endpointChain), "--set")
}
// DNAT to final destination.
args = append(args, "-m", protocol, "-p", protocol, "-j", "DNAT", "--to-destination", endpoints[i].endpoint)
writeLine(proxier.natRules, args...)
}
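上面通过循环的方式为每个后端endpoint创建转发规则,概率是通过--probability后面的1.0/float64(n-i)计算出来的。譬如有两个endpoint的场景,两条规则分别是0.5和1(最后一条规则不带概率,必然匹配),也就是第一条是50%概率、第二条是100%概率;如果是三个的话类似,依次是33%、50%、100%。注意iptables的规则是按顺序匹配的,后一条规则的概率是在前面的规则都没有命中的前提下才生效的,所以三个endpoint实际分到的流量分别是1/3、(1-1/3)×1/2=1/3、(1-1/3)×(1-1/2)×1=1/3,正好是平均的。下面是10个endpoint的例子(代码里用%0.5f只保留了5位小数,而iptables-save打印的是内部保存的定点数换算回来的值,所以会出现0.11110999994这样的数字)。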
kubectl get svc --all-namespaces
NAMESPACE NAME CLUSTER-IP EXTERNAL-IP PORT(S) AGE
admin docker2048 10.13.52.135 11.11.1.1 80/TCP 1d
[root@master-62 ~]#
[root@master-62 ~]# iptables-save |grep 10.13.52.135
-A KUBE-SERVICES -d 10.13.52.135/32 -p tcp -m comment --comment "admin/docker2048:docker2048-1 cluster IP" -m tcp --dport 80 -j KUBE-SVC-MHWEDWK6NM5OGU2T
[root@master-62 ~]#
[root@master-62 ~]#
[root@master-62 ~]# iptables-save |grep KUBE-SVC-MHWEDWK6NM5OGU2T
:KUBE-SVC-MHWEDWK6NM5OGU2T - [0:0]
-A KUBE-SERVICES -d 10.13.52.135/32 -p tcp -m comment --comment "admin/docker2048:docker2048-1 cluster IP" -m tcp --dport 80 -j KUBE-SVC-MHWEDWK6NM5OGU2T
-A KUBE-SERVICES -d 11.11.1.1/32 -p tcp -m comment --comment "admin/docker2048:docker2048-1 external IP" -m tcp --dport 80 -m physdev ! --physdev-is-in -m addrtype ! --src-type LOCAL -j KUBE-SVC-MHWEDWK6NM5OGU2T
-A KUBE-SERVICES -d 11.11.1.1/32 -p tcp -m comment --comment "admin/docker2048:docker2048-1 external IP" -m tcp --dport 80 -m addrtype --dst-type LOCAL -j KUBE-SVC-MHWEDWK6NM5OGU2T
-A KUBE-SVC-MHWEDWK6NM5OGU2T -m comment --comment "admin/docker2048:docker2048-1" -m statistic --mode random --probability 0.10000000009 -j KUBE-SEP-VC767CJYOTCBCN3B
-A KUBE-SVC-MHWEDWK6NM5OGU2T -m comment --comment "admin/docker2048:docker2048-1" -m statistic --mode random --probability 0.11110999994 -j KUBE-SEP-HQELSIUR5HSCB2VN
-A KUBE-SVC-MHWEDWK6NM5OGU2T -m comment --comment "admin/docker2048:docker2048-1" -m statistic --mode random --probability 0.12500000000 -j KUBE-SEP-X2UDSU7Q4UA4IKY7
-A KUBE-SVC-MHWEDWK6NM5OGU2T -m comment --comment "admin/docker2048:docker2048-1" -m statistic --mode random --probability 0.14286000002 -j KUBE-SEP-DQ3TZIZIDTXU77P7
-A KUBE-SVC-MHWEDWK6NM5OGU2T -m comment --comment "admin/docker2048:docker2048-1" -m statistic --mode random --probability 0.16667000018 -j KUBE-SEP-A3JWOZYQIIDDEKNM
-A KUBE-SVC-MHWEDWK6NM5OGU2T -m comment --comment "admin/docker2048:docker2048-1" -m statistic --mode random --probability 0.20000000019 -j KUBE-SEP-6EZ2MUBOPU2WH44E
-A KUBE-SVC-MHWEDWK6NM5OGU2T -m comment --comment "admin/docker2048:docker2048-1" -m statistic --mode random --probability 0.25000000000 -j KUBE-SEP-4KG3GD3BQ5TCAUPR
-A KUBE-SVC-MHWEDWK6NM5OGU2T -m comment --comment "admin/docker2048:docker2048-1" -m statistic --mode random --probability 0.33332999982 -j KUBE-SEP-6EXLETYC4LYB5NLM
-A KUBE-SVC-MHWEDWK6NM5OGU2T -m comment --comment "admin/docker2048:docker2048-1" -m statistic --mode random --probability 0.50000000000 -j KUBE-SEP-VLQQMEFA6Y5RZLE7
-A KUBE-SVC-MHWEDWK6NM5OGU2T -m comment --comment "admin/docker2048:docker2048-1" -j KUBE-SEP-CXDZACZ7ESWWLYJM
最后
以上就是长情冷风为你收集整理的kube-proxy细节分析的全部内容,希望文章能够帮你解决kube-proxy细节分析所遇到的程序开发问题。
如果觉得靠谱客网站的内容还不错,欢迎将靠谱客网站推荐给程序员好友。
本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
发表评论 取消回复