我是靠谱客的博主 背后牛排,这篇文章主要介绍ovs vswitchd的启动分析,现在分享给大家,希望可以做个参考。

ovs vswitchd的启动

vswitchd启动代码可参考ovs-vswitchd.c的main函数,其中最重要的两个函数是bridge_run以及netdev_run

bridge_run

复制代码
1
2
3
4
5
6
7
8
9
10
/* Excerpt of bridge_run().  The "..." markers are elisions made by the
 * article, not C syntax; only the two calls relevant to start-up are shown. */
void bridge_run() {
    ...
    /* Initialize the ofproto library.  This only needs to run once, but
     * it must be done after the configuration is set.  If the
     * initialization has already occurred, bridge_init_ofproto()
     * returns immediately. */
    bridge_init_ofproto(cfg);
    bridge_run__();
    ...
}

bridge_init_ofproto用于初始化ofproto-dpif组件:首先会从ovsdb记录中汇总bridge、port、interface等信息,之后以这些信息作为iface_hints调用ofproto_init

复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
/* Must be called to initialize the ofproto library. * * The caller may pass in 'iface_hints', which contains an shash of * "iface_hint" elements indexed by the interface's name. The provider * may use these hints to describe the startup configuration in order to * reinitialize its state. The caller owns the provided data, so a * provider will make copies of anything required. An ofproto provider * will remove any existing state that is not described by the hint, and * may choose to remove it all. */ void ofproto_init(const struct shash *iface_hints) { struct shash_node *node; size_t i; ofproto_class_register(&ofproto_dpif_class); /* Make a local copy, since we don't own 'iface_hints' elements. */ SHASH_FOR_EACH(node, iface_hints) { const struct iface_hint *orig_hint = node->data; struct iface_hint *new_hint = xmalloc(sizeof *new_hint); const char *br_type = ofproto_normalize_type(orig_hint->br_type); new_hint->br_name = xstrdup(orig_hint->br_name); new_hint->br_type = xstrdup(br_type); new_hint->ofp_port = orig_hint->ofp_port; shash_add(&init_ofp_ports, node->name, new_hint); } for (i = 0; i < n_ofproto_classes; i++) { ofproto_classes[i]->init(&init_ofp_ports); } ofproto_unixctl_init(); }

ofproto_init首先注册ofproto_dpif_class,这个是ovs的ofproto-dpif的默认实现,之后对已经注册的所有ofproto_classes,都会调用ofproto_class->init函数。
ofproto_dpif_class是ovs的ofproto实现,在ofproto/ofproto-dpif.c中,这里先介绍init函数

复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
/* 'init' hook of the ofproto-dpif provider.
 *
 * Duplicates every element of 'iface_hints' into 'init_ofp_ports' (the
 * caller keeps ownership of the originals), then calls
 * ofproto_unixctl_init() and udpif_init(). */
static void
init(const struct shash *iface_hints)
{
    struct shash_node *hint_node;

    /* The hint elements are not ours, so store private copies. */
    SHASH_FOR_EACH (hint_node, iface_hints) {
        const struct iface_hint *src = hint_node->data;
        struct iface_hint *copy = xmalloc(sizeof *copy);

        copy->ofp_port = src->ofp_port;
        copy->br_name = xstrdup(src->br_name);
        copy->br_type = xstrdup(src->br_type);

        shash_add(&init_ofp_ports, hint_node->name, copy);
    }

    ofproto_unixctl_init();
    udpif_init();
}

ofproto_unixctl_init和udpif_init都通过unixctl_command_register注册了多个ovs-appctl命令。

bridge_run__最终调用的都是ofproto_dpif_class里注册的函数

复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
static void bridge_run__(void) { struct bridge *br; struct sset types; const char *type; /* Let each datapath type do the work that it needs to do. */ sset_init(&types); ofproto_enumerate_types(&types); SSET_FOR_EACH (type, &types) { ofproto_type_run(type); } sset_destroy(&types); /* Let each bridge do the work that it needs to do. */ HMAP_FOR_EACH (br, node, &all_bridges) { ofproto_run(br->ofproto); } }

ofproto_enumerate_types实际调用的是ofproto_dpif_class里注册的enumerate_types函数,里面实际调用了dp_enumerate_types

复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
/* Adds the type name of every registered datapath class to 'types'.
 *
 * Calls dp_initialize() first, then walks 'dpif_classes' while holding
 * 'dpif_mutex'.  This function only adds to 'types'; it does not
 * initialize it, so the caller must pass an already initialized sset.
 *
 * Returns nothing.  The return type is spelled out as 'void' because a
 * definition without a return type relies on implicit int, which C99 and
 * later do not allow. */
void
dp_enumerate_types(struct sset *types)
{
    struct shash_node *node;

    dp_initialize();

    ovs_mutex_lock(&dpif_mutex);
    SHASH_FOR_EACH(node, &dpif_classes) {
        const struct registered_dpif_class *registered_class = node->data;

        sset_add(types, registered_class->dpif_class->type);
    }
    ovs_mutex_unlock(&dpif_mutex);
}

dp_enumerate_types列出所有支持的dpif_class,目前主要有dpif_netdev_class以及dpif_netlink_class两类

ofproto_type_run实际调用了ofproto_dpif_class的type_run函数,里面又调用了dpif_class->run,以及udpif_run
struct udpif代表了ofproto_dpif的upcall handler结构体,包含两部分,一是struct handler的数组,用于处理upcall请求,可以看做是一个线程池,二是revalidators的数组,这块我没有细看,应该是一种类似gc机制的线程池,用于回收过期的flow

复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
/* An upcall handler for ofproto_dpif.
 *
 * udpif keeps records of two kinds of logically separate units:
 *
 * upcall handling
 * ---------------
 *
 *    - An array of 'struct handler's for upcall handling and flow
 *      installation.
 *
 * flow revalidation
 * -----------------
 *
 *    - Revalidation threads which read the datapath flow table and maintain
 *      them.
 */
struct udpif {
    struct ovs_list list_node;         /* In all_udpifs list. */

    struct dpif *dpif;                 /* Datapath handle. */
    struct dpif_backer *backer;        /* Opaque dpif_backer pointer. */

    struct handler *handlers;          /* Upcall handlers. */
    size_t n_handlers;                 /* NOTE(review): presumably the number
                                        * of elements in 'handlers' --
                                        * confirm. */

    struct revalidator *revalidators;  /* Flow revalidators. */
    size_t n_revalidators;             /* NOTE(review): presumably the number
                                        * of elements in 'revalidators' --
                                        * confirm. */

    struct latch exit_latch;           /* Tells child threads to exit. */

    /* Revalidation. */
    struct seq *reval_seq;             /* Incremented to force revalidation. */
    bool reval_exit;                   /* Set by leader on 'exit_latch'. */
    struct ovs_barrier reval_barrier;  /* Barrier used by revalidators. */
    struct dpif_flow_dump *dump;       /* DPIF flow dump state. */
    long long int dump_duration;       /* Duration of the last flow dump. */
    struct seq *dump_seq;              /* Increments each dump iteration. */
    atomic_bool enable_ufid;           /* If true, skip dumping flow attrs. */

    /* These variables provide a mechanism for the main thread to pause
     * all revalidation without having to completely shut the threads down.
     * 'pause_latch' is shared between the main thread and the lead
     * revalidator thread, so when it is desirable to halt revalidation, the
     * main thread will set the latch.  'pause' and 'pause_barrier' are shared
     * by revalidator threads.  The lead revalidator will set 'pause' when it
     * observes the latch has been set, and this will cause all revalidator
     * threads to wait on 'pause_barrier' at the beginning of the next
     * revalidation round. */
    bool pause;                        /* Set by leader on 'pause_latch'. */
    struct latch pause_latch;          /* Set to force revalidators pause. */
    struct ovs_barrier pause_barrier;  /* Barrier used to pause all
                                        * revalidators by main thread. */

    /* Following fields are accessed and modified by different threads. */
    atomic_uint flow_limit;            /* Datapath flow hard limit. */

    /* n_flows_mutex prevents multiple threads updating these concurrently. */
    atomic_uint n_flows;               /* Number of flows in the datapath. */
    atomic_llong n_flows_timestamp;    /* Last time n_flows was updated. */
    struct ovs_mutex n_flows_mutex;

    /* Following fields are accessed and modified only from the main thread. */
    struct unixctl_conn **conns;       /* Connections waiting on dump_seq. */
    uint64_t conn_seq;                 /* Corresponds to 'dump_seq' when
                                        * conns[n_conns-1] was stored. */
    size_t n_conns;                    /* Number of connections waiting. */
};

struct handler以及struct revalidator

复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
/* A thread that reads upcalls from dpif, forwards each upcall's packet,
 * and possibly sets up a kernel flow as a cache. */
struct handler {
    struct udpif *udpif;               /* Parent udpif. */
    pthread_t thread;                  /* Thread ID. */
    uint32_t handler_id;               /* Handler id. */
};

/* A thread that processes datapath flows, updates OpenFlow statistics, and
 * updates or removes them if necessary.
 *
 * Revalidator threads operate in two phases: "dump" and "sweep".  In between
 * each phase, all revalidators sync up so that all revalidator threads are
 * either in one phase or the other, but not a combination.
 *
 *     During the dump phase, revalidators fetch flows from the datapath and
 *     attribute the statistics to OpenFlow rules.  Each datapath flow has a
 *     corresponding ukey which caches the most recently seen statistics.  If
 *     a flow needs to be deleted (for example, because it is unused over a
 *     period of time), revalidator threads may delete the flow during the
 *     dump phase.  The datapath is not guaranteed to reliably dump all flows
 *     from the datapath, and there is no mapping between datapath flows to
 *     revalidators, so a particular flow may be handled by zero or more
 *     revalidators during a single dump phase.  To avoid duplicate
 *     attribution of statistics, ukeys are never deleted during this phase.
 *
 *     During the sweep phase, each revalidator takes ownership of a different
 *     slice of umaps and sweeps through all ukeys in those umaps to figure
 *     out whether they need to be deleted.  During this phase, revalidators
 *     may fetch individual flows which were not dumped during the dump phase
 *     to validate them and attribute statistics.
 */
struct revalidator {
    struct udpif *udpif;               /* Parent udpif. */
    pthread_t thread;                  /* Thread ID. */
    unsigned int id;                   /* ovsthread_id_self(). */
};

之后对每个bridge,调用ofproto_run


netdev_run

复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
/* Does the periodic work required by every kind of netdev.
 *
 * Any program that opens netdevs must call this function from within its
 * main poll loop. */
void
netdev_run(void)
    OVS_EXCLUDED(netdev_mutex)
{
    struct netdev_registered_class *reg;

    netdev_initialize();

    CMAP_FOR_EACH (reg, cmap_node, &netdev_classes) {
        /* The 'run' hook is optional; skip classes that do not have one. */
        if (!reg->class->run) {
            continue;
        }
        reg->class->run(reg->class);
    }
}

netdev_run会首先调用netdev_initialize,该函数用一个struct ovsthread_once代码块来管理,保证只会调用一次。其主要工作是注册了不同的netdev_class和tunnel_class

复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
/* Registers the fatal-signal hook and the built-in netdev providers.
 *
 * The body is guarded by a static 'struct ovsthread_once': the registration
 * code runs only when ovsthread_once_start() returns true, and is followed
 * by ovsthread_once_done(). */
static void
netdev_initialize(void)
    OVS_EXCLUDED(netdev_mutex)
{
    static struct ovsthread_once init_once = OVSTHREAD_ONCE_INITIALIZER;

    if (!ovsthread_once_start(&init_once)) {
        return;
    }

    fatal_signal_add_hook(restore_all_flags, NULL, NULL, true);

    /* Patch ports, then the Linux-backed classes, then the tunnel vports. */
    netdev_vport_patch_register();
    netdev_register_provider(&netdev_linux_class);
    netdev_register_provider(&netdev_internal_class);
    netdev_register_provider(&netdev_tap_class);
    netdev_vport_tunnel_register();

    ovsthread_once_done(&init_once);
}

netdev_vport_patch_register注册了patch port这么一类的vport(patch port用于连接不同的bridge,本身没有dpif的任何属性),netdev_vport_tunnel_register则注册了多种tunnel类型,e.g.

复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
/* Table of tunnel vport classes, one TUNNEL_CLASS entry per tunnel type.
 *
 * NOTE(review): the TUNNEL_CLASS arguments appear to be the netdev type
 * name, a datapath device name, and the build-header, push-header and
 * pop-header callbacks (NULL where a type has none) -- confirm against the
 * TUNNEL_CLASS macro definition, which is not visible here. */
static const struct vport_class vport_classes[] = {
    TUNNEL_CLASS("geneve", "genev_sys", netdev_geneve_build_header,
                 netdev_tnl_push_udp_header,
                 netdev_geneve_pop_header),
    TUNNEL_CLASS("gre", "gre_sys", netdev_gre_build_header,
                 netdev_gre_push_header,
                 netdev_gre_pop_header),
    TUNNEL_CLASS("ipsec_gre", "gre_sys", NULL, NULL, NULL),
    TUNNEL_CLASS("vxlan", "vxlan_sys", netdev_vxlan_build_header,
                 netdev_tnl_push_udp_header,
                 netdev_vxlan_pop_header),
    TUNNEL_CLASS("lisp", "lisp_sys", NULL, NULL, NULL),
    TUNNEL_CLASS("stt", "stt_sys", NULL, NULL, NULL),
};

除此之外,多个类型的netdev_class也被注册进来,包括dpdk的多个netdev_class

复制代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
/* Linux-backed netdev classes, built with NETDEV_LINUX_CLASS.
 *
 * NOTE(review): the macro arguments appear to be the type name followed by
 * the construct, get_stats, get_features and get_status callbacks -- confirm
 * against the NETDEV_LINUX_CLASS definition, which is not visible here. */
const struct netdev_class netdev_linux_class =
    NETDEV_LINUX_CLASS(
        "system",
        netdev_linux_construct,
        netdev_linux_get_stats,
        netdev_linux_get_features,
        netdev_linux_get_status);

const struct netdev_class netdev_tap_class =
    NETDEV_LINUX_CLASS(
        "tap",
        netdev_linux_construct_tap,
        netdev_tap_get_stats,
        netdev_linux_get_features,
        netdev_linux_get_status);

const struct netdev_class netdev_internal_class =
    NETDEV_LINUX_CLASS(
        "internal",
        netdev_linux_construct,
        netdev_internal_get_stats,
        NULL,                  /* get_features */
        netdev_internal_get_status);

/* DPDK-backed netdev classes, built with NETDEV_DPDK_CLASS.
 *
 * NOTE(review): the meaning of each positional argument is fixed by the
 * NETDEV_DPDK_CLASS definition, which is not visible here; judging by the
 * callback names the order looks like type name, construct, destruct,
 * set_config, set_tx_multiq, send, get_carrier, get_stats, get_features,
 * get_status, reconfigure, rxq_recv -- confirm before relying on it. */
static const struct netdev_class dpdk_class =
    NETDEV_DPDK_CLASS(
        "dpdk",
        netdev_dpdk_construct,
        netdev_dpdk_destruct,
        netdev_dpdk_set_config,
        netdev_dpdk_set_tx_multiq,
        netdev_dpdk_eth_send,
        netdev_dpdk_get_carrier,
        netdev_dpdk_get_stats,
        netdev_dpdk_get_features,
        netdev_dpdk_get_status,
        netdev_dpdk_reconfigure,
        netdev_dpdk_rxq_recv);

static const struct netdev_class dpdk_ring_class =
    NETDEV_DPDK_CLASS(
        "dpdkr",
        netdev_dpdk_ring_construct,
        netdev_dpdk_destruct,
        netdev_dpdk_ring_set_config,
        netdev_dpdk_set_tx_multiq,
        netdev_dpdk_ring_send,
        netdev_dpdk_get_carrier,
        netdev_dpdk_get_stats,
        netdev_dpdk_get_features,
        netdev_dpdk_get_status,
        netdev_dpdk_reconfigure,
        netdev_dpdk_rxq_recv);

static const struct netdev_class dpdk_vhost_class =
    NETDEV_DPDK_CLASS(
        "dpdkvhostuser",
        netdev_dpdk_vhost_construct,
        netdev_dpdk_vhost_destruct,
        NULL,
        NULL,
        netdev_dpdk_vhost_send,
        netdev_dpdk_vhost_get_carrier,
        netdev_dpdk_vhost_get_stats,
        NULL,
        NULL,
        netdev_dpdk_vhost_reconfigure,
        netdev_dpdk_vhost_rxq_recv);

最后对于每一类注册的netdev_class,都会调用run函数,以netdev_linux_class为例(dpdk_class的run函数为空),主要是通过netlink sock来定期做一些更新的工作,不细说了

最后

以上就是背后牛排最近收集整理的关于ovs vswitchd的启动分析的全部内容,更多相关ovs内容请搜索靠谱客的其他文章。

本图文内容来源于网友提供,作为学习参考使用,或来自网络收集整理,版权属于原作者所有。
点赞(56)

评论列表共有 0 条评论

立即
投稿
返回
顶部