Overview
Starting ovs-vswitchd
The vswitchd startup code lives in the main() function of vswitchd/ovs-vswitchd.c. The two most important calls in its main loop are bridge_run() and netdev_run().
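For orientation, the main loop in ovs-vswitchd.c has roughly the following shape (simplified here; the exact set of calls varies between OVS versions):

while (!exiting) {
    ...
    bridge_run();                   /* Reconfigure bridges, run each ofproto. */
    unixctl_server_run(unixctl);    /* Serve ovs-appctl requests. */
    netdev_run();                   /* Periodic work for every netdev class. */

    bridge_wait();                  /* Each *_wait() registers the events to */
    unixctl_server_wait(unixctl);   /* wake up for; poll_block() then sleeps */
    netdev_wait();                  /* until one of them fires. */
    poll_block();
}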
bridge_run
void
bridge_run(void)
{
    ...
    /* Initialize the ofproto library.  This only needs to run once, but
     * it must be done after the configuration is set.  If the
     * initialization has already occurred, bridge_init_ofproto()
     * returns immediately. */
    bridge_init_ofproto(cfg);

    bridge_run__();
    ...
}
bridge_init_ofproto() initializes the ofproto-dpif component. It first gathers the bridge, port, and interface records from the OVSDB configuration, then calls ofproto_init() on the result:
/* Must be called to initialize the ofproto library.
 *
 * The caller may pass in 'iface_hints', which contains an shash of
 * "iface_hint" elements indexed by the interface's name.  The provider
 * may use these hints to describe the startup configuration in order to
 * reinitialize its state.  The caller owns the provided data, so a
 * provider will make copies of anything required.  An ofproto provider
 * will remove any existing state that is not described by the hint, and
 * may choose to remove it all. */
void
ofproto_init(const struct shash *iface_hints)
{
    struct shash_node *node;
    size_t i;

    ofproto_class_register(&ofproto_dpif_class);

    /* Make a local copy, since we don't own 'iface_hints' elements. */
    SHASH_FOR_EACH(node, iface_hints) {
        const struct iface_hint *orig_hint = node->data;
        struct iface_hint *new_hint = xmalloc(sizeof *new_hint);
        const char *br_type = ofproto_normalize_type(orig_hint->br_type);

        new_hint->br_name = xstrdup(orig_hint->br_name);
        new_hint->br_type = xstrdup(br_type);
        new_hint->ofp_port = orig_hint->ofp_port;

        shash_add(&init_ofp_ports, node->name, new_hint);
    }

    for (i = 0; i < n_ofproto_classes; i++) {
        ofproto_classes[i]->init(&init_ofp_ports);
    }

    ofproto_unixctl_init();
}
ofproto_init() first registers ofproto_dpif_class, OVS's default ofproto-dpif implementation, and then calls the init() hook of every registered ofproto class.
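ofproto_class_register() itself just appends the class to the global ofproto_classes array (ofproto/ofproto.c); roughly:

int
ofproto_class_register(const struct ofproto_class *new_class)
{
    size_t i;

    for (i = 0; i < n_ofproto_classes; i++) {
        if (ofproto_classes[i] == new_class) {
            return EEXIST;          /* Already registered. */
        }
    }

    if (n_ofproto_classes >= allocated_ofproto_classes) {
        ofproto_classes = x2nrealloc(ofproto_classes,
                                     &allocated_ofproto_classes,
                                     sizeof *ofproto_classes);
    }
    ofproto_classes[n_ofproto_classes++] = new_class;
    return 0;
}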
ofproto_dpif_class is OVS's ofproto implementation and lives in ofproto/ofproto-dpif.c. Let's look at its init() hook first:
static void
init(const struct shash *iface_hints)
{
    struct shash_node *node;

    /* Make a local copy, since we don't own 'iface_hints' elements. */
    SHASH_FOR_EACH(node, iface_hints) {
        const struct iface_hint *orig_hint = node->data;
        struct iface_hint *new_hint = xmalloc(sizeof *new_hint);

        new_hint->br_name = xstrdup(orig_hint->br_name);
        new_hint->br_type = xstrdup(orig_hint->br_type);
        new_hint->ofp_port = orig_hint->ofp_port;

        shash_add(&init_ofp_ports, node->name, new_hint);
    }

    ofproto_unixctl_init();
    udpif_init();
}
Both ofproto_unixctl_init() and udpif_init() register a number of ovs-appctl commands via unixctl_command_register().
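The registration interface is unixctl_command_register(name, usage, min_args, max_args, callback, aux). A minimal sketch of how a subsystem hooks a new ovs-appctl command (the command name and callback below are illustrative, not from the OVS tree):

static void
example_unixctl_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
                     const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
{
    /* The reply is sent back to the waiting "ovs-appctl example/show". */
    unixctl_command_reply(conn, "example output");
}

static void
example_init(void)
{
    unixctl_command_register("example/show", "", 0, 0,
                             example_unixctl_show, NULL);
}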
bridge_run__() ultimately ends up in the hooks registered by ofproto_dpif_class:
static void
bridge_run__(void)
{
    struct bridge *br;
    struct sset types;
    const char *type;

    /* Let each datapath type do the work that it needs to do. */
    sset_init(&types);
    ofproto_enumerate_types(&types);
    SSET_FOR_EACH (type, &types) {
        ofproto_type_run(type);
    }
    sset_destroy(&types);

    /* Let each bridge do the work that it needs to do. */
    HMAP_FOR_EACH (br, node, &all_bridges) {
        ofproto_run(br->ofproto);
    }
}
ofproto_enumerate_types() dispatches to the enumerate_types hook registered by ofproto_dpif_class, which in turn calls dp_enumerate_types():
void
dp_enumerate_types(struct sset *types)
{
    struct shash_node *node;

    dp_initialize();

    ovs_mutex_lock(&dpif_mutex);
    SHASH_FOR_EACH(node, &dpif_classes) {
        const struct registered_dpif_class *registered_class = node->data;

        sset_add(types, registered_class->dpif_class->type);
    }
    ovs_mutex_unlock(&dpif_mutex);
}
dp_enumerate_types() lists all supported dpif_class implementations. There are currently two main ones: dpif_netdev_class (the userspace datapath) and dpif_netlink_class (the kernel datapath).
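Both are registered by dp_initialize() (lib/dpif.c), again behind an ovsthread_once guard; roughly:

static const struct dpif_class *base_dpif_classes[] = {
#if defined(__linux__)
    &dpif_netlink_class,        /* Kernel datapath, driven over netlink. */
#endif
    &dpif_netdev_class,         /* Userspace datapath (also used by DPDK). */
};

static void
dp_initialize(void)
{
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;

    if (ovsthread_once_start(&once)) {
        size_t i;

        for (i = 0; i < ARRAY_SIZE(base_dpif_classes); i++) {
            dp_register_provider(base_dpif_classes[i]);
        }
        ovsthread_once_done(&once);
    }
}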
ofproto_type_run() dispatches to the type_run hook of ofproto_dpif_class, which in turn calls the dpif_class's run hook (via dpif_run()) as well as udpif_run().
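dpif_run() is a thin dispatch through the provider vtable (lib/dpif.c); roughly:

bool
dpif_run(struct dpif *dpif)
{
    if (dpif->dpif_class->run) {
        return dpif->dpif_class->run(dpif);
    }
    return false;
}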
struct udpif represents ofproto_dpif's upcall handler. It has two parts: an array of struct handler, which can be seen as a thread pool that services upcall requests, and an array of revalidators, which (I have not examined this part closely) appears to be a GC-like thread pool that reclaims expired flows.
/* An upcall handler for ofproto_dpif.
 *
 * udpif keeps records of two kind of logically separate units:
 *
 * upcall handling
 * ---------------
 *
 *    - An array of 'struct handler's for upcall handling and flow
 *      installation.
 *
 * flow revalidation
 * -----------------
 *
 *    - Revalidation threads which read the datapath flow table and maintains
 *      them.
 */
struct udpif {
    struct ovs_list list_node;         /* In all_udpifs list. */

    struct dpif *dpif;                 /* Datapath handle. */
    struct dpif_backer *backer;        /* Opaque dpif_backer pointer. */

    struct handler *handlers;          /* Upcall handlers. */
    size_t n_handlers;

    struct revalidator *revalidators;  /* Flow revalidators. */
    size_t n_revalidators;

    struct latch exit_latch;           /* Tells child threads to exit. */

    /* Revalidation. */
    struct seq *reval_seq;             /* Incremented to force revalidation. */
    bool reval_exit;                   /* Set by leader on 'exit_latch. */
    struct ovs_barrier reval_barrier;  /* Barrier used by revalidators. */
    struct dpif_flow_dump *dump;       /* DPIF flow dump state. */
    long long int dump_duration;       /* Duration of the last flow dump. */
    struct seq *dump_seq;              /* Increments each dump iteration. */
    atomic_bool enable_ufid;           /* If true, skip dumping flow attrs. */

    /* These variables provide a mechanism for the main thread to pause
     * all revalidation without having to completely shut the threads down.
     * 'pause_latch' is shared between the main thread and the lead
     * revalidator thread, so when it is desirable to halt revalidation, the
     * main thread will set the latch. 'pause' and 'pause_barrier' are shared
     * by revalidator threads.  The lead revalidator will set 'pause' when it
     * observes the latch has been set, and this will cause all revalidator
     * threads to wait on 'pause_barrier' at the beginning of the next
     * revalidation round. */
    bool pause;                        /* Set by leader on 'pause_latch. */
    struct latch pause_latch;          /* Set to force revalidators pause. */
    struct ovs_barrier pause_barrier;  /* Barrier used to pause all */
                                       /* revalidators by main thread. */

    /* Following fields are accessed and modified by different threads. */
    atomic_uint flow_limit;            /* Datapath flow hard limit. */

    /* n_flows_mutex prevents multiple threads updating these concurrently. */
    atomic_uint n_flows;               /* Number of flows in the datapath. */
    atomic_llong n_flows_timestamp;    /* Last time n_flows was updated. */
    struct ovs_mutex n_flows_mutex;

    /* Following fields are accessed and modified only from the main thread. */
    struct unixctl_conn **conns;       /* Connections waiting on dump_seq. */
    uint64_t conn_seq;                 /* Corresponds to 'dump_seq' when
                                          conns[n_conns-1] was stored. */
    size_t n_conns;                    /* Number of connections waiting. */
};
struct handler and struct revalidator:
/* A thread that reads upcalls from dpif, forwards each upcall's packet,
 * and possibly sets up a kernel flow as a cache. */
struct handler {
    struct udpif *udpif;               /* Parent udpif. */
    pthread_t thread;                  /* Thread ID. */
    uint32_t handler_id;               /* Handler id. */
};

/* A thread that processes datapath flows, updates OpenFlow statistics, and
 * updates or removes them if necessary.
 *
 * Revalidator threads operate in two phases: "dump" and "sweep". In between
 * each phase, all revalidators sync up so that all revalidator threads are
 * either in one phase or the other, but not a combination.
 *
 *     During the dump phase, revalidators fetch flows from the datapath and
 *     attribute the statistics to OpenFlow rules. Each datapath flow has a
 *     corresponding ukey which caches the most recently seen statistics. If
 *     a flow needs to be deleted (for example, because it is unused over a
 *     period of time), revalidator threads may delete the flow during the
 *     dump phase. The datapath is not guaranteed to reliably dump all flows
 *     from the datapath, and there is no mapping between datapath flows to
 *     revalidators, so a particular flow may be handled by zero or more
 *     revalidators during a single dump phase. To avoid duplicate attribution
 *     of statistics, ukeys are never deleted during this phase.
 *
 *     During the sweep phase, each revalidator takes ownership of a different
 *     slice of umaps and sweeps through all ukeys in those umaps to figure out
 *     whether they need to be deleted. During this phase, revalidators may
 *     fetch individual flows which were not dumped during the dump phase to
 *     validate them and attribute statistics.
 */
struct revalidator {
    struct udpif *udpif;               /* Parent udpif. */
    pthread_t thread;                  /* Thread ID. */
    unsigned int id;                   /* ovsthread_id_self(). */
};
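Both arrays are populated by udpif_start_threads() (ofproto/ofproto-dpif-upcall.c), which spawns one OS thread per element; the handler side, roughly:

udpif->handlers = xzalloc(udpif->n_handlers * sizeof *udpif->handlers);
for (i = 0; i < udpif->n_handlers; i++) {
    struct handler *handler = &udpif->handlers[i];

    handler->udpif = udpif;
    handler->handler_id = i;
    handler->thread = ovs_thread_create(
        "handler", udpif_upcall_handler, handler);
}

Each udpif_upcall_handler thread then loops on receiving upcalls until exit_latch is set.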
After that, back in bridge_run__(), ofproto_run() is called for each bridge.
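ofproto_run() (ofproto/ofproto.c), simplified, again dispatches into the provider:

int
ofproto_run(struct ofproto *p)
{
    int error = p->ofproto_class->run(p);  /* ofproto-dpif's per-bridge work. */

    /* (Simplified: the real function also expires rules, handles port
     * changes, and processes OpenFlow messages via connmgr_run().) */
    return error;
}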
netdev_run
/* Performs periodic work needed by all the various kinds of netdevs.
 *
 * If your program opens any netdevs, it must call this function within its
 * main poll loop. */
void
netdev_run(void)
    OVS_EXCLUDED(netdev_mutex)
{
    netdev_initialize();

    struct netdev_registered_class *rc;
    CMAP_FOR_EACH (rc, cmap_node, &netdev_classes) {
        if (rc->class->run) {
            rc->class->run(rc->class);
        }
    }
}
netdev_run() first calls netdev_initialize(). That function is guarded by a struct ovsthread_once block, which guarantees it runs only once; its main job is to register the various netdev_class and tunnel class providers:
static void
netdev_initialize(void)
    OVS_EXCLUDED(netdev_mutex)
{
    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;

    if (ovsthread_once_start(&once)) {
        fatal_signal_add_hook(restore_all_flags, NULL, NULL, true);

        netdev_vport_patch_register();
        netdev_register_provider(&netdev_linux_class);
        netdev_register_provider(&netdev_internal_class);
        netdev_register_provider(&netdev_tap_class);
        netdev_vport_tunnel_register();

        ovsthread_once_done(&once);
    }
}
netdev_vport_patch_register() registers the patch class of vport (a patch port connects two bridges and has no dpif attributes of its own), while netdev_vport_tunnel_register() registers several tunnel types, e.g.
static const struct vport_class vport_classes[] = {
    TUNNEL_CLASS("geneve", "genev_sys", netdev_geneve_build_header,
                                        netdev_tnl_push_udp_header,
                                        netdev_geneve_pop_header),
    TUNNEL_CLASS("gre", "gre_sys", netdev_gre_build_header,
                                   netdev_gre_push_header,
                                   netdev_gre_pop_header),
    TUNNEL_CLASS("ipsec_gre", "gre_sys", NULL, NULL, NULL),
    TUNNEL_CLASS("vxlan", "vxlan_sys", netdev_vxlan_build_header,
                                       netdev_tnl_push_udp_header,
                                       netdev_vxlan_pop_header),
    TUNNEL_CLASS("lisp", "lisp_sys", NULL, NULL, NULL),
    TUNNEL_CLASS("stt", "stt_sys", NULL, NULL, NULL),
};
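Each TUNNEL_CLASS entry binds an OVSDB interface type (e.g. "vxlan") to a datapath device-name prefix (e.g. "vxlan_sys") plus three optional callbacks that the userspace datapath uses for encapsulation. Approximately, as declared in struct netdev_class (lib/netdev-provider.h) in the OVS version quoted here:

/* Builds the static template of the encapsulation header, once per flow. */
int (*build_header)(const struct netdev *, struct ovs_action_push_tnl *data,
                    const struct netdev_tnl_build_header_params *params);
/* Prepends the prebuilt header to each packet on transmit. */
void (*push_header)(struct dp_packet *packet,
                    const struct ovs_action_push_tnl *data);
/* Strips and parses the header on receive. */
struct dp_packet *(*pop_header)(struct dp_packet *packet);

Entries that pass NULL for all three (ipsec_gre, lisp, stt above) provide no userspace encap/decap, so those tunnel types can only be handled by the kernel datapath.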
Beyond these, several other netdev_class implementations are registered as well, including the DPDK ones:
const struct netdev_class netdev_linux_class =
    NETDEV_LINUX_CLASS(
        "system",
        netdev_linux_construct,
        netdev_linux_get_stats,
        netdev_linux_get_features,
        netdev_linux_get_status);

const struct netdev_class netdev_tap_class =
    NETDEV_LINUX_CLASS(
        "tap",
        netdev_linux_construct_tap,
        netdev_tap_get_stats,
        netdev_linux_get_features,
        netdev_linux_get_status);

const struct netdev_class netdev_internal_class =
    NETDEV_LINUX_CLASS(
        "internal",
        netdev_linux_construct,
        netdev_internal_get_stats,
        NULL,                  /* get_features */
        netdev_internal_get_status);

static const struct netdev_class dpdk_class =
    NETDEV_DPDK_CLASS(
        "dpdk",
        netdev_dpdk_construct,
        netdev_dpdk_destruct,
        netdev_dpdk_set_config,
        netdev_dpdk_set_tx_multiq,
        netdev_dpdk_eth_send,
        netdev_dpdk_get_carrier,
        netdev_dpdk_get_stats,
        netdev_dpdk_get_features,
        netdev_dpdk_get_status,
        netdev_dpdk_reconfigure,
        netdev_dpdk_rxq_recv);

static const struct netdev_class dpdk_ring_class =
    NETDEV_DPDK_CLASS(
        "dpdkr",
        netdev_dpdk_ring_construct,
        netdev_dpdk_destruct,
        netdev_dpdk_ring_set_config,
        netdev_dpdk_set_tx_multiq,
        netdev_dpdk_ring_send,
        netdev_dpdk_get_carrier,
        netdev_dpdk_get_stats,
        netdev_dpdk_get_features,
        netdev_dpdk_get_status,
        netdev_dpdk_reconfigure,
        netdev_dpdk_rxq_recv);

static const struct netdev_class dpdk_vhost_class =
    NETDEV_DPDK_CLASS(
        "dpdkvhostuser",
        netdev_dpdk_vhost_construct,
        netdev_dpdk_vhost_destruct,
        NULL,
        NULL,
        netdev_dpdk_vhost_send,
        netdev_dpdk_vhost_get_carrier,
        netdev_dpdk_vhost_get_stats,
        NULL,
        NULL,
        netdev_dpdk_vhost_reconfigure,
        netdev_dpdk_vhost_rxq_recv);
Finally, the run hook of every registered netdev_class is invoked. Taking netdev_linux_class as an example (dpdk_class's run hook is empty), it mainly uses a netlink socket to perform periodic state updates; we won't go into the details here.
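For a feel of the underlying mechanism, here is a standalone illustration (plain libc, not OVS code) of subscribing to the rtnetlink link notifications that netdev_linux_run() consumes:

#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>

int main(void)
{
    /* Netlink socket subscribed to link (interface) change events. */
    int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
    struct sockaddr_nl addr;

    memset(&addr, 0, sizeof addr);
    addr.nl_family = AF_NETLINK;
    addr.nl_groups = RTMGRP_LINK;      /* Link up/down, MTU changes, etc. */
    bind(fd, (struct sockaddr *) &addr, sizeof addr);

    for (;;) {
        char buf[4096];
        ssize_t n = recv(fd, buf, sizeof buf, 0);
        struct nlmsghdr *nlh;

        if (n < 0) {
            break;
        }
        for (nlh = (struct nlmsghdr *) buf; NLMSG_OK(nlh, n);
             nlh = NLMSG_NEXT(nlh, n)) {
            if (nlh->nlmsg_type == RTM_NEWLINK
                || nlh->nlmsg_type == RTM_DELLINK) {
                struct ifinfomsg *ifi = NLMSG_DATA(nlh);
                printf("link change on ifindex %d\n", ifi->ifi_index);
            }
        }
    }
    return 0;
}

netdev_linux_run() does essentially this through OVS's netlink socket wrappers, using each message to refresh the cached state of the corresponding netdev.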