2.3. vlib_frame_t
从arguments地址开始的内存空间是vector空间,作为结点接收数据包索引的缓冲区。
2.3.1. vlib_frame_t内存空间
每个线程都会从堆中分配(mmap)一块内存clib_per_cpu_mheaps
vm->heap_base = clib_mem_get_heap(); 默认大小为64MB
每个vlib_frame_t都将在此mheap上申请,并对应一个elt
函数vlib_frame_alloc_to_node用于申请vlib_frame_t,并将其与某个特定node关联起来
2.3.2. frame_index的获得
下图所示为由vlib_frame_t的地址计算出frame_index的过程,本质上是一次地址算术运算,并做了一次归一化处理。
当系统为32位系统时,按32Bytes对齐,所以直接通过frame_index * VLIB_FRAME_ALIGN的方式得到frame指针
2.4. vlib_next_frame_t
/* State kept for one "next" of a node: which frame is currently attached
   to it, which node runtime it feeds, a set of flag bits, and a counter of
   vectors enqueued.  Most VLIB_FRAME_* flag values below are defined as
   aliases of VLIB_NODE_FLAG_* values rather than as independent bits. */
typedef struct {
/* Index of the frame associated with this next. */
u32 frame_index;
/* Index of the node runtime that this next refers to. */
u32 node_runtime_index;
/* Next frame flags; holds the VLIB_FRAME_* values defined below. */
u32 flags;
/* Mirrors the node's VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH flag
   for this next. */
#define VLIB_FRAME_NO_FREE_AFTER_DISPATCH VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH
/* This next frame owns enqueue to the node corresponding to
   node_runtime_index. */
#define VLIB_FRAME_OWNER (1 << 15)
/* Set when a frame has been allocated for this next
   (same value as VLIB_NODE_FLAG_IS_OUTPUT). */
#define VLIB_FRAME_IS_ALLOCATED VLIB_NODE_FLAG_IS_OUTPUT
/* Set when the frame has been added to the pending vector
   (same value as VLIB_NODE_FLAG_IS_DROP). */
#define VLIB_FRAME_PENDING VLIB_NODE_FLAG_IS_DROP
/* Set when the frame is to be freed after dispatch
   (same value as VLIB_NODE_FLAG_IS_PUNT). */
#define VLIB_FRAME_FREE_AFTER_DISPATCH VLIB_NODE_FLAG_IS_PUNT
/* Set when the frame has traced packets
   (same value as VLIB_NODE_FLAG_TRACE). */
#define VLIB_FRAME_TRACE VLIB_NODE_FLAG_TRACE
/* Number of vectors enqueued to this next since the last overflow. */
u32 vectors_since_last_overflow;
} vlib_next_frame_t;
2.5. vlib_pending_frame_t
/* A frame pending dispatch by the main loop.  Each entry names the node
   runtime to run, the frame to hand to it, and where that node's next
   frames start. */
typedef struct {
/* Index of the node runtime that will process this frame. */
u32 node_runtime_index;
/* Frame index (in the heap). */
u32 frame_index;
/* Start of next frames for this node, or
   VLIB_PENDING_FRAME_NO_NEXT_FRAME when there is none. */
u32 next_frame_index;
/* Special value for next_frame_index when there is no next frame
   (all bits set). */
#define VLIB_PENDING_FRAME_NO_NEXT_FRAME ((u32) ~0)
} vlib_pending_frame_t;
2.6. vlib_node_t
/* Descriptor for one graph node: its dispatch function, name, statistics,
   type and flags, error-string tables, links to next/previous/sibling
   nodes, and optional format/validate callbacks. */
typedef struct vlib_node_t {
/* Vector processing function for this node. */
vlib_node_function_t *function;
/* Node name. */
u8 *name;
/* Index of the node name in the elog string table. */
u32 name_elog_string;
/* Total statistics for this node. */
vlib_node_stats_t stats_total;
/* Values saved at the last clear (or zero if never cleared).
   Current values are always stats_total - stats_last_clear. */
vlib_node_stats_t stats_last_clear;
/* Type of this node. */
vlib_node_type_t type;
/* Node index. */
u32 index;
/* Index of the corresponding node runtime. */
u32 runtime_index;
/* Runtime data for this node; its size is given by runtime_data_bytes. */
void *runtime_data;
/* Node flags; holds the VLIB_NODE_FLAG_* bits defined below. */
u16 flags;
/* Processing function keeps the frame.
   Tells the node dispatching code not to free the frame after dispatch
   is done. */
#define VLIB_NODE_FLAG_FRAME_NO_FREE_AFTER_DISPATCH (1 << 0)
/* Node counts as an output/drop/punt node for stats purposes. */
#define VLIB_NODE_FLAG_IS_OUTPUT (1 << 1)
#define VLIB_NODE_FLAG_IS_DROP (1 << 2)
#define VLIB_NODE_FLAG_IS_PUNT (1 << 3)
/* NOTE(review): no comment in the original; presumably marks a handoff
   node in the same way as the three flags above -- confirm. */
#define VLIB_NODE_FLAG_IS_HANDOFF (1 << 4)
/* Set if the current node runtime has traced vectors. */
#define VLIB_NODE_FLAG_TRACE (1 << 5)
/* NOTE(review): no comment in the original; the names suggest these mark
   an input node switching between interrupt and polling mode -- confirm
   against the dispatch code. */
#define VLIB_NODE_FLAG_SWITCH_FROM_INTERRUPT_TO_POLLING_MODE (1 << 6)
#define VLIB_NODE_FLAG_SWITCH_FROM_POLLING_TO_INTERRUPT_MODE (1 << 7)
/* State for input nodes. */
u8 state;
/* Number of bytes of runtime data. */
u8 runtime_data_bytes;
/* Number of error codes used by this node. */
u16 n_errors;
/* Sizes of the scalar and vector arguments, in bytes. */
u16 scalar_size, vector_size;
/* Handle/index in the error heap for this node. */
u32 error_heap_handle;
u32 error_heap_index;
/* Error strings for this node, indexed by error code. */
char **error_strings;
/* Vector of next node names.
   Only used before the next_nodes array is initialized. */
char **next_node_names;
/* Next node indices for this node. */
u32 *next_nodes;
/* Name of the node that this node is a sibling of. */
char *sibling_of;
/* Bitmap of all of this node's siblings. */
uword *sibling_bitmap;
/* Total number of vectors sent to each next node. */
u64 *n_vectors_by_next_node;
/* Hash table mapping a next node index to its slot in the next_nodes
   vector.  Quickly determines whether this node is connected to a given
   next node and, if so, through which slot. */
uword *next_slot_by_node;
/* Bitmap of node indices which feed this node. */
uword *prev_node_bitmap;
/* Node index / next index that own enqueue rights to this node. */
u32 owner_node_index, owner_next_index;
/* Buffer format/unformat functions for this node. */
format_function_t *format_buffer;
unformat_function_t *unformat_buffer;
/* Trace buffer format function for this node. */
format_function_t *format_trace;
/* Function to validate incoming frames.
   NOTE(review): the meaning of the returned u8 * is not shown here --
   confirm against callers. */
u8 *(*validate_frame) (struct vlib_main_t * vm, struct vlib_node_runtime_t *, struct vlib_frame_t * f);
/* For pretty-printing; not typically valid. */
u8 *state_string;
} vlib_node_t;
2.7. vlib_node_main_t
/* Top-level bookkeeping for the node graph: the node table and name lookup,
   per-type node runtimes, pending interrupts and frames, timing-wheel and
   process state, frame-size allocation info, and node registrations. */
typedef struct {
/* Public nodes. */
vlib_node_t **nodes;
/* Node index hashed by node name. */
uword *node_by_name;
/* Flags; holds the VLIB_NODE_MAIN_* bits defined below. */
u32 flags;
/* NOTE(review): no comment in the original; the name suggests it is set
   once node runtimes have been started -- confirm. */
#define VLIB_NODE_MAIN_RUNTIME_STARTED (1 << 0)
/* Node runtimes segregated by type for cache locality.
   Does not apply to nodes of type VLIB_NODE_TYPE_INTERNAL. */
vlib_node_runtime_t *nodes_by_type[VLIB_N_NODE_TYPE];
/* Node runtime indices for input nodes with pending interrupts. */
u32 *pending_interrupt_node_runtime_indices;
/* NOTE(review): presumably guards pending_interrupt_node_runtime_indices
   -- confirm against its users. */
clib_spinlock_t pending_interrupt_lock;
/* Input nodes are switched between interrupt and polling mode when the
   average vector length goes above/below the polling/interrupt
   thresholds. */
u32 polling_threshold_vector_length;
u32 interrupt_threshold_vector_length;
/* Vector of next frames. */
vlib_next_frame_t *next_frames;
/* Vector of internal nodes' frames waiting to be called. */
vlib_pending_frame_t *pending_frames;
/* Timing wheel for scheduling time-based node dispatch. */
void *timing_wheel;
/* NOTE(review): no comment in the original; by its name, a pool of
   timed-signal event records -- confirm. */
vlib_signal_timed_event_data_t *signal_timed_event_data_pool;
/* Opaque data vector added via timing_wheel_advance. */
u32 *data_from_advancing_timing_wheel;
/* CPU time at which the next process becomes ready on the timing wheel. */
f64 time_next_process_ready;
/* Vector of process nodes.
   One for each node of type VLIB_NODE_TYPE_PROCESS. */
vlib_process_t **processes;
/* Index of the currently running process, or ~0 if none is running. */
u32 current_process_index;
/* Pool of pending process frames. */
vlib_pending_frame_t *suspended_process_frames;
/* Vector of event data vectors pending recycle. */
void **recycled_event_data_vectors;
/* Current count of input nodes in each state. */
u32 input_node_counts_by_state[VLIB_N_NODE_STATE];
/* Hash of (scalar_size, vector_size) to frame_sizes index. */
uword *frame_size_hash;
/* Per-size frame allocation information. */
vlib_frame_size_t *frame_sizes;
/* Time of the last node runtime stats clear. */
f64 time_last_runtime_stats_clear;
/* Node registrations added by constructors. */
vlib_node_registration_t *node_registrations;
} vlib_node_main_t;
2.8. node调度流程
vlib_main_or_worker_loop的主要流程如下:
- 处理pre-input结点——polling形式
- 处理input结点——polling形式
- 处理来自control-plane API queue的信号事件
- 处理input结点——interrupt形式
- timing wheel处理
- 执行pending_frames中记录的信息
根据pending_frames里面记载的信息进行nodes调度,这里nodes调度类似于流水线处理方式,一直执行直到pending_frames中无任何数据为止。
- pending_frames中的internal结点可能会resume process node,所以需要再跳回timing wheel做一次调度