net_device结构体存储着网络设备的所有信息,每个设备都有一个这样的结构。所有设备的net_device结构都放在由全局变量dev_base指向的全局链表中。和sk_buff一样,这个结构也相当庞大。
结构的字段可以分为:
- 配置(configuration)
- 统计数据(Statistics)
- 设备状态(Device status)
- 列表管理(List management)
- 流量管理(Traffic management)
- 功能专用(Feature specific)
- 通用(Generic)
- 函数指针(或VFT)
结构体中有一个next指针,用来连接系统中所有网络设备。内核把这些连接起来的设备组成一个链表,并由全局变量dev_base指向链表的第一个元素。
每个网络设备都会有一个对应的net_device实例,然后调用register_netdevice()(定义于文件net/core/dev.c)注册到系统中,注销则通过unregister_netdevice()完成。
1.1.1.1 net_device定义
定义在:include/linux/netdevice.h文件中。非常庞大,差不多是内核中最复杂的结构了。源码中也承认这个结构是一个大失误了,将来可能会修改。
* Actually, this whole structure is a big mistake. It mixes I/O
* data with strictly "high-level" data, and it has to know about
* almost every data structure used in the INET module.
/*
 * struct net_device - per-device descriptor for a network interface.
 *
 * Field comments follow the kernel-doc in include/linux/netdevice.h.
 * Entries marked "XXX" are undocumented upstream as well.
 */
struct net_device {
	char name[IFNAMSIZ]; // Interface name
	struct hlist_node name_hlist; // Device name hash chain node
	struct dev_ifalias __rcu *ifalias; // SNMP alias
	/*
	 * I/O specific fields
	 * FIXME: Merge these and struct ifmap into one
	 */
	unsigned long mem_end; // Shared memory end address
	unsigned long mem_start; // Shared memory start address
	unsigned long base_addr; // Device I/O address
	int irq; // Device IRQ number
	/*
	 * Some hardware also needs these fields (state,dev_list,
	 * napi_list,unreg_list,close_list) but they are not
	 * part of the usual set specified in Space.c.
	 */
	unsigned long state; // Generic network queuing layer state, see netdev_state_t
	struct list_head dev_list; // The global list of network devices
	struct list_head napi_list; // List entry used for polling NAPI devices
	struct list_head unreg_list; // List entry when we are unregistering the device; see the function unregister_netdev
	struct list_head close_list; // List entry used when we are closing the device
	struct list_head ptype_all; // Device-specific packet handlers for all protocols
	struct list_head ptype_specific; // Device-specific, protocol-specific packet handlers
	struct {
		struct list_head upper;
		struct list_head lower;
	} adj_list; // Directly linked devices, like slaves for bonding
	netdev_features_t features; // Currently active device features
	netdev_features_t hw_features; // User-changeable features
	netdev_features_t wanted_features; // User-requested features
	netdev_features_t vlan_features; // Mask of features inheritable by VLAN devices
	/*
	 * Mask of features inherited by encapsulating devices.
	 * This field indicates what encapsulation offloads the hardware is
	 * capable of doing, and drivers will need to set them appropriately.
	 */
	netdev_features_t hw_enc_features;
	netdev_features_t mpls_features; // Mask of features inheritable by MPLS
	netdev_features_t gso_partial_features;
	int ifindex; // Interface index
	int group; // The group the device belongs to
	struct net_device_stats stats; // Statistics struct, which was left as a legacy, use rtnl_link_stats64 instead
	atomic_long_t rx_dropped; // Dropped packets by core network, do not use this in drivers
	atomic_long_t tx_dropped; // Dropped packets by core network, do not use this in drivers
	atomic_long_t rx_nohandler; // nohandler dropped packets by core network on inactive devices, do not use this in drivers
	/* Stats to monitor link on/off, flapping */
	atomic_t carrier_up_count; // Number of times the carrier has been up
	atomic_t carrier_down_count; // Number of times the carrier has been down
#ifdef CONFIG_WIRELESS_EXT
	const struct iw_handler_def *wireless_handlers; // List of functions to handle Wireless Extensions, instead of ioctl, see <net/iw_handler.h> for details.
	struct iw_public_data *wireless_data; // Instance data managed by the core of wireless extensions
#endif
	const struct net_device_ops *netdev_ops; // Includes several pointers to callbacks, if one wants to override the ndo_*() functions
	const struct ethtool_ops *ethtool_ops; // Management operations
#ifdef CONFIG_NET_SWITCHDEV
	const struct switchdev_ops *switchdev_ops;
#endif
#ifdef CONFIG_NET_L3_MASTER_DEV
	const struct l3mdev_ops *l3mdev_ops;
#endif
#if IS_ENABLED(CONFIG_IPV6)
	const struct ndisc_ops *ndisc_ops; // Includes callbacks for different IPv6 neighbour discovery handling. Necessary for e.g. 6LoWPAN.
#endif
#ifdef CONFIG_XFRM_OFFLOAD
	const struct xfrmdev_ops *xfrmdev_ops;
#endif
	const struct header_ops *header_ops; // Includes callbacks for creating, parsing, caching, etc. of Layer 2 headers.
	unsigned int flags; // Interface flags
	unsigned int priv_flags; // Like 'flags' but invisible to userspace, see if.h for the definitions
	unsigned short gflags; // Global flags (kept as legacy)
	unsigned short padded; // How much padding added by alloc_netdev()
	unsigned char operstate; // RFC2863 operstate
	unsigned char link_mode; // Mapping policy to operstate
	unsigned char if_port; // Selectable AUI, TP, ...
	unsigned char dma; // DMA channel
	unsigned int mtu; // Interface MTU value
	unsigned int min_mtu; // Interface Minimum MTU value
	unsigned int max_mtu; // Interface Maximum MTU value
	unsigned short type; // Interface hardware type
	unsigned short hard_header_len; // Maximum hardware header length
	unsigned char min_header_len; // Minimum hardware header length
	unsigned short needed_headroom; // Extra headroom the hardware may need, but not in all cases can this be guaranteed
	unsigned short needed_tailroom; // Extra tailroom the hardware may need, but not in all cases can this be guaranteed. Some cases also use LL_MAX_HEADER instead to allocate the skb
	/* Interface address info. */
	unsigned char perm_addr[MAX_ADDR_LEN]; // Permanent hardware address
	unsigned char addr_assign_type; // Hw address assignment type
	unsigned char addr_len; // Hardware address length
	unsigned short neigh_priv_len; // Used in neigh_alloc()
	unsigned short dev_id; // Used to differentiate devices that share the same link layer address
	unsigned short dev_port; // Used to differentiate devices that share the same function
	spinlock_t addr_list_lock; // XXX: need comments on this one
	unsigned char name_assign_type; // Network interface name assignment type
	/*
	 * Counter that indicates promiscuous mode has been enabled due to
	 * the need to listen to additional unicast addresses in a device
	 * that does not implement ndo_set_rx_mode()
	 */
	bool uc_promisc;
	struct netdev_hw_addr_list uc; // Unicast mac addresses
	struct netdev_hw_addr_list mc; // Multicast mac addresses
	struct netdev_hw_addr_list dev_addrs; // List of device hw addresses
#ifdef CONFIG_SYSFS
	struct kset *queues_kset; // Group of all Kobjects in the Tx and RX queues
#endif
	unsigned int promiscuity; // Number of times the NIC is told to work in promiscuous mode; if it becomes 0 the NIC will exit promiscuous mode
	unsigned int allmulti; // Counter, enables or disables allmulticast mode
	/* Protocol-specific pointers */
#if IS_ENABLED(CONFIG_VLAN_8021Q)
	struct vlan_info __rcu *vlan_info; // VLAN info
#endif
#if IS_ENABLED(CONFIG_NET_DSA)
	struct dsa_port *dsa_ptr; // DSA specific data
#endif
#if IS_ENABLED(CONFIG_TIPC)
	struct tipc_bearer __rcu *tipc_ptr; // TIPC specific data
#endif
	void *atalk_ptr; // AppleTalk specific pointer
	struct in_device __rcu *ip_ptr; // IPv4 specific data
	struct dn_dev __rcu *dn_ptr; // DECnet specific data
	struct inet6_dev __rcu *ip6_ptr; // IPv6 specific data
	void *ax25_ptr; // AX.25 specific data
	struct wireless_dev *ieee80211_ptr; // IEEE 802.11 specific data, assign before registering
	struct wpan_dev *ieee802154_ptr; // IEEE 802.15.4 specific data
#if IS_ENABLED(CONFIG_MPLS_ROUTING)
	struct mpls_dev __rcu *mpls_ptr; // MPLS specific data
#endif
	/*
	 * Cache lines mostly used on receive path (including eth_type_trans())
	 */
	/* Interface address info used in eth_type_trans() */
	unsigned char *dev_addr; // Hw address (before bcast, because most packets are unicast)
	struct netdev_rx_queue *_rx; // Array of RX queues
	unsigned int num_rx_queues; // Number of RX queues allocated at register_netdev() time
	unsigned int real_num_rx_queues; // Number of RX queues currently active in device
	struct bpf_prog __rcu *xdp_prog;
	unsigned long gro_flush_timeout;
	rx_handler_func_t __rcu *rx_handler; // Handler for received packets
	void __rcu *rx_handler_data; // XXX: need comments on this one
#ifdef CONFIG_NET_CLS_ACT
	struct mini_Qdisc __rcu *miniq_ingress; // ingress/clsact qdisc specific data for ingress processing
#endif
	struct netdev_queue __rcu *ingress_queue; // XXX: need comments on this one
#ifdef CONFIG_NETFILTER_INGRESS
	struct nf_hook_entries __rcu *nf_hooks_ingress;
#endif
	unsigned char broadcast[MAX_ADDR_LEN]; // Hardware broadcast address
#ifdef CONFIG_RFS_ACCEL
	struct cpu_rmap *rx_cpu_rmap; // CPU reverse-mapping for RX completion interrupts, indexed by RX queue number. Assigned by driver. This must only be set if the ndo_rx_flow_steer operation is defined
#endif
	struct hlist_node index_hlist; // Device index hash chain
	/*
	 * Cache lines mostly used on transmit path
	 */
	struct netdev_queue *_tx ____cacheline_aligned_in_smp; // Array of TX queues
	unsigned int num_tx_queues; // Number of TX queues allocated at alloc_netdev_mq() time
	unsigned int real_num_tx_queues; // Number of TX queues currently active in device
	struct Qdisc *qdisc; // Root qdisc from userspace point of view
#ifdef CONFIG_NET_SCHED
	DECLARE_HASHTABLE (qdisc_hash, 4);
#endif
	unsigned int tx_queue_len; // Max frames per queue allowed
	spinlock_t tx_global_lock; // XXX: need comments on this one
	int watchdog_timeo; // Watchdog timeout value
#ifdef CONFIG_XPS
	struct xps_dev_maps __rcu *xps_maps; // XXX: need comments on this one
#endif
#ifdef CONFIG_NET_CLS_ACT
	struct mini_Qdisc __rcu *miniq_egress; // clsact qdisc specific data for egress processing
#endif
	/* These may be needed for future network-power-down code. */
	struct timer_list watchdog_timer; // List of timers
	int __percpu *pcpu_refcnt; // Number of references to this device
	struct list_head todo_list; // Delayed register/unregister
	struct list_head link_watch_list; // XXX: need comments on this one
	enum { NETREG_UNINITIALIZED=0,
	       NETREG_REGISTERED, /* completed register_netdevice */
	       NETREG_UNREGISTERING, /* called unregister_netdevice */
	       NETREG_UNREGISTERED, /* completed unregister todo */
	       NETREG_RELEASED, /* called free_netdev */
	       NETREG_DUMMY, /* dummy device for NAPI poll */
	} reg_state:8; // Register/unregister state machine
	bool dismantle; // Device is going to be freed
	enum {
		RTNL_LINK_INITIALIZED,
		RTNL_LINK_INITIALIZING,
	} rtnl_link_state:16; // This enum represents the phases of creating a new link
	bool needs_free_netdev; // Should unregister perform free_netdev?
	void (*priv_destructor)(struct net_device *dev); // Called from unregister
#ifdef CONFIG_NETPOLL
	struct netpoll_info __rcu *npinfo; // XXX: need comments on this one
#endif
	possible_net_t nd_net; // Network namespace this device is in
	/* mid-layer private */
	union {
		void *ml_priv; // Mid-layer private
		struct pcpu_lstats __percpu *lstats; // Loopback statistics
		struct pcpu_sw_netstats __percpu *tstats; // Tunnel statistics
		struct pcpu_dstats __percpu *dstats; // Dummy statistics
		struct pcpu_vstats __percpu *vstats; // Virtual ethernet statistics
	};
#if IS_ENABLED(CONFIG_GARP)
	struct garp_port __rcu *garp_port; // GARP
#endif
#if IS_ENABLED(CONFIG_MRP)
	struct mrp_port __rcu *mrp_port; // MRP
#endif
	struct device dev; // Class/net/name entry
	const struct attribute_group *sysfs_groups[4]; // Space for optional device, statistics and wireless sysfs groups
	const struct attribute_group *sysfs_rx_queue_group; // Space for optional per-rx queue attributes
	const struct rtnl_link_ops *rtnl_link_ops; // Rtnl_link_ops
	/* for setting kernel sock attribute on TCP connection setup */
#define GSO_MAX_SIZE 65536
	unsigned int gso_max_size; // Maximum size of generic segmentation offload
#define GSO_MAX_SEGS 65535
	u16 gso_max_segs; // Maximum number of segments that can be passed to the NIC for GSO
#ifdef CONFIG_DCB
	const struct dcbnl_rtnl_ops *dcbnl_ops; // Data Center Bridging netlink ops
#endif
	u8 num_tc; // Number of traffic classes in the net device
	struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE]; // XXX: need comments on this one
	u8 prio_tc_map[TC_BITMASK + 1]; // XXX: need comments on this one
#if IS_ENABLED(CONFIG_FCOE)
	unsigned int fcoe_ddp_xid; // Max exchange id for FCoE LRO by ddp
#endif
#if IS_ENABLED(CONFIG_CGROUP_NET_PRIO)
	struct netprio_map __rcu *priomap; // XXX: need comments on this one
#endif
	struct phy_device *phydev; // Physical device may attach itself for hardware timestamping
	struct lock_class_key *qdisc_tx_busylock; // lockdep class annotating Qdisc->busylock spinlock
	struct lock_class_key *qdisc_running_key; // lockdep class annotating Qdisc->running seqcount
	bool proto_down; // Protocol port state information can be sent to the switch driver and used to set the phys state of the switch port.
};
其中设备特性(features)相关的标志位定义在include/linux/netdev_features.h文件中。
NETIF_F_SG_BIT, /* Scatter/gather IO. */
NETIF_F_IP_CSUM_BIT, /* Can checksum TCP/UDP over IPv4. */
__UNUSED_NETIF_F_1,
NETIF_F_HW_CSUM_BIT, /* Can checksum all the packets. */
NETIF_F_IPV6_CSUM_BIT, /* Can checksum TCP/UDP over IPV6 */
NETIF_F_HIGHDMA_BIT, /* Can DMA to high memory. */
NETIF_F_FRAGLIST_BIT, /* Scatter/gather IO. */
……..
接口的flags定义在如下的文件中:
include/uapi/linux/if.h