内核中的UDP socket流程(5)——inet_create
作者:gfree.wind@gmail.com
进入函数inet_create
static int inet_create(struct net *net, struct socket *sock, int protocol, int kern) { struct sock *sk; struct inet_protosw *answer; struct inet_sock *inet; struct proto *answer_prot; unsigned char answer_flags; char answer_no_check; int try_loading_module = 0; int err;
if (unlikely(!inet_ehash_secret)) if (sock->type != SOCK_RAW && sock->type != SOCK_DGRAM) build_ehash_secret(); |
当socket为UDP时,sock->type = SOCK_DGRAM。所以这里不会调用到build_ehash_secret()————其实这个函数是为了产生一个随机数并赋给inet_ehash_secret————暂时还不知道这个变量的确切用途。
通过sock->type和protocol在inetsw链表中找到对应的proto;
/* Look for the requested type/protocol pair. */ lookup_protocol: err = -ESOCKTNOSUPPORT; rcu_read_lock(); list_for_each_entry_rcu(answer, &inetsw[sock->type], list) {
err = 0; /* Check the non-wild match. */ if (protocol == answer->protocol) { if (protocol != IPPROTO_IP) break; } else { /* Check for the two wild cases. */ if (IPPROTO_IP == protocol) { protocol = answer->protocol; break; } if (IPPROTO_IP == answer->protocol) break; } err = -EPROTONOSUPPORT; }
|
从以上代码可以看出,IPPROTO_IP是作为一个通配(wild)protocol使用的。首先是尽量寻找与参数protocol完全匹配的协议;如果找不到完全匹配的,则有两种通配情况:如果参数protocol是IPPROTO_IP,那么任一protocol都可以使用,并把protocol改为所匹配到的answer->protocol;如果链表中的协议是IPPROTO_IP,就使用该协议。
inetsw是一个static的全局变量
/* Array of list heads with SOCK_MAX entries; the lookup loop in inet_create walks the list at &inetsw[sock->type], i.e. one list per socket type. */
static struct list_head inetsw[SOCK_MAX]; |
该全局变量在函数inet_init中初始化
static int __init inet_init(void) { /* Skip some codes */ /* Register the socket-side information for inet_create. */ for (r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r) INIT_LIST_HEAD(r);
for (q = inetsw_array; q < &inetsw_array[INETSW_ARRAY_LEN]; ++q) inet_register_protosw(q);
|
通过inetsw_array来初始化inetsw,那么在inetsw中保存的协议顺序就与inetsw_array相同。
/* Built-in protocol switch table. Each entry ties a (.type, .protocol) pair to its struct proto (.prot), its socket-level ops table (.ops), a checksum setting (.no_check) and flags. Three entries are listed: TCP for SOCK_STREAM, UDP for SOCK_DGRAM, and an IPPROTO_IP wildcard entry for SOCK_RAW. Only the TCP entry carries INET_PROTOSW_ICSK; only the raw entry carries INET_PROTOSW_REUSE. */
/* Upon startup we insert all the elements in inetsw_array[] into * the linked list inetsw. */ static struct inet_protosw inetsw_array[] = { { .type = SOCK_STREAM, .protocol = IPPROTO_TCP, .prot = &tcp_prot, .ops = &inet_stream_ops, .no_check = 0, .flags = INET_PROTOSW_PERMANENT | INET_PROTOSW_ICSK, },
{ .type = SOCK_DGRAM, .protocol = IPPROTO_UDP, .prot = &udp_prot, .ops = &inet_dgram_ops, .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_PERMANENT, },
{ .type = SOCK_RAW, .protocol = IPPROTO_IP, /* wild card */ .prot = &raw_prot, .ops = &inet_sockraw_ops, .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_REUSE, } };
|
那么对于UDP来说,在找到对应的UDP protocol后,answer就指向inetsw_array[1]。
sock->ops = answer->ops; answer_prot = answer->prot; answer_no_check = answer->no_check; answer_flags = answer->flags; rcu_read_unlock(); |
对于UDP来说,sock->ops就指向inet_dgram_ops,answer_prot就是udp_prot,answer_no_check为UDP_CSUM_DEFAULT,answer_flags为INET_PROTOSW_PERMANENT。
下面是inet_dgram_ops的定义
/* proto_ops table for PF_INET datagram sockets. Most entries point at the generic inet_* / sock_common_* handlers; poll uses udp_poll and connect uses inet_dgram_connect. The socketpair, accept, listen and mmap slots are filled with sock_no_socketpair, sock_no_accept, sock_no_listen and sock_no_mmap. The compat_* sockopt handlers are only compiled in when CONFIG_COMPAT is defined. */
const struct proto_ops inet_dgram_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, .bind = inet_bind, .connect = inet_dgram_connect, .socketpair = sock_no_socketpair, .accept = sock_no_accept, .getname = inet_getname, .poll = udp_poll, .ioctl = inet_ioctl, .listen = sock_no_listen, .shutdown = inet_shutdown, .setsockopt = sock_common_setsockopt, .getsockopt = sock_common_getsockopt, .sendmsg = inet_sendmsg, .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, .sendpage = inet_sendpage, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, #endif };
|
answer_prot的定义是
/* struct proto descriptor named "UDP". It supplies the UDP handlers (close, connect, disconnect, ioctl, destroy, sockopt, sendmsg/recvmsg/sendpage, backlog_rcv), the hash/unhash/rehash/get_port hooks, the memory accounting and sysctl pointers, and sets obj_size to sizeof(struct udp_sock) with .h.udp_table pointing at udp_table. No .init member is set in this initializer. The compat_* sockopt handlers are only compiled in when CONFIG_COMPAT is defined. */
struct proto udp_prot = { .name = "UDP", .owner = THIS_MODULE, .close = udp_lib_close, .connect = ip4_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, .destroy = udp_destroy_sock, .setsockopt = udp_setsockopt, .getsockopt = udp_getsockopt, .sendmsg = udp_sendmsg, .recvmsg = udp_recvmsg, .sendpage = udp_sendpage, .backlog_rcv = __udp_queue_rcv_skb, .hash = udp_lib_hash, .unhash = udp_lib_unhash, .rehash = udp_v4_rehash, .get_port = udp_v4_get_port, .memory_allocated = &udp_memory_allocated, .sysctl_mem = sysctl_udp_mem, .sysctl_wmem = &sysctl_udp_wmem_min, .sysctl_rmem = &sysctl_udp_rmem_min, .obj_size = sizeof(struct udp_sock), .slab_flags = SLAB_DESTROY_BY_RCU, .h.udp_table = &udp_table, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, #endif }; |
继续后面的代码
err = -ENOBUFS; sk = sk_alloc(net, PF_INET, GFP_KERNEL, answer_prot); if (sk == NULL) goto out;
err = 0; sk->sk_no_check = answer_no_check; if (INET_PROTOSW_REUSE & answer_flags) sk->sk_reuse = 1; |
首先申请一个struct sock的内存,然后设置sk->sk_no_check,它用于表示是否检查checksum,1为不检查,0为检查。接着将answer_flags与INET_PROTOSW_REUSE做位与操作——该宏表示这个协议是自动重用socket的——如果结果不为0,就将sk->sk_reuse置为1。
inet = inet_sk(sk); inet->is_icsk = (INET_PROTOSW_ICSK & answer_flags) != 0;
inet->nodefrag = 0; |
首先,将通用sock结构用函数inet_sk转为inet_sock——UDP的域是AF_INET,然后与宏INET_PROTOSW_ICSK相与来确定是否是基于连接——查看inetsw_array,只有TCP是有这个标志的。inet->nodefrag设为0,是允许分片。
if (SOCK_RAW == sock->type) { inet->inet_num = protocol; if (IPPROTO_RAW == protocol) inet->hdrincl = 1; } |
如果创建的是RAW socket,那么就以protocol协议号作为本地端口。如果protocol是IPPROTO_RAW,那么就给inet->hdrincl置1,表示由用户来创建IP头,而不再由内核添加IP头。
if (ipv4_config.no_pmtu_disc) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT;
inet->inet_id = 0; |
设置MTU的策略——更准确地说,是路径MTU发现(Path MTU Discovery)的方式:IP_PMTUDISC_DONT表示不做路径MTU发现,发出的报文不设置DF(Don't Fragment)标志,允许被分片;IP_PMTUDISC_WANT表示根据路由中记录的路径MTU信息来处理——报文超过路径MTU时在本地分片,否则设置DF标志。然后初始化inet_id。
sock_init_data(sock, sk);
sk->sk_destruct = inet_sock_destruct; sk->sk_protocol = protocol; sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv; 初始化sk的结构体。
inet->uc_ttl = -1; inet->mc_loop = 1; inet->mc_ttl = 1; inet->mc_all = 1; inet->mc_index = 0; inet->mc_list = NULL;
sk_refcnt_debug_inc(sk);
|
初始化剩余的inet的成员变量,这些变量的含义可以直接看inet_sock结构体的注释,很清晰。uc_ttl表示单播ttl,mc_loop表示回环是否有效,mc_ttl表示组播ttl,mc_all是为了支持新的socket option IP_MULTICAST_ALL,如果设置了这个标志,表示所有发往端口的组播都会传给这个socket,如果没有设置,那么只有加入了的组的组播才会传给socket。
if (inet->inet_num) { /* It assumes that any protocol which allows * the user to assign a number at socket * creation time automatically * shares. */ inet->inet_sport = htons(inet->inet_num); /* Add to protocol hash chains. */ sk->sk_prot->hash(sk); }
if (sk->sk_prot->init) { err = sk->sk_prot->init(sk); if (err) sk_common_release(sk); } |
当inet->inet_num不为0时,设置inet的source port,并把sk加到hash表中——对于UDP,不会执行这个。如果该协议有init函数,就调用init。通过inetsw_array中各协议对应的prot可知(例如上面的udp_prot定义中没有.init成员),UDP是没有init函数的,而TCP和RAW的init函数分别为tcp_v4_init_sock和raw_init。
到此,UDP的socket已经建立成功了。