作者:gfree.wind@gmail.com
博客:linuxfocus.blog.chinaunix.net
下面是 ip_push_pending_frames 函数的具体代码。
- int ip_push_pending_frames(struct sock *sk)
- {
- struct sk_buff *skb, *tmp_skb;
- struct sk_buff **tail_skb;
- struct inet_sock *inet = inet_sk(sk);
- struct net *net = sock_net(sk);
- struct ip_options *opt = NULL;
- struct rtable *rt = (struct rtable *)inet->cork.dst;
- struct iphdr *iph;
- __be16 df = 0;
- __u8 ttl;
- int err = 0;
/* 发送队列可能为空 */
- if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
- goto out;
- /* 获得分片链表 */
- tail_skb = &(skb_shinfo(skb)->frag_list);
-
- /* move skb->data to ip header from ext header */
- /* 调整data指针位置 */
- if (skb->data skb_network_header(skb))
- __skb_pull(skb, skb_network_offset(skb));
/* 调整所有发送缓冲中的sk_buff的data指针位置,并更新第一个sk_buff的数据长度 */
- while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
- __skb_pull(tmp_skb, skb_network_header_len(skb));
- *tail_skb = tmp_skb;
- tail_skb = &(tmp_skb->next);
- skb->len = tmp_skb->len;
- skb->data_len = tmp_skb->len;
- skb->truesize = tmp_skb->truesize;
- tmp_skb->destructor = NULL;
- tmp_skb->sk = NULL;
- }
-
- /* Unless user demanded real pmtu discovery (IP_PMTUDISC_DO), we allow
- * to fragment the frame generated here. No matter, what transforms
- * how transforms change size of the packet, it will come out.
- */
- /* 允许本地分片 */
- if (inet->pmtudisc IP_PMTUDISC_DO)
- skb->local_df = 1;
-
- /* DF bit is set when we want to see DF on outgoing frames.
- * If local_df is set too, we still allow to fragment this frame
- * locally. */
- /* 不允许分片,或者不需要分片 */
- if (inet->pmtudisc >= IP_PMTUDISC_DO ||
- (skb->len = dst_mtu(&rt->dst) &&
- ip_dont_fragment(sk, &rt->dst)))
- df = htons(IP_DF);
/* ip option 保存在cork中, 则使用cork中的option */
- if (inet->cork.flags & IPCORK_OPT)
- opt = inet->cork.opt;
/* 选择合适的TTL值 */
- if (rt->rt_type == RTN_MULTICAST)
- ttl = inet->mc_ttl;
- else
- ttl = ip_select_ttl(inet, &rt->dst);
/* 得到IP报文头的地址 */
- iph = (struct iphdr *)skb->data;
- /* 初始化IP报文头的内容 */
- iph->version = 4;
- iph->ihl = 5;
- if (opt) {
- /*
填充IP option - 看到这里,可以发现opt只可能从cork中获得
- */
- iph->ihl = opt->optlen>>2;
- ip_options_build(skb, opt, inet->cork.addr, rt, 0);
- }
- iph->tos = inet->tos;
- iph->frag_off = df;
- ip_select_ident(iph, &rt->dst, sk);
- iph->ttl = ttl;
- iph->protocol = sk->sk_protocol;
- iph->saddr = rt->rt_src;
- iph->daddr = rt->rt_dst;
-
- skb->priority = sk->sk_priority;
- skb->mark = sk->sk_mark;
- /*
- * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec
- * on dst refcount
- */
- inet->cork.dst = NULL;
- skb_dst_set(skb, &rt->dst);
-
- if (iph->protocol == IPPROTO_ICMP)
- icmp_out_count(net, ((struct icmphdr *)
- skb_transport_header(skb))->type);
-
- /* Netfilter gets whole the not fragmented skb. */
- /* 发送数据 */
- err = ip_local_out(skb);
- if (err) {
- if (err > 0)
- err = net_xmit_errno(err);
- if (err)
- goto error;
- }
-
- out:
- ip_cork_release(inet);
- return err;
-
- error:
- IP_INC_STATS(net, IPSTATS_MIB_OUTDISCARDS);
- goto out;
- }
这个函数比较容易看懂——当然,并不是所有细节都已经完全弄清楚了。
第一遍浏览这些API,只要搞懂该API的大致流程和用途。当整个框架建立好以后,再慢慢补充细节。