icmp协议
ICMP(Internet Control Message Protocol)Internet控制报文协议。它是TCP/IP协议簇的一个子协议,用于在IP主机、路由器之间传递控制消息。控制消息是指网络通不通、主机是否可达、路由是否可用等网络本身的消息。这些控制消息虽然并不传输用户数据,但是对于用户数据的传递起着重要的作用。 [1]
ICMP使用IP的基本支持,就像它是一个更高级别的协议,但是,ICMP实际上是IP的一个组成部分,必须由每个IP模块实现。
icmp报文结构
打包icmp
打包其实都是一个原理,直接放代码了:
static void dpdk_encode_icmp_pkt(uint8_t* msg, uint8_t* dst_mac, uint32_t sip, uint32_t dip, uint16_t id, uint16_t seq) { struct rte_ether_hdr* eth = (struct rte_ether_hdr*)msg; rte_memcpy(eth->s_addr.addr_bytes, gSrcMac, RTE_ETHER_ADDR_LEN); rte_memcpy(eth->d_addr.addr_bytes, dst_mac, RTE_ETHER_ADDR_LEN); eth->ether_type = htons(RTE_ETHER_TYPE_IPV4); struct rte_ipv4_hdr* ip = (struct rte_ipv4_hdr*)(eth + 1); ip->version_ihl = 0x45; ip->type_of_service = 0; ip->total_length = htons(sizeof(struct rte_ipv4_hdr) + sizeof(struct rte_icmp_hdr)); ip->packet_id = 0; ip->fragment_offset = 0; ip->time_to_live = 64; ip->next_proto_id = IPPROTO_ICMP; ip->dst_addr = dip; ip->src_addr = sip; ip->hdr_checksum = 0; ip->hdr_checksum = rte_ipv4_cksum(ip); struct rte_icmp_hdr* icmp = (struct rte_icmp_hdr*)(ip + 1); icmp->icmp_type = RTE_IP_ICMP_ECHO_REPLY; icmp->icmp_code = 0; icmp->icmp_ident = id; icmp->icmp_seq_nb = seq; icmp->icmp_cksum = 0; icmp->icmp_cksum = rte_icmp_cksum((uint16_t*)icmp, sizeof(struct rte_icmp_hdr)); } static struct rte_mbuf* dpdk_send_icmp(struct rte_mempool* mbuf_pool, uint8_t* dst_mac, uint32_t sip, uint32_t dip, uint16_t id, uint16_t seq) { struct rte_mbuf* mbuf = rte_pktmbuf_alloc(mbuf_pool); if(!mbuf) { rte_exit(EXIT_FAILURE, "rte_pktmbuf_alloc\n"); } uint16_t total_len = sizeof(struct rte_ether_hdr) + sizeof(struct rte_ipv4_hdr) + sizeof(struct rte_icmp_hdr); mbuf->pkt_len = total_len; mbuf->data_len = total_len; uint8_t* pkt = rte_pktmbuf_mtod(mbuf, uint8_t*); dpdk_encode_icmp_pkt(pkt, dst_mac, sip, dip, id, seq); return mbuf; }
这里我们依然只实现最基础的应答包:当物理机对本机执行ping操作时,能够收到返回的数据,所以我们只构造了一个RTE_IP_ICMP_ECHO_REPLY类型的包。
icmp校验
由于DPDK中好像没有专门的ICMP校验接口,这里我们参考文档自己写一个,文档我会放在最后:
/*
 * Compute the 16-bit one's-complement checksum over `count` bytes at `addr`.
 *
 * The data is summed as host-order 16-bit words; a trailing odd byte is
 * added as-is. Carries above bit 15 are folded back into the low 16 bits
 * and the complement of the folded sum is returned.
 *
 * The checksum field inside the covered data must be zero when this is
 * called to generate a checksum.
 */
static uint16_t rte_icmp_cksum(uint16_t* addr, int count)
{
    long acc = 0;
    int remaining;

    /* Consume the buffer two bytes at a time. */
    for (remaining = count; remaining > 1; remaining -= 2) {
        acc += *addr;
        addr++;
    }

    /* A single byte is left over when the length is odd. */
    if (remaining > 0) {
        acc += *(unsigned char*)addr;
    }

    /* Fold the carries back in until the sum fits in 16 bits. */
    while ((acc >> 16) != 0) {
        acc = (acc >> 16) + (acc & 0xffff);
    }

    return (uint16_t)~acc;
}
完整代码
#include <rte_eal.h>
#include <rte_ethdev.h>

#include <stdio.h>
#include <arpa/inet.h>

/* Number of mbufs in the packet pool. */
#define MBUF_LEN (4096-1)
/* Maximum number of packets fetched by one RX burst. */
#define MBUF_SIZE 64

static const int gDpdkPortId = 0;

/*
 * Pack four octets into a uint32_t with `a` in the least significant byte.
 * Every argument is parenthesised and widened to uint32_t first so that
 * shifting an octet >= 128 by 24 bits cannot overflow a signed int.
 */
#define MAKE_IPV4_ADDR(a, b, c, d) \
    ((uint32_t)(a) | ((uint32_t)(b) << 8) | ((uint32_t)(c) << 16) | ((uint32_t)(d) << 24))

/* Address this program answers ARP requests for. */
uint32_t gLocalIp = MAKE_IPV4_ADDR(192, 168, 1, 185);

/* Addressing of the most recently received UDP datagram, already swapped
 * so that it can be used directly for the reply (all in wire byte order). */
uint32_t gSrcIp;
uint32_t gDstIp;

uint8_t gSrcMac[RTE_ETHER_ADDR_LEN];
uint8_t gDstMac[RTE_ETHER_ADDR_LEN];

uint16_t gSrcPort;
uint16_t gDstPort;

struct rte_eth_conf default_port_info = {
    .rxmode = {.max_rx_pkt_len = RTE_ETHER_MAX_LEN},
};

/*
 * Configure port gDpdkPortId with one RX queue (128 descriptors, fed from
 * `mbuf_pool`) and one TX queue (512 descriptors), then start it.
 * Any failure terminates the process through rte_exit().
 */
static void dpdk_port_init(struct rte_mempool* mbuf_pool)
{
    uint16_t sys_port_count = rte_eth_dev_count_avail();
    if (sys_port_count == 0)
        rte_exit(EXIT_FAILURE, "Could not support port\n");

    struct rte_eth_dev_info dev_info;
    rte_eth_dev_info_get(gDpdkPortId, &dev_info);

    const unsigned nb_rx_queue = 1;
    const unsigned nb_tx_queue = 1;
    struct rte_eth_conf port_conf = default_port_info;
    if (rte_eth_dev_configure(gDpdkPortId, nb_rx_queue, nb_tx_queue, &port_conf) < 0)
        rte_exit(EXIT_FAILURE, "Could not configure port\n");

    if (rte_eth_rx_queue_setup(gDpdkPortId, 0, 128, rte_eth_dev_socket_id(gDpdkPortId), NULL, mbuf_pool) < 0)
        rte_exit(EXIT_FAILURE, "Could not setup RX queue\n");

    struct rte_eth_txconf txconf = dev_info.default_txconf;
    /* TX queue offloads come from the TX side of the port configuration. */
    txconf.offloads = port_conf.txmode.offloads;
    if (rte_eth_tx_queue_setup(gDpdkPortId, 0, 512, rte_eth_dev_socket_id(gDpdkPortId), &txconf) < 0)
        rte_exit(EXIT_FAILURE, "Could not setup TX queue\n");

    if (rte_eth_dev_start(gDpdkPortId) < 0)
        rte_exit(EXIT_FAILURE, "Could not start\n");
}

/*
 * Write an Ethernet + IPv4 + UDP frame into `msg`.
 *
 * msg        destination buffer of at least `total_len` bytes
 * data       UDP payload to copy behind the UDP header
 * total_len  length of the whole frame, Ethernet header included
 *
 * MAC addresses, IP addresses and ports are taken from the g* globals.
 */
static void dpdk_encode_udp_pkt(uint8_t* msg, uint8_t* data, uint16_t total_len)
{
    struct rte_ether_hdr* eth = (struct rte_ether_hdr*)msg;
    rte_memcpy(eth->d_addr.addr_bytes, gDstMac, RTE_ETHER_ADDR_LEN);
    rte_memcpy(eth->s_addr.addr_bytes, gSrcMac, RTE_ETHER_ADDR_LEN);
    eth->ether_type = htons(RTE_ETHER_TYPE_IPV4);

    struct rte_ipv4_hdr* ip = (struct rte_ipv4_hdr*)(eth + 1);
    ip->version_ihl = 0x45;
    ip->type_of_service = 0;
    ip->total_length = htons(total_len - sizeof(struct rte_ether_hdr));
    ip->packet_id = 0;
    ip->fragment_offset = 0;
    ip->time_to_live = 64;
    ip->next_proto_id = IPPROTO_UDP;
    ip->dst_addr = gDstIp;
    ip->src_addr = gSrcIp;
    ip->hdr_checksum = 0;
    ip->hdr_checksum = rte_ipv4_cksum(ip);

    struct rte_udp_hdr* udp = (struct rte_udp_hdr*)(ip + 1);
    /* UDP length = UDP header + payload. */
    uint16_t udp_len = total_len - sizeof(struct rte_ether_hdr) - sizeof(struct rte_ipv4_hdr);
    udp->dst_port = gDstPort;
    udp->src_port = gSrcPort;
    udp->dgram_len = htons(udp_len);
    /* Only the payload is copied; the header was filled in above. */
    rte_memcpy((uint8_t*)(udp + 1), data, udp_len - sizeof(struct rte_udp_hdr));
    udp->dgram_cksum = 0;
    udp->dgram_cksum = rte_ipv4_udptcp_cksum(ip, udp);
}

/*
 * Allocate an mbuf and fill it with a UDP frame carrying `length` bytes of
 * payload from `data`. Exits the process if allocation fails, so the
 * returned pointer is never NULL. The caller receives the mbuf.
 */
static struct rte_mbuf* dpdk_send_udp(struct rte_mempool* mbuf_pool, uint8_t* data, uint16_t length)
{
    struct rte_mbuf* mbuf = rte_pktmbuf_alloc(mbuf_pool);
    if (!mbuf)
        rte_exit(EXIT_FAILURE, "rte_pktmbuf_alloc\n");

    /* Ethernet (14) + IPv4 (20) + UDP (8) headers in front of the payload. */
    uint16_t total_len = length + sizeof(struct rte_ether_hdr)
                       + sizeof(struct rte_ipv4_hdr) + sizeof(struct rte_udp_hdr);
    mbuf->pkt_len = total_len;
    mbuf->data_len = total_len;

    uint8_t* pkt = rte_pktmbuf_mtod(mbuf, uint8_t*);
    dpdk_encode_udp_pkt(pkt, data, total_len);

    return mbuf;
}

/*
 * Write an Ethernet + ARP reply frame into `msg`.
 *
 * dst_mac  MAC address of the requester (Ethernet destination and ARP target)
 * sip/dip  ARP sender / target IPv4 addresses, stored as given
 */
static void dpdk_encode_arp_pkt(uint8_t* msg, uint8_t* dst_mac, uint32_t sip, uint32_t dip)
{
    struct rte_ether_hdr* eth = (struct rte_ether_hdr*)(msg);
    rte_memcpy(eth->s_addr.addr_bytes, gSrcMac, RTE_ETHER_ADDR_LEN);
    rte_memcpy(eth->d_addr.addr_bytes, dst_mac, RTE_ETHER_ADDR_LEN);
    eth->ether_type = htons(RTE_ETHER_TYPE_ARP);

    struct rte_arp_hdr* arp = (struct rte_arp_hdr*)(eth + 1);
    arp->arp_hardware = htons(1); /* hardware type 1: Ethernet */
    arp->arp_protocol = htons(RTE_ETHER_TYPE_IPV4);
    arp->arp_plen = sizeof(uint32_t);
    arp->arp_hlen = RTE_ETHER_ADDR_LEN;
    arp->arp_opcode = htons(RTE_ARP_OP_REPLY);
    arp->arp_data.arp_sip = sip;
    arp->arp_data.arp_tip = dip;
    rte_memcpy(arp->arp_data.arp_sha.addr_bytes, gSrcMac, RTE_ETHER_ADDR_LEN);
    rte_memcpy(arp->arp_data.arp_tha.addr_bytes, dst_mac, RTE_ETHER_ADDR_LEN);
}

/*
 * Allocate an mbuf and fill it with an ARP reply. Exits the process if
 * allocation fails. The caller receives the mbuf.
 */
static struct rte_mbuf* dpdk_send_arp(struct rte_mempool* mbuf_pool, uint8_t* dst_mac, uint32_t sip, uint32_t dip)
{
    struct rte_mbuf* mbuf = rte_pktmbuf_alloc(mbuf_pool);
    if (!mbuf)
        rte_exit(EXIT_FAILURE, "rte_pktmbuf_alloc\n");

    uint16_t total_len = sizeof(struct rte_ether_hdr) + sizeof(struct rte_arp_hdr);
    mbuf->pkt_len = total_len;
    mbuf->data_len = total_len;

    uint8_t* pkt = rte_pktmbuf_mtod(mbuf, uint8_t*);
    dpdk_encode_arp_pkt(pkt, dst_mac, sip, dip);

    return mbuf;
}

/*
 * 16-bit one's-complement checksum over `count` bytes at `addr`.
 * Words are summed in host order, a trailing odd byte is added as-is,
 * carries are folded back into the low 16 bits and the result is inverted.
 */
static uint16_t rte_icmp_cksum(uint16_t* addr, int count)
{
    long sum = 0;

    while (count > 1) {
        sum += *(unsigned short*)addr++;
        count -= 2;
    }

    if (count > 0)
        sum += *(unsigned char*)addr;

    while (sum >> 16)
        sum = (sum & 0xffff) + (sum >> 16);

    return (uint16_t)~sum;
}

/*
 * Write an Ethernet + IPv4 + ICMP echo-reply frame into `msg`.
 *
 * id/seq are stored as given (no byte swap). The reply carries no ICMP
 * payload: the IP total length covers only the IP and ICMP headers.
 */
static void dpdk_encode_icmp_pkt(uint8_t* msg, uint8_t* dst_mac, uint32_t sip, uint32_t dip, uint16_t id, uint16_t seq)
{
    struct rte_ether_hdr* eth = (struct rte_ether_hdr*)msg;
    rte_memcpy(eth->s_addr.addr_bytes, gSrcMac, RTE_ETHER_ADDR_LEN);
    rte_memcpy(eth->d_addr.addr_bytes, dst_mac, RTE_ETHER_ADDR_LEN);
    eth->ether_type = htons(RTE_ETHER_TYPE_IPV4);

    struct rte_ipv4_hdr* ip = (struct rte_ipv4_hdr*)(eth + 1);
    ip->version_ihl = 0x45;
    ip->type_of_service = 0;
    ip->total_length = htons(sizeof(struct rte_ipv4_hdr) + sizeof(struct rte_icmp_hdr));
    ip->packet_id = 0;
    ip->fragment_offset = 0;
    ip->time_to_live = 64;
    ip->next_proto_id = IPPROTO_ICMP;
    ip->dst_addr = dip;
    ip->src_addr = sip;
    ip->hdr_checksum = 0;
    ip->hdr_checksum = rte_ipv4_cksum(ip);

    struct rte_icmp_hdr* icmp = (struct rte_icmp_hdr*)(ip + 1);
    icmp->icmp_type = RTE_IP_ICMP_ECHO_REPLY;
    icmp->icmp_code = 0;
    icmp->icmp_ident = id;
    icmp->icmp_seq_nb = seq;
    icmp->icmp_cksum = 0;
    icmp->icmp_cksum = rte_icmp_cksum((uint16_t*)icmp, sizeof(struct rte_icmp_hdr));
}

/*
 * Allocate an mbuf and fill it with an ICMP echo reply. Exits the process
 * if allocation fails. The caller receives the mbuf.
 */
static struct rte_mbuf* dpdk_send_icmp(struct rte_mempool* mbuf_pool, uint8_t* dst_mac, uint32_t sip, uint32_t dip, uint16_t id, uint16_t seq)
{
    struct rte_mbuf* mbuf = rte_pktmbuf_alloc(mbuf_pool);
    if (!mbuf) {
        rte_exit(EXIT_FAILURE, "rte_pktmbuf_alloc\n");
    }

    uint16_t total_len = sizeof(struct rte_ether_hdr) + sizeof(struct rte_ipv4_hdr) + sizeof(struct rte_icmp_hdr);
    mbuf->pkt_len = total_len;
    mbuf->data_len = total_len;

    uint8_t* pkt = rte_pktmbuf_mtod(mbuf, uint8_t*);
    dpdk_encode_icmp_pkt(pkt, dst_mac, sip, dip, id, seq);

    return mbuf;
}

/*
 * Hand one packet to TX queue 0. Once rte_eth_tx_burst() accepts an mbuf
 * the driver releases it after transmission, so it is freed here only when
 * the driver did not take it.
 */
static void dpdk_tx_one(struct rte_mbuf* txbuf)
{
    if (rte_eth_tx_burst(gDpdkPortId, 0, &txbuf, 1) == 0)
        rte_pktmbuf_free(txbuf);
}

/* Log an ARP frame and answer it if it is a request for gLocalIp. */
static void dpdk_handle_arp(struct rte_mempool* mbuf_pool, struct rte_mbuf* rxbuf)
{
    if (rte_pktmbuf_data_len(rxbuf) < sizeof(struct rte_ether_hdr) + sizeof(struct rte_arp_hdr))
        return;

    struct rte_arp_hdr* arp = rte_pktmbuf_mtod_offset(rxbuf, struct rte_arp_hdr*, sizeof(struct rte_ether_hdr));

    struct in_addr addr;
    addr.s_addr = arp->arp_data.arp_sip;
    printf("arp-->src: %s ", inet_ntoa(addr));
    addr.s_addr = arp->arp_data.arp_tip;
    printf("dst: %s\n", inet_ntoa(addr));

    /* Only requests are answered; replying to replies would be wrong. */
    if (arp->arp_opcode != rte_cpu_to_be_16(RTE_ARP_OP_REQUEST))
        return;
    if (arp->arp_data.arp_tip != gLocalIp)
        return;

    dpdk_tx_one(dpdk_send_arp(mbuf_pool, arp->arp_data.arp_sha.addr_bytes, gLocalIp, arp->arp_data.arp_sip));
}

/*
 * Log a UDP datagram and echo its payload back to the sender.
 * NOTE(review): `ip + 1` assumes an IPv4 header without options (IHL == 5).
 */
static void dpdk_handle_udp(struct rte_mempool* mbuf_pool, struct rte_mbuf* rxbuf,
                            struct rte_ether_hdr* eth, struct rte_ipv4_hdr* ip)
{
    const uint32_t l2l3_len = sizeof(struct rte_ether_hdr) + sizeof(struct rte_ipv4_hdr);

    if (rte_pktmbuf_data_len(rxbuf) < l2l3_len + sizeof(struct rte_udp_hdr))
        return;

    struct rte_udp_hdr* udp = (struct rte_udp_hdr*)(ip + 1);

    /* dgram_len counts the UDP header too; it must lie inside the frame. */
    uint16_t dgram_len = ntohs(udp->dgram_len);
    if (dgram_len < sizeof(struct rte_udp_hdr) || rte_pktmbuf_data_len(rxbuf) < l2l3_len + dgram_len)
        return;
    uint16_t payload_len = dgram_len - sizeof(struct rte_udp_hdr);

    /* Swap source and destination so the globals describe the reply. */
    rte_memcpy(gDstMac, eth->s_addr.addr_bytes, RTE_ETHER_ADDR_LEN);
    gSrcIp = ip->dst_addr;
    gDstIp = ip->src_addr;
    gSrcPort = udp->dst_port;
    gDstPort = udp->src_port;

    struct in_addr addr;
    addr.s_addr = ip->src_addr;
    printf("udp-->src: %s:%d ", inet_ntoa(addr), ntohs(udp->src_port));
    addr.s_addr = ip->dst_addr;
    /* Bounded print: the payload is not NUL-terminated inside the mbuf. */
    printf("dst: %s:%d %.*s\n", inet_ntoa(addr), ntohs(udp->dst_port),
           (int)payload_len, (const char*)(udp + 1));

    dpdk_tx_one(dpdk_send_udp(mbuf_pool, (uint8_t*)(udp + 1), payload_len));
}

/*
 * Log an ICMP message and answer echo requests with an echo reply.
 * NOTE(review): `ip + 1` assumes an IPv4 header without options (IHL == 5).
 */
static void dpdk_handle_icmp(struct rte_mempool* mbuf_pool, struct rte_mbuf* rxbuf,
                             struct rte_ether_hdr* eth, struct rte_ipv4_hdr* ip)
{
    if (rte_pktmbuf_data_len(rxbuf) < sizeof(struct rte_ether_hdr) + sizeof(struct rte_ipv4_hdr) + sizeof(struct rte_icmp_hdr))
        return;

    struct rte_icmp_hdr* icmp = (struct rte_icmp_hdr*)(ip + 1);

    struct in_addr addr;
    addr.s_addr = ip->src_addr;
    printf("icmp-->src: %s ", inet_ntoa(addr));

    if (icmp->icmp_type != RTE_IP_ICMP_ECHO_REQUEST)
        return;

    addr.s_addr = ip->dst_addr;
    printf("local:%s, type: %d\n", inet_ntoa(addr), icmp->icmp_type);

    dpdk_tx_one(dpdk_send_icmp(mbuf_pool, eth->s_addr.addr_bytes, ip->dst_addr, ip->src_addr,
                               icmp->icmp_ident, icmp->icmp_seq_nb));
}

/*
 * Dispatch one received frame by EtherType / IP protocol. The frame is
 * only read here; the caller keeps ownership of `rxbuf` and frees it.
 */
static void dpdk_handle_frame(struct rte_mempool* mbuf_pool, struct rte_mbuf* rxbuf)
{
    if (rte_pktmbuf_data_len(rxbuf) < sizeof(struct rte_ether_hdr))
        return;

    struct rte_ether_hdr* eth = rte_pktmbuf_mtod(rxbuf, struct rte_ether_hdr*);

    if (eth->ether_type == rte_cpu_to_be_16(RTE_ETHER_TYPE_ARP)) {
        dpdk_handle_arp(mbuf_pool, rxbuf);
        return;
    }

    if (eth->ether_type != rte_cpu_to_be_16(RTE_ETHER_TYPE_IPV4))
        return;

    if (rte_pktmbuf_data_len(rxbuf) < sizeof(struct rte_ether_hdr) + sizeof(struct rte_ipv4_hdr))
        return;

    struct rte_ipv4_hdr* ip = rte_pktmbuf_mtod_offset(rxbuf, struct rte_ipv4_hdr*, sizeof(struct rte_ether_hdr));

    if (ip->next_proto_id == IPPROTO_UDP)
        dpdk_handle_udp(mbuf_pool, rxbuf, eth, ip);
    else if (ip->next_proto_id == IPPROTO_ICMP)
        dpdk_handle_icmp(mbuf_pool, rxbuf, eth, ip);
}

/*
 * Initialise the EAL, the mbuf pool and the port, then poll RX queue 0
 * forever, answering ARP requests, UDP datagrams and ICMP echo requests.
 */
int main(int argc, char* argv[])
{
    if (rte_eal_init(argc, argv) < 0)
        rte_exit(EXIT_FAILURE, "Error with eal init\n");

    struct rte_mempool* mbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", MBUF_LEN, 0, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
    if (!mbuf_pool)
        rte_exit(EXIT_FAILURE, "Error with mempool create\n");

    dpdk_port_init(mbuf_pool);

    rte_eth_macaddr_get(gDpdkPortId, (struct rte_ether_addr*)gSrcMac);

    while (1) {
        struct rte_mbuf* mbufs[MBUF_SIZE];
        int nb_pkt = rte_eth_rx_burst(gDpdkPortId, 0, mbufs, MBUF_SIZE);
        if (nb_pkt > MBUF_SIZE)
            rte_exit(EXIT_FAILURE, "Error withs pkt num\n");

        int i;
        for (i = 0; i < nb_pkt; i++) {
            dpdk_handle_frame(mbuf_pool, mbufs[i]);
            /* Every received mbuf is released exactly once, whatever
             * path the handler took. */
            rte_pktmbuf_free(mbufs[i]);
            mbufs[i] = NULL;
        }
    }

    return 0;
}