在项目中,对报文进行压缩、加密后,最后一步一般是 base64 编码。因为 base64 编码的字符串更适合不同平台,不同语言的传输。
base64 编码的优点:
- 算法是编码,不是压缩,编码后只会增加字节数(一般是比之前的多1/3,比如之前是3, 编码后是4)
- 算法简单,基本不影响效率
- 算法可逆,解码很方便,不用于私密传输。
- 毕竟编码了,肉眼不能直接读出原始内容。
- 编码后的字符串只包含【0-9a-zA-Z+/=】这些可打印字符,因此原始数据中的不可打印字符(转义字符)也可以安全传输
为啥要编码
计算机中的任何数据最终都是以字节(二进制)形式存储的。ASCII 码只定义了 0-127,其中 0-31 和 127 是不可见的控制字符;128-255 之间的字节值不属于标准 ASCII,在不同系统中的含义也不一致。在网络上进行数据交换,从 A 到 B, 往往要经过多个路由器,不同设备之间对字符的处理方式有一些不同,不可见字符有可能被错误处理,是不利于传输的,因此要先做一个 base64 编码,变成可见字符,这样出错的可能性比较小。
看看英文版怎么说:
When you have some binary data that you want to ship across a network, you generally don't do it by just streaming the bits and bytes over the wire in a raw format. Why? because some media are made for streaming text. You never know -- some protocols may interpret your binary data as control characters (like a modem), or your binary data could be screwed up because the underlying protocol might think that you've entered a special character combination (like how FTP translates line endings). So to get around this, people encode the binary data into characters. Base64 is one of these types of encodings. Why 64? Because you can generally rely on the same 64 characters being present in many character sets, and you can be reasonably confident that your data's going to end up on the other side of the wire uncorrupted.
使用场景
- 对于证书来说,尤其是根证书,一般是 base64 编码的,在网上被很多人下载
- 电子邮件的附件一般是 base64 编码,因为附件往往有不可见字符
- 比如 http 协议中 key , value 字段的值,必须进行 URLEncode,因为有些特殊符号有特殊含义,需要先把这些字符编码后传输,到达对端再解析回来。
- xml 中如果想嵌入另外一个 xml 文件,直接嵌入,往往 xml 标签就乱套了, 不容易解析,因此,需要把被嵌入的 xml 转成字节数组,再编码成由可见字符组成的字符串。
- 网页中的一些小图片,可以直接以 base64 编码的方式嵌入,不用再链接请求消耗网络资源。
- 较老的纯文本协议 SMTP ,这些文本偶尔传输一个文件时,需要用 base64
base64 编码步骤
- 将待编码的字符串转换成二进制表示出来
- 3个字节为一组,也就是24位二进制为一组;最后一组不足 3 个字节时用 0 补齐,不足的部分在编码结果末尾用 = 补足到 4 个字符
- 将这个24位分成4组,每 6 位为一组,每组前面补 00,将 6 位二进制扩展成 8 位二进制,从原来的3字节转换为4字节
- 计算这4个字节对应的十进制值(0-63),然后以该值为下标查 base64 索引表(A-Z、a-z、0-9、+、/),拼接字符串形成最后的 base64 编码。
base64 源码
base64.h(注:下面贴出的内容实际上是 base64.cpp 的实现代码,头文件中 CBase64 类的声明未给出)
#include "Base64.h"

#include <cstring>
#include <string>

// NOTE(review): this listing is labelled "base64.h" in the surrounding text,
// but it defines the members of CBase64 and includes "Base64.h" itself, so it
// is the implementation file. The class declaration is not shown here.

// Base64 alphabet: the character at index v encodes the 6-bit value v.
const std::string CBase64::base64_chars =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/";

namespace {

// Splits three 8-bit bytes into four 6-bit values (each stored in one byte).
void split_triplet(const unsigned char in[3], unsigned char out[4])
{
    out[0] = (in[0] & 0xfc) >> 2;
    out[1] = ((in[0] & 0x03) << 4) + ((in[1] & 0xf0) >> 4);
    out[2] = ((in[1] & 0x0f) << 2) + ((in[2] & 0xc0) >> 6);
    out[3] = in[2] & 0x3f;
}

// Reassembles four 6-bit values into three 8-bit bytes.
void join_quartet(const unsigned char in[4], unsigned char out[3])
{
    out[0] = (in[0] << 2) + ((in[1] & 0x30) >> 4);
    out[1] = ((in[1] & 0xf) << 4) + ((in[2] & 0x3c) >> 2);
    out[2] = ((in[2] & 0x3) << 6) + in[3];
}

} // namespace

/// Looks for an incomplete multi-byte sequence at the end of a buffer.
///
/// Scans backwards from the last byte until it meets a byte whose high bits
/// match a lead-byte pattern (0xxxxxxx, 110xxxxx, 1110xxxx, 11110xxx,
/// 111110xx or 1111110x), i.e. a sequence of 1 to 6 bytes.
///
/// @param src_content buffer to inspect; may be null.
/// @param src_size    number of bytes in src_content.
/// @return -1 when src_content is null, when no lead byte is found, or when
///         the bytes from the last lead byte to the end form exactly one
///         sequence of the announced length. Otherwise the number of bytes
///         from that lead byte to the end of the buffer.
///
/// NOTE(review): a complete character followed by stray continuation bytes
/// (distance greater than the announced length) is also reported, so the
/// caller drops that character as well. Kept as in the original; confirm
/// whether this is intended.
int CBase64::find_utf_8_bit_head(const unsigned char *src_content, int src_size)
{
    if (!src_content)
        return -1;

    for (int i = src_size - 1; i >= 0; i--)
    {
        const unsigned char b = src_content[i];

        int byteNum = 0; // sequence length announced by the lead byte
        if (0 == (b >> 7))
            byteNum = 1;
        else if (0x06 == (b >> 5))
            byteNum = 2;
        else if (0x0E == (b >> 4))
            byteNum = 3;
        else if (0x1E == (b >> 3))
            byteNum = 4;
        else if (0x3E == (b >> 2))
            byteNum = 5;
        else if (0x7E == (b >> 1))
            byteNum = 6;
        else
            continue; // not a lead byte: keep scanning towards the front

        const int tail = src_size - i; // bytes from the lead byte to the end
        return (tail == byteNum) ? -1 : tail;
    }
    return -1;
}

/// Encodes a byte buffer as base64 with '=' padding.
///
/// @param bytes_to_encode input bytes; a null pointer yields an empty string.
/// @param in_len          number of input bytes.
/// @return the encoded text; its length is always a multiple of four.
std::string CBase64::base64_encode(const unsigned char* bytes_to_encode, unsigned int in_len)
{
    std::string ret;
    if (!bytes_to_encode || in_len == 0)
        return ret;

    // Every started group of three input bytes produces four characters.
    ret.reserve((static_cast<std::string::size_type>(in_len) + 2) / 3 * 4);

    int i = 0;
    unsigned char char_array_3[3];
    unsigned char char_array_4[4];

    while (in_len--)
    {
        char_array_3[i++] = *(bytes_to_encode++);
        if (i == 3)
        {
            split_triplet(char_array_3, char_array_4);
            for (int k = 0; k < 4; k++)
                ret += base64_chars[char_array_4[k]];
            i = 0;
        }
    }

    if (i)
    {
        // One or two bytes are left: zero-fill the group, emit only the
        // i + 1 characters that carry data, then pad with '='.
        for (int j = i; j < 3; j++)
            char_array_3[j] = '\0';

        split_triplet(char_array_3, char_array_4);

        for (int j = 0; j < i + 1; j++)
            ret += base64_chars[char_array_4[j]];

        while (i++ < 3)
            ret += '=';
    }

    return ret;
}

// Disabled MFC (CString / LPCTSTR) variant, kept for reference.
//CString CBase64::Base64Encode(LPCTSTR lpszSrc)
//{
//    char* strSrc = new char[_tcslen(lpszSrc)+1];
//    ZeroMemory(strSrc, _tcslen(lpszSrc)+1);
//    strcpy(strSrc, (char*)(_bstr_t(lpszSrc)));
//    std::string str = base64_encode((unsigned char*)strSrc, (int)strlen(strSrc));
//    CString strDst = str.c_str();
//    return strDst;
//}

/// Encodes a NUL-terminated string as base64.
///
/// If find_utf_8_bit_head() reports an incomplete sequence at the end of the
/// string, those trailing bytes are not encoded.
///
/// Side effect: the reported trailing bytes are overwritten with zeros in the
/// caller's buffer, so lpszSrc must point to writable memory.
///
/// @param lpszSrc writable NUL-terminated input; null yields an empty string.
/// @return base64 text of the (possibly shortened) input.
std::string CBase64::Base64Encode(char* lpszSrc)
{
    if (!lpszSrc)
        return std::string();

    std::size_t encode_len = std::strlen(lpszSrc);
    const int find_index = find_utf_8_bit_head(
        reinterpret_cast<unsigned char*>(lpszSrc), static_cast<int>(encode_len));

    if (find_index > -1)
    {
        // Cut the dangling bytes off in place, as the original did.
        encode_len -= static_cast<std::size_t>(find_index);
        std::memset(lpszSrc + encode_len, 0, static_cast<std::size_t>(find_index));
    }

    return base64_encode(reinterpret_cast<unsigned char*>(lpszSrc),
                         static_cast<unsigned int>(encode_len));
}

/// Decodes base64 text into the bytes it represents.
///
/// Decoding stops at the first '=' or at the first byte that is not in the
/// base64 alphabet. (Previously such bytes were turned into the value 0xFF
/// and decoded into garbage.)
///
/// @param bytes_to_encode base64 text to decode (the parameter name is
///                        historical); a null pointer yields an empty string.
/// @param in_len          number of bytes available in bytes_to_encode.
/// @return the decoded bytes, which may contain embedded NUL characters.
std::string CBase64::base64_decode(const unsigned char* bytes_to_encode, unsigned int in_len)
{
    std::string ret;
    if (!bytes_to_encode)
        return ret;

    ret.reserve(static_cast<std::string::size_type>(in_len) / 4 * 3 + 2);

    int i = 0;
    unsigned char char_array_4[4];
    unsigned char char_array_3[3];

    for (unsigned int pos = 0; pos < in_len; ++pos)
    {
        const unsigned char c = bytes_to_encode[pos];
        if (c == '=')
            break; // padding marks the end of the data

        const std::string::size_type value = base64_chars.find(static_cast<char>(c));
        if (value == std::string::npos)
            break; // not a base64 character

        char_array_4[i++] = static_cast<unsigned char>(value);
        if (i == 4)
        {
            join_quartet(char_array_4, char_array_3);
            for (int k = 0; k < 3; k++)
                ret += static_cast<char>(char_array_3[k]);
            i = 0;
        }
    }

    if (i)
    {
        // A partial group of i characters carries i - 1 complete bytes.
        for (int j = i; j < 4; j++)
            char_array_4[j] = 0;

        join_quartet(char_array_4, char_array_3);

        for (int j = 0; j < i - 1; j++)
            ret += static_cast<char>(char_array_3[j]);
    }

    return ret;
}

// Disabled MFC (CString / LPCTSTR) variant, kept for reference.
//CString CBase64::Base64Decode(LPCTSTR lpszSrc)
//{
//    char* strSrc = new char[_tcslen(lpszSrc)+1];
//    ZeroMemory(strSrc, _tcslen(lpszSrc)+1);
//    strcpy(strSrc, (char*)(_bstr_t(lpszSrc)));
//    std::string str = base64_decode((unsigned char*)strSrc, (int)strlen(strSrc));
//    CString strDst = str.c_str();
//    return strDst;
//}

/// Decodes a NUL-terminated base64 string.
///
/// @param lpszSrc NUL-terminated base64 text; null yields an empty string.
/// @return the decoded bytes (see base64_decode).
std::string CBase64::Base64Decode(char* lpszSrc)
{
    if (!lpszSrc)
        return std::string();

    return base64_decode(reinterpret_cast<unsigned char*>(lpszSrc),
                         static_cast<unsigned int>(std::strlen(lpszSrc)));
}
base64.cpp
#include "Base64.h"

#include <cstring>
#include <string>

// Base64 alphabet: the character at index v encodes the 6-bit value v.
const std::string CBase64::base64_chars =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/";

namespace {

// Splits three 8-bit bytes into four 6-bit values (each stored in one byte).
void split_triplet(const unsigned char in[3], unsigned char out[4])
{
    out[0] = (in[0] & 0xfc) >> 2;
    out[1] = ((in[0] & 0x03) << 4) + ((in[1] & 0xf0) >> 4);
    out[2] = ((in[1] & 0x0f) << 2) + ((in[2] & 0xc0) >> 6);
    out[3] = in[2] & 0x3f;
}

// Reassembles four 6-bit values into three 8-bit bytes.
void join_quartet(const unsigned char in[4], unsigned char out[3])
{
    out[0] = (in[0] << 2) + ((in[1] & 0x30) >> 4);
    out[1] = ((in[1] & 0xf) << 4) + ((in[2] & 0x3c) >> 2);
    out[2] = ((in[2] & 0x3) << 6) + in[3];
}

} // namespace

/// Looks for an incomplete multi-byte sequence at the end of a buffer.
///
/// Scans backwards from the last byte until it meets a byte whose high bits
/// match a lead-byte pattern (0xxxxxxx, 110xxxxx, 1110xxxx, 11110xxx,
/// 111110xx or 1111110x), i.e. a sequence of 1 to 6 bytes.
///
/// @param src_content buffer to inspect; may be null.
/// @param src_size    number of bytes in src_content.
/// @return -1 when src_content is null, when no lead byte is found, or when
///         the bytes from the last lead byte to the end form exactly one
///         sequence of the announced length. Otherwise the number of bytes
///         from that lead byte to the end of the buffer.
///
/// NOTE(review): a complete character followed by stray continuation bytes
/// (distance greater than the announced length) is also reported, so the
/// caller drops that character as well. Kept as in the original; confirm
/// whether this is intended.
int CBase64::find_utf_8_bit_head(const unsigned char *src_content, int src_size)
{
    if (!src_content)
        return -1;

    for (int i = src_size - 1; i >= 0; i--)
    {
        const unsigned char b = src_content[i];

        int byteNum = 0; // sequence length announced by the lead byte
        if (0 == (b >> 7))
            byteNum = 1;
        else if (0x06 == (b >> 5))
            byteNum = 2;
        else if (0x0E == (b >> 4))
            byteNum = 3;
        else if (0x1E == (b >> 3))
            byteNum = 4;
        else if (0x3E == (b >> 2))
            byteNum = 5;
        else if (0x7E == (b >> 1))
            byteNum = 6;
        else
            continue; // not a lead byte: keep scanning towards the front

        const int tail = src_size - i; // bytes from the lead byte to the end
        return (tail == byteNum) ? -1 : tail;
    }
    return -1;
}

/// Encodes a byte buffer as base64 with '=' padding.
///
/// @param bytes_to_encode input bytes; a null pointer yields an empty string.
/// @param in_len          number of input bytes.
/// @return the encoded text; its length is always a multiple of four.
std::string CBase64::base64_encode(const unsigned char* bytes_to_encode, unsigned int in_len)
{
    std::string ret;
    if (!bytes_to_encode || in_len == 0)
        return ret;

    // Every started group of three input bytes produces four characters.
    ret.reserve((static_cast<std::string::size_type>(in_len) + 2) / 3 * 4);

    int i = 0;
    unsigned char char_array_3[3];
    unsigned char char_array_4[4];

    while (in_len--)
    {
        char_array_3[i++] = *(bytes_to_encode++);
        if (i == 3)
        {
            split_triplet(char_array_3, char_array_4);
            for (int k = 0; k < 4; k++)
                ret += base64_chars[char_array_4[k]];
            i = 0;
        }
    }

    if (i)
    {
        // One or two bytes are left: zero-fill the group, emit only the
        // i + 1 characters that carry data, then pad with '='.
        for (int j = i; j < 3; j++)
            char_array_3[j] = '\0';

        split_triplet(char_array_3, char_array_4);

        for (int j = 0; j < i + 1; j++)
            ret += base64_chars[char_array_4[j]];

        while (i++ < 3)
            ret += '=';
    }

    return ret;
}

// Disabled MFC (CString / LPCTSTR) variant, kept for reference.
//CString CBase64::Base64Encode(LPCTSTR lpszSrc)
//{
//    char* strSrc = new char[_tcslen(lpszSrc)+1];
//    ZeroMemory(strSrc, _tcslen(lpszSrc)+1);
//    strcpy(strSrc, (char*)(_bstr_t(lpszSrc)));
//    std::string str = base64_encode((unsigned char*)strSrc, (int)strlen(strSrc));
//    CString strDst = str.c_str();
//    return strDst;
//}

/// Encodes a NUL-terminated string as base64.
///
/// If find_utf_8_bit_head() reports an incomplete sequence at the end of the
/// string, those trailing bytes are not encoded.
///
/// Side effect: the reported trailing bytes are overwritten with zeros in the
/// caller's buffer, so lpszSrc must point to writable memory.
///
/// @param lpszSrc writable NUL-terminated input; null yields an empty string.
/// @return base64 text of the (possibly shortened) input.
std::string CBase64::Base64Encode(char* lpszSrc)
{
    if (!lpszSrc)
        return std::string();

    std::size_t encode_len = std::strlen(lpszSrc);
    const int find_index = find_utf_8_bit_head(
        reinterpret_cast<unsigned char*>(lpszSrc), static_cast<int>(encode_len));

    if (find_index > -1)
    {
        // Cut the dangling bytes off in place, as the original did.
        encode_len -= static_cast<std::size_t>(find_index);
        std::memset(lpszSrc + encode_len, 0, static_cast<std::size_t>(find_index));
    }

    return base64_encode(reinterpret_cast<unsigned char*>(lpszSrc),
                         static_cast<unsigned int>(encode_len));
}

/// Decodes base64 text into the bytes it represents.
///
/// Decoding stops at the first '=' or at the first byte that is not in the
/// base64 alphabet. (Previously such bytes were turned into the value 0xFF
/// and decoded into garbage.)
///
/// @param bytes_to_encode base64 text to decode (the parameter name is
///                        historical); a null pointer yields an empty string.
/// @param in_len          number of bytes available in bytes_to_encode.
/// @return the decoded bytes, which may contain embedded NUL characters.
std::string CBase64::base64_decode(const unsigned char* bytes_to_encode, unsigned int in_len)
{
    std::string ret;
    if (!bytes_to_encode)
        return ret;

    ret.reserve(static_cast<std::string::size_type>(in_len) / 4 * 3 + 2);

    int i = 0;
    unsigned char char_array_4[4];
    unsigned char char_array_3[3];

    for (unsigned int pos = 0; pos < in_len; ++pos)
    {
        const unsigned char c = bytes_to_encode[pos];
        if (c == '=')
            break; // padding marks the end of the data

        const std::string::size_type value = base64_chars.find(static_cast<char>(c));
        if (value == std::string::npos)
            break; // not a base64 character

        char_array_4[i++] = static_cast<unsigned char>(value);
        if (i == 4)
        {
            join_quartet(char_array_4, char_array_3);
            for (int k = 0; k < 3; k++)
                ret += static_cast<char>(char_array_3[k]);
            i = 0;
        }
    }

    if (i)
    {
        // A partial group of i characters carries i - 1 complete bytes.
        for (int j = i; j < 4; j++)
            char_array_4[j] = 0;

        join_quartet(char_array_4, char_array_3);

        for (int j = 0; j < i - 1; j++)
            ret += static_cast<char>(char_array_3[j]);
    }

    return ret;
}

// Disabled MFC (CString / LPCTSTR) variant, kept for reference.
//CString CBase64::Base64Decode(LPCTSTR lpszSrc)
//{
//    char* strSrc = new char[_tcslen(lpszSrc)+1];
//    ZeroMemory(strSrc, _tcslen(lpszSrc)+1);
//    strcpy(strSrc, (char*)(_bstr_t(lpszSrc)));
//    std::string str = base64_decode((unsigned char*)strSrc, (int)strlen(strSrc));
//    CString strDst = str.c_str();
//    return strDst;
//}

/// Decodes a NUL-terminated base64 string.
///
/// @param lpszSrc NUL-terminated base64 text; null yields an empty string.
/// @return the decoded bytes (see base64_decode).
std::string CBase64::Base64Decode(char* lpszSrc)
{
    if (!lpszSrc)
        return std::string();

    return base64_decode(reinterpret_cast<unsigned char*>(lpszSrc),
                         static_cast<unsigned int>(std::strlen(lpszSrc)));
}