今天在爬取一个网站时,拿到了一段返回的 JSON 数据,但是一看吓了一跳。
查询得知,这是为了更好地传输中文,对 JSON 中的字符进行了 Unicode 转义编码。
下面是工具方法:
1 public static String decodeUnicode(String theString) { 2 char aChar; 3 int len = theString.length(); 4 StringBuffer outBuffer = new StringBuffer(len); 5 for (int x = 0; x < len;) { 6 aChar = theString.charAt(x++); 7 if (aChar == '\\') { 8 aChar = theString.charAt(x++); 9 if (aChar == 'u') { 10 // Read the xxxx 11 int value = 0; 12 for (int i = 0; i < 4; i++) { 13 aChar = theString.charAt(x++); 14 switch (aChar) { 15 case '0': 16 case '1': 17 case '2': 18 case '3': 19 case '4': 20 case '5': 21 case '6': 22 case '7': 23 case '8': 24 case '9': 25 value = (value << 4) + aChar - '0'; 26 break; 27 case 'a': 28 case 'b': 29 case 'c': 30 case 'd': 31 case 'e': 32 case 'f': 33 value = (value << 4) + 10 + aChar - 'a'; 34 break; 35 case 'A': 36 case 'B': 37 case 'C': 38 case 'D': 39 case 'E': 40 case 'F': 41 value = (value << 4) + 10 + aChar - 'A'; 42 break; 43 default: 44 throw new IllegalArgumentException( 45 "Malformed \\uxxxx encoding."); 46 } 47 48 } 49 outBuffer.append((char) value); 50 } else { 51 if (aChar == 't') 52 aChar = '\t'; 53 else if (aChar == 'r') 54 aChar = '\r'; 55 else if (aChar == 'n') 56 aChar = '\n'; 57 else if (aChar == 'f') 58 aChar = '\f'; 59 outBuffer.append(aChar); 60 } 61 } else 62 outBuffer.append(aChar); 63 } 64 return outBuffer.toString(); 65 }
欢迎大家一起说出自己的想法。