朋友让我帮他写个gb2312->utf-8的字符转换程序,找了半天没有在网上找到合适的,于是自己动手写了一个,呵呵。把它贴在这里,免得以后忘记了 ^_^
实现思路大致如下:
- 取得一个汉字的Unicode码
- 把Unicode码分解为两个16进制数据字符串(丢弃前两个字节)
- 把这两个16进制数据字符串转换成二进制数据字符串
- 把16位的二进制数据字符串分解为三个串:第一个串取4个位(第0~3位),在高位加上标记位“1110”;第二个串(第4~9位)和第三个串(第10~15位)各取6个位,分别在高位加上标记位“10”
- 把这三个二进制串分别转换为10进制数据并赋值给字节型数组
- 根据这个字节型数组构造UTF-8字符
java 代码
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;

/**
 * Demonstrates how UTF-8 encodes Unicode text by assembling the byte sequences by hand.
 *
 * <p>A Java {@code String} is already Unicode (UTF-16) in memory, so "converting to UTF-8"
 * means producing bytes: see {@link #toUtf8Bytes(String)}. {@link #gbToUtf8(String)} is kept
 * for existing callers and returns the text obtained by decoding those bytes again.
 *
 * <p>2007-8-10 jyin at gomez dot com
 */
public class CharsetConvertor {

    /** Output file used by {@link #main(String[])} when no path argument is given. */
    private static final String DEFAULT_OUTPUT_PATH = "D:/test.txt";

    /** U+FFFD, substituted for surrogate code units that are not part of a valid pair. */
    private static final int REPLACEMENT_CODE_POINT = 0xFFFD;

    /**
     * Writes a mixed ASCII/Chinese sample string to a file as UTF-8.
     *
     * @param args optional; {@code args[0]} overrides the default output path
     */
    public static void main(String[] args) {
        // Non-ASCII characters are written as Unicode escapes so the sample does not depend
        // on the encoding javac assumes for this source file.
        String str = "This is a test for *\u4e2d\u7f51\uff01@#$\u3002\uff0c\uff1f";
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_OUTPUT_PATH;
        try {
            FileOutputStream out = new FileOutputStream(new File(path));
            try {
                out.write(gbToUtf8(str).getBytes("UTF-8"));
            } finally {
                // Close even when write() fails so the file handle is not leaked.
                out.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Encodes {@code str} as UTF-8 by hand and decodes the bytes back into a {@code String}.
     *
     * <p>For well-formed input the result equals the input; unpaired surrogates come back
     * as U+FFFD.
     *
     * @param str text to convert; must not be {@code null}
     * @return the text decoded from the hand-built UTF-8 bytes
     * @throws UnsupportedEncodingException declared for compatibility with existing callers
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static String gbToUtf8(String str) throws UnsupportedEncodingException {
        return new String(toUtf8Bytes(str), "UTF-8");
    }

    /**
     * Builds the UTF-8 byte sequence for {@code str} directly from its code points.
     *
     * <p>Working from the code point value (rather than from the bytes of an intermediate
     * UTF-16 encoding) keeps the result independent of that encoding's byte order and BOM.
     *
     * @param str text to encode; must not be {@code null}
     * @return the UTF-8 encoding of {@code str}
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public static byte[] toUtf8Bytes(String str) {
        int length = str.length();
        // Worst case is 3 bytes per UTF-16 code unit (a surrogate pair is 2 units -> 4 bytes).
        byte[] buffer = new byte[length * 3];
        int count = 0;
        int index = 0;
        while (index < length) {
            int codePoint = str.codePointAt(index);
            index += Character.charCount(codePoint);
            if (codePoint >= 0xD800 && codePoint <= 0xDFFF) {
                // codePointAt returns the bare surrogate when it is unpaired; such a value
                // has no valid UTF-8 form.
                codePoint = REPLACEMENT_CODE_POINT;
            }
            if (codePoint < 0x80) {
                // 0xxxxxxx
                buffer[count++] = (byte) codePoint;
            } else if (codePoint < 0x800) {
                // 110xxxxx 10xxxxxx
                buffer[count++] = (byte) (0xC0 | (codePoint >> 6));
                buffer[count++] = (byte) (0x80 | (codePoint & 0x3F));
            } else if (codePoint < 0x10000) {
                // 1110xxxx 10xxxxxx 10xxxxxx
                buffer[count++] = (byte) (0xE0 | (codePoint >> 12));
                buffer[count++] = (byte) (0x80 | ((codePoint >> 6) & 0x3F));
                buffer[count++] = (byte) (0x80 | (codePoint & 0x3F));
            } else {
                // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                buffer[count++] = (byte) (0xF0 | (codePoint >> 18));
                buffer[count++] = (byte) (0x80 | ((codePoint >> 12) & 0x3F));
                buffer[count++] = (byte) (0x80 | ((codePoint >> 6) & 0x3F));
                buffer[count++] = (byte) (0x80 | (codePoint & 0x3F));
            }
        }
        byte[] result = new byte[count];
        System.arraycopy(buffer, 0, result, 0, count);
        return result;
    }
}
另外:
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;

/**
 * Percent-encoding helpers for UTF-8 text.
 *
 * <p>Despite the method names, no GB2312 bytes are involved: {@link #gb2312ToUtf8(String)}
 * URL-encodes a string using UTF-8 (for example the two characters U+4E2D U+56FD become
 * {@code %E4%B8%AD%E5%9B%BD}) and {@link #utf8Togb2312(String)} reverses that.
 */
public class UnicodeChange {

    private static final String UTF_8 = "UTF-8";

    /**
     * Decodes a URL-encoded string whose percent escapes are UTF-8 bytes; {@code +} becomes
     * a space. (Labelled "UTF-8 -> GB2312" in the original listing.)
     *
     * @param str URL-encoded text; must not be {@code null}
     * @return the decoded text
     * @throws IllegalArgumentException if the JDK decoder rejects a malformed or truncated
     *         percent escape
     */
    public static String utf8Togb2312(String str) {
        try {
            return URLDecoder.decode(str, UTF_8);
        } catch (UnsupportedEncodingException e) {
            // Every Java platform is required to support UTF-8, so this is not expected.
            throw new IllegalStateException("UTF-8 charset is not available", e);
        }
    }

    /**
     * URL-encodes a string using UTF-8 percent escapes. (Labelled "GB2312 -> UTF-8" in the
     * original listing.)
     *
     * @param str text to encode; must not be {@code null}
     * @return the URL-encoded form of {@code str}
     */
    public static String gb2312ToUtf8(String str) {
        try {
            return URLEncoder.encode(str, UTF_8);
        } catch (UnsupportedEncodingException e) {
            // Every Java platform is required to support UTF-8, so this is not expected.
            throw new IllegalStateException("UTF-8 charset is not available", e);
        }
    }

    /** Encodes a Traditional Chinese sample sentence, prints it, decodes it and prints it again. */
    public static void main(String[] args) {
        // Written with Unicode escapes so the sample does not depend on the encoding javac
        // assumes for this source file.
        String str = "\u8f38\u5165\u7c21\u9ad4\u5b57\uff0c\u9ede\u4e0b\u9762\u7e41\u9ad4\u5b57"
                + "\u6309\u9215\u9032\u884c\u5728\u7dda\u8f49\u63db.";
        str = gb2312ToUtf8(str);
        System.out.println(str);
        str = utf8Togb2312(str);
        System.out.println(str);
    }
}