package com.qiuyueyuan.common.utils;
import java.io.UnsupportedEncodingException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sourceforge.pinyin4j.PinyinHelper;
import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat;
import net.sourceforge.pinyin4j.format.HanyuPinyinToneType;
/**
 * Utility for converting Chinese characters (hanzi) to pinyin or to pinyin initials.
 *
 * <p>Two independent strategies are provided:
 * <ul>
 *   <li>{@link #convertHanzi2Pinyin(String, boolean)} delegates to pinyin4j.</li>
 *   <li>{@link #getFirstLetter(String)} / {@link #getAllFirstLetter(String)} use a lookup table
 *       over GB2312 section-position codes and do not touch pinyin4j.</li>
 * </ul>
 *
 * @author yanxinjie
 */
public class ChinesToEnglishUtil {

    /** First code unit treated as a hanzi by {@link #convertHanzi2Pinyin} (U+4E00). */
    private static final char HANZI_RANGE_START = '\u4E00';

    /** Last code unit treated as a hanzi by {@link #convertHanzi2Pinyin} (U+9FFF). */
    private static final char HANZI_RANGE_END = '\u9FFF';

    /** Offset subtracted from each GB2312 byte to obtain its section / position number. */
    private static final int GB2312_CODE_OFFSET = 160;

    /**
     * Section-position codes at which a new initial starts. Entry {@code i} is the inclusive
     * lower bound for {@code FIRST_LETTERS[i]}; entry {@code i + 1} is its exclusive upper bound,
     * which is why this table has one more entry than {@link #FIRST_LETTERS}.
     */
    private static final int[] SEC_POS_BOUNDS = { 1601, 1637, 1833, 2078, 2274,
            2302, 2433, 2594, 2787, 3106, 3212, 3472, 3635, 3722, 3730, 3858,
            4027, 4086, 4390, 4558, 4684, 4925, 5249, 5590 };

    /** Initial letter assigned to each interval of {@link #SEC_POS_BOUNDS}. */
    private static final String[] FIRST_LETTERS = { "a", "b", "c", "d", "e",
            "f", "g", "h", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s",
            "t", "w", "x", "y", "z" };

    /**
     * Converts every hanzi in the input to toneless pinyin and leaves all other characters as-is.
     *
     * <p>A character counts as a hanzi when it lies in U+4E00..U+9FFF. When pinyin4j yields no
     * reading for such a character, nothing is appended for it in either mode. (Previously the
     * initials mode threw {@code StringIndexOutOfBoundsException} in that situation.)
     *
     * @param hanzi text to convert; {@code null} or whitespace-only input yields {@code ""}
     * @param flag  {@code true} to append the full pinyin of each hanzi, {@code false} to append
     *              only the first letter of its pinyin
     * @return the converted text
     */
    public static String convertHanzi2Pinyin(String hanzi, boolean flag) {
        if (hanzi == null || "".equals(hanzi.trim())) {
            return "";
        }
        // One format object per call instead of one per character.
        HanyuPinyinOutputFormat outputFormat = new HanyuPinyinOutputFormat();
        outputFormat.setToneType(HanyuPinyinToneType.WITHOUT_TONE);

        StringBuilder sb = new StringBuilder(hanzi.length());
        for (int i = 0; i < hanzi.length(); i++) {
            char unit = hanzi.charAt(i);
            if (!isHanzi(unit)) {
                sb.append(unit);
                continue;
            }
            String pinyin = convertSingleHanzi2Pinyin(unit, outputFormat);
            if (pinyin.isEmpty()) {
                // No reading available: skip instead of calling charAt(0) on an empty string.
                continue;
            }
            if (flag) {
                sb.append(pinyin);
            } else {
                sb.append(pinyin.charAt(0));
            }
        }
        return sb.toString();
    }

    /** Returns whether the code unit lies in U+4E00..U+9FFF (a plain range test, no regex). */
    private static boolean isHanzi(char unit) {
        return unit >= HANZI_RANGE_START && unit <= HANZI_RANGE_END;
    }

    /**
     * Converts a single hanzi to pinyin using the supplied output format.
     *
     * @param hanzi        the character to convert
     * @param outputFormat the pinyin4j output format to apply
     * @return the first reading reported by pinyin4j (only the first one is used for characters
     *         with several readings), or {@code ""} when no reading is available or the
     *         conversion fails
     */
    private static String convertSingleHanzi2Pinyin(char hanzi, HanyuPinyinOutputFormat outputFormat) {
        try {
            String[] readings = PinyinHelper.toHanyuPinyinStringArray(hanzi, outputFormat);
            if (readings == null || readings.length == 0 || readings[0] == null) {
                return "";
            }
            return readings[0];
        } catch (Exception e) {
            // Deliberate best effort: any conversion failure is reported as "no reading".
            return "";
        }
    }

    /**
     * Tests whether a regular expression is found anywhere in a string.
     *
     * <p>This uses {@link Matcher#find()}, so the pattern only has to match a substring unless it
     * is anchored. The pattern is compiled on every call.
     *
     * @param str   the source string
     * @param regex the regular expression
     * @return {@code true} if {@code regex} matches some part of {@code str}
     */
    public static boolean match(String str, String regex) {
        Pattern pattern = Pattern.compile(regex);
        Matcher matcher = pattern.matcher(str);
        return matcher.find();
    }

    /**
     * Returns the pinyin initials of every character of the given string, concatenated.
     *
     * <p>The string is walked by Unicode code point and each character is passed to
     * {@link #getFirstLetter(String)}; whitespace characters therefore contribute nothing.
     *
     * @param str the text to convert; {@code null} or whitespace-only input yields {@code ""}
     * @return the concatenated initials
     */
    public static String getAllFirstLetter(String str) {
        if (str == null || str.trim().length() == 0) {
            return "";
        }
        StringBuilder initials = new StringBuilder(str.length());
        int index = 0;
        while (index < str.length()) {
            // Step by code point so that a surrogate pair is never split in half.
            int next = str.offsetByCodePoints(index, 1);
            initials.append(getFirstLetter(str.substring(index, next)));
            index = next;
        }
        return initials.toString();
    }

    /**
     * Returns the pinyin initial of the first character of the given string.
     *
     * <p>The first character is encoded as GB2312. If that produces a double-byte code whose
     * section-position value is greater than 1600 and less than 5590, the initial is looked up in
     * the interval table. Every other character (ASCII, symbols, double-byte codes outside that
     * range, and characters GB2312 cannot encode) is returned unchanged.
     *
     * @param chinese the text whose first character is examined; {@code null} or whitespace-only
     *                input yields {@code ""}
     * @return a lowercase initial, or the first character itself when no initial is known
     */
    public static String getFirstLetter(String chinese) {
        if (chinese == null || chinese.trim().length() == 0) {
            return "";
        }
        String first = chinese.substring(0, chinese.offsetByCodePoints(0, 1));
        // Each char of the converted string carries one raw GB2312 byte.
        String gbBytes = conversionStr(first, "GB2312", "ISO8859-1");
        if (first.length() != 1 || gbBytes.length() != 2) {
            // Single-byte, unencodable, or supplementary character: hand back the original
            // rather than the '?' replacement byte the encoder substitutes.
            return first;
        }
        int sectorCode = gbBytes.charAt(0) - GB2312_CODE_OFFSET;
        int positionCode = gbBytes.charAt(1) - GB2312_CODE_OFFSET;
        int secPosCode = sectorCode * 100 + positionCode;
        if (secPosCode > 1600 && secPosCode < 5590) {
            for (int i = 0; i < FIRST_LETTERS.length; i++) {
                if (secPosCode >= SEC_POS_BOUNDS[i] && secPosCode < SEC_POS_BOUNDS[i + 1]) {
                    return FIRST_LETTERS[i];
                }
            }
        }
        return first;
    }

    /**
     * Re-interprets a string by encoding it with one charset and decoding the bytes with another.
     *
     * @param str           the string to convert
     * @param charsetName   the charset used to encode {@code str} into bytes
     * @param toCharsetName the charset used to decode those bytes
     * @return the converted string, or {@code str} unchanged when either charset is unsupported
     *         (a message is printed to standard output in that case)
     */
    private static String conversionStr(String str, String charsetName, String toCharsetName) {
        try {
            str = new String(str.getBytes(charsetName), toCharsetName);
        } catch (UnsupportedEncodingException ex) {
            System.out.println("字符串编码转换异常:" + ex.getMessage());
        }
        return str;
    }

    /** Small manual demo: prints the initials of a sample two-character string. */
    public static void main(String[] args) {
        System.out.println(getAllFirstLetter("耐克"));
    }
}