1 package org.liufeng.course.util;
2 import java.util.regex.Matcher;
3 import java.util.regex.Pattern;
/**
 * Utility methods illustrating one approach to telling Chinese text apart
 * from English text.
 *
 * <p>All methods are static and keep no mutable state.
 */
public class ChineseAndEnglish_Util {

    /**
     * Matches a string consisting solely of ASCII letters. Compiled once
     * because {@code String.matches} would recompile it on every call.
     */
    private static final Pattern ASCII_LETTERS_ONLY = Pattern.compile("^[a-zA-Z]*");

    /** Matches a run of one or more characters in the range U+4E00..U+9FA5. */
    private static final Pattern CHINESE_RUN = Pattern.compile("[\\u4E00-\\u9FA5]+");

    /**
     * Small demonstration of the three checks; prints each result to
     * standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String str = "我爱你,!?():;“”、。";
        for (char c : str.toCharArray()) {
            // Print the per-character verdict; the call alone has no visible effect.
            System.out.println(c + " -> " + isChinese(c));
        }

        String chinese = "中国god damn";
        System.out.println(isContainChinese(chinese));

        String english = "dfafdabac";
        System.out.println(isEnglish(english));
    }

    /**
     * 1. Tells whether a single character counts as "Chinese" for the
     * purposes of this class, i.e. whether it belongs to one of these
     * Unicode blocks:
     *
     * <ul>
     *   <li>{@code CJK_UNIFIED_IDEOGRAPHS} - CJK unified ideographs</li>
     *   <li>{@code CJK_COMPATIBILITY_IDEOGRAPHS} - CJK compatibility ideographs</li>
     *   <li>{@code CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A} - CJK unified ideographs, extension A</li>
     *   <li>{@code GENERAL_PUNCTUATION} - general punctuation</li>
     *   <li>{@code CJK_SYMBOLS_AND_PUNCTUATION} - CJK symbols and punctuation</li>
     *   <li>{@code HALFWIDTH_AND_FULLWIDTH_FORMS} - halfwidth and fullwidth forms</li>
     * </ul>
     *
     * <p>"CJK" stands for Chinese, Japanese and Korean: the ideograph blocks
     * are shared by those scripts, so a {@code true} result does not prove
     * the character is specifically Chinese. The punctuation and fullwidth
     * blocks are included as well, so characters from those blocks are also
     * reported as Chinese.
     *
     * @param c the character to classify
     * @return {@code true} if {@code c} lies in one of the blocks listed above
     */
    public static boolean isChinese(char c) {
        Character.UnicodeBlock ub = Character.UnicodeBlock.of(c);
        return ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || ub == Character.UnicodeBlock.GENERAL_PUNCTUATION
                || ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
    }

    /**
     * 2. Tells whether a string consists only of ASCII letters
     * ({@code a-z}, {@code A-Z}).
     *
     * <p>Note that this is an "entirely letters" check, not a "contains a
     * letter" check: digits, spaces or punctuation anywhere in the string
     * make it return {@code false}. The empty string contains no offending
     * character and therefore yields {@code true}.
     *
     * @param charaString the string to test; may be {@code null}
     * @return {@code true} if every character is an ASCII letter;
     *         {@code false} otherwise or if {@code charaString} is {@code null}
     */
    public static boolean isEnglish(String charaString) {
        if (charaString == null) {
            return false;
        }
        return ASCII_LETTERS_ONLY.matcher(charaString).matches();
    }

    /**
     * 3. Tells whether a string contains at least one character in the
     * range U+4E00..U+9FA5.
     *
     * <p>This range is narrower than the set of blocks accepted by
     * {@link #isChinese(char)}: punctuation, fullwidth forms, extension A
     * and compatibility ideographs are not detected here.
     *
     * @param str the string to scan; may be {@code null}
     * @return {@code true} if such a character occurs anywhere in
     *         {@code str}; {@code false} otherwise or if {@code str} is
     *         {@code null}
     */
    public static boolean isContainChinese(String str) {
        if (str == null) {
            return false;
        }
        return CHINESE_RUN.matcher(str).find();
    }
}