开发者社区 问答 正文

Java过滤敏感词,报异常了 400 请求报错 

用java.util.regex中的Pattern和Matcher过滤敏感词,词库为一个文件,现在执行报异常:Exception in thread "main" java.util.regex.PatternSyntaxException: Unclosed group near index 5972

package filter;

import java.io.BufferedReader; import java.io.FileReader; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.log4j.Logger;

/** * 敏感词过滤 * * Pattern根据正则表达式编译生成pattern * Matcher根据pattern生成matcher * matcher再和字符串匹配 * * @author Matthew * @date 2011-9-5 * @version 1.0 */ public class FilterKeyWorld { private static Logger logger = Logger.getLogger(FilterKeyWorld.class); private static Pattern pattern = null;

public static void initPattern(String filePath) throws Exception{
	StringBuilder sb = new StringBuilder("");	
	BufferedReader bf = new BufferedReader(new FileReader(filePath));
	//读取敏感词库	
	sb.append("(");
	String line;
	int flag = 0;
	while((line = bf.readLine()) != null){
		logger.info(flag + " - " + line);
		flag ++;
		sb.append(line + "|");
	}
	//去除字符串的最后的|
	sb.deleteCharAt(sb.length()-1);			
	sb.append(")");
	pattern = Pattern.compile(sb.toString());
	bf.close();
}

// 执行过滤
private static String doFilter(String str){
	Matcher matcher = pattern.matcher(str);
	str = matcher.replaceAll("*");
	return str;
}

public static void main(String[] args) throws Exception {
	String content = "有个女优山口美树叫淫母苍井空功真相功日功修炼人";
	long start = System.currentTimeMillis();
	String file = "C:/Users/Matthew/Indigo/KeyWordFilter/src/resource/words.data";
	initPattern(file);
	String result = doFilter(content);
	long end = System.currentTimeMillis();
	logger.info(result);
	logger.info("Total: " + (end-start));
} 

}

其中这行出错:pattern = Pattern.compile(sb.toString());

请问是不是Pattern.compile()方法中字符串的长度是不是有长度限制的?

展开
收起
kun坤 2020-05-31 13:02:20 509 分享 版权
1 条回答
写回答
取消 提交回答
  • private static Pattern pattern = null;######这行代码怎么了?######顺便说一下,敏感词库中敏感词比较少时可以过滤######@红薯
    求助

    2020-06-01 09:47:56
    赞同 展开评论