一、所需依赖
<dependency>
<groupId>org.apache.hive</groupId>
<artifactId>hive-exec</artifactId>
<version>1.1.0</version>
</dependency>
<dependency>
<groupId>com.janeluo</groupId>
<artifactId>ikanalyzer</artifactId>
<version>2012_u6</version>
</dependency>
二、实现代码
package com.link.datawarehouse.hive;
/**
* @author 包菜
* @date 2020/12/8 15:08
*/
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.hive.ql.exec.UDF;
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;
public class IkParticiple extends UDF {

    /**
     * Segments the input text with the IK analyzer (smart mode) and returns
     * the lower-cased tokens joined by single spaces. The output keeps the
     * trailing space of the original implementation for compatibility.
     *
     * @param input text to segment; may be {@code null}
     * @return space-separated lower-cased tokens (with trailing space),
     *         or {@code null} when the input is null or blank
     */
    public String evaluate(String input) {
        // Return null directly when there is nothing to segment.
        if (input == null || input.trim().length() == 0) {
            return null;
        }
        // StringBuilder instead of repeated String concatenation: the old
        // output = output + token loop was accidentally O(n^2).
        StringBuilder output = new StringBuilder();
        // Pin the charset explicitly on both encode and decode: the no-arg
        // getBytes()/InputStreamReader use the JVM platform default, which
        // garbles Chinese text on non-UTF-8 hosts (e.g. GBK/ISO-8859-1).
        byte[] bytes = input.getBytes(StandardCharsets.UTF_8);
        // try-with-resources closes the reader even if segmentation fails
        // (the original leaked the stream/reader).
        try (Reader reader = new InputStreamReader(
                new ByteArrayInputStream(bytes), StandardCharsets.UTF_8)) {
            IKSegmenter segmenter = new IKSegmenter(reader, true); // true = smart mode
            Lexeme lexeme;
            while ((lexeme = segmenter.next()) != null) {
                output.append(lexeme.getLexemeText().toLowerCase()).append(' ');
            }
        } catch (IOException e) {
            // Best-effort: return whatever was segmented so far rather than
            // failing the whole Hive query on a tokenizer I/O error.
            e.printStackTrace();
        }
        return output.toString();
    }

    /** Manual smoke test used during development. */
    public static void main(String[] args) {
        System.out.println(new IkParticiple().evaluate("超级喜欢写代码"));
    }
}
三、数据结果
四、打包上传,创建函数
注意:自定义UDF函数只能在相应的库使用
select linkdata_warehouse.fenciqi('超级喜欢写代码');