package
com.hive.myudf;
import
java.net.URL;
import
java.util.regex.Matcher;
import
java.util.regex.Pattern;
import
org.apache.hadoop.hive.ql.exec.UDF;
/**
 * Hive UDF that rebuilds a full URL from an nginx access-log host field and
 * request line (e.g. {@code "GET /path?a=1 HTTP/1.1"}), then extracts a
 * component of it.
 *
 * <p>Usage: {@code evaluate(host, requestLine, part)} where {@code part} is one
 * of HOST, PATH, QUERY, REF, PROTOCOL, FILE, AUTHORITY, USERINFO; or
 * {@code evaluate(host, requestLine, "QUERY", key)} to pull a single
 * query-string parameter.
 *
 * <p>Not thread-safe (caches the last query-key pattern per instance), which is
 * fine for Hive: each UDF instance is used by a single task thread.
 */
public class UDFNginxParseUrl extends UDF {

    /** Scheme prepended to the host when reconstructing the URL. */
    private static final String SCHEME = "http://";

    /**
     * Splits an nginx request line "METHOD URI PROTOCOL" into three groups;
     * group 2 is the request URI. Compiled once — the original recompiled this
     * on every row.
     */
    private static final Pattern REQUEST_LINE = Pattern.compile("(.+?) +(.+?) (.+)");

    /** Last query-string key seen by the 4-arg evaluate, to reuse its pattern. */
    private String lastKey = null;

    /** Compiled pattern for {@link #lastKey}. */
    private Pattern lastKeyPattern = null;

    public UDFNginxParseUrl() {
    }

    /**
     * Extracts one component of the URL built from {@code host1} and the
     * request URI inside {@code urlStr}.
     *
     * @param host1         host/authority portion from the log (e.g. "example.com")
     * @param urlStr        full nginx request line, e.g. "GET /p?q=1 HTTP/1.1"
     * @param partToExtract HOST, PATH, QUERY, REF, PROTOCOL, FILE, AUTHORITY or USERINFO
     * @return the requested component, or {@code null} if any argument is null,
     *         the request line does not parse, the URL is malformed, or the
     *         part name is unknown
     */
    public String evaluate(String host1, String urlStr, String partToExtract) {
        if (host1 == null || urlStr == null || partToExtract == null) {
            return null;
        }

        Matcher m1 = REQUEST_LINE.matcher(urlStr);
        if (!m1.matches()) {
            // BUG FIX: the original fell through here and dereferenced the
            // `url` instance field, which was either null (NPE) or stale from
            // a previous row (silently wrong answer). Unparseable input -> null.
            return null;
        }

        // Local, not an instance field: no cross-row state to go stale.
        URL url;
        try {
            url = new URL(SCHEME + host1 + m1.group(2));
        } catch (Exception e) {
            // Malformed reconstructed URL: per Hive UDF convention, yield NULL.
            return null;
        }

        switch (partToExtract) {
            case "HOST":
                return url.getHost();
            case "PATH":
                return url.getPath();
            case "QUERY":
                return url.getQuery();
            case "REF":
                return url.getRef();
            case "PROTOCOL":
                return url.getProtocol();
            case "FILE":
                return url.getFile();
            case "AUTHORITY":
                return url.getAuthority();
            case "USERINFO":
                return url.getUserInfo();
            default:
                return null;
        }
    }

    /**
     * Extracts the value of a single query-string parameter.
     *
     * @param host          host/authority portion from the log
     * @param urlStr        full nginx request line
     * @param partToExtract must be "QUERY"; anything else returns null
     * @param key           query parameter name to look up
     * @return the parameter's value, or {@code null} if absent or on bad input
     */
    public String evaluate(String host, String urlStr, String partToExtract, String key) {
        // "QUERY".equals(...) also guards against a null partToExtract,
        // which would have NPE'd in the original.
        if (!"QUERY".equals(partToExtract) || key == null) {
            return null;
        }

        String query = this.evaluate(host, urlStr, partToExtract);
        if (query == null) {
            return null;
        }

        // Cache the compiled pattern across rows for a repeated key.
        // Pattern.quote prevents regex metacharacters in the key (e.g. "a.b")
        // from corrupting the match — the original concatenated the key raw.
        if (!key.equals(lastKey)) {
            lastKeyPattern = Pattern.compile("(&|^)" + Pattern.quote(key) + "=([^&]*)");
            lastKey = key;
        }

        Matcher m = lastKeyPattern.matcher(query);
        return m.find() ? m.group(2) : null;
    }
}