内容
-
创建某个目录的索引
-
查询索引
1.基于文件目录,创建索引
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
|
/**
 * Builds a Lucene index over every regular file found (recursively) under a directory.
 *
 * <p>One document is added per file, with these fields:
 * <ul>
 *   <li>{@code filename} - absolute path, stored, indexed as a single un-analyzed term</li>
 *   <li>{@code content}  - file text, tokenized from a reader (not stored)</li>
 *   <li>{@code type}     - file extension, stored, un-analyzed, without norms</li>
 *   <li>{@code date}     - last-modified time in milliseconds, stored and indexed</li>
 *   <li>{@code size}     - file length in bytes as an int, stored but not indexed</li>
 * </ul>
 *
 * <p>I/O failures are reported with {@code printStackTrace()} and are not rethrown.
 * The writer is opened with the configuration's default open mode, so running this
 * method again on the same target directory adds the same files a second time.
 *
 * @param indexedDir
 *            directory whose files are to be indexed
 * @param indexTargetDir
 *            directory in which the index is written
 */
public void newIndex(String indexedDir, String indexTargetDir) {
    File fileDir = new File(indexedDir);
    // null extension filter = all files; true = descend into sub-directories
    Collection<File> files = FileUtils.listFiles(fileDir, null, true);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35,
            new StandardAnalyzer(Version.LUCENE_35));
    File indexFile = new File(indexTargetDir);

    Directory idxDirc = null;
    IndexWriter iw = null;
    try {
        idxDirc = FSDirectory.open(indexFile);
        iw = new IndexWriter(idxDirc, config);
        for (File file : files) {
            if (!file.isFile()) {
                continue;
            }
            Reader reader = new FileReader(file);
            try {
                Document doc = new Document();
                String type = FilenameUtils.getExtension(file.getName());
                // file name field
                doc.add(new Field("filename", file.getAbsolutePath(),
                        Field.Store.YES, Field.Index.NOT_ANALYZED));
                // file content field
                doc.add(new Field("content", reader));
                // file type (extension)
                doc.add(new Field("type", type, Field.Store.YES,
                        Field.Index.NOT_ANALYZED_NO_NORMS));
                // file date (last-modified timestamp)
                doc.add(new NumericField("date", Field.Store.YES, true)
                        .setLongValue(file.lastModified()));
                // file size; the narrowing cast keeps the original int field type,
                // so lengths above Integer.MAX_VALUE are truncated
                doc.add(new NumericField("size", Field.Store.YES, false)
                        .setIntValue((int) file.length()));
                iw.addDocument(doc);
            } finally {
                // Release the file handle even when building or adding the document fails.
                reader.close();
            }
        }
    } catch (IOException e) {
        // Also covers CorruptIndexException, LockObtainFailedException and
        // FileNotFoundException, which are IOException subclasses.
        e.printStackTrace();
    } finally {
        // The writer/directory may never have been opened, so guard against null.
        if (iw != null) {
            try {
                iw.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (idxDirc != null) {
            try {
                idxDirc.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
|
创建索引
1
2
3
4
|
// Source directory to scan and the directory that receives the index files.
String indexedDir = "F:\\lucene\\data";
String indexTargetDir = "F:\\lucene\\index";

// Build the index.
Index01 util = new Index01();
util.newIndex(indexedDir, indexTargetDir);
|
执行成功后,F:/lucene/index目录下会有fdt,fdx,fnm,nrm,prx等等文件。
2.根据文件类型进行查询
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
|
/**
 * Prints the indexed files whose {@code type} field equals the given value.
 *
 * <p>Runs a {@code TermQuery} on the {@code type} field and writes one line per hit
 * to standard output, showing the stored {@code filename} and {@code size} fields.
 * Any exception is reported with {@code printStackTrace()} and is not rethrown.
 *
 * @param indexPath path of the index directory
 * @param type      file type to match against the {@code type} field
 * @param size      maximum number of hits to print
 */
public void queryByFileType(String indexPath, String type, int size) {
    Directory dir = null;
    IndexReader reader = null;
    IndexSearcher searcher = null;
    try {
        dir = FSDirectory.open(new File(indexPath));
        reader = IndexReader.open(dir);
        searcher = new IndexSearcher(reader);

        Query query = new TermQuery(new Term("type", type));
        TopDocs tops = searcher.search(query, size);
        for (ScoreDoc sd : tops.scoreDocs) {
            Document doc = searcher.doc(sd.doc);
            System.out.println("filename:" + doc.get("filename") + "\t"
                    + "size:" + doc.get("size"));
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Any of these may still be null if opening the index failed part-way,
        // and each is closed independently so one failure does not skip the rest.
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (dir != null) {
            try {
                dir.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
|
1
2
3
4
|
// List up to 100 indexed files of type "java".
System.out.println("查询类型为\"java\"文件列表");
util.queryByFileType(indexTargetDir, "java", 100);

// List up to 100 indexed files of type "xml".
System.out.println("查询类型为\"xml\"文件列表");
util.queryByFileType(indexTargetDir, "xml", 100);
|
执行结果
1
2
3
4
5
6
7
8
9
10
11
12
13
14
|
查询类型为"java"文件列表
filename:F:\lucene\data\java\com\service\BaseService.java size:353
filename:F:\lucene\data\java\com\validate\Person.java size:471
filename:F:\lucene\data\java\com\validate\PersonValidator.java size:682
filename:F:\lucene\data\java\com\spring\aop\Person.java size:101
filename:F:\lucene\data\java\com\spring\aop\Man.java size:151
filename:F:\lucene\data\java\com\spring\aop\PersonProxy.java size:613
filename:F:\lucene\data\java\org\wh\tech\spring\aop\BusinessService.java size:173
filename:F:\lucene\data\java\org\wh\tech\spring\aop\BusinessServiceImpl.java size:375
filename:F:\lucene\data\java\org\wh\tech\spring\aop\XMLExampleAspect.java size:1654
查询类型为"xml"文件列表
filename:F:\lucene\data\resources\service.xml size:2429
filename:F:\lucene\data\resources\service.xml size:2429
filename:F:\lucene\data\resources\service.xml size:2429
|
此时,发现查询到的xml文件有重复。这是因为lucene的IndexWriter默认以追加(CREATE_OR_APPEND)方式打开索引:如果对同一目录多次执行创建索引的代码,同一个文件每次都会被再次写入已有索引,查询时就会出现重复记录。打开方式由下面的枚举定义:
1
2
|
org.apache.lucene.index.IndexWriterConfig.OpenMode
// Open modes accepted by IndexWriterConfig.setOpenMode (descriptions per the Lucene 3.5 Javadoc):
//   CREATE           - create a new index, overwriting any existing one
//   APPEND           - open an existing index
//   CREATE_OR_APPEND - create the index if none exists, otherwise append to it (the default)
public static enum OpenMode { CREATE, APPEND, CREATE_OR_APPEND }
|
可以通过以下两种方式避免重复:先清空已有索引再重新创建,或者设置索引的打开模式。
1.先删后增
1
|
org.apache.lucene.index.IndexWriter.deleteAll()
|
2.设置创建索引的OPEN_MODE
1
2
3
|
// Same writer configuration as before, but opened in CREATE mode.
StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35, analyzer);
config.setOpenMode(OpenMode.CREATE);
|
本文转自 randy_shandong 51CTO博客,原文链接:http://blog.51cto.com/dba10g/1359564,如需转载请自行联系原作者